Skip to content
Snippets Groups Projects
Commit ca239e78 authored by Erwan Rouchet
Browse files

Move Gallica importer to backend

parent 30e52848
No related branches found
No related tags found
1 merge request: !24 Import volumes from CSV
......@@ -221,6 +221,7 @@ class BaseIndexImporter(ABC):
class IndexImporter(BaseIndexImporter):
"""Basic index importer with image matching based on a regular expression."""
DEFAULT_MASK = r'(?:.*/)?([^/]+)\.idx\.gz'
......@@ -241,3 +242,25 @@ class IndexImporter(BaseIndexImporter):
return next(img for img in self.images if image_id in img.path)
except StopIteration:
raise Image.DoesNotExist
class GallicaIndexImporter(BaseIndexImporter):
    """Special importer due to Gallica's complicated URLs.

    Images are looked up through a dictionary keyed by the last segment of
    each image path; index file paths are mapped to those keys via ``REGEX``.
    """

    # Captures the digits that follow an underscore (leading zeroes dropped),
    # allowing an optional ``_<lowercase letters>`` suffix before ``.idx.gz``.
    REGEX = re.compile(r'.*_0*([0-9]+)(?:_[a-z]+)?\.idx\.gz')

    def __init__(self, path, volume):
        """Initialise the base importer, then index the pages' images.

        The resulting ``self.images`` maps the text after the last ``/`` of
        each image path to the image itself.
        """
        super().__init__(path, volume)
        images_by_name = {}
        for page in self.pages:
            basename = page.zone.image.path.rpartition('/')[2]
            images_by_name[basename] = page.zone.image
        self.images = images_by_name

    def get_image(self, path):
        """Return the image associated with the index file at ``path``.

        Raises ``Image.DoesNotExist`` when ``path`` does not match ``REGEX``
        (a debug message is logged in that case) or when no image is stored
        under the ``'f' + <captured number>`` key.
        """
        # Step 1: extract the number from the index file path.
        try:
            number = GallicaIndexImporter.REGEX.findall(path)[0]
        except IndexError:
            logger.debug('Mask did not match path {}'.format(path))
            raise Image.DoesNotExist

        # Step 2: look the image up under its 'f'-prefixed key.
        try:
            return self.images['f' + number]
        except KeyError:
            raise Image.DoesNotExist
......@@ -34,27 +34,6 @@ def import_annotations(source, raw_path, name, index_root):
from arkindex.images.models import Image
from arkindex.images.importer import BaseIndexImporter, IndexImporter
class GallicaIndexImporter(BaseIndexImporter):
    """Special importer due to Gallica's complicated URLs.

    Resolves index file paths to images by extracting a number from the
    file name and looking up the matching ``f<number>`` image name.
    """

    # One capture group: the number after an underscore, without leading
    # zeroes; an optional ``_<lowercase letters>`` part may precede
    # the ``.idx.gz`` extension.
    REGEX = re.compile(r'.*_0*([0-9]+)(?:_[a-z]+)?\.idx\.gz')

    def __init__(self, path, volume):
        """Run the parent constructor and build the image lookup table."""
        super().__init__(path, volume)
        # Keys are whatever follows the final '/' in each image path.
        self.images = dict(
            (page.zone.image.path.rpartition('/')[2], page.zone.image)
            for page in self.pages
        )

    def get_image(self, path):
        """Find the image for an index file path.

        Raises ``Image.DoesNotExist`` if ``REGEX`` finds nothing in ``path``
        (logged at debug level) or if the computed key is not in
        ``self.images``.
        """
        try:
            captured = GallicaIndexImporter.REGEX.findall(path)[0]
        except IndexError:
            logger.debug('Mask did not match path {}'.format(path))
            raise Image.DoesNotExist
        else:
            image_key = 'f' + captured

        try:
            found = self.images[image_key]
        except KeyError:
            raise Image.DoesNotExist
        return found
if raw_path.startswith('/home/data/indexes'):
raw_path = raw_path[18:]
raw_path = raw_path.lstrip('/')
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment