From ca239e78fc183eea6b1876a0ad11b0f00698ceec Mon Sep 17 00:00:00 2001
From: Erwan Rouchet <rouchet@teklia.com>
Date: Thu, 17 May 2018 17:11:38 +0200
Subject: [PATCH] Move Gallica importer to backend

---
 arkindex/images/importer.py | 23 +++++++++++++++++++++++
 from_csv.py                 | 21 ---------------------
 2 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/arkindex/images/importer.py b/arkindex/images/importer.py
index 377f7a49ea..e28d51e485 100644
--- a/arkindex/images/importer.py
+++ b/arkindex/images/importer.py
@@ -221,6 +221,7 @@ class BaseIndexImporter(ABC):
 
 
 class IndexImporter(BaseIndexImporter):
+    """Basic index importer with image matching based on a regular expression."""
 
     DEFAULT_MASK = r'(?:.*/)?([^/]+)\.idx\.gz'
 
@@ -241,3 +242,25 @@ class IndexImporter(BaseIndexImporter):
             return next(img for img in self.images if image_id in img.path)
         except StopIteration:
             raise Image.DoesNotExist
+
+
+class GallicaIndexImporter(BaseIndexImporter):
+    """Gallica importer: index file number N maps to the image named 'fN'."""
+
+    REGEX = re.compile(r'.*_0*([0-9]+)(?:_[a-z]+)?\.idx\.gz')
+
+    def __init__(self, path, volume):
+        super().__init__(path, volume)
+        self.images = {page.zone.image.path.rpartition('/')[2]: page.zone.image
+                       for page in self.pages}
+
+    def get_image(self, path):
+        """Return the image matching path or raise Image.DoesNotExist."""
+        match = self.REGEX.search(path)
+        if match is None:
+            logger.debug('Mask did not match path %s', path)
+            raise Image.DoesNotExist
+        try:
+            return self.images['f' + match.group(1)]
+        except KeyError:
+            raise Image.DoesNotExist
diff --git a/from_csv.py b/from_csv.py
index 4805a4e517..545474963c 100755
--- a/from_csv.py
+++ b/from_csv.py
@@ -34,27 +34,6 @@ def import_annotations(source, raw_path, name, index_root):
     from arkindex.images.models import Image
     from arkindex.images.importer import BaseIndexImporter, IndexImporter
 
-    class GallicaIndexImporter(BaseIndexImporter):
-        """Special importer due to Gallica's complicated URLs"""
-
-        REGEX = re.compile(r'.*_0*([0-9]+)(?:_[a-z]+)?\.idx\.gz')
-
-        def __init__(self, path, volume):
-            super().__init__(path, volume)
-            self.images = {
-                p.zone.image.path.rpartition('/')[2]: p.zone.image
-                for p in self.pages
-            }
-
-        def get_image(self, path):
-            try:
-                return self.images['f' + GallicaIndexImporter.REGEX.findall(path)[0]]
-            except IndexError:
-                logger.debug('Mask did not match path {}'.format(path))
-                raise Image.DoesNotExist
-            except KeyError:
-                raise Image.DoesNotExist
-
     if raw_path.startswith('/home/data/indexes'):
         raw_path = raw_path[18:]
     raw_path = raw_path.lstrip('/')
-- 
GitLab