From 7b38b7e23190631c15ccce17a6c11ea64af4d1e0 Mon Sep 17 00:00:00 2001
From: kermorvant <kermorvant@gmail.com>
Date: Fri, 23 Aug 2019 09:54:33 +0200
Subject: [PATCH]  example script to upload files to arkindex through S3

---
 commands/import_local_files.py | 163 +++++++++++++++++++++++++++++++++
 1 file changed, 163 insertions(+)
 create mode 100644 commands/import_local_files.py

diff --git a/commands/import_local_files.py b/commands/import_local_files.py
new file mode 100644
index 0000000..3499322
--- /dev/null
+++ b/commands/import_local_files.py
@@ -0,0 +1,163 @@
+#!/usr/bin/env python3
+"""Import local images in a directory to Arkindex."""
+
+from apistar.exceptions import ErrorResponse
+from arkindex import ArkindexClient, options_from_env
+import argparse
+import glob
+import hashlib
+import imghdr
+import logging
+import os
+import requests
+import uuid
+
+SUPPORTED_IMG = ['jpeg', 'png']  # imghdr type names accepted for import
+
+# Log at INFO level, prefixing each message with its level name.
+logging.basicConfig(format='[%(levelname)s] %(message)s', level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+# Shared API client; main() configures it before any request is sent.
+ark_client = ArkindexClient()
+
+
+class LocalPage(object):
+    """A local image file to upload to S3 and register as a page in Arkindex."""
+
+    def __init__(self, page_path, corpus_id, volume_id, index):
+        """Open the image at page_path; index is sent as the page's folio metadata."""
+        self.page_path = page_path
+        self.page_name = os.path.basename(self.page_path)
+        self.page_file = open(self.page_path, 'rb')  # closed when run() finishes
+        self.corpus_id = corpus_id
+        self.volume_id = volume_id
+        self.index = index
+        logger.info('import {}'.format(self.page_path))
+
+    def hash_image(self):
+        """Return the hexadecimal MD5 digest of the whole image file."""
+        md5 = hashlib.md5()
+        self.page_file.seek(0)  # rewind so repeated calls still hash the full file
+        for chunk in iter(lambda: self.page_file.read(4096), b""):
+            md5.update(chunk)
+        return md5.hexdigest()
+
+    def upload_image(self):
+        """Create or retrieve the image, upload newly created content, then validate it."""
+        logger.info('Creating image for page {}'.format(self.page_path))
+        try:
+            self.image = ark_client.request('CreateImage', body={'hash': self.hash_image()})
+        except ErrorResponse as e:
+            # Only a 400 carrying an image id is recoverable; re-raise the rest for run() to log.
+            if e.status_code != 400 or 'id' not in e.content:
+                raise
+            self.image = ark_client.request('RetrieveImage', id=e.content['id'])
+        else:
+            logger.info('Uploading image from page {} as {}'.format(self.page_path, self.image['id']))
+            self.page_file.seek(0)
+            # Surface a rejected upload instead of validating an image that was never stored.
+            requests.put(self.image['s3_put_url'], data=self.page_file).raise_for_status()
+
+        logger.info('Validating image {}'.format(self.image['id']))
+        ark_client.request('PartialUpdateImage', id=self.image['id'], body={'status': 'checked'})
+
+    def create_page(self):
+        """Create the page element under the volume; needs self.image from upload_image()."""
+        logger.info('Creating page {}'.format(self.page_path))
+        self.page = ark_client.request('CreateElement', body={
+            'corpus': self.corpus_id, 'type': 'page', 'parent': self.volume_id,
+            'name': self.page_name, 'image': self.image['id'],
+            'metadata': {'folio': str(self.index)},
+        })
+
+    def run(self):
+        """Upload the image and create the page, logging failures instead of raising them."""
+        try:
+            self.upload_image()
+            self.create_page()
+        except (AssertionError, requests.RequestException) as e:
+            logger.error('Failed importing page {}: {}'.format(self.page_path, e))
+        except ErrorResponse as e:
+            logger.error('Failed importing page {}: {} - {}'.format(self.page_path, e.status_code, e.content))
+        finally:
+            self.page_file.close()
+
+
+class LocalVolume():
+    """A local directory imported as one Arkindex volume, one page per supported image."""
+
+    def __init__(self, directory=None, corpus=None, volume_name=None):
+        """Store the directory path, the volume name and the corpus id (stringified)."""
+        self.local_dir = directory
+        self.volume_name = volume_name
+        self.corpus_id = str(corpus)
+
+    def create_volume(self):
+        """Create the volume element in Arkindex and keep the API response in self.volume."""
+        logger.info('Creating volume {}'.format(self.volume_name))
+        self.volume = ark_client.request('CreateElement', body={
+            'corpus': self.corpus_id,
+            'type': 'volume',
+            'name': self.volume_name,
+        })
+
+    def run(self):
+        """Create the volume, then import each supported image as a page, numbered from 1.
+
+        Files are visited in sorted path order so folio numbers are reproducible; directories
+        and unsupported files are skipped. AssertionError and ErrorResponse are logged, not raised.
+        """
+        try:
+            self.create_volume()
+            idx = 1
+            for page_path in sorted(glob.glob(os.path.join(self.local_dir, '*'))):
+                # imghdr.what() opens the path, so it must only be given regular files.
+                if os.path.isfile(page_path) and imghdr.what(page_path) in SUPPORTED_IMG:
+                    LocalPage(page_path, corpus_id=self.corpus_id,
+                              volume_id=self.volume['id'], index=idx).run()
+                    idx += 1
+                else:
+                    logger.info('Skip non image file {}'.format(page_path))
+        except AssertionError as e:
+            logger.error('Failed importing volume {}: {}'.format(self.volume_name, e))
+        except ErrorResponse as e:
+            logger.error('Failed importing volume {}: {} - {}'.format(self.volume_name, e.status_code, e.content))
+
+
+def main():
+    """Parse the command line, configure the Arkindex client and import the directory."""
+    parser = argparse.ArgumentParser(
+        description='Import local files from directory',
+    )
+    parser.add_argument('directory', help='path to local directory to import')
+    parser.add_argument(
+        '--volume-name',
+        help='name of the volume to be created',
+        required=True,
+    )
+    parser.add_argument(
+        '--corpus',
+        help='UUID of an existing corpus to import into',
+        type=uuid.UUID,
+        required=True,
+    )
+    parser.add_argument(
+        '--sleep',
+        help='Throttle API requests by waiting for a given number of seconds',
+        type=float,
+        default=0,
+    )
+    args = vars(parser.parse_args())
+
+    # Reject a bad path up front: otherwise an empty volume is created before the glob finds nothing.
+    if not os.path.isdir(args['directory']):
+        parser.error('{} is not a directory'.format(args['directory']))
+
+    # 'sleep' configures the client; the remaining keys match LocalVolume's keyword arguments.
+    ark_client.configure(sleep=args.pop('sleep'), **options_from_env())
+    LocalVolume(**args).run()
+
+
+if __name__ == '__main__':
+    main()  # run the import only when this file is executed as a script
-- 
GitLab