From 6460ef2d0a12318cfb99cd07e1ff2fdce0acfd1c Mon Sep 17 00:00:00 2001
From: mlbonhomme <bonhomme@teklia.com>
Date: Tue, 14 Nov 2023 11:26:32 +0100
Subject: [PATCH] Add setting to control whether image paths are prefixed with
 the bucket name in S3 import

---
 arkindex/process/builder.py                         | 2 ++
 arkindex/process/tests/test_create_s3_import.py     | 9 ++++++---
 arkindex/project/config.py                          | 1 +
 arkindex/project/settings.py                        | 1 +
 arkindex/project/tests/config_samples/defaults.yaml | 1 +
 arkindex/project/tests/config_samples/override.yaml | 1 +
 6 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/arkindex/process/builder.py b/arkindex/process/builder.py
index 376d4b3d5b..76458142ce 100644
--- a/arkindex/process/builder.py
+++ b/arkindex/process/builder.py
@@ -319,6 +319,8 @@ class ProcessBuilder(object):
             f' --page-type={shlex.quote(self.process.element_type.slug)}'
             f' --iiif-base-url={shlex.quote(ImageServer.objects.ingest.url)}'
         )
+        if settings.INGEST_PREFIX_BY_BUCKET_NAME:
+            command += ' --bucket-prefix'
         if self.process.prefix:
             command += f' --prefix={shlex.quote(self.process.prefix)}'
         if self.process.element:
diff --git a/arkindex/process/tests/test_create_s3_import.py b/arkindex/process/tests/test_create_s3_import.py
index b8c9565e40..1230c71934 100644
--- a/arkindex/process/tests/test_create_s3_import.py
+++ b/arkindex/process/tests/test_create_s3_import.py
@@ -124,6 +124,7 @@ class TestCreateS3Import(FixtureTestCase):
         INGEST_S3_REGION=None,
         INGEST_S3_ACCESS_KEY='🔑',
         INGEST_S3_SECRET_KEY='its-secret-i-wont-tell-you',
+        INGEST_PREFIX_BY_BUCKET_NAME=True,
     )
     def test_create(self):
         self.user.user_scopes.create(scope=Scope.S3Ingest)
@@ -166,8 +167,8 @@ class TestCreateS3Import(FixtureTestCase):
         self.assertEqual(task.image, 'arkindex-tasks-image')
         self.assertEqual(task.command, f'python -m arkindex_tasks.import_s3 --corpus={self.corpus.id} '
                                        '--bucket=blah --folder-type=volume --page-type=page '
-                                       '--iiif-base-url=https://dev.null.teklia.com --prefix=a/b/c '
-                                       f'--element={element.id}')
+                                       '--iiif-base-url=https://dev.null.teklia.com --bucket-prefix '
+                                       f'--prefix=a/b/c --element={element.id}')
         self.assertDictEqual(task.env, {
             'ARKINDEX_CORPUS_ID': str(self.corpus.id),
             'ARKINDEX_PROCESS_ID': str(process.id),
@@ -187,6 +188,7 @@ class TestCreateS3Import(FixtureTestCase):
         INGEST_S3_ACCESS_KEY='🔑',
         INGEST_S3_SECRET_KEY='its-secret-i-wont-tell-you',
         IMPORTS_WORKER_VERSION='aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa',
+        INGEST_PREFIX_BY_BUCKET_NAME=True,
     )
     def test_defaults(self):
         self.user.user_scopes.create(scope=Scope.S3Ingest)
@@ -225,7 +227,8 @@ class TestCreateS3Import(FixtureTestCase):
         self.assertEqual(task.image, 'arkindex-tasks-image')
         self.assertEqual(task.command, f'python -m arkindex_tasks.import_s3 --corpus={self.corpus.id} '
                                        '--bucket=blah --folder-type=folder --page-type=page '
-                                       '--iiif-base-url=https://dev.null.teklia.com')
+                                       '--iiif-base-url=https://dev.null.teklia.com '
+                                       '--bucket-prefix')
         self.assertDictEqual(task.env, {
             'ARKINDEX_CORPUS_ID': str(self.corpus.id),
             'ARKINDEX_PROCESS_ID': str(process.id),
diff --git a/arkindex/project/config.py b/arkindex/project/config.py
index 9d58928a7d..528cc9ccfc 100644
--- a/arkindex/project/config.py
+++ b/arkindex/project/config.py
@@ -224,6 +224,7 @@ def get_settings_parser(base_dir):
     ingest_parser = add_s3_parser(parser, 'ingest')
     ingest_parser.add_option('imageserver_id', type=int, default=None)
     ingest_parser.add_option('extra_buckets', type=str, many=True, default=[])
+    ingest_parser.add_option('prefix_by_bucket_name', type=bool, default=True)
 
     license_parser = parser.add_subparser('license', default={})
     license_parser.add_option('key', type=str, default=None)
diff --git a/arkindex/project/settings.py b/arkindex/project/settings.py
index c1706deb60..7339c27935 100644
--- a/arkindex/project/settings.py
+++ b/arkindex/project/settings.py
@@ -497,6 +497,7 @@ INGEST_S3_ENDPOINT = conf['ingest']['endpoint']
 INGEST_S3_REGION = conf['ingest']['region']
 INGEST_IMAGESERVER_ID = conf['ingest']['imageserver_id']
 INGEST_EXTRA_BUCKETS = conf['ingest']['extra_buckets']
+INGEST_PREFIX_BY_BUCKET_NAME = conf['ingest']['prefix_by_bucket_name']
 
 # Ponos integration
 _ponos_env = {
diff --git a/arkindex/project/tests/config_samples/defaults.yaml b/arkindex/project/tests/config_samples/defaults.yaml
index 152282022b..aee19f5bcf 100644
--- a/arkindex/project/tests/config_samples/defaults.yaml
+++ b/arkindex/project/tests/config_samples/defaults.yaml
@@ -48,6 +48,7 @@ ingest:
   endpoint: null
   extra_buckets: []
   imageserver_id: null
+  prefix_by_bucket_name: true
   region: null
   secret_access_key: null
 internal_group_id: 2
diff --git a/arkindex/project/tests/config_samples/override.yaml b/arkindex/project/tests/config_samples/override.yaml
index db5fb2a2e8..fd8c571f31 100644
--- a/arkindex/project/tests/config_samples/override.yaml
+++ b/arkindex/project/tests/config_samples/override.yaml
@@ -62,6 +62,7 @@ ingest:
   - a
   - b
   imageserver_id: 999
+  prefix_by_bucket_name: false
   region: middle-earth-1
   secret_access_key: hunter2
 internal_group_id: 4
-- 
GitLab