From 0e3d4668cdbffdbd5fb59528f2a470cf8ea7c7f8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Th=C3=A9o=20Lesage?= <tlesage@teklia.com>
Date: Tue, 26 Mar 2024 14:25:12 +0100
Subject: [PATCH] Remove thumbnail generation from process

---
 .../documents/tests/test_retrieve_elements.py |  1 -
 arkindex/process/builder.py                   | 22 ----------
 ...0032_remove_process_generate_thumbnails.py | 17 ++++++++
 arkindex/process/models.py                    |  3 --
 arkindex/process/serializers/imports.py       |  9 -----
 arkindex/process/tests/test_create_process.py | 40 -------------------
 arkindex/process/tests/test_processes.py      | 13 ++----
 .../process_elements_filter_ml_class.sql      |  1 -
 .../process_elements_filter_type.sql          |  1 -
 .../process_elements_top_level.sql            |  1 -
 .../process_elements_with_image.sql           |  1 -
 11 files changed, 20 insertions(+), 89 deletions(-)
 create mode 100644 arkindex/process/migrations/0032_remove_process_generate_thumbnails.py

diff --git a/arkindex/documents/tests/test_retrieve_elements.py b/arkindex/documents/tests/test_retrieve_elements.py
index 5ad132151a..6556490cbf 100644
--- a/arkindex/documents/tests/test_retrieve_elements.py
+++ b/arkindex/documents/tests/test_retrieve_elements.py
@@ -155,7 +155,6 @@ class TestRetrieveElements(FixtureAPITestCase):
         process = Process.objects.create(
             mode=ProcessMode.Workers,
             creator=self.user,
-            generate_thumbnails=True,
             farm=Farm.objects.first(),
             corpus=self.corpus
         )
diff --git a/arkindex/process/builder.py b/arkindex/process/builder.py
index 6a122eeb07..74619b170e 100644
--- a/arkindex/process/builder.py
+++ b/arkindex/process/builder.py
@@ -62,24 +62,6 @@ class ProcessBuilder(object):
             for parent_slug in parent_slugs
         ) + 1
 
-    def _add_thumbnails(self, import_task_slug: str) -> None:
-        """
-        Automatically adds a thumbnail generation task at the end of each distributed chunk
-        If a specific parent slug is provided, attach the thumbnails generation task to it.
-        """
-        if not self.process.generate_thumbnails:
-            return
-
-        chunks = self._get_elements_json_chunks()
-        for index, chunk in enumerate(chunks, start=1):
-            slug = "thumbnails"
-            if len(chunks) > 1:
-                slug += f"_{index}"
-            elements_path = shlex.quote(path.join("/data", import_task_slug, chunk))
-            command = f"python3 -m arkindex_tasks.generate_thumbnails {elements_path}"
-            self._build_task(command=command, slug=slug, env=self.base_env)
-            self.tasks_parents[slug].append(import_task_slug)
-
     def _build_task(
         self,
         command,
@@ -204,8 +186,6 @@ class ProcessBuilder(object):
     def validate_dataset(self) -> None:
         self.validate_gpu_requirement()
         self.validate_archived()
-        if self.process.generate_thumbnails:
-            raise ValidationError("Thumbnails generation is incompatible with dataset mode processes.")
 
     def validate(self) -> None:
         """
@@ -275,8 +255,6 @@ class ProcessBuilder(object):
             slug=import_task_slug,
             env=self.base_env,
         )
-        # Add thumbnails
-        self._add_thumbnails(import_task_slug=import_task_slug)
 
         # Distribute worker run tasks
         worker_runs = list(self.process.worker_runs.all())
diff --git a/arkindex/process/migrations/0032_remove_process_generate_thumbnails.py b/arkindex/process/migrations/0032_remove_process_generate_thumbnails.py
new file mode 100644
index 0000000000..6b9de73864
--- /dev/null
+++ b/arkindex/process/migrations/0032_remove_process_generate_thumbnails.py
@@ -0,0 +1,17 @@
+# Generated by Django 4.1.7 on 2024-03-26 08:33
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('process', '0031_process_corpus_check_and_remove_revision_field'),
+    ]
+
+    operations = [
+        migrations.RemoveField(
+            model_name='process',
+            name='generate_thumbnails',
+        ),
+    ]
diff --git a/arkindex/process/models.py b/arkindex/process/models.py
index e32dd410fe..4498aabef6 100644
--- a/arkindex/process/models.py
+++ b/arkindex/process/models.py
@@ -135,9 +135,6 @@ class Process(IndexableModel):
     # Used to include sub-elements of the actual query in Workers processes
     load_children = models.BooleanField(default=False)
 
-    # Whether or not to add a thumbnail generation task
-    generate_thumbnails = models.BooleanField(default=False)
-
     chunks = models.PositiveIntegerField(
         default=1,
         validators=[
diff --git a/arkindex/process/serializers/imports.py b/arkindex/process/serializers/imports.py
index bc071b1f72..a0a56d1430 100644
--- a/arkindex/process/serializers/imports.py
+++ b/arkindex/process/serializers/imports.py
@@ -258,9 +258,6 @@ class FilesProcessSerializer(serializers.ModelSerializer):
     # Automatically set the creator on the process
     creator = serializers.HiddenField(default=serializers.CurrentUserDefault())
 
-    # Always start the process with thumbnail generation enabled
-    generate_thumbnails = serializers.HiddenField(default=True)
-
     default_error_messages = {
         "mode_not_allowed": "This mode is not allowed when importing from files",
         "files_required": "At least one file is required to start an import process",
@@ -287,7 +284,6 @@ class FilesProcessSerializer(serializers.ModelSerializer):
             "element_type",
             "farm_id",
             "creator",
-            "generate_thumbnails",
         )
 
     def validate_mode(self, mode):
@@ -389,7 +385,6 @@ class StartProcessSerializer(serializers.Serializer):
         validators=[MaxValueValidator(lambda: settings.MAX_CHUNKS)],
         default=1,
     )
-    thumbnails = serializers.BooleanField(default=False, source="generate_thumbnails")
     farm = serializers.PrimaryKeyRelatedField(queryset=Farm.objects.all(), default=None, allow_null=True)
     use_cache = serializers.BooleanField(default=False)
     use_gpu = serializers.BooleanField(default=False, allow_null=True)
@@ -421,8 +416,6 @@ class StartProcessSerializer(serializers.Serializer):
             elif not any(dataset.corpus_id == self.instance.corpus.id for dataset in self.instance.datasets.all()):
                 errors["non_field_errors"].append("At least one of the process datasets must be from the same corpus as the process.")
 
-            if validated_data.get("generate_thumbnails"):
-                errors["thumbnails"].append("Thumbnails generation is not supported on Dataset processes.")
             if validated_data.get("worker_activity"):
                 errors["worker_activity"].append("Worker activities are not supported on Dataset processes.")
             if validated_data.get("use_cache"):
@@ -487,8 +480,6 @@ class StartProcessSerializer(serializers.Serializer):
                 errors["use_cache"].append("The process must have workers attached to use cached results.")
             if validated_data.get("use_gpu"):
                 errors["use_gpu"].append("The process must have workers attached to use GPUs.")
-            if not validated_data.get("generate_thumbnails"):
-                errors["__all__"].append("The process must either use thumbnail generation or have worker runs.")
 
         if errors:
             raise ValidationError(errors)
diff --git a/arkindex/process/tests/test_create_process.py b/arkindex/process/tests/test_create_process.py
index 59e454700c..1237df5d66 100644
--- a/arkindex/process/tests/test_create_process.py
+++ b/arkindex/process/tests/test_create_process.py
@@ -533,46 +533,6 @@ class TestCreateProcess(FixtureAPITestCase):
         process = Process.objects.get(id=data["id"])
         self.assertEqual(process.name, "blah")
 
-    def test_thumbnails_generation_only(self):
-        """
-        Generating thumbnails without any worker must generate a process task
-        and a thumbnails task, when the process is started
-        """
-        self.client.force_login(self.user)
-        corpus_type = self.corpus.types.first()
-        response = self.client.post(
-            reverse("api:corpus-process"),
-            {
-                "corpus": str(self.corpus.id),
-                "element_type": corpus_type.slug,
-            },
-            format="json"
-        )
-        self.assertEqual(response.status_code, status.HTTP_201_CREATED)
-        data = response.json()
-        process = Process.objects.get(id=data["id"])
-        self.assertFalse(process.tasks.exists())
-
-        response = self.client.post(
-            reverse("api:process-start", kwargs={"pk": str(process.id)}),
-            {
-                "chunks": 3,
-                "thumbnails": True,
-            }
-        )
-        self.assertEqual(response.status_code, status.HTTP_201_CREATED)
-        process.refresh_from_db()
-
-        self.assertEqual(process.tasks.count(), 4)
-        init_task = process.tasks.get(slug="initialisation")
-        self.assertEqual(init_task.command, f"python -m arkindex_tasks.init_elements {process.id} --chunks-number 3")
-
-        for i in range(1, 4):
-            self.assertEqual(
-                process.tasks.get(slug=f"thumbnails_{i}").command,
-                f"python3 -m arkindex_tasks.generate_thumbnails /data/initialisation/elements_chunk_{i}.json"
-            )
-
     @override_settings(
         ARKINDEX_TASKS_IMAGE="registry.teklia.com/tasks",
         PONOS_DEFAULT_ENV={}
diff --git a/arkindex/process/tests/test_processes.py b/arkindex/process/tests/test_processes.py
index b897d3250d..daec192f38 100644
--- a/arkindex/process/tests/test_processes.py
+++ b/arkindex/process/tests/test_processes.py
@@ -2167,7 +2167,7 @@ class TestProcesses(FixtureAPITestCase):
 
         self.assertEqual(
             response.json(),
-            {"__all__": ["The process must either use thumbnail generation or have worker runs."]},
+            {"__all__": ["The process must have worker runs."]},
         )
 
     def test_start_process_unavailable_worker_version(self):
@@ -2351,7 +2351,6 @@ class TestProcesses(FixtureAPITestCase):
             response = self.client.post(
                 reverse("api:process-start", kwargs={"pk": str(process2.id)}),
                 {
-                    "thumbnails": True,
                     "use_cache": True,
                     "worker_activity": True,
                 }
@@ -2359,7 +2358,6 @@ class TestProcesses(FixtureAPITestCase):
             self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
 
         self.assertDictEqual(response.json(), {
-            "thumbnails": ["Thumbnails generation is not supported on Dataset processes."],
             "use_cache": ["Caching is not supported on Dataset processes."],
             "worker_activity": ["Worker activities are not supported on Dataset processes."],
         })
@@ -2503,7 +2501,6 @@ class TestProcesses(FixtureAPITestCase):
             ({"chunks": 0}, {"chunks": ["Ensure this value is greater than or equal to 1."]}),
             ({"chunks": 20}, {"chunks": ["Ensure this value is less than or equal to 10."]}),
             ({"chunks": "max"}, {"chunks": ["A valid integer is required."]}),
-            ({"thumbnails": "gloubiboulga"}, {"thumbnails": ["Must be a valid boolean."]})
         ]
         for (params, check) in wrong_params_checks:
             with self.subTest(**params), self.assertNumQueries(5):
@@ -2533,7 +2530,7 @@ class TestProcesses(FixtureAPITestCase):
 
     def test_start_process_workers_parameters(self):
         """
-        It should be possible to pass chunks and thumbnails parameters when starting a workers process
+        It should be possible to pass chunks parameters when starting a workers process
         """
         process = self.corpus.processes.create(creator=self.user, mode=ProcessMode.Workers)
         # Add a worker run to this process
@@ -2542,7 +2539,7 @@ class TestProcesses(FixtureAPITestCase):
         self.client.force_login(self.user)
         response = self.client.post(
             reverse("api:process-start", kwargs={"pk": str(process.id)}),
-            {"chunks": 3, "thumbnails": "true"}
+            {"chunks": 3}
         )
         self.assertEqual(response.status_code, status.HTTP_201_CREATED)
         process.refresh_from_db()
@@ -2552,9 +2549,6 @@ class TestProcesses(FixtureAPITestCase):
             f"{run.task_slug}_1",
             f"{run.task_slug}_2",
             f"{run.task_slug}_3",
-            "thumbnails_1",
-            "thumbnails_2",
-            "thumbnails_3"
         ])
 
     def test_start_process_dataset_chunks(self):
@@ -2600,7 +2594,6 @@ class TestProcesses(FixtureAPITestCase):
             )
         self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
         self.assertDictEqual(response.json(), {
-            "__all__": ["The process must either use thumbnail generation or have worker runs."],
             "use_cache": ["The process must have workers attached to use cached results."],
             "worker_activity": ["The process must have workers attached to handle their activity."],
             "use_gpu": ["The process must have workers attached to use GPUs."],
diff --git a/arkindex/sql_validation/process_elements_filter_ml_class.sql b/arkindex/sql_validation/process_elements_filter_ml_class.sql
index b8169c5012..773198cab0 100644
--- a/arkindex/sql_validation/process_elements_filter_ml_class.sql
+++ b/arkindex/sql_validation/process_elements_filter_ml_class.sql
@@ -29,7 +29,6 @@ SELECT "process_process"."id",
        "process_process"."name_contains",
        "process_process"."ml_class_id",
        "process_process"."load_children",
-       "process_process"."generate_thumbnails",
        "process_process"."chunks",
        "process_process"."use_cache",
        "process_process"."use_gpu",
diff --git a/arkindex/sql_validation/process_elements_filter_type.sql b/arkindex/sql_validation/process_elements_filter_type.sql
index a0413952bd..6710d5e0f8 100644
--- a/arkindex/sql_validation/process_elements_filter_type.sql
+++ b/arkindex/sql_validation/process_elements_filter_type.sql
@@ -29,7 +29,6 @@ SELECT "process_process"."id",
        "process_process"."name_contains",
        "process_process"."ml_class_id",
        "process_process"."load_children",
-       "process_process"."generate_thumbnails",
        "process_process"."chunks",
        "process_process"."use_cache",
        "process_process"."use_gpu",
diff --git a/arkindex/sql_validation/process_elements_top_level.sql b/arkindex/sql_validation/process_elements_top_level.sql
index 68df166d1a..3087532fd0 100644
--- a/arkindex/sql_validation/process_elements_top_level.sql
+++ b/arkindex/sql_validation/process_elements_top_level.sql
@@ -29,7 +29,6 @@ SELECT "process_process"."id",
        "process_process"."name_contains",
        "process_process"."ml_class_id",
        "process_process"."load_children",
-       "process_process"."generate_thumbnails",
        "process_process"."chunks",
        "process_process"."use_cache",
        "process_process"."use_gpu",
diff --git a/arkindex/sql_validation/process_elements_with_image.sql b/arkindex/sql_validation/process_elements_with_image.sql
index ec4b65bf47..9d89dc7c0e 100644
--- a/arkindex/sql_validation/process_elements_with_image.sql
+++ b/arkindex/sql_validation/process_elements_with_image.sql
@@ -29,7 +29,6 @@ SELECT "process_process"."id",
        "process_process"."name_contains",
        "process_process"."ml_class_id",
        "process_process"."load_children",
-       "process_process"."generate_thumbnails",
        "process_process"."chunks",
        "process_process"."use_cache",
        "process_process"."use_gpu",
-- 
GitLab