From 7a014457b1e3bcb1bbd5c0b17891b950b2e294bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lesage?= <tlesage@teklia.com> Date: Tue, 26 Mar 2024 14:25:12 +0100 Subject: [PATCH] Remove thumbnail generation from process --- .../documents/tests/test_retrieve_elements.py | 1 - arkindex/process/builder.py | 22 ---------- ...0032_remove_process_generate_thumbnails.py | 17 ++++++++ arkindex/process/models.py | 3 -- arkindex/process/serializers/imports.py | 9 ----- arkindex/process/tests/test_create_process.py | 40 ------------------- arkindex/process/tests/test_processes.py | 13 ++---- .../process_elements_filter_ml_class.sql | 1 - .../process_elements_filter_type.sql | 1 - .../process_elements_top_level.sql | 1 - .../process_elements_with_image.sql | 1 - 11 files changed, 20 insertions(+), 89 deletions(-) create mode 100644 arkindex/process/migrations/0032_remove_process_generate_thumbnails.py diff --git a/arkindex/documents/tests/test_retrieve_elements.py b/arkindex/documents/tests/test_retrieve_elements.py index 5ad132151a..6556490cbf 100644 --- a/arkindex/documents/tests/test_retrieve_elements.py +++ b/arkindex/documents/tests/test_retrieve_elements.py @@ -155,7 +155,6 @@ class TestRetrieveElements(FixtureAPITestCase): process = Process.objects.create( mode=ProcessMode.Workers, creator=self.user, - generate_thumbnails=True, farm=Farm.objects.first(), corpus=self.corpus ) diff --git a/arkindex/process/builder.py b/arkindex/process/builder.py index 6a122eeb07..74619b170e 100644 --- a/arkindex/process/builder.py +++ b/arkindex/process/builder.py @@ -62,24 +62,6 @@ class ProcessBuilder(object): for parent_slug in parent_slugs ) + 1 - def _add_thumbnails(self, import_task_slug: str) -> None: - """ - Automatically adds a thumbnail generation task at the end of each distributed chunk - If a specific parent slug is provided, attach the thumbnails generation task to it. - """ - if not self.process.generate_thumbnails: - return - - chunks = self._get_elements_json_chunks() - for index, chunk in enumerate(chunks, start=1): - slug = "thumbnails" - if len(chunks) > 1: - slug += f"_{index}" - elements_path = shlex.quote(path.join("/data", import_task_slug, chunk)) - command = f"python3 -m arkindex_tasks.generate_thumbnails {elements_path}" - self._build_task(command=command, slug=slug, env=self.base_env) - self.tasks_parents[slug].append(import_task_slug) - def _build_task( self, command, @@ -204,8 +186,6 @@ class ProcessBuilder(object): def validate_dataset(self) -> None: self.validate_gpu_requirement() self.validate_archived() - if self.process.generate_thumbnails: - raise ValidationError("Thumbnails generation is incompatible with dataset mode processes.") def validate(self) -> None: """ @@ -275,8 +255,6 @@ class ProcessBuilder(object): slug=import_task_slug, env=self.base_env, ) - # Add thumbnails - self._add_thumbnails(import_task_slug=import_task_slug) # Distribute worker run tasks worker_runs = list(self.process.worker_runs.all()) diff --git a/arkindex/process/migrations/0032_remove_process_generate_thumbnails.py b/arkindex/process/migrations/0032_remove_process_generate_thumbnails.py new file mode 100644 index 0000000000..6b9de73864 --- /dev/null +++ b/arkindex/process/migrations/0032_remove_process_generate_thumbnails.py @@ -0,0 +1,17 @@ +# Generated by Django 4.1.7 on 2024-03-26 08:33 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('process', '0031_process_corpus_check_and_remove_revision_field'), + ] + + operations = [ + migrations.RemoveField( + model_name='process', + name='generate_thumbnails', + ), + ] diff --git a/arkindex/process/models.py b/arkindex/process/models.py index ba269d422b..94d0412029 100644 --- a/arkindex/process/models.py +++ b/arkindex/process/models.py @@ -135,9 +135,6 @@ class Process(IndexableModel): # Used to include sub-elements of the actual query in Workers processes load_children = models.BooleanField(default=False) - # Whether or not to add a thumbnail generation task - generate_thumbnails = models.BooleanField(default=False) - chunks = models.PositiveIntegerField( default=1, validators=[ diff --git a/arkindex/process/serializers/imports.py b/arkindex/process/serializers/imports.py index 349781accf..d300e3cf21 100644 --- a/arkindex/process/serializers/imports.py +++ b/arkindex/process/serializers/imports.py @@ -258,9 +258,6 @@ class FilesProcessSerializer(serializers.ModelSerializer): # Automatically set the creator on the process creator = serializers.HiddenField(default=serializers.CurrentUserDefault()) - # Always start the process with thumbnail generation enabled - generate_thumbnails = serializers.HiddenField(default=True) - default_error_messages = { "mode_not_allowed": "This mode is not allowed when importing from files", "files_required": "At least one file is required to start an import process", @@ -287,7 +284,6 @@ class FilesProcessSerializer(serializers.ModelSerializer): "element_type", "farm_id", "creator", - "generate_thumbnails", ) def validate_mode(self, mode): @@ -389,7 +385,6 @@ class StartProcessSerializer(serializers.Serializer): validators=[MaxValueValidator(lambda: settings.MAX_CHUNKS)], default=1, ) - thumbnails = serializers.BooleanField(default=False, source="generate_thumbnails") farm = serializers.PrimaryKeyRelatedField(queryset=Farm.objects.all(), default=None, allow_null=True) use_cache = serializers.BooleanField(default=False) use_gpu = serializers.BooleanField(default=False, allow_null=True) @@ -421,8 +416,6 @@ class StartProcessSerializer(serializers.Serializer): elif not any(ds.dataset.corpus_id == self.instance.corpus.id for ds in self.instance.sets.all()): errors["non_field_errors"].append("At least one of the process sets must be from the same corpus as the process.") - if validated_data.get("generate_thumbnails"): - errors["thumbnails"].append("Thumbnails generation is not supported on Dataset processes.") if validated_data.get("worker_activity"): errors["worker_activity"].append("Worker activities are not supported on Dataset processes.") if validated_data.get("use_cache"): @@ -487,8 +480,6 @@ class StartProcessSerializer(serializers.Serializer): errors["use_cache"].append("The process must have workers attached to use cached results.") if validated_data.get("use_gpu"): errors["use_gpu"].append("The process must have workers attached to use GPUs.") - if not validated_data.get("generate_thumbnails"): - errors["__all__"].append("The process must either use thumbnail generation or have worker runs.") if errors: raise ValidationError(errors) diff --git a/arkindex/process/tests/test_create_process.py b/arkindex/process/tests/test_create_process.py index e64131810d..8fdc26705f 100644 --- a/arkindex/process/tests/test_create_process.py +++ b/arkindex/process/tests/test_create_process.py @@ -533,46 +533,6 @@ class TestCreateProcess(FixtureAPITestCase): process = Process.objects.get(id=data["id"]) self.assertEqual(process.name, "blah") - def test_thumbnails_generation_only(self): - """ - Generating thumbnails without any worker must generate a process task - and a thumbnails task, when the process is started - """ - self.client.force_login(self.user) - corpus_type = self.corpus.types.first() - response = self.client.post( - reverse("api:corpus-process"), - { - "corpus": str(self.corpus.id), - "element_type": corpus_type.slug, - }, - format="json" - ) - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - data = response.json() - process = Process.objects.get(id=data["id"]) - self.assertFalse(process.tasks.exists()) - - response = self.client.post( - reverse("api:process-start", kwargs={"pk": str(process.id)}), - { - "chunks": 3, - "thumbnails": True, - } - ) - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - process.refresh_from_db() - - self.assertEqual(process.tasks.count(), 4) - init_task = process.tasks.get(slug="initialisation") - self.assertEqual(init_task.command, f"python -m arkindex_tasks.init_elements {process.id} --chunks-number 3") - - for i in range(1, 4): - self.assertEqual( - process.tasks.get(slug=f"thumbnails_{i}").command, - f"python3 -m arkindex_tasks.generate_thumbnails /data/initialisation/elements_chunk_{i}.json" - ) - @override_settings( ARKINDEX_TASKS_IMAGE="registry.teklia.com/tasks", PONOS_DEFAULT_ENV={} diff --git a/arkindex/process/tests/test_processes.py b/arkindex/process/tests/test_processes.py index 84d6dcb925..576979a868 100644 --- a/arkindex/process/tests/test_processes.py +++ b/arkindex/process/tests/test_processes.py @@ -2168,7 +2168,7 @@ class TestProcesses(FixtureAPITestCase): self.assertEqual( response.json(), - {"__all__": ["The process must either use thumbnail generation or have worker runs."]}, + {"__all__": ["The process must have worker runs."]}, ) def test_start_process_unavailable_worker_version(self): @@ -2355,7 +2355,6 @@ class TestProcesses(FixtureAPITestCase): response = self.client.post( reverse("api:process-start", kwargs={"pk": str(process2.id)}), { - "thumbnails": True, "use_cache": True, "worker_activity": True, } @@ -2363,7 +2362,6 @@ class TestProcesses(FixtureAPITestCase): self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) self.assertDictEqual(response.json(), { - "thumbnails": ["Thumbnails generation is not supported on Dataset processes."], "use_cache": ["Caching is not supported on Dataset processes."], "worker_activity": ["Worker activities are not supported on Dataset processes."], }) @@ -2509,7 +2507,6 @@ class TestProcesses(FixtureAPITestCase): ({"chunks": 0}, {"chunks": ["Ensure this value is greater than or equal to 1."]}), ({"chunks": 20}, {"chunks": ["Ensure this value is less than or equal to 10."]}), ({"chunks": "max"}, {"chunks": ["A valid integer is required."]}), - ({"thumbnails": "gloubiboulga"}, {"thumbnails": ["Must be a valid boolean."]}) ] for (params, check) in wrong_params_checks: with self.subTest(**params), self.assertNumQueries(5): @@ -2539,7 +2536,7 @@ class TestProcesses(FixtureAPITestCase): def test_start_process_workers_parameters(self): """ - It should be possible to pass chunks and thumbnails parameters when starting a workers process + It should be possible to pass chunks parameters when starting a workers process """ process = self.corpus.processes.create(creator=self.user, mode=ProcessMode.Workers) # Add a worker run to this process @@ -2548,7 +2545,7 @@ class TestProcesses(FixtureAPITestCase): self.client.force_login(self.user) response = self.client.post( reverse("api:process-start", kwargs={"pk": str(process.id)}), - {"chunks": 3, "thumbnails": "true"} + {"chunks": 3} ) self.assertEqual(response.status_code, status.HTTP_201_CREATED) process.refresh_from_db() @@ -2558,9 +2555,6 @@ class TestProcesses(FixtureAPITestCase): f"{run.task_slug}_1", f"{run.task_slug}_2", f"{run.task_slug}_3", - "thumbnails_1", - "thumbnails_2", - "thumbnails_3" ]) def test_start_process_dataset_chunks(self): @@ -2608,7 +2602,6 @@ class TestProcesses(FixtureAPITestCase): ) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) self.assertDictEqual(response.json(), { - "__all__": ["The process must either use thumbnail generation or have worker runs."], "use_cache": ["The process must have workers attached to use cached results."], "worker_activity": ["The process must have workers attached to handle their activity."], "use_gpu": ["The process must have workers attached to use GPUs."], diff --git a/arkindex/sql_validation/process_elements_filter_ml_class.sql b/arkindex/sql_validation/process_elements_filter_ml_class.sql index b8169c5012..773198cab0 100644 --- a/arkindex/sql_validation/process_elements_filter_ml_class.sql +++ b/arkindex/sql_validation/process_elements_filter_ml_class.sql @@ -29,7 +29,6 @@ SELECT "process_process"."id", "process_process"."name_contains", "process_process"."ml_class_id", "process_process"."load_children", - "process_process"."generate_thumbnails", "process_process"."chunks", "process_process"."use_cache", "process_process"."use_gpu", diff --git a/arkindex/sql_validation/process_elements_filter_type.sql b/arkindex/sql_validation/process_elements_filter_type.sql index a0413952bd..6710d5e0f8 100644 --- a/arkindex/sql_validation/process_elements_filter_type.sql +++ b/arkindex/sql_validation/process_elements_filter_type.sql @@ -29,7 +29,6 @@ SELECT "process_process"."id", "process_process"."name_contains", "process_process"."ml_class_id", "process_process"."load_children", - "process_process"."generate_thumbnails", "process_process"."chunks", "process_process"."use_cache", "process_process"."use_gpu", diff --git a/arkindex/sql_validation/process_elements_top_level.sql b/arkindex/sql_validation/process_elements_top_level.sql index 68df166d1a..3087532fd0 100644 --- a/arkindex/sql_validation/process_elements_top_level.sql +++ b/arkindex/sql_validation/process_elements_top_level.sql @@ -29,7 +29,6 @@ SELECT "process_process"."id", "process_process"."name_contains", "process_process"."ml_class_id", "process_process"."load_children", - "process_process"."generate_thumbnails", "process_process"."chunks", "process_process"."use_cache", "process_process"."use_gpu", diff --git a/arkindex/sql_validation/process_elements_with_image.sql b/arkindex/sql_validation/process_elements_with_image.sql index ec4b65bf47..9d89dc7c0e 100644 --- a/arkindex/sql_validation/process_elements_with_image.sql +++ b/arkindex/sql_validation/process_elements_with_image.sql @@ -29,7 +29,6 @@ SELECT "process_process"."id", "process_process"."name_contains", "process_process"."ml_class_id", "process_process"."load_children", - "process_process"."generate_thumbnails", "process_process"."chunks", "process_process"."use_cache", "process_process"."use_gpu", -- GitLab