From 5e747558842e66eaa94ff02c90d858c18295b276 Mon Sep 17 00:00:00 2001 From: Erwan Rouchet <rouchet@teklia.com> Date: Fri, 8 Nov 2024 08:15:13 +0000 Subject: [PATCH] Corpus and instance-wide maximum task TTL --- .../0013_corpus_maximum_task_ttl.py | 22 +++++++ arkindex/documents/models.py | 11 ++++ arkindex/documents/serializers/elements.py | 8 +++ arkindex/documents/tests/test_corpus.py | 57 +++++++++++++++---- arkindex/project/config.py | 1 + arkindex/project/settings.py | 1 + .../tests/config_samples/defaults.yaml | 1 + .../project/tests/config_samples/errors.yaml | 1 + .../tests/config_samples/expected_errors.yaml | 1 + .../tests/config_samples/override.yaml | 1 + arkindex/sql_validation/corpus_delete.sql | 3 +- .../corpus_delete_top_level_type.sql | 3 +- .../sql_validation/corpus_rights_filter.sql | 1 + .../corpus_rights_filter_public.sql | 2 + arkindex/sql_validation/list_elements.sql | 3 +- .../process_elements_filter_ml_class.sql | 3 +- .../process_elements_filter_type.sql | 3 +- .../process_elements_top_level.sql | 3 +- .../process_elements_with_image.sql | 3 +- 19 files changed, 111 insertions(+), 17 deletions(-) create mode 100644 arkindex/documents/migrations/0013_corpus_maximum_task_ttl.py diff --git a/arkindex/documents/migrations/0013_corpus_maximum_task_ttl.py b/arkindex/documents/migrations/0013_corpus_maximum_task_ttl.py new file mode 100644 index 0000000000..d5fc5fba74 --- /dev/null +++ b/arkindex/documents/migrations/0013_corpus_maximum_task_ttl.py @@ -0,0 +1,22 @@ +# Generated by Django 5.0.8 on 2024-11-06 11:31 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("documents", "0012_alter_transcriptionentity_id"), + ] + + operations = [ + migrations.AddField( + model_name="corpus", + name="maximum_task_ttl", + field=models.PositiveIntegerField( + blank=True, + help_text="Maximum time-to-live (expressed in seconds) for any WorkerRun created in this corpus. 0 means infinite. When not set, the instance-wide maximum time-to-live will be used instead.", + null=True, + ), + ), + ] diff --git a/arkindex/documents/models.py b/arkindex/documents/models.py index e6eb2bc4a6..0353cd106e 100644 --- a/arkindex/documents/models.py +++ b/arkindex/documents/models.py @@ -52,12 +52,23 @@ class Corpus(IndexableModel): # Is this corpus indexable ? indexable = models.BooleanField(default=False) + maximum_task_ttl = models.PositiveIntegerField( + blank=True, + null=True, + help_text="Maximum time-to-live (expressed in seconds) for any WorkerRun created in this corpus. 0 means infinite. " + "When not set, the instance-wide maximum time-to-live will be used instead.", + ) + # Specific manager for ACL objects = CorpusManager() class Meta: verbose_name_plural = "corpora" + @property + def applied_maximum_task_ttl(self) -> int: + return settings.PONOS_MAXIMUM_TASK_TTL if self.maximum_task_ttl is None else self.maximum_task_ttl + def __str__(self): return self.name diff --git a/arkindex/documents/serializers/elements.py b/arkindex/documents/serializers/elements.py index 2125a6b089..ee9ec29171 100644 --- a/arkindex/documents/serializers/elements.py +++ b/arkindex/documents/serializers/elements.py @@ -263,6 +263,13 @@ class CorpusSerializer(serializers.ModelSerializer): read_only=True, help_text="Number of users or groups that have been granted access rights on this corpus.", ) + maximum_task_ttl = serializers.IntegerField( + min_value=0, + read_only=True, + # Use the actually applied TTL, which defaults to the instance-wide setting and is never null + source="applied_maximum_task_ttl", + help_text="Maximum time-to-live for any WorkerRun created in this corpus, expressed in seconds. `0` means infinite.", + ) class Meta: model = Corpus @@ -277,6 +284,7 @@ class CorpusSerializer(serializers.ModelSerializer): "created", "authorized_users", "indexable", + "maximum_task_ttl", ) extra_kwargs = { "public": { diff --git a/arkindex/documents/tests/test_corpus.py b/arkindex/documents/tests/test_corpus.py index b0953e78a7..805447a33f 100644 --- a/arkindex/documents/tests/test_corpus.py +++ b/arkindex/documents/tests/test_corpus.py @@ -4,6 +4,7 @@ from unittest.mock import call, patch from uuid import uuid4 from django.contrib.auth.models import AnonymousUser +from django.test import override_settings from django.urls import reverse from rest_framework import status @@ -56,6 +57,7 @@ EXPECTED_CORPUS_TYPES = [ ] +@override_settings(PONOS_MAXIMUM_TASK_TTL=3600) class TestCorpus(FixtureAPITestCase): @classmethod @@ -101,6 +103,7 @@ class TestCorpus(FixtureAPITestCase): "created": DB_CREATED, "authorized_users": 1, "top_level_type": None, + "maximum_task_ttl": 3600, } ] ) @@ -134,6 +137,7 @@ class TestCorpus(FixtureAPITestCase): "created": DB_CREATED, "authorized_users": 2, "top_level_type": None, + "maximum_task_ttl": 3600, }, { "id": str(self.corpus_public.id), @@ -145,6 +149,7 @@ class TestCorpus(FixtureAPITestCase): "created": DB_CREATED, "authorized_users": 1, "top_level_type": None, + "maximum_task_ttl": 3600, } ] ) @@ -178,6 +183,7 @@ class TestCorpus(FixtureAPITestCase): "created": DB_CREATED, "authorized_users": 2, "top_level_type": None, + "maximum_task_ttl": 3600, }, { "id": str(self.corpus_hidden.id), @@ -189,6 +195,7 @@ class TestCorpus(FixtureAPITestCase): "created": DB_CREATED, "authorized_users": 0, "top_level_type": None, + "maximum_task_ttl": 3600, }, { "id": str(self.corpus_public.id), @@ -200,6 +207,7 @@ class TestCorpus(FixtureAPITestCase): "created": DB_CREATED, "authorized_users": 1, "top_level_type": None, + "maximum_task_ttl": 3600, } ] ) @@ -347,6 +355,7 @@ class TestCorpus(FixtureAPITestCase): "created": DB_CREATED, "authorized_users": 1, "top_level_type": None, + "maximum_task_ttl": 3600, }) def test_retrieve(self): @@ -365,6 +374,7 @@ class TestCorpus(FixtureAPITestCase): "created": DB_CREATED, "authorized_users": 2, "top_level_type": None, + "maximum_task_ttl": 3600, }) def test_retrieve_not_found(self): @@ -394,6 +404,7 @@ class TestCorpus(FixtureAPITestCase): "created": DB_CREATED, "authorized_users": 1, "top_level_type": None, + "maximum_task_ttl": 3600, }) @expectedFailure @@ -422,18 +433,38 @@ class TestCorpus(FixtureAPITestCase): "created": DB_CREATED, "authorized_users": 2, "top_level_type": None, + "maximum_task_ttl": 3600, }) + def test_retrieve_maximum_task_ttl(self): + """ + Setting Corpus.maximum_task_ttl to any value should return it instead of the setting + """ + self.corpus.maximum_task_ttl = 0 + self.corpus.save() + + with self.assertNumQueries(3): + response = self.client.get(reverse("api:corpus-retrieve", kwargs={"pk": self.corpus.id})) + self.assertEqual(response.status_code, status.HTTP_200_OK) + + self.assertEqual(response.json()["maximum_task_ttl"], 0) + def test_partial_update(self): self.client.force_login(self.corpus_admin) - response = self.client.patch(reverse("api:corpus-retrieve", kwargs={"pk": self.corpus_private.id}), { - "name": "new name", - "description": "new description", - }) - self.assertEqual(response.status_code, status.HTTP_200_OK) + + with self.assertNumQueries(7): + response = self.client.patch(reverse("api:corpus-retrieve", kwargs={"pk": self.corpus_private.id}), { + "name": "new name", + "description": "new description", + # This field should be ignored + "maximum_task_ttl": 9999, + }) + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.corpus_private.refresh_from_db() self.assertEqual(self.corpus_private.name, "new name") self.assertEqual(self.corpus_private.description, "new description") + self.assertIsNone(self.corpus_private.maximum_task_ttl) def test_partial_update_private_to_public_normal_user(self): """ @@ -526,14 +557,20 @@ class TestCorpus(FixtureAPITestCase): def test_update(self): self.client.force_login(self.corpus_admin) - response = self.client.put(reverse("api:corpus-retrieve", kwargs={"pk": self.corpus_private.id}), { - "name": "new name", - "description": "new description", - }) - self.assertEqual(response.status_code, status.HTTP_200_OK) + + with self.assertNumQueries(7): + response = self.client.put(reverse("api:corpus-retrieve", kwargs={"pk": self.corpus_private.id}), { + "name": "new name", + "description": "new description", + # This field should be ignored + "maximum_task_ttl": 9999, + }) + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.corpus_private.refresh_from_db() self.assertEqual(self.corpus_private.name, "new name") self.assertEqual(self.corpus_private.description, "new description") + self.assertIsNone(self.corpus_private.maximum_task_ttl) def test_update_required_fields(self): self.client.force_login(self.corpus_admin) diff --git a/arkindex/project/config.py b/arkindex/project/config.py index 84e76f14ff..47a70f3801 100644 --- a/arkindex/project/config.py +++ b/arkindex/project/config.py @@ -175,6 +175,7 @@ def get_settings_parser(base_dir): ponos_parser.add_option("artifact_max_size", type=int, default=5 * 1024**3) # Default task expiry delay in days ponos_parser.add_option("task_expiry", type=int, default=30) + ponos_parser.add_option("maximum_task_ttl", type=int, default=3600) ponos_parser.add_option("auto_remove_container", type=bool, default=False) sentry_parser = parser.add_subparser("sentry", default={}) diff --git a/arkindex/project/settings.py b/arkindex/project/settings.py index 67b66f202f..1b2d950cbc 100644 --- a/arkindex/project/settings.py +++ b/arkindex/project/settings.py @@ -561,6 +561,7 @@ PONOS_DOCKER_AUTO_REMOVE_CONTAINER = conf["ponos"]["auto_remove_container"] # Base data directory for RQ tasks execution (in the docker container) PONOS_DATA_DIR = "/data" PONOS_TASK_EXPIRY = conf["ponos"]["task_expiry"] +PONOS_MAXIMUM_TASK_TTL = conf["ponos"]["maximum_task_ttl"] # Robots.txt options ROBOTS_TXT_DISALLOW = conf["robots_txt_disallow"] diff --git a/arkindex/project/tests/config_samples/defaults.yaml b/arkindex/project/tests/config_samples/defaults.yaml index 5a3e67754c..d2e108d675 100644 --- a/arkindex/project/tests/config_samples/defaults.yaml +++ b/arkindex/project/tests/config_samples/defaults.yaml @@ -67,6 +67,7 @@ ponos: artifact_max_size: 5368709120 auto_remove_container: false default_env: {} + maximum_task_ttl: 3600 task_expiry: 30 public_hostname: https://default.config.arkindex.localhost redis: diff --git a/arkindex/project/tests/config_samples/errors.yaml b/arkindex/project/tests/config_samples/errors.yaml index 3b5f91fa0b..5925b919da 100644 --- a/arkindex/project/tests/config_samples/errors.yaml +++ b/arkindex/project/tests/config_samples/errors.yaml @@ -50,6 +50,7 @@ ponos: artifact_max_size: .nan auto_remove_container: please default_env: {} + maximum_task_ttl: .inf private_key: /dev/zero task_expiry: zero public_hostname: darkindex.lol diff --git a/arkindex/project/tests/config_samples/expected_errors.yaml b/arkindex/project/tests/config_samples/expected_errors.yaml index f0eaf9603d..b9ec8b030b 100644 --- a/arkindex/project/tests/config_samples/expected_errors.yaml +++ b/arkindex/project/tests/config_samples/expected_errors.yaml @@ -33,6 +33,7 @@ job_timeouts: send_verification_email: "invalid literal for int() with base 10: 'lol'" ponos: artifact_max_size: cannot convert float NaN to integer + maximum_task_ttl: cannot convert float infinity to integer task_expiry: "invalid literal for int() with base 10: 'zero'" public_hostname: The hostname must include an HTTP or HTTPS scheme. redis: diff --git a/arkindex/project/tests/config_samples/override.yaml b/arkindex/project/tests/config_samples/override.yaml index 099d11b5ee..7a807a1fef 100644 --- a/arkindex/project/tests/config_samples/override.yaml +++ b/arkindex/project/tests/config_samples/override.yaml @@ -82,6 +82,7 @@ ponos: auto_remove_container: true default_env: A: B + maximum_task_ttl: 3600 task_expiry: 42 public_hostname: https://darkindex.lol redis: diff --git a/arkindex/sql_validation/corpus_delete.sql b/arkindex/sql_validation/corpus_delete.sql index 9fbed056b3..bf5f227130 100644 --- a/arkindex/sql_validation/corpus_delete.sql +++ b/arkindex/sql_validation/corpus_delete.sql @@ -5,7 +5,8 @@ SELECT "documents_corpus"."created", "documents_corpus"."description", "documents_corpus"."top_level_type_id", "documents_corpus"."public", - "documents_corpus"."indexable" + "documents_corpus"."indexable", + "documents_corpus"."maximum_task_ttl" FROM "documents_corpus" WHERE "documents_corpus"."id" = '{corpus_id}'::uuid LIMIT 21; diff --git a/arkindex/sql_validation/corpus_delete_top_level_type.sql b/arkindex/sql_validation/corpus_delete_top_level_type.sql index 712cefb7df..dbbe418e73 100644 --- a/arkindex/sql_validation/corpus_delete_top_level_type.sql +++ b/arkindex/sql_validation/corpus_delete_top_level_type.sql @@ -5,7 +5,8 @@ SELECT "documents_corpus"."created", "documents_corpus"."description", "documents_corpus"."top_level_type_id", "documents_corpus"."public", - "documents_corpus"."indexable" + "documents_corpus"."indexable", + "documents_corpus"."maximum_task_ttl" FROM "documents_corpus" WHERE "documents_corpus"."id" = '{corpus_id}'::uuid LIMIT 21; diff --git a/arkindex/sql_validation/corpus_rights_filter.sql b/arkindex/sql_validation/corpus_rights_filter.sql index 9122e1515e..6456c7da35 100644 --- a/arkindex/sql_validation/corpus_rights_filter.sql +++ b/arkindex/sql_validation/corpus_rights_filter.sql @@ -22,6 +22,7 @@ SELECT "documents_corpus"."created", "documents_corpus"."top_level_type_id", "documents_corpus"."public", "documents_corpus"."indexable", + "documents_corpus"."maximum_task_ttl", LEAST("users_right"."level", T5."level") AS "max_level" FROM "documents_corpus" INNER JOIN "users_right" ON ("documents_corpus"."id" = "users_right"."content_id" diff --git a/arkindex/sql_validation/corpus_rights_filter_public.sql b/arkindex/sql_validation/corpus_rights_filter_public.sql index 03bc0851e0..6042778029 100644 --- a/arkindex/sql_validation/corpus_rights_filter_public.sql +++ b/arkindex/sql_validation/corpus_rights_filter_public.sql @@ -23,6 +23,7 @@ LIMIT 21; "documents_corpus"."top_level_type_id", "documents_corpus"."public", "documents_corpus"."indexable", + "documents_corpus"."maximum_task_ttl", LEAST("users_right"."level", T5."level") AS "max_level" FROM "documents_corpus" INNER JOIN "users_right" ON ("documents_corpus"."id" = "users_right"."content_id" @@ -42,6 +43,7 @@ UNION "documents_corpus"."top_level_type_id", "documents_corpus"."public", "documents_corpus"."indexable", + "documents_corpus"."maximum_task_ttl", 10 AS "max_level" FROM "documents_corpus" WHERE "documents_corpus"."public") diff --git a/arkindex/sql_validation/list_elements.sql b/arkindex/sql_validation/list_elements.sql index 8dede3c39d..b04bd57d0d 100644 --- a/arkindex/sql_validation/list_elements.sql +++ b/arkindex/sql_validation/list_elements.sql @@ -5,7 +5,8 @@ SELECT "documents_corpus"."created", "documents_corpus"."description", "documents_corpus"."top_level_type_id", "documents_corpus"."public", - "documents_corpus"."indexable" + "documents_corpus"."indexable", + "documents_corpus"."maximum_task_ttl" FROM "documents_corpus" WHERE "documents_corpus"."id" = '{corpus_id}'::uuid LIMIT 21; diff --git a/arkindex/sql_validation/process_elements_filter_ml_class.sql b/arkindex/sql_validation/process_elements_filter_ml_class.sql index fcd76a4123..fb1e9d4389 100644 --- a/arkindex/sql_validation/process_elements_filter_ml_class.sql +++ b/arkindex/sql_validation/process_elements_filter_ml_class.sql @@ -51,7 +51,8 @@ SELECT "documents_corpus"."created", "documents_corpus"."description", "documents_corpus"."top_level_type_id", "documents_corpus"."public", - "documents_corpus"."indexable" + "documents_corpus"."indexable", + "documents_corpus"."maximum_task_ttl" FROM "documents_corpus" WHERE "documents_corpus"."id" = '{corpus_id}'::uuid LIMIT 21; diff --git a/arkindex/sql_validation/process_elements_filter_type.sql b/arkindex/sql_validation/process_elements_filter_type.sql index 73e943b254..a566bc4a78 100644 --- a/arkindex/sql_validation/process_elements_filter_type.sql +++ b/arkindex/sql_validation/process_elements_filter_type.sql @@ -51,7 +51,8 @@ SELECT "documents_corpus"."created", "documents_corpus"."description", "documents_corpus"."top_level_type_id", "documents_corpus"."public", - "documents_corpus"."indexable" + "documents_corpus"."indexable", + "documents_corpus"."maximum_task_ttl" FROM "documents_corpus" WHERE "documents_corpus"."id" = '{corpus_id}'::uuid LIMIT 21; diff --git a/arkindex/sql_validation/process_elements_top_level.sql b/arkindex/sql_validation/process_elements_top_level.sql index 7fe800743d..77423e4582 100644 --- a/arkindex/sql_validation/process_elements_top_level.sql +++ b/arkindex/sql_validation/process_elements_top_level.sql @@ -51,7 +51,8 @@ SELECT "documents_corpus"."created", "documents_corpus"."description", "documents_corpus"."top_level_type_id", "documents_corpus"."public", - "documents_corpus"."indexable" + "documents_corpus"."indexable", + "documents_corpus"."maximum_task_ttl" FROM "documents_corpus" WHERE "documents_corpus"."id" = '{corpus_id}'::uuid LIMIT 21; diff --git a/arkindex/sql_validation/process_elements_with_image.sql b/arkindex/sql_validation/process_elements_with_image.sql index c8f0cd707c..2b6d311781 100644 --- a/arkindex/sql_validation/process_elements_with_image.sql +++ b/arkindex/sql_validation/process_elements_with_image.sql @@ -51,7 +51,8 @@ SELECT "documents_corpus"."created", "documents_corpus"."description", "documents_corpus"."top_level_type_id", "documents_corpus"."public", - "documents_corpus"."indexable" + "documents_corpus"."indexable", + "documents_corpus"."maximum_task_ttl" FROM "documents_corpus" WHERE "documents_corpus"."id" = '{corpus_id}'::uuid LIMIT 21; -- GitLab