From 5e747558842e66eaa94ff02c90d858c18295b276 Mon Sep 17 00:00:00 2001
From: Erwan Rouchet <rouchet@teklia.com>
Date: Fri, 8 Nov 2024 08:15:13 +0000
Subject: [PATCH] Corpus and instance-wide maximum task TTL

---
 .../0013_corpus_maximum_task_ttl.py           | 22 +++++++
 arkindex/documents/models.py                  | 11 ++++
 arkindex/documents/serializers/elements.py    |  8 +++
 arkindex/documents/tests/test_corpus.py       | 57 +++++++++++++++----
 arkindex/project/config.py                    |  1 +
 arkindex/project/settings.py                  |  1 +
 .../tests/config_samples/defaults.yaml        |  1 +
 .../project/tests/config_samples/errors.yaml  |  1 +
 .../tests/config_samples/expected_errors.yaml |  1 +
 .../tests/config_samples/override.yaml        |  1 +
 arkindex/sql_validation/corpus_delete.sql     |  3 +-
 .../corpus_delete_top_level_type.sql          |  3 +-
 .../sql_validation/corpus_rights_filter.sql   |  1 +
 .../corpus_rights_filter_public.sql           |  2 +
 arkindex/sql_validation/list_elements.sql     |  3 +-
 .../process_elements_filter_ml_class.sql      |  3 +-
 .../process_elements_filter_type.sql          |  3 +-
 .../process_elements_top_level.sql            |  3 +-
 .../process_elements_with_image.sql           |  3 +-
 19 files changed, 111 insertions(+), 17 deletions(-)
 create mode 100644 arkindex/documents/migrations/0013_corpus_maximum_task_ttl.py

diff --git a/arkindex/documents/migrations/0013_corpus_maximum_task_ttl.py b/arkindex/documents/migrations/0013_corpus_maximum_task_ttl.py
new file mode 100644
index 0000000000..d5fc5fba74
--- /dev/null
+++ b/arkindex/documents/migrations/0013_corpus_maximum_task_ttl.py
@@ -0,0 +1,22 @@
+# Generated by Django 5.0.8 on 2024-11-06 11:31
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    """Add the optional, nullable ``maximum_task_ttl`` column to the ``corpus`` model."""
+    dependencies = [
+        ("documents", "0012_alter_transcriptionentity_id"),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name="corpus",
+            name="maximum_task_ttl",
+            field=models.PositiveIntegerField(
+                blank=True,
+                help_text="Maximum time-to-live (expressed in seconds) for any WorkerRun created in this corpus. 0 means infinite. When not set, the instance-wide maximum time-to-live will be used instead.",
+                null=True,
+            ),
+        ),
+    ]
diff --git a/arkindex/documents/models.py b/arkindex/documents/models.py
index e6eb2bc4a6..0353cd106e 100644
--- a/arkindex/documents/models.py
+++ b/arkindex/documents/models.py
@@ -52,12 +52,23 @@ class Corpus(IndexableModel):
     # Is this corpus indexable ?
     indexable = models.BooleanField(default=False)
 
+    maximum_task_ttl = models.PositiveIntegerField(
+        blank=True,
+        null=True,
+        help_text="Maximum time-to-live (expressed in seconds) for any WorkerRun created in this corpus. 0 means infinite. "
+                  "When not set, the instance-wide maximum time-to-live will be used instead.",
+    )
+
     # Specific manager for ACL
     objects = CorpusManager()
 
     class Meta:
         verbose_name_plural = "corpora"
 
+    @property
+    def applied_maximum_task_ttl(self) -> int:  # Corpus TTL when set, otherwise the instance-wide setting
+        return self.maximum_task_ttl if self.maximum_task_ttl is not None else settings.PONOS_MAXIMUM_TASK_TTL
+
     def __str__(self):
         return self.name
 
diff --git a/arkindex/documents/serializers/elements.py b/arkindex/documents/serializers/elements.py
index 2125a6b089..ee9ec29171 100644
--- a/arkindex/documents/serializers/elements.py
+++ b/arkindex/documents/serializers/elements.py
@@ -263,6 +263,13 @@ class CorpusSerializer(serializers.ModelSerializer):
         read_only=True,
         help_text="Number of users or groups that have been granted access rights on this corpus.",
     )
+    maximum_task_ttl = serializers.IntegerField(
+        min_value=0,
+        read_only=True,
+        # Use the actually applied TTL, which defaults to the instance-wide setting and is never null
+        source="applied_maximum_task_ttl",
+        help_text="Maximum time-to-live for any WorkerRun created in this corpus, expressed in seconds. `0` means infinite.",
+    )
 
     class Meta:
         model = Corpus
@@ -277,6 +284,7 @@ class CorpusSerializer(serializers.ModelSerializer):
             "created",
             "authorized_users",
             "indexable",
+            "maximum_task_ttl",
         )
         extra_kwargs = {
             "public": {
diff --git a/arkindex/documents/tests/test_corpus.py b/arkindex/documents/tests/test_corpus.py
index b0953e78a7..805447a33f 100644
--- a/arkindex/documents/tests/test_corpus.py
+++ b/arkindex/documents/tests/test_corpus.py
@@ -4,6 +4,7 @@ from unittest.mock import call, patch
 from uuid import uuid4
 
 from django.contrib.auth.models import AnonymousUser
+from django.test import override_settings
 from django.urls import reverse
 from rest_framework import status
 
@@ -56,6 +57,7 @@ EXPECTED_CORPUS_TYPES = [
 ]
 
 
+@override_settings(PONOS_MAXIMUM_TASK_TTL=3600)
 class TestCorpus(FixtureAPITestCase):
 
     @classmethod
@@ -101,6 +103,7 @@ class TestCorpus(FixtureAPITestCase):
                     "created": DB_CREATED,
                     "authorized_users": 1,
                     "top_level_type": None,
+                    "maximum_task_ttl": 3600,
                 }
             ]
         )
@@ -134,6 +137,7 @@ class TestCorpus(FixtureAPITestCase):
                     "created": DB_CREATED,
                     "authorized_users": 2,
                     "top_level_type": None,
+                    "maximum_task_ttl": 3600,
                 },
                 {
                     "id": str(self.corpus_public.id),
@@ -145,6 +149,7 @@ class TestCorpus(FixtureAPITestCase):
                     "created": DB_CREATED,
                     "authorized_users": 1,
                     "top_level_type": None,
+                    "maximum_task_ttl": 3600,
                 }
             ]
         )
@@ -178,6 +183,7 @@ class TestCorpus(FixtureAPITestCase):
                     "created": DB_CREATED,
                     "authorized_users": 2,
                     "top_level_type": None,
+                    "maximum_task_ttl": 3600,
                 },
                 {
                     "id": str(self.corpus_hidden.id),
@@ -189,6 +195,7 @@ class TestCorpus(FixtureAPITestCase):
                     "created": DB_CREATED,
                     "authorized_users": 0,
                     "top_level_type": None,
+                    "maximum_task_ttl": 3600,
                 },
                 {
                     "id": str(self.corpus_public.id),
@@ -200,6 +207,7 @@ class TestCorpus(FixtureAPITestCase):
                     "created": DB_CREATED,
                     "authorized_users": 1,
                     "top_level_type": None,
+                    "maximum_task_ttl": 3600,
                 }
             ]
         )
@@ -347,6 +355,7 @@ class TestCorpus(FixtureAPITestCase):
             "created": DB_CREATED,
             "authorized_users": 1,
             "top_level_type": None,
+            "maximum_task_ttl": 3600,
         })
 
     def test_retrieve(self):
@@ -365,6 +374,7 @@ class TestCorpus(FixtureAPITestCase):
             "created": DB_CREATED,
             "authorized_users": 2,
             "top_level_type": None,
+            "maximum_task_ttl": 3600,
         })
 
     def test_retrieve_not_found(self):
@@ -394,6 +404,7 @@ class TestCorpus(FixtureAPITestCase):
             "created": DB_CREATED,
             "authorized_users": 1,
             "top_level_type": None,
+            "maximum_task_ttl": 3600,
         })
 
     @expectedFailure
@@ -422,18 +433,38 @@ class TestCorpus(FixtureAPITestCase):
             "created": DB_CREATED,
             "authorized_users": 2,
             "top_level_type": None,
+            "maximum_task_ttl": 3600,
         })
 
+    def test_retrieve_maximum_task_ttl(self):
+        """
+        An explicit Corpus.maximum_task_ttl, even 0 (infinite), is returned instead of the instance-wide setting
+        """
+        self.corpus.maximum_task_ttl = 0
+        self.corpus.save()
+
+        with self.assertNumQueries(3):
+            response = self.client.get(reverse("api:corpus-retrieve", kwargs={"pk": self.corpus.id}))
+            self.assertEqual(response.status_code, status.HTTP_200_OK)
+
+        self.assertEqual(response.json()["maximum_task_ttl"], 0)
+
     def test_partial_update(self):
         self.client.force_login(self.corpus_admin)
-        response = self.client.patch(reverse("api:corpus-retrieve", kwargs={"pk": self.corpus_private.id}), {
-            "name": "new name",
-            "description": "new description",
-        })
-        self.assertEqual(response.status_code, status.HTTP_200_OK)
+
+        with self.assertNumQueries(7):
+            response = self.client.patch(reverse("api:corpus-retrieve", kwargs={"pk": self.corpus_private.id}), {
+                "name": "new name",
+                "description": "new description",
+                # Read-only in the serializer: any submitted value must be ignored
+                "maximum_task_ttl": 9999,
+            })
+            self.assertEqual(response.status_code, status.HTTP_200_OK)
+
         self.corpus_private.refresh_from_db()
         self.assertEqual(self.corpus_private.name, "new name")
         self.assertEqual(self.corpus_private.description, "new description")
+        self.assertIsNone(self.corpus_private.maximum_task_ttl)
 
     def test_partial_update_private_to_public_normal_user(self):
         """
@@ -526,14 +557,20 @@ class TestCorpus(FixtureAPITestCase):
 
     def test_update(self):
         self.client.force_login(self.corpus_admin)
-        response = self.client.put(reverse("api:corpus-retrieve", kwargs={"pk": self.corpus_private.id}), {
-            "name": "new name",
-            "description": "new description",
-        })
-        self.assertEqual(response.status_code, status.HTTP_200_OK)
+
+        with self.assertNumQueries(7):
+            response = self.client.put(reverse("api:corpus-retrieve", kwargs={"pk": self.corpus_private.id}), {
+                "name": "new name",
+                "description": "new description",
+                # Read-only in the serializer: any submitted value must be ignored
+                "maximum_task_ttl": 9999,
+            })
+            self.assertEqual(response.status_code, status.HTTP_200_OK)
+
         self.corpus_private.refresh_from_db()
         self.assertEqual(self.corpus_private.name, "new name")
         self.assertEqual(self.corpus_private.description, "new description")
+        self.assertIsNone(self.corpus_private.maximum_task_ttl)
 
     def test_update_required_fields(self):
         self.client.force_login(self.corpus_admin)
diff --git a/arkindex/project/config.py b/arkindex/project/config.py
index 84e76f14ff..47a70f3801 100644
--- a/arkindex/project/config.py
+++ b/arkindex/project/config.py
@@ -175,6 +175,7 @@ def get_settings_parser(base_dir):
     ponos_parser.add_option("artifact_max_size", type=int, default=5 * 1024**3)
     # Default task expiry delay in days
     ponos_parser.add_option("task_expiry", type=int, default=30)
+    ponos_parser.add_option("maximum_task_ttl", type=int, default=3600)  # Instance-wide maximum TTL, in seconds
     ponos_parser.add_option("auto_remove_container", type=bool, default=False)
 
     sentry_parser = parser.add_subparser("sentry", default={})
diff --git a/arkindex/project/settings.py b/arkindex/project/settings.py
index 67b66f202f..1b2d950cbc 100644
--- a/arkindex/project/settings.py
+++ b/arkindex/project/settings.py
@@ -561,6 +561,7 @@ PONOS_DOCKER_AUTO_REMOVE_CONTAINER = conf["ponos"]["auto_remove_container"]
 # Base data directory for RQ tasks execution (in the docker container)
 PONOS_DATA_DIR = "/data"
 PONOS_TASK_EXPIRY = conf["ponos"]["task_expiry"]
+PONOS_MAXIMUM_TASK_TTL = conf["ponos"]["maximum_task_ttl"]
 
 # Robots.txt options
 ROBOTS_TXT_DISALLOW = conf["robots_txt_disallow"]
diff --git a/arkindex/project/tests/config_samples/defaults.yaml b/arkindex/project/tests/config_samples/defaults.yaml
index 5a3e67754c..d2e108d675 100644
--- a/arkindex/project/tests/config_samples/defaults.yaml
+++ b/arkindex/project/tests/config_samples/defaults.yaml
@@ -67,6 +67,7 @@ ponos:
   artifact_max_size: 5368709120
   auto_remove_container: false
   default_env: {}
+  maximum_task_ttl: 3600
   task_expiry: 30
 public_hostname: https://default.config.arkindex.localhost
 redis:
diff --git a/arkindex/project/tests/config_samples/errors.yaml b/arkindex/project/tests/config_samples/errors.yaml
index 3b5f91fa0b..5925b919da 100644
--- a/arkindex/project/tests/config_samples/errors.yaml
+++ b/arkindex/project/tests/config_samples/errors.yaml
@@ -50,6 +50,7 @@ ponos:
   artifact_max_size: .nan
   auto_remove_container: please
   default_env: {}
+  maximum_task_ttl: .inf
   private_key: /dev/zero
   task_expiry: zero
 public_hostname: darkindex.lol
diff --git a/arkindex/project/tests/config_samples/expected_errors.yaml b/arkindex/project/tests/config_samples/expected_errors.yaml
index f0eaf9603d..b9ec8b030b 100644
--- a/arkindex/project/tests/config_samples/expected_errors.yaml
+++ b/arkindex/project/tests/config_samples/expected_errors.yaml
@@ -33,6 +33,7 @@ job_timeouts:
   send_verification_email: "invalid literal for int() with base 10: 'lol'"
 ponos:
   artifact_max_size: cannot convert float NaN to integer
+  maximum_task_ttl: cannot convert float infinity to integer
   task_expiry: "invalid literal for int() with base 10: 'zero'"
 public_hostname: The hostname must include an HTTP or HTTPS scheme.
 redis:
diff --git a/arkindex/project/tests/config_samples/override.yaml b/arkindex/project/tests/config_samples/override.yaml
index 099d11b5ee..7a807a1fef 100644
--- a/arkindex/project/tests/config_samples/override.yaml
+++ b/arkindex/project/tests/config_samples/override.yaml
@@ -82,6 +82,7 @@ ponos:
   auto_remove_container: true
   default_env:
     A: B
+  maximum_task_ttl: 86400
   task_expiry: 42
 public_hostname: https://darkindex.lol
 redis:
diff --git a/arkindex/sql_validation/corpus_delete.sql b/arkindex/sql_validation/corpus_delete.sql
index 9fbed056b3..bf5f227130 100644
--- a/arkindex/sql_validation/corpus_delete.sql
+++ b/arkindex/sql_validation/corpus_delete.sql
@@ -5,7 +5,8 @@ SELECT "documents_corpus"."created",
        "documents_corpus"."description",
        "documents_corpus"."top_level_type_id",
        "documents_corpus"."public",
-       "documents_corpus"."indexable"
+       "documents_corpus"."indexable",
+       "documents_corpus"."maximum_task_ttl"
 FROM "documents_corpus"
 WHERE "documents_corpus"."id" = '{corpus_id}'::uuid
 LIMIT 21;
diff --git a/arkindex/sql_validation/corpus_delete_top_level_type.sql b/arkindex/sql_validation/corpus_delete_top_level_type.sql
index 712cefb7df..dbbe418e73 100644
--- a/arkindex/sql_validation/corpus_delete_top_level_type.sql
+++ b/arkindex/sql_validation/corpus_delete_top_level_type.sql
@@ -5,7 +5,8 @@ SELECT "documents_corpus"."created",
        "documents_corpus"."description",
        "documents_corpus"."top_level_type_id",
        "documents_corpus"."public",
-       "documents_corpus"."indexable"
+       "documents_corpus"."indexable",
+       "documents_corpus"."maximum_task_ttl"
 FROM "documents_corpus"
 WHERE "documents_corpus"."id" = '{corpus_id}'::uuid
 LIMIT 21;
diff --git a/arkindex/sql_validation/corpus_rights_filter.sql b/arkindex/sql_validation/corpus_rights_filter.sql
index 9122e1515e..6456c7da35 100644
--- a/arkindex/sql_validation/corpus_rights_filter.sql
+++ b/arkindex/sql_validation/corpus_rights_filter.sql
@@ -22,6 +22,7 @@ SELECT "documents_corpus"."created",
        "documents_corpus"."top_level_type_id",
        "documents_corpus"."public",
        "documents_corpus"."indexable",
+       "documents_corpus"."maximum_task_ttl",
        LEAST("users_right"."level", T5."level") AS "max_level"
 FROM "documents_corpus"
 INNER JOIN "users_right" ON ("documents_corpus"."id" = "users_right"."content_id"
diff --git a/arkindex/sql_validation/corpus_rights_filter_public.sql b/arkindex/sql_validation/corpus_rights_filter_public.sql
index 03bc0851e0..6042778029 100644
--- a/arkindex/sql_validation/corpus_rights_filter_public.sql
+++ b/arkindex/sql_validation/corpus_rights_filter_public.sql
@@ -23,6 +23,7 @@ LIMIT 21;
             "documents_corpus"."top_level_type_id",
             "documents_corpus"."public",
             "documents_corpus"."indexable",
+            "documents_corpus"."maximum_task_ttl",
             LEAST("users_right"."level", T5."level") AS "max_level"
      FROM "documents_corpus"
      INNER JOIN "users_right" ON ("documents_corpus"."id" = "users_right"."content_id"
@@ -42,6 +43,7 @@ UNION
             "documents_corpus"."top_level_type_id",
             "documents_corpus"."public",
             "documents_corpus"."indexable",
+            "documents_corpus"."maximum_task_ttl",
             10 AS "max_level"
      FROM "documents_corpus"
      WHERE "documents_corpus"."public")
diff --git a/arkindex/sql_validation/list_elements.sql b/arkindex/sql_validation/list_elements.sql
index 8dede3c39d..b04bd57d0d 100644
--- a/arkindex/sql_validation/list_elements.sql
+++ b/arkindex/sql_validation/list_elements.sql
@@ -5,7 +5,8 @@ SELECT "documents_corpus"."created",
        "documents_corpus"."description",
        "documents_corpus"."top_level_type_id",
        "documents_corpus"."public",
-       "documents_corpus"."indexable"
+       "documents_corpus"."indexable",
+       "documents_corpus"."maximum_task_ttl"
 FROM "documents_corpus"
 WHERE "documents_corpus"."id" = '{corpus_id}'::uuid
 LIMIT 21;
diff --git a/arkindex/sql_validation/process_elements_filter_ml_class.sql b/arkindex/sql_validation/process_elements_filter_ml_class.sql
index fcd76a4123..fb1e9d4389 100644
--- a/arkindex/sql_validation/process_elements_filter_ml_class.sql
+++ b/arkindex/sql_validation/process_elements_filter_ml_class.sql
@@ -51,7 +51,8 @@ SELECT "documents_corpus"."created",
        "documents_corpus"."description",
        "documents_corpus"."top_level_type_id",
        "documents_corpus"."public",
-       "documents_corpus"."indexable"
+       "documents_corpus"."indexable",
+       "documents_corpus"."maximum_task_ttl"
 FROM "documents_corpus"
 WHERE "documents_corpus"."id" = '{corpus_id}'::uuid
 LIMIT 21;
diff --git a/arkindex/sql_validation/process_elements_filter_type.sql b/arkindex/sql_validation/process_elements_filter_type.sql
index 73e943b254..a566bc4a78 100644
--- a/arkindex/sql_validation/process_elements_filter_type.sql
+++ b/arkindex/sql_validation/process_elements_filter_type.sql
@@ -51,7 +51,8 @@ SELECT "documents_corpus"."created",
        "documents_corpus"."description",
        "documents_corpus"."top_level_type_id",
        "documents_corpus"."public",
-       "documents_corpus"."indexable"
+       "documents_corpus"."indexable",
+       "documents_corpus"."maximum_task_ttl"
 FROM "documents_corpus"
 WHERE "documents_corpus"."id" = '{corpus_id}'::uuid
 LIMIT 21;
diff --git a/arkindex/sql_validation/process_elements_top_level.sql b/arkindex/sql_validation/process_elements_top_level.sql
index 7fe800743d..77423e4582 100644
--- a/arkindex/sql_validation/process_elements_top_level.sql
+++ b/arkindex/sql_validation/process_elements_top_level.sql
@@ -51,7 +51,8 @@ SELECT "documents_corpus"."created",
        "documents_corpus"."description",
        "documents_corpus"."top_level_type_id",
        "documents_corpus"."public",
-       "documents_corpus"."indexable"
+       "documents_corpus"."indexable",
+       "documents_corpus"."maximum_task_ttl"
 FROM "documents_corpus"
 WHERE "documents_corpus"."id" = '{corpus_id}'::uuid
 LIMIT 21;
diff --git a/arkindex/sql_validation/process_elements_with_image.sql b/arkindex/sql_validation/process_elements_with_image.sql
index c8f0cd707c..2b6d311781 100644
--- a/arkindex/sql_validation/process_elements_with_image.sql
+++ b/arkindex/sql_validation/process_elements_with_image.sql
@@ -51,7 +51,8 @@ SELECT "documents_corpus"."created",
        "documents_corpus"."description",
        "documents_corpus"."top_level_type_id",
        "documents_corpus"."public",
-       "documents_corpus"."indexable"
+       "documents_corpus"."indexable",
+       "documents_corpus"."maximum_task_ttl"
 FROM "documents_corpus"
 WHERE "documents_corpus"."id" = '{corpus_id}'::uuid
 LIMIT 21;
-- 
GitLab