From 3c3e5c12ff82879712fe0f695e71a3919f17f99e Mon Sep 17 00:00:00 2001
From: Erwan Rouchet <rouchet@teklia.com>
Date: Thu, 29 Jul 2021 11:18:39 +0200
Subject: [PATCH] Add job timeout settings

---
 arkindex/dataimport/tasks.py                             | 5 +++--
 arkindex/documents/export/__init__.py                    | 2 +-
 arkindex/documents/tasks.py                              | 9 +++++----
 arkindex/project/config.py                               | 9 +++++++++
 arkindex/project/settings.py                             | 2 ++
 arkindex/project/tests/config_samples/defaults.yaml      | 8 ++++++++
 arkindex/project/tests/config_samples/errors.yaml        | 7 +++++++
 .../project/tests/config_samples/expected_errors.yaml    | 5 +++++
 arkindex/project/tests/config_samples/override.yaml      | 8 ++++++++
 9 files changed, 48 insertions(+), 7 deletions(-)

diff --git a/arkindex/dataimport/tasks.py b/arkindex/dataimport/tasks.py
index e8bb8231cb..eb15974407 100644
--- a/arkindex/dataimport/tasks.py
+++ b/arkindex/dataimport/tasks.py
@@ -1,12 +1,13 @@
 from typing import Optional
 
+from django.conf import settings
 from django.db import transaction
 from django_rq import job
 
 from arkindex.dataimport.models import ActivityState, DataImport, WorkerActivity, WorkerActivityState
 
 
-@job('default', timeout=3600)
+@job('default', timeout=settings.RQ_TIMEOUTS['initialize_activity'])
 def initialize_activity(process: DataImport):
     """
     List all worker versions used in a process and initialize their activity on processed elements.
@@ -28,7 +29,7 @@ def initialize_activity(process: DataImport):
     process.save()
 
 
-@job('default', timeout=3600)
+@job('default', timeout=settings.RQ_TIMEOUTS['process_delete'])
 def process_delete(process: DataImport, delete_activity: Optional[WorkerActivityState] = None):
     """
     Delete a process and removes its reference from related worker activities.
diff --git a/arkindex/documents/export/__init__.py b/arkindex/documents/export/__init__.py
index 7b174f5f0b..d9b94f00ba 100644
--- a/arkindex/documents/export/__init__.py
+++ b/arkindex/documents/export/__init__.py
@@ -99,7 +99,7 @@ def send_email(subject, template_name, corpus_export, **context):
         logger.error(f'Failed to send email to {corpus_export.user.email}')
 
 
-@job('high', timeout=7200)
+@job('high', timeout=settings.RQ_TIMEOUTS['export_corpus'])
 def export_corpus(corpus_export: CorpusExport) -> None:
     _, db_path = tempfile.mkstemp(suffix='db')
     try:
diff --git a/arkindex/documents/tasks.py b/arkindex/documents/tasks.py
index 4b6f0545c4..d58b268f3b 100644
--- a/arkindex/documents/tasks.py
+++ b/arkindex/documents/tasks.py
@@ -1,5 +1,6 @@
 import logging
 
+from django.conf import settings
 from django.db.models import Q
 from django_rq import job
 from rq import get_current_job
@@ -21,7 +22,7 @@ from arkindex.documents.models import (
 logger = logging.getLogger(__name__)
 
 
-@job('high')
+@job('high', timeout=settings.RQ_TIMEOUTS['corpus_delete'])
 def corpus_delete(corpus_id: str) -> None:
     # Note that this can be None when the task is run outside of a RQ worker (e.g. unit test)
     rq_job = get_current_job()
@@ -70,12 +71,12 @@ def corpus_delete(corpus_id: str) -> None:
     logger.info(f'Deleted corpus {corpus_id}')
 
 
-@job('high')
+@job('high', timeout=settings.RQ_TIMEOUTS['element_trash'])
 def element_trash(queryset: ElementQuerySet, delete_children: bool) -> None:
     queryset.trash(delete_children=delete_children)
 
 
-@job('high')
+@job('high', timeout=settings.RQ_TIMEOUTS['worker_results_delete'])
 def worker_results_delete(corpus_id: str, version_id: str, parent_id: str) -> None:
     """
     Recursively delete all Worker Results produced by a specific WorkerVersion on a whole corpus or under
@@ -112,7 +113,7 @@ def worker_results_delete(corpus_id: str, version_id: str, parent_id: str) -> No
     transcriptions._raw_delete(using='default')
 
 
-@job('high')
+@job('high', timeout=settings.RQ_TIMEOUTS['move_element'])
 def move_element(source: Element, destination: Element) -> None:
     parents = Element.objects.filter(id__in=source.paths.values('path__last')).only('id')
     for parent in parents:
diff --git a/arkindex/project/config.py b/arkindex/project/config.py
index b50ae13468..71613fbcd0 100644
--- a/arkindex/project/config.py
+++ b/arkindex/project/config.py
@@ -114,6 +114,15 @@ def get_settings_parser(base_dir):
     redis_parser.add_option('password', type=str, default=None)
     redis_parser.add_option('timeout', type=int, default=1800)
 
+    job_timeouts_parser = parser.add_subparser('job_timeouts', default={})
+    job_timeouts_parser.add_option('export_corpus', type=int, default=7200)
+    job_timeouts_parser.add_option('corpus_delete', type=int, default=7200)
+    job_timeouts_parser.add_option('worker_results_delete', type=int, default=3600)
+    job_timeouts_parser.add_option('element_trash', type=int, default=3600)
+    job_timeouts_parser.add_option('move_element', type=int, default=3600)
+    job_timeouts_parser.add_option('initialize_activity', type=int, default=3600)
+    job_timeouts_parser.add_option('process_delete', type=int, default=3600)
+
     csrf_parser = parser.add_subparser('csrf', default={})
     csrf_parser.add_option('cookie_name', type=str, default='arkindex.csrf')
     csrf_parser.add_option('cookie_domain', type=str, default=None)
diff --git a/arkindex/project/settings.py b/arkindex/project/settings.py
index 842c432110..f3f6612bdc 100644
--- a/arkindex/project/settings.py
+++ b/arkindex/project/settings.py
@@ -383,6 +383,8 @@ RQ_QUEUES = {
     }
 }
 
+RQ_TIMEOUTS = conf['job_timeouts']
+
 RQ = {
     'JOB_CLASS': 'arkindex.project.rq_overrides.Job',
     'QUEUE_CLASS': 'arkindex.project.rq_overrides.Queue'
diff --git a/arkindex/project/tests/config_samples/defaults.yaml b/arkindex/project/tests/config_samples/defaults.yaml
index f58e8c607e..ea5f88fc55 100644
--- a/arkindex/project/tests/config_samples/defaults.yaml
+++ b/arkindex/project/tests/config_samples/defaults.yaml
@@ -45,6 +45,14 @@ imports_worker_version: null
 influxdb:
   api_url: http://localhost:8086/
 internal_group_id: 2
+job_timeouts:
+  corpus_delete: 7200
+  element_trash: 3600
+  export_corpus: 7200
+  initialize_activity: 3600
+  move_element: 3600
+  process_delete: 3600
+  worker_results_delete: 3600
 jwt_signing_key: null
 local_imageserver_id: 1
 ponos:
diff --git a/arkindex/project/tests/config_samples/errors.yaml b/arkindex/project/tests/config_samples/errors.yaml
index a2db06b62a..d8877b7914 100644
--- a/arkindex/project/tests/config_samples/errors.yaml
+++ b/arkindex/project/tests/config_samples/errors.yaml
@@ -30,6 +30,13 @@ gitlab:
 influxdb:
   api_url: no
 internal_group_id: 2
+job_timeouts:
+  corpus_delete: lol
+  element_trash: no
+  export_corpus: []
+  move_element:
+    a: b
+  worker_results_delete: null
 jwt_signing_key: null
 local_imageserver_id: 1
 ponos:
diff --git a/arkindex/project/tests/config_samples/expected_errors.yaml b/arkindex/project/tests/config_samples/expected_errors.yaml
index 930ec1a76e..0bb4c8530e 100644
--- a/arkindex/project/tests/config_samples/expected_errors.yaml
+++ b/arkindex/project/tests/config_samples/expected_errors.yaml
@@ -13,6 +13,11 @@ email:
   user: This option is required
 features:
   sv_cheats: This option does not exist
+job_timeouts:
+  corpus_delete: "invalid literal for int() with base 10: 'lol'"
+  export_corpus: "int() argument must be a string, a bytes-like object or a number, not 'list'"
+  move_element: "int() argument must be a string, a bytes-like object or a number, not 'dict'"
+  worker_results_delete: "int() argument must be a string, a bytes-like object or a number, not 'NoneType'"
 public_hostname: The hostname must include an HTTP or HTTPS scheme.
 redis:
   port: "invalid literal for int() with base 10: 'over nine thousand'"
diff --git a/arkindex/project/tests/config_samples/override.yaml b/arkindex/project/tests/config_samples/override.yaml
index 95aeb01fd9..be46d7c9bf 100644
--- a/arkindex/project/tests/config_samples/override.yaml
+++ b/arkindex/project/tests/config_samples/override.yaml
@@ -59,6 +59,14 @@ imports_worker_version: aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa
 influxdb:
   api_url: http://graph/
 internal_group_id: 4
+job_timeouts:
+  corpus_delete: 1
+  element_trash: 2
+  export_corpus: 3
+  initialize_activity: 4
+  move_element: 5
+  process_delete: 6
+  worker_results_delete: 7
 jwt_signing_key: deadbeef
 local_imageserver_id: 45
 ponos:
-- 
GitLab