From 77d6ee38c30250d51ba05e1d9c4eef1df2320060 Mon Sep 17 00:00:00 2001
From: Erwan Rouchet <rouchet@teklia.com>
Date: Fri, 14 Apr 2023 11:13:05 +0200
Subject: [PATCH] Optimize WorkerActivity deletion when deleting a corpus

---
 arkindex/documents/tasks.py                          |  5 +++--
 arkindex/sql_validation/corpus_delete.sql            | 12 +++++++++---
 .../sql_validation/corpus_delete_top_level_type.sql  | 12 +++++++++---
 3 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/arkindex/documents/tasks.py b/arkindex/documents/tasks.py
index d42a9cbfb0..9712f69f41 100644
--- a/arkindex/documents/tasks.py
+++ b/arkindex/documents/tasks.py
@@ -51,8 +51,9 @@ def corpus_delete(corpus_id: str) -> None:
         # Process-DataFile M2M with implicit model
         Process.files.through.objects.filter(process__corpus_id=corpus_id),
         Process.files.through.objects.filter(datafile__corpus_id=corpus_id),
-        # Worker activities
-        WorkerActivity.objects.filter(Q(element__corpus_id=corpus_id) | Q(process__corpus_id=corpus_id)),
+        # Worker activities are deleted with two separate queries, because a single query filtering with OR is slower
+        WorkerActivity.objects.filter(element__corpus_id=corpus_id),
+        WorkerActivity.objects.filter(process__corpus_id=corpus_id),
         corpus.files.all(),
         MetaData.objects.filter(element__corpus_id=corpus_id),
         EntityLink.objects.filter(role__corpus_id=corpus_id),
diff --git a/arkindex/sql_validation/corpus_delete.sql b/arkindex/sql_validation/corpus_delete.sql
index 288247494a..423afc32af 100644
--- a/arkindex/sql_validation/corpus_delete.sql
+++ b/arkindex/sql_validation/corpus_delete.sql
@@ -64,9 +64,15 @@ WHERE "process_workeractivity"."id" IN
         (SELECT U0."id"
          FROM "process_workeractivity" U0
          INNER JOIN "documents_element" U1 ON (U0."element_id" = U1."id")
-         INNER JOIN "process_process" U3 ON (U0."process_id" = U3."id")
-         WHERE (U1."corpus_id" = '{corpus_id}'::uuid
-		OR U3."corpus_id" = '{corpus_id}'::uuid));
+         WHERE U1."corpus_id" = '{corpus_id}'::uuid);
+
+DELETE
+FROM "process_workeractivity"
+WHERE "process_workeractivity"."id" IN
+        (SELECT U0."id"
+         FROM "process_workeractivity" U0
+         INNER JOIN "process_process" U1 ON (U0."process_id" = U1."id")
+	 WHERE U1."corpus_id" = '{corpus_id}'::uuid);
 
 DELETE
 FROM "process_datafile"
diff --git a/arkindex/sql_validation/corpus_delete_top_level_type.sql b/arkindex/sql_validation/corpus_delete_top_level_type.sql
index 5782b2cc8d..316ef633a8 100644
--- a/arkindex/sql_validation/corpus_delete_top_level_type.sql
+++ b/arkindex/sql_validation/corpus_delete_top_level_type.sql
@@ -68,9 +68,15 @@ WHERE "process_workeractivity"."id" IN
         (SELECT U0."id"
          FROM "process_workeractivity" U0
          INNER JOIN "documents_element" U1 ON (U0."element_id" = U1."id")
-         INNER JOIN "process_process" U3 ON (U0."process_id" = U3."id")
-         WHERE (U1."corpus_id" = '{corpus_id}'::uuid
-		OR U3."corpus_id" = '{corpus_id}'::uuid));
+         WHERE U1."corpus_id" = '{corpus_id}'::uuid);
+
+DELETE
+FROM "process_workeractivity"
+WHERE "process_workeractivity"."id" IN
+        (SELECT U0."id"
+         FROM "process_workeractivity" U0
+         INNER JOIN "process_process" U1 ON (U0."process_id" = U1."id")
+	 WHERE U1."corpus_id" = '{corpus_id}'::uuid);
 
 DELETE
 FROM "process_datafile"
-- 
GitLab