From 77d6ee38c30250d51ba05e1d9c4eef1df2320060 Mon Sep 17 00:00:00 2001
From: Erwan Rouchet <rouchet@teklia.com>
Date: Fri, 14 Apr 2023 11:13:05 +0200
Subject: [PATCH] Optimize WorkerActivity deletion when deleting a corpus

---
 arkindex/documents/tasks.py                          |  5 +++--
 arkindex/sql_validation/corpus_delete.sql            | 12 +++++++++---
 .../sql_validation/corpus_delete_top_level_type.sql  | 12 +++++++++---
 3 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/arkindex/documents/tasks.py b/arkindex/documents/tasks.py
index d42a9cbfb0..9712f69f41 100644
--- a/arkindex/documents/tasks.py
+++ b/arkindex/documents/tasks.py
@@ -51,8 +51,9 @@ def corpus_delete(corpus_id: str) -> None:
         # Process-DataFile M2M with implicit model
         Process.files.through.objects.filter(process__corpus_id=corpus_id),
         Process.files.through.objects.filter(datafile__corpus_id=corpus_id),
-        # Worker activities
-        WorkerActivity.objects.filter(Q(element__corpus_id=corpus_id) | Q(process__corpus_id=corpus_id)),
+        # Worker activities are deleted in two queries, as filtering using OR is slower
+        WorkerActivity.objects.filter(element__corpus_id=corpus_id),
+        WorkerActivity.objects.filter(process__corpus_id=corpus_id),
         corpus.files.all(),
         MetaData.objects.filter(element__corpus_id=corpus_id),
         EntityLink.objects.filter(role__corpus_id=corpus_id),
diff --git a/arkindex/sql_validation/corpus_delete.sql b/arkindex/sql_validation/corpus_delete.sql
index 288247494a..423afc32af 100644
--- a/arkindex/sql_validation/corpus_delete.sql
+++ b/arkindex/sql_validation/corpus_delete.sql
@@ -64,9 +64,15 @@ WHERE "process_workeractivity"."id" IN
     (SELECT U0."id"
      FROM "process_workeractivity" U0
      INNER JOIN "documents_element" U1 ON (U0."element_id" = U1."id")
-     INNER JOIN "process_process" U3 ON (U0."process_id" = U3."id")
-     WHERE (U1."corpus_id" = '{corpus_id}'::uuid
-            OR U3."corpus_id" = '{corpus_id}'::uuid));
+     WHERE U1."corpus_id" = '{corpus_id}'::uuid);
+
+DELETE
+FROM "process_workeractivity"
+WHERE "process_workeractivity"."id" IN
+    (SELECT U0."id"
+     FROM "process_workeractivity" U0
+     INNER JOIN "process_process" U1 ON (U0."process_id" = U1."id")
+     WHERE U1."corpus_id" = '{corpus_id}'::uuid);
 
 DELETE
 FROM "process_datafile"
diff --git a/arkindex/sql_validation/corpus_delete_top_level_type.sql b/arkindex/sql_validation/corpus_delete_top_level_type.sql
index 5782b2cc8d..316ef633a8 100644
--- a/arkindex/sql_validation/corpus_delete_top_level_type.sql
+++ b/arkindex/sql_validation/corpus_delete_top_level_type.sql
@@ -68,9 +68,15 @@ WHERE "process_workeractivity"."id" IN
     (SELECT U0."id"
      FROM "process_workeractivity" U0
      INNER JOIN "documents_element" U1 ON (U0."element_id" = U1."id")
-     INNER JOIN "process_process" U3 ON (U0."process_id" = U3."id")
-     WHERE (U1."corpus_id" = '{corpus_id}'::uuid
-            OR U3."corpus_id" = '{corpus_id}'::uuid));
+     WHERE U1."corpus_id" = '{corpus_id}'::uuid);
+
+DELETE
+FROM "process_workeractivity"
+WHERE "process_workeractivity"."id" IN
+    (SELECT U0."id"
+     FROM "process_workeractivity" U0
+     INNER JOIN "process_process" U1 ON (U0."process_id" = U1."id")
+     WHERE U1."corpus_id" = '{corpus_id}'::uuid);
 
 DELETE
 FROM "process_datafile"
-- 
GitLab