From 66d5b8ae07fdc30e1f0f3bce161d05fbd60dae94 Mon Sep 17 00:00:00 2001
From: Erwan Rouchet <rouchet@teklia.com>
Date: Wed, 2 Mar 2022 16:00:05 +0100
Subject: [PATCH] Allow deleting worker results on selection

---
 arkindex/documents/api/elements.py            |  47 ++++++--
 arkindex/documents/tasks.py                   |  17 +++
 .../test_selection_worker_results_delete.py   |  73 ++++++++++++
 .../tests/test_destroy_worker_results.py      | 106 ++++++++++++++++++
 arkindex/project/triggers.py                  |  19 ++++
 5 files changed, 254 insertions(+), 8 deletions(-)
 create mode 100644 arkindex/documents/tests/tasks/test_selection_worker_results_delete.py

diff --git a/arkindex/documents/api/elements.py b/arkindex/documents/api/elements.py
index ea5dd24839..14e54fc2c4 100644
--- a/arkindex/documents/api/elements.py
+++ b/arkindex/documents/api/elements.py
@@ -68,7 +68,13 @@ from arkindex.project.openapi import AutoSchema
 from arkindex.project.pagination import LargePageNumberPagination, PageNumberPagination
 from arkindex.project.permissions import IsVerified, IsVerifiedOrReadOnly
 from arkindex.project.tools import BulkMap
-from arkindex.project.triggers import corpus_delete, element_trash, move_element, worker_results_delete
+from arkindex.project.triggers import (
+    corpus_delete,
+    element_trash,
+    move_element,
+    selection_worker_results_delete,
+    worker_results_delete,
+)
 from arkindex.users.models import Role
 from arkindex.users.utils import filter_rights
 
@@ -1745,7 +1751,14 @@ class CorpusSelectionDestroy(CorpusACLMixin, SelectionMixin, DestroyAPIView):
                 type=UUID,
                 required=False,
                 description='Only delete Worker Results under this parent element',
-            )
+            ),
+            OpenApiParameter(
+                'use_selection',
+                type=bool,
+                required=False,
+                description='Only delete Worker Results on selected elements in this corpus. '
+                            'Cannot be used together with `element_id`.',
+            ),
         ],
         tags=['ml'],
     )
@@ -1764,6 +1777,17 @@ class WorkerResultsDestroy(CorpusACLMixin, DestroyAPIView):
 
         errors = defaultdict(list)
 
+        use_selection = self.request.query_params.get('use_selection', 'false').lower() not in ('false', '0')
+        if use_selection:
+            # Only check for selected elements if the selection feature is enabled
+            if settings.ARKINDEX_FEATURES['selection']:
+                if 'element_id' in self.request.query_params:
+                    errors['use_selection'].append('use_selection and element_id cannot be used simultaneously.')
+                if not self.request.user.selected_elements.filter(corpus=corpus).exists():
+                    errors['use_selection'].append('No elements of the specified corpus have been selected.')
+            else:
+                errors['use_selection'].append('Selection is not available on this instance.')
+
         element_id = None
         if 'element_id' in self.request.query_params:
             try:
@@ -1790,12 +1814,19 @@ class WorkerResultsDestroy(CorpusACLMixin, DestroyAPIView):
         if errors:
             raise ValidationError(errors)
 
-        worker_results_delete(
-            corpus_id=corpus.id,
-            version=worker_version,
-            element_id=element_id,
-            user_id=self.request.user.id,
-        )
+        if use_selection:
+            selection_worker_results_delete(
+                corpus=corpus,
+                version=worker_version,
+                user_id=self.request.user.id,
+            )
+        else:
+            worker_results_delete(
+                corpus_id=corpus.id,
+                version=worker_version,
+                element_id=element_id,
+                user_id=self.request.user.id,
+            )
 
         return Response(status=status.HTTP_204_NO_CONTENT)
 
diff --git a/arkindex/documents/tasks.py b/arkindex/documents/tasks.py
index d0f4bf94f9..f1016c64bf 100644
--- a/arkindex/documents/tasks.py
+++ b/arkindex/documents/tasks.py
@@ -84,6 +84,23 @@ def element_trash(queryset: ElementQuerySet, delete_children: bool) -> None:
     queryset.trash(delete_children=delete_children)
 
 
+@job('high', timeout=settings.RQ_TIMEOUTS['worker_results_delete'])
+def selection_worker_results_delete(corpus_id: str, version_id: Optional[str] = None) -> None:
+    """
+    Remove the Worker Results, from every WorkerVersion or only the given one, found on the
+    elements (children included) that the user attached to the current RQ job selected in a corpus.
+    """
+    current_job = get_current_job()
+    assert current_job is not None, 'This task can only be run in a RQ job context.'
+    assert current_job.user_id is not None, 'This task requires a user ID to be defined on the RQ job.'
+
+    selection = Selection.objects.filter(user_id=current_job.user_id, element__corpus_id=corpus_id)
+    selected_count = selection.count()
+    for index, element_id in enumerate(selection.values_list('element_id', flat=True).iterator()):
+        current_job.set_progress(index / selected_count)
+        worker_results_delete(corpus_id=corpus_id, version_id=version_id, element_id=element_id)
+
+
 @job('high', timeout=settings.RQ_TIMEOUTS['worker_results_delete'])
 def worker_results_delete(
         corpus_id: str,
diff --git a/arkindex/documents/tests/tasks/test_selection_worker_results_delete.py b/arkindex/documents/tests/tasks/test_selection_worker_results_delete.py
new file mode 100644
index 0000000000..322d3ef5a2
--- /dev/null
+++ b/arkindex/documents/tests/tasks/test_selection_worker_results_delete.py
@@ -0,0 +1,73 @@
+from unittest.mock import call, patch
+
+from arkindex.dataimport.models import WorkerVersion
+from arkindex.documents.tasks import selection_worker_results_delete
+from arkindex.project.tests import FixtureTestCase
+
+
+class TestDeleteSelectionWorkerResults(FixtureTestCase):
+    """Unit tests for the selection_worker_results_delete task, called directly with a mocked RQ job."""
+    @classmethod
+    def setUpTestData(cls):
+        super().setUpTestData()
+        cls.page1 = cls.corpus.elements.get(name='Volume 1, page 1r')
+        cls.page2 = cls.corpus.elements.get(name='Volume 1, page 1v')
+        cls.page3 = cls.corpus.elements.get(name='Volume 1, page 2r')
+        cls.version = WorkerVersion.objects.first()  # used only as a version filter value
+
+    def test_no_rq_job(self):
+        with self.assertRaises(AssertionError) as ctx:
+            selection_worker_results_delete(corpus_id=self.corpus.id)  # get_current_job is not patched here
+        self.assertEqual(str(ctx.exception), 'This task can only be run in a RQ job context.')
+
+    @patch('arkindex.documents.tasks.get_current_job')
+    def test_no_user_id(self, job_mock):
+        job_mock.return_value.user_id = None  # a job exists, but it carries no user
+        with self.assertRaises(AssertionError) as ctx:
+            selection_worker_results_delete(corpus_id=self.corpus.id)
+        self.assertEqual(str(ctx.exception), 'This task requires a user ID to be defined on the RQ job.')
+
+    @patch('arkindex.documents.tasks.get_current_job')
+    @patch('arkindex.documents.tasks.worker_results_delete')
+    def test_run(self, worker_results_delete_mock, job_mock):
+        self.user.selected_elements.set([self.page1, self.page2])
+        # page3 is only in another user's selection, so it must not be processed
+        self.superuser.selected_elements.set([self.page3])
+        job_mock.return_value.user_id = self.user.id
+
+        selection_worker_results_delete(corpus_id=self.corpus.id)
+
+        self.assertEqual(job_mock.call_count, 1)  # checked first: the job_mock() calls below add to call_count
+        self.assertEqual(job_mock().set_progress.call_count, 2)
+        self.assertListEqual(job_mock().set_progress.call_args_list, [
+            call(.0),  # progress is reported before each element is handled: 0/2 ...
+            call(.5),  # ... then 1/2
+        ])
+
+        self.assertEqual(worker_results_delete_mock.call_count, 2)
+        self.assertCountEqual(worker_results_delete_mock.call_args_list, [  # order-insensitive comparison
+            call(corpus_id=self.corpus.id, version_id=None, element_id=self.page1.id),
+            call(corpus_id=self.corpus.id, version_id=None, element_id=self.page2.id),
+        ])
+
+    @patch('arkindex.documents.tasks.get_current_job')
+    @patch('arkindex.documents.tasks.worker_results_delete')
+    def test_run_version_filter(self, worker_results_delete_mock, job_mock):
+        self.user.selected_elements.set([self.page1, self.page2])
+        self.superuser.selected_elements.set([self.page3])  # another user's selection, expected to be ignored
+        job_mock.return_value.user_id = self.user.id
+
+        selection_worker_results_delete(corpus_id=self.corpus.id, version_id=self.version.id)
+
+        self.assertEqual(job_mock.call_count, 1)  # checked first: the job_mock() calls below add to call_count
+        self.assertEqual(job_mock().set_progress.call_count, 2)
+        self.assertListEqual(job_mock().set_progress.call_args_list, [
+            call(.0),
+            call(.5),
+        ])
+
+        self.assertEqual(worker_results_delete_mock.call_count, 2)
+        self.assertCountEqual(worker_results_delete_mock.call_args_list, [  # version_id is forwarded unchanged
+            call(corpus_id=self.corpus.id, version_id=self.version.id, element_id=self.page1.id),
+            call(corpus_id=self.corpus.id, version_id=self.version.id, element_id=self.page2.id),
+        ])
diff --git a/arkindex/documents/tests/test_destroy_worker_results.py b/arkindex/documents/tests/test_destroy_worker_results.py
index bf98302e78..f1cc1cb0b6 100644
--- a/arkindex/documents/tests/test_destroy_worker_results.py
+++ b/arkindex/documents/tests/test_destroy_worker_results.py
@@ -1,5 +1,6 @@
 from unittest.mock import call, patch
 
+from django.test import override_settings
 from django.urls import reverse
 from rest_framework import status
 
@@ -173,3 +174,108 @@ class TestDestroyWorkerResults(FixtureAPITestCase):
             response.json(),
             {'element_id': ['This element does not exist in the specified corpus.']}
         )
+
+    @override_settings(ARKINDEX_FEATURES={'selection': False})  # selection feature switched off
+    def test_selection_feature_flag(self):
+        self.client.force_login(self.user)
+        with self.assertNumQueries(6):
+            response = self.client.delete(
+                reverse('api:worker-delete-results', kwargs={'corpus': str(self.corpus.id)})
+                + '?use_selection=true'
+            )
+            self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+        self.assertDictEqual(
+            response.json(),
+            {'use_selection': ['Selection is not available on this instance.']}  # the only error reported
+        )
+
+    @override_settings(ARKINDEX_FEATURES={'selection': True})
+    def test_selection_no_element_id(self):
+        self.client.force_login(self.user)
+        with self.assertNumQueries(8):
+            response = self.client.delete(
+                reverse('api:worker-delete-results', kwargs={'corpus': str(self.corpus.id)})
+                + f'?use_selection=true&element_id={self.page.id}'  # both filters sent in one request
+            )
+            self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+        self.assertDictEqual(
+            response.json(),
+            {'use_selection': [  # both errors are expected together under the same key
+                'use_selection and element_id cannot be used simultaneously.',
+                'No elements of the specified corpus have been selected.'  # nothing is selected in this test
+            ]}
+        )
+
+    @override_settings(ARKINDEX_FEATURES={'selection': True})
+    def test_selection_empty(self):
+        self.client.force_login(self.user)
+        self.assertFalse(self.user.selected_elements.exists())  # precondition: the user selected nothing at all
+        with self.assertNumQueries(7):
+            response = self.client.delete(
+                reverse('api:worker-delete-results', kwargs={'corpus': str(self.corpus.id)})
+                + '?use_selection=true'
+            )
+            self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+        self.assertDictEqual(
+            response.json(),
+            {'use_selection': ['No elements of the specified corpus have been selected.']}
+        )
+
+    @override_settings(ARKINDEX_FEATURES={'selection': True})
+    def test_selection_empty_for_corpus(self):
+        self.superuser.selected_elements.add(self.page)  # a selection exists, but not in private_corpus
+        self.assertFalse(self.superuser.selected_elements.filter(corpus=self.private_corpus).exists())
+        self.client.force_login(self.superuser)  # the precondition above is about this requesting user
+        with self.assertNumQueries(4):
+            response = self.client.delete(
+                reverse('api:worker-delete-results', kwargs={'corpus': str(self.private_corpus.id)})
+                + '?use_selection=true'
+            )
+            self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+        self.assertDictEqual(
+            response.json(),
+            {'use_selection': ['No elements of the specified corpus have been selected.']}
+        )
+
+    @override_settings(ARKINDEX_FEATURES={'selection': True})
+    @patch('arkindex.project.triggers.documents_tasks.selection_worker_results_delete.delay')
+    def test_selection(self, delay_mock):
+        self.user.selected_elements.add(self.page)  # gives the requesting user a non-empty selection
+        self.client.force_login(self.user)
+
+        with self.assertNumQueries(7):
+            response = self.client.delete(
+                reverse('api:worker-delete-results', kwargs={'corpus': str(self.corpus.id)})
+                + '?use_selection=true'
+            )
+            self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT)
+
+        self.assertEqual(delay_mock.call_count, 1)  # .delay is mocked: only the enqueue call is checked
+        self.assertEqual(delay_mock.call_args, call(
+            corpus_id=self.corpus.id,
+            version_id=None,  # no worker_version_id was sent in the query string
+            user_id=self.user.id,
+            description=f"Deletion of worker results on selected elements in {self.corpus.name}"
+        ))
+
+    @override_settings(ARKINDEX_FEATURES={'selection': True})
+    @patch('arkindex.project.triggers.documents_tasks.selection_worker_results_delete.delay')
+    def test_selection_version_filter(self, delay_mock):
+        self.user.selected_elements.add(self.page)  # gives the requesting user a non-empty selection
+        self.client.force_login(self.user)
+
+        with self.assertNumQueries(8):
+            response = self.client.delete(
+                reverse('api:worker-delete-results', kwargs={'corpus': str(self.corpus.id)})
+                + f'?use_selection=true&worker_version_id={self.version.id}'
+            )
+            self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT)
+
+        self.assertEqual(delay_mock.call_count, 1)  # .delay is mocked: only the enqueue call is checked
+        self.assertEqual(delay_mock.call_args, call(
+            corpus_id=self.corpus.id,
+            version_id=self.version.id,  # taken from the worker_version_id query parameter
+            user_id=self.user.id,
+            description=f"Deletion of worker results on selected elements in {self.corpus.name} "
+                        f"produced by {self.version}"  # the description also names the version
+        ))
diff --git a/arkindex/project/triggers.py b/arkindex/project/triggers.py
index 9ac334b770..112aadca64 100644
--- a/arkindex/project/triggers.py
+++ b/arkindex/project/triggers.py
@@ -64,6 +64,25 @@ def worker_results_delete(corpus_id: UUID,
     )
 
 
+def selection_worker_results_delete(corpus: Corpus,
+                                    user_id: int,
+                                    version: Optional[WorkerVersion] = None) -> None:
+    """
+    Enqueue the task deleting the Worker Results found on every element a user selected in a corpus,
+    child elements included. When a WorkerVersion is given, only its results are targeted;
+    otherwise results from any version are.
+    """
+    version_id = version.id if version else None
+    version_suffix = f' produced by {version}' if version is not None else ''
+
+    documents_tasks.selection_worker_results_delete.delay(
+        corpus_id=corpus.id,
+        version_id=version_id,
+        user_id=user_id,
+        description=f'Deletion of worker results on selected elements in {corpus.name}{version_suffix}',
+    )
+
+
 def move_element(source: Element, destination: Element, user_id: Optional[int] = None) -> None:
     """
     Move a source Element (and all of its children) to a destination Element.
-- 
GitLab