diff --git a/arkindex/documents/api/elements.py b/arkindex/documents/api/elements.py
index ea5dd24839551682922bc8197905e12a3fb91643..14e54fc2c49ba64a49edf644e5d70ce1a656dcb4 100644
--- a/arkindex/documents/api/elements.py
+++ b/arkindex/documents/api/elements.py
@@ -68,7 +68,13 @@ from arkindex.project.openapi import AutoSchema
 from arkindex.project.pagination import LargePageNumberPagination, PageNumberPagination
 from arkindex.project.permissions import IsVerified, IsVerifiedOrReadOnly
 from arkindex.project.tools import BulkMap
-from arkindex.project.triggers import corpus_delete, element_trash, move_element, worker_results_delete
+from arkindex.project.triggers import (
+    corpus_delete,
+    element_trash,
+    move_element,
+    selection_worker_results_delete,
+    worker_results_delete,
+)
 from arkindex.users.models import Role
 from arkindex.users.utils import filter_rights
 
@@ -1745,7 +1751,14 @@ class CorpusSelectionDestroy(CorpusACLMixin, SelectionMixin, DestroyAPIView):
                 type=UUID,
                 required=False,
                 description='Only delete Worker Results under this parent element',
-            )
+            ),
+            OpenApiParameter(
+                'use_selection',
+                type=bool,
+                required=False,
+                description='Only delete Worker Results on selected elements in this corpus. '
+                            'Cannot be used together with `element_id`.',
+            ),
         ],
         tags=['ml'],
     )
@@ -1764,6 +1777,17 @@ class WorkerResultsDestroy(CorpusACLMixin, DestroyAPIView):
 
         errors = defaultdict(list)
 
+        use_selection = self.request.query_params.get('use_selection', 'false').lower() not in ('false', '0')
+        if use_selection:
+            # Only check for selected elements if the selection feature is enabled
+            if settings.ARKINDEX_FEATURES['selection']:
+                if 'element_id' in self.request.query_params:
+                    errors['use_selection'].append('use_selection and element_id cannot be used simultaneously.')
+                if not self.request.user.selected_elements.filter(corpus=corpus).exists():
+                    errors['use_selection'].append('No elements of the specified corpus have been selected.')
+            else:
+                errors['use_selection'].append('Selection is not available on this instance.')
+
         element_id = None
         if 'element_id' in self.request.query_params:
             try:
@@ -1790,12 +1814,19 @@ class WorkerResultsDestroy(CorpusACLMixin, DestroyAPIView):
         if errors:
             raise ValidationError(errors)
 
-        worker_results_delete(
-            corpus_id=corpus.id,
-            version=worker_version,
-            element_id=element_id,
-            user_id=self.request.user.id,
-        )
+        if use_selection:
+            selection_worker_results_delete(
+                corpus=corpus,
+                version=worker_version,
+                user_id=self.request.user.id,
+            )
+        else:
+            worker_results_delete(
+                corpus_id=corpus.id,
+                version=worker_version,
+                element_id=element_id,
+                user_id=self.request.user.id,
+            )
 
         return Response(status=status.HTTP_204_NO_CONTENT)
 
diff --git a/arkindex/documents/tasks.py b/arkindex/documents/tasks.py
index d0f4bf94f99922c5573a036d0d4b14e748b434b6..f1016c64bf8bf6a4e5505cddc734fcfa41787d98 100644
--- a/arkindex/documents/tasks.py
+++ b/arkindex/documents/tasks.py
@@ -84,6 +84,23 @@ def element_trash(queryset: ElementQuerySet, delete_children: bool) -> None:
     queryset.trash(delete_children=delete_children)
 
 
+@job('high', timeout=settings.RQ_TIMEOUTS['worker_results_delete'])
+def selection_worker_results_delete(corpus_id: str, version_id: Optional[str] = None) -> None:
+    """
+    Delete all Worker Results produced by any WorkerVersion or a specific one
+    on a user's selected elements, including their children.
+    """
+    rq_job = get_current_job()
+    assert rq_job is not None, 'This task can only be run in a RQ job context.'
+    assert rq_job.user_id is not None, 'This task requires a user ID to be defined on the RQ job.'
+
+    queryset = Selection.objects.filter(user_id=rq_job.user_id, element__corpus_id=corpus_id)
+    total = queryset.count()
+    for i, element_id in enumerate(queryset.values_list('element_id', flat=True).iterator()):
+        rq_job.set_progress(i / total)
+        worker_results_delete(corpus_id=corpus_id, version_id=version_id, element_id=element_id)
+
+
 @job('high', timeout=settings.RQ_TIMEOUTS['worker_results_delete'])
 def worker_results_delete(
     corpus_id: str,
diff --git a/arkindex/documents/tests/tasks/test_selection_worker_results_delete.py b/arkindex/documents/tests/tasks/test_selection_worker_results_delete.py
new file mode 100644
index 0000000000000000000000000000000000000000..322d3ef5a2eec93a43d479c875207cb920211c50
--- /dev/null
+++ b/arkindex/documents/tests/tasks/test_selection_worker_results_delete.py
@@ -0,0 +1,73 @@
+from unittest.mock import call, patch
+
+from arkindex.dataimport.models import WorkerVersion
+from arkindex.documents.tasks import selection_worker_results_delete
+from arkindex.project.tests import FixtureTestCase
+
+
+class TestDeleteSelectionWorkerResults(FixtureTestCase):
+
+    @classmethod
+    def setUpTestData(cls):
+        super().setUpTestData()
+        cls.page1 = cls.corpus.elements.get(name='Volume 1, page 1r')
+        cls.page2 = cls.corpus.elements.get(name='Volume 1, page 1v')
+        cls.page3 = cls.corpus.elements.get(name='Volume 1, page 2r')
+        cls.version = WorkerVersion.objects.first()
+
+    def test_no_rq_job(self):
+        with self.assertRaises(AssertionError) as ctx:
+            selection_worker_results_delete(corpus_id=self.corpus.id)
+        self.assertEqual(str(ctx.exception), 'This task can only be run in a RQ job context.')
+
+    @patch('arkindex.documents.tasks.get_current_job')
+    def test_no_user_id(self, job_mock):
+        job_mock.return_value.user_id = None
+        with self.assertRaises(AssertionError) as ctx:
+            selection_worker_results_delete(corpus_id=self.corpus.id)
+        self.assertEqual(str(ctx.exception), 'This task requires a user ID to be defined on the RQ job.')
+
+    @patch('arkindex.documents.tasks.get_current_job')
+    @patch('arkindex.documents.tasks.worker_results_delete')
+    def test_run(self, worker_results_delete_mock, job_mock):
+        self.user.selected_elements.set([self.page1, self.page2])
+        # Another user's selection should not be used
+        self.superuser.selected_elements.set([self.page3])
+        job_mock.return_value.user_id = self.user.id
+
+        selection_worker_results_delete(corpus_id=self.corpus.id)
+
+        self.assertEqual(job_mock.call_count, 1)
+        self.assertEqual(job_mock().set_progress.call_count, 2)
+        self.assertListEqual(job_mock().set_progress.call_args_list, [
+            call(.0),
+            call(.5),
+        ])
+
+        self.assertEqual(worker_results_delete_mock.call_count, 2)
+        self.assertCountEqual(worker_results_delete_mock.call_args_list, [
+            call(corpus_id=self.corpus.id, version_id=None, element_id=self.page1.id),
+            call(corpus_id=self.corpus.id, version_id=None, element_id=self.page2.id),
+        ])
+
+    @patch('arkindex.documents.tasks.get_current_job')
+    @patch('arkindex.documents.tasks.worker_results_delete')
+    def test_run_version_filter(self, worker_results_delete_mock, job_mock):
+        self.user.selected_elements.set([self.page1, self.page2])
+        self.superuser.selected_elements.set([self.page3])
+        job_mock.return_value.user_id = self.user.id
+
+        selection_worker_results_delete(corpus_id=self.corpus.id, version_id=self.version.id)
+
+        self.assertEqual(job_mock.call_count, 1)
+        self.assertEqual(job_mock().set_progress.call_count, 2)
+        self.assertListEqual(job_mock().set_progress.call_args_list, [
+            call(.0),
+            call(.5),
+        ])
+
+        self.assertEqual(worker_results_delete_mock.call_count, 2)
+        self.assertCountEqual(worker_results_delete_mock.call_args_list, [
+            call(corpus_id=self.corpus.id, version_id=self.version.id, element_id=self.page1.id),
+            call(corpus_id=self.corpus.id, version_id=self.version.id, element_id=self.page2.id),
+        ])
diff --git a/arkindex/documents/tests/test_destroy_worker_results.py b/arkindex/documents/tests/test_destroy_worker_results.py
index bf98302e78b3aa76825762ca16acfc1b16ecc987..f1cc1cb0b686532db9a7f34e3e273dc2370fdf7c 100644
--- a/arkindex/documents/tests/test_destroy_worker_results.py
+++ b/arkindex/documents/tests/test_destroy_worker_results.py
@@ -1,5 +1,6 @@
 from unittest.mock import call, patch
 
+from django.test import override_settings
 from django.urls import reverse
 from rest_framework import status
 
@@ -173,3 +174,108 @@ class TestDestroyWorkerResults(FixtureAPITestCase):
             response.json(),
             {'element_id': ['This element does not exist in the specified corpus.']}
         )
+
+    @override_settings(ARKINDEX_FEATURES={'selection': False})
+    def test_selection_feature_flag(self):
+        self.client.force_login(self.user)
+        with self.assertNumQueries(6):
+            response = self.client.delete(
+                reverse('api:worker-delete-results', kwargs={'corpus': str(self.corpus.id)})
+                + '?use_selection=true'
+            )
+        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+        self.assertDictEqual(
+            response.json(),
+            {'use_selection': ['Selection is not available on this instance.']}
+        )
+
+    @override_settings(ARKINDEX_FEATURES={'selection': True})
+    def test_selection_no_element_id(self):
+        self.client.force_login(self.user)
+        with self.assertNumQueries(8):
+            response = self.client.delete(
+                reverse('api:worker-delete-results', kwargs={'corpus': str(self.corpus.id)})
+                + f'?use_selection=true&element_id={self.page.id}'
+            )
+        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+        self.assertDictEqual(
+            response.json(),
+            {'use_selection': [
+                'use_selection and element_id cannot be used simultaneously.',
+                'No elements of the specified corpus have been selected.'
+            ]}
+        )
+
+    @override_settings(ARKINDEX_FEATURES={'selection': True})
+    def test_selection_empty(self):
+        self.client.force_login(self.user)
+        self.assertFalse(self.user.selected_elements.exists())
+        with self.assertNumQueries(7):
+            response = self.client.delete(
+                reverse('api:worker-delete-results', kwargs={'corpus': str(self.corpus.id)})
+                + '?use_selection=true'
+            )
+        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+        self.assertDictEqual(
+            response.json(),
+            {'use_selection': ['No elements of the specified corpus have been selected.']}
+        )
+
+    @override_settings(ARKINDEX_FEATURES={'selection': True})
+    def test_selection_empty_for_corpus(self):
+        self.superuser.selected_elements.add(self.page)
+        self.assertFalse(self.superuser.selected_elements.filter(corpus=self.private_corpus).exists())
+        self.client.force_login(self.superuser)
+        with self.assertNumQueries(4):
+            response = self.client.delete(
+                reverse('api:worker-delete-results', kwargs={'corpus': str(self.private_corpus.id)})
+                + '?use_selection=true'
+            )
+        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+        self.assertDictEqual(
+            response.json(),
+            {'use_selection': ['No elements of the specified corpus have been selected.']}
+        )
+
+    @override_settings(ARKINDEX_FEATURES={'selection': True})
+    @patch('arkindex.project.triggers.documents_tasks.selection_worker_results_delete.delay')
+    def test_selection(self, delay_mock):
+        self.user.selected_elements.add(self.page)
+        self.client.force_login(self.user)
+
+        with self.assertNumQueries(7):
+            response = self.client.delete(
+                reverse('api:worker-delete-results', kwargs={'corpus': str(self.corpus.id)})
+                + '?use_selection=true'
+            )
+        self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT)
+
+        self.assertEqual(delay_mock.call_count, 1)
+        self.assertEqual(delay_mock.call_args, call(
+            corpus_id=self.corpus.id,
+            version_id=None,
+            user_id=self.user.id,
+            description=f"Deletion of worker results on selected elements in {self.corpus.name}"
+        ))
+
+    @override_settings(ARKINDEX_FEATURES={'selection': True})
+    @patch('arkindex.project.triggers.documents_tasks.selection_worker_results_delete.delay')
+    def test_selection_version_filter(self, delay_mock):
+        self.user.selected_elements.add(self.page)
+        self.client.force_login(self.user)
+
+        with self.assertNumQueries(8):
+            response = self.client.delete(
+                reverse('api:worker-delete-results', kwargs={'corpus': str(self.corpus.id)})
+                + f'?use_selection=true&worker_version_id={self.version.id}'
+            )
+        self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT)
+
+        self.assertEqual(delay_mock.call_count, 1)
+        self.assertEqual(delay_mock.call_args, call(
+            corpus_id=self.corpus.id,
+            version_id=self.version.id,
+            user_id=self.user.id,
+            description=f"Deletion of worker results on selected elements in {self.corpus.name} "
+                        f"produced by {self.version}"
+        ))
diff --git a/arkindex/project/triggers.py b/arkindex/project/triggers.py
index 9ac334b77010068078ce77b44c0ba54a53d9d107..112aadca64f1a315b9e2cf6fdf99a61605e6ab1c 100644
--- a/arkindex/project/triggers.py
+++ b/arkindex/project/triggers.py
@@ -64,6 +64,25 @@ def worker_results_delete(corpus_id: UUID,
     )
 
 
+def selection_worker_results_delete(corpus: Corpus,
+                                    user_id: int,
+                                    version: Optional[WorkerVersion] = None) -> None:
+    """
+    Delete all Worker Results produced by any WorkerVersion or a specific one on all elements
+    selected by a user in a corpus, including their child elements.
+    """
+    description = f'Deletion of worker results on selected elements in {corpus.name}'
+    if version is not None:
+        description += f' produced by {version}'
+
+    documents_tasks.selection_worker_results_delete.delay(
+        corpus_id=corpus.id,
+        version_id=version.id if version is not None else None,
+        user_id=user_id,
+        description=description,
+    )
+
+
 def move_element(source: Element, destination: Element, user_id: Optional[int] = None) -> None:
     """
     Move a source Element (and all of its children) to a destination Element.