diff --git a/arkindex/dataimport/api.py b/arkindex/dataimport/api.py
index 1091da1f81ec0d383232d2d38c35dff0698e6cc0..a5b9e55cfd9fc9817abaa2552ac334e572a91226 100644
--- a/arkindex/dataimport/api.py
+++ b/arkindex/dataimport/api.py
@@ -23,6 +23,7 @@ from rest_framework import permissions, status
 from rest_framework.exceptions import NotFound, PermissionDenied, ValidationError
 from rest_framework.generics import (
     CreateAPIView,
+    DestroyAPIView,
     GenericAPIView,
     ListAPIView,
     ListCreateAPIView,
@@ -1741,3 +1742,41 @@ class ApplyProcessTemplate(ProcessACLMixin, WorkerACLMixin, CreateAPIView):
             status=status.HTTP_200_OK,
             data=DataImportSerializer(dataimport, context={'request': self.request}).data
         )
+
+
+@extend_schema_view(
+    delete=extend_schema(
+        operation_id='ClearProcess',
+        tags=['imports'],
+        responses={
+            204: None,
+        },
+    )
+)
+class ClearProcess(ProcessACLMixin, DestroyAPIView):
+    """
+    Remove all worker runs and the template from a process that has not been started yet.
+
+    Requires an admin access to the process.
+    """
+    permission_classes = (IsVerified, )
+    queryset = DataImport.objects.all()
+
+    def check_object_permissions(self, request, process):
+        """
+        Only allow admins of the process, and only while the process has no workflow yet.
+        """
+        super().check_object_permissions(request, process)
+
+        access_level = self.process_access_level(process)
+        if not access_level:
+            # No access level at all: answer as if the process did not exist
+            raise NotFound
+        if access_level < Role.Admin.value:
+            raise PermissionDenied(detail='You do not have a sufficient access level to this process.')
+        if process.workflow_id is not None:
+            raise ValidationError({'__all__': ['A process can only be cleared before getting started.']})
+
+    def perform_destroy(self, process):
+        # The process itself is kept: only its worker runs and template are removed
+        process.clear()
diff --git a/arkindex/dataimport/models.py b/arkindex/dataimport/models.py
index 6d0792de84b972240b7a91c6d013c99e44de3285..acdac7d8cc6f048b25caf8d8001500fec30b17a9 100644
--- a/arkindex/dataimport/models.py
+++ b/arkindex/dataimport/models.py
@@ -395,6 +395,14 @@ class DataImport(IndexableModel):
         else:
             self.start()
 
+    def clear(self):
+        """
+        Remove all worker runs from this process and unset its template.
+        """
+        self.worker_runs.all().delete()
+        self.template_id = None
+        self.save()
+
 
 class DataImportElement(models.Model):
     """
diff --git a/arkindex/dataimport/tests/test_imports.py b/arkindex/dataimport/tests/test_imports.py
index 11933509747f6336b1786add1cd4739c2d3c8520..eb67d9522e699fa40b6fa90327ce8a31c9962393 100644
--- a/arkindex/dataimport/tests/test_imports.py
+++ b/arkindex/dataimport/tests/test_imports.py
@@ -65,6 +65,7 @@ class TestImports(FixtureAPITestCase):
         )
         cls.page_type = ElementType.objects.get(corpus=cls.corpus, slug='page')
         cls.recognizer = WorkerVersion.objects.get(worker__slug='reco')
+        cls.dla = WorkerVersion.objects.get(worker__slug='dla')
         cls.version_gpu = WorkerVersion.objects.get(worker__slug='worker-gpu')
         cls.workers_process = cls.corpus.imports.get(mode=DataImportMode.Workers)
         cls.version_with_model = WorkerVersion.objects.get(worker__slug='generic')
@@ -1469,3 +1470,103 @@ class TestImports(FixtureAPITestCase):
                 }
             ]
         )
+
+    def _create_process_with_worker_runs(self):
+        """
+        Create a Workers process on the test corpus, with one worker run for each of two worker versions
+        """
+        process = self.corpus.imports.create(
+            creator=self.user,
+            mode=DataImportMode.Workers,
+            element_type=self.page_type,
+        )
+        process.worker_runs.create(version=self.dla, parents=[], configuration=None)
+        process.worker_runs.create(version=self.recognizer, parents=[], configuration=None)
+        self.assertEqual(process.worker_runs.count(), 2)
+        return process
+
+    def test_clear_process(self):
+        process = self._create_process_with_worker_runs()
+
+        self.client.force_login(self.user)
+        with self.assertNumQueries(9):
+            response = self.client.delete(reverse('api:clear-process', kwargs={'pk': str(process.id)}))
+            self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT)
+        process.refresh_from_db()
+        self.assertEqual(process.worker_runs.count(), 0)
+        self.assertIsNone(process.template_id)
+
+    def test_clear_process_requires_login(self):
+        process = self._create_process_with_worker_runs()
+
+        with self.assertNumQueries(0):
+            response = self.client.delete(reverse('api:clear-process', kwargs={'pk': str(process.id)}))
+            self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
+        self.assertEqual(process.worker_runs.count(), 2)
+
+    def test_clear_process_requires_verified(self):
+        process = self._create_process_with_worker_runs()
+
+        self.user.verified_email = False
+        self.user.save()
+        self.client.force_login(self.user)
+        with self.assertNumQueries(2):
+            response = self.client.delete(reverse('api:clear-process', kwargs={'pk': str(process.id)}))
+            self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
+        self.assertDictEqual(response.json(), {'detail': 'You do not have permission to perform this action.'})
+        self.assertEqual(process.worker_runs.count(), 2)
+
+    def test_clear_process_does_not_exist(self):
+        process = self._create_process_with_worker_runs()
+
+        self.client.force_login(self.user)
+        with self.assertNumQueries(3):
+            response = self.client.delete(reverse('api:clear-process', kwargs={'pk': str(uuid.uuid4())}))
+            self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND)
+        self.assertEqual(process.worker_runs.count(), 2)
+
+    def test_clear_process_not_unscheduled(self):
+        """
+        Cannot clear a process that has already started
+        """
+        process = self._create_process_with_worker_runs()
+        process.start()
+        process.workflow.tasks.update(state=State.Running)
+        self.assertEqual(process.state, State.Running)
+
+        self.client.force_login(self.user)
+        with self.assertNumQueries(7):
+            response = self.client.delete(reverse('api:clear-process', kwargs={'pk': str(process.id)}))
+            self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+        self.assertDictEqual(response.json(), {'__all__': ['A process can only be cleared before getting started.']})
+        self.assertEqual(process.worker_runs.count(), 2)
+
+    def test_clear_process_unscheduled_workflow(self):
+        """
+        Cannot clear a process that has a workflow, even unscheduled
+        """
+        process = self._create_process_with_worker_runs()
+        process.start()
+        self.assertEqual(process.state, State.Unscheduled)
+
+        self.client.force_login(self.user)
+        with self.assertNumQueries(7):
+            response = self.client.delete(reverse('api:clear-process', kwargs={'pk': str(process.id)}))
+            self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+        self.assertDictEqual(response.json(), {'__all__': ['A process can only be cleared before getting started.']})
+        self.assertEqual(process.worker_runs.count(), 2)
+
+    def test_clear_process_requires_permissions(self):
+        process = self._create_process_with_worker_runs()
+
+        user2 = User.objects.create_user('email@mail.com', 'bob')
+        user2.verified_email = True
+        user2.save()
+
+        self.corpus.memberships.create(user=user2, level=Role.Contributor.value)
+        self.client.force_login(user2)
+        with self.assertNumQueries(6):
+            response = self.client.delete(reverse('api:clear-process', kwargs={'pk': str(process.id)}))
+            self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
+        self.assertDictEqual(response.json(), {'detail': 'You do not have a sufficient access level to this process.'})
+        self.assertEqual(process.worker_runs.count(), 2)
diff --git a/arkindex/project/api_v1.py b/arkindex/project/api_v1.py
index 93fb0cb2c40dbd5e1a8c7e6410e37193ac90f19a..436214d3a2a1b743465250712679bfd7c6b054e7 100644
--- a/arkindex/project/api_v1.py
+++ b/arkindex/project/api_v1.py
@@ -4,6 +4,7 @@ from django.views.generic.base import RedirectView
 from arkindex.dataimport.api import (
     ApplyProcessTemplate,
     AvailableRepositoriesList,
+    ClearProcess,
     CorpusWorkersActivity,
     CorpusWorkerVersionList,
     CorpusWorkflow,
@@ -239,6 +240,7 @@ api = [
     path('process/<uuid:pk>/activity-stats/', ProcessWorkersActivity.as_view(), name='process-activity-stats'),
     path('process/<uuid:pk>/template/', CreateProcessTemplate.as_view(), name='create-process-template'),
     path('process/<uuid:pk>/apply/', ApplyProcessTemplate.as_view(), name='apply-process-template'),
+    path('imports/<uuid:pk>/clear/', ClearProcess.as_view(), name='clear-process'),
 
     # ML models training
     path('modelversion/<uuid:pk>/', ModelVersionsRetrieve.as_view(), name='model-version-retrieve'),