diff --git a/arkindex/process/api.py b/arkindex/process/api.py index a7828117e344ff001c08fe4adda5448b25d0806e..9d82e3ff876289a69804000a01fee076c7510cb4 100644 --- a/arkindex/process/api.py +++ b/arkindex/process/api.py @@ -48,6 +48,7 @@ from arkindex.process.models import ( GitRef, GitRefType, Process, + ProcessDataset, ProcessMode, Repository, Revision, @@ -675,7 +676,7 @@ class ProcessDatasets(ProcessACLMixin, ListAPIView): @cached_property def process(self): process = get_object_or_404( - Process.objects.using('default'), + Process.objects.using('default').select_related('corpus', 'revision__repo'), Q(pk=self.kwargs['pk']) ) if not self.process_access_level(process): @@ -706,8 +707,18 @@ class ProcessDatasets(ProcessACLMixin, ListAPIView): """ ), ), + delete=extend_schema( + operation_id='DestroyProcessDataset', + description=dedent( + """ + Remove a dataset from a process. + + Requires an **admin** access to the process. + """ + ), + ), ) -class ProcessDataset(CreateAPIView): +class ProcessDatasetManage(CreateAPIView, DestroyAPIView): permission_classes = (IsVerified, ) serializer_class = ProcessDatasetSerializer @@ -723,6 +734,12 @@ class ProcessDataset(CreateAPIView): headers = self.get_success_headers(serializer.data) return Response(serializer.data, status=status.HTTP_201_CREATED, headers=headers) + def destroy(self, request, *args, **kwargs): + serializer = self.get_serializer_from_params(**kwargs) + serializer.is_valid(raise_exception=True) + get_object_or_404(ProcessDataset, **serializer.validated_data).delete() + return Response(status=status.HTTP_204_NO_CONTENT) + @extend_schema(exclude=True) class GitRepositoryImportHook(APIView): diff --git a/arkindex/process/serializers/training.py b/arkindex/process/serializers/training.py index 11802e7a65b91fcdb1a0b80bf640788f1d0c623a..3bcf27262b76a2c8e8d69abfaf362fef107a01eb 100644 --- a/arkindex/process/serializers/training.py +++ b/arkindex/process/serializers/training.py @@ -197,8 +197,14 @@ class StartTrainingSerializer(serializers.ModelSerializer, WorkerACLMixin, Train class ProcessDatasetSerializer(serializers.ModelSerializer, ProcessACLMixin): - process = serializers.PrimaryKeyRelatedField(queryset=Process.objects.using('default')) - dataset = serializers.PrimaryKeyRelatedField(queryset=Dataset.objects.none()) + process = serializers.PrimaryKeyRelatedField( + queryset=Process.objects.using('default').select_related('corpus'), + style={'base_template': 'input.html'}, + ) + dataset = serializers.PrimaryKeyRelatedField( + queryset=Dataset.objects.none(), + style={'base_template': 'input.html'}, + ) class Meta(): model = ProcessDataset @@ -210,14 +216,21 @@ class ProcessDatasetSerializer(serializers.ModelSerializer, ProcessACLMixin): if not self.context.get('request'): # Do not raise Error in order to create OpenAPI schema return + + request_method = self.context['request'].method # Required for the ProcessACLMixin and readable corpora self._user = self.context['request'].user - self.fields['dataset'].queryset = Dataset.objects.filter(corpus__in=Corpus.objects.readable(self._user)) + + if request_method == 'DELETE': + # Allow deleting ProcessDatasets even if the user looses access to the corpus + self.fields['dataset'].queryset = Dataset.objects.all() + else: + self.fields['dataset'].queryset = Dataset.objects.filter(corpus__in=Corpus.objects.readable(self._user)) def validate_process(self, process): + if process.mode != ProcessMode.Dataset: + raise ValidationError(detail='Datasets can only be added to or removed from processes of mode "dataset".') access = self.process_access_level(process) if not access or not (access >= Role.Admin.value): raise PermissionDenied(detail='You do not have admin access to this process.') - if process.mode != ProcessMode.Dataset: - raise ValidationError(detail='Datasets can only be added to processes of mode "dataset".') return process diff --git a/arkindex/process/tests/test_process_datasets.py b/arkindex/process/tests/test_process_datasets.py index 755dbfb0e73de70ca9de92a598ecc67cb4b5c40c..1af573724d5b813fb1ad3148aeb43e11069d1fd7 100644 --- a/arkindex/process/tests/test_process_datasets.py +++ b/arkindex/process/tests/test_process_datasets.py @@ -53,6 +53,8 @@ class TestProcessDatasets(FixtureAPITestCase): cls.repo.memberships.create(user=cls.test_user, level=Role.Admin.value) cls.rev = cls.repo.revisions.get() + # List process datasets + def test_list_requires_login(self): with self.assertNumQueries(0): response = self.client.get(reverse('api:process-datasets', kwargs={'pk': self.dataset_process.id})) @@ -67,14 +69,14 @@ class TestProcessDatasets(FixtureAPITestCase): def test_list_process_access_level(self): self.private_corpus.memberships.filter(user=self.test_user).delete() self.client.force_login(self.test_user) - with self.assertNumQueries(6): + with self.assertNumQueries(5): response = self.client.get(reverse('api:process-datasets', kwargs={'pk': self.dataset_process.id})) self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) self.assertDictEqual(response.json(), {'detail': 'You do not have guest access to this process.'}) def test_list_process_datasets(self): self.client.force_login(self.test_user) - with self.assertNumQueries(9): + with self.assertNumQueries(8): response = self.client.get(reverse('api:process-datasets', kwargs={'pk': self.dataset_process.id})) self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(response.json()['results'], [ @@ -104,6 +106,8 @@ class TestProcessDatasets(FixtureAPITestCase): } ]) + # Create process dataset + def test_create_process_dataset_requires_login(self): with self.assertNumQueries(0): response = self.client.post( @@ -128,7 +132,7 @@ class TestProcessDatasets(FixtureAPITestCase): if level: self.private_corpus.memberships.create(user=self.test_user, level=level.value) self.client.force_login(self.test_user) - with self.assertNumQueries(7): + with self.assertNumQueries(6): response = self.client.post( reverse('api:process-dataset', kwargs={'process': self.dataset_process.id, 'dataset': self.dataset2.id}), ) @@ -139,21 +143,21 @@ class TestProcessDatasets(FixtureAPITestCase): cases = set(ProcessMode) - {ProcessMode.Dataset, ProcessMode.Local} for mode in cases: with self.subTest(mode=mode): - request_count = 8 self.dataset_process.mode = mode self.dataset_process.corpus = self.private_corpus if mode == ProcessMode.Repository: self.dataset_process.corpus = None self.dataset_process.revision = self.rev - request_count = 10 self.dataset_process.save() self.client.force_login(self.test_user) - with self.assertNumQueries(request_count): + + with self.assertNumQueries(6): response = self.client.post( reverse('api:process-dataset', kwargs={'process': self.dataset_process.id, 'dataset': self.dataset2.id}), ) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual(response.json(), {'process': ['Datasets can only be added to processes of mode "dataset".']}) + + self.assertEqual(response.json(), {'process': ['Datasets can only be added to or removed from processes of mode "dataset".']}) def test_create_process_dataset_process_mode_local(self): self.client.force_login(self.user) @@ -162,8 +166,8 @@ class TestProcessDatasets(FixtureAPITestCase): response = self.client.post( reverse('api:process-dataset', kwargs={'process': local_process.id, 'dataset': self.dataset2.id}), ) - self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) - self.assertEqual(response.json(), {'detail': 'You do not have admin access to this process.'}) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertEqual(response.json(), {'process': ['Datasets can only be added to or removed from processes of mode "dataset".']}) def test_create_process_dataset_wrong_process_uuid(self): self.client.force_login(self.test_user) @@ -178,7 +182,7 @@ class TestProcessDatasets(FixtureAPITestCase): def test_create_process_dataset_wrong_dataset_uuid(self): self.client.force_login(self.test_user) wrong_id = uuid.uuid4() - with self.assertNumQueries(8): + with self.assertNumQueries(7): response = self.client.post( reverse('api:process-dataset', kwargs={'process': self.dataset_process.id, 'dataset': wrong_id}), ) @@ -189,7 +193,7 @@ class TestProcessDatasets(FixtureAPITestCase): new_corpus = Corpus.objects.create(name='NERV') new_dataset = new_corpus.datasets.create(name='Eva series', description='We created the Evas from Adam', creator=self.user) self.client.force_login(self.test_user) - with self.assertNumQueries(8): + with self.assertNumQueries(7): response = self.client.post( reverse('api:process-dataset', kwargs={'process': self.dataset_process.id, 'dataset': new_dataset.id}), ) @@ -200,7 +204,7 @@ class TestProcessDatasets(FixtureAPITestCase): self.client.force_login(self.test_user) self.assertEqual(ProcessDataset.objects.count(), 3) self.assertFalse(ProcessDataset.objects.filter(process=self.dataset_process.id, dataset=self.dataset2.id).exists()) - with self.assertNumQueries(9): + with self.assertNumQueries(8): response = self.client.post( reverse('api:process-dataset', kwargs={'process': self.dataset_process.id, 'dataset': self.dataset2.id}), ) @@ -212,3 +216,103 @@ class TestProcessDatasets(FixtureAPITestCase): self.dataset1, self.dataset2 ]) + + # Destroy process dataset + + def test_destroy_requires_login(self): + with self.assertNumQueries(0): + response = self.client.delete( + reverse('api:process-dataset', kwargs={'process': self.dataset_process.id, 'dataset': self.private_dataset.id}), + ) + self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) + + def test_destroy_process_does_not_exist(self): + self.client.force_login(self.test_user) + wrong_id = uuid.uuid4() + with self.assertNumQueries(4): + response = self.client.delete( + reverse('api:process-dataset', kwargs={'process': wrong_id, 'dataset': self.private_dataset.id}) + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertDictEqual(response.json(), {'process': [f'Invalid pk "{str(wrong_id)}" - object does not exist.']}) + + def test_destroy_dataset_does_not_exist(self): + self.client.force_login(self.test_user) + wrong_id = uuid.uuid4() + with self.assertNumQueries(7): + response = self.client.delete( + reverse('api:process-dataset', kwargs={'process': self.dataset_process.id, 'dataset': wrong_id}) + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertDictEqual(response.json(), {'dataset': [f'Invalid pk "{str(wrong_id)}" - object does not exist.']}) + + def test_destroy_not_found(self): + self.assertFalse(self.dataset_process.datasets.filter(id=self.dataset2.id).exists()) + self.client.force_login(self.test_user) + with self.assertNumQueries(8): + response = self.client.delete( + reverse('api:process-dataset', kwargs={'process': self.dataset_process.id, 'dataset': self.dataset2.id}), + ) + self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) + + def test_destroy_process_access_level(self): + self.private_corpus.memberships.filter(user=self.test_user).delete() + self.client.force_login(self.test_user) + with self.assertNumQueries(6): + response = self.client.delete( + reverse('api:process-dataset', kwargs={'process': self.dataset_process.id, 'dataset': self.private_dataset.id}) + ) + self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) + self.assertDictEqual(response.json(), {'detail': 'You do not have admin access to this process.'}) + + def test_destroy_no_dataset_access_requirement(self): + new_corpus = Corpus.objects.create(name='NERV') + new_dataset = new_corpus.datasets.create(name='Eva series', description='We created the Evas from Adam', creator=self.user) + self.dataset_process.datasets.add(new_dataset) + self.assertTrue(ProcessDataset.objects.filter(process=self.dataset_process, dataset=new_dataset).exists()) + self.client.force_login(self.test_user) + with self.assertNumQueries(9): + response = self.client.delete( + reverse('api:process-dataset', kwargs={'process': self.dataset_process.id, 'dataset': new_dataset.id}), + ) + self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) + self.assertFalse(ProcessDataset.objects.filter(process=self.dataset_process, dataset=new_dataset).exists()) + + def test_destroy_process_mode(self): + cases = set(ProcessMode) - {ProcessMode.Dataset, ProcessMode.Local} + for mode in cases: + with self.subTest(mode=mode): + self.dataset_process.mode = mode + self.dataset_process.corpus = self.private_corpus + if mode == ProcessMode.Repository: + self.dataset_process.corpus = None + self.dataset_process.revision = self.rev + self.dataset_process.save() + self.client.force_login(self.test_user) + + with self.assertNumQueries(4): + response = self.client.delete( + reverse('api:process-dataset', kwargs={'process': self.dataset_process.id, 'dataset': self.dataset2.id}), + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + + self.assertEqual(response.json(), {'process': ['Datasets can only be added to or removed from processes of mode "dataset".']}) + + def test_destroy_process_mode_local(self): + self.client.force_login(self.user) + local_process = Process.objects.get(creator=self.user, mode=ProcessMode.Local) + with self.assertNumQueries(4): + response = self.client.delete( + reverse('api:process-dataset', kwargs={'process': local_process.id, 'dataset': self.dataset2.id}), + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertEqual(response.json(), {'process': ['Datasets can only be added to or removed from processes of mode "dataset".']}) + + def test_destroy(self): + self.client.force_login(self.test_user) + with self.assertNumQueries(9): + response = self.client.delete( + reverse('api:process-dataset', kwargs={'process': self.dataset_process.id, 'dataset': self.dataset1.id}), + ) + self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) + self.assertFalse(ProcessDataset.objects.filter(process=self.dataset_process, dataset=self.dataset1).exists()) diff --git a/arkindex/project/api_v1.py b/arkindex/project/api_v1.py index a545c81195939464562b42b55c7711e153da4efc..679f03a3d58b817c40ccd23da09455a7ec2d86c6 100644 --- a/arkindex/project/api_v1.py +++ b/arkindex/project/api_v1.py @@ -92,7 +92,7 @@ from arkindex.process.api import ( GitRepositoryImportHook, ImportTranskribus, ListProcessElements, - ProcessDataset, + ProcessDatasetManage, ProcessDatasets, ProcessDetails, ProcessList, @@ -292,7 +292,7 @@ api = [ path('process/training/', StartTraining.as_view(), name='process-training'), path('process/<uuid:pk>/select-failures/', SelectProcessFailures.as_view(), name='process-select-failures'), path('process/<uuid:pk>/datasets/', ProcessDatasets.as_view(), name='process-datasets'), - path('process/<uuid:process>/dataset/<uuid:dataset>/', ProcessDataset.as_view(), name='process-dataset'), + path('process/<uuid:process>/dataset/<uuid:dataset>/', ProcessDatasetManage.as_view(), name='process-dataset'), # ML models training path('modelversion/<uuid:pk>/', ModelVersionsRetrieve.as_view(), name='model-version-retrieve'),