From 2d71eae72c6e2b60223f77235faa336baef37fd7 Mon Sep 17 00:00:00 2001 From: mlbonhomme <bonhomme@teklia.com> Date: Mon, 25 Mar 2024 15:57:26 +0100 Subject: [PATCH] Prevent dataset deletion if some sets are used in a process --- arkindex/training/api.py | 14 +++++---- arkindex/training/tests/test_datasets_api.py | 30 ++++++++++++++++---- 2 files changed, 33 insertions(+), 11 deletions(-) diff --git a/arkindex/training/api.py b/arkindex/training/api.py index 46989ec6d6..ac8ca724ea 100644 --- a/arkindex/training/api.py +++ b/arkindex/training/api.py @@ -22,6 +22,7 @@ from rest_framework.generics import ( from rest_framework.response import Response from arkindex.documents.models import Corpus, Element +from arkindex.process.models import ProcessDatasetSet from arkindex.project.mixins import ACLMixin, CorpusACLMixin, TrainingModelMixin from arkindex.project.pagination import CountCursorPagination from arkindex.project.permissions import IsVerified, IsVerifiedOrReadOnly @@ -677,6 +678,8 @@ class CorpusDataset(CorpusACLMixin, ListCreateAPIView): """ Delete a dataset. Only datasets in an `open` state can be deleted. + A dataset cannot be deleted if one or more of its sets is selected in a process. + Requires an **admin** access to the dataset's corpus. """ ) @@ -696,13 +699,14 @@ class DatasetUpdate(ACLMixin, RetrieveUpdateDestroyAPIView): if request.method in permissions.SAFE_METHODS: return - role = Role.Contributor - if request.method == "DELETE": - role = Role.Admin - if obj.state != DatasetState.Open: - raise PermissionDenied(detail="Only datasets in an open state can be deleted.") + role = Role.Admin if request.method == "DELETE" else Role.Contributor if not self.has_access(obj.corpus, role.value): raise PermissionDenied(detail=f"You do not have {str(role).lower()} access to corpus {obj.corpus.name}.") + if self.request.method == "DELETE": + if obj.state != DatasetState.Open: + raise ValidationError(detail="Only datasets in an open state can be deleted.") + if ProcessDatasetSet.objects.filter(set__in=obj.sets.all()).exists(): + raise ValidationError(detail="This dataset cannot be deleted because at least one of its sets is used in a process.") # Prevent editing anything on a complete dataset if obj.state == DatasetState.Complete: diff --git a/arkindex/training/tests/test_datasets_api.py b/arkindex/training/tests/test_datasets_api.py index 2a4e11a32f..1d388e7294 100644 --- a/arkindex/training/tests/test_datasets_api.py +++ b/arkindex/training/tests/test_datasets_api.py @@ -1062,9 +1062,21 @@ class TestDatasetsAPI(FixtureAPITestCase): self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) self.assertDictEqual(response.json(), {"detail": "Not found."}) + def test_delete_dataset_in_process_forbidden(self): + self.client.force_login(self.user) + with self.assertNumQueries(4): + response = self.client.delete( + reverse("api:dataset-update", kwargs={"pk": self.dataset.pk}), + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertEqual(response.json(), ["This dataset cannot be deleted because at least one of its sets is used in a process."]) + self.dataset.refresh_from_db() + def test_delete(self): self.client.force_login(self.user) - with self.assertNumQueries(8): + # Remove dataset sets from process + ProcessDatasetSet.objects.filter(process_id=self.process.id, set__dataset_id=self.dataset.id).delete() + with self.assertNumQueries(7): response = self.client.delete( reverse("api:dataset-update", kwargs={"pk": self.dataset.pk}), ) @@ -1072,8 +1084,9 @@ class TestDatasetsAPI(FixtureAPITestCase): with self.assertRaises(Dataset.DoesNotExist): self.dataset.refresh_from_db() - def test_delete_not_open(self): - self.client.force_login(self.user) + @patch("arkindex.project.mixins.has_access", return_value=True) + def test_delete_not_open(self, has_access_mock): + self.client.force_login(self.write_user) cases = [DatasetState.Building, DatasetState.Complete, DatasetState.Error] for state in cases: with self.subTest(state=state): @@ -1083,8 +1096,11 @@ class TestDatasetsAPI(FixtureAPITestCase): response = self.client.delete( reverse("api:dataset-update", kwargs={"pk": self.dataset.pk}), ) - self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) - self.assertDictEqual(response.json(), {"detail": "Only datasets in an open state can be deleted."}) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertEqual(response.json(), ["Only datasets in an open state can be deleted."]) + self.assertEqual(has_access_mock.call_count, 1) + self.assertEqual(has_access_mock.call_args, call(self.write_user, self.corpus, Role.Admin.value, skip_public=False)) + has_access_mock.reset_mock() def test_delete_elements(self): """ @@ -1096,9 +1112,11 @@ class TestDatasetsAPI(FixtureAPITestCase): train_set.set_elements.create(element_id=self.page1.id, set="training") validation_set.set_elements.create(element_id=self.page2.id, set="validation") validation_set.set_elements.create(element_id=self.page3.id, set="validation") + # Remove dataset sets from process + ProcessDatasetSet.objects.filter(process_id=self.process.id, set__dataset_id=self.dataset.id).delete() self.client.force_login(self.user) - with self.assertNumQueries(8): + with self.assertNumQueries(7): response = self.client.delete( reverse("api:dataset-update", kwargs={"pk": self.dataset.pk}), ) -- GitLab