From 2d71eae72c6e2b60223f77235faa336baef37fd7 Mon Sep 17 00:00:00 2001
From: mlbonhomme <bonhomme@teklia.com>
Date: Mon, 25 Mar 2024 15:57:26 +0100
Subject: [PATCH] Prevent dataset deletion if some sets are used in a process

---
 arkindex/training/api.py                     | 14 +++++----
 arkindex/training/tests/test_datasets_api.py | 30 ++++++++++++++++----
 2 files changed, 33 insertions(+), 11 deletions(-)

diff --git a/arkindex/training/api.py b/arkindex/training/api.py
index 46989ec6d6..ac8ca724ea 100644
--- a/arkindex/training/api.py
+++ b/arkindex/training/api.py
@@ -22,6 +22,7 @@ from rest_framework.generics import (
 from rest_framework.response import Response
 
 from arkindex.documents.models import Corpus, Element
+from arkindex.process.models import ProcessDatasetSet
 from arkindex.project.mixins import ACLMixin, CorpusACLMixin, TrainingModelMixin
 from arkindex.project.pagination import CountCursorPagination
 from arkindex.project.permissions import IsVerified, IsVerifiedOrReadOnly
@@ -677,6 +678,8 @@ class CorpusDataset(CorpusACLMixin, ListCreateAPIView):
             """
             Delete a dataset. Only datasets in an `open` state can be deleted.
 
+            A dataset cannot be deleted if one or more of its sets is selected in a process.
+
             Requires an **admin** access to the dataset's corpus.
             """
         )
@@ -696,13 +699,14 @@ class DatasetUpdate(ACLMixin, RetrieveUpdateDestroyAPIView):
 
         if request.method in permissions.SAFE_METHODS:
             return
-        role = Role.Contributor
-        if request.method == "DELETE":
-            role = Role.Admin
-            if obj.state != DatasetState.Open:
-                raise PermissionDenied(detail="Only datasets in an open state can be deleted.")
+        role = Role.Admin if request.method == "DELETE" else Role.Contributor
         if not self.has_access(obj.corpus, role.value):
             raise PermissionDenied(detail=f"You do not have {str(role).lower()} access to corpus {obj.corpus.name}.")
+        if self.request.method == "DELETE":
+            if obj.state != DatasetState.Open:
+                raise ValidationError(detail="Only datasets in an open state can be deleted.")
+            if ProcessDatasetSet.objects.filter(set__in=obj.sets.all()).exists():
+                raise ValidationError(detail="This dataset cannot be deleted because at least one of its sets is used in a process.")
 
         # Prevent editing anything on a complete dataset
         if obj.state == DatasetState.Complete:
diff --git a/arkindex/training/tests/test_datasets_api.py b/arkindex/training/tests/test_datasets_api.py
index 2a4e11a32f..1d388e7294 100644
--- a/arkindex/training/tests/test_datasets_api.py
+++ b/arkindex/training/tests/test_datasets_api.py
@@ -1062,9 +1062,21 @@ class TestDatasetsAPI(FixtureAPITestCase):
             self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND)
         self.assertDictEqual(response.json(), {"detail": "Not found."})
 
+    def test_delete_dataset_in_process_forbidden(self):
+        self.client.force_login(self.user)
+        with self.assertNumQueries(4):
+            response = self.client.delete(
+                reverse("api:dataset-update", kwargs={"pk": self.dataset.pk}),
+            )
+            self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+        self.assertEqual(response.json(), ["This dataset cannot be deleted because at least one of its sets is used in a process."])
+        self.dataset.refresh_from_db()
+
     def test_delete(self):
         self.client.force_login(self.user)
-        with self.assertNumQueries(8):
+        # Remove dataset sets from process
+        ProcessDatasetSet.objects.filter(process_id=self.process.id, set__dataset_id=self.dataset.id).delete()
+        with self.assertNumQueries(7):
             response = self.client.delete(
                 reverse("api:dataset-update", kwargs={"pk": self.dataset.pk}),
             )
@@ -1072,8 +1084,9 @@ class TestDatasetsAPI(FixtureAPITestCase):
         with self.assertRaises(Dataset.DoesNotExist):
             self.dataset.refresh_from_db()
 
-    def test_delete_not_open(self):
-        self.client.force_login(self.user)
+    @patch("arkindex.project.mixins.has_access", return_value=True)
+    def test_delete_not_open(self, has_access_mock):
+        self.client.force_login(self.write_user)
         cases = [DatasetState.Building, DatasetState.Complete, DatasetState.Error]
         for state in cases:
             with self.subTest(state=state):
@@ -1083,8 +1096,11 @@ class TestDatasetsAPI(FixtureAPITestCase):
                     response = self.client.delete(
                         reverse("api:dataset-update", kwargs={"pk": self.dataset.pk}),
                     )
-                    self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
-                self.assertDictEqual(response.json(), {"detail": "Only datasets in an open state can be deleted."})
+                    self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+                self.assertEqual(response.json(), ["Only datasets in an open state can be deleted."])
+            self.assertEqual(has_access_mock.call_count, 1)
+            self.assertEqual(has_access_mock.call_args, call(self.write_user, self.corpus, Role.Admin.value, skip_public=False))
+            has_access_mock.reset_mock()
 
     def test_delete_elements(self):
         """
@@ -1096,9 +1112,11 @@ class TestDatasetsAPI(FixtureAPITestCase):
         train_set.set_elements.create(element_id=self.page1.id, set="training")
         validation_set.set_elements.create(element_id=self.page2.id, set="validation")
         validation_set.set_elements.create(element_id=self.page3.id, set="validation")
+        # Remove dataset sets from process
+        ProcessDatasetSet.objects.filter(process_id=self.process.id, set__dataset_id=self.dataset.id).delete()
         self.client.force_login(self.user)
 
-        with self.assertNumQueries(8):
+        with self.assertNumQueries(7):
             response = self.client.delete(
                 reverse("api:dataset-update", kwargs={"pk": self.dataset.pk}),
             )
-- 
GitLab