From ae38fc35040cf84ebb7f371422cd43090a7d2d9d Mon Sep 17 00:00:00 2001 From: ml bonhomme <bonhomme@teklia.com> Date: Thu, 28 Mar 2024 19:07:54 +0000 Subject: [PATCH] Dataset sets management API endpoints --- arkindex/project/api_v1.py | 4 + arkindex/training/api.py | 117 +++- arkindex/training/serializers.py | 13 +- .../training/tests/test_dataset_sets_api.py | 547 ++++++++++++++++++ 4 files changed, 670 insertions(+), 11 deletions(-) create mode 100644 arkindex/training/tests/test_dataset_sets_api.py diff --git a/arkindex/project/api_v1.py b/arkindex/project/api_v1.py index 87433cb65a..70684fd8de 100644 --- a/arkindex/project/api_v1.py +++ b/arkindex/project/api_v1.py @@ -108,6 +108,8 @@ from arkindex.training.api import ( DatasetClone, DatasetElementDestroy, DatasetElements, + DatasetSetCreate, + DatasetSets, DatasetUpdate, ElementDatasetSets, MetricValueBulkCreate, @@ -189,6 +191,8 @@ api = [ path("datasets/<uuid:pk>/clone/", DatasetClone.as_view(), name="dataset-clone"), path("datasets/<uuid:pk>/elements/", DatasetElements.as_view(), name="dataset-elements"), path("datasets/<uuid:dataset>/elements/<uuid:element>/", DatasetElementDestroy.as_view(), name="dataset-element"), + path("datasets/<uuid:pk>/sets/", DatasetSetCreate.as_view(), name="dataset-sets"), + path("datasets/<uuid:dataset>/sets/<uuid:set>/", DatasetSets.as_view(), name="dataset-set"), # Moderation path("classifications/", ClassificationCreate.as_view(), name="classification-create"), diff --git a/arkindex/training/api.py b/arkindex/training/api.py index d8e952f157..66b6d00a37 100644 --- a/arkindex/training/api.py +++ b/arkindex/training/api.py @@ -42,6 +42,7 @@ from arkindex.training.serializers import ( DatasetElementInfoSerializer, DatasetElementSerializer, DatasetSerializer, + DatasetSetSerializer, ElementDatasetSetSerializer, MetricValueBulkSerializer, MetricValueCreateSerializer, @@ -656,11 +657,6 @@ class CorpusDataset(CorpusACLMixin, ListCreateAPIView): """ Update a dataset. 
The dataset must not be in `complete` state. - The sets array can only be updated following those cases: - * Adding or removing sets, nothing specific is done. - * Updating a single set within the array, - all elements linked to the previous set will be moved to the new one. - Requires a **contributor** access to the dataset's corpus. """ ), @@ -670,11 +666,6 @@ class CorpusDataset(CorpusACLMixin, ListCreateAPIView): """ Partially update a dataset. The dataset must not be in `complete` state. - The sets array can only be updated following those cases: - * Adding or removing sets, nothing specific is done. - * Updating a single set within the array, - all elements linked to the previous set will be moved to the new one. - Requires a **contributor** access to the dataset's corpus. """ ) @@ -747,6 +738,112 @@ class DatasetElementCursorPagination(CountCursorPagination): ordering = ("element_id", "id") +class DatasetSetBase(): + permission_classes = (IsVerified, ) + serializer_class = DatasetSetSerializer + + @cached_property + def dataset(self): + dataset_id = self.kwargs["pk"] if self.request.method == "POST" else self.kwargs["dataset"] + dataset = get_object_or_404( + Dataset.objects.filter(corpus__in=Corpus.objects.readable(self.request.user)) + .prefetch_related("sets") + .select_related("corpus"), + pk=dataset_id + ) + if self.request.method == "DELETE" and not Corpus.objects.admin(self.request.user).filter(pk=dataset.corpus_id).exists(): + raise PermissionDenied(detail="You do not have admin access to this dataset.") + elif self.request.method != "DELETE" and not Corpus.objects.writable(self.request.user).filter(pk=dataset.corpus_id).exists(): + raise PermissionDenied(detail="You do not have contributor access to this dataset.") + if dataset.state != DatasetState.Open: + raise ValidationError(detail="You can only add or update sets from a dataset in an open state.") + return dataset + + def get_serializer_context(self): + context = 
super().get_serializer_context() + # Ignore this step when generating the schema with OpenAPI + if not self.kwargs: + return context + context["dataset"] = self.dataset + return context + + +@extend_schema(tags=["datasets"]) +@extend_schema_view( + post=extend_schema( + operation_id="CreateDatasetSet", + description=dedent( + """ + Sets can only be created in **open** datasets. + + Requires **contributor** access to the dataset's corpus. + """ + ) + ), +) +class DatasetSetCreate(DatasetSetBase, CreateAPIView): + permission_classes = (IsVerified, ) + + +@extend_schema(tags=["datasets"]) +@extend_schema_view( + delete=extend_schema( + operation_id="DestroyDatasetSet", + description=dedent( + """ + Delete a set from a dataset. Sets can only be deleted from **open** datasets. + + Requires **admin** access to the dataset's corpus. + + It is not possible to delete a dataset set if that set is selected in a process, or + if it is the only set in the dataset. + """ + ) + ), + put=extend_schema( + operation_id="UpdateDatasetSet", + description=dedent( + """ + Sets can only be updated if the dataset is in the **open** state. + + Requires **contributor** access to the dataset's corpus. + """ + ) + ), + patch=extend_schema( + operation_id="PartialUpdateDatasetSet", + description=dedent( + """ + Sets can only be updated if the dataset is in the **open** state. + + Requires **contributor** access to the dataset's corpus. 
+ """ + ) + ) +) +class DatasetSets(DatasetSetBase, RetrieveUpdateDestroyAPIView): + lookup_url_kwarg = "set" + queryset = DatasetSet.objects.none() + + def get_queryset(self): + return self.dataset.sets.all() + + def check_object_permissions(self, request, obj): + super().check_object_permissions(request, obj) + if request.method == "DELETE": + if ProcessDatasetSet.objects.filter(set_id=obj.id).exists(): + raise ValidationError("This dataset set is selected in a process and cannot be deleted.") + if self.dataset.sets.count() == 1: + raise ValidationError("This dataset set is the only one in its dataset and cannot be deleted.") + + def destroy(self, request, *args, **kwargs): + instance = self.get_object() + # Delete the dataset set's DatasetElement objects + DatasetElement.objects.filter(set_id=instance.id).delete() + instance.delete() + return Response(status=status.HTTP_204_NO_CONTENT) + + @extend_schema(tags=["datasets"]) @extend_schema_view( get=extend_schema( diff --git a/arkindex/training/serializers.py b/arkindex/training/serializers.py index f6b3831216..ce877bff5f 100644 --- a/arkindex/training/serializers.py +++ b/arkindex/training/serializers.py @@ -482,9 +482,20 @@ class MetricValueBulkSerializer(serializers.Serializer): class DatasetSetSerializer(serializers.ModelSerializer): + dataset = serializers.HiddenField( + default=_dataset_from_context, + write_only=True, + ) + class Meta: model = DatasetSet - fields = ("id", "name",) + fields = ("id", "name", "dataset", ) + read_only_fields = ("id", ) + + def validate_name(self, name): + if any(ds.name == name for ds in self.context["dataset"].sets.all()): + raise ValidationError("A set with this name already exists in this dataset.") + return name class DatasetSerializer(serializers.ModelSerializer): diff --git a/arkindex/training/tests/test_dataset_sets_api.py b/arkindex/training/tests/test_dataset_sets_api.py new file mode 100644 index 0000000000..17f43037e4 --- /dev/null +++ 
b/arkindex/training/tests/test_dataset_sets_api.py @@ -0,0 +1,547 @@ +from unittest.mock import call, patch + +from django.urls import reverse +from rest_framework import status + +from arkindex.documents.models import Corpus +from arkindex.process.models import Process, ProcessMode +from arkindex.project.tests import FixtureAPITestCase +from arkindex.training.models import Dataset, DatasetSet, DatasetState +from arkindex.users.models import Role, User + + +class TestDatasetSetsAPI(FixtureAPITestCase): + """ + Test dataset sets API endpoints + """ + @classmethod + def setUpTestData(cls): + super().setUpTestData() + cls.page = cls.corpus.elements.get(name="Volume 1, page 1r") + cls.line = cls.corpus.elements.filter(type__slug="text_line").first() + cls.dataset = Dataset.objects.first() + private_user = User.objects.create(email="ikarigendou@mail.com", display_name="Commander Ikari", verified_email=True) + cls.private_corpus = Corpus.objects.create(name="Private Project") + cls.private_dataset = Dataset.objects.create(corpus=cls.private_corpus, name="Private Dataset", creator=private_user) + cls.train_set = cls.dataset.sets.get(name="training") + cls.train_set.set_elements.create(element=cls.page) + cls.train_set.set_elements.create(element=cls.line) + cls.dataset_process = Process.objects.create( + creator=cls.user, + corpus=cls.corpus, + mode=ProcessMode.Files, + ) + cls.dataset_process.process_sets.create(set=cls.train_set) + + # CreateDatasetSet + + def test_create_requires_login(self): + with self.assertNumQueries(0): + response = self.client.post( + reverse("api:dataset-sets", kwargs={"pk": self.dataset.pk}), + data={"name": "Unit-00"}, + format="json" + ) + self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) + self.assertDictEqual(response.json(), {"detail": "Authentication credentials were not provided."}) + + def test_create_requires_verified(self): + user = User.objects.create(email="not_verified@mail.com", display_name="Not Verified", 
verified_email=False) + self.client.force_login(user) + with self.assertNumQueries(2): + response = self.client.post( + reverse("api:dataset-sets", kwargs={"pk": self.dataset.pk}), + data={"name": "Unit-00"}, + format="json" + ) + self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) + self.assertDictEqual(response.json(), {"detail": "You do not have permission to perform this action."}) + + def test_create_dataset_doesnt_exist(self): + self.client.force_login(self.user) + with self.assertNumQueries(3): + response = self.client.post( + reverse("api:dataset-sets", kwargs={"pk": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"}), + data={"name": "Unit-00"}, + format="json" + ) + self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) + self.assertDictEqual(response.json(), {"detail": "Not found."}) + + @patch("arkindex.users.managers.BaseACLManager.filter_rights") + def test_create_no_read_access(self, filter_rights_mock): + filter_rights_mock.return_value = Corpus.objects.filter(pk=self.corpus.pk) + self.client.force_login(self.user) + with self.assertNumQueries(3): + response = self.client.post( + reverse("api:dataset-sets", kwargs={"pk": str(self.private_dataset.pk)}), + data={"name": "Unit-00"}, + format="json" + ) + self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) + self.assertDictEqual(response.json(), {"detail": "Not found."}) + + self.assertEqual(filter_rights_mock.call_count, 1) + self.assertEqual(filter_rights_mock.call_args, call(self.user, Corpus, Role.Guest.value)) + + @patch("arkindex.users.managers.BaseACLManager.filter_rights") + def test_create_requires_contributor_access(self, filter_rights_mock): + filter_rights_mock.side_effect = [Corpus.objects.all(), Corpus.objects.filter(pk=self.corpus.pk)] + self.private_corpus.memberships.update_or_create(user=self.user, defaults={"level": Role.Guest.value}) + self.client.force_login(self.user) + with self.assertNumQueries(5): + response = self.client.post( + 
reverse("api:dataset-sets", kwargs={"pk": self.private_dataset.pk}), + data={"name": "Unit-00"}, + format="json" + ) + self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) + + self.assertDictEqual(response.json(), {"detail": "You do not have contributor access to this dataset."}) + + self.assertEqual(filter_rights_mock.call_count, 2) + self.assertEqual(filter_rights_mock.call_args_list, [call(self.user, Corpus, Role.Guest.value), call(self.user, Corpus, Role.Contributor.value)]) + + def test_create_requires_open_dataset(self): + self.client.force_login(self.user) + + for state in set(DatasetState) - {DatasetState.Open}: + with self.subTest(state=state): + self.dataset.state = state + self.dataset.save() + + with self.assertNumQueries(5): + response = self.client.post( + reverse("api:dataset-sets", kwargs={"pk": self.dataset.pk}), + data={"name": "Unit-00"}, + format="json" + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + + self.assertEqual(response.json(), ["You can only add or update sets from a dataset in an open state."]) + + def test_create_duplicate_set_name(self): + self.client.force_login(self.user) + + with self.assertNumQueries(5): + response = self.client.post( + reverse("api:dataset-sets", kwargs={"pk": self.dataset.pk}), + data={"name": "training"}, + format="json" + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + + self.assertDictEqual(response.json(), {"name": ["A set with this name already exists in this dataset."]}) + + def test_create(self): + self.client.force_login(self.user) + + with self.assertNumQueries(6): + response = self.client.post( + reverse("api:dataset-sets", kwargs={"pk": self.dataset.pk}), + data={"name": "Unit-00"}, + format="json" + ) + self.assertEqual(response.status_code, status.HTTP_201_CREATED) + + created_set = DatasetSet.objects.get(dataset=self.dataset, name="Unit-00") + self.assertDictEqual(response.json(), { + "id": str(created_set.id), + "name": "Unit-00" 
+ }) + + # UpdateDatasetSet + + def test_update_requires_login(self): + with self.assertNumQueries(0): + response = self.client.put( + reverse("api:dataset-set", kwargs={"dataset": self.dataset.pk, "set": self.train_set.pk}), + data={"name": "Unit-00"}, + format="json" + ) + self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) + self.assertDictEqual(response.json(), {"detail": "Authentication credentials were not provided."}) + + def test_update_requires_verified(self): + user = User.objects.create(email="not_verified@mail.com", display_name="Not Verified", verified_email=False) + self.client.force_login(user) + with self.assertNumQueries(2): + response = self.client.put( + reverse("api:dataset-set", kwargs={"dataset": self.dataset.pk, "set": self.train_set.pk}), + data={"name": "Unit-00"}, + format="json" + ) + self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) + self.assertDictEqual(response.json(), {"detail": "You do not have permission to perform this action."}) + + def test_update_dataset_doesnt_exist(self): + self.client.force_login(self.user) + with self.assertNumQueries(3): + response = self.client.put( + reverse("api:dataset-set", kwargs={"dataset": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa", "set": self.train_set.pk}), + data={"name": "Unit-00"}, + format="json" + ) + self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) + self.assertDictEqual(response.json(), {"detail": "Not found."}) + + def test_update_set_doesnt_exist(self): + self.client.force_login(self.user) + with self.assertNumQueries(6): + response = self.client.put( + reverse("api:dataset-set", kwargs={"dataset": self.dataset.id, "set": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"}), + data={"name": "Unit-00"}, + format="json" + ) + self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) + self.assertDictEqual(response.json(), {"detail": "Not found."}) + + @patch("arkindex.users.managers.BaseACLManager.filter_rights") + def 
test_update_no_read_access(self, filter_rights_mock): + filter_rights_mock.return_value = Corpus.objects.filter(pk=self.private_corpus.pk) + self.client.force_login(self.user) + with self.assertNumQueries(3): + response = self.client.put( + reverse("api:dataset-set", kwargs={"dataset": self.dataset.pk, "set": self.train_set.pk}), + data={"name": "Unit-00"}, + format="json" + ) + self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) + self.assertDictEqual(response.json(), {"detail": "Not found."}) + + self.assertEqual(filter_rights_mock.call_count, 1) + self.assertEqual(filter_rights_mock.call_args, call(self.user, Corpus, Role.Guest.value)) + + @patch("arkindex.users.managers.BaseACLManager.filter_rights") + def test_update_requires_contributor_access(self, filter_rights_mock): + filter_rights_mock.side_effect = [Corpus.objects.all(), Corpus.objects.filter(pk=self.private_corpus.pk)] + self.private_corpus.memberships.update_or_create(user=self.user, defaults={"level": Role.Guest.value}) + self.client.force_login(self.user) + with self.assertNumQueries(5): + response = self.client.put( + reverse("api:dataset-set", kwargs={"dataset": self.dataset.pk, "set": self.train_set.pk}), + data={"name": "Unit-00"}, + format="json" + ) + self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) + + self.assertDictEqual(response.json(), {"detail": "You do not have contributor access to this dataset."}) + + self.assertEqual(filter_rights_mock.call_count, 2) + self.assertEqual(filter_rights_mock.call_args_list, [call(self.user, Corpus, Role.Guest.value), call(self.user, Corpus, Role.Contributor.value)]) + + def test_update_requires_open_dataset(self): + self.client.force_login(self.user) + + for state in set(DatasetState) - {DatasetState.Open}: + with self.subTest(state=state): + self.dataset.state = state + self.dataset.save() + + with self.assertNumQueries(5): + response = self.client.put( + reverse("api:dataset-set", kwargs={"dataset": self.dataset.pk, 
"set": self.train_set.pk}), + data={"name": "Unit-00"}, + format="json" + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + + self.assertEqual(response.json(), ["You can only add or update sets from a dataset in an open state."]) + + def test_update_duplicate_set_name(self): + self.client.force_login(self.user) + + with self.assertNumQueries(6): + response = self.client.put( + reverse("api:dataset-set", kwargs={"dataset": self.dataset.pk, "set": self.train_set.pk}), + data={"name": "validation"}, + format="json" + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + + self.assertDictEqual(response.json(), {"name": ["A set with this name already exists in this dataset."]}) + + def test_update(self): + self.client.force_login(self.user) + + with self.assertNumQueries(7): + response = self.client.put( + reverse("api:dataset-set", kwargs={"dataset": self.dataset.pk, "set": self.train_set.pk}), + data={"name": "Unit-00"}, + format="json" + ) + self.assertEqual(response.status_code, status.HTTP_200_OK) + + self.assertDictEqual(response.json(), { + "id": str(self.train_set.id), + "name": "Unit-00" + }) + self.train_set.refresh_from_db() + self.assertEqual(self.train_set.name, "Unit-00") + + # PartialUpdateDatasetSet + + def test_partial_update_requires_login(self): + with self.assertNumQueries(0): + response = self.client.patch( + reverse("api:dataset-set", kwargs={"dataset": self.dataset.pk, "set": self.train_set.pk}), + data={"name": "Unit-00"}, + format="json" + ) + self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) + self.assertDictEqual(response.json(), {"detail": "Authentication credentials were not provided."}) + + def test_partial_update_requires_verified(self): + user = User.objects.create(email="not_verified@mail.com", display_name="Not Verified", verified_email=False) + self.client.force_login(user) + with self.assertNumQueries(2): + response = self.client.patch( + reverse("api:dataset-set", 
kwargs={"dataset": self.dataset.pk, "set": self.train_set.pk}), + data={"name": "Unit-00"}, + format="json" + ) + self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) + self.assertDictEqual(response.json(), {"detail": "You do not have permission to perform this action."}) + + def test_partial_update_dataset_doesnt_exist(self): + self.client.force_login(self.user) + with self.assertNumQueries(3): + response = self.client.patch( + reverse("api:dataset-set", kwargs={"dataset": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa", "set": self.train_set.pk}), + data={"name": "Unit-00"}, + format="json" + ) + self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) + self.assertDictEqual(response.json(), {"detail": "Not found."}) + + def test_partial_update_set_doesnt_exist(self): + self.client.force_login(self.user) + with self.assertNumQueries(6): + response = self.client.patch( + reverse("api:dataset-set", kwargs={"dataset": self.dataset.id, "set": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"}), + data={"name": "Unit-00"}, + format="json" + ) + self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) + self.assertDictEqual(response.json(), {"detail": "Not found."}) + + @patch("arkindex.users.managers.BaseACLManager.filter_rights") + def test_partial_update_no_read_access(self, filter_rights_mock): + filter_rights_mock.return_value = Corpus.objects.filter(pk=self.private_corpus.pk) + self.client.force_login(self.user) + with self.assertNumQueries(3): + response = self.client.patch( + reverse("api:dataset-set", kwargs={"dataset": self.dataset.pk, "set": self.train_set.pk}), + data={"name": "Unit-00"}, + format="json" + ) + self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) + self.assertDictEqual(response.json(), {"detail": "Not found."}) + + self.assertEqual(filter_rights_mock.call_count, 1) + self.assertEqual(filter_rights_mock.call_args, call(self.user, Corpus, Role.Guest.value)) + + 
@patch("arkindex.users.managers.BaseACLManager.filter_rights") + def test_partial_update_requires_contributor_access(self, filter_rights_mock): + filter_rights_mock.side_effect = [Corpus.objects.all(), Corpus.objects.filter(pk=self.private_corpus.pk)] + self.private_corpus.memberships.update_or_create(user=self.user, defaults={"level": Role.Guest.value}) + self.client.force_login(self.user) + with self.assertNumQueries(5): + response = self.client.patch( + reverse("api:dataset-set", kwargs={"dataset": self.dataset.pk, "set": self.train_set.pk}), + data={"name": "Unit-00"}, + format="json" + ) + self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) + + self.assertDictEqual(response.json(), {"detail": "You do not have contributor access to this dataset."}) + + self.assertEqual(filter_rights_mock.call_count, 2) + self.assertEqual(filter_rights_mock.call_args_list, [call(self.user, Corpus, Role.Guest.value), call(self.user, Corpus, Role.Contributor.value)]) + + def test_partial_update_requires_open_dataset(self): + self.client.force_login(self.user) + + for state in set(DatasetState) - {DatasetState.Open}: + with self.subTest(state=state): + self.dataset.state = state + self.dataset.save() + + with self.assertNumQueries(5): + response = self.client.patch( + reverse("api:dataset-set", kwargs={"dataset": self.dataset.pk, "set": self.train_set.pk}), + data={"name": "Unit-00"}, + format="json" + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + + self.assertEqual(response.json(), ["You can only add or update sets from a dataset in an open state."]) + + def test_partial_update_duplicate_set_name(self): + self.client.force_login(self.user) + + with self.assertNumQueries(6): + response = self.client.patch( + reverse("api:dataset-set", kwargs={"dataset": self.dataset.pk, "set": self.train_set.pk}), + data={"name": "validation"}, + format="json" + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + + 
self.assertDictEqual(response.json(), {"name": ["A set with this name already exists in this dataset."]}) + + def test_partial_update(self): + self.client.force_login(self.user) + + with self.assertNumQueries(7): + response = self.client.patch( + reverse("api:dataset-set", kwargs={"dataset": self.dataset.pk, "set": self.train_set.pk}), + data={"name": "Unit-00"}, + format="json" + ) + self.assertEqual(response.status_code, status.HTTP_200_OK) + + self.assertDictEqual(response.json(), { + "id": str(self.train_set.id), + "name": "Unit-00" + }) + self.train_set.refresh_from_db() + self.assertEqual(self.train_set.name, "Unit-00") + + # DestroyDatasetSet + + def test_destroy_requires_login(self): + with self.assertNumQueries(0): + response = self.client.delete( + reverse("api:dataset-set", kwargs={"dataset": self.dataset.pk, "set": self.train_set.pk}), + format="json" + ) + self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) + self.assertDictEqual(response.json(), {"detail": "Authentication credentials were not provided."}) + + def test_destroy_requires_verified(self): + user = User.objects.create(email="not_verified@mail.com", display_name="Not Verified", verified_email=False) + self.client.force_login(user) + with self.assertNumQueries(2): + response = self.client.delete( + reverse("api:dataset-set", kwargs={"dataset": self.dataset.pk, "set": self.train_set.pk}), + format="json" + ) + self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) + self.assertDictEqual(response.json(), {"detail": "You do not have permission to perform this action."}) + + def test_destroy_dataset_doesnt_exist(self): + self.client.force_login(self.user) + with self.assertNumQueries(3): + response = self.client.delete( + reverse("api:dataset-set", kwargs={"dataset": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa", "set": self.train_set.pk}), + format="json" + ) + self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) + self.assertDictEqual(response.json(), {"detail": 
"Not found."}) + + def test_destroy_set_doesnt_exist(self): + self.client.force_login(self.user) + with self.assertNumQueries(6): + response = self.client.delete( + reverse("api:dataset-set", kwargs={"dataset": self.dataset.id, "set": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"}), + format="json" + ) + self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) + self.assertDictEqual(response.json(), {"detail": "Not found."}) + + @patch("arkindex.users.managers.BaseACLManager.filter_rights") + def test_destroy_no_read_access(self, filter_rights_mock): + filter_rights_mock.return_value = Corpus.objects.filter(pk=self.private_corpus.pk) + self.client.force_login(self.user) + with self.assertNumQueries(3): + response = self.client.delete( + reverse("api:dataset-set", kwargs={"dataset": self.dataset.pk, "set": self.train_set.pk}), + format="json" + ) + self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) + self.assertDictEqual(response.json(), {"detail": "Not found."}) + + self.assertEqual(filter_rights_mock.call_count, 1) + self.assertEqual(filter_rights_mock.call_args, call(self.user, Corpus, Role.Guest.value)) + + @patch("arkindex.users.managers.BaseACLManager.filter_rights") + def test_destroy_requires_admin_access(self, filter_rights_mock): + for role in [Role.Guest, Role.Contributor]: + filter_rights_mock.side_effect = [Corpus.objects.all(), Corpus.objects.filter(pk=self.private_corpus.pk)] + with self.subTest(role=role): + filter_rights_mock.reset_mock() + self.private_corpus.memberships.update_or_create(user=self.user, defaults={"level": role.value}) + self.client.force_login(self.user) + with self.assertNumQueries(5): + response = self.client.delete( + reverse("api:dataset-set", kwargs={"dataset": self.dataset.pk, "set": self.train_set.pk}), + format="json" + ) + self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) + + self.assertDictEqual(response.json(), {"detail": "You do not have admin access to this dataset."}) + + 
self.assertEqual(filter_rights_mock.call_count, 2) + self.assertEqual(filter_rights_mock.call_args_list, [call(self.user, Corpus, Role.Guest.value), call(self.user, Corpus, Role.Admin.value)]) + + def test_destroy_requires_open_dataset(self): + self.client.force_login(self.user) + + for state in set(DatasetState) - {DatasetState.Open}: + with self.subTest(state=state): + self.dataset.state = state + self.dataset.save() + + with self.assertNumQueries(5): + response = self.client.delete( + reverse("api:dataset-set", kwargs={"dataset": self.dataset.pk, "set": self.train_set.pk}), + format="json" + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + + self.assertEqual(response.json(), ["You can only add or update sets from a dataset in an open state."]) + + def test_destroy_set_in_process_forbidden(self): + self.client.force_login(self.user) + + with self.assertNumQueries(7): + response = self.client.delete( + reverse("api:dataset-set", kwargs={"dataset": self.dataset.pk, "set": self.train_set.pk}), + format="json" + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertEqual(response.json(), ["This dataset set is selected in a process and cannot be deleted."]) + + # Check that self.train_set still exists + self.train_set.refresh_from_db() + + def test_destroy_only_set_forbidden(self): + self.client.force_login(self.user) + test_dataset = self.corpus.datasets.create(name="Tokyo 3", description="第3新東京市", creator=self.user) + test_set = test_dataset.sets.create(name="NERV HQ") + + with self.assertNumQueries(7): + response = self.client.delete( + reverse("api:dataset-set", kwargs={"dataset": test_dataset.pk, "set": test_set.pk}), + format="json" + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertEqual(response.json(), ["This dataset set is the only one in its dataset and cannot be deleted."]) + + # Check that test_set still exists + test_set.refresh_from_db() + + def 
test_destroy(self): + self.client.force_login(self.user) + # Remove train_set from dataset_process + self.dataset_process.process_sets.all().delete() + with self.assertNumQueries(9): + response = self.client.delete( + reverse("api:dataset-set", kwargs={"dataset": self.dataset.pk, "set": self.train_set.pk}), + format="json" + ) + self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) + + with self.assertRaises(DatasetSet.DoesNotExist): + self.train_set.refresh_from_db() -- GitLab