# Skip to content
# Snippets Groups Projects
# test_dataset_sets_api.py 25.72 KiB
from unittest.mock import call, patch

from django.urls import reverse
from rest_framework import status

from arkindex.documents.models import Corpus
from arkindex.process.models import Process, ProcessMode
from arkindex.project.tests import FixtureAPITestCase
from arkindex.training.models import Dataset, DatasetSet, DatasetState
from arkindex.users.models import Role, User


class TestDatasetSetsAPI(FixtureAPITestCase):
    """
    Test dataset sets API endpoints

    Covers the ``api:dataset-sets`` route (POST, set creation) and the
    ``api:dataset-set`` route (PUT, PATCH and DELETE on a single set).
    Every request is wrapped in ``assertNumQueries`` so that a change in the
    number of SQL queries is caught along with the response checks. The status
    code is asserted inside that context so that an unexpected response is
    reported before a query count mismatch.
    """

    # Well-formed UUID used where a test needs an ID that matches no dataset or set
    MISSING_ID = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"

    # Every dataset state except Open, in enum definition order, so that the
    # subtests run in the same order on every run (set iteration order would
    # depend on per-process hash randomization)
    NON_OPEN_STATES = tuple(state for state in DatasetState if state != DatasetState.Open)

    @classmethod
    def setUpTestData(cls):
        super().setUpTestData()
        cls.page = cls.corpus.elements.get(name="Volume 1, page 1r")
        cls.line = cls.corpus.elements.filter(type__slug="text_line").first()
        cls.dataset = Dataset.objects.first()
        # A second corpus and dataset, created by another user, for the access rights tests
        private_user = User.objects.create(email="ikarigendou@mail.com", display_name="Commander Ikari", verified_email=True)
        cls.private_corpus = Corpus.objects.create(name="Private Project")
        cls.private_dataset = Dataset.objects.create(corpus=cls.private_corpus, name="Private Dataset", creator=private_user)
        cls.train_set = cls.dataset.sets.get(name="training")
        cls.train_set.set_elements.create(element=cls.page)
        cls.train_set.set_elements.create(element=cls.line)
        # The training set is selected in a process, which the destroy tests rely on
        cls.dataset_process = Process.objects.create(
            creator=cls.user,
            corpus=cls.corpus,
            mode=ProcessMode.Files,
        )
        cls.dataset_process.process_sets.create(set=cls.train_set)

    # Helpers

    def _sets_url(self, dataset_id=None):
        """
        Build the URL of the ``api:dataset-sets`` endpoint.
        Defaults to the fixture dataset when no dataset ID is given.
        """
        return reverse("api:dataset-sets", kwargs={
            "pk": self.dataset.pk if dataset_id is None else dataset_id,
        })

    def _set_url(self, dataset_id=None, set_id=None):
        """
        Build the URL of the ``api:dataset-set`` endpoint.
        Defaults to the fixture dataset and its training set when an ID is omitted.
        """
        return reverse("api:dataset-set", kwargs={
            "dataset": self.dataset.pk if dataset_id is None else dataset_id,
            "set": self.train_set.pk if set_id is None else set_id,
        })

    def _login_unverified_user(self):
        """
        Create a user whose email address is not verified and log them in.
        """
        user = User.objects.create(email="not_verified@mail.com", display_name="Not Verified", verified_email=False)
        self.client.force_login(user)

    # CreateDatasetSet

    def test_create_requires_login(self):
        """
        Anonymous requests get a 403 without any SQL query being made.
        """
        with self.assertNumQueries(0):
            response = self.client.post(
                self._sets_url(),
                data={"name": "Unit-00"},
                format="json"
            )
            self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
        self.assertDictEqual(response.json(), {"detail": "Authentication credentials were not provided."})

    def test_create_requires_verified(self):
        """
        A user with an unverified email address cannot create a set.
        """
        self._login_unverified_user()
        with self.assertNumQueries(2):
            response = self.client.post(
                self._sets_url(),
                data={"name": "Unit-00"},
                format="json"
            )
            self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
        self.assertDictEqual(response.json(), {"detail": "You do not have permission to perform this action."})

    def test_create_dataset_doesnt_exist(self):
        """
        Creating a set in an unknown dataset returns a 404.
        """
        self.client.force_login(self.user)
        with self.assertNumQueries(3):
            response = self.client.post(
                self._sets_url(self.MISSING_ID),
                data={"name": "Unit-00"},
                format="json"
            )
            self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND)
        self.assertDictEqual(response.json(), {"detail": "Not found."})

    @patch("arkindex.users.managers.BaseACLManager.filter_rights")
    def test_create_no_read_access(self, filter_rights_mock):
        """
        A dataset in a corpus the user cannot read is reported as not found,
        and only guest rights are looked up.
        """
        # Readable corpora are limited to the fixture corpus, which excludes the private dataset
        filter_rights_mock.return_value = Corpus.objects.filter(pk=self.corpus.pk)
        self.client.force_login(self.user)
        with self.assertNumQueries(3):
            response = self.client.post(
                self._sets_url(self.private_dataset.pk),
                data={"name": "Unit-00"},
                format="json"
            )
            self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND)
        self.assertDictEqual(response.json(), {"detail": "Not found."})

        self.assertEqual(filter_rights_mock.call_count, 1)
        self.assertEqual(filter_rights_mock.call_args, call(self.user, Corpus, Role.Guest.value))

    @patch("arkindex.users.managers.BaseACLManager.filter_rights")
    def test_create_requires_contributor_access(self, filter_rights_mock):
        """
        A user who can read the dataset but is not a contributor gets a 403.
        """
        # First call (guest rights): every corpus; second call (contributor rights): fixture corpus only
        filter_rights_mock.side_effect = [Corpus.objects.all(), Corpus.objects.filter(pk=self.corpus.pk)]
        self.private_corpus.memberships.update_or_create(user=self.user, defaults={"level": Role.Guest.value})
        self.client.force_login(self.user)
        with self.assertNumQueries(5):
            response = self.client.post(
                self._sets_url(self.private_dataset.pk),
                data={"name": "Unit-00"},
                format="json"
            )
            self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)

        self.assertDictEqual(response.json(), {"detail": "You do not have contributor access to this dataset."})

        self.assertEqual(filter_rights_mock.call_count, 2)
        self.assertEqual(filter_rights_mock.call_args_list, [call(self.user, Corpus, Role.Guest.value), call(self.user, Corpus, Role.Contributor.value)])

    def test_create_requires_open_dataset(self):
        """
        Sets cannot be created in a dataset whose state is not Open.
        """
        self.client.force_login(self.user)

        for state in self.NON_OPEN_STATES:
            with self.subTest(state=state):
                self.dataset.state = state
                self.dataset.save()

                with self.assertNumQueries(5):
                    response = self.client.post(
                        self._sets_url(),
                        data={"name": "Unit-00"},
                        format="json"
                    )
                    self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)

                self.assertEqual(response.json(), ["You can only add or update sets from a dataset in an open state."])

    def test_create_duplicate_set_name(self):
        """
        A set name already used in the dataset is rejected.
        """
        self.client.force_login(self.user)

        with self.assertNumQueries(5):
            response = self.client.post(
                self._sets_url(),
                data={"name": "training"},
                format="json"
            )
            self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)

        self.assertDictEqual(response.json(), {"name": ["A set with this name already exists in this dataset."]})

    def test_create(self):
        """
        A new set is created and its ID and name are returned.
        """
        self.client.force_login(self.user)

        with self.assertNumQueries(6):
            response = self.client.post(
                self._sets_url(),
                data={"name": "Unit-00"},
                format="json"
            )
            self.assertEqual(response.status_code, status.HTTP_201_CREATED)

        created_set = DatasetSet.objects.get(dataset=self.dataset, name="Unit-00")
        self.assertDictEqual(response.json(), {
            "id": str(created_set.id),
            "name": "Unit-00"
        })

    # UpdateDatasetSet

    def test_update_requires_login(self):
        """
        Anonymous PUT requests get a 403 without any SQL query being made.
        """
        with self.assertNumQueries(0):
            response = self.client.put(
                self._set_url(),
                data={"name": "Unit-00"},
                format="json"
            )
            self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
        self.assertDictEqual(response.json(), {"detail": "Authentication credentials were not provided."})

    def test_update_requires_verified(self):
        """
        A user with an unverified email address cannot update a set.
        """
        self._login_unverified_user()
        with self.assertNumQueries(2):
            response = self.client.put(
                self._set_url(),
                data={"name": "Unit-00"},
                format="json"
            )
            self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
        self.assertDictEqual(response.json(), {"detail": "You do not have permission to perform this action."})

    def test_update_dataset_doesnt_exist(self):
        """
        Updating a set through an unknown dataset returns a 404.
        """
        self.client.force_login(self.user)
        with self.assertNumQueries(3):
            response = self.client.put(
                self._set_url(dataset_id=self.MISSING_ID),
                data={"name": "Unit-00"},
                format="json"
            )
            self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND)
        self.assertDictEqual(response.json(), {"detail": "Not found."})

    def test_update_set_doesnt_exist(self):
        """
        Updating an unknown set of an existing dataset returns a 404.
        """
        self.client.force_login(self.user)
        with self.assertNumQueries(6):
            response = self.client.put(
                self._set_url(set_id=self.MISSING_ID),
                data={"name": "Unit-00"},
                format="json"
            )
            self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND)
        self.assertDictEqual(response.json(), {"detail": "Not found."})

    @patch("arkindex.users.managers.BaseACLManager.filter_rights")
    def test_update_no_read_access(self, filter_rights_mock):
        """
        A set in a corpus the user cannot read is reported as not found,
        and only guest rights are looked up.
        """
        # Readable corpora are limited to the private corpus, which excludes the fixture dataset
        filter_rights_mock.return_value = Corpus.objects.filter(pk=self.private_corpus.pk)
        self.client.force_login(self.user)
        with self.assertNumQueries(3):
            response = self.client.put(
                self._set_url(),
                data={"name": "Unit-00"},
                format="json"
            )
            self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND)
        self.assertDictEqual(response.json(), {"detail": "Not found."})

        self.assertEqual(filter_rights_mock.call_count, 1)
        self.assertEqual(filter_rights_mock.call_args, call(self.user, Corpus, Role.Guest.value))

    @patch("arkindex.users.managers.BaseACLManager.filter_rights")
    def test_update_requires_contributor_access(self, filter_rights_mock):
        """
        A user who can read the dataset but is not a contributor gets a 403.
        """
        # First call (guest rights): every corpus; second call (contributor rights): private corpus only
        filter_rights_mock.side_effect = [Corpus.objects.all(), Corpus.objects.filter(pk=self.private_corpus.pk)]
        self.private_corpus.memberships.update_or_create(user=self.user, defaults={"level": Role.Guest.value})
        self.client.force_login(self.user)
        with self.assertNumQueries(5):
            response = self.client.put(
                self._set_url(),
                data={"name": "Unit-00"},
                format="json"
            )
            self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)

        self.assertDictEqual(response.json(), {"detail": "You do not have contributor access to this dataset."})

        self.assertEqual(filter_rights_mock.call_count, 2)
        self.assertEqual(filter_rights_mock.call_args_list, [call(self.user, Corpus, Role.Guest.value), call(self.user, Corpus, Role.Contributor.value)])

    def test_update_requires_open_dataset(self):
        """
        Sets cannot be updated in a dataset whose state is not Open.
        """
        self.client.force_login(self.user)

        for state in self.NON_OPEN_STATES:
            with self.subTest(state=state):
                self.dataset.state = state
                self.dataset.save()

                with self.assertNumQueries(5):
                    response = self.client.put(
                        self._set_url(),
                        data={"name": "Unit-00"},
                        format="json"
                    )
                    self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)

                self.assertEqual(response.json(), ["You can only add or update sets from a dataset in an open state."])

    def test_update_duplicate_set_name(self):
        """
        Renaming a set to a name already used in the dataset is rejected.
        """
        self.client.force_login(self.user)

        with self.assertNumQueries(6):
            response = self.client.put(
                self._set_url(),
                data={"name": "validation"},
                format="json"
            )
            self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)

        self.assertDictEqual(response.json(), {"name": ["A set with this name already exists in this dataset."]})

    def test_update(self):
        """
        A PUT request renames the set, both in the response and in the database.
        """
        self.client.force_login(self.user)

        with self.assertNumQueries(7):
            response = self.client.put(
                self._set_url(),
                data={"name": "Unit-00"},
                format="json"
            )
            self.assertEqual(response.status_code, status.HTTP_200_OK)

        self.assertDictEqual(response.json(), {
            "id": str(self.train_set.id),
            "name": "Unit-00"
        })
        self.train_set.refresh_from_db()
        self.assertEqual(self.train_set.name, "Unit-00")

    # PartialUpdateDatasetSet

    def test_partial_update_requires_login(self):
        """
        Anonymous PATCH requests get a 403 without any SQL query being made.
        """
        with self.assertNumQueries(0):
            response = self.client.patch(
                self._set_url(),
                data={"name": "Unit-00"},
                format="json"
            )
            self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
        self.assertDictEqual(response.json(), {"detail": "Authentication credentials were not provided."})

    def test_partial_update_requires_verified(self):
        """
        A user with an unverified email address cannot partially update a set.
        """
        self._login_unverified_user()
        with self.assertNumQueries(2):
            response = self.client.patch(
                self._set_url(),
                data={"name": "Unit-00"},
                format="json"
            )
            self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
        self.assertDictEqual(response.json(), {"detail": "You do not have permission to perform this action."})

    def test_partial_update_dataset_doesnt_exist(self):
        """
        Partially updating a set through an unknown dataset returns a 404.
        """
        self.client.force_login(self.user)
        with self.assertNumQueries(3):
            response = self.client.patch(
                self._set_url(dataset_id=self.MISSING_ID),
                data={"name": "Unit-00"},
                format="json"
            )
            self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND)
        self.assertDictEqual(response.json(), {"detail": "Not found."})

    def test_partial_update_set_doesnt_exist(self):
        """
        Partially updating an unknown set of an existing dataset returns a 404.
        """
        self.client.force_login(self.user)
        with self.assertNumQueries(6):
            response = self.client.patch(
                self._set_url(set_id=self.MISSING_ID),
                data={"name": "Unit-00"},
                format="json"
            )
            self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND)
        self.assertDictEqual(response.json(), {"detail": "Not found."})

    @patch("arkindex.users.managers.BaseACLManager.filter_rights")
    def test_partial_update_no_read_access(self, filter_rights_mock):
        """
        A set in a corpus the user cannot read is reported as not found,
        and only guest rights are looked up.
        """
        # Readable corpora are limited to the private corpus, which excludes the fixture dataset
        filter_rights_mock.return_value = Corpus.objects.filter(pk=self.private_corpus.pk)
        self.client.force_login(self.user)
        with self.assertNumQueries(3):
            response = self.client.patch(
                self._set_url(),
                data={"name": "Unit-00"},
                format="json"
            )
            self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND)
        self.assertDictEqual(response.json(), {"detail": "Not found."})

        self.assertEqual(filter_rights_mock.call_count, 1)
        self.assertEqual(filter_rights_mock.call_args, call(self.user, Corpus, Role.Guest.value))

    @patch("arkindex.users.managers.BaseACLManager.filter_rights")
    def test_partial_update_requires_contributor_access(self, filter_rights_mock):
        """
        A user who can read the dataset but is not a contributor gets a 403.
        """
        # First call (guest rights): every corpus; second call (contributor rights): private corpus only
        filter_rights_mock.side_effect = [Corpus.objects.all(), Corpus.objects.filter(pk=self.private_corpus.pk)]
        self.private_corpus.memberships.update_or_create(user=self.user, defaults={"level": Role.Guest.value})
        self.client.force_login(self.user)
        with self.assertNumQueries(5):
            response = self.client.patch(
                self._set_url(),
                data={"name": "Unit-00"},
                format="json"
            )
            self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)

        self.assertDictEqual(response.json(), {"detail": "You do not have contributor access to this dataset."})

        self.assertEqual(filter_rights_mock.call_count, 2)
        self.assertEqual(filter_rights_mock.call_args_list, [call(self.user, Corpus, Role.Guest.value), call(self.user, Corpus, Role.Contributor.value)])

    def test_partial_update_requires_open_dataset(self):
        """
        Sets cannot be partially updated in a dataset whose state is not Open.
        """
        self.client.force_login(self.user)

        for state in self.NON_OPEN_STATES:
            with self.subTest(state=state):
                self.dataset.state = state
                self.dataset.save()

                with self.assertNumQueries(5):
                    response = self.client.patch(
                        self._set_url(),
                        data={"name": "Unit-00"},
                        format="json"
                    )
                    self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)

                self.assertEqual(response.json(), ["You can only add or update sets from a dataset in an open state."])

    def test_partial_update_duplicate_set_name(self):
        """
        Renaming a set to a name already used in the dataset is rejected.
        """
        self.client.force_login(self.user)

        with self.assertNumQueries(6):
            response = self.client.patch(
                self._set_url(),
                data={"name": "validation"},
                format="json"
            )
            self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)

        self.assertDictEqual(response.json(), {"name": ["A set with this name already exists in this dataset."]})

    def test_partial_update(self):
        """
        A PATCH request renames the set, both in the response and in the database.
        """
        self.client.force_login(self.user)

        with self.assertNumQueries(7):
            response = self.client.patch(
                self._set_url(),
                data={"name": "Unit-00"},
                format="json"
            )
            self.assertEqual(response.status_code, status.HTTP_200_OK)

        self.assertDictEqual(response.json(), {
            "id": str(self.train_set.id),
            "name": "Unit-00"
        })
        self.train_set.refresh_from_db()
        self.assertEqual(self.train_set.name, "Unit-00")

    # DestroyDatasetSet

    def test_destroy_requires_login(self):
        """
        Anonymous DELETE requests get a 403 without any SQL query being made.
        """
        with self.assertNumQueries(0):
            response = self.client.delete(
                self._set_url(),
                format="json"
            )
            self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
        self.assertDictEqual(response.json(), {"detail": "Authentication credentials were not provided."})

    def test_destroy_requires_verified(self):
        """
        A user with an unverified email address cannot delete a set.
        """
        self._login_unverified_user()
        with self.assertNumQueries(2):
            response = self.client.delete(
                self._set_url(),
                format="json"
            )
            self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
        self.assertDictEqual(response.json(), {"detail": "You do not have permission to perform this action."})

    def test_destroy_dataset_doesnt_exist(self):
        """
        Deleting a set through an unknown dataset returns a 404.
        """
        self.client.force_login(self.user)
        with self.assertNumQueries(3):
            response = self.client.delete(
                self._set_url(dataset_id=self.MISSING_ID),
                format="json"
            )
            self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND)
        self.assertDictEqual(response.json(), {"detail": "Not found."})

    def test_destroy_set_doesnt_exist(self):
        """
        Deleting an unknown set of an existing dataset returns a 404.
        """
        self.client.force_login(self.user)
        with self.assertNumQueries(6):
            response = self.client.delete(
                self._set_url(set_id=self.MISSING_ID),
                format="json"
            )
            self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND)
        self.assertDictEqual(response.json(), {"detail": "Not found."})

    @patch("arkindex.users.managers.BaseACLManager.filter_rights")
    def test_destroy_no_read_access(self, filter_rights_mock):
        """
        A set in a corpus the user cannot read is reported as not found,
        and only guest rights are looked up.
        """
        # Readable corpora are limited to the private corpus, which excludes the fixture dataset
        filter_rights_mock.return_value = Corpus.objects.filter(pk=self.private_corpus.pk)
        self.client.force_login(self.user)
        with self.assertNumQueries(3):
            response = self.client.delete(
                self._set_url(),
                format="json"
            )
            self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND)
        self.assertDictEqual(response.json(), {"detail": "Not found."})

        self.assertEqual(filter_rights_mock.call_count, 1)
        self.assertEqual(filter_rights_mock.call_args, call(self.user, Corpus, Role.Guest.value))

    @patch("arkindex.users.managers.BaseACLManager.filter_rights")
    def test_destroy_requires_admin_access(self, filter_rights_mock):
        """
        Deleting a set requires admin rights: guest and contributor
        memberships both get a 403.
        """
        for role in [Role.Guest, Role.Contributor]:
            # A list side effect is consumed call after call, so it is set again for each role.
            # First call (guest rights): every corpus; second call (admin rights): private corpus only
            filter_rights_mock.side_effect = [Corpus.objects.all(), Corpus.objects.filter(pk=self.private_corpus.pk)]
            with self.subTest(role=role):
                # Clear the calls recorded for the previous role
                filter_rights_mock.reset_mock()
                self.private_corpus.memberships.update_or_create(user=self.user, defaults={"level": role.value})
                self.client.force_login(self.user)
                with self.assertNumQueries(5):
                    response = self.client.delete(
                        self._set_url(),
                        format="json"
                    )
                    self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)

                self.assertDictEqual(response.json(), {"detail": "You do not have admin access to this dataset."})

                self.assertEqual(filter_rights_mock.call_count, 2)
                self.assertEqual(filter_rights_mock.call_args_list, [call(self.user, Corpus, Role.Guest.value), call(self.user, Corpus, Role.Admin.value)])

    def test_destroy_requires_open_dataset(self):
        """
        Sets cannot be deleted from a dataset whose state is not Open.
        """
        self.client.force_login(self.user)

        for state in self.NON_OPEN_STATES:
            with self.subTest(state=state):
                self.dataset.state = state
                self.dataset.save()

                with self.assertNumQueries(5):
                    response = self.client.delete(
                        self._set_url(),
                        format="json"
                    )
                    self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)

                # The expected error uses the same "add or update" wording as the other endpoints
                self.assertEqual(response.json(), ["You can only add or update sets from a dataset in an open state."])

    def test_destroy_set_in_process_forbidden(self):
        """
        A set that is selected in a process cannot be deleted.
        """
        self.client.force_login(self.user)

        with self.assertNumQueries(7):
            response = self.client.delete(
                self._set_url(),
                format="json"
            )
            self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
        self.assertEqual(response.json(), ["This dataset set is selected in a process and cannot be deleted."])

        # Check that self.train_set still exists: refreshing a deleted set would raise DoesNotExist
        self.train_set.refresh_from_db()

    def test_destroy_only_set_forbidden(self):
        """
        The only set of a dataset cannot be deleted.
        """
        self.client.force_login(self.user)
        test_dataset = self.corpus.datasets.create(name="Tokyo 3", description="第3新東京市", creator=self.user)
        test_set = test_dataset.sets.create(name="NERV HQ")

        with self.assertNumQueries(7):
            response = self.client.delete(
                self._set_url(test_dataset.pk, test_set.pk),
                format="json"
            )
            self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
        self.assertEqual(response.json(), ["This dataset set is the only one in its dataset and cannot be deleted."])

        # Check that test_set still exists: refreshing a deleted set would raise DoesNotExist
        test_set.refresh_from_db()

    def test_destroy(self):
        """
        A set that is not selected in any process is deleted.
        """
        self.client.force_login(self.user)
        # Remove train_set from dataset_process
        self.dataset_process.process_sets.all().delete()
        with self.assertNumQueries(9):
            response = self.client.delete(
                self._set_url(),
                format="json"
            )
            self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT)

        with self.assertRaises(DatasetSet.DoesNotExist):
            self.train_set.refresh_from_db()