Skip to content
Snippets Groups Projects

Dataset sets management API endpoints

Merged ml bonhomme requested to merge sets-mgmt-api into master
All threads resolved!
Files
2
from unittest.mock import call, patch
from django.urls import reverse
from rest_framework import status
from arkindex.documents.models import Corpus
from arkindex.process.models import Process, ProcessMode
from arkindex.project.tests import FixtureAPITestCase
from arkindex.training.models import Dataset, DatasetSet, DatasetState
from arkindex.users.models import Role, User
class TestDatasetSetsAPI(FixtureAPITestCase):
    """
    Tests for the dataset sets API endpoints: creation, full update,
    partial update and deletion of the sets of a dataset.
    """

    # Well-formed UUID used wherever a test needs an ID that matches nothing
    _UNKNOWN_ID = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"

    @classmethod
    def setUpTestData(cls):
        super().setUpTestData()
        cls.page = cls.corpus.elements.get(name="Volume 1, page 1r")
        cls.line = cls.corpus.elements.filter(type__slug="text_line").first()
        cls.dataset = Dataset.objects.first()

        # A second corpus and dataset, created by a different user
        private_user = User.objects.create(
            email="ikarigendou@mail.com",
            display_name="Commander Ikari",
            verified_email=True,
        )
        cls.private_corpus = Corpus.objects.create(name="Private Project")
        cls.private_dataset = Dataset.objects.create(
            corpus=cls.private_corpus,
            name="Private Dataset",
            creator=private_user,
        )

        # Put two elements in the "training" set of the main dataset
        cls.train_set = cls.dataset.sets.get(name="training")
        for element in (cls.page, cls.line):
            cls.train_set.set_elements.create(element=element)

        # Select the training set in a process
        cls.dataset_process = Process.objects.create(
            creator=cls.user,
            corpus=cls.corpus,
            mode=ProcessMode.Files,
        )
        cls.dataset_process.process_sets.create(set=cls.train_set)

    # Helpers

    def _sets_url(self, dataset_pk):
        """Build the URL of the endpoint that creates sets in a dataset."""
        return reverse("api:dataset-sets", kwargs={"pk": dataset_pk})

    def _set_url(self, dataset_pk, set_pk):
        """Build the URL of the endpoint that updates or destroys a single set."""
        return reverse("api:dataset-set", kwargs={"dataset": dataset_pk, "set": set_pk})

    def _login_unverified_user(self):
        """Create a user whose email is not verified and log the test client in as them."""
        unverified = User.objects.create(
            email="not_verified@mail.com",
            display_name="Not Verified",
            verified_email=False,
        )
        self.client.force_login(unverified)

    def _non_open_states(self):
        """Return every dataset state except the open one."""
        return set(DatasetState).difference({DatasetState.Open})

    # CreateDatasetSet

    def test_create_requires_login(self):
        url = self._sets_url(self.dataset.pk)
        with self.assertNumQueries(0):
            resp = self.client.post(url, data={"name": "Unit-00"}, format="json")
        self.assertEqual(resp.status_code, status.HTTP_403_FORBIDDEN)
        self.assertDictEqual(resp.json(), {"detail": "Authentication credentials were not provided."})

    def test_create_requires_verified(self):
        self._login_unverified_user()
        url = self._sets_url(self.dataset.pk)
        with self.assertNumQueries(2):
            resp = self.client.post(url, data={"name": "Unit-00"}, format="json")
        self.assertEqual(resp.status_code, status.HTTP_403_FORBIDDEN)
        self.assertDictEqual(resp.json(), {"detail": "You do not have permission to perform this action."})

    def test_create_dataset_doesnt_exist(self):
        self.client.force_login(self.user)
        url = self._sets_url(self._UNKNOWN_ID)
        with self.assertNumQueries(3):
            resp = self.client.post(url, data={"name": "Unit-00"}, format="json")
        self.assertEqual(resp.status_code, status.HTTP_404_NOT_FOUND)
        self.assertDictEqual(resp.json(), {"detail": "Not found."})

    @patch("arkindex.users.managers.BaseACLManager.filter_rights")
    def test_create_no_read_access(self, filter_rights_mock):
        # The mocked rights only expose the main corpus; the request targets the private dataset
        filter_rights_mock.return_value = Corpus.objects.filter(pk=self.corpus.pk)
        self.client.force_login(self.user)
        url = self._sets_url(str(self.private_dataset.pk))
        with self.assertNumQueries(3):
            resp = self.client.post(url, data={"name": "Unit-00"}, format="json")
        self.assertEqual(resp.status_code, status.HTTP_404_NOT_FOUND)
        self.assertDictEqual(resp.json(), {"detail": "Not found."})
        self.assertEqual(filter_rights_mock.call_count, 1)
        self.assertEqual(filter_rights_mock.call_args, call(self.user, Corpus, Role.Guest.value))

    @patch("arkindex.users.managers.BaseACLManager.filter_rights")
    def test_create_requires_contributor_access(self, filter_rights_mock):
        # First filter_rights call returns every corpus, second call only the main corpus
        filter_rights_mock.side_effect = [
            Corpus.objects.all(),
            Corpus.objects.filter(pk=self.corpus.pk),
        ]
        self.private_corpus.memberships.update_or_create(user=self.user, defaults={"level": Role.Guest.value})
        self.client.force_login(self.user)
        url = self._sets_url(self.private_dataset.pk)
        with self.assertNumQueries(5):
            resp = self.client.post(url, data={"name": "Unit-00"}, format="json")
        self.assertEqual(resp.status_code, status.HTTP_403_FORBIDDEN)
        self.assertDictEqual(resp.json(), {"detail": "You do not have contributor access to this dataset."})
        self.assertEqual(filter_rights_mock.call_count, 2)
        self.assertEqual(
            filter_rights_mock.call_args_list,
            [call(self.user, Corpus, Role.Guest.value), call(self.user, Corpus, Role.Contributor.value)],
        )

    def test_create_requires_open_dataset(self):
        self.client.force_login(self.user)
        url = self._sets_url(self.dataset.pk)
        for state in self._non_open_states():
            with self.subTest(state=state):
                self.dataset.state = state
                self.dataset.save()
                with self.assertNumQueries(5):
                    resp = self.client.post(url, data={"name": "Unit-00"}, format="json")
                self.assertEqual(resp.status_code, status.HTTP_400_BAD_REQUEST)
                self.assertEqual(resp.json(), ["You can only add or update sets from a dataset in an open state."])

    def test_create_duplicate_set_name(self):
        self.client.force_login(self.user)
        url = self._sets_url(self.dataset.pk)
        # "training" is the name of a set fetched from this dataset in setUpTestData
        with self.assertNumQueries(5):
            resp = self.client.post(url, data={"name": "training"}, format="json")
        self.assertEqual(resp.status_code, status.HTTP_400_BAD_REQUEST)
        self.assertDictEqual(resp.json(), {"name": ["A set with this name already exists in this dataset."]})

    def test_create(self):
        self.client.force_login(self.user)
        url = self._sets_url(self.dataset.pk)
        with self.assertNumQueries(6):
            resp = self.client.post(url, data={"name": "Unit-00"}, format="json")
        self.assertEqual(resp.status_code, status.HTTP_201_CREATED)
        new_set = DatasetSet.objects.get(dataset=self.dataset, name="Unit-00")
        expected = {
            "id": str(new_set.id),
            "name": "Unit-00",
        }
        self.assertDictEqual(resp.json(), expected)

    # UpdateDatasetSet

    def test_update_requires_login(self):
        url = self._set_url(self.dataset.pk, self.train_set.pk)
        with self.assertNumQueries(0):
            resp = self.client.put(url, data={"name": "Unit-00"}, format="json")
        self.assertEqual(resp.status_code, status.HTTP_403_FORBIDDEN)
        self.assertDictEqual(resp.json(), {"detail": "Authentication credentials were not provided."})

    def test_update_requires_verified(self):
        self._login_unverified_user()
        url = self._set_url(self.dataset.pk, self.train_set.pk)
        with self.assertNumQueries(2):
            resp = self.client.put(url, data={"name": "Unit-00"}, format="json")
        self.assertEqual(resp.status_code, status.HTTP_403_FORBIDDEN)
        self.assertDictEqual(resp.json(), {"detail": "You do not have permission to perform this action."})

    def test_update_dataset_doesnt_exist(self):
        self.client.force_login(self.user)
        url = self._set_url(self._UNKNOWN_ID, self.train_set.pk)
        with self.assertNumQueries(3):
            resp = self.client.put(url, data={"name": "Unit-00"}, format="json")
        self.assertEqual(resp.status_code, status.HTTP_404_NOT_FOUND)
        self.assertDictEqual(resp.json(), {"detail": "Not found."})

    def test_update_set_doesnt_exist(self):
        self.client.force_login(self.user)
        url = self._set_url(self.dataset.id, self._UNKNOWN_ID)
        with self.assertNumQueries(6):
            resp = self.client.put(url, data={"name": "Unit-00"}, format="json")
        self.assertEqual(resp.status_code, status.HTTP_404_NOT_FOUND)
        self.assertDictEqual(resp.json(), {"detail": "Not found."})

    @patch("arkindex.users.managers.BaseACLManager.filter_rights")
    def test_update_no_read_access(self, filter_rights_mock):
        # The mocked rights only expose the private corpus
        filter_rights_mock.return_value = Corpus.objects.filter(pk=self.private_corpus.pk)
        self.client.force_login(self.user)
        url = self._set_url(self.dataset.pk, self.train_set.pk)
        with self.assertNumQueries(3):
            resp = self.client.put(url, data={"name": "Unit-00"}, format="json")
        self.assertEqual(resp.status_code, status.HTTP_404_NOT_FOUND)
        self.assertDictEqual(resp.json(), {"detail": "Not found."})
        self.assertEqual(filter_rights_mock.call_count, 1)
        self.assertEqual(filter_rights_mock.call_args, call(self.user, Corpus, Role.Guest.value))

    @patch("arkindex.users.managers.BaseACLManager.filter_rights")
    def test_update_requires_contributor_access(self, filter_rights_mock):
        # First filter_rights call returns every corpus, second call only the private corpus
        filter_rights_mock.side_effect = [
            Corpus.objects.all(),
            Corpus.objects.filter(pk=self.private_corpus.pk),
        ]
        self.private_corpus.memberships.update_or_create(user=self.user, defaults={"level": Role.Guest.value})
        self.client.force_login(self.user)
        url = self._set_url(self.dataset.pk, self.train_set.pk)
        with self.assertNumQueries(5):
            resp = self.client.put(url, data={"name": "Unit-00"}, format="json")
        self.assertEqual(resp.status_code, status.HTTP_403_FORBIDDEN)
        self.assertDictEqual(resp.json(), {"detail": "You do not have contributor access to this dataset."})
        self.assertEqual(filter_rights_mock.call_count, 2)
        self.assertEqual(
            filter_rights_mock.call_args_list,
            [call(self.user, Corpus, Role.Guest.value), call(self.user, Corpus, Role.Contributor.value)],
        )

    def test_update_requires_open_dataset(self):
        self.client.force_login(self.user)
        url = self._set_url(self.dataset.pk, self.train_set.pk)
        for state in self._non_open_states():
            with self.subTest(state=state):
                self.dataset.state = state
                self.dataset.save()
                with self.assertNumQueries(5):
                    resp = self.client.put(url, data={"name": "Unit-00"}, format="json")
                self.assertEqual(resp.status_code, status.HTTP_400_BAD_REQUEST)
                self.assertEqual(resp.json(), ["You can only add or update sets from a dataset in an open state."])

    def test_update_duplicate_set_name(self):
        self.client.force_login(self.user)
        url = self._set_url(self.dataset.pk, self.train_set.pk)
        with self.assertNumQueries(6):
            resp = self.client.put(url, data={"name": "validation"}, format="json")
        self.assertEqual(resp.status_code, status.HTTP_400_BAD_REQUEST)
        self.assertDictEqual(resp.json(), {"name": ["A set with this name already exists in this dataset."]})

    def test_update(self):
        self.client.force_login(self.user)
        url = self._set_url(self.dataset.pk, self.train_set.pk)
        with self.assertNumQueries(7):
            resp = self.client.put(url, data={"name": "Unit-00"}, format="json")
        self.assertEqual(resp.status_code, status.HTTP_200_OK)
        expected = {
            "id": str(self.train_set.id),
            "name": "Unit-00",
        }
        self.assertDictEqual(resp.json(), expected)
        # The new name must also be stored in the database
        self.train_set.refresh_from_db()
        self.assertEqual(self.train_set.name, "Unit-00")

    # PartialUpdateDatasetSet

    def test_partial_update_requires_login(self):
        url = self._set_url(self.dataset.pk, self.train_set.pk)
        with self.assertNumQueries(0):
            resp = self.client.patch(url, data={"name": "Unit-00"}, format="json")
        self.assertEqual(resp.status_code, status.HTTP_403_FORBIDDEN)
        self.assertDictEqual(resp.json(), {"detail": "Authentication credentials were not provided."})

    def test_partial_update_requires_verified(self):
        self._login_unverified_user()
        url = self._set_url(self.dataset.pk, self.train_set.pk)
        with self.assertNumQueries(2):
            resp = self.client.patch(url, data={"name": "Unit-00"}, format="json")
        self.assertEqual(resp.status_code, status.HTTP_403_FORBIDDEN)
        self.assertDictEqual(resp.json(), {"detail": "You do not have permission to perform this action."})

    def test_partial_update_dataset_doesnt_exist(self):
        self.client.force_login(self.user)
        url = self._set_url(self._UNKNOWN_ID, self.train_set.pk)
        with self.assertNumQueries(3):
            resp = self.client.patch(url, data={"name": "Unit-00"}, format="json")
        self.assertEqual(resp.status_code, status.HTTP_404_NOT_FOUND)
        self.assertDictEqual(resp.json(), {"detail": "Not found."})

    def test_partial_update_set_doesnt_exist(self):
        self.client.force_login(self.user)
        url = self._set_url(self.dataset.id, self._UNKNOWN_ID)
        with self.assertNumQueries(6):
            resp = self.client.patch(url, data={"name": "Unit-00"}, format="json")
        self.assertEqual(resp.status_code, status.HTTP_404_NOT_FOUND)
        self.assertDictEqual(resp.json(), {"detail": "Not found."})

    @patch("arkindex.users.managers.BaseACLManager.filter_rights")
    def test_partial_update_no_read_access(self, filter_rights_mock):
        # The mocked rights only expose the private corpus
        filter_rights_mock.return_value = Corpus.objects.filter(pk=self.private_corpus.pk)
        self.client.force_login(self.user)
        url = self._set_url(self.dataset.pk, self.train_set.pk)
        with self.assertNumQueries(3):
            resp = self.client.patch(url, data={"name": "Unit-00"}, format="json")
        self.assertEqual(resp.status_code, status.HTTP_404_NOT_FOUND)
        self.assertDictEqual(resp.json(), {"detail": "Not found."})
        self.assertEqual(filter_rights_mock.call_count, 1)
        self.assertEqual(filter_rights_mock.call_args, call(self.user, Corpus, Role.Guest.value))

    @patch("arkindex.users.managers.BaseACLManager.filter_rights")
    def test_partial_update_requires_contributor_access(self, filter_rights_mock):
        # First filter_rights call returns every corpus, second call only the private corpus
        filter_rights_mock.side_effect = [
            Corpus.objects.all(),
            Corpus.objects.filter(pk=self.private_corpus.pk),
        ]
        self.private_corpus.memberships.update_or_create(user=self.user, defaults={"level": Role.Guest.value})
        self.client.force_login(self.user)
        url = self._set_url(self.dataset.pk, self.train_set.pk)
        with self.assertNumQueries(5):
            resp = self.client.patch(url, data={"name": "Unit-00"}, format="json")
        self.assertEqual(resp.status_code, status.HTTP_403_FORBIDDEN)
        self.assertDictEqual(resp.json(), {"detail": "You do not have contributor access to this dataset."})
        self.assertEqual(filter_rights_mock.call_count, 2)
        self.assertEqual(
            filter_rights_mock.call_args_list,
            [call(self.user, Corpus, Role.Guest.value), call(self.user, Corpus, Role.Contributor.value)],
        )

    def test_partial_update_requires_open_dataset(self):
        self.client.force_login(self.user)
        url = self._set_url(self.dataset.pk, self.train_set.pk)
        for state in self._non_open_states():
            with self.subTest(state=state):
                self.dataset.state = state
                self.dataset.save()
                with self.assertNumQueries(5):
                    resp = self.client.patch(url, data={"name": "Unit-00"}, format="json")
                self.assertEqual(resp.status_code, status.HTTP_400_BAD_REQUEST)
                self.assertEqual(resp.json(), ["You can only add or update sets from a dataset in an open state."])

    def test_partial_update_duplicate_set_name(self):
        self.client.force_login(self.user)
        url = self._set_url(self.dataset.pk, self.train_set.pk)
        with self.assertNumQueries(6):
            resp = self.client.patch(url, data={"name": "validation"}, format="json")
        self.assertEqual(resp.status_code, status.HTTP_400_BAD_REQUEST)
        self.assertDictEqual(resp.json(), {"name": ["A set with this name already exists in this dataset."]})

    def test_partial_update(self):
        self.client.force_login(self.user)
        url = self._set_url(self.dataset.pk, self.train_set.pk)
        with self.assertNumQueries(7):
            resp = self.client.patch(url, data={"name": "Unit-00"}, format="json")
        self.assertEqual(resp.status_code, status.HTTP_200_OK)
        expected = {
            "id": str(self.train_set.id),
            "name": "Unit-00",
        }
        self.assertDictEqual(resp.json(), expected)
        # The new name must also be stored in the database
        self.train_set.refresh_from_db()
        self.assertEqual(self.train_set.name, "Unit-00")

    # DestroyDatasetSet

    def test_destroy_requires_login(self):
        url = self._set_url(self.dataset.pk, self.train_set.pk)
        with self.assertNumQueries(0):
            resp = self.client.delete(url, format="json")
        self.assertEqual(resp.status_code, status.HTTP_403_FORBIDDEN)
        self.assertDictEqual(resp.json(), {"detail": "Authentication credentials were not provided."})

    def test_destroy_requires_verified(self):
        self._login_unverified_user()
        url = self._set_url(self.dataset.pk, self.train_set.pk)
        with self.assertNumQueries(2):
            resp = self.client.delete(url, format="json")
        self.assertEqual(resp.status_code, status.HTTP_403_FORBIDDEN)
        self.assertDictEqual(resp.json(), {"detail": "You do not have permission to perform this action."})

    def test_destroy_dataset_doesnt_exist(self):
        self.client.force_login(self.user)
        url = self._set_url(self._UNKNOWN_ID, self.train_set.pk)
        with self.assertNumQueries(3):
            resp = self.client.delete(url, format="json")
        self.assertEqual(resp.status_code, status.HTTP_404_NOT_FOUND)
        self.assertDictEqual(resp.json(), {"detail": "Not found."})

    def test_destroy_set_doesnt_exist(self):
        self.client.force_login(self.user)
        url = self._set_url(self.dataset.id, self._UNKNOWN_ID)
        with self.assertNumQueries(6):
            resp = self.client.delete(url, format="json")
        self.assertEqual(resp.status_code, status.HTTP_404_NOT_FOUND)
        self.assertDictEqual(resp.json(), {"detail": "Not found."})

    @patch("arkindex.users.managers.BaseACLManager.filter_rights")
    def test_destroy_no_read_access(self, filter_rights_mock):
        # The mocked rights only expose the private corpus
        filter_rights_mock.return_value = Corpus.objects.filter(pk=self.private_corpus.pk)
        self.client.force_login(self.user)
        url = self._set_url(self.dataset.pk, self.train_set.pk)
        with self.assertNumQueries(3):
            resp = self.client.delete(url, format="json")
        self.assertEqual(resp.status_code, status.HTTP_404_NOT_FOUND)
        self.assertDictEqual(resp.json(), {"detail": "Not found."})
        self.assertEqual(filter_rights_mock.call_count, 1)
        self.assertEqual(filter_rights_mock.call_args, call(self.user, Corpus, Role.Guest.value))

    @patch("arkindex.users.managers.BaseACLManager.filter_rights")
    def test_destroy_requires_admin_access(self, filter_rights_mock):
        url = self._set_url(self.dataset.pk, self.train_set.pk)
        for role in [Role.Guest, Role.Contributor]:
            # The side effect list is consumed by each request, so it is set again on every iteration.
            # First filter_rights call returns every corpus, second call only the private corpus
            filter_rights_mock.side_effect = [
                Corpus.objects.all(),
                Corpus.objects.filter(pk=self.private_corpus.pk),
            ]
            with self.subTest(role=role):
                # Forget the calls recorded by the previous iteration
                filter_rights_mock.reset_mock()
                self.private_corpus.memberships.update_or_create(user=self.user, defaults={"level": role.value})
                self.client.force_login(self.user)
                with self.assertNumQueries(5):
                    resp = self.client.delete(url, format="json")
                self.assertEqual(resp.status_code, status.HTTP_403_FORBIDDEN)
                self.assertDictEqual(resp.json(), {"detail": "You do not have admin access to this dataset."})
                self.assertEqual(filter_rights_mock.call_count, 2)
                self.assertEqual(
                    filter_rights_mock.call_args_list,
                    [call(self.user, Corpus, Role.Guest.value), call(self.user, Corpus, Role.Admin.value)],
                )

    def test_destroy_requires_open_dataset(self):
        self.client.force_login(self.user)
        url = self._set_url(self.dataset.pk, self.train_set.pk)
        for state in self._non_open_states():
            with self.subTest(state=state):
                self.dataset.state = state
                self.dataset.save()
                with self.assertNumQueries(5):
                    resp = self.client.delete(url, format="json")
                self.assertEqual(resp.status_code, status.HTTP_400_BAD_REQUEST)
                self.assertEqual(resp.json(), ["You can only add or update sets from a dataset in an open state."])

    def test_destroy_set_in_process_forbidden(self):
        # The training set is selected in self.dataset_process (see setUpTestData)
        self.client.force_login(self.user)
        url = self._set_url(self.dataset.pk, self.train_set.pk)
        with self.assertNumQueries(7):
            resp = self.client.delete(url, format="json")
        self.assertEqual(resp.status_code, status.HTTP_400_BAD_REQUEST)
        self.assertEqual(resp.json(), ["This dataset set is selected in a process and cannot be deleted."])
        # refresh_from_db would raise DoesNotExist if the set had been deleted
        self.train_set.refresh_from_db()

    def test_destroy_only_set_forbidden(self):
        self.client.force_login(self.user)
        # Build a dataset holding exactly one set
        test_dataset = self.corpus.datasets.create(name="Tokyo 3", description="第3新東京市", creator=self.user)
        test_set = test_dataset.sets.create(name="NERV HQ")
        url = self._set_url(test_dataset.pk, test_set.pk)
        with self.assertNumQueries(7):
            resp = self.client.delete(url, format="json")
        self.assertEqual(resp.status_code, status.HTTP_400_BAD_REQUEST)
        self.assertEqual(resp.json(), ["This dataset set is the only one in its dataset and cannot be deleted."])
        # refresh_from_db would raise DoesNotExist if test_set had been deleted
        test_set.refresh_from_db()

    def test_destroy(self):
        self.client.force_login(self.user)
        # Unlink the training set from the process before deleting it
        self.dataset_process.process_sets.all().delete()
        url = self._set_url(self.dataset.pk, self.train_set.pk)
        with self.assertNumQueries(9):
            resp = self.client.delete(url, format="json")
        self.assertEqual(resp.status_code, status.HTTP_204_NO_CONTENT)
        # The set must no longer exist in the database
        with self.assertRaises(DatasetSet.DoesNotExist):
            self.train_set.refresh_from_db()
Loading