diff --git a/arkindex/dataimport/api.py b/arkindex/dataimport/api.py index 3663810f0eb63401f679fe7a9f7496527487c747..6c40298fa46f915b41c1f1e3331e1dd1a9c47251 100644 --- a/arkindex/dataimport/api.py +++ b/arkindex/dataimport/api.py @@ -86,7 +86,6 @@ from arkindex.dataimport.serializers.workers import ( WorkerVersionEditSerializer, WorkerVersionSerializer, ) -from arkindex.dataimport.utils import hash_object from arkindex.documents.models import Corpus, Element from arkindex.project.aws import get_ingest_resource from arkindex.project.fields import ArrayRemove @@ -1074,7 +1073,7 @@ class WorkerRetrieve(WorkerACLMixin, RetrieveAPIView): status_codes=['400'], response_only=True, name="config-exists", - value={'id': "01fd00d9-1020-4d84-85f2-d0785e2c5b4a", 'configuration': ["A worker configuration with this configuration already exists for this worker."]}, + value={'id': ["01fd00d9-1020-4d84-85f2-d0785e2c5b4a"], 'configuration': ["A worker configuration with this configuration already exists for this worker."]}, description="An error where a worker configuration with this configuration already exists, including the existing worker configuration's UUID." )] ), @@ -1084,19 +1083,26 @@ class WorkerConfigurationList(WorkerACLMixin, ListCreateAPIView): serializer_class = WorkerConfigurationListSerializer queryset = WorkerConfiguration.objects.none() - def get_object(self): - return get_object_or_404( + @cached_property + def worker(self): + worker = get_object_or_404( Worker.objects.select_related('repository'), pk=self.kwargs['pk'] ) - - def get_queryset(self): - worker = self.get_object() - - if not self.has_read_access(worker): + if self.request.method in permissions.SAFE_METHODS and not self.has_read_access(worker): raise PermissionDenied(detail='You do not have a guest access to this worker.') + if self.request.method not in permissions.SAFE_METHODS and not self.has_execution_access(worker): + raise PermissionDenied(detail='You do not have contributor access to this worker.') + return worker + + def get_serializer_context(self): + context = super().get_serializer_context() + if 'pk' in self.kwargs: + context['worker'] = self.worker + return context - return worker.configurations.order_by('name') + def get_queryset(self): + return self.worker.configurations.order_by('name') def filter_queryset(self, queryset): # Consider any value that does not look like a false, including the empty string (`?archived` alone), @@ -1104,52 +1110,6 @@ class WorkerConfigurationList(WorkerACLMixin, ListCreateAPIView): archived = self.request.query_params.get('archived', 'false').lower() not in ('false', '0') return queryset.filter(archived=archived) - def create(self, request, *args, **kwargs): - worker = self.get_object() - - if not self.has_execution_access(worker): - raise PermissionDenied(detail='You do not have contributor access to this worker.') - - serializer = self.get_serializer(data=request.data) - serializer.is_valid(raise_exception=True) - - # Check unique constraints - # TODO: Move all of this into the serializer itself; overriding CreateAPIView.create is a bad practice - name = serializer.validated_data['name'] - configuration = serializer.validated_data['configuration'] - configuration_hash = hash_object(configuration) - existing_configurations = list(worker.configurations.filter(Q(configuration_hash=configuration_hash) | Q(name=name))) - errors = defaultdict(list) - for config in existing_configurations: - if configuration_hash == config.configuration_hash: - errors['configuration'].append("A worker configuration with this configuration already exists for this worker.") - errors['id'] = config.id - if name == config.name: - errors['name'].append("A worker configuration with this name already exists for this worker.") - # Due to the defaultdict, writing e.g. len(errors['configuration']) > 1 would add {'configuration': []} to - # the errors dict if the key is not already in the dict. - if len(errors.get('name', [])) > 1 or len(errors.get('configuration', [])) > 1: - # If there is more than one configuration with the same name and/or configuration as the one the user - # is trying to create, raise a MultipleObjectsReturned exception because this shouldn't be possible - # due to the unicity constraints. There can however be more than one worker configurations in - # existing_configurations, if both a worker configuration with the same name and the same configuration - # exist; but checking the length of existing_configurations wouldn't differentiate between this case - # and an actual duplicated worker configuration. - raise WorkerConfiguration.MultipleObjectsReturned - if errors: - raise ValidationError(errors) - - archived = serializer.validated_data.get('archived', False) - - worker_configuration = WorkerConfiguration.objects.create( - worker=worker, - name=name, - configuration=configuration, - archived=archived, - ) - - return Response(WorkerConfigurationSerializer(worker_configuration).data, status=status.HTTP_201_CREATED) - @extend_schema_view( get=extend_schema(description=dedent( diff --git a/arkindex/dataimport/serializers/workers.py b/arkindex/dataimport/serializers/workers.py index f0a2c76396b4f7d20a9d34f8b92bbef7983bc2f3..2888f6cca9ff8091ed71172f8a78498bdf061cef 100644 --- a/arkindex/dataimport/serializers/workers.py +++ b/arkindex/dataimport/serializers/workers.py @@ -2,6 +2,7 @@ import urllib from collections import defaultdict from enum import Enum +from django.db.models import Q from drf_spectacular.utils import extend_schema_field from rest_framework import serializers from rest_framework.exceptions import ValidationError @@ -20,6 +21,7 @@ from arkindex.dataimport.models import ( WorkerVersionState, ) from arkindex.dataimport.serializers.git import RevisionWithRefsSerializer +from arkindex.dataimport.utils import hash_object from arkindex.project.serializer_fields import EnumField @@ -312,9 +314,44 @@ class WorkerConfigurationListSerializer(serializers.ModelSerializer): 'id', 'name', 'configuration', - 'archived', + 'archived' ) + def validate(self, data): + # Check unique constraints + name = data.get('name') + configuration = data.get('configuration') + configuration_hash = hash_object(configuration) + data['archived'] = data.get('archived', False) + worker = self.context.get('worker') + # Since WorkerConfigurationSerializer is built from this serializer, this validation function can be triggered by a call + # to the WorkerConfigurationRetrieve API endpoint, updating an existing configuration. In this case, there is no worker + # cached property and this validation is not necessary anyway because the name and configuration fields are read_only. + if not worker: + return data + data['worker_id'] = worker.id + existing_configurations = list(worker.configurations.filter(Q(configuration_hash=configuration_hash) | Q(name=name))) + errors = defaultdict(list) + for config in existing_configurations: + if configuration_hash == config.configuration_hash: + errors['configuration'].append("A worker configuration with this configuration already exists for this worker.") + errors['id'] = config.id + if name == config.name: + errors['name'].append("A worker configuration with this name already exists for this worker.") + # Due to the defaultdict, writing e.g. len(errors['configuration']) > 1 would add {'configuration': []} to + # the errors dict if the key is not already in the dict. + if len(errors.get('name', [])) > 1 or len(errors.get('configuration', [])) > 1: + # If there is more than one configuration with the same name and/or configuration as the one the user + # is trying to create, raise a MultipleObjectsReturned exception because this shouldn't be possible + # due to the unicity constraints. There can however be more than one worker configurations in + # existing_configurations, if both a worker configuration with the same name and the same configuration + # exist; but checking the length of existing_configurations wouldn't differentiate between this case + # and an actual duplicated worker configuration. + raise WorkerConfiguration.MultipleObjectsReturned + if errors: + raise ValidationError(errors) + return data + class WorkerConfigurationSerializer(WorkerConfigurationListSerializer): configuration = serializers.DictField(allow_empty=False, read_only=True) diff --git a/arkindex/dataimport/tests/test_worker_configurations.py b/arkindex/dataimport/tests/test_worker_configurations.py index ca363911d3db5873f12d5fb3c61635b1a88d78bc..3503fd2f1cd21dfe00f4836fbd9afd0d936cc002 100644 --- a/arkindex/dataimport/tests/test_worker_configurations.py +++ b/arkindex/dataimport/tests/test_worker_configurations.py @@ -210,7 +210,7 @@ class TestWorkerConfigurations(FixtureAPITestCase): response.json(), { 'configuration': ['A worker configuration with this configuration already exists for this worker.'], - 'id': str(test_config.id), + 'id': [str(test_config.id)], }, ) @@ -230,7 +230,7 @@ class TestWorkerConfigurations(FixtureAPITestCase): response.json(), { 'configuration': ['A worker configuration with this configuration already exists for this worker.'], - 'id': str(test_config.id), + 'id': [str(test_config.id)], 'name': ['A worker configuration with this name already exists for this worker.'] } ) @@ -253,7 +253,7 @@ class TestWorkerConfigurations(FixtureAPITestCase): response.json(), { 'configuration': ['A worker configuration with this configuration already exists for this worker.'], - 'id': str(test_config_2.id), + 'id': [str(test_config_2.id)], 'name': ['A worker configuration with this name already exists for this worker.'] } )