From d45579a3c0e5722410a33038119c543c3005ab09 Mon Sep 17 00:00:00 2001 From: Erwan Rouchet <rouchet@teklia.com> Date: Mon, 27 May 2019 10:31:39 +0000 Subject: [PATCH] Split DataImport serializers into a package --- arkindex/dataimport/api.py | 12 +- arkindex/dataimport/serializers/__init__.py | 0 arkindex/dataimport/serializers/files.py | 92 +++++++ arkindex/dataimport/serializers/git.py | 119 ++++++++++ .../imports.py} | 224 +----------------- arkindex/dataimport/serializers/ml_tool.py | 13 + arkindex/documents/serializers/elements.py | 2 +- 7 files changed, 236 insertions(+), 226 deletions(-) create mode 100644 arkindex/dataimport/serializers/__init__.py create mode 100644 arkindex/dataimport/serializers/files.py create mode 100644 arkindex/dataimport/serializers/git.py rename arkindex/dataimport/{serializers.py => serializers/imports.py} (50%) create mode 100644 arkindex/dataimport/serializers/ml_tool.py diff --git a/arkindex/dataimport/api.py b/arkindex/dataimport/api.py index 48abf86eac..4349bab8fb 100644 --- a/arkindex/dataimport/api.py +++ b/arkindex/dataimport/api.py @@ -17,12 +17,12 @@ from arkindex.project.permissions import IsVerified, IsAuthenticated, IsAdminUse from arkindex.documents.models import Corpus, Right, Element, ElementType from arkindex.dataimport.models import \ DataImport, DataFile, DataImportFailure, Repository, Event, EventType -from arkindex.dataimport.serializers import ( - DataImportLightSerializer, DataImportSerializer, DataImportFromFilesSerializer, - DataImportFailureSerializer, DataFileSerializer, DataFileCreateSerializer, - RepositorySerializer, RepositoryStartImportSerializer, - ExternalRepositorySerializer, EventSerializer, MLToolSerializer, -) +from arkindex.dataimport.serializers.ml_tool import MLToolSerializer +from arkindex.dataimport.serializers.files import DataFileSerializer, DataFileCreateSerializer +from arkindex.dataimport.serializers.git import \ + RepositorySerializer, RepositoryStartImportSerializer, ExternalRepositorySerializer, EventSerializer +from arkindex.dataimport.serializers.imports import \ + DataImportLightSerializer, DataImportSerializer, DataImportFromFilesSerializer, DataImportFailureSerializer from arkindex.users.models import OAuthCredentials from arkindex_common.ml_tool import MLTool from arkindex_common.enums import DataImportMode diff --git a/arkindex/dataimport/serializers/__init__.py b/arkindex/dataimport/serializers/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/arkindex/dataimport/serializers/files.py b/arkindex/dataimport/serializers/files.py new file mode 100644 index 0000000000..056784035a --- /dev/null +++ b/arkindex/dataimport/serializers/files.py @@ -0,0 +1,92 @@ +from rest_framework import serializers +from arkindex.project.serializer_fields import EnumField +from arkindex.project.aws import S3FileStatus +from arkindex.dataimport.models import DataFile +from arkindex.images.serializers import ImageSerializer +import re + + +class DataFileSerializer(serializers.ModelSerializer): + """ + Serialize a single uploaded file + """ + + images = ImageSerializer(many=True, read_only=True) + status = EnumField(S3FileStatus) + s3_url = serializers.SerializerMethodField() + + class Meta: + model = DataFile + fields = ( + 'id', + 'name', + 'hash', + 'content_type', + 'size', + 'images', + 'status', + 's3_url', + ) + read_only_fields = ('id', 'name', 'hash', 'size', 'content_type', 'images', 's3_url', ) + + def validate_status(self, value): + if value == S3FileStatus.Checked: + # Status has been resquested to be checked, perform validation + try: + self.instance.perform_check(raise_exc=True) + except (AssertionError, ValueError) as e: + raise serializers.ValidationError(str(e)) + return value + + def get_s3_url(self, obj): + if 'request' not in self.context: + return + # Only allow the S3 URL for internal users or admins + user = self.context['request'].user + if user.is_authenticated and (user.is_admin or user.is_internal): + return obj.s3_url + + +class DataFileCreateSerializer(serializers.ModelSerializer): + """ + Serialize a Datafile creation with Amazon S3 PUT uri + """ + + status = EnumField(S3FileStatus, read_only=True) + hash = serializers.RegexField(re.compile(r'[0-9A-Fa-f]{32}'), min_length=32, max_length=32) + s3_put_url = serializers.SerializerMethodField() + + class Meta: + model = DataFile + fields = ( + 'id', + 'name', + 'hash', + 'size', + 'corpus', + 'status', + 's3_url', + 's3_put_url', + ) + read_only_fields = ('id', 'status', 's3_url', 's3_put_url') + + def get_s3_put_url(self, obj): + if obj.status == S3FileStatus.Checked: + return None + return obj.s3_put_url + + def run_validation(self, data): + existing_datafile = DataFile.objects.filter(hash=data['hash']).first() + if existing_datafile: + message = { + 'hash': ['DataFile with this hash already exists'], + 'id': str(existing_datafile.id), + 'status': existing_datafile.status.value, + } + if existing_datafile.status != S3FileStatus.Checked: + message['s3_put_url'] = existing_datafile.s3_put_url + else: + message['s3_url'] = existing_datafile.s3_url + self._errors = message + raise serializers.ValidationError(message) + return super().run_validation(data) diff --git a/arkindex/dataimport/serializers/git.py b/arkindex/dataimport/serializers/git.py new file mode 100644 index 0000000000..ad1e379bde --- /dev/null +++ b/arkindex/dataimport/serializers/git.py @@ -0,0 +1,119 @@ +from rest_framework import serializers +from arkindex.project.serializer_fields import EnumField +from arkindex.documents.models import Corpus +from arkindex.dataimport.models import DataImport, Repository, Revision, Event, EventType +import gitlab.v4.objects + + +class RevisionSerializer(serializers.ModelSerializer): + """ + Serialize a repository revision + """ + + date = serializers.DateTimeField(source='created') + + class Meta: + model = Revision + fields = ( + 'id', + 'date', + 'hash', + 'ref', + 'message', + 'author', + 'commit_url', + 'repo_id', + ) + + +class RepositorySerializer(serializers.ModelSerializer): + """ + Serialize a repository + """ + + class Meta: + model = Repository + fields = ( + 'id', + 'url', + 'enabled', + 'corpus', + ) + extra_kwargs = { + 'id': {'read_only': True}, + 'url': {'read_only': True}, + 'enabled': {'read_only': True}, + } + + +class RepositoryStartImportSerializer(serializers.ModelSerializer): + """ + A serializer used by the RepositoryStartImport endpoint to return a DataImport ID. + This serializer is required to get the OpenAPI schema generation to work. + """ + + import_id = serializers.UUIDField(source='id') + + class Meta: + model = DataImport + fields = ('import_id',) + read_only_fields = ('import_id',) + + +class ExternalRepositorySerializer(serializers.Serializer): + """ + Serialize a Git repository from an external API + """ + + # Useless fields, added to prevent 500 errors when opening + # an API endpoint in the browser + id = serializers.IntegerField(min_value=0) + corpus = serializers.UUIDField() + + def to_representation(self, obj): + if isinstance(obj, gitlab.v4.objects.Project): + return { + "id": obj.id, + "name": obj.name_with_namespace, + "url": obj.web_url, + } + else: + raise NotImplementedError + + def to_internal_value(self, data): + """ + Deserializing only requires a 'id' attribute + """ + if not data.get('id'): + raise serializers.ValidationError({ + 'id': 'This field is required.' + }) + if not data.get('corpus'): + raise serializers.ValidationError({ + 'corpus': 'This field is required.' + }) + + return { + 'id': data['id'], + 'corpus': Corpus.objects.writable(self.context['request'].user) + .get(id=data['corpus']) + } + + +class EventSerializer(serializers.ModelSerializer): + """ + Serialize a diff event for an element on a revision + """ + + type = EnumField(EventType) + revision = RevisionSerializer() + date = serializers.DateTimeField(source='created') + + class Meta: + model = Event + fields = ( + 'id', + 'type', + 'date', + 'revision', + ) diff --git a/arkindex/dataimport/serializers.py b/arkindex/dataimport/serializers/imports.py similarity index 50% rename from arkindex/dataimport/serializers.py rename to arkindex/dataimport/serializers/imports.py index fc1afdfc59..20580598d5 100644 --- a/arkindex/dataimport/serializers.py +++ b/arkindex/dataimport/serializers/imports.py @@ -1,17 +1,13 @@ from rest_framework import serializers from rest_framework.utils import model_meta +from ponos.models import State +from arkindex_common.enums import DataImportMode, DataImportPDFEngine +from arkindex_common.ml_tool import MLToolType from arkindex.project.serializer_fields import EnumField, MLToolField -from arkindex.project.aws import S3FileStatus -from arkindex.dataimport.models import \ - DataImport, DataImportFailure, DataFile, Repository, Revision, Event, EventType from arkindex.documents.models import Corpus, Element, ElementType from arkindex.documents.serializers.light import ElementLightSerializer -from arkindex.images.serializers import ImageSerializer -from arkindex_common.ml_tool import MLToolType -from arkindex_common.enums import DataImportMode, DataImportPDFEngine -from ponos.models import State -import gitlab.v4.objects -import re +from arkindex.dataimport.models import DataImport, DataImportFailure, DataFile +from arkindex.dataimport.serializers.git import RevisionSerializer class DataImportLightSerializer(serializers.ModelSerializer): @@ -59,27 +55,6 @@ class PDFPayloadSerializer(ImagesPayloadSerializer): pdf_engine = EnumField(DataImportPDFEngine, default=DataImportPDFEngine.Convert) -class RevisionSerializer(serializers.ModelSerializer): - """ - Serialize a repository revision - """ - - date = serializers.DateTimeField(source='created') - - class Meta: - model = Revision - fields = ( - 'id', - 'date', - 'hash', - 'ref', - 'message', - 'author', - 'commit_url', - 'repo_id', - ) - - class DataImportSerializer(DataImportLightSerializer): """ Serialize a data importing workflow with its payload @@ -181,92 +156,6 @@ class DataImportFromFilesSerializer(serializers.Serializer): return data -class DataFileSerializer(serializers.ModelSerializer): - """ - Serialize a single uploaded file - """ - - images = ImageSerializer(many=True, read_only=True) - status = EnumField(S3FileStatus) - s3_url = serializers.SerializerMethodField() - - class Meta: - model = DataFile - fields = ( - 'id', - 'name', - 'hash', - 'content_type', - 'size', - 'images', - 'status', - 's3_url', - ) - read_only_fields = ('id', 'name', 'hash', 'size', 'content_type', 'images', 's3_url', ) - - def validate_status(self, value): - if value == S3FileStatus.Checked: - # Status has been resquested to be checked, perform validation - try: - self.instance.perform_check(raise_exc=True) - except (AssertionError, ValueError) as e: - raise serializers.ValidationError(str(e)) - return value - - def get_s3_url(self, obj): - if 'request' not in self.context: - return - # Only allow the S3 URL for internal users or admins - user = self.context['request'].user - if user.is_authenticated and (user.is_admin or user.is_internal): - return obj.s3_url - - -class DataFileCreateSerializer(serializers.ModelSerializer): - """ - Serialize a Datafile creation with Amazon S3 PUT uri - """ - - status = EnumField(S3FileStatus, read_only=True) - hash = serializers.RegexField(re.compile(r'[0-9A-Fa-f]{32}'), min_length=32, max_length=32) - s3_put_url = serializers.SerializerMethodField() - - class Meta: - model = DataFile - fields = ( - 'id', - 'name', - 'hash', - 'size', - 'corpus', - 'status', - 's3_url', - 's3_put_url', - ) - read_only_fields = ('id', 'status', 's3_url', 's3_put_url') - - def get_s3_put_url(self, obj): - if obj.status == S3FileStatus.Checked: - return None - return obj.s3_put_url - - def run_validation(self, data): - existing_datafile = DataFile.objects.filter(hash=data['hash']).first() - if existing_datafile: - message = { - 'hash': ['DataFile with this hash already exists'], - 'id': str(existing_datafile.id), - 'status': existing_datafile.status.value, - } - if existing_datafile.status != S3FileStatus.Checked: - message['s3_put_url'] = existing_datafile.s3_put_url - else: - message['s3_url'] = existing_datafile.s3_url - self._errors = message - raise serializers.ValidationError(message) - return super().run_validation(data) - - class DataImportFailureSerializer(serializers.ModelSerializer): """ Serialize a data import error log @@ -285,106 +174,3 @@ class DataImportFailureSerializer(serializers.ModelSerializer): 'context', 'view_url', ) - - -class RepositorySerializer(serializers.ModelSerializer): - """ - Serialize a repository - """ - - class Meta: - model = Repository - fields = ( - 'id', - 'url', - 'enabled', - 'corpus', - ) - extra_kwargs = { - 'id': {'read_only': True}, - 'url': {'read_only': True}, - 'enabled': {'read_only': True}, - } - - -class RepositoryStartImportSerializer(serializers.ModelSerializer): - """ - A serializer used by the RepositoryStartImport endpoint to return a DataImport ID. - This serializer is required to get the OpenAPI schema generation to work. - """ - - import_id = serializers.UUIDField(source='id') - - class Meta: - model = DataImport - fields = ('import_id',) - read_only_fields = ('import_id',) - - -class ExternalRepositorySerializer(serializers.Serializer): - """ - Serialize a Git repository from an external API - """ - - # Useless fields, added to prevent 500 errors when opening - # an API endpoint in the browser - id = serializers.IntegerField(min_value=0) - corpus = serializers.UUIDField() - - def to_representation(self, obj): - if isinstance(obj, gitlab.v4.objects.Project): - return { - "id": obj.id, - "name": obj.name_with_namespace, - "url": obj.web_url, - } - else: - raise NotImplementedError - - def to_internal_value(self, data): - """ - Deserializing only requires a 'id' attribute - """ - if not data.get('id'): - raise serializers.ValidationError({ - 'id': 'This field is required.' - }) - if not data.get('corpus'): - raise serializers.ValidationError({ - 'corpus': 'This field is required.' - }) - - return { - 'id': data['id'], - 'corpus': Corpus.objects.writable(self.context['request'].user) - .get(id=data['corpus']) - } - - -class EventSerializer(serializers.ModelSerializer): - """ - Serialize a diff event for an element on a revision - """ - - type = EnumField(EventType) - revision = RevisionSerializer() - date = serializers.DateTimeField(source='created') - - class Meta: - model = Event - fields = ( - 'id', - 'type', - 'date', - 'revision', - ) - - -class MLToolSerializer(serializers.Serializer): - """ - Serialize a machine learning tool for display - """ - name = serializers.CharField() - slug = serializers.SlugField() - type = EnumField(MLToolType) - version = serializers.CharField() diff --git a/arkindex/dataimport/serializers/ml_tool.py b/arkindex/dataimport/serializers/ml_tool.py new file mode 100644 index 0000000000..1f4fcb7b2a --- /dev/null +++ b/arkindex/dataimport/serializers/ml_tool.py @@ -0,0 +1,13 @@ +from rest_framework import serializers +from arkindex_common.ml_tool import MLToolType +from arkindex.project.serializer_fields import EnumField + + +class MLToolSerializer(serializers.Serializer): + """ + Serialize a machine learning tool for display + """ + name = serializers.CharField() + slug = serializers.SlugField() + type = EnumField(MLToolType) + version = serializers.CharField() diff --git a/arkindex/documents/serializers/elements.py b/arkindex/documents/serializers/elements.py index f67b3a070f..2c3ef6ebd1 100644 --- a/arkindex/documents/serializers/elements.py +++ b/arkindex/documents/serializers/elements.py @@ -9,7 +9,7 @@ from arkindex.images.serializers import ZoneSerializer, ImageSerializer from arkindex.images.models import Image from arkindex.documents.serializers.light import CorpusLightSerializer, ElementLightSerializer, EntityLightSerializer from arkindex.documents.serializers.ml import ClassificationSerializer, TranscriptionSerializer -from arkindex.dataimport.serializers import RevisionSerializer +from arkindex.dataimport.serializers.git import RevisionSerializer from arkindex.dataimport.models import EventType from arkindex.project.serializer_fields import EnumField from arkindex.project.polygon import Polygon -- GitLab