Commit d45579a3 authored by Erwan Rouchet, committed by Bastien Abadie
Split DataImport serializers into a package

parent bd16a28e
@@ -17,12 +17,12 @@ from arkindex.project.permissions import IsVerified, IsAuthenticated, IsAdminUse
from arkindex.documents.models import Corpus, Right, Element, ElementType
from arkindex.dataimport.models import \
DataImport, DataFile, DataImportFailure, Repository, Event, EventType
from arkindex.dataimport.serializers import (
DataImportLightSerializer, DataImportSerializer, DataImportFromFilesSerializer,
DataImportFailureSerializer, DataFileSerializer, DataFileCreateSerializer,
RepositorySerializer, RepositoryStartImportSerializer,
ExternalRepositorySerializer, EventSerializer, MLToolSerializer,
)
from arkindex.dataimport.serializers.ml_tool import MLToolSerializer
from arkindex.dataimport.serializers.files import DataFileSerializer, DataFileCreateSerializer
from arkindex.dataimport.serializers.git import \
RepositorySerializer, RepositoryStartImportSerializer, ExternalRepositorySerializer, EventSerializer
from arkindex.dataimport.serializers.imports import \
DataImportLightSerializer, DataImportSerializer, DataImportFromFilesSerializer, DataImportFailureSerializer
from arkindex.users.models import OAuthCredentials
from arkindex_common.ml_tool import MLTool
from arkindex_common.enums import DataImportMode
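The hunks below add the new modules of the arkindex.dataimport.serializers package (files.py, git.py, imports.py and ml_tool.py, judging by the updated imports above) and trim the same classes from the old monolithic module. A package __init__.py that re-exports the split serializers could have kept the old import paths working; this commit instead updates every caller, so the shim below is only an illustrative sketch, not part of the change:

# Hypothetical arkindex/dataimport/serializers/__init__.py re-export shim.
# NOT part of this commit; module and class names are taken from the imports above.
from arkindex.dataimport.serializers.files import DataFileSerializer, DataFileCreateSerializer  # noqa
from arkindex.dataimport.serializers.git import (  # noqa
    RepositorySerializer, RepositoryStartImportSerializer,
    ExternalRepositorySerializer, EventSerializer, RevisionSerializer,
)
from arkindex.dataimport.serializers.imports import (  # noqa
    DataImportLightSerializer, DataImportSerializer,
    DataImportFromFilesSerializer, DataImportFailureSerializer,
)
from arkindex.dataimport.serializers.ml_tool import MLToolSerializer  # noqa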
from rest_framework import serializers
from arkindex.project.serializer_fields import EnumField
from arkindex.project.aws import S3FileStatus
from arkindex.dataimport.models import DataFile
from arkindex.images.serializers import ImageSerializer
import re
class DataFileSerializer(serializers.ModelSerializer):
"""
Serialize a single uploaded file
"""
images = ImageSerializer(many=True, read_only=True)
status = EnumField(S3FileStatus)
s3_url = serializers.SerializerMethodField()
class Meta:
model = DataFile
fields = (
'id',
'name',
'hash',
'content_type',
'size',
'images',
'status',
's3_url',
)
read_only_fields = ('id', 'name', 'hash', 'size', 'content_type', 'images', 's3_url', )
def validate_status(self, value):
if value == S3FileStatus.Checked:
# Status has been requested to be checked, perform validation
try:
self.instance.perform_check(raise_exc=True)
except (AssertionError, ValueError) as e:
raise serializers.ValidationError(str(e))
return value
def get_s3_url(self, obj):
if 'request' not in self.context:
return
# Only allow the S3 URL for internal users or admins
user = self.context['request'].user
if user.is_authenticated and (user.is_admin or user.is_internal):
return obj.s3_url
class DataFileCreateSerializer(serializers.ModelSerializer):
"""
Serialize a DataFile creation with an Amazon S3 PUT URI
"""
status = EnumField(S3FileStatus, read_only=True)
hash = serializers.RegexField(re.compile(r'[0-9A-Fa-f]{32}'), min_length=32, max_length=32)
s3_put_url = serializers.SerializerMethodField()
class Meta:
model = DataFile
fields = (
'id',
'name',
'hash',
'size',
'corpus',
'status',
's3_url',
's3_put_url',
)
read_only_fields = ('id', 'status', 's3_url', 's3_put_url')
def get_s3_put_url(self, obj):
if obj.status == S3FileStatus.Checked:
return None
return obj.s3_put_url
def run_validation(self, data):
existing_datafile = DataFile.objects.filter(hash=data['hash']).first()
if existing_datafile:
message = {
'hash': ['DataFile with this hash already exists'],
'id': str(existing_datafile.id),
'status': existing_datafile.status.value,
}
if existing_datafile.status != S3FileStatus.Checked:
message['s3_put_url'] = existing_datafile.s3_put_url
else:
message['s3_url'] = existing_datafile.s3_url
self._errors = message
raise serializers.ValidationError(message)
return super().run_validation(data)
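The code above is the new serializers/files.py module, which keeps the upload serializers together. A minimal sketch of how DataFileCreateSerializer's duplicate-hash handling surfaces to a caller, assuming a DRF view has already built the request context (the helper name and payload values are hypothetical):

# Hypothetical caller-side sketch; not part of the commit.
from rest_framework import serializers
from arkindex.dataimport.serializers.files import DataFileCreateSerializer

def create_datafile(payload, request):
    # payload e.g. {'name': 'scan.jpg', 'hash': '0' * 32, 'size': 1234, 'corpus': corpus_id}
    serializer = DataFileCreateSerializer(data=payload, context={'request': request})
    try:
        serializer.is_valid(raise_exception=True)
    except serializers.ValidationError as e:
        # When a DataFile with the same 32-character hex hash already exists,
        # the error detail carries its id and status, plus either a fresh
        # s3_put_url (not yet checked) or the final s3_url (already checked).
        return e.detail
    return serializer.save()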
from rest_framework import serializers
from arkindex.project.serializer_fields import EnumField
from arkindex.documents.models import Corpus
from arkindex.dataimport.models import DataImport, Repository, Revision, Event, EventType
import gitlab.v4.objects
class RevisionSerializer(serializers.ModelSerializer):
"""
Serialize a repository revision
"""
date = serializers.DateTimeField(source='created')
class Meta:
model = Revision
fields = (
'id',
'date',
'hash',
'ref',
'message',
'author',
'commit_url',
'repo_id',
)
class RepositorySerializer(serializers.ModelSerializer):
"""
Serialize a repository
"""
class Meta:
model = Repository
fields = (
'id',
'url',
'enabled',
'corpus',
)
extra_kwargs = {
'id': {'read_only': True},
'url': {'read_only': True},
'enabled': {'read_only': True},
}
class RepositoryStartImportSerializer(serializers.ModelSerializer):
"""
A serializer used by the RepositoryStartImport endpoint to return a DataImport ID.
This serializer is required to get the OpenAPI schema generation to work.
"""
import_id = serializers.UUIDField(source='id')
class Meta:
model = DataImport
fields = ('import_id',)
read_only_fields = ('import_id',)
class ExternalRepositorySerializer(serializers.Serializer):
"""
Serialize a Git repository from an external API
"""
# Useless fields, added to prevent 500 errors when opening
# an API endpoint in the browser
id = serializers.IntegerField(min_value=0)
corpus = serializers.UUIDField()
def to_representation(self, obj):
if isinstance(obj, gitlab.v4.objects.Project):
return {
"id": obj.id,
"name": obj.name_with_namespace,
"url": obj.web_url,
}
else:
raise NotImplementedError
def to_internal_value(self, data):
"""
Deserializing requires 'id' and 'corpus' attributes
"""
if not data.get('id'):
raise serializers.ValidationError({
'id': 'This field is required.'
})
if not data.get('corpus'):
raise serializers.ValidationError({
'corpus': 'This field is required.'
})
return {
'id': data['id'],
'corpus': Corpus.objects.writable(self.context['request'].user)
.get(id=data['corpus'])
}
class EventSerializer(serializers.ModelSerializer):
"""
Serialize a diff event for an element on a revision
"""
type = EnumField(EventType)
revision = RevisionSerializer()
date = serializers.DateTimeField(source='created')
class Meta:
model = Event
fields = (
'id',
'type',
'date',
'revision',
)
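The code above is the new serializers/git.py module, which groups the Git-related serializers. A usage sketch for ExternalRepositorySerializer with python-gitlab, assuming valid credentials and a readable project (URL, token and project id are placeholders):

# Hypothetical sketch; not part of the commit.
import gitlab
from arkindex.dataimport.serializers.git import ExternalRepositorySerializer

gl = gitlab.Gitlab('https://gitlab.example.com', private_token='<token>')
project = gl.projects.get(42)  # a gitlab.v4.objects.Project instance

# to_representation only knows how to render python-gitlab Project objects
# and raises NotImplementedError for anything else.
data = ExternalRepositorySerializer().to_representation(project)
# data -> {'id': ..., 'name': <name with namespace>, 'url': <web URL>}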
from rest_framework import serializers
from rest_framework.utils import model_meta
from ponos.models import State
from arkindex_common.enums import DataImportMode, DataImportPDFEngine
from arkindex_common.ml_tool import MLToolType
from arkindex.project.serializer_fields import EnumField, MLToolField
from arkindex.project.aws import S3FileStatus
from arkindex.dataimport.models import \
DataImport, DataImportFailure, DataFile, Repository, Revision, Event, EventType
from arkindex.documents.models import Corpus, Element, ElementType
from arkindex.documents.serializers.light import ElementLightSerializer
from arkindex.images.serializers import ImageSerializer
from arkindex_common.ml_tool import MLToolType
from arkindex_common.enums import DataImportMode, DataImportPDFEngine
from ponos.models import State
import gitlab.v4.objects
import re
from arkindex.dataimport.models import DataImport, DataImportFailure, DataFile
from arkindex.dataimport.serializers.git import RevisionSerializer
class DataImportLightSerializer(serializers.ModelSerializer):
@@ -59,27 +55,6 @@ class PDFPayloadSerializer(ImagesPayloadSerializer):
pdf_engine = EnumField(DataImportPDFEngine, default=DataImportPDFEngine.Convert)
class RevisionSerializer(serializers.ModelSerializer):
"""
Serialize a repository revision
"""
date = serializers.DateTimeField(source='created')
class Meta:
model = Revision
fields = (
'id',
'date',
'hash',
'ref',
'message',
'author',
'commit_url',
'repo_id',
)
class DataImportSerializer(DataImportLightSerializer):
"""
Serialize a data importing workflow with its payload
@@ -181,92 +156,6 @@ class DataImportFromFilesSerializer(serializers.Serializer):
return data
class DataFileSerializer(serializers.ModelSerializer):
"""
Serialize a single uploaded file
"""
images = ImageSerializer(many=True, read_only=True)
status = EnumField(S3FileStatus)
s3_url = serializers.SerializerMethodField()
class Meta:
model = DataFile
fields = (
'id',
'name',
'hash',
'content_type',
'size',
'images',
'status',
's3_url',
)
read_only_fields = ('id', 'name', 'hash', 'size', 'content_type', 'images', 's3_url', )
def validate_status(self, value):
if value == S3FileStatus.Checked:
# Status has been requested to be checked, perform validation
try:
self.instance.perform_check(raise_exc=True)
except (AssertionError, ValueError) as e:
raise serializers.ValidationError(str(e))
return value
def get_s3_url(self, obj):
if 'request' not in self.context:
return
# Only allow the S3 URL for internal users or admins
user = self.context['request'].user
if user.is_authenticated and (user.is_admin or user.is_internal):
return obj.s3_url
class DataFileCreateSerializer(serializers.ModelSerializer):
"""
Serialize a DataFile creation with an Amazon S3 PUT URI
"""
status = EnumField(S3FileStatus, read_only=True)
hash = serializers.RegexField(re.compile(r'[0-9A-Fa-f]{32}'), min_length=32, max_length=32)
s3_put_url = serializers.SerializerMethodField()
class Meta:
model = DataFile
fields = (
'id',
'name',
'hash',
'size',
'corpus',
'status',
's3_url',
's3_put_url',
)
read_only_fields = ('id', 'status', 's3_url', 's3_put_url')
def get_s3_put_url(self, obj):
if obj.status == S3FileStatus.Checked:
return None
return obj.s3_put_url
def run_validation(self, data):
existing_datafile = DataFile.objects.filter(hash=data['hash']).first()
if existing_datafile:
message = {
'hash': ['DataFile with this hash already exists'],
'id': str(existing_datafile.id),
'status': existing_datafile.status.value,
}
if existing_datafile.status != S3FileStatus.Checked:
message['s3_put_url'] = existing_datafile.s3_put_url
else:
message['s3_url'] = existing_datafile.s3_url
self._errors = message
raise serializers.ValidationError(message)
return super().run_validation(data)
class DataImportFailureSerializer(serializers.ModelSerializer):
"""
Serialize a data import error log
@@ -285,106 +174,3 @@ class DataImportFailureSerializer(serializers.ModelSerializer):
'context',
'view_url',
)
class RepositorySerializer(serializers.ModelSerializer):
"""
Serialize a repository
"""
class Meta:
model = Repository
fields = (
'id',
'url',
'enabled',
'corpus',
)
extra_kwargs = {
'id': {'read_only': True},
'url': {'read_only': True},
'enabled': {'read_only': True},
}
class RepositoryStartImportSerializer(serializers.ModelSerializer):
"""
A serializer used by the RepositoryStartImport endpoint to return a DataImport ID.
This serializer is required to get the OpenAPI schema generation to work.
"""
import_id = serializers.UUIDField(source='id')
class Meta:
model = DataImport
fields = ('import_id',)
read_only_fields = ('import_id',)
class ExternalRepositorySerializer(serializers.Serializer):
"""
Serialize a Git repository from an external API
"""
# Useless fields, added to prevent 500 errors when opening
# an API endpoint in the browser
id = serializers.IntegerField(min_value=0)
corpus = serializers.UUIDField()
def to_representation(self, obj):
if isinstance(obj, gitlab.v4.objects.Project):
return {
"id": obj.id,
"name": obj.name_with_namespace,
"url": obj.web_url,
}
else:
raise NotImplementedError
def to_internal_value(self, data):
"""
Deserializing requires 'id' and 'corpus' attributes
"""
if not data.get('id'):
raise serializers.ValidationError({
'id': 'This field is required.'
})
if not data.get('corpus'):
raise serializers.ValidationError({
'corpus': 'This field is required.'
})
return {
'id': data['id'],
'corpus': Corpus.objects.writable(self.context['request'].user)
.get(id=data['corpus'])
}
class EventSerializer(serializers.ModelSerializer):
"""
Serialize a diff event for an element on a revision
"""
type = EnumField(EventType)
revision = RevisionSerializer()
date = serializers.DateTimeField(source='created')
class Meta:
model = Event
fields = (
'id',
'type',
'date',
'revision',
)
class MLToolSerializer(serializers.Serializer):
"""
Serialize a machine learning tool for display
"""
name = serializers.CharField()
slug = serializers.SlugField()
type = EnumField(MLToolType)
version = serializers.CharField()
from rest_framework import serializers
from arkindex_common.ml_tool import MLToolType
from arkindex.project.serializer_fields import EnumField
class MLToolSerializer(serializers.Serializer):
"""
Serialize a machine learning tool for display
"""
name = serializers.CharField()
slug = serializers.SlugField()
type = EnumField(MLToolType)
version = serializers.CharField()
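Finally, the new serializers/ml_tool.py module above isolates the small MLToolSerializer. A sketch of serializing a tool description with it; the field values are assumptions, since the real MLToolType enum lives in arkindex_common.ml_tool:

# Hypothetical sketch; not part of the commit.
from arkindex.dataimport.serializers.ml_tool import MLToolSerializer
from arkindex_common.ml_tool import MLToolType

tool = {
    'name': 'Example OCR tool',   # illustrative values only
    'slug': 'example-ocr',
    'type': list(MLToolType)[0],  # any real MLToolType member works here
    'version': '0.1.0',
}
# .data renders the dict through the declared fields; the enum is serialized
# by EnumField (typically to its value).
print(MLToolSerializer(tool).data)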
@@ -9,7 +9,7 @@ from arkindex.images.serializers import ZoneSerializer, ImageSerializer
from arkindex.images.models import Image
from arkindex.documents.serializers.light import CorpusLightSerializer, ElementLightSerializer, EntityLightSerializer
from arkindex.documents.serializers.ml import ClassificationSerializer, TranscriptionSerializer
from arkindex.dataimport.serializers import RevisionSerializer
from arkindex.dataimport.serializers.git import RevisionSerializer
from arkindex.dataimport.models import EventType
from arkindex.project.serializer_fields import EnumField
from arkindex.project.polygon import Polygon