From d45579a3c0e5722410a33038119c543c3005ab09 Mon Sep 17 00:00:00 2001
From: Erwan Rouchet <rouchet@teklia.com>
Date: Mon, 27 May 2019 10:31:39 +0000
Subject: [PATCH] Split DataImport serializers into a package
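
Move the single dataimport serializers module into a serializers package
split by concern: files.py for the DataFile upload serializers, git.py for
the repository, revision and event serializers, imports.py for the
DataImport workflow serializers, and ml_tool.py for the MLTool serializer.
Imports in dataimport/api.py and documents/serializers/elements.py are
updated accordingly.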

---
 arkindex/dataimport/api.py                    |  12 +-
 arkindex/dataimport/serializers/__init__.py   |   0
 arkindex/dataimport/serializers/files.py      |  92 +++++++
 arkindex/dataimport/serializers/git.py        | 119 ++++++++++
 .../imports.py}                               | 224 +-----------------
 arkindex/dataimport/serializers/ml_tool.py    |  13 +
 arkindex/documents/serializers/elements.py    |   2 +-
 7 files changed, 236 insertions(+), 226 deletions(-)
 create mode 100644 arkindex/dataimport/serializers/__init__.py
 create mode 100644 arkindex/dataimport/serializers/files.py
 create mode 100644 arkindex/dataimport/serializers/git.py
 rename arkindex/dataimport/{serializers.py => serializers/imports.py} (50%)
 create mode 100644 arkindex/dataimport/serializers/ml_tool.py

diff --git a/arkindex/dataimport/api.py b/arkindex/dataimport/api.py
index 48abf86eac..4349bab8fb 100644
--- a/arkindex/dataimport/api.py
+++ b/arkindex/dataimport/api.py
@@ -17,12 +17,12 @@ from arkindex.project.permissions import IsVerified, IsAuthenticated, IsAdminUse
 from arkindex.documents.models import Corpus, Right, Element, ElementType
 from arkindex.dataimport.models import \
     DataImport, DataFile, DataImportFailure, Repository, Event, EventType
-from arkindex.dataimport.serializers import (
-    DataImportLightSerializer, DataImportSerializer, DataImportFromFilesSerializer,
-    DataImportFailureSerializer, DataFileSerializer, DataFileCreateSerializer,
-    RepositorySerializer, RepositoryStartImportSerializer,
-    ExternalRepositorySerializer, EventSerializer, MLToolSerializer,
-)
+from arkindex.dataimport.serializers.ml_tool import MLToolSerializer
+from arkindex.dataimport.serializers.files import DataFileSerializer, DataFileCreateSerializer
+from arkindex.dataimport.serializers.git import \
+    RepositorySerializer, RepositoryStartImportSerializer, ExternalRepositorySerializer, EventSerializer
+from arkindex.dataimport.serializers.imports import \
+    DataImportLightSerializer, DataImportSerializer, DataImportFromFilesSerializer, DataImportFailureSerializer
 from arkindex.users.models import OAuthCredentials
 from arkindex_common.ml_tool import MLTool
 from arkindex_common.enums import DataImportMode
diff --git a/arkindex/dataimport/serializers/__init__.py b/arkindex/dataimport/serializers/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/arkindex/dataimport/serializers/files.py b/arkindex/dataimport/serializers/files.py
new file mode 100644
index 0000000000..056784035a
--- /dev/null
+++ b/arkindex/dataimport/serializers/files.py
@@ -0,0 +1,92 @@
+from rest_framework import serializers
+from arkindex.project.serializer_fields import EnumField
+from arkindex.project.aws import S3FileStatus
+from arkindex.dataimport.models import DataFile
+from arkindex.images.serializers import ImageSerializer
+import re
+
+
+class DataFileSerializer(serializers.ModelSerializer):
+    """
+    Serialize a single uploaded file
+    """
+
+    images = ImageSerializer(many=True, read_only=True)
+    status = EnumField(S3FileStatus)
+    s3_url = serializers.SerializerMethodField()
+
+    class Meta:
+        model = DataFile
+        fields = (
+            'id',
+            'name',
+            'hash',
+            'content_type',
+            'size',
+            'images',
+            'status',
+            's3_url',
+        )
+        read_only_fields = ('id', 'name', 'hash', 'size', 'content_type', 'images', 's3_url', )
+
+    def validate_status(self, value):
+        if value == S3FileStatus.Checked:
+            # Status has been requested to be set to checked, perform validation
+            try:
+                self.instance.perform_check(raise_exc=True)
+            except (AssertionError, ValueError) as e:
+                raise serializers.ValidationError(str(e))
+        return value
+
+    def get_s3_url(self, obj):
+        if 'request' not in self.context:
+            return
+        # Only allow the S3 URL for internal users or admins
+        user = self.context['request'].user
+        if user.is_authenticated and (user.is_admin or user.is_internal):
+            return obj.s3_url
+
+
+class DataFileCreateSerializer(serializers.ModelSerializer):
+    """
+    Serialize a DataFile creation with an Amazon S3 PUT URI
+    """
+
+    status = EnumField(S3FileStatus, read_only=True)
+    hash = serializers.RegexField(re.compile(r'[0-9A-Fa-f]{32}'), min_length=32, max_length=32)
+    s3_put_url = serializers.SerializerMethodField()
+
+    class Meta:
+        model = DataFile
+        fields = (
+            'id',
+            'name',
+            'hash',
+            'size',
+            'corpus',
+            'status',
+            's3_url',
+            's3_put_url',
+        )
+        read_only_fields = ('id', 'status', 's3_url', 's3_put_url')
+
+    def get_s3_put_url(self, obj):
+        if obj.status == S3FileStatus.Checked:
+            return None
+        return obj.s3_put_url
+
+    def run_validation(self, data):
+        existing_datafile = DataFile.objects.filter(hash=data['hash']).first()
+        if existing_datafile:
+            message = {
+                'hash': ['DataFile with this hash already exists'],
+                'id': str(existing_datafile.id),
+                'status': existing_datafile.status.value,
+            }
+            if existing_datafile.status != S3FileStatus.Checked:
+                message['s3_put_url'] = existing_datafile.s3_put_url
+            else:
+                message['s3_url'] = existing_datafile.s3_url
+            self._errors = message
+            raise serializers.ValidationError(message)
+        return super().run_validation(data)
diff --git a/arkindex/dataimport/serializers/git.py b/arkindex/dataimport/serializers/git.py
new file mode 100644
index 0000000000..ad1e379bde
--- /dev/null
+++ b/arkindex/dataimport/serializers/git.py
@@ -0,0 +1,119 @@
+from rest_framework import serializers
+from arkindex.project.serializer_fields import EnumField
+from arkindex.documents.models import Corpus
+from arkindex.dataimport.models import DataImport, Repository, Revision, Event, EventType
+import gitlab.v4.objects
+
+
+class RevisionSerializer(serializers.ModelSerializer):
+    """
+    Serialize a repository revision
+    """
+
+    date = serializers.DateTimeField(source='created')
+
+    class Meta:
+        model = Revision
+        fields = (
+            'id',
+            'date',
+            'hash',
+            'ref',
+            'message',
+            'author',
+            'commit_url',
+            'repo_id',
+        )
+
+
+class RepositorySerializer(serializers.ModelSerializer):
+    """
+    Serialize a repository
+    """
+
+    class Meta:
+        model = Repository
+        fields = (
+            'id',
+            'url',
+            'enabled',
+            'corpus',
+        )
+        extra_kwargs = {
+            'id': {'read_only': True},
+            'url': {'read_only': True},
+            'enabled': {'read_only': True},
+        }
+
+
+class RepositoryStartImportSerializer(serializers.ModelSerializer):
+    """
+    A serializer used by the RepositoryStartImport endpoint to return a DataImport ID.
+    This serializer is required to get the OpenAPI schema generation to work.
+    """
+
+    import_id = serializers.UUIDField(source='id')
+
+    class Meta:
+        model = DataImport
+        fields = ('import_id',)
+        read_only_fields = ('import_id',)
+
+
+class ExternalRepositorySerializer(serializers.Serializer):
+    """
+    Serialize a Git repository from an external API
+    """
+
+    # Useless fields, added to prevent 500 errors when opening
+    # an API endpoint in the browser
+    id = serializers.IntegerField(min_value=0)
+    corpus = serializers.UUIDField()
+
+    def to_representation(self, obj):
+        if isinstance(obj, gitlab.v4.objects.Project):
+            return {
+                "id": obj.id,
+                "name": obj.name_with_namespace,
+                "url": obj.web_url,
+            }
+        else:
+            raise NotImplementedError
+
+    def to_internal_value(self, data):
+        """
+        Deserialization requires 'id' and 'corpus' attributes
+        """
+        if not data.get('id'):
+            raise serializers.ValidationError({
+                'id': 'This field is required.'
+            })
+        if not data.get('corpus'):
+            raise serializers.ValidationError({
+                'corpus': 'This field is required.'
+            })
+
+        return {
+            'id': data['id'],
+            'corpus': Corpus.objects.writable(self.context['request'].user)
+                                    .get(id=data['corpus'])
+        }
+
+
+class EventSerializer(serializers.ModelSerializer):
+    """
+    Serialize a diff event for an element on a revision
+    """
+
+    type = EnumField(EventType)
+    revision = RevisionSerializer()
+    date = serializers.DateTimeField(source='created')
+
+    class Meta:
+        model = Event
+        fields = (
+            'id',
+            'type',
+            'date',
+            'revision',
+        )
diff --git a/arkindex/dataimport/serializers.py b/arkindex/dataimport/serializers/imports.py
similarity index 50%
rename from arkindex/dataimport/serializers.py
rename to arkindex/dataimport/serializers/imports.py
index fc1afdfc59..20580598d5 100644
--- a/arkindex/dataimport/serializers.py
+++ b/arkindex/dataimport/serializers/imports.py
@@ -1,17 +1,13 @@
 from rest_framework import serializers
 from rest_framework.utils import model_meta
+from ponos.models import State
+from arkindex_common.enums import DataImportMode, DataImportPDFEngine
+from arkindex_common.ml_tool import MLToolType
 from arkindex.project.serializer_fields import EnumField, MLToolField
-from arkindex.project.aws import S3FileStatus
-from arkindex.dataimport.models import \
-    DataImport, DataImportFailure, DataFile, Repository, Revision, Event, EventType
 from arkindex.documents.models import Corpus, Element, ElementType
 from arkindex.documents.serializers.light import ElementLightSerializer
-from arkindex.images.serializers import ImageSerializer
-from arkindex_common.ml_tool import MLToolType
-from arkindex_common.enums import DataImportMode, DataImportPDFEngine
-from ponos.models import State
-import gitlab.v4.objects
-import re
+from arkindex.dataimport.models import DataImport, DataImportFailure, DataFile
+from arkindex.dataimport.serializers.git import RevisionSerializer
 
 
 class DataImportLightSerializer(serializers.ModelSerializer):
@@ -59,27 +55,6 @@ class PDFPayloadSerializer(ImagesPayloadSerializer):
     pdf_engine = EnumField(DataImportPDFEngine, default=DataImportPDFEngine.Convert)
 
 
-class RevisionSerializer(serializers.ModelSerializer):
-    """
-    Serialize a repository revision
-    """
-
-    date = serializers.DateTimeField(source='created')
-
-    class Meta:
-        model = Revision
-        fields = (
-            'id',
-            'date',
-            'hash',
-            'ref',
-            'message',
-            'author',
-            'commit_url',
-            'repo_id',
-        )
-
-
 class DataImportSerializer(DataImportLightSerializer):
     """
     Serialize a data importing workflow with its payload
@@ -181,92 +156,6 @@ class DataImportFromFilesSerializer(serializers.Serializer):
         return data
 
 
-class DataFileSerializer(serializers.ModelSerializer):
-    """
-    Serialize a single uploaded file
-    """
-
-    images = ImageSerializer(many=True, read_only=True)
-    status = EnumField(S3FileStatus)
-    s3_url = serializers.SerializerMethodField()
-
-    class Meta:
-        model = DataFile
-        fields = (
-            'id',
-            'name',
-            'hash',
-            'content_type',
-            'size',
-            'images',
-            'status',
-            's3_url',
-        )
-        read_only_fields = ('id', 'name', 'hash', 'size', 'content_type', 'images', 's3_url', )
-
-    def validate_status(self, value):
-        if value == S3FileStatus.Checked:
-            # Status has been resquested to be checked, perform validation
-            try:
-                self.instance.perform_check(raise_exc=True)
-            except (AssertionError, ValueError) as e:
-                raise serializers.ValidationError(str(e))
-        return value
-
-    def get_s3_url(self, obj):
-        if 'request' not in self.context:
-            return
-        # Only allow the S3 URL for internal users or admins
-        user = self.context['request'].user
-        if user.is_authenticated and (user.is_admin or user.is_internal):
-            return obj.s3_url
-
-
-class DataFileCreateSerializer(serializers.ModelSerializer):
-    """
-    Serialize a Datafile creation with Amazon S3 PUT uri
-    """
-
-    status = EnumField(S3FileStatus, read_only=True)
-    hash = serializers.RegexField(re.compile(r'[0-9A-Fa-f]{32}'), min_length=32, max_length=32)
-    s3_put_url = serializers.SerializerMethodField()
-
-    class Meta:
-        model = DataFile
-        fields = (
-            'id',
-            'name',
-            'hash',
-            'size',
-            'corpus',
-            'status',
-            's3_url',
-            's3_put_url',
-        )
-        read_only_fields = ('id', 'status', 's3_url', 's3_put_url')
-
-    def get_s3_put_url(self, obj):
-        if obj.status == S3FileStatus.Checked:
-            return None
-        return obj.s3_put_url
-
-    def run_validation(self, data):
-        existing_datafile = DataFile.objects.filter(hash=data['hash']).first()
-        if existing_datafile:
-            message = {
-                'hash': ['DataFile with this hash already exists'],
-                'id': str(existing_datafile.id),
-                'status': existing_datafile.status.value,
-            }
-            if existing_datafile.status != S3FileStatus.Checked:
-                message['s3_put_url'] = existing_datafile.s3_put_url
-            else:
-                message['s3_url'] = existing_datafile.s3_url
-            self._errors = message
-            raise serializers.ValidationError(message)
-        return super().run_validation(data)
-
-
 class DataImportFailureSerializer(serializers.ModelSerializer):
     """
     Serialize a data import error log
@@ -285,106 +174,3 @@ class DataImportFailureSerializer(serializers.ModelSerializer):
             'context',
             'view_url',
         )
-
-
-class RepositorySerializer(serializers.ModelSerializer):
-    """
-    Serialize a repository
-    """
-
-    class Meta:
-        model = Repository
-        fields = (
-            'id',
-            'url',
-            'enabled',
-            'corpus',
-        )
-        extra_kwargs = {
-            'id': {'read_only': True},
-            'url': {'read_only': True},
-            'enabled': {'read_only': True},
-        }
-
-
-class RepositoryStartImportSerializer(serializers.ModelSerializer):
-    """
-    A serializer used by the RepositoryStartImport endpoint to return a DataImport ID.
-    This serializer is required to get the OpenAPI schema generation to work.
-    """
-
-    import_id = serializers.UUIDField(source='id')
-
-    class Meta:
-        model = DataImport
-        fields = ('import_id',)
-        read_only_fields = ('import_id',)
-
-
-class ExternalRepositorySerializer(serializers.Serializer):
-    """
-    Serialize a Git repository from an external API
-    """
-
-    # Useless fields, added to prevent 500 errors when opening
-    # an API endpoint in the browser
-    id = serializers.IntegerField(min_value=0)
-    corpus = serializers.UUIDField()
-
-    def to_representation(self, obj):
-        if isinstance(obj, gitlab.v4.objects.Project):
-            return {
-                "id": obj.id,
-                "name": obj.name_with_namespace,
-                "url": obj.web_url,
-            }
-        else:
-            raise NotImplementedError
-
-    def to_internal_value(self, data):
-        """
-        Deserializing only requires a 'id' attribute
-        """
-        if not data.get('id'):
-            raise serializers.ValidationError({
-                'id': 'This field is required.'
-            })
-        if not data.get('corpus'):
-            raise serializers.ValidationError({
-                'corpus': 'This field is required.'
-            })
-
-        return {
-            'id': data['id'],
-            'corpus': Corpus.objects.writable(self.context['request'].user)
-                                    .get(id=data['corpus'])
-        }
-
-
-class EventSerializer(serializers.ModelSerializer):
-    """
-    Serialize a diff event for an element on a revision
-    """
-
-    type = EnumField(EventType)
-    revision = RevisionSerializer()
-    date = serializers.DateTimeField(source='created')
-
-    class Meta:
-        model = Event
-        fields = (
-            'id',
-            'type',
-            'date',
-            'revision',
-        )
-
-
-class MLToolSerializer(serializers.Serializer):
-    """
-    Serialize a machine learning tool for display
-    """
-    name = serializers.CharField()
-    slug = serializers.SlugField()
-    type = EnumField(MLToolType)
-    version = serializers.CharField()
diff --git a/arkindex/dataimport/serializers/ml_tool.py b/arkindex/dataimport/serializers/ml_tool.py
new file mode 100644
index 0000000000..1f4fcb7b2a
--- /dev/null
+++ b/arkindex/dataimport/serializers/ml_tool.py
@@ -0,0 +1,13 @@
+from rest_framework import serializers
+from arkindex_common.ml_tool import MLToolType
+from arkindex.project.serializer_fields import EnumField
+
+
+class MLToolSerializer(serializers.Serializer):
+    """
+    Serialize a machine learning tool for display
+    """
+    name = serializers.CharField()
+    slug = serializers.SlugField()
+    type = EnumField(MLToolType)
+    version = serializers.CharField()
diff --git a/arkindex/documents/serializers/elements.py b/arkindex/documents/serializers/elements.py
index f67b3a070f..2c3ef6ebd1 100644
--- a/arkindex/documents/serializers/elements.py
+++ b/arkindex/documents/serializers/elements.py
@@ -9,7 +9,7 @@ from arkindex.images.serializers import ZoneSerializer, ImageSerializer
 from arkindex.images.models import Image
 from arkindex.documents.serializers.light import CorpusLightSerializer, ElementLightSerializer, EntityLightSerializer
 from arkindex.documents.serializers.ml import ClassificationSerializer, TranscriptionSerializer
-from arkindex.dataimport.serializers import RevisionSerializer
+from arkindex.dataimport.serializers.git import RevisionSerializer
 from arkindex.dataimport.models import EventType
 from arkindex.project.serializer_fields import EnumField
 from arkindex.project.polygon import Polygon
-- 
GitLab