Skip to content
Snippets Groups Projects
Verified Commit 84d32ae6 authored by Erwan Rouchet's avatar Erwan Rouchet
Browse files

CreateTranscriptions endpoint

parent 58354ce6
No related branches found
No related tags found
No related merge requests found
......@@ -3,8 +3,8 @@ from django.db import transaction
from django.db.models import Q, Count
from rest_framework import status
from rest_framework.generics import (
GenericAPIView, ListAPIView, ListCreateAPIView,
CreateAPIView, UpdateAPIView, RetrieveDestroyAPIView, RetrieveUpdateDestroyAPIView
GenericAPIView, ListAPIView, ListCreateAPIView, CreateAPIView,
RetrieveDestroyAPIView, RetrieveUpdateDestroyAPIView
)
from rest_framework.exceptions import PermissionDenied, ValidationError
from rest_framework.response import Response
......@@ -13,9 +13,10 @@ from arkindex.documents.models import \
from arkindex_common.ml_tool import MLToolType
from arkindex.documents.serializers.ml import (
ClassificationsSerializer, ClassificationCreateSerializer, ClassificationSerializer,
TranscriptionSerializer, TranscriptionCreateSerializer, ElementTranscriptionsBulkSerializer,
DataSourceStatsSerializer, ClassificationsSelectionSerializer, ClassificationMode,
CountMLClassSerializer, AnnotatedElementSerializer
ClassificationsSelectionSerializer, ClassificationMode,
TranscriptionSerializer, TranscriptionCreateSerializer, TranscriptionBulkSerializer,
ElementTranscriptionsBulkSerializer, AnnotatedElementSerializer,
DataSourceStatsSerializer, CountMLClassSerializer
)
from arkindex.images.models import Zone
from arkindex.project.filters import SafeSearchFilter
......@@ -293,21 +294,16 @@ class ElementTranscriptionsBulk(CreateAPIView):
return annotations
class TranscriptionBulk(DeprecatedMixin, CreateAPIView, UpdateAPIView):
class TranscriptionBulk(CreateAPIView):
'''
Create multiple transcriptions at once, all linked to the same page
and to the same recognizer.
Create multiple transcriptions at once on existing elements
'''
# Force DRF to ignore PATCH
http_method_names = ['post', 'put', 'head', 'options', 'trace']
openapi_overrides = {
'operationId': 'CreateTranscriptions',
'tags': ['transcriptions'],
}
deprecation_message = (
'Creating or updating transcriptions with their own zones is now deprecated. '
'Please use CreateElementTranscriptions to push transcriptions in bulk '
'attached to sub-elements.'
)
permission_classes = (IsVerified, )
serializer_class = TranscriptionBulkSerializer
class CorpusMLClassList(CorpusACLMixin, ListCreateAPIView):
......
......@@ -370,6 +370,55 @@ class AnnotatedElementSerializer(serializers.Serializer):
created = serializers.BooleanField(default=False)
class TranscriptionBulkItemSerializer(serializers.Serializer):
    """
    A single transcription entry inside a bulk creation payload.
    """
    # The target element is deliberately not fetched or checked per item:
    # the BulkSerializer handles that for the whole payload,
    # which avoids duplicate queries.
    element_id = serializers.UUIDField(
        help_text='ID of an existing element to add the transcription to',
    )
    type = EnumField(
        TranscriptionType,
        help_text='Type of the transcription',
    )
    text = serializers.CharField()
    # Scores are bounded to the [0, 1] interval
    score = serializers.FloatField(max_value=1, min_value=0)
class TranscriptionBulkSerializer(serializers.Serializer):
    """
    Bulk creation of transcriptions on existing elements.

    Takes an optional worker version and a list of transcription items,
    each of which targets an element by ID.
    """
    worker_version = serializers.PrimaryKeyRelatedField(
        queryset=WorkerVersion.objects.all(),
        default=None,
    )
    transcriptions = TranscriptionBulkItemSerializer(many=True)

    def validate(self, data):
        """
        Check that every targeted element exists in a corpus returned by
        ``Corpus.objects.writable`` for the requesting user.

        Returns the data unchanged when all elements are found; otherwise
        raises a ValidationError keyed by the index of each offending item.
        """
        requested_ids = {
            transcription['element_id']
            for transcription in data['transcriptions']
        }
        writable_corpora = Corpus.objects.writable(self.context['request'].user)
        # Look up all the requested elements at once instead of one query per item
        known_ids = set(
            Element.objects
            .filter(id__in=requested_ids, corpus__in=writable_corpora)
            .values_list('id', flat=True)
        )
        unknown_ids = requested_ids - known_ids

        if unknown_ids:
            # Report errors by list index, just like DRF's ListField, for easier debugging
            raise ValidationError({
                'transcriptions': {
                    str(i): {
                        "element_id": [f'Element {transcription["element_id"]} was not found or cannot be written to.'],
                    }
                    for i, transcription in enumerate(data['transcriptions'])
                    if transcription['element_id'] in unknown_ids
                },
            })

        return data

    def create(self, validated_data):
        """
        Build one Transcription per validated item, insert them all with a
        single ``bulk_create`` call, and return them under the
        ``transcriptions`` key.
        """
        new_transcriptions = []
        for item in validated_data['transcriptions']:
            new_transcriptions.append(Transcription(
                worker_version=validated_data['worker_version'],
                element_id=item['element_id'],
                type=item['type'],
                text=item['text'],
                score=item['score'],
            ))

        Transcription.objects.bulk_create(new_transcriptions)
        return {"transcriptions": new_transcriptions}
class ClassificationBulkSerializer(serializers.Serializer):
"""
Single classification serializer for bulk insertion
......
......@@ -283,14 +283,6 @@ paths:
description: Update the text of a manual transcription
delete:
description: Delete a manual transcription
/api/v1/transcription/bulk/:
post:
operationId: CreateTranscriptions
put:
operationId: UpdateTranscriptions
description: >-
Replace all existing transcriptions from a given recognizer on a page
with other transcriptions.
/api/v1/metadata/{id}/:
get:
operationId: RetrieveMetaData
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment