diff --git a/arkindex/documents/api.py b/arkindex/documents/api.py index 786dfbb94bdbcdea5ed09e79cd4a05bbaf871aa1..b723cd8346506ac68d81bdb862475b33b9428f53 100644 --- a/arkindex/documents/api.py +++ b/arkindex/documents/api.py @@ -7,14 +7,16 @@ from rest_framework import status from django.conf import settings from django.utils.decorators import method_decorator from django.views.decorators.cache import cache_page -from arkindex.documents.serializers import \ - ElementSerializer, CorpusSerializer, PageLightSerializer, \ - TranscriptionSearchResultSerializer, ActSearchResultSerializer, \ +from arkindex.documents.serializers.elements import \ + ElementSerializer, CorpusSerializer, PageLightSerializer, ActSerializer, SurfaceSerializer +from arkindex.documents.serializers.transcriptions import \ + TranscriptionsSerializer, TranscriptionCreateSerializer +from arkindex.documents.serializers.search import \ + TranscriptionSearchResultSerializer, ActSearchResultSerializer +from arkindex.documents.serializers.iiif import \ VolumeManifestSerializer, ActManifestSerializer, \ PageAnnotationListSerializer, PageActAnnotationListSerializer, \ - SurfaceAnnotationListSerializer, TranscriptionSearchAnnotationListSerializer, \ - ActSerializer, SurfaceSerializer, \ - TranscriptionCreateSerializer, TranscriptionsSerializer + SurfaceAnnotationListSerializer, TranscriptionSearchAnnotationListSerializer from arkindex.documents.models import \ Element, ElementType, Page, Act, Transcription, Corpus, TranscriptionType from arkindex.documents.search import \ diff --git a/arkindex/documents/serializers.py b/arkindex/documents/serializers.py deleted file mode 100644 index e90d42207ba84b3e1f909a9f8a594a217803f11c..0000000000000000000000000000000000000000 --- a/arkindex/documents/serializers.py +++ /dev/null @@ -1,704 +0,0 @@ -from abc import ABC, abstractmethod -from django.conf import settings -from rest_framework import serializers -from arkindex.documents.models import Element, ElementType, 
Transcription, Page, \ - PageType, PageDirection, Act, Corpus, MetaData, MetaType, TranscriptionType -from arkindex.images.models import Image, Zone -from arkindex.images.serializers import ZoneSerializer, ImageSerializer -from arkindex.dataimport.serializers import RevisionSerializer -from arkindex.project.serializer_fields import EnumField, ViewerURLField -from arkindex.project.tools import sslify_url -from django.urls import reverse -import urllib.parse - - -def build_absolute_url(element, request, name, id_argument='pk', **kwargs): - """Build an absolute URL for a specified view using the element ID.""" - kwargs[id_argument] = str(element.id) - return request.build_absolute_uri(reverse(name, kwargs=kwargs)) - - -class MetaDataSerializer(serializers.ModelSerializer): - """ - Serialises some Metadata for any Element - """ - type = EnumField(MetaType) - revision = RevisionSerializer() - - class Meta: - model = MetaData - fields = ( - 'id', - 'type', - 'name', - 'value', - 'revision', - ) - - -class TranscriptionSerializer(serializers.ModelSerializer): - """ - Serialises a Transcription - issued from a search - """ - - type = EnumField(ElementType) - - class Meta: - model = Transcription - fields = ( - 'id', - 'type', - 'text', - 'score', - ) - - -class ElementLightSerializer(serializers.ModelSerializer): - """ - Serialises a Element - """ - type = EnumField(ElementType) - - class Meta: - model = Element - fields = ( - 'id', - 'type', - 'name', - ) - - -class PageLightSerializer(serializers.ModelSerializer): - """ - Serialises a Page - """ - page_type = EnumField(PageType) - direction = EnumField(PageDirection) - image = ImageSerializer(source='zone.image') - - class Meta: - model = Page - fields = ( - 'id', - 'page_type', - 'nb', - 'direction', - 'display_name', - 'image', - ) - - -class ElementSerializer(serializers.ModelSerializer): - """ - Fully Serialises a document - """ - type = EnumField(ElementType) - - # TODO: detect correct manifest per element type - 
viewer_url = ViewerURLField('api:volume-manifest') - - class Meta: - model = Element - fields = ( - 'id', - 'type', - 'name', - 'viewer_url', - ) - - -class ActSerializer(serializers.ModelSerializer): - """ - Serialize an act with its parents and children - and metadatas - """ - - parents = serializers.ListField( - child=serializers.ListField( - child=ElementLightSerializer() - ), - source='parent_elements', - read_only=True, - ) - children = ElementLightSerializer(source='child_elements', read_only=True, many=True) - metadatas = MetaDataSerializer(read_only=True, many=True) - - class Meta: - model = Act - fields = ( - 'id', - 'name', - 'number', - 'parents', - 'children', - 'metadatas', - ) - read_only_fields = ('id', ) - - -class SurfaceSerializer(serializers.ModelSerializer): - """ - Serialize a surface with its page, image and zone - """ - - zone = ZoneSerializer() - page = PageLightSerializer() - - class Meta: - model = Element - fields = ( - 'id', - 'name', - 'page', - 'zone', - ) - - -class CorpusSerializer(serializers.ModelSerializer): - """ - Serialize a corpus - """ - - class Meta: - model = Corpus - fields = ('id', 'name') - - -class TranscriptionCreateSerializer(serializers.Serializer): - """ - Allows for insertion of new transcriptions and zones - """ - element = serializers.PrimaryKeyRelatedField(queryset=Element.objects.all()) - polygon = serializers.ListField( - child=serializers.ListField( - child=serializers.IntegerField(), - min_length=2, - max_length=2 - ), - min_length=3 - ) - text = serializers.CharField() - score = serializers.FloatField(min_value=0, max_value=1) - type = EnumField(TranscriptionType) - - -class TranscriptionBulkSerializer(serializers.Serializer): - """ - Allows for insertion of new transcriptions and zones - in Bulk (used by serializer below) - Note: no element ! 
- """ - polygon = serializers.ListField( - child=serializers.ListField( - child=serializers.IntegerField(), - min_length=2, - max_length=2 - ), - min_length=3 - ) - text = serializers.CharField() - score = serializers.FloatField(min_value=0, max_value=1) - type = EnumField(TranscriptionType) - - -class TranscriptionsSerializer(serializers.Serializer): - """ - Allows for insertion of new transcriptions and zones - in Bulk (uses serializer above) on a common parent - """ - transcriptions = TranscriptionBulkSerializer(many=True) - parent = serializers.PrimaryKeyRelatedField(queryset=Element.objects.all()) - image = serializers.PrimaryKeyRelatedField(queryset=Image.objects.all()) - - -class TranscriptionSearchResultSerializer(serializers.ModelSerializer): - """ - Link between objects & their search indexation - """ - type = EnumField(ElementType) - zone = ZoneSerializer() - parents = serializers.ListField( - child=serializers.ListField( - child=ElementLightSerializer() - ), - source='parent_paths', - ) - - class Meta: - model = Transcription - fields = ( - 'id', - 'type', - 'text', - 'score', - 'zone', - 'parents', - ) - - -class ActSearchResultSerializer(serializers.ModelSerializer): - """ - Serialize an act - """ - transcriptions = TranscriptionSerializer(many=True, source='transcriptions_results') - surfaces = ZoneSerializer(many=True) - parents = serializers.ListField( - child=serializers.ListField( - child=ElementLightSerializer() - ), - read_only=True, - ) - viewer_url = ViewerURLField('api:act-manifest') - - class Meta: - model = Act - fields = ( - 'id', - 'name', - 'number', - 'transcriptions', - 'surfaces', - 'parents', - 'viewer_url', - ) - - -class ImageResourceManifestSerializer(serializers.BaseSerializer): - """ - Serialize an image into a IIIF resource. 
- """ - - def to_representation(self, image): - assert isinstance(image, Image) - return { - "@id": sslify_url(image.get_thumbnail_url(max_width=None, max_height=None)), - "@type": "dctypes:Image", - "height": image.height, - "width": image.width, - "service": { - "@context": settings.IIIF_IMAGE_CONTEXT, - "@id": sslify_url(image.url), - "profile": settings.IIIF_IMAGE_SERVICE_PROFILE - } - } - - -class ImageThumbnailManifestSerializer(serializers.BaseSerializer): - """ - Serialize an image into a IIIF manifest thumbnail - """ - - def to_representation(self, image): - assert isinstance(image, Image) - return { - "@id": sslify_url(image.get_thumbnail_url()), - "service": { - "@context": settings.IIIF_IMAGE_CONTEXT, - "@id": sslify_url(image.url), - "profile": settings.IIIF_IMAGE_SERVICE_PROFILE - } - } - - -class ElementCanvasManifestSerializer(serializers.BaseSerializer): - """ - Serialize an element's zone into a IIIF canvas - """ - - def get_other_content(self, element): - return [] - - def to_representation(self, element): - assert isinstance(element, Element) - zone = element.zone - assert isinstance(zone, Zone) - assert 'request' in self.context, "A request is required to generate absolute URLs" - return { - "@id": build_absolute_url(element, self.context['request'], 'api:canvas-manifest'), - "@type": "sc:Canvas", - "label": element.name, - "height": zone.polygon.height, - "width": zone.polygon.width, - "images": [ - { - "@type": "oa:Annotation", - "resource": ImageResourceManifestSerializer(zone.image, context=self.context).data, - "on": build_absolute_url(element, self.context['request'], 'api:canvas-manifest'), - "motivation": "sc:painting" - } - ], - "otherContent": self.get_other_content(element), - } - - -class PageCanvasManifestSerializer(ElementCanvasManifestSerializer): - """ - Serialize a page's zone into a IIIF canvas - """ - - def to_representation(self, page): - assert isinstance(page, Page) - serialized_element = super().to_representation(page) - 
serialized_element['label'] = page.display_name - return serialized_element - - def get_other_content(self, page): - annotation_list_endpoint, annotation_list_name = \ - ("api:page-transcription-manifest", "Transcriptions") if settings.IIIF_TRANSCRIPTION_LIST \ - else ("api:page-act-manifest", "Actes") - return [ - { - "@id": build_absolute_url(page, self.context['request'], annotation_list_endpoint), - "@type": "sc:AnnotationList", - "label": annotation_list_name - } - ] - - -class ActPageCanvasManifestSerializer(PageCanvasManifestSerializer): - """ - Serialize a page into a IIIF canvas with annotation lists for surfaces - """ - - def get_other_content(self, page): - assert hasattr(page, 'act') - query = self.context['request'].query_params.get('q') - suffix = '?' + urllib.parse.urlencode({'q': query}) if query else '' - return [ - { - "@id": build_absolute_url(surface, self.context['request'], 'api:surface-manifest') + suffix, - "@type": "sc:AnnotationList", - "label": surface.name - } - for surface in Element.objects.get_descending( - page.act.id, - type=ElementType.Surface, - zone__image_id=page.zone.image_id - ) - ] - - -class ManifestSerializer(serializers.BaseSerializer): - """ - Serialize an element into a IIIF manifest - """ - - canvas_serializer = ElementCanvasManifestSerializer - id_url_name = 'api:volume-manifest' - - def to_representation(self, element): - assert isinstance(element, Element) - assert 'request' in self.context, "A request is required to generate absolute URLs" - - canvases = self.canvas_serializer( - self.get_canvases(element), - context=self.context, - many=True - ).data - return { - "@context": settings.IIIF_PRESENTATION_CONTEXT, - "@id": build_absolute_url(element, self.context['request'], self.id_url_name), - "@type": "sc:Manifest", - "thumbnail": ImageThumbnailManifestSerializer(element.get_thumbnail()).data, - "related": [], - "structures": self.get_structures(element, canvases), - "description": "", - "sequences": [ - { - 
"canvases": canvases, - "label": "", - "@id": build_absolute_url(element, self.context['request'], 'api:sequence-manifest'), - "@type": "sc:Sequence" - } - ], - "viewingHint": "individuals", - "label": element.name, - "viewingDirection": "left-to-right" - } - - def get_canvases(self, element): - return Element.objects.get_descending(element.id).prefetch_related('zone__image') - - def get_structures(self, element, canvases): - return [{ - "viewingHint": "top", - "label": element.name, - "@id": build_absolute_url(element, self.context['request'], self.id_url_name), - "ranges": [c['@id'] for c in canvases], - "@type": "sc:Range" - }] + [ - { - "canvases": [c['@id']], - "label": c['label'], - "@id": c['@id'], - "within": c['@id'], - "@type": "sc:Range" - } - for c in canvases] - - -class VolumeManifestSerializer(ManifestSerializer): - """ - Serialize a volume into a IIIF manifest - """ - - canvas_serializer = PageCanvasManifestSerializer - id_url_name = 'api:volume-manifest' - - def get_canvases(self, volume): - assert isinstance(volume, Element) and volume.type == ElementType.Volume - return Page.objects.get_descending(volume.id).prefetch_related('zone__image__server') - - def to_representation(self, volume): - serialized = super().to_representation(volume) - if 'service' not in serialized: - serialized['service'] = [] - serialized['service'].append({ - "@context": settings.IIIF_SEARCH_CONTEXT, - "@id": build_absolute_url(volume, self.context['request'], 'api:ts-search-manifest'), - "profile": settings.IIIF_SEARCH_SERVICE_PROFILE, - "label": "Search transcriptions" - }) - return serialized - - -class ActManifestSerializer(ManifestSerializer): - """ - Serialize an act into a IIIF manifest - """ - - canvas_serializer = ActPageCanvasManifestSerializer - id_url_name = 'api:act-manifest' - - def get_canvases(self, act): - assert isinstance(act, Act) - image_ids = list(Element.objects - .get_descending(act.id, type=ElementType.Surface) - .values_list('zone__image_id', 
flat=True)) - pages = Page.objects \ - .filter(zone__image_id__in=image_ids) \ - .select_related('zone__image__server') - # This query gives unordered pages so we reorder them manually - ordered_pages = sorted(pages, key=lambda p: image_ids.index(p.zone.image_id)) - # Add act info for canvas serializer - for p in ordered_pages: - p.act = act - return ordered_pages - - -class AnnotationSerializer(ABC, serializers.BaseSerializer): - """ - Serialize an element into a IIIF annotation - """ - - @abstractmethod - def get_url(self, element): - """Get an element's ID URL""" - - def get_resource(self, element): - """Get the annotation resource for a given element.""" - return { - "@id": self.get_url(), - "@type": "cnt:ContentAsText", - "chars": element.name, - "format": "text/plain" - } - - def get_target(self, element): - """Get the target canvas (`on` property) for a given element.""" - assert hasattr(element, 'zone') - return "{0}#xywh={1.x},{1.y},{1.width},{1.height}".format( - element.zone.image.get_thumbnail_url(max_width=None, max_height=None), - element.zone.polygon) - - def to_representation(self, element): - assert isinstance(element, (Element, Transcription)) - assert 'request' in self.context, "A request is required to generate absolute URLs" - return { - "@id": self.get_url(element), - "@type": "oa:Annotation", - "motivation": "sc:painting", - "on": self.get_target(element), - "resource": self.get_resource(element), - } - - -class TranscriptionAnnotationSerializer(AnnotationSerializer): - """ - Serialize a transcription into a IIIF annotation - """ - - def get_url(self, ts): - return build_absolute_url( - ts, - self.context['request'], - 'api:transcription-manifest', - id_argument='page_pk', - transcription_pk=ts.id, - ) - - def get_resource(self, ts): - return { - "@id": self.get_url(ts), - "@type": "cnt:ContentAsText", - "chars": ts.text, - "format": "text/plain" - } - - -class TranscriptionSearchAnnotationSerializer(TranscriptionAnnotationSerializer): - - 
def get_target(self, element): - assert isinstance(element, Transcription) - url = build_absolute_url(element.element, self.context['request'], 'api:canvas-manifest') - return "{0}#xywh={1.x},{1.y},{1.width},{1.height}".format(url, element.zone.polygon) - - -class SurfaceAnnotationSerializer(AnnotationSerializer): - """ - Serialize a surface into a IIIF annotation - """ - - def get_url(self, surface): - return build_absolute_url( - surface, - self.context['request'], - 'api:surface-manifest', - id_argument='page_pk', - surface_pk=surface.id, - ) - - def get_resource(self, surface): - act = Act.objects.get_ascending(surface.id)[0] - chars = 'Act {}'.format(act.number) if act is not None else 'Unknown act' - return { - "@id": self.get_url(surface), - "@type": "cnt:ContentAsText", - "chars": chars, - "format": "text/plain" - } - - -class AnnotationListSerializer(ABC, serializers.BaseSerializer): - """ - Serialize a list of serialized annotations into a IIIF annotation list - """ - - annotation_serializer = AnnotationSerializer - - def to_representation(self, obj): - assert 'request' in self.context, "A request is required to generate absolute URLs" - - return { - "@context": settings.IIIF_PRESENTATION_CONTEXT, - "@id": self.context['request'].build_absolute_uri(), - "@type": "sc:AnnotationList", - "resources": self.annotation_serializer( - self.get_elements(obj), - context=self.context, - many=True - ).data - } - - @abstractmethod - def get_elements(self, obj): - "Get a list of elements to serialize as annotations." 
- - -class PageAnnotationListSerializer(AnnotationListSerializer): - """ - Serialize a page's transcriptions into a IIIF annotation list - """ - - annotation_serializer = TranscriptionAnnotationSerializer - - def get_elements(self, page): - assert isinstance(page, Page) - return page.transcriptions.all() - - -class PageActAnnotationListSerializer(AnnotationListSerializer): - """ - Serialize an page's acts into a IIIF annotation list - """ - - annotation_serializer = SurfaceAnnotationSerializer - - def get_elements(self, page): - assert isinstance(page, Page) - if page.zone is None: - return [] - return Element.objects.filter(type=ElementType.Surface, zone__image=page.zone.image) - - -class SurfaceAnnotationListSerializer(AnnotationListSerializer): - """ - Serialize a single surface into a IIIF annotation list - """ - - annotation_serializer = SurfaceAnnotationSerializer - - def get_elements(self, surface): - assert isinstance(surface, Element) and surface.type == ElementType.Surface - return [surface] - - def to_representation(self, surface): - serialized = super().to_representation(surface) - query = self.context['request'].query_params.get('q') - # When there's a search query, add all transcriptions that contain the query - # as serialized annotations in the list's resources - if query: - serialized['resources'].extend( - TranscriptionAnnotationSerializer( - Transcription.objects.filter( - zone__image=surface.zone.image, - zone__polygon__in=surface.zone.polygon, - text__icontains=query, - score__gte=0.5, - ), - context=self.context, - many=True - ).data - ) - return serialized - - -class SearchAnnotationListSerializer(AnnotationListSerializer): - """ - Serialize a list of serialized annotations into a search result annotation list - """ - - def to_representation(self, obj): - serialized = super().to_representation(obj) - serialized['@context'] = settings.IIIF_SEARCH_CONTEXT - serialized['within'] = { - "@type": "sc:Layer", - "total": len(serialized['resources']), 
- } - serialized['startIndex'] = 0 - serialized['hits'] = [ - { - "@type": "search:Hit", - "annotations": [anno['@id']], - "match": self.get_match(anno), - } - for anno in serialized['resources'] - ] - return serialized - - def get_match(self, anno): - """Get a match text for an annotation. - This is optional in the Search API but mandatory with Mirador""" - return anno['resource']['chars'] - - -class TranscriptionSearchAnnotationListSerializer(SearchAnnotationListSerializer): - """ - Serialize a transcription search result into an annotation list - """ - - annotation_serializer = TranscriptionSearchAnnotationSerializer - - def get_elements(self, obj): - return obj diff --git a/arkindex/documents/serializers/__init__.py b/arkindex/documents/serializers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/arkindex/documents/serializers/elements.py b/arkindex/documents/serializers/elements.py new file mode 100644 index 0000000000000000000000000000000000000000..d2c0d7820ed37cf30fa972721fb2ec9741532857 --- /dev/null +++ b/arkindex/documents/serializers/elements.py @@ -0,0 +1,135 @@ +from rest_framework import serializers +from arkindex.documents.models import \ + Element, ElementType, Page, PageType, PageDirection, Act, Corpus, MetaData, MetaType +from arkindex.images.serializers import ZoneSerializer, ImageSerializer +from arkindex.dataimport.serializers import RevisionSerializer +from arkindex.project.serializer_fields import EnumField, ViewerURLField + + +class MetaDataSerializer(serializers.ModelSerializer): + """ + Serialises some Metadata for any Element + """ + type = EnumField(MetaType) + revision = RevisionSerializer() + + class Meta: + model = MetaData + fields = ( + 'id', + 'type', + 'name', + 'value', + 'revision', + ) + + +class ElementLightSerializer(serializers.ModelSerializer): + """ + Serialises a Element + """ + type = EnumField(ElementType) + + class Meta: + model = 
Element + fields = ( + 'id', + 'type', + 'name', + ) + + +class PageLightSerializer(serializers.ModelSerializer): + """ + Serialises a Page + """ + page_type = EnumField(PageType) + direction = EnumField(PageDirection) + image = ImageSerializer(source='zone.image') + + class Meta: + model = Page + fields = ( + 'id', + 'page_type', + 'nb', + 'direction', + 'display_name', + 'image', + ) + + +class ElementSerializer(serializers.ModelSerializer): + """ + Fully Serialises a document + """ + type = EnumField(ElementType) + + # TODO: detect correct manifest per element type + viewer_url = ViewerURLField('api:volume-manifest') + + class Meta: + model = Element + fields = ( + 'id', + 'type', + 'name', + 'viewer_url', + ) + + +class ActSerializer(serializers.ModelSerializer): + """ + Serialize an act with its parents and children + and metadatas + """ + + parents = serializers.ListField( + child=serializers.ListField( + child=ElementLightSerializer() + ), + source='parent_elements', + read_only=True, + ) + children = ElementLightSerializer(source='child_elements', read_only=True, many=True) + metadatas = MetaDataSerializer(read_only=True, many=True) + + class Meta: + model = Act + fields = ( + 'id', + 'name', + 'number', + 'parents', + 'children', + 'metadatas', + ) + read_only_fields = ('id', ) + + +class SurfaceSerializer(serializers.ModelSerializer): + """ + Serialize a surface with its page, image and zone + """ + + zone = ZoneSerializer() + page = PageLightSerializer() + + class Meta: + model = Element + fields = ( + 'id', + 'name', + 'page', + 'zone', + ) + + +class CorpusSerializer(serializers.ModelSerializer): + """ + Serialize a corpus + """ + + class Meta: + model = Corpus + fields = ('id', 'name') diff --git a/arkindex/documents/serializers/iiif/__init__.py b/arkindex/documents/serializers/iiif/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e1104a8b1c97500e07d7473aaa3d643333023641 --- /dev/null +++ 
b/arkindex/documents/serializers/iiif/__init__.py @@ -0,0 +1,5 @@ +# flake8: noqa +from arkindex.documents.serializers.iiif.manifests import VolumeManifestSerializer, ActManifestSerializer +from arkindex.documents.serializers.iiif.annotations import \ + PageAnnotationListSerializer, PageActAnnotationListSerializer, \ + SurfaceAnnotationListSerializer, TranscriptionSearchAnnotationListSerializer diff --git a/arkindex/documents/serializers/iiif/annotations.py b/arkindex/documents/serializers/iiif/annotations.py new file mode 100644 index 0000000000000000000000000000000000000000..b051fbd87713e9a0f316caa92357e9bf92b363a6 --- /dev/null +++ b/arkindex/documents/serializers/iiif/annotations.py @@ -0,0 +1,222 @@ +from abc import ABC, abstractmethod +from django.conf import settings +from rest_framework import serializers +from arkindex.documents.models import Element, ElementType, Act, Page, Transcription +from arkindex.project.tools import build_absolute_url + + +class AnnotationSerializer(ABC, serializers.BaseSerializer): + """ + Serialize an element into a IIIF annotation + """ + + @abstractmethod + def get_url(self, element): + """Get an element's ID URL""" + + def get_resource(self, element): + """Get the annotation resource for a given element.""" + return { + "@id": self.get_url(), + "@type": "cnt:ContentAsText", + "chars": element.name, + "format": "text/plain" + } + + def get_target(self, element): + """Get the target canvas (`on` property) for a given element.""" + assert hasattr(element, 'zone') + return "{0}#xywh={1.x},{1.y},{1.width},{1.height}".format( + element.zone.image.get_thumbnail_url(max_width=None, max_height=None), + element.zone.polygon) + + def to_representation(self, element): + assert isinstance(element, (Element, Transcription)) + assert 'request' in self.context, "A request is required to generate absolute URLs" + return { + "@id": self.get_url(element), + "@type": "oa:Annotation", + "motivation": "sc:painting", + "on": 
self.get_target(element), + "resource": self.get_resource(element), + } + + +class TranscriptionAnnotationSerializer(AnnotationSerializer): + """ + Serialize a transcription into a IIIF annotation + """ + + def get_url(self, ts): + return build_absolute_url( + ts, + self.context['request'], + 'api:transcription-manifest', + id_argument='page_pk', + transcription_pk=ts.id, + ) + + def get_resource(self, ts): + return { + "@id": self.get_url(ts), + "@type": "cnt:ContentAsText", + "chars": ts.text, + "format": "text/plain" + } + + +class TranscriptionSearchAnnotationSerializer(TranscriptionAnnotationSerializer): + + def get_target(self, element): + assert isinstance(element, Transcription) + url = build_absolute_url(element.element, self.context['request'], 'api:canvas-manifest') + return "{0}#xywh={1.x},{1.y},{1.width},{1.height}".format(url, element.zone.polygon) + + +class SurfaceAnnotationSerializer(AnnotationSerializer): + """ + Serialize a surface into a IIIF annotation + """ + + def get_url(self, surface): + return build_absolute_url( + surface, + self.context['request'], + 'api:surface-manifest', + id_argument='page_pk', + surface_pk=surface.id, + ) + + def get_resource(self, surface): + act = Act.objects.get_ascending(surface.id)[0] + chars = 'Act {}'.format(act.number) if act is not None else 'Unknown act' + return { + "@id": self.get_url(surface), + "@type": "cnt:ContentAsText", + "chars": chars, + "format": "text/plain" + } + + +class AnnotationListSerializer(ABC, serializers.BaseSerializer): + """ + Serialize a list of serialized annotations into a IIIF annotation list + """ + + annotation_serializer = AnnotationSerializer + + def to_representation(self, obj): + assert 'request' in self.context, "A request is required to generate absolute URLs" + + return { + "@context": settings.IIIF_PRESENTATION_CONTEXT, + "@id": self.context['request'].build_absolute_uri(), + "@type": "sc:AnnotationList", + "resources": self.annotation_serializer( + 
self.get_elements(obj), + context=self.context, + many=True + ).data + } + + @abstractmethod + def get_elements(self, obj): + "Get a list of elements to serialize as annotations." + + +class PageAnnotationListSerializer(AnnotationListSerializer): + """ + Serialize a page's transcriptions into a IIIF annotation list + """ + + annotation_serializer = TranscriptionAnnotationSerializer + + def get_elements(self, page): + assert isinstance(page, Page) + return page.transcriptions.all() + + +class PageActAnnotationListSerializer(AnnotationListSerializer): + """ + Serialize an page's acts into a IIIF annotation list + """ + + annotation_serializer = SurfaceAnnotationSerializer + + def get_elements(self, page): + assert isinstance(page, Page) + if page.zone is None: + return [] + return Element.objects.filter(type=ElementType.Surface, zone__image=page.zone.image) + + +class SurfaceAnnotationListSerializer(AnnotationListSerializer): + """ + Serialize a single surface into a IIIF annotation list + """ + + annotation_serializer = SurfaceAnnotationSerializer + + def get_elements(self, surface): + assert isinstance(surface, Element) and surface.type == ElementType.Surface + return [surface] + + def to_representation(self, surface): + serialized = super().to_representation(surface) + query = self.context['request'].query_params.get('q') + # When there's a search query, add all transcriptions that contain the query + # as serialized annotations in the list's resources + if query: + serialized['resources'].extend( + TranscriptionAnnotationSerializer( + Transcription.objects.filter( + zone__image=surface.zone.image, + zone__polygon__in=surface.zone.polygon, + text__icontains=query, + score__gte=0.5, + ), + context=self.context, + many=True + ).data + ) + return serialized + + +class SearchAnnotationListSerializer(AnnotationListSerializer): + """ + Serialize a list of serialized annotations into a search result annotation list + """ + + def to_representation(self, obj): + serialized 
= super().to_representation(obj) + serialized['@context'] = settings.IIIF_SEARCH_CONTEXT + serialized['within'] = { + "@type": "sc:Layer", + "total": len(serialized['resources']), + } + serialized['startIndex'] = 0 + serialized['hits'] = [ + { + "@type": "search:Hit", + "annotations": [anno['@id']], + "match": self.get_match(anno), + } + for anno in serialized['resources'] + ] + return serialized + + def get_match(self, anno): + """Get a match text for an annotation. + This is optional in the Search API but mandatory with Mirador""" + return anno['resource']['chars'] + + +class TranscriptionSearchAnnotationListSerializer(SearchAnnotationListSerializer): + """ + Serialize a transcription search result into an annotation list + """ + + annotation_serializer = TranscriptionSearchAnnotationSerializer + + def get_elements(self, obj): + return obj diff --git a/arkindex/documents/serializers/iiif/manifests.py b/arkindex/documents/serializers/iiif/manifests.py new file mode 100644 index 0000000000000000000000000000000000000000..b40f97c2c4e06b30179894ff30cd6c5c17f5491d --- /dev/null +++ b/arkindex/documents/serializers/iiif/manifests.py @@ -0,0 +1,230 @@ + +from django.conf import settings +from rest_framework import serializers +from arkindex.documents.models import Element, ElementType, Page, Act +from arkindex.images.models import Image, Zone +from arkindex.project.tools import sslify_url, build_absolute_url +import urllib.parse + + +class ImageResourceManifestSerializer(serializers.BaseSerializer): + """ + Serialize an image into a IIIF resource. 
+ """ + + def to_representation(self, image): + assert isinstance(image, Image) + return { + "@id": sslify_url(image.get_thumbnail_url(max_width=None, max_height=None)), + "@type": "dctypes:Image", + "height": image.height, + "width": image.width, + "service": { + "@context": settings.IIIF_IMAGE_CONTEXT, + "@id": sslify_url(image.url), + "profile": settings.IIIF_IMAGE_SERVICE_PROFILE + } + } + + +class ImageThumbnailManifestSerializer(serializers.BaseSerializer): + """ + Serialize an image into a IIIF manifest thumbnail + """ + + def to_representation(self, image): + assert isinstance(image, Image) + return { + "@id": sslify_url(image.get_thumbnail_url()), + "service": { + "@context": settings.IIIF_IMAGE_CONTEXT, + "@id": sslify_url(image.url), + "profile": settings.IIIF_IMAGE_SERVICE_PROFILE + } + } + + +class ElementCanvasManifestSerializer(serializers.BaseSerializer): + """ + Serialize an element's zone into a IIIF canvas + """ + + def get_other_content(self, element): + return [] + + def to_representation(self, element): + assert isinstance(element, Element) + zone = element.zone + assert isinstance(zone, Zone) + assert 'request' in self.context, "A request is required to generate absolute URLs" + return { + "@id": build_absolute_url(element, self.context['request'], 'api:canvas-manifest'), + "@type": "sc:Canvas", + "label": element.name, + "height": zone.polygon.height, + "width": zone.polygon.width, + "images": [ + { + "@type": "oa:Annotation", + "resource": ImageResourceManifestSerializer(zone.image, context=self.context).data, + "on": build_absolute_url(element, self.context['request'], 'api:canvas-manifest'), + "motivation": "sc:painting" + } + ], + "otherContent": self.get_other_content(element), + } + + +class PageCanvasManifestSerializer(ElementCanvasManifestSerializer): + """ + Serialize a page's zone into a IIIF canvas + """ + + def to_representation(self, page): + assert isinstance(page, Page) + serialized_element = super().to_representation(page) + 
class ActPageCanvasManifestSerializer(PageCanvasManifestSerializer):
    """
    Serialize a page into a IIIF canvas whose ``otherContent`` lists one
    annotation list per surface of the act attached to the page
    """

    def get_other_content(self, page):
        """
        Return one ``sc:AnnotationList`` reference per surface found under
        ``page.act`` on the same image as the page.

        When the request has a non-empty ``q`` query parameter, it is
        appended, URL-encoded, to every annotation list URL.
        """
        assert hasattr(page, 'act')
        request = self.context['request']

        query = request.query_params.get('q')
        if query:
            suffix = '?' + urllib.parse.urlencode({'q': query})
        else:
            suffix = ''

        surfaces = Element.objects.get_descending(
            page.act.id,
            type=ElementType.Surface,
            zone__image_id=page.zone.image_id
        )

        annotation_lists = []
        for surface in surfaces:
            surface_url = build_absolute_url(surface, request, 'api:surface-manifest')
            annotation_lists.append({
                "@id": surface_url + suffix,
                "@type": "sc:AnnotationList",
                "label": surface.name
            })
        return annotation_lists
class ActManifestSerializer(ManifestSerializer):
    """
    Serialize an act into a IIIF manifest

    Canvases are the pages whose image holds at least one surface of the
    act; they are rendered with ActPageCanvasManifestSerializer.
    """

    canvas_serializer = ActPageCanvasManifestSerializer
    id_url_name = 'api:act-manifest'

    def get_canvases(self, act):
        """
        Return the list of pages to use as canvases for `act`.

        Pages are ordered by the first position of their image among the
        image IDs of the act's surfaces, and each page gets an ``act``
        attribute pointing at `act` for the canvas serializer.
        """
        assert isinstance(act, Act)
        image_ids = list(Element.objects
                         .get_descending(act.id, type=ElementType.Surface)
                         .values_list('zone__image_id', flat=True))
        pages = Page.objects \
            .filter(zone__image_id__in=image_ids) \
            .select_related('zone__image__server')

        # This query gives unordered pages so we reorder them manually.
        # Positions are precomputed once instead of calling list.index for
        # every page; when an image ID appears more than once, its first
        # position is kept, exactly as list.index would return.
        positions = {}
        for position, image_id in enumerate(image_ids):
            positions.setdefault(image_id, position)
        ordered_pages = sorted(pages, key=lambda page: positions[page.zone.image_id])

        # Add act info for canvas serializer
        for page in ordered_pages:
            page.act = act
        return ordered_pages
class TranscriptionCreateSerializer(serializers.Serializer):
    """
    Allows for insertion of new transcriptions and zones

    Validates the payload for a single transcription: an existing element,
    a polygon, a text, a score and a transcription type.
    """
    # Primary key of an existing Element
    element = serializers.PrimaryKeyRelatedField(queryset=Element.objects.all())
    # At least three items, each a list of exactly two integers
    # (presumably [x, y] points; confirm the ordering with the API consumers)
    polygon = serializers.ListField(
        child=serializers.ListField(
            child=serializers.IntegerField(),
            min_length=2,
            max_length=2
        ),
        min_length=3
    )
    text = serializers.CharField()
    # Float restricted to the range 0 to 1, bounds included
    score = serializers.FloatField(min_value=0, max_value=1)
    # Member of the TranscriptionType enum
    type = EnumField(TranscriptionType)
class TranscriptionsSerializer(serializers.Serializer):
    """
    Allows for insertion of new transcriptions and zones
    in Bulk (each item is validated by TranscriptionBulkSerializer)
    on a common parent
    """
    # List of transcription payloads, none of which carries its own element
    transcriptions = TranscriptionBulkSerializer(many=True)
    # Primary key of an existing Element, shared by all the transcriptions
    parent = serializers.PrimaryKeyRelatedField(queryset=Element.objects.all())
    # Primary key of an existing Image
    image = serializers.PrimaryKeyRelatedField(queryset=Image.objects.all())
def build_absolute_url(element, request, name, id_argument='pk', **kwargs):
    """
    Return the absolute URL of the view called `name` for `element`.

    The element ID is passed to the URL resolver as a string under the
    `id_argument` keyword, replacing any extra keyword argument with the
    same name; the remaining keyword arguments are forwarded unchanged.
    The resolved path is then made absolute using `request`.
    """
    url_kwargs = dict(kwargs)
    url_kwargs[id_argument] = str(element.id)
    relative_url = reverse(name, kwargs=url_kwargs)
    return request.build_absolute_uri(relative_url)