
Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

arkindex/backend
Commits on Source (13)
Showing 906 additions and 449 deletions
0.14.3-beta2
0.14.3-rc1
......@@ -694,9 +694,11 @@ class WorkerVersionList(ListCreateAPIView):
}
def get_queryset(self):
return WorkerVersion.objects.filter(
worker_id=self.kwargs['pk']
).prefetch_related('revision').order_by('-revision__created')
return WorkerVersion.objects \
.filter(worker_id=self.kwargs['pk']) \
.select_related('revision__repo', 'worker__repository') \
.prefetch_related('revision__refs', 'revision__versions') \
.order_by('-revision__created')
def create(self, request, *args, **kwargs):
serializer = self.get_serializer(data=request.data)
......
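Note on the queryset change above: the previous shape only prefetched the revision, so serializing each version still issued extra queries for the revision's repository, the worker's repository and the revision's refs. A minimal, hypothetical sketch of how the two shapes compare in query count (the count_queries helper and the worker_id variable are illustrative, not part of this diff):

# Hypothetical sketch, not part of the diff: comparing query counts for the two queryset shapes.
from django.db import connection
from django.test.utils import CaptureQueriesContext

from arkindex.dataimport.models import WorkerVersion


def count_queries(queryset):
    # Evaluate the queryset and touch the relations the serializer needs
    with CaptureQueriesContext(connection) as ctx:
        for version in queryset:
            _ = version.revision.repo               # JOINed by select_related in the new shape
            _ = list(version.revision.refs.all())   # served from the prefetch cache in the new shape
    return len(ctx)


# Old shape: revision is prefetched, but repo and refs trigger one query per row (N+1)
old_queries = count_queries(
    WorkerVersion.objects.filter(worker_id=worker_id).prefetch_related('revision')
)
# New shape: a single JOINed query plus one query per prefetched relation
new_queries = count_queries(
    WorkerVersion.objects
    .filter(worker_id=worker_id)
    .select_related('revision__repo', 'worker__repository')
    .prefetch_related('revision__refs', 'revision__versions')
)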
......@@ -352,13 +352,24 @@ class Revision(IndexableModel):
@property
def state(self):
# Computes revision state according to its versions one
"""
Computes the revision state according to its versions' states:
# If there is one version in error, revision state is too
# Else if there is one version processing, revision state is too
# Else if all versions are available, then the revision is too
# Else, the revision is created since it has either no version or versions mixing processing/created states
states = set(self.versions.values_list('state', flat=True))
If any version is in error, the revision is in error too
Else, if any version is processing, the revision is processing too
Else, if all versions are available, the revision is available too
Else, the revision is created, since it has either no versions or versions mixing processing/created states
"""
# This prevents performing another SQL request when versions have already been prefetched.
# See https://stackoverflow.com/a/19651840/5990435
if (
hasattr(self, "_prefetched_objects_cache")
and self.versions.field.remote_field.get_cache_name()
in self._prefetched_objects_cache
):
states = set(version.state for version in self.versions.all())
else:
states = set(self.versions.values_list('state', flat=True))
if WorkerVersionState.Error in states:
return WorkerVersionState.Error
......
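The hunk above is truncated after the Error branch; following the docstring's priority order, the remaining branches presumably look like the sketch below (only WorkerVersionState.Error appears in the diff, the other member names are assumed from the docstring wording):

# Sketch of the remaining branches of Revision.state, assuming WorkerVersionState
# also has Processing, Available and Created members as suggested by the docstring.
if WorkerVersionState.Error in states:
    return WorkerVersionState.Error
if WorkerVersionState.Processing in states:
    return WorkerVersionState.Processing
if states == {WorkerVersionState.Available}:
    return WorkerVersionState.Available
# No versions at all, or versions mixing Processing/Created states
return WorkerVersionState.Created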
......@@ -46,6 +46,11 @@ class WorkerVersionSerializer(serializers.ModelSerializer):
'worker',
)
read_only_fields = ('docker_image_name',)
# Avoid loading all revisions and all Ponos artifacts when opening this endpoint in a browser
extra_kwargs = {
'revision': {'style': {'base_template': 'input.html'}},
'docker_image': {'style': {'base_template': 'input.html'}},
}
def to_representation(self, instance):
self.fields['revision'] = RevisionWithRefsSerializer(read_only=True)
......
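For context on the extra_kwargs added above: in DRF's browsable API, a writable related field renders as a <select> listing every candidate object, which is what loads all revisions and Ponos artifacts. Overriding the widget through extra_kwargs is shorthand for declaring the fields explicitly with a style, roughly like this sketch (the explicit field declarations, and the Artifact model name, are illustrative rather than the serializer's actual code):

# Rough equivalent of the extra_kwargs override (a sketch, not the real serializer):
# rendering the relations as plain text inputs keeps the browsable API from building
# a <select> that evaluates the whole Revision and Artifact querysets.
class WorkerVersionSerializer(serializers.ModelSerializer):
    revision = serializers.PrimaryKeyRelatedField(
        queryset=Revision.objects.all(),
        style={'base_template': 'input.html'},
    )
    docker_image = serializers.PrimaryKeyRelatedField(
        queryset=Artifact.objects.all(),  # assumed Ponos artifact model
        style={'base_template': 'input.html'},
    )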
......@@ -679,7 +679,7 @@ class TestProcessElements(FixtureAPITestCase):
def test_elements_count(self):
"""
Elements count can be retrieved when no cursor is set
Elements count can be retrieved with the with_count parameter
"""
self.client.force_login(self.superuser)
with self.assertNumQueries(7):
......@@ -697,7 +697,8 @@ class TestProcessElements(FixtureAPITestCase):
second_page = self.client.get(next_url)
data = second_page.json()
self.assertIsNone(data['count'])
# The count should still be present when following the next page URL
self.assertEqual(data['count'], 12)
self.assertIsNone(data['next'])
self.assertEqual(len(data['results']), 6)
......
......@@ -162,12 +162,14 @@ class TestWorkersWorkerVersions(FixtureAPITestCase):
# Tests on get_queryset for WorkerVersionList
def test_versions_list_requires_login(self):
response = self.client.get(reverse('api:worker-versions', kwargs={'pk': str(self.worker_1.id)}))
with self.assertNumQueries(0):
response = self.client.get(reverse('api:worker-versions', kwargs={'pk': str(self.worker_1.id)}))
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
def test_versions_list(self):
self.client.force_login(self.user)
response = self.client.get(reverse('api:worker-versions', kwargs={'pk': str(self.worker_1.id)}))
with self.assertNumQueries(5):
response = self.client.get(reverse('api:worker-versions', kwargs={'pk': str(self.worker_1.id)}))
self.assertEqual(response.status_code, status.HTTP_200_OK)
data = response.json()
self.assertEqual(len(data), 1)
......@@ -190,7 +192,8 @@ class TestWorkersWorkerVersions(FixtureAPITestCase):
configuration={"test": "test2"}
)
response = self.client.get(reverse('api:worker-versions', kwargs={'pk': str(worker_2.id)}))
with self.assertNumQueries(5):
response = self.client.get(reverse('api:worker-versions', kwargs={'pk': str(worker_2.id)}))
self.assertEqual(response.status_code, status.HTTP_200_OK)
data = response.json()
self.assertEqual(len(data), 1)
......
......@@ -74,9 +74,9 @@ class ElementAdmin(admin.ModelAdmin):
class TranscriptionAdmin(admin.ModelAdmin):
list_display = ('id', 'text', 'score', 'element', )
list_filter = [('type', EnumFieldListFilter), 'source']
fields = ('id', 'text', 'score', 'element', 'zone', 'source', )
fields = ('id', 'text', 'score', 'element', 'source', )
readonly_fields = ('id', )
raw_id_fields = ('element', 'zone', )
raw_id_fields = ('element', )
class MLClassAdmin(admin.ModelAdmin):
......
......@@ -3,7 +3,8 @@ from datetime import datetime, timezone
from psycopg2.extras import execute_values
from django.conf import settings
from django.db import transaction, connection
from django.db.models import Q, Prefetch, Max, QuerySet
from django.db.models import Q, Prefetch, Max, QuerySet, CharField
from django.db.models.functions import Cast
from django.shortcuts import get_object_or_404
from django.utils.functional import cached_property
from rest_framework.exceptions import ValidationError, NotFound
......@@ -790,13 +791,11 @@ class ElementTranscriptions(ListAPIView):
))
self.check_object_permissions(self.request, element)
# ORDER BY casting IDs as char to avoid the PostgreSQL optimizer's inefficient scan
queryset = Transcription.objects \
.prefetch_related('zone__image__server', 'source') \
.extra(
# ORDER BY casting IDs as char to avoid PostgreSQL optimizer inefficient scan
select={'char_id': 'CAST(id AS CHAR(36))'},
order_by=['char_id']
)
.prefetch_related('element__zone__image__server', 'source') \
.annotate(char_id=Cast('id', output_field=CharField())) \
.order_by('char_id')
if self.is_recursive:
queryset = queryset.filter(
......@@ -1020,7 +1019,7 @@ class ElementBulkCreate(CreateAPIView):
# Use WKB representation to compare existing zones
# to avoid comparing references or slower coordinates
polygon.wkb: zone_id
for polygon, zone_id in Zone.objects.filter(image_id=image_id).values_list('polygon', 'id')
for polygon, zone_id in Zone.objects.using('default').filter(image_id=image_id).values_list('polygon', 'id')
}
# Retrieve or create required zones
......@@ -1047,6 +1046,7 @@ class ElementBulkCreate(CreateAPIView):
int,
ElementPath
.objects
.using('default')
.filter(
path__last=self.element.id,
element__type_id__in=set(element_data['type'] for element_data in elements)
......
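The .using('default') calls added here (and the one on the MLClass queryset later in this diff) presumably pin these read queries to the primary database so that rows created earlier in the same bulk request are guaranteed to be visible. A sketch of the kind of read/write-splitting router that makes this necessary (router and alias names are assumptions, not taken from this diff):

# Hypothetical database router: with such routing in place, a replica may lag behind
# the primary, so lookups that must see freshly written rows use .using('default').
class PrimaryReplicaRouter:
    def db_for_read(self, model, **hints):
        return 'replica'    # reads normally go to a replica

    def db_for_write(self, model, **hints):
        return 'default'    # writes always go to the primary

# settings.py (assumed):
# DATABASES = {'default': {...}, 'replica': {...}}
# DATABASE_ROUTERS = ['arkindex.project.routers.PrimaryReplicaRouter']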
......@@ -109,14 +109,14 @@ class EntityElements(ListAPIView):
corpus__in=Corpus.objects.readable(self.request.user),
metadatas__entity_id=pk
) \
.select_related('type') \
.prefetch_related('metadatas__entity', 'metadatas__revision', 'corpus')
.select_related('type', 'corpus') \
.prefetch_related('metadatas__entity', 'metadatas__revision', 'zone__image__server')
transcription_elements = Element.objects \
.filter(
corpus__in=Corpus.objects.readable(self.request.user),
transcriptions__transcription_entities__entity_id=pk
).select_related('type') \
.prefetch_related('metadatas__entity', 'metadatas__revision', 'corpus')
).select_related('type', 'corpus') \
.prefetch_related('metadatas__entity', 'metadatas__revision', 'zone__image__server')
return metadata_elements.union(transcription_elements) \
.order_by('name', 'type')
......
......@@ -278,7 +278,6 @@ class ElementTranscriptionsBulk(CreateAPIView):
transcriptions.append(Transcription(
element=annotation['element'],
type=tr_type,
zone=None,
worker_version=worker_version,
text=annotation['text'],
score=annotation['score']
......@@ -508,7 +507,12 @@ class ClassificationReject(ClassificationModerationActionsMixin):
def put(self, request, *args, **kwargs):
instance = self.get_object()
if instance.source.slug == 'manual':
manual = (
instance.source and instance.source.slug == 'manual'
or not instance.source and not instance.worker_version
)
if manual:
# Delete manual classifications upon rejection
instance.delete()
return Response(None, status=status.HTTP_204_NO_CONTENT)
......
......@@ -75,29 +75,28 @@ class ReindexConsumer(SyncConsumer):
elif element_id or corpus_id:
if element_id:
# Pick this element, and all its children
elements_queryset = list(Element.objects.get_descending(element_id))
elements_queryset.append(Element.objects.get(id=element_id))
elements_queryset = Element.objects.filter(Q(id=element_id) | Q(paths__path__contains=[element_id]))
else:
# Pick all elements in the corpus
elements_queryset = Element.objects.filter(corpus_id=corpus_id)
transcriptions_queryset = Transcription.objects.filter(
element__in=elements_queryset,
zone__isnull=False
)
transcriptions_queryset = Transcription.objects.filter(element__in=elements_queryset)
entities_queryset = Entity.objects.filter(
Q(metadatas__element__in=elements_queryset)
| Q(transcriptions__element__in=elements_queryset)
)
else:
transcriptions_queryset = Transcription.objects.filter(zone__isnull=False)
transcriptions_queryset = Transcription.objects.all()
elements_queryset = Element.objects.all()
entities_queryset = Entity.objects.all()
if transcriptions:
indexer.run_index(transcriptions_queryset, bulk_size=400)
indexer.run_index(transcriptions_queryset.select_related('element'), bulk_size=400)
if elements:
indexer.run_index(elements_queryset, bulk_size=100)
indexer.run_index(
elements_queryset.select_related('type').prefetch_related('metadatas', 'transcriptions'),
bulk_size=100,
)
if entities:
indexer.run_index(entities_queryset, bulk_size=400)
......
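The element_id branch above now expresses "this element and all of its descendants" as a single queryset instead of materialising a list: every descendant stores its chain of ancestors in ElementPath.path, so an array-contains lookup on that field matches the whole subtree. A small illustration (the vol_id variable is hypothetical):

# Illustration of the descendant lookup used above (vol_id is hypothetical).
# A page stored under volume vol_id has an ElementPath whose path array contains vol_id,
# so paths__path__contains=[vol_id] matches every element below the volume,
# while Q(id=vol_id) adds the volume itself.
subtree = Element.objects.filter(
    Q(id=vol_id) | Q(paths__path__contains=[vol_id])
)
# A .distinct() may be needed on top of this if elements can appear under several parents.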
This diff is collapsed.
......@@ -2,7 +2,7 @@
from django.core.management.base import BaseCommand
from arkindex_common.ml_tool import MLToolType
from arkindex_common.enums import TranscriptionType, MetaType
from arkindex.documents.models import Corpus, Element, Transcription, DataSource, MetaData
from arkindex.documents.models import Corpus, Element, DataSource, MetaData
from arkindex.dataimport.models import RepositoryType, WorkerVersion, WorkerVersionState, Workflow
from arkindex.images.models import ImageServer, Image, Zone
from arkindex.users.models import User, CorpusRight
......@@ -150,6 +150,11 @@ class Command(BaseCommand):
# Allows manual transcriptions of type 'line' for text lines
allowed_transcription=TranscriptionType.Line
)
word_type = corpus.types.create(
slug='word',
display_name='Word',
allowed_transcription=TranscriptionType.Word
)
# Create 2 volumes
vol1 = Element.objects.create(
......@@ -217,21 +222,24 @@ class Command(BaseCommand):
# Create transcriptions on images of volume 1
for page in (p1_1, p1_2, p1_3):
for word, pos in [("PARIS", 100), ("ROY", 400), ("DATUM", 700)]:
Transcription.objects.create(
element=page,
element = corpus.elements.create(
type=word_type,
name=word,
zone=makezone(page.zone.image, pos, pos + 100)
)
element.add_parent(page)
element.transcriptions.create(
source=recognizer_source,
text=word,
type=TranscriptionType.Word,
zone=makezone(page.zone.image, pos, pos + 100),
score=1.0,
)
# Create a page transcriptions on page 1 with no zone
# Create a page transcription on page 1
p1_1.transcriptions.create(
source=recognizer_source,
text='Lorem ipsum dolor sit amet',
type=TranscriptionType.Page,
zone=None,
score=1.0,
)
......
......@@ -16,13 +16,15 @@ logger = logging.getLogger(__name__)
def get_transcriptions(corpus=None, folder=None):
if folder:
# Lookup all the transcriptions linked to a folder
return Transcription.objects.filter(
queryset = Transcription.objects.filter(
element__in=Element.objects.get_descending(folder.id)
).distinct()
elif corpus:
return Transcription.objects.filter(element__corpus=corpus)
queryset = Transcription.objects.filter(element__corpus=corpus)
else:
queryset = Transcription.objects.all()
return Transcription.objects.all()
return queryset.select_related('element')
def get_elements(corpus=None, folder=None):
......@@ -33,7 +35,7 @@ def get_elements(corpus=None, folder=None):
else:
queryset = Element.objects.all()
return queryset.prefetch_related('metadatas', 'transcriptions')
return queryset.select_related('type').prefetch_related('metadatas', 'transcriptions')
def get_entities(corpus=None, folder=None):
......
# Generated by Django 3.1 on 2020-09-01 07:48
from django.db import migrations, models
from arkindex_common.enums import TranscriptionType
def preflight_checks(apps, schema_editor):
ElementType = apps.get_model('documents', 'ElementType')
existing_types = []
for ts_type in TranscriptionType:
if ElementType.objects.filter(slug=f'transcription_{ts_type.value}').exists():
existing_types.append(f'`transcription_{ts_type.value}`')
if existing_types:
raise AssertionError(
'This migration could not be run because one or more element types use the reserved slug(s) '
+ ', '.join(existing_types)
)
FORWARD_SQL = [
'CREATE EXTENSION IF NOT EXISTS "uuid-ossp";',
# Early handling for the edge case of transcriptions already on the correct element
"""
UPDATE documents_transcription transcription
SET zone_id = NULL
FROM documents_element element
WHERE transcription.element_id = element.id
AND transcription.zone_id IS NOT NULL
AND transcription.zone_id = element.zone_id;
""",
# Create element types starting with transcription_* as needed
"""
INSERT INTO documents_elementtype (id, corpus_id, slug, display_name, folder, allowed_transcription)
SELECT
uuid_generate_v4(),
element.corpus_id,
'transcription_' || transcription.type,
initcap(transcription.type) || ' Transcription',
FALSE,
transcription.type
FROM documents_transcription transcription
INNER JOIN documents_element element ON (element.id = transcription.element_id)
WHERE transcription.zone_id IS NOT NULL
GROUP BY element.corpus_id, transcription.type;
""",
# Create new elements
"""
INSERT INTO documents_element (id, corpus_id, type_id, name, zone_id, source_id, worker_version_id, created, updated)
SELECT
transcription.id,
element.corpus_id,
type.id,
(ROW_NUMBER() OVER (
PARTITION BY
transcription.element_id,
transcription.source_id,
transcription.worker_version_id,
transcription.type
ORDER BY
ST_Y(ST_StartPoint(polygon)),
ST_X(ST_StartPoint(polygon))
))::varchar,
transcription.zone_id,
transcription.source_id,
transcription.worker_version_id,
NOW(),
NOW()
FROM
documents_transcription transcription
INNER JOIN documents_element element on (transcription.element_id = element.id)
INNER JOIN documents_elementtype type ON (type.corpus_id = element.corpus_id AND type.slug = 'transcription_' || transcription.type)
INNER JOIN images_zone zone ON (transcription.zone_id = zone.id);
""",
# Create element paths
# Append to existing parent paths of the parent element, or create one new element path with the parent element itself in it
"""
INSERT INTO documents_elementpath (id, element_id, path, ordering)
SELECT
uuid_generate_v4(),
transcription.id,
COALESCE(path.path, ARRAY[]::uuid[]) || transcription.element_id,
ROW_NUMBER() OVER (
PARTITION BY transcription.element_id
ORDER BY
ST_Y(ST_StartPoint(polygon)),
ST_X(ST_StartPoint(polygon))
)
FROM
documents_transcription transcription
INNER JOIN images_zone zone ON (zone.id = transcription.zone_id)
LEFT JOIN documents_elementpath path ON (path.element_id = transcription.element_id);
""",
# Move transcriptions to their new elements
"""
UPDATE documents_transcription
SET element_id = id
WHERE zone_id IS NOT NULL;
""",
# At this point, we could drop the zone column, but doing so would fail due to 'pending trigger events':
# Postgres does not allow editing the schema *after* editing the data in the same transaction.
# This migration is continued in documents.0022 to allow a new database transaction to happen.
]
class Migration(migrations.Migration):
dependencies = [
('documents', '0020_remove_source_xor_version_constraint'),
('images', '0005_polygon_index')
]
operations = [
migrations.AddConstraint(
model_name='transcription',
constraint=models.CheckConstraint(
check=~models.Q(source_id__isnull=False, worker_version_id__isnull=False),
name='transcription_source_not_worker_version',
)
),
migrations.RunPython(
preflight_checks,
reverse_code=migrations.RunPython.noop,
elidable=True,
),
migrations.RunSQL(
FORWARD_SQL,
reverse_sql=migrations.RunSQL.noop,
elidable=True,
),
]
# Generated by Django 3.1 on 2020-09-01 07:48
from django.db import migrations
FORWARD_SQL = [
# Use a temporary table here to iterate over the transcriptions just once before deleting,
# so this migration only takes a few minutes.
# Note the unusual join conditions: either source_id or worker_version_id is NULL,
# which makes a NATURAL JOIN or a JOIN … USING fail since comparing NULLs returns NULL.
"""
CREATE TEMPORARY TABLE duplicate_ids AS
WITH filters AS (
SELECT
sub.*,
FIRST_VALUE(id) OVER (
PARTITION BY
transcription.element_id,
transcription.source_id,
transcription.worker_version_id
) AS keep_id
FROM documents_transcription transcription
INNER JOIN (
SELECT element_id, source_id, worker_version_id
FROM documents_transcription
GROUP BY element_id, source_id, worker_version_id
HAVING COUNT(*) > 1
) sub ON (
sub.element_id = transcription.element_id AND (
sub.source_id = transcription.source_id
OR sub.worker_version_id = transcription.worker_version_id
)
)
)
SELECT id
FROM documents_transcription transcription
INNER JOIN filters ON (
filters.element_id = transcription.element_id AND (
filters.source_id = transcription.source_id
OR filters.worker_version_id = transcription.worker_version_id
)
)
WHERE keep_id != id;
""",
# Remove any TranscriptionEntity that could be linked to the duplicate transcriptions
"""
DELETE FROM documents_transcriptionentity transcriptionentity
USING duplicate_ids
WHERE transcriptionentity.transcription_id = duplicate_ids.id;
""",
# Remove duplicate transcriptions
"""
DELETE FROM documents_transcription transcription
USING duplicate_ids
WHERE transcription.id = duplicate_ids.id;
""",
'DROP TABLE duplicate_ids;',
]
class Migration(migrations.Migration):
dependencies = [
('documents', '0021_move_transcriptions'),
]
operations = [
migrations.RemoveField(
model_name='transcription',
name='zone',
),
# Remove the few remaining transcriptions that would break the unique constraints we will add in documents.0023.
# Those are transcriptions from the same source, on the same element, with the exact same zones.
# This query is rather complex as we want to only remove duplicates, and window functions have their limits,
# but the GROUP BY…HAVING will quickly exclude most of the table so it isn't slow.
migrations.RunSQL(
FORWARD_SQL,
reverse_sql=migrations.RunSQL.noop,
elidable=True,
),
]
......@@ -440,12 +440,6 @@ class Transcription(models.Model):
max_length=50,
db_index=True,
)
zone = models.ForeignKey(
'images.Zone',
on_delete=models.PROTECT,
related_name='transcriptions',
null=True,
)
source = models.ForeignKey(
DataSource,
on_delete=models.CASCADE,
......@@ -468,17 +462,18 @@ class Transcription(models.Model):
related_name='transcriptions',
)
class Meta:
# The following index was attempted with md5(text) in a manual migration
# but it causes too many performance issues.
# unique_together = (
# ('element', 'zone', 'text')
# )
pass
def __str__(self):
return 'Transcription: {}'.format(self.text[:20])
class Meta:
constraints = [
# Require either a source, a worker version, or none (manual), but not both at once
models.CheckConstraint(
check=~Q(source_id__isnull=False, worker_version_id__isnull=False),
name='transcription_source_not_worker_version',
)
]
class TranscriptionEntity(models.Model):
"""
......
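The new Meta above replaces the commented-out unique_together with a check constraint; ~Q(source_id__isnull=False, worker_version_id__isnull=False) only forbids the combination where both fields are set, so manual transcriptions (neither field set) remain valid. A sketch of the behaviour (the element, source and worker version objects are hypothetical):

# Behaviour of the transcription_source_not_worker_version constraint (a sketch):
#   source=None, worker_version=None  -> allowed (manual transcription)
#   source set,  worker_version=None  -> allowed
#   source=None, worker_version set   -> allowed
#   source set,  worker_version set   -> rejected by the database
from django.db.utils import IntegrityError

try:
    Transcription.objects.create(
        element=page,                    # hypothetical existing Element
        type=TranscriptionType.Word,
        text='both provenance fields set',
        score=1.0,
        source=recognizer_source,        # hypothetical DataSource
        worker_version=worker_version,   # hypothetical WorkerVersion
    )
except IntegrityError:
    pass  # the check constraint rejects rows having both a source and a worker version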
from arkindex.documents.models import Transcription, Element, Entity
from itertools import chain
from django.db.models import prefetch_related_objects
from arkindex.documents.models import Transcription, Element, Entity
import uuid
......@@ -12,7 +13,7 @@ def search_transcriptions_post(data):
ts = Transcription.objects \
.filter(id__in=transcription_ids) \
.order_by('-score') \
.prefetch_related('zone__image__server', 'element', 'source')
.prefetch_related('element__zone__image__server', 'source')
element_ids = list(ts.values_list('element_id', flat=True))
all_parent_paths = Element.objects.get_ascendings_paths(*element_ids)
for trans in ts:
......@@ -62,7 +63,7 @@ def search_elements_post(data):
transcriptions = {
t.id: t
for t in Transcription.objects.filter(id__in=tr_ids).prefetch_related('zone__image__server', 'source')
for t in Transcription.objects.filter(id__in=tr_ids).prefetch_related('source')
}
elts_tr_ids = {
......@@ -78,11 +79,15 @@ def search_elements_post(data):
for result in data
}
elts = list(Element.objects.filter(id__in=elt_ids).prefetch_related('corpus', 'type'))
elts = list(Element.objects.filter(id__in=elt_ids).prefetch_related('corpus', 'type', 'zone__image__server'))
# Preserve the ordering given by ElasticSearch
ordered_elts = list(filter(None, map(lambda eid: next((e for e in elts if e.id == eid), None), elt_ids)))
all_paths = Element.objects.get_ascendings_paths(*(e.id for e in ordered_elts))
prefetch_related_objects(
[element for paths in all_paths.values() for path in paths for element in path],
'type',
)
for elt in ordered_elts:
elt.transcriptions_results = list(filter(None, [transcriptions.get(tid) for tid in elts_tr_ids[elt.id]]))
......
from abc import ABC, abstractmethod
from django.conf import settings
from django.db.models import Q
from rest_framework import serializers
from arkindex.documents.models import Element, Transcription
from arkindex.project.tools import build_absolute_url, bounding_box
......@@ -77,7 +78,7 @@ class TranscriptionSearchAnnotationSerializer(TranscriptionAnnotationSerializer)
def get_target(self, ts):
assert isinstance(ts, Transcription)
url = build_absolute_url(ts.element, self.context['request'], 'api:iiif-canvas')
x, y, w, h = bounding_box(ts.zone.polygon)
x, y, w, h = bounding_box(ts.element.zone.polygon)
return f'{url}#xywh={x},{y},{w},{h}'
......@@ -103,8 +104,11 @@ class AnnotationListSerializer(serializers.BaseSerializer):
}
def get_elements(self, element):
return element.transcriptions.all()
"Get a list of elements to serialize as annotations."
return Transcription.objects.filter(
Q(element=element)
| Q(element__in=Element.objects.get_descending(element.id))
)
class ElementAnnotationListSerializer(AnnotationListSerializer):
......
......@@ -10,7 +10,6 @@ from arkindex.documents.models import (
Corpus, Element, ElementType, Transcription, DataSource, MLClass, Classification, ClassificationState
)
from arkindex.project.serializer_fields import EnumField, LinearRingField
from arkindex.images.serializers import ZoneSerializer
from arkindex.documents.serializers.light import ElementZoneSerializer
import uuid
......@@ -232,18 +231,16 @@ class TranscriptionSerializer(serializers.ModelSerializer):
Serialises a Transcription
"""
type = EnumField(TranscriptionType, read_only=True)
zone = ZoneSerializer(read_only=True)
source = DataSourceSerializer(read_only=True)
class Meta:
model = Transcription
read_only_fields = ('id', 'type', 'score', 'zone', 'source')
read_only_fields = ('id', 'type', 'score', 'source')
fields = (
'id',
'type',
'text',
'score',
'zone',
'source',
'worker_version_id',
)
......@@ -428,6 +425,7 @@ class ClassificationsSerializer(serializers.Serializer):
ml_classes = dict(
MLClass
.objects
.using('default')
.filter(corpus_id=parent.corpus_id, name__in=ml_class_names)
.values_list('name', 'id')
)
......