diff --git a/arkindex/documents/admin.py b/arkindex/documents/admin.py index 2a00a591715a6d7eee001b4d9afac889d73afbb7..7a42a6b9f39ad17c4c081f95a2276f2049144295 100644 --- a/arkindex/documents/admin.py +++ b/arkindex/documents/admin.py @@ -74,9 +74,9 @@ class ElementAdmin(admin.ModelAdmin): class TranscriptionAdmin(admin.ModelAdmin): list_display = ('id', 'text', 'score', 'element', ) list_filter = [('type', EnumFieldListFilter), 'source'] - fields = ('id', 'text', 'score', 'element', 'zone', 'source', ) + fields = ('id', 'text', 'score', 'element', 'source', ) readonly_fields = ('id', ) - raw_id_fields = ('element', 'zone', ) + raw_id_fields = ('element', ) class MLClassAdmin(admin.ModelAdmin): diff --git a/arkindex/documents/api/elements.py b/arkindex/documents/api/elements.py index 547b88756653ee5f2ab4988de5bf2bb6dcdca45f..f1eb993b517acb4c5e8195e2ed5a30abe5f65f9f 100644 --- a/arkindex/documents/api/elements.py +++ b/arkindex/documents/api/elements.py @@ -3,7 +3,8 @@ from datetime import datetime, timezone from psycopg2.extras import execute_values from django.conf import settings from django.db import transaction, connection -from django.db.models import Q, Prefetch, Max, QuerySet +from django.db.models import Q, Prefetch, Max, QuerySet, CharField +from django.db.models.functions import Cast from django.shortcuts import get_object_or_404 from django.utils.functional import cached_property from rest_framework.exceptions import ValidationError, NotFound @@ -790,13 +791,11 @@ class ElementTranscriptions(ListAPIView): )) self.check_object_permissions(self.request, element) + # ORDER BY casting IDs as char to avoid the PostgreSQL optimizer's inefficient scan queryset = Transcription.objects \ - .prefetch_related('zone__image__server', 'source') \ - .extra( - # ORDER BY casting IDs as char to avoid PostgreSQL optimizer inefficient scan - select={'char_id': 'CAST(id AS CHAR(36))'}, - order_by=['char_id'] - ) + .prefetch_related('element__zone__image__server', 'source') 
\ + .annotate(char_id=Cast('id', output_field=CharField())) \ + .order_by('char_id') if self.is_recursive: queryset = queryset.filter( diff --git a/arkindex/documents/api/entities.py b/arkindex/documents/api/entities.py index cc69925745f4afda656150789830e793b38a9acb..8a5f54a13563fd210fe352683508988ccb2a7b67 100644 --- a/arkindex/documents/api/entities.py +++ b/arkindex/documents/api/entities.py @@ -109,14 +109,14 @@ class EntityElements(ListAPIView): corpus__in=Corpus.objects.readable(self.request.user), metadatas__entity_id=pk ) \ - .select_related('type') \ - .prefetch_related('metadatas__entity', 'metadatas__revision', 'corpus') + .select_related('type', 'corpus') \ + .prefetch_related('metadatas__entity', 'metadatas__revision', 'zone__image__server') transcription_elements = Element.objects \ .filter( corpus__in=Corpus.objects.readable(self.request.user), transcriptions__transcription_entities__entity_id=pk - ).select_related('type') \ - .prefetch_related('metadatas__entity', 'metadatas__revision', 'corpus') + ).select_related('type', 'corpus') \ + .prefetch_related('metadatas__entity', 'metadatas__revision', 'zone__image__server') return metadata_elements.union(transcription_elements) \ .order_by('name', 'type') diff --git a/arkindex/documents/api/ml.py b/arkindex/documents/api/ml.py index 885372f661949bacee4b5235f7c858acd7c3f7c8..544ec623a5dd1adf101280b95734e3e06032f180 100644 --- a/arkindex/documents/api/ml.py +++ b/arkindex/documents/api/ml.py @@ -278,7 +278,6 @@ class ElementTranscriptionsBulk(CreateAPIView): transcriptions.append(Transcription( element=annotation['element'], type=tr_type, - zone=None, worker_version=worker_version, text=annotation['text'], score=annotation['score'] diff --git a/arkindex/documents/consumers.py b/arkindex/documents/consumers.py index 5632e2411d11b3e59dc1a19fadda45782117662c..d41888420276a0d66417b25707f324f7992bc492 100644 --- a/arkindex/documents/consumers.py +++ b/arkindex/documents/consumers.py @@ -75,29 +75,28 @@ class 
ReindexConsumer(SyncConsumer): elif element_id or corpus_id: if element_id: # Pick this element, and all its children - elements_queryset = list(Element.objects.get_descending(element_id)) - elements_queryset.append(Element.objects.get(id=element_id)) + elements_queryset = Element.objects.filter(Q(id=element_id) | Q(paths__path__contains=[element_id])) else: # Pick all elements in the corpus elements_queryset = Element.objects.filter(corpus_id=corpus_id) - transcriptions_queryset = Transcription.objects.filter( - element__in=elements_queryset, - zone__isnull=False - ) + transcriptions_queryset = Transcription.objects.filter(element__in=elements_queryset) entities_queryset = Entity.objects.filter( Q(metadatas__element__in=elements_queryset) | Q(transcriptions__element__in=elements_queryset) ) else: - transcriptions_queryset = Transcription.objects.filter(zone__isnull=False) + transcriptions_queryset = Transcription.objects.all() elements_queryset = Element.objects.all() entities_queryset = Entity.objects.all() if transcriptions: - indexer.run_index(transcriptions_queryset, bulk_size=400) + indexer.run_index(transcriptions_queryset.select_related('element'), bulk_size=400) if elements: - indexer.run_index(elements_queryset, bulk_size=100) + indexer.run_index( + elements_queryset.select_related('type').prefetch_related('metadatas', 'transcriptions'), + bulk_size=100, + ) if entities: indexer.run_index(entities_queryset, bulk_size=400) diff --git a/arkindex/documents/fixtures/data.json b/arkindex/documents/fixtures/data.json index 0cdc384ccf43c0737888e344c1ac841e55aba6ae..43e9debed534a5920a0c161adca0b0c8e538e6e2 100644 --- a/arkindex/documents/fixtures/data.json +++ b/arkindex/documents/fixtures/data.json @@ -1,148 +1,159 @@ [ { "model": "dataimport.repository", - "pk": "d0cd8a85-8f30-43e1-ba19-7bd0dacb2041", + "pk": "1e022c37-4a8e-4624-8fbe-6c8a4d5b22a2", "fields": { "url": "http://gitlab/repo", "type": "iiif", "hook_token": "hook-token", - "credentials": 
"d21e40e2-32d2-4903-ac4a-6672dc6c14ba", + "credentials": "b07d0cb3-65b7-45c4-9bf2-d7153f4d27cd", "provider_name": "GitLabProvider" } }, { "model": "dataimport.repository", - "pk": "f020dbad-f6ba-4d95-9bba-35dc80272943", + "pk": "b6e463c3-0631-425f-8702-da7f32166b21", "fields": { "url": "http://my_repo.fake/workers/worker", "type": "worker", "hook_token": "worker-hook-token", - "credentials": "d21e40e2-32d2-4903-ac4a-6672dc6c14ba", + "credentials": "b07d0cb3-65b7-45c4-9bf2-d7153f4d27cd", "provider_name": "GitLabProvider" } }, { "model": "dataimport.revision", - "pk": "3ddfb063-eff0-4a9a-8b0f-000c0eca0f7a", + "pk": "bc9f203f-e870-4f79-96ce-4c0f3211691e", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "repo": "d0cd8a85-8f30-43e1-ba19-7bd0dacb2041", - "hash": "42", - "message": "a", - "author": "me" + "repo": "b6e463c3-0631-425f-8702-da7f32166b21", + "hash": "1337", + "message": "My w0rk3r", + "author": "Test user" } }, { "model": "dataimport.revision", - "pk": "796847cd-9463-4496-a102-c1a5f825e50d", + "pk": "f353f467-9fb4-4f78-b962-03a8cd10cd85", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "repo": "f020dbad-f6ba-4d95-9bba-35dc80272943", - "hash": "1337", - "message": "My w0rk3r", - "author": "Test user" + "repo": "1e022c37-4a8e-4624-8fbe-6c8a4d5b22a2", + "hash": "42", + "message": "a", + "author": "me" } }, { "model": "dataimport.worker", - "pk": "29a773f3-d140-48e3-b1e4-29325f9dbc6f", + "pk": "72cd3c6b-be61-439e-ac00-da0440a6dfab", "fields": { "name": "Recognizer", "slug": "reco", "type": "recognizer", - "repository": "f020dbad-f6ba-4d95-9bba-35dc80272943" + "repository": "b6e463c3-0631-425f-8702-da7f32166b21" } }, { "model": "dataimport.worker", - "pk": "cf4243ca-5f42-4dbd-a2fe-14a8e75a33d8", + "pk": "dd3e3126-f07d-47d5-80cc-f9439aca0ad0", "fields": { "name": "Document layout analyser", "slug": "dla", "type": "dla", - "repository": "f020dbad-f6ba-4d95-9bba-35dc80272943" + 
"repository": "b6e463c3-0631-425f-8702-da7f32166b21" } }, { "model": "dataimport.workerversion", - "pk": "8d2ca1e7-f4f9-4403-8569-20e9712a897a", + "pk": "060456c0-c109-4f05-b78d-4dbe13995d05", "fields": { - "worker": "29a773f3-d140-48e3-b1e4-29325f9dbc6f", - "revision": "796847cd-9463-4496-a102-c1a5f825e50d", + "worker": "72cd3c6b-be61-439e-ac00-da0440a6dfab", + "revision": "bc9f203f-e870-4f79-96ce-4c0f3211691e", "configuration": { "test": 42 }, "state": "available", - "docker_image": "71a4b60b-a2f2-4503-8f5d-9d3c38f66626", + "docker_image": "2f4c9981-6ee0-43a1-80f0-54cca23be3ff", "docker_image_iid": null } }, { "model": "dataimport.workerversion", - "pk": "afa4a177-b8d2-4bcf-aaa7-b00196807e1d", + "pk": "67be8557-e683-43c5-b9d7-1b57b4516e09", "fields": { - "worker": "cf4243ca-5f42-4dbd-a2fe-14a8e75a33d8", - "revision": "796847cd-9463-4496-a102-c1a5f825e50d", + "worker": "dd3e3126-f07d-47d5-80cc-f9439aca0ad0", + "revision": "bc9f203f-e870-4f79-96ce-4c0f3211691e", "configuration": { "test": 42 }, "state": "available", - "docker_image": "71a4b60b-a2f2-4503-8f5d-9d3c38f66626", + "docker_image": "2f4c9981-6ee0-43a1-80f0-54cca23be3ff", "docker_image_iid": null } }, { "model": "documents.corpus", - "pk": "1f7e82ec-4094-4397-b729-ec424ea7a37f", + "pk": "78d45b37-36d1-4576-8bb2-8e5844f31feb", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", "name": "Unit Tests", "description": "", - "repository": "d0cd8a85-8f30-43e1-ba19-7bd0dacb2041", + "repository": "1e022c37-4a8e-4624-8fbe-6c8a4d5b22a2", "public": true } }, { "model": "documents.elementtype", - "pk": "33d93a96-58a9-4a7c-be54-12d4691f4e81", + "pk": "1e98cd23-9b0b-44ed-84b5-3a9e4dec68b3", "fields": { - "corpus": "1f7e82ec-4094-4397-b729-ec424ea7a37f", - "slug": "text_line", - "display_name": "Line", + "corpus": "78d45b37-36d1-4576-8bb2-8e5844f31feb", + "slug": "volume", + "display_name": "Volume", + "folder": true, + "allowed_transcription": null + } +}, +{ + "model": 
"documents.elementtype", + "pk": "6acab4ca-a475-485e-b9cf-fc8a8404bcd1", + "fields": { + "corpus": "78d45b37-36d1-4576-8bb2-8e5844f31feb", + "slug": "word", + "display_name": "Word", "folder": false, - "allowed_transcription": "line" + "allowed_transcription": "word" } }, { "model": "documents.elementtype", - "pk": "3bd6b808-4ced-41f5-8ae0-bf00804fcbaa", + "pk": "6bc2cbbd-b98e-4689-bf9d-8a9095710632", "fields": { - "corpus": "1f7e82ec-4094-4397-b729-ec424ea7a37f", - "slug": "volume", - "display_name": "Volume", - "folder": true, + "corpus": "78d45b37-36d1-4576-8bb2-8e5844f31feb", + "slug": "act", + "display_name": "Act", + "folder": false, "allowed_transcription": null } }, { "model": "documents.elementtype", - "pk": "a8c815f0-258f-4025-8f9b-f3bd6a87904c", + "pk": "77739a6b-f044-4a5e-9c19-bc881caafbbf", "fields": { - "corpus": "1f7e82ec-4094-4397-b729-ec424ea7a37f", - "slug": "surface", - "display_name": "Surface", + "corpus": "78d45b37-36d1-4576-8bb2-8e5844f31feb", + "slug": "text_line", + "display_name": "Line", "folder": false, - "allowed_transcription": null + "allowed_transcription": "line" } }, { "model": "documents.elementtype", - "pk": "aad1941d-d7dd-485f-8cf3-90487ac522c8", + "pk": "8ccc9061-0bc7-4d74-8a83-d427273a8c56", "fields": { - "corpus": "1f7e82ec-4094-4397-b729-ec424ea7a37f", + "corpus": "78d45b37-36d1-4576-8bb2-8e5844f31feb", "slug": "page", "display_name": "Page", "folder": false, @@ -151,284 +162,435 @@ }, { "model": "documents.elementtype", - "pk": "bff45072-bc1b-408d-bd1f-b3ffd22d8796", + "pk": "f56fb6dd-afe4-49b3-a870-963467404935", "fields": { - "corpus": "1f7e82ec-4094-4397-b729-ec424ea7a37f", - "slug": "act", - "display_name": "Act", + "corpus": "78d45b37-36d1-4576-8bb2-8e5844f31feb", + "slug": "surface", + "display_name": "Surface", "folder": false, "allowed_transcription": null } }, { "model": "documents.elementpath", - "pk": "0355c4b6-bb16-474e-9b7b-bb86077339f3", + "pk": "06d9848e-c2cd-4f1b-9d07-00ed04f69b9e", "fields": { - "element": 
"f3981e96-ed95-43a0-b1ea-fd288c55f5b3", - "path": "[\"3c4bce04-3ae8-4f17-86b0-46bc25408b8f\", \"ccc2cfdb-2de5-4e42-8a1a-fc5e1041b21d\"]", - "ordering": 0 + "element": "bbd63fcc-23af-4593-bcb5-341c3008b7ab", + "path": "[\"e001d1ee-fa90-4f96-b722-e2ba20b84097\", \"35fb048c-f441-4e9d-bb69-78a8dfdb4598\"]", + "ordering": 2 + } +}, +{ + "model": "documents.elementpath", + "pk": "07d8a649-9b5c-4338-9e42-20a3fc9c884e", + "fields": { + "element": "35fb048c-f441-4e9d-bb69-78a8dfdb4598", + "path": "[\"e001d1ee-fa90-4f96-b722-e2ba20b84097\"]", + "ordering": 1 } }, { "model": "documents.elementpath", - "pk": "0406c678-c1b1-41b2-a4bd-d85e83ccd230", + "pk": "0abd85c5-7d97-49ca-9b60-b67e7e7d215e", "fields": { - "element": "ccc2cfdb-2de5-4e42-8a1a-fc5e1041b21d", - "path": "[\"3c4bce04-3ae8-4f17-86b0-46bc25408b8f\"]", + "element": "7945d738-4a6e-4a76-ad57-21c03d63625d", + "path": "[\"e001d1ee-fa90-4f96-b722-e2ba20b84097\"]", "ordering": 2 } }, { "model": "documents.elementpath", - "pk": "0f73c6dc-07b3-494a-b2fd-692c9a19ce5a", + "pk": "1aefc8c1-1dba-4f8e-b2eb-b7b7c5bc0b01", "fields": { - "element": "9621c876-e7da-4d43-a5a7-82470dead663", - "path": "[\"3c4bce04-3ae8-4f17-86b0-46bc25408b8f\"]", + "element": "7e788454-afed-4bef-a370-bccf0dd3b2b4", + "path": "[\"e001d1ee-fa90-4f96-b722-e2ba20b84097\"]", "ordering": 0 } }, { "model": "documents.elementpath", - "pk": "208dd6c4-5510-4ceb-b8b4-fef27680df72", + "pk": "1f1a82a0-7835-465b-ae6d-d6db65e92696", "fields": { - "element": "cb69d995-2ce2-4404-a567-5598cf9dd231", - "path": "[\"3c4bce04-3ae8-4f17-86b0-46bc25408b8f\"]", + "element": "ebdb0c60-e1e4-4f8a-a8b7-4127896ac194", + "path": "[\"e001d1ee-fa90-4f96-b722-e2ba20b84097\"]", "ordering": 1 } }, { "model": "documents.elementpath", - "pk": "269ebcc7-7b11-43c9-a6cd-94d1ee99b03a", + "pk": "2f8c2139-9b01-415c-8a80-b708a7fd841d", "fields": { - "element": "dbfa2c38-8664-447a-bb09-215cc86c6ab4", - "path": "[\"209e1e3b-5808-43bf-99b7-801959d6c807\"]", - "ordering": 2 + "element": 
"326843be-0112-4954-a2ea-da78aded01a9", + "path": "[\"07422db5-10c5-404b-9325-db2689871761\"]", + "ordering": 0 } }, { "model": "documents.elementpath", - "pk": "287575b8-9b19-49bd-adab-a241385191f4", + "pk": "4beb2166-9bc1-462f-bbbb-34cb43ceeb8a", "fields": { - "element": "f5535f84-2ca7-4853-9ab6-44606a8dc462", - "path": "[\"3c4bce04-3ae8-4f17-86b0-46bc25408b8f\", \"9621c876-e7da-4d43-a5a7-82470dead663\"]", + "element": "b9f39256-6ff0-4daf-b56a-a5a316964133", + "path": "[\"e001d1ee-fa90-4f96-b722-e2ba20b84097\", \"35fb048c-f441-4e9d-bb69-78a8dfdb4598\"]", "ordering": 0 } }, { "model": "documents.elementpath", - "pk": "36cc34f3-8d0b-43cb-9040-7169471a03d4", + "pk": "4e98abb3-e74c-4532-a831-11b41c00f2d5", + "fields": { + "element": "96893993-5b8d-4ed0-b305-08f745d8df01", + "path": "[\"e001d1ee-fa90-4f96-b722-e2ba20b84097\", \"35fb048c-f441-4e9d-bb69-78a8dfdb4598\"]", + "ordering": 1 + } +}, +{ + "model": "documents.elementpath", + "pk": "4ed309a6-3af4-4fe4-975d-0a25ecd48bcf", + "fields": { + "element": "c4ccff42-93d3-4f66-a073-397dd85e1806", + "path": "[\"e001d1ee-fa90-4f96-b722-e2ba20b84097\"]", + "ordering": 4 + } +}, +{ + "model": "documents.elementpath", + "pk": "5980b951-7bcc-4213-a73b-e71659e8a8e1", "fields": { - "element": "564aa56f-b257-445c-9930-4b6110628c09", - "path": "[\"3c4bce04-3ae8-4f17-86b0-46bc25408b8f\"]", + "element": "5130da8e-f2aa-477d-bf58-a2357efc8b2b", + "path": "[\"07422db5-10c5-404b-9325-db2689871761\"]", "ordering": 2 } }, { "model": "documents.elementpath", - "pk": "38df3e8b-d824-4862-b7b5-580065b2011b", + "pk": "7680a018-4523-4abc-8242-132958dd7f5a", + "fields": { + "element": "fcf303df-36a5-40f2-997f-82eb47541bcf", + "path": "[\"e001d1ee-fa90-4f96-b722-e2ba20b84097\", \"99de88f1-9655-4bae-9203-01d56d097ef7\"]", + "ordering": 1 + } +}, +{ + "model": "documents.elementpath", + "pk": "7de05621-65f2-4dcb-8f44-9a88fdfb3a22", "fields": { - "element": "c0ec4456-f6a2-4253-a876-6eba4f96f863", - "path": 
"[\"3c4bce04-3ae8-4f17-86b0-46bc25408b8f\"]", + "element": "cbafeb68-d66a-4e1c-bcd4-47dd88b02eec", + "path": "[\"07422db5-10c5-404b-9325-db2689871761\"]", "ordering": 1 } }, { "model": "documents.elementpath", - "pk": "3a6d0696-1ca2-4f83-a6dc-25a859482ebc", + "pk": "a3e6f951-64da-474b-bd61-d323ad71d5c9", "fields": { - "element": "8f92f79c-471a-4280-9530-66ec406ab8b2", - "path": "[\"3c4bce04-3ae8-4f17-86b0-46bc25408b8f\"]", + "element": "9f81a475-ed1f-470d-b11a-9740d6bb24d2", + "path": "[\"e001d1ee-fa90-4f96-b722-e2ba20b84097\", \"99de88f1-9655-4bae-9203-01d56d097ef7\"]", "ordering": 0 } }, { "model": "documents.elementpath", - "pk": "582355b4-4352-4277-b6dd-62c6a8dfbf88", + "pk": "a7ad8b75-e9fd-44f9-acec-a283b90d9585", "fields": { - "element": "d7b6b144-88cb-4c47-a380-a84339773dda", - "path": "[\"3c4bce04-3ae8-4f17-86b0-46bc25408b8f\", \"cb69d995-2ce2-4404-a567-5598cf9dd231\"]", + "element": "73b56c40-4136-499b-9c10-82c2f569c78b", + "path": "[\"e001d1ee-fa90-4f96-b722-e2ba20b84097\", \"99de88f1-9655-4bae-9203-01d56d097ef7\"]", "ordering": 0 } }, { "model": "documents.elementpath", - "pk": "7abb9de4-a62f-49e4-979f-8975fe2e1111", + "pk": "a9aec95c-fa0f-4b0d-b3ab-7e061481b7bb", "fields": { - "element": "02d0fa30-2a2d-4972-a069-626c629357a4", - "path": "[\"3c4bce04-3ae8-4f17-86b0-46bc25408b8f\", \"d19ded5f-c50c-4cc6-a0cf-4463e2f2df68\"]", + "element": "1fcb137d-12aa-47ea-8d94-fffd821cc3da", + "path": "[\"e001d1ee-fa90-4f96-b722-e2ba20b84097\", \"b675ba10-f6ed-45cb-8a8c-449940471db5\"]", "ordering": 0 } }, { "model": "documents.elementpath", - "pk": "a30e4bb0-b337-4718-a46b-53fc390393d0", + "pk": "aa5478d1-be7c-48c2-90cf-ae0294981ad9", "fields": { - "element": "7c022411-eb2c-4f06-9387-5d8418a0bf33", - "path": "[\"3c4bce04-3ae8-4f17-86b0-46bc25408b8f\"]", - "ordering": 3 + "element": "4d5191a9-5161-4587-a6bf-6430786f869a", + "path": "[\"e001d1ee-fa90-4f96-b722-e2ba20b84097\", \"99de88f1-9655-4bae-9203-01d56d097ef7\"]", + "ordering": 2 } }, { "model": 
"documents.elementpath", - "pk": "a47f8cec-6c57-4d9d-a7f8-0ab9ab56ab44", + "pk": "ae105ab9-b082-44e3-8dd9-eb14c65f83a1", "fields": { - "element": "b09ebead-3440-4040-8b82-5dc2bae4d2d9", - "path": "[\"3c4bce04-3ae8-4f17-86b0-46bc25408b8f\", \"8f92f79c-471a-4280-9530-66ec406ab8b2\"]", + "element": "cb1b2af4-9cd8-4ecb-99a3-a7f309198a7b", + "path": "[\"e001d1ee-fa90-4f96-b722-e2ba20b84097\", \"7e788454-afed-4bef-a370-bccf0dd3b2b4\"]", "ordering": 0 } }, { "model": "documents.elementpath", - "pk": "b1b9f9f3-f543-4c57-aa49-a0aa716f1b71", + "pk": "b5b1d6ea-c2ee-4c91-8f90-be745e05559d", + "fields": { + "element": "9644857d-6c33-4241-ad43-38054fba11a0", + "path": "[\"e001d1ee-fa90-4f96-b722-e2ba20b84097\", \"ebdb0c60-e1e4-4f8a-a8b7-4127896ac194\"]", + "ordering": 1 + } +}, +{ + "model": "documents.elementpath", + "pk": "b5c3893e-2127-4a09-9325-8545962f325c", "fields": { - "element": "7e4d2f71-f52a-44a6-8cb5-f209a0b82408", - "path": "[\"209e1e3b-5808-43bf-99b7-801959d6c807\"]", + "element": "ce358ae1-5d50-476e-b3bb-2f5abb3a9ce9", + "path": "[\"e001d1ee-fa90-4f96-b722-e2ba20b84097\", \"ebdb0c60-e1e4-4f8a-a8b7-4127896ac194\"]", "ordering": 0 } }, { "model": "documents.elementpath", - "pk": "bc93dbca-6436-4601-9ecd-0ab08972b309", + "pk": "c048eef3-0e7b-4f6a-836c-bd0cae40b509", "fields": { - "element": "51585cf0-35b6-4b13-a445-37bed0aae13b", - "path": "[\"3c4bce04-3ae8-4f17-86b0-46bc25408b8f\", \"7c022411-eb2c-4f06-9387-5d8418a0bf33\"]", + "element": "72f64b68-afc4-41e0-8020-e590bdaa9f7c", + "path": "[\"e001d1ee-fa90-4f96-b722-e2ba20b84097\", \"7945d738-4a6e-4a76-ad57-21c03d63625d\"]", + "ordering": 1 + } +}, +{ + "model": "documents.elementpath", + "pk": "d203709c-b528-4aa6-be17-56795c1d03fb", + "fields": { + "element": "5f20bff4-aa8b-4afa-ad96-e791c2433dc1", + "path": "[\"e001d1ee-fa90-4f96-b722-e2ba20b84097\", \"7945d738-4a6e-4a76-ad57-21c03d63625d\"]", + "ordering": 2 + } +}, +{ + "model": "documents.elementpath", + "pk": "dff4ed00-697a-4e42-aff3-5d09f8f7785e", + "fields": { 
+ "element": "99de88f1-9655-4bae-9203-01d56d097ef7", + "path": "[\"e001d1ee-fa90-4f96-b722-e2ba20b84097\"]", "ordering": 0 } }, { "model": "documents.elementpath", - "pk": "d47680ce-4ab8-4eb4-8cbe-cf06373ed654", + "pk": "e456aee3-8cf4-45e3-83f4-b64e61733b9c", "fields": { - "element": "d19ded5f-c50c-4cc6-a0cf-4463e2f2df68", - "path": "[\"3c4bce04-3ae8-4f17-86b0-46bc25408b8f\"]", - "ordering": 4 + "element": "3442efb5-4960-432e-b6f9-795a415186d8", + "path": "[\"e001d1ee-fa90-4f96-b722-e2ba20b84097\", \"c4ccff42-93d3-4f66-a073-397dd85e1806\"]", + "ordering": 0 } }, { "model": "documents.elementpath", - "pk": "f593ed74-c825-44fa-a7da-a0d9776f2e0b", + "pk": "f35b7715-2102-4827-9589-57c1b68405b9", "fields": { - "element": "56a2302d-e027-4724-85be-c4113ff2a243", - "path": "[\"3c4bce04-3ae8-4f17-86b0-46bc25408b8f\", \"cb69d995-2ce2-4404-a567-5598cf9dd231\"]", - "ordering": 1 + "element": "a493ea73-d17d-4f63-b6c9-1ebe04f36641", + "path": "[\"e001d1ee-fa90-4f96-b722-e2ba20b84097\", \"c53fc91f-58c7-4fda-a92c-72a5f4051176\"]", + "ordering": 0 } }, { "model": "documents.elementpath", - "pk": "fc3dfb9d-1763-4892-95a9-1c8adeb8f41c", + "pk": "f4affadd-0195-41da-80c5-d36e0a6b064c", "fields": { - "element": "4e9de67b-6e4f-46b7-936e-34ed9692900b", - "path": "[\"209e1e3b-5808-43bf-99b7-801959d6c807\"]", - "ordering": 1 + "element": "775da282-9bef-4dcd-b1fa-0a7ff555cb9d", + "path": "[\"e001d1ee-fa90-4f96-b722-e2ba20b84097\", \"7945d738-4a6e-4a76-ad57-21c03d63625d\"]", + "ordering": 0 + } +}, +{ + "model": "documents.elementpath", + "pk": "f95a476b-a0b2-484d-ae3f-b720095c3f37", + "fields": { + "element": "b675ba10-f6ed-45cb-8a8c-449940471db5", + "path": "[\"e001d1ee-fa90-4f96-b722-e2ba20b84097\"]", + "ordering": 3 + } +}, +{ + "model": "documents.elementpath", + "pk": "fc030c8c-7dce-477f-913b-520a3ae6e545", + "fields": { + "element": "c53fc91f-58c7-4fda-a92c-72a5f4051176", + "path": "[\"e001d1ee-fa90-4f96-b722-e2ba20b84097\"]", + "ordering": 2 + } +}, +{ + "model": "documents.element", 
+ "pk": "07422db5-10c5-404b-9325-db2689871761", + "fields": { + "created": "2020-02-02T01:23:45.678Z", + "updated": "2020-02-02T01:23:45.678Z", + "corpus": "78d45b37-36d1-4576-8bb2-8e5844f31feb", + "type": "1e98cd23-9b0b-44ed-84b5-3a9e4dec68b3", + "name": "Volume 2", + "zone": null, + "source": null, + "worker_version": null + } +}, +{ + "model": "documents.element", + "pk": "1fcb137d-12aa-47ea-8d94-fffd821cc3da", + "fields": { + "created": "2020-02-02T01:23:45.678Z", + "updated": "2020-02-02T01:23:45.678Z", + "corpus": "78d45b37-36d1-4576-8bb2-8e5844f31feb", + "type": "f56fb6dd-afe4-49b3-a870-963467404935", + "name": "Surface E", + "zone": "440f85ff-25d9-4488-8654-98c5916df96f", + "source": null, + "worker_version": null + } +}, +{ + "model": "documents.element", + "pk": "326843be-0112-4954-a2ea-da78aded01a9", + "fields": { + "created": "2020-02-02T01:23:45.678Z", + "updated": "2020-02-02T01:23:45.678Z", + "corpus": "78d45b37-36d1-4576-8bb2-8e5844f31feb", + "type": "8ccc9061-0bc7-4d74-8a83-d427273a8c56", + "name": "Volume 2, page 1r", + "zone": "aefbd01b-3421-458f-a8e6-2b5ffdaccc31", + "source": null, + "worker_version": null } }, { "model": "documents.element", - "pk": "02d0fa30-2a2d-4972-a069-626c629357a4", + "pk": "3442efb5-4960-432e-b6f9-795a415186d8", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "1f7e82ec-4094-4397-b729-ec424ea7a37f", - "type": "a8c815f0-258f-4025-8f9b-f3bd6a87904c", + "corpus": "78d45b37-36d1-4576-8bb2-8e5844f31feb", + "type": "f56fb6dd-afe4-49b3-a870-963467404935", "name": "Surface F", - "zone": "d2e7ee12-14fa-46c8-a360-f5b31939052a", + "zone": "b70d7b27-c973-44ba-93c5-602957ae491e", "source": null, "worker_version": null } }, { "model": "documents.element", - "pk": "209e1e3b-5808-43bf-99b7-801959d6c807", + "pk": "35fb048c-f441-4e9d-bb69-78a8dfdb4598", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": 
"1f7e82ec-4094-4397-b729-ec424ea7a37f", - "type": "3bd6b808-4ced-41f5-8ae0-bf00804fcbaa", - "name": "Volume 2", - "zone": null, + "corpus": "78d45b37-36d1-4576-8bb2-8e5844f31feb", + "type": "8ccc9061-0bc7-4d74-8a83-d427273a8c56", + "name": "Volume 1, page 1v", + "zone": "0fb66d83-3d13-4d4e-82d1-1fd7fc82036f", "source": null, "worker_version": null } }, { "model": "documents.element", - "pk": "3c4bce04-3ae8-4f17-86b0-46bc25408b8f", + "pk": "4d5191a9-5161-4587-a6bf-6430786f869a", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "1f7e82ec-4094-4397-b729-ec424ea7a37f", - "type": "3bd6b808-4ced-41f5-8ae0-bf00804fcbaa", - "name": "Volume 1", - "zone": null, + "corpus": "78d45b37-36d1-4576-8bb2-8e5844f31feb", + "type": "6acab4ca-a475-485e-b9cf-fc8a8404bcd1", + "name": "DATUM", + "zone": "40edd01e-02e6-4fbf-b2c6-b7452a72c79e", "source": null, "worker_version": null } }, { "model": "documents.element", - "pk": "4e9de67b-6e4f-46b7-936e-34ed9692900b", + "pk": "5130da8e-f2aa-477d-bf58-a2357efc8b2b", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "1f7e82ec-4094-4397-b729-ec424ea7a37f", - "type": "aad1941d-d7dd-485f-8cf3-90487ac522c8", - "name": "Volume 2, page 1v", - "zone": "e67eba1f-4993-492e-9ca0-67451ceb094c", + "corpus": "78d45b37-36d1-4576-8bb2-8e5844f31feb", + "type": "8ccc9061-0bc7-4d74-8a83-d427273a8c56", + "name": "Volume 2, page 2r", + "zone": "11bb6bba-8a2e-4f9e-97b1-0f48bc34103f", "source": null, "worker_version": null } }, { "model": "documents.element", - "pk": "51585cf0-35b6-4b13-a445-37bed0aae13b", + "pk": "5f20bff4-aa8b-4afa-ad96-e791c2433dc1", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "1f7e82ec-4094-4397-b729-ec424ea7a37f", - "type": "a8c815f0-258f-4025-8f9b-f3bd6a87904c", - "name": "Surface E", - "zone": "f3b62073-8fe4-46c9-b7bf-012c3014359b", + "corpus": 
"78d45b37-36d1-4576-8bb2-8e5844f31feb", + "type": "6acab4ca-a475-485e-b9cf-fc8a8404bcd1", + "name": "DATUM", + "zone": "7e767702-f644-451f-91e8-05423bb8e074", "source": null, "worker_version": null } }, { "model": "documents.element", - "pk": "564aa56f-b257-445c-9930-4b6110628c09", + "pk": "72f64b68-afc4-41e0-8020-e590bdaa9f7c", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "1f7e82ec-4094-4397-b729-ec424ea7a37f", - "type": "aad1941d-d7dd-485f-8cf3-90487ac522c8", - "name": "Volume 1, page 2r", - "zone": "20f0abb7-3aac-460b-b1af-b75ed7dab550", + "corpus": "78d45b37-36d1-4576-8bb2-8e5844f31feb", + "type": "6acab4ca-a475-485e-b9cf-fc8a8404bcd1", + "name": "ROY", + "zone": "0039b5f5-078f-4667-825c-6e620b92b59a", "source": null, "worker_version": null } }, { "model": "documents.element", - "pk": "56a2302d-e027-4724-85be-c4113ff2a243", + "pk": "73b56c40-4136-499b-9c10-82c2f569c78b", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "1f7e82ec-4094-4397-b729-ec424ea7a37f", - "type": "a8c815f0-258f-4025-8f9b-f3bd6a87904c", - "name": "Surface C", - "zone": "dda78215-f30b-497f-aedb-fac49055b667", + "corpus": "78d45b37-36d1-4576-8bb2-8e5844f31feb", + "type": "6acab4ca-a475-485e-b9cf-fc8a8404bcd1", + "name": "PARIS", + "zone": "72e1be99-f517-48c4-b45a-808dea668ec0", "source": null, "worker_version": null } }, { "model": "documents.element", - "pk": "7c022411-eb2c-4f06-9387-5d8418a0bf33", + "pk": "775da282-9bef-4dcd-b1fa-0a7ff555cb9d", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "1f7e82ec-4094-4397-b729-ec424ea7a37f", - "type": "bff45072-bc1b-408d-bd1f-b3ffd22d8796", - "name": "Act 4", + "corpus": "78d45b37-36d1-4576-8bb2-8e5844f31feb", + "type": "6acab4ca-a475-485e-b9cf-fc8a8404bcd1", + "name": "PARIS", + "zone": "272f32c2-29eb-47e7-b244-b0a6c2a772c0", + "source": null, + "worker_version": null + } +}, +{ + "model": 
"documents.element", + "pk": "7945d738-4a6e-4a76-ad57-21c03d63625d", + "fields": { + "created": "2020-02-02T01:23:45.678Z", + "updated": "2020-02-02T01:23:45.678Z", + "corpus": "78d45b37-36d1-4576-8bb2-8e5844f31feb", + "type": "8ccc9061-0bc7-4d74-8a83-d427273a8c56", + "name": "Volume 1, page 2r", + "zone": "6a85fb05-f7d1-498d-b93c-62739562c3e3", + "source": null, + "worker_version": null + } +}, +{ + "model": "documents.element", + "pk": "7e788454-afed-4bef-a370-bccf0dd3b2b4", + "fields": { + "created": "2020-02-02T01:23:45.678Z", + "updated": "2020-02-02T01:23:45.678Z", + "corpus": "78d45b37-36d1-4576-8bb2-8e5844f31feb", + "type": "6bc2cbbd-b98e-4689-bf9d-8a9095710632", + "name": "Act 1", "zone": null, "source": null, "worker_version": null @@ -436,41 +598,83 @@ }, { "model": "documents.element", - "pk": "7e4d2f71-f52a-44a6-8cb5-f209a0b82408", + "pk": "9644857d-6c33-4241-ad43-38054fba11a0", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "1f7e82ec-4094-4397-b729-ec424ea7a37f", - "type": "aad1941d-d7dd-485f-8cf3-90487ac522c8", - "name": "Volume 2, page 1r", - "zone": "541c027b-3e0b-42f6-acf6-abcc49ff5f1e", + "corpus": "78d45b37-36d1-4576-8bb2-8e5844f31feb", + "type": "f56fb6dd-afe4-49b3-a870-963467404935", + "name": "Surface C", + "zone": "0fb66d83-3d13-4d4e-82d1-1fd7fc82036f", "source": null, "worker_version": null } }, { "model": "documents.element", - "pk": "8f92f79c-471a-4280-9530-66ec406ab8b2", + "pk": "96893993-5b8d-4ed0-b305-08f745d8df01", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "1f7e82ec-4094-4397-b729-ec424ea7a37f", - "type": "aad1941d-d7dd-485f-8cf3-90487ac522c8", + "corpus": "78d45b37-36d1-4576-8bb2-8e5844f31feb", + "type": "6acab4ca-a475-485e-b9cf-fc8a8404bcd1", + "name": "ROY", + "zone": "db138d8c-b5fc-46e6-8dc6-483a1ffc8bf6", + "source": null, + "worker_version": null + } +}, +{ + "model": "documents.element", + "pk": 
"99de88f1-9655-4bae-9203-01d56d097ef7", + "fields": { + "created": "2020-02-02T01:23:45.678Z", + "updated": "2020-02-02T01:23:45.678Z", + "corpus": "78d45b37-36d1-4576-8bb2-8e5844f31feb", + "type": "8ccc9061-0bc7-4d74-8a83-d427273a8c56", "name": "Volume 1, page 1r", - "zone": "5bd58b5d-8ed2-4e56-a466-b863101dfdc6", + "zone": "fb6dff0f-e117-48f0-b27a-9fa65f42a055", "source": null, "worker_version": null } }, { "model": "documents.element", - "pk": "9621c876-e7da-4d43-a5a7-82470dead663", + "pk": "9f81a475-ed1f-470d-b11a-9740d6bb24d2", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "1f7e82ec-4094-4397-b729-ec424ea7a37f", - "type": "bff45072-bc1b-408d-bd1f-b3ffd22d8796", - "name": "Act 1", + "corpus": "78d45b37-36d1-4576-8bb2-8e5844f31feb", + "type": "77739a6b-f044-4a5e-9c19-bc881caafbbf", + "name": "Text line", + "zone": "f96e2027-4568-4527-a469-bcf27f4fcbb2", + "source": null, + "worker_version": null + } +}, +{ + "model": "documents.element", + "pk": "a493ea73-d17d-4f63-b6c9-1ebe04f36641", + "fields": { + "created": "2020-02-02T01:23:45.678Z", + "updated": "2020-02-02T01:23:45.678Z", + "corpus": "78d45b37-36d1-4576-8bb2-8e5844f31feb", + "type": "f56fb6dd-afe4-49b3-a870-963467404935", + "name": "Surface D", + "zone": "a236763a-6d34-4c41-a1e5-a5e19d115d50", + "source": null, + "worker_version": null + } +}, +{ + "model": "documents.element", + "pk": "b675ba10-f6ed-45cb-8a8c-449940471db5", + "fields": { + "created": "2020-02-02T01:23:45.678Z", + "updated": "2020-02-02T01:23:45.678Z", + "corpus": "78d45b37-36d1-4576-8bb2-8e5844f31feb", + "type": "6bc2cbbd-b98e-4689-bf9d-8a9095710632", + "name": "Act 4", "zone": null, "source": null, "worker_version": null @@ -478,41 +682,41 @@ }, { "model": "documents.element", - "pk": "b09ebead-3440-4040-8b82-5dc2bae4d2d9", + "pk": "b9f39256-6ff0-4daf-b56a-a5a316964133", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": 
"1f7e82ec-4094-4397-b729-ec424ea7a37f", - "type": "33d93a96-58a9-4a7c-be54-12d4691f4e81", - "name": "Text line", - "zone": "59c89a4e-b961-4158-8107-f24463b40fca", + "corpus": "78d45b37-36d1-4576-8bb2-8e5844f31feb", + "type": "6acab4ca-a475-485e-b9cf-fc8a8404bcd1", + "name": "PARIS", + "zone": "c12fb064-b0df-4d8a-af13-55b2e9862e8d", "source": null, "worker_version": null } }, { "model": "documents.element", - "pk": "c0ec4456-f6a2-4253-a876-6eba4f96f863", + "pk": "bbd63fcc-23af-4593-bcb5-341c3008b7ab", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "1f7e82ec-4094-4397-b729-ec424ea7a37f", - "type": "aad1941d-d7dd-485f-8cf3-90487ac522c8", - "name": "Volume 1, page 1v", - "zone": "dda78215-f30b-497f-aedb-fac49055b667", + "corpus": "78d45b37-36d1-4576-8bb2-8e5844f31feb", + "type": "6acab4ca-a475-485e-b9cf-fc8a8404bcd1", + "name": "DATUM", + "zone": "bea07b51-a5bd-400a-b5d0-b0320dc77bd1", "source": null, "worker_version": null } }, { "model": "documents.element", - "pk": "cb69d995-2ce2-4404-a567-5598cf9dd231", + "pk": "c4ccff42-93d3-4f66-a073-397dd85e1806", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "1f7e82ec-4094-4397-b729-ec424ea7a37f", - "type": "bff45072-bc1b-408d-bd1f-b3ffd22d8796", - "name": "Act 2", + "corpus": "78d45b37-36d1-4576-8bb2-8e5844f31feb", + "type": "6bc2cbbd-b98e-4689-bf9d-8a9095710632", + "name": "Act 5", "zone": null, "source": null, "worker_version": null @@ -520,12 +724,12 @@ }, { "model": "documents.element", - "pk": "ccc2cfdb-2de5-4e42-8a1a-fc5e1041b21d", + "pk": "c53fc91f-58c7-4fda-a92c-72a5f4051176", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "1f7e82ec-4094-4397-b729-ec424ea7a37f", - "type": "bff45072-bc1b-408d-bd1f-b3ffd22d8796", + "corpus": "78d45b37-36d1-4576-8bb2-8e5844f31feb", + "type": "6bc2cbbd-b98e-4689-bf9d-8a9095710632", "name": "Act 3", "zone": null, "source": 
null, @@ -534,77 +738,91 @@ }, { "model": "documents.element", - "pk": "d19ded5f-c50c-4cc6-a0cf-4463e2f2df68", + "pk": "cb1b2af4-9cd8-4ecb-99a3-a7f309198a7b", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "1f7e82ec-4094-4397-b729-ec424ea7a37f", - "type": "bff45072-bc1b-408d-bd1f-b3ffd22d8796", - "name": "Act 5", - "zone": null, + "corpus": "78d45b37-36d1-4576-8bb2-8e5844f31feb", + "type": "f56fb6dd-afe4-49b3-a870-963467404935", + "name": "Surface A", + "zone": "9bae7b8e-b03b-40cf-a0d8-80393d76a304", + "source": null, + "worker_version": null + } +}, +{ + "model": "documents.element", + "pk": "cbafeb68-d66a-4e1c-bcd4-47dd88b02eec", + "fields": { + "created": "2020-02-02T01:23:45.678Z", + "updated": "2020-02-02T01:23:45.678Z", + "corpus": "78d45b37-36d1-4576-8bb2-8e5844f31feb", + "type": "8ccc9061-0bc7-4d74-8a83-d427273a8c56", + "name": "Volume 2, page 1v", + "zone": "34f21948-295f-48a6-9644-41af328fdce4", "source": null, "worker_version": null } }, { "model": "documents.element", - "pk": "d7b6b144-88cb-4c47-a380-a84339773dda", + "pk": "ce358ae1-5d50-476e-b3bb-2f5abb3a9ce9", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "1f7e82ec-4094-4397-b729-ec424ea7a37f", - "type": "a8c815f0-258f-4025-8f9b-f3bd6a87904c", + "corpus": "78d45b37-36d1-4576-8bb2-8e5844f31feb", + "type": "f56fb6dd-afe4-49b3-a870-963467404935", "name": "Surface B", - "zone": "b9cb9d9d-8de0-4fcf-8063-a5eb30634b8e", + "zone": "28c84829-8312-4f41-81e5-fa00fad02718", "source": null, "worker_version": null } }, { "model": "documents.element", - "pk": "dbfa2c38-8664-447a-bb09-215cc86c6ab4", + "pk": "e001d1ee-fa90-4f96-b722-e2ba20b84097", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "1f7e82ec-4094-4397-b729-ec424ea7a37f", - "type": "aad1941d-d7dd-485f-8cf3-90487ac522c8", - "name": "Volume 2, page 2r", - "zone": 
"543950e5-031d-41d1-83fd-7365c193c916", + "corpus": "78d45b37-36d1-4576-8bb2-8e5844f31feb", + "type": "1e98cd23-9b0b-44ed-84b5-3a9e4dec68b3", + "name": "Volume 1", + "zone": null, "source": null, "worker_version": null } }, { "model": "documents.element", - "pk": "f3981e96-ed95-43a0-b1ea-fd288c55f5b3", + "pk": "ebdb0c60-e1e4-4f8a-a8b7-4127896ac194", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "1f7e82ec-4094-4397-b729-ec424ea7a37f", - "type": "a8c815f0-258f-4025-8f9b-f3bd6a87904c", - "name": "Surface D", - "zone": "3e6c6344-4dec-4609-b12d-14fd65c32664", + "corpus": "78d45b37-36d1-4576-8bb2-8e5844f31feb", + "type": "6bc2cbbd-b98e-4689-bf9d-8a9095710632", + "name": "Act 2", + "zone": null, "source": null, "worker_version": null } }, { "model": "documents.element", - "pk": "f5535f84-2ca7-4853-9ab6-44606a8dc462", + "pk": "fcf303df-36a5-40f2-997f-82eb47541bcf", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "1f7e82ec-4094-4397-b729-ec424ea7a37f", - "type": "a8c815f0-258f-4025-8f9b-f3bd6a87904c", - "name": "Surface A", - "zone": "53ad7c0c-efcd-4001-be5b-298d1e8498b6", + "corpus": "78d45b37-36d1-4576-8bb2-8e5844f31feb", + "type": "6acab4ca-a475-485e-b9cf-fc8a8404bcd1", + "name": "ROY", + "zone": "f96e2027-4568-4527-a469-bcf27f4fcbb2", "source": null, "worker_version": null } }, { "model": "documents.datasource", - "pk": "9ad06f06-000c-4d01-8038-1afc2e73c8e1", + "pk": "1eaefd28-7475-47bb-aff5-b53a225e26ec", "fields": { "type": "recognizer", "slug": "test", @@ -615,7 +833,7 @@ }, { "model": "documents.datasource", - "pk": "c357f678-9e18-4179-a9ee-4f35fdf26cd8", + "pk": "def9eadf-7b67-4f82-be03-5175ae644424", "fields": { "type": "classifier", "slug": "test", @@ -626,25 +844,23 @@ }, { "model": "documents.transcription", - "pk": "2578a1cf-3e57-4a15-b4cb-a0008c13a563", + "pk": "05f120a9-1c8a-4344-bf09-ef258cc04eca", "fields": { - "element": 
"8f92f79c-471a-4280-9530-66ec406ab8b2", + "element": "775da282-9bef-4dcd-b1fa-0a7ff555cb9d", "type": "word", - "zone": "59c89a4e-b961-4158-8107-f24463b40fca", - "source": "9ad06f06-000c-4d01-8038-1afc2e73c8e1", + "source": "1eaefd28-7475-47bb-aff5-b53a225e26ec", "worker_version": null, - "text": "ROY", + "text": "PARIS", "score": 1.0 } }, { "model": "documents.transcription", - "pk": "4fc2945e-e4be-44d6-a839-c3149cbc66e0", + "pk": "094d1e1b-d35d-4e86-a2fd-4fabd60ba5fa", "fields": { - "element": "564aa56f-b257-445c-9930-4b6110628c09", + "element": "fcf303df-36a5-40f2-997f-82eb47541bcf", "type": "word", - "zone": "a2723514-109f-4b13-8336-e0128c49aee4", - "source": "9ad06f06-000c-4d01-8038-1afc2e73c8e1", + "source": "1eaefd28-7475-47bb-aff5-b53a225e26ec", "worker_version": null, "text": "ROY", "score": 1.0 @@ -652,64 +868,59 @@ }, { "model": "documents.transcription", - "pk": "51785975-0bf3-4bd1-8002-ae8c21dacbd4", + "pk": "0c7b5b2d-d1f6-494c-8ba4-169cdf255f5e", "fields": { - "element": "c0ec4456-f6a2-4253-a876-6eba4f96f863", + "element": "4d5191a9-5161-4587-a6bf-6430786f869a", "type": "word", - "zone": "2e3f07cc-d8cb-4735-bed0-63d4d109d893", - "source": "9ad06f06-000c-4d01-8038-1afc2e73c8e1", + "source": "1eaefd28-7475-47bb-aff5-b53a225e26ec", "worker_version": null, - "text": "PARIS", + "text": "DATUM", "score": 1.0 } }, { "model": "documents.transcription", - "pk": "72409ca8-98ff-4055-98e8-5b5e8620e1fe", + "pk": "6c517add-09a0-41e4-bb03-0aea9ce1e628", "fields": { - "element": "c0ec4456-f6a2-4253-a876-6eba4f96f863", + "element": "73b56c40-4136-499b-9c10-82c2f569c78b", "type": "word", - "zone": "de6f5a26-52fd-4361-b30e-08b98b9d480a", - "source": "9ad06f06-000c-4d01-8038-1afc2e73c8e1", + "source": "1eaefd28-7475-47bb-aff5-b53a225e26ec", "worker_version": null, - "text": "DATUM", + "text": "PARIS", "score": 1.0 } }, { "model": "documents.transcription", - "pk": "81e2a90b-d518-4280-b71b-7cdc24587472", + "pk": "802856d1-13a8-4d63-9af6-b0c8db086a89", "fields": { - 
"element": "564aa56f-b257-445c-9930-4b6110628c09", - "type": "word", - "zone": "658a1c75-a4de-4842-8a09-2a51c22c8269", - "source": "9ad06f06-000c-4d01-8038-1afc2e73c8e1", + "element": "99de88f1-9655-4bae-9203-01d56d097ef7", + "type": "page", + "source": "1eaefd28-7475-47bb-aff5-b53a225e26ec", "worker_version": null, - "text": "PARIS", + "text": "Lorem ipsum dolor sit amet", "score": 1.0 } }, { "model": "documents.transcription", - "pk": "d4e38d93-48dc-4c46-881f-fe85ddfd01da", + "pk": "ab32afab-5d69-47d1-80f4-3ab1151462ef", "fields": { - "element": "8f92f79c-471a-4280-9530-66ec406ab8b2", + "element": "5f20bff4-aa8b-4afa-ad96-e791c2433dc1", "type": "word", - "zone": "2d6cbfa0-917e-4f3e-91db-3e050f236922", - "source": "9ad06f06-000c-4d01-8038-1afc2e73c8e1", + "source": "1eaefd28-7475-47bb-aff5-b53a225e26ec", "worker_version": null, - "text": "PARIS", + "text": "DATUM", "score": 1.0 } }, { "model": "documents.transcription", - "pk": "d5297c59-e7b8-4c03-8b53-d84347ec726c", + "pk": "d5cc8936-0a81-4b01-83d8-1dd75b0d60c0", "fields": { - "element": "8f92f79c-471a-4280-9530-66ec406ab8b2", + "element": "bbd63fcc-23af-4593-bcb5-341c3008b7ab", "type": "word", - "zone": "1323086e-7117-4454-b6d2-d2e9b9b30251", - "source": "9ad06f06-000c-4d01-8038-1afc2e73c8e1", + "source": "1eaefd28-7475-47bb-aff5-b53a225e26ec", "worker_version": null, "text": "DATUM", "score": 1.0 @@ -717,12 +928,11 @@ }, { "model": "documents.transcription", - "pk": "daef6593-28c3-4b81-a693-e1ec81f52718", + "pk": "d7789e57-9fae-4465-9ef3-2f552a3d1e86", "fields": { - "element": "c0ec4456-f6a2-4253-a876-6eba4f96f863", + "element": "72f64b68-afc4-41e0-8020-e590bdaa9f7c", "type": "word", - "zone": "4ae281d2-de97-42dc-ac66-d3edc93bf92e", - "source": "9ad06f06-000c-4d01-8038-1afc2e73c8e1", + "source": "1eaefd28-7475-47bb-aff5-b53a225e26ec", "worker_version": null, "text": "ROY", "score": 1.0 @@ -730,65 +940,63 @@ }, { "model": "documents.transcription", - "pk": "e275ebac-db1d-48df-90d5-0513464f4ab0", + "pk": 
"dcd5a2be-5f89-4565-8acc-521c08672c65", "fields": { - "element": "564aa56f-b257-445c-9930-4b6110628c09", + "element": "96893993-5b8d-4ed0-b305-08f745d8df01", "type": "word", - "zone": "021f8b5e-4963-4638-a24f-fa99114ffd17", - "source": "9ad06f06-000c-4d01-8038-1afc2e73c8e1", + "source": "1eaefd28-7475-47bb-aff5-b53a225e26ec", "worker_version": null, - "text": "DATUM", + "text": "ROY", "score": 1.0 } }, { "model": "documents.transcription", - "pk": "ea21f8c6-1ecf-4be8-848b-af333c8b83a1", + "pk": "f97dc9b7-15d5-4b3c-ac3e-621604fd5522", "fields": { - "element": "8f92f79c-471a-4280-9530-66ec406ab8b2", - "type": "page", - "zone": null, - "source": "9ad06f06-000c-4d01-8038-1afc2e73c8e1", + "element": "b9f39256-6ff0-4daf-b56a-a5a316964133", + "type": "word", + "source": "1eaefd28-7475-47bb-aff5-b53a225e26ec", "worker_version": null, - "text": "Lorem ipsum dolor sit amet", + "text": "PARIS", "score": 1.0 } }, { "model": "documents.allowedmetadata", - "pk": "0f90ca4d-c698-4a14-89fc-32c153e6a5fd", + "pk": "2ae06c55-5609-4a22-9ae3-d3ec5fe78582", "fields": { - "corpus": "1f7e82ec-4094-4397-b729-ec424ea7a37f", + "corpus": "78d45b37-36d1-4576-8bb2-8e5844f31feb", "type": "text", "name": "folio" } }, { "model": "documents.allowedmetadata", - "pk": "1ab75f14-57ca-4c05-a9fb-f10c082f4206", + "pk": "85f058a1-a3d3-43bc-8681-c11c5491e656", "fields": { - "corpus": "1f7e82ec-4094-4397-b729-ec424ea7a37f", - "type": "location", - "name": "location" + "corpus": "78d45b37-36d1-4576-8bb2-8e5844f31feb", + "type": "date", + "name": "date" } }, { "model": "documents.allowedmetadata", - "pk": "90cf5544-2b0a-465a-aae4-c7de6ade0592", + "pk": "d25ded48-d068-4fcd-a947-9a689ca271a8", "fields": { - "corpus": "1f7e82ec-4094-4397-b729-ec424ea7a37f", - "type": "date", - "name": "date" + "corpus": "78d45b37-36d1-4576-8bb2-8e5844f31feb", + "type": "location", + "name": "location" } }, { "model": "documents.metadata", - "pk": "0be7aa65-fa4d-4cea-bba2-86748478b595", + "pk": 
"0706361e-7c9a-4dc0-b058-ad365f015d01", "fields": { - "element": "564aa56f-b257-445c-9930-4b6110628c09", + "element": "99de88f1-9655-4bae-9203-01d56d097ef7", "name": "folio", "type": "text", - "value": "2r", + "value": "1r", "revision": null, "index": 0, "entity": null @@ -796,12 +1004,12 @@ }, { "model": "documents.metadata", - "pk": "12069432-7670-4378-a918-23daf27ea172", + "pk": "1000532c-4eb2-4d2b-91ac-1a8e7b743284", "fields": { - "element": "7c022411-eb2c-4f06-9387-5d8418a0bf33", - "name": "number", + "element": "35fb048c-f441-4e9d-bb69-78a8dfdb4598", + "name": "folio", "type": "text", - "value": "4", + "value": "1v", "revision": null, "index": 0, "entity": null @@ -809,12 +1017,12 @@ }, { "model": "documents.metadata", - "pk": "14e7fd76-4c9b-4e45-b01c-f6ac04b4d17c", + "pk": "31edcdb5-5b70-4848-b890-fb88afe1d3e0", "fields": { - "element": "cb69d995-2ce2-4404-a567-5598cf9dd231", + "element": "c4ccff42-93d3-4f66-a073-397dd85e1806", "name": "number", "type": "text", - "value": "2", + "value": "5", "revision": null, "index": 0, "entity": null @@ -822,12 +1030,12 @@ }, { "model": "documents.metadata", - "pk": "2f82730b-fc55-485c-869d-7aa2214aafef", + "pk": "6a916e68-e8f1-455e-b0ab-f5947355187c", "fields": { - "element": "c0ec4456-f6a2-4253-a876-6eba4f96f863", - "name": "folio", + "element": "b675ba10-f6ed-45cb-8a8c-449940471db5", + "name": "number", "type": "text", - "value": "1v", + "value": "4", "revision": null, "index": 0, "entity": null @@ -835,9 +1043,9 @@ }, { "model": "documents.metadata", - "pk": "63ff193e-fd67-4c2d-b52d-3c99a49f1131", + "pk": "74fd04ea-9352-41c9-bdbf-24e36f49a305", "fields": { - "element": "dbfa2c38-8664-447a-bb09-215cc86c6ab4", + "element": "5130da8e-f2aa-477d-bf58-a2357efc8b2b", "name": "folio", "type": "text", "value": "2r", @@ -848,12 +1056,12 @@ }, { "model": "documents.metadata", - "pk": "759c2edc-6e7b-4f96-9cd9-0daa38d9757a", + "pk": "99164dd7-bc9c-49c9-89f6-a8ffc0be7779", "fields": { - "element": 
"4e9de67b-6e4f-46b7-936e-34ed9692900b", - "name": "folio", + "element": "c53fc91f-58c7-4fda-a92c-72a5f4051176", + "name": "number", "type": "text", - "value": "1v", + "value": "3", "revision": null, "index": 0, "entity": null @@ -861,12 +1069,12 @@ }, { "model": "documents.metadata", - "pk": "79c508f3-4288-4a1b-8f1f-19af1a057e3e", + "pk": "9c5602e4-1fb8-444f-b23d-6f9175eab2c9", "fields": { - "element": "d19ded5f-c50c-4cc6-a0cf-4463e2f2df68", - "name": "number", + "element": "cbafeb68-d66a-4e1c-bcd4-47dd88b02eec", + "name": "folio", "type": "text", - "value": "5", + "value": "1v", "revision": null, "index": 0, "entity": null @@ -874,12 +1082,12 @@ }, { "model": "documents.metadata", - "pk": "7f3688fb-3a16-4cba-b218-47a372eb5480", + "pk": "afce1c14-2ee8-46ed-9c78-4360562e91c2", "fields": { - "element": "9621c876-e7da-4d43-a5a7-82470dead663", + "element": "ebdb0c60-e1e4-4f8a-a8b7-4127896ac194", "name": "number", "type": "text", - "value": "1", + "value": "2", "revision": null, "index": 0, "entity": null @@ -887,12 +1095,12 @@ }, { "model": "documents.metadata", - "pk": "d86e7d8d-2dc8-4ac7-ac5e-d8aff8df4548", + "pk": "cef63c02-d1b9-459b-88b2-771f3f9b7c87", "fields": { - "element": "7e4d2f71-f52a-44a6-8cb5-f209a0b82408", - "name": "folio", + "element": "7e788454-afed-4bef-a370-bccf0dd3b2b4", + "name": "number", "type": "text", - "value": "1r", + "value": "1", "revision": null, "index": 0, "entity": null @@ -900,12 +1108,12 @@ }, { "model": "documents.metadata", - "pk": "d9c74f49-670d-4b37-a21e-60ca50059cda", + "pk": "e1850c59-5a05-438a-a49d-e8a99710671e", "fields": { - "element": "ccc2cfdb-2de5-4e42-8a1a-fc5e1041b21d", - "name": "number", + "element": "326843be-0112-4954-a2ea-da78aded01a9", + "name": "folio", "type": "text", - "value": "3", + "value": "1r", "revision": null, "index": 0, "entity": null @@ -913,12 +1121,12 @@ }, { "model": "documents.metadata", - "pk": "f4103a83-94eb-4634-8cd3-32db7e94ac1e", + "pk": "edeaf010-9326-4a0f-9e31-f403b8363be6", "fields": { - 
"element": "8f92f79c-471a-4280-9530-66ec406ab8b2", + "element": "7945d738-4a6e-4a76-ad57-21c03d63625d", "name": "folio", "type": "text", - "value": "1r", + "value": "2r", "revision": null, "index": 0, "entity": null @@ -942,12 +1150,12 @@ }, { "model": "images.image", - "pk": "0a18ca58-69a7-44f8-b2ac-739a7a978cf2", + "pk": "5f4f5d85-42d7-48d0-bae3-6be0dfa1762f", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", "server": 1, - "path": "img3", + "path": "img6", "width": 1000, "height": 1000, "hash": null, @@ -956,7 +1164,7 @@ }, { "model": "images.image", - "pk": "7ce02a7c-877a-46bc-8557-cd87b6945d81", + "pk": "5f870efa-b012-4344-92ac-e5226c4cd26b", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", @@ -970,7 +1178,7 @@ }, { "model": "images.image", - "pk": "a415642f-af03-422d-acf2-a9fce1cf6ef1", + "pk": "68474756-161a-4199-96e1-e79dc216a8a0", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", @@ -984,12 +1192,12 @@ }, { "model": "images.image", - "pk": "c1f75796-3a1c-4e85-b8fa-43472a719312", + "pk": "75371cf1-e321-47ec-bbc3-16c7c3d8f89a", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", "server": 1, - "path": "img5", + "path": "img3", "width": 1000, "height": 1000, "hash": null, @@ -998,7 +1206,7 @@ }, { "model": "images.image", - "pk": "ddbb369c-a6ed-488c-9108-e996ade67b29", + "pk": "8f7f17ff-9555-488f-acac-9cb6814e46a8", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", @@ -1012,12 +1220,12 @@ }, { "model": "images.image", - "pk": "e6df804d-82f7-420e-a562-fb3d265a2bec", + "pk": "996b53e8-4b88-473f-ad2e-11ea4848c14e", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", "server": 1, - "path": "img6", + "path": "img5", "width": 1000, "height": 1000, "hash": null, @@ -1026,209 +1234,209 @@ }, { "model": "images.zone", - "pk": 
"021f8b5e-4963-4638-a24f-fa99114ffd17", + "pk": "0039b5f5-078f-4667-825c-6e620b92b59a", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "image": "0a18ca58-69a7-44f8-b2ac-739a7a978cf2", - "polygon": "LINEARRING (700 700, 700 800, 800 800, 800 700, 700 700)" + "image": "75371cf1-e321-47ec-bbc3-16c7c3d8f89a", + "polygon": "LINEARRING (400 400, 400 500, 500 500, 500 400, 400 400)" } }, { "model": "images.zone", - "pk": "1323086e-7117-4454-b6d2-d2e9b9b30251", + "pk": "0fb66d83-3d13-4d4e-82d1-1fd7fc82036f", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "image": "ddbb369c-a6ed-488c-9108-e996ade67b29", - "polygon": "LINEARRING (700 700, 700 800, 800 800, 800 700, 700 700)" + "image": "68474756-161a-4199-96e1-e79dc216a8a0", + "polygon": "LINEARRING (0 0, 0 1000, 1000 1000, 1000 0, 0 0)" } }, { "model": "images.zone", - "pk": "20f0abb7-3aac-460b-b1af-b75ed7dab550", + "pk": "11bb6bba-8a2e-4f9e-97b1-0f48bc34103f", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "image": "0a18ca58-69a7-44f8-b2ac-739a7a978cf2", + "image": "5f4f5d85-42d7-48d0-bae3-6be0dfa1762f", "polygon": "LINEARRING (0 0, 0 1000, 1000 1000, 1000 0, 0 0)" } }, { "model": "images.zone", - "pk": "2d6cbfa0-917e-4f3e-91db-3e050f236922", + "pk": "272f32c2-29eb-47e7-b244-b0a6c2a772c0", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "image": "ddbb369c-a6ed-488c-9108-e996ade67b29", + "image": "75371cf1-e321-47ec-bbc3-16c7c3d8f89a", "polygon": "LINEARRING (100 100, 100 200, 200 200, 200 100, 100 100)" } }, { "model": "images.zone", - "pk": "2e3f07cc-d8cb-4735-bed0-63d4d109d893", + "pk": "28c84829-8312-4f41-81e5-fa00fad02718", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "image": "a415642f-af03-422d-acf2-a9fce1cf6ef1", - "polygon": "LINEARRING (100 100, 100 200, 200 200, 200 100, 100 100)" + "image": 
"8f7f17ff-9555-488f-acac-9cb6814e46a8", + "polygon": "LINEARRING (600 600, 600 1000, 1000 1000, 1000 600, 600 600)" } }, { "model": "images.zone", - "pk": "3e6c6344-4dec-4609-b12d-14fd65c32664", + "pk": "34f21948-295f-48a6-9644-41af328fdce4", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "image": "0a18ca58-69a7-44f8-b2ac-739a7a978cf2", - "polygon": "LINEARRING (0 0, 0 300, 300 300, 300 0, 0 0)" + "image": "996b53e8-4b88-473f-ad2e-11ea4848c14e", + "polygon": "LINEARRING (0 0, 0 1000, 1000 1000, 1000 0, 0 0)" } }, { "model": "images.zone", - "pk": "4ae281d2-de97-42dc-ac66-d3edc93bf92e", + "pk": "40edd01e-02e6-4fbf-b2c6-b7452a72c79e", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "image": "a415642f-af03-422d-acf2-a9fce1cf6ef1", - "polygon": "LINEARRING (400 400, 400 500, 500 500, 500 400, 400 400)" + "image": "8f7f17ff-9555-488f-acac-9cb6814e46a8", + "polygon": "LINEARRING (700 700, 700 800, 800 800, 800 700, 700 700)" } }, { "model": "images.zone", - "pk": "53ad7c0c-efcd-4001-be5b-298d1e8498b6", + "pk": "440f85ff-25d9-4488-8654-98c5916df96f", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "image": "ddbb369c-a6ed-488c-9108-e996ade67b29", - "polygon": "LINEARRING (0 0, 0 600, 600 600, 600 0, 0 0)" + "image": "75371cf1-e321-47ec-bbc3-16c7c3d8f89a", + "polygon": "LINEARRING (300 300, 300 600, 600 600, 600 300, 300 300)" } }, { "model": "images.zone", - "pk": "541c027b-3e0b-42f6-acf6-abcc49ff5f1e", + "pk": "6a85fb05-f7d1-498d-b93c-62739562c3e3", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "image": "7ce02a7c-877a-46bc-8557-cd87b6945d81", + "image": "75371cf1-e321-47ec-bbc3-16c7c3d8f89a", "polygon": "LINEARRING (0 0, 0 1000, 1000 1000, 1000 0, 0 0)" } }, { "model": "images.zone", - "pk": "543950e5-031d-41d1-83fd-7365c193c916", + "pk": "72e1be99-f517-48c4-b45a-808dea668ec0", "fields": { 
"created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "image": "e6df804d-82f7-420e-a562-fb3d265a2bec", - "polygon": "LINEARRING (0 0, 0 1000, 1000 1000, 1000 0, 0 0)" + "image": "8f7f17ff-9555-488f-acac-9cb6814e46a8", + "polygon": "LINEARRING (100 100, 100 200, 200 200, 200 100, 100 100)" } }, { "model": "images.zone", - "pk": "59c89a4e-b961-4158-8107-f24463b40fca", + "pk": "7e767702-f644-451f-91e8-05423bb8e074", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "image": "ddbb369c-a6ed-488c-9108-e996ade67b29", - "polygon": "LINEARRING (400 400, 400 500, 500 500, 500 400, 400 400)" + "image": "75371cf1-e321-47ec-bbc3-16c7c3d8f89a", + "polygon": "LINEARRING (700 700, 700 800, 800 800, 800 700, 700 700)" } }, { "model": "images.zone", - "pk": "5bd58b5d-8ed2-4e56-a466-b863101dfdc6", + "pk": "9bae7b8e-b03b-40cf-a0d8-80393d76a304", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "image": "ddbb369c-a6ed-488c-9108-e996ade67b29", - "polygon": "LINEARRING (0 0, 0 1000, 1000 1000, 1000 0, 0 0)" + "image": "8f7f17ff-9555-488f-acac-9cb6814e46a8", + "polygon": "LINEARRING (0 0, 0 600, 600 600, 600 0, 0 0)" } }, { "model": "images.zone", - "pk": "658a1c75-a4de-4842-8a09-2a51c22c8269", + "pk": "a236763a-6d34-4c41-a1e5-a5e19d115d50", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "image": "0a18ca58-69a7-44f8-b2ac-739a7a978cf2", - "polygon": "LINEARRING (100 100, 100 200, 200 200, 200 100, 100 100)" + "image": "75371cf1-e321-47ec-bbc3-16c7c3d8f89a", + "polygon": "LINEARRING (0 0, 0 300, 300 300, 300 0, 0 0)" } }, { "model": "images.zone", - "pk": "a2723514-109f-4b13-8336-e0128c49aee4", + "pk": "aefbd01b-3421-458f-a8e6-2b5ffdaccc31", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "image": "0a18ca58-69a7-44f8-b2ac-739a7a978cf2", - "polygon": "LINEARRING (400 400, 400 500, 500 500, 500 
400, 400 400)" + "image": "5f870efa-b012-4344-92ac-e5226c4cd26b", + "polygon": "LINEARRING (0 0, 0 1000, 1000 1000, 1000 0, 0 0)" } }, { "model": "images.zone", - "pk": "b9cb9d9d-8de0-4fcf-8063-a5eb30634b8e", + "pk": "b70d7b27-c973-44ba-93c5-602957ae491e", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "image": "ddbb369c-a6ed-488c-9108-e996ade67b29", + "image": "75371cf1-e321-47ec-bbc3-16c7c3d8f89a", "polygon": "LINEARRING (600 600, 600 1000, 1000 1000, 1000 600, 600 600)" } }, { "model": "images.zone", - "pk": "d2e7ee12-14fa-46c8-a360-f5b31939052a", + "pk": "bea07b51-a5bd-400a-b5d0-b0320dc77bd1", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "image": "0a18ca58-69a7-44f8-b2ac-739a7a978cf2", - "polygon": "LINEARRING (600 600, 600 1000, 1000 1000, 1000 600, 600 600)" + "image": "68474756-161a-4199-96e1-e79dc216a8a0", + "polygon": "LINEARRING (700 700, 700 800, 800 800, 800 700, 700 700)" } }, { "model": "images.zone", - "pk": "dda78215-f30b-497f-aedb-fac49055b667", + "pk": "c12fb064-b0df-4d8a-af13-55b2e9862e8d", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "image": "a415642f-af03-422d-acf2-a9fce1cf6ef1", - "polygon": "LINEARRING (0 0, 0 1000, 1000 1000, 1000 0, 0 0)" + "image": "68474756-161a-4199-96e1-e79dc216a8a0", + "polygon": "LINEARRING (100 100, 100 200, 200 200, 200 100, 100 100)" } }, { "model": "images.zone", - "pk": "de6f5a26-52fd-4361-b30e-08b98b9d480a", + "pk": "db138d8c-b5fc-46e6-8dc6-483a1ffc8bf6", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "image": "a415642f-af03-422d-acf2-a9fce1cf6ef1", - "polygon": "LINEARRING (700 700, 700 800, 800 800, 800 700, 700 700)" + "image": "68474756-161a-4199-96e1-e79dc216a8a0", + "polygon": "LINEARRING (400 400, 400 500, 500 500, 500 400, 400 400)" } }, { "model": "images.zone", - "pk": "e67eba1f-4993-492e-9ca0-67451ceb094c", + "pk": 
"f96e2027-4568-4527-a469-bcf27f4fcbb2", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "image": "c1f75796-3a1c-4e85-b8fa-43472a719312", - "polygon": "LINEARRING (0 0, 0 1000, 1000 1000, 1000 0, 0 0)" + "image": "8f7f17ff-9555-488f-acac-9cb6814e46a8", + "polygon": "LINEARRING (400 400, 400 500, 500 500, 500 400, 400 400)" } }, { "model": "images.zone", - "pk": "f3b62073-8fe4-46c9-b7bf-012c3014359b", + "pk": "fb6dff0f-e117-48f0-b27a-9fa65f42a055", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "image": "0a18ca58-69a7-44f8-b2ac-739a7a978cf2", - "polygon": "LINEARRING (300 300, 300 600, 600 600, 600 300, 300 300)" + "image": "8f7f17ff-9555-488f-acac-9cb6814e46a8", + "polygon": "LINEARRING (0 0, 0 1000, 1000 1000, 1000 0, 0 0)" } }, { "model": "users.user", "pk": 1, "fields": { - "password": "pbkdf2_sha256$216000$l8Q874FbHSd8$nuAro6QYXwWYcYcFP1hVKgZUjTYnmbAKLqvFBRpLizU=", + "password": "pbkdf2_sha256$216000$nxRE2ciWpZ7g$92fj1x7D47uAXNxwfbkSX8ffAeO03T28riqjHM5yJVk=", "last_login": null, "email": "root@root.fr", "transkribus_email": null, @@ -1243,7 +1451,7 @@ "model": "users.user", "pk": 2, "fields": { - "password": "pbkdf2_sha256$216000$JbMVfYKIdmbH$HacY6ieeQBFMnMT32valEOgJnifIPuxNWx5LsaZSEN4=", + "password": "pbkdf2_sha256$216000$QEkgVJ40KttJ$98o0rhKrYu1vFJZ/+rOKk/D+oi52z6XGYIayxGO6P4I=", "last_login": null, "email": "internal@internal.fr", "transkribus_email": null, @@ -1258,7 +1466,7 @@ "model": "users.user", "pk": 3, "fields": { - "password": "pbkdf2_sha256$216000$RurQ9pYDub7S$bhJUTPQbo1jcqBoDS69RNbdzM6cr0ydKcE9Dk7mQyyU=", + "password": "pbkdf2_sha256$216000$mWYa0QHsUxjL$z8BQ0LObKxjCWFE29wIKXyRuL+rEtgWyGuboxc5DL1Q=", "last_login": null, "email": "user@user.fr", "transkribus_email": null, @@ -1271,7 +1479,7 @@ }, { "model": "users.oauthcredentials", - "pk": "d21e40e2-32d2-4903-ac4a-6672dc6c14ba", + "pk": "b07d0cb3-65b7-45c4-9bf2-d7153f4d27cd", "fields": { "user": 3, 
"provider_name": "gitlab", @@ -1288,7 +1496,7 @@ "pk": 1, "fields": { "user": 3, - "corpus": "1f7e82ec-4094-4397-b729-ec424ea7a37f", + "corpus": "78d45b37-36d1-4576-8bb2-8e5844f31feb", "can_write": true, "can_admin": true } @@ -2879,7 +3087,7 @@ }, { "model": "ponos.workflow", - "pk": "b391055a-84f5-431e-84ff-fec1836c4a5e", + "pk": "1fb1e3f8-67e4-45dd-acc8-1bdedf4e6f98", "fields": { "recipe": "tasks:\n docker_build:\n image: reco", "created": "2020-02-02T01:23:45.678Z", @@ -2888,7 +3096,7 @@ }, { "model": "ponos.task", - "pk": "71dd7910-8bef-4940-ab6a-50df883a2cb3", + "pk": "a9d5d4ce-188e-4659-9f35-4de56a3e3e1d", "fields": { "run": 0, "depth": 0, @@ -2901,7 +3109,7 @@ "has_docker_socket": false, "image_artifact": null, "agent": null, - "workflow": "b391055a-84f5-431e-84ff-fec1836c4a5e", + "workflow": "1fb1e3f8-67e4-45dd-acc8-1bdedf4e6f98", "container": null, "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", @@ -2910,9 +3118,9 @@ }, { "model": "ponos.artifact", - "pk": "71a4b60b-a2f2-4503-8f5d-9d3c38f66626", + "pk": "2f4c9981-6ee0-43a1-80f0-54cca23be3ff", "fields": { - "task": "71dd7910-8bef-4940-ab6a-50df883a2cb3", + "task": "a9d5d4ce-188e-4659-9f35-4de56a3e3e1d", "path": "/path/to/docker_build", "size": 42000, "content_type": "application/octet-stream", diff --git a/arkindex/documents/management/commands/build_fixtures.py b/arkindex/documents/management/commands/build_fixtures.py index 55d5ea4161003e8bd70441c1510de1275b8c576b..24bd321d0c783f18c968b3643fd6f507547ae292 100644 --- a/arkindex/documents/management/commands/build_fixtures.py +++ b/arkindex/documents/management/commands/build_fixtures.py @@ -2,7 +2,7 @@ from django.core.management.base import BaseCommand from arkindex_common.ml_tool import MLToolType from arkindex_common.enums import TranscriptionType, MetaType -from arkindex.documents.models import Corpus, Element, Transcription, DataSource, MetaData +from arkindex.documents.models import Corpus, Element, DataSource, MetaData 
from arkindex.dataimport.models import RepositoryType, WorkerVersion, WorkerVersionState, Workflow from arkindex.images.models import ImageServer, Image, Zone from arkindex.users.models import User, CorpusRight @@ -150,6 +150,11 @@ class Command(BaseCommand): # Allows manual transcriptions of type 'line' for text lines allowed_transcription=TranscriptionType.Line ) + word_type = corpus.types.create( + slug='word', + display_name='Word', + allowed_transcription=TranscriptionType.Word + ) # Create 2 volumes vol1 = Element.objects.create( @@ -217,21 +222,24 @@ class Command(BaseCommand): # Create transcriptions on images of volume 1 for page in (p1_1, p1_2, p1_3): for word, pos in [("PARIS", 100), ("ROY", 400), ("DATUM", 700)]: - Transcription.objects.create( - element=page, + element = corpus.elements.create( + type=word_type, + name=word, + zone=makezone(page.zone.image, pos, pos + 100) + ) + element.add_parent(page) + element.transcriptions.create( source=recognizer_source, text=word, type=TranscriptionType.Word, - zone=makezone(page.zone.image, pos, pos + 100), score=1.0, ) - # Create a page transcriptions on page 1 with no zone + # Create a page transcription on page 1 p1_1.transcriptions.create( source=recognizer_source, text='Lorem ipsum dolor sit amet', type=TranscriptionType.Page, - zone=None, score=1.0, ) diff --git a/arkindex/documents/management/commands/reindex.py b/arkindex/documents/management/commands/reindex.py index 528ff0f2c791c1b952c9cd3a3079c91190aaf0e7..3632b44ac042bef105467e2efd6eba012e944d88 100644 --- a/arkindex/documents/management/commands/reindex.py +++ b/arkindex/documents/management/commands/reindex.py @@ -16,13 +16,15 @@ logger = logging.getLogger(__name__) def get_transcriptions(corpus=None, folder=None): if folder: # Lookup all the transcriptions linked to a folder - return Transcription.objects.filter( + queryset = Transcription.objects.filter( element__in=Element.objects.get_descending(folder.id) ).distinct() elif corpus: - return 
Transcription.objects.filter(element__corpus=corpus) + queryset = Transcription.objects.filter(element__corpus=corpus) + else: + queryset = Transcription.objects.all() - return Transcription.objects.all() + return queryset.select_related('element') def get_elements(corpus=None, folder=None): @@ -33,7 +35,7 @@ def get_elements(corpus=None, folder=None): else: queryset = Element.objects.all() - return queryset.prefetch_related('metadatas', 'transcriptions') + return queryset.select_related('type').prefetch_related('metadatas', 'transcriptions') def get_entities(corpus=None, folder=None): diff --git a/arkindex/documents/migrations/0021_move_transcriptions.py b/arkindex/documents/migrations/0021_move_transcriptions.py new file mode 100644 index 0000000000000000000000000000000000000000..728be9aa5f17ccaa1c2a1ec864d7b02660c3925e --- /dev/null +++ b/arkindex/documents/migrations/0021_move_transcriptions.py @@ -0,0 +1,132 @@ +# Generated by Django 3.1 on 2020-09-01 07:48 + +from django.db import migrations, models +from arkindex_common.enums import TranscriptionType + + +def preflight_checks(apps, schema_editor): + ElementType = apps.get_model('documents', 'ElementType') + existing_types = [] + + for ts_type in TranscriptionType: + if ElementType.objects.filter(slug=f'transcription_{ts_type.value}').exists(): + existing_types.append(f'`transcription_{ts_type.value}`') + + if existing_types: + raise AssertionError( + 'This migration could not be run because one or more element types use the reserved slug(s) ' + + ', '.join(existing_types) + ) + + +FORWARD_SQL = [ + 'CREATE EXTENSION IF NOT EXISTS "uuid-ossp";', + # Early handling for the edge case of transcriptions already on the correct element + """ + UPDATE documents_transcription transcription + SET zone_id = NULL + FROM documents_element element + WHERE transcription.element_id = element.id + AND transcription.zone_id IS NOT NULL + AND transcription.zone_id = element.zone_id; + """, + # Create element types starting 
with transcription_* as needed + """ + INSERT INTO documents_elementtype (id, corpus_id, slug, display_name, folder, allowed_transcription) + SELECT + uuid_generate_v4(), + element.corpus_id, + 'transcription_' || transcription.type, + initcap(transcription.type) || ' Transcription', + FALSE, + transcription.type + FROM documents_transcription transcription + INNER JOIN documents_element element ON (element.id = transcription.element_id) + WHERE transcription.zone_id IS NOT NULL + GROUP BY element.corpus_id, transcription.type; + """, + # Create new elements + """ + INSERT INTO documents_element (id, corpus_id, type_id, name, zone_id, source_id, worker_version_id, created, updated) + SELECT + transcription.id, + element.corpus_id, + type.id, + (ROW_NUMBER() OVER ( + PARTITION BY + transcription.element_id, + transcription.source_id, + transcription.worker_version_id, + transcription.type + ORDER BY + ST_Y(ST_StartPoint(polygon)), + ST_X(ST_StartPoint(polygon)) + ))::varchar, + transcription.zone_id, + transcription.source_id, + transcription.worker_version_id, + NOW(), + NOW() + FROM + documents_transcription transcription + INNER JOIN documents_element element on (transcription.element_id = element.id) + INNER JOIN documents_elementtype type ON (type.corpus_id = element.corpus_id AND type.slug = 'transcription_' || transcription.type) + INNER JOIN images_zone zone ON (transcription.zone_id = zone.id); + """, + # Create element paths + # Append to existing parent paths of the parent element, or create one new element path with the parent element itself in it + """ + INSERT INTO documents_elementpath (id, element_id, path, ordering) + SELECT + uuid_generate_v4(), + transcription.id, + COALESCE(path.path, ARRAY[]::uuid[]) || transcription.element_id, + ROW_NUMBER() OVER ( + PARTITION BY transcription.element_id + ORDER BY + ST_Y(ST_StartPoint(polygon)), + ST_X(ST_StartPoint(polygon)) + ) + FROM + documents_transcription transcription + INNER JOIN images_zone zone ON 
(zone.id = transcription.zone_id) + LEFT JOIN documents_elementpath path ON (path.element_id = transcription.element_id); + """, + # Move transcriptions to their new elements + """ + UPDATE documents_transcription + SET element_id = id + WHERE zone_id IS NOT NULL; + """, + # At this point, we can drop the zone column, but this would fail due to 'pending trigger events' + # Postgres does not allow editing the schema *after* editing the data in the same transaction; + # This migration is continued in documents.0022 to allow a new database transaction to happen. +] + + +class Migration(migrations.Migration): + + dependencies = [ + ('documents', '0020_remove_source_xor_version_constraint'), + ('images', '0005_polygon_index') + ] + + operations = [ + migrations.AddConstraint( + model_name='transcription', + constraint=models.CheckConstraint( + check=~models.Q(source_id__isnull=False, worker_version_id__isnull=False), + name='transcription_source_not_worker_version', + ) + ), + migrations.RunPython( + preflight_checks, + reverse_code=migrations.RunPython.noop, + elidable=True, + ), + migrations.RunSQL( + FORWARD_SQL, + reverse_sql=migrations.RunSQL.noop, + elidable=True, + ), + ] diff --git a/arkindex/documents/migrations/0022_remove_transcription_zone.py b/arkindex/documents/migrations/0022_remove_transcription_zone.py new file mode 100644 index 0000000000000000000000000000000000000000..edb333dab4f65cef1068ba654291e1468a32a201 --- /dev/null +++ b/arkindex/documents/migrations/0022_remove_transcription_zone.py @@ -0,0 +1,80 @@ +# Generated by Django 3.1 on 2020-09-01 07:48 + +from django.db import migrations + +FORWARD_SQL = [ + # Use a temporary table here to iterate over transcriptions just once before deleting, + # causing this migration to only take a few minutes + # Note the strange join conditions as either source_id or worker_version_id are NULL, + # which causes a NATURAL JOIN or a JOIN … USING to fail since comparing NULLs returns NULL.
+ """ + CREATE TEMPORARY TABLE duplicate_ids AS + WITH filters AS ( + SELECT + sub.*, + FIRST_VALUE(id) OVER ( + PARTITION BY + transcription.element_id, + transcription.source_id, + transcription.worker_version_id + ) AS keep_id + FROM documents_transcription transcription + INNER JOIN ( + SELECT element_id, source_id, worker_version_id + FROM documents_transcription + GROUP BY element_id, source_id, worker_version_id + HAVING COUNT(*) > 1 + ) sub ON ( + sub.element_id = transcription.element_id AND ( + sub.source_id = transcription.source_id + OR sub.worker_version_id = transcription.worker_version_id + ) + ) + ) + SELECT id + FROM documents_transcription transcription + INNER JOIN filters ON ( + filters.element_id = transcription.element_id AND ( + filters.source_id = transcription.source_id + OR filters.worker_version_id = transcription.worker_version_id + ) + ) + WHERE keep_id != id; + """, + # Remove any TranscriptionEntity that could be linked to the duplicate transcriptions + """ + DELETE FROM documents_transcriptionentity transcriptionentity + USING duplicate_ids + WHERE transcriptionentity.transcription_id = duplicate_ids.id; + """, + # Remove duplicate transcriptions + """ + DELETE FROM documents_transcription transcription + USING duplicate_ids + WHERE transcription.id = duplicate_ids.id; + """, + 'DROP TABLE duplicate_ids;', +] + + +class Migration(migrations.Migration): + + dependencies = [ + ('documents', '0021_move_transcriptions'), + ] + + operations = [ + migrations.RemoveField( + model_name='transcription', + name='zone', + ), + # Remove the few remaining transcriptions that would break the unique constraints we will add in documents.0023. + # Those are transcriptions from the same source, on the same element, with the exact same zones. + # This query is rather complex as we want to only remove duplicates, and window functions have their limits, + # but the GROUP BY…HAVING will quickly exclude most of the table so it isn't slow. 
+ migrations.RunSQL( + FORWARD_SQL, + reverse_sql=migrations.RunSQL.noop, + elidable=True, + ), + ] diff --git a/arkindex/documents/models.py b/arkindex/documents/models.py index 7f77e4d49446607b318f429da5d881962d9fafb2..9f4386df43feb4289c556b49773abf7684f1c12a 100644 --- a/arkindex/documents/models.py +++ b/arkindex/documents/models.py @@ -440,12 +440,6 @@ class Transcription(models.Model): max_length=50, db_index=True, ) - zone = models.ForeignKey( - 'images.Zone', - on_delete=models.PROTECT, - related_name='transcriptions', - null=True, - ) source = models.ForeignKey( DataSource, on_delete=models.CASCADE, @@ -468,17 +462,18 @@ class Transcription(models.Model): related_name='transcriptions', ) - class Meta: - # The following index was attempted with md5(text) in a manual migration - # but it causes too many performance issues. - # unique_together = ( - # ('element', 'zone', 'text') - # ) - pass - def __str__(self): return 'Transcription: {}'.format(self.text[:20]) + class Meta: + constraints = [ + # Require either a source, a worker version, or none (manual), but not both at once + models.CheckConstraint( + check=~Q(source_id__isnull=False, worker_version_id__isnull=False), + name='transcription_source_not_worker_version', + ) + ] + class TranscriptionEntity(models.Model): """ diff --git a/arkindex/documents/search.py b/arkindex/documents/search.py index bfc09fcd2ae8236cfe353317f032089c3e7ce1d5..6c69a9713cd353ed392abc38e152b6ecbd0432d1 100644 --- a/arkindex/documents/search.py +++ b/arkindex/documents/search.py @@ -1,5 +1,6 @@ -from arkindex.documents.models import Transcription, Element, Entity from itertools import chain +from django.db.models import prefetch_related_objects +from arkindex.documents.models import Transcription, Element, Entity import uuid @@ -12,7 +13,7 @@ def search_transcriptions_post(data): ts = Transcription.objects \ .filter(id__in=transcription_ids) \ .order_by('-score') \ - .prefetch_related('zone__image__server', 'element', 'source') + 
.prefetch_related('element__zone__image__server', 'source') element_ids = list(ts.values_list('element_id', flat=True)) all_parent_paths = Element.objects.get_ascendings_paths(*element_ids) for trans in ts: @@ -62,7 +63,7 @@ def search_elements_post(data): transcriptions = { t.id: t - for t in Transcription.objects.filter(id__in=tr_ids).prefetch_related('zone__image__server', 'source') + for t in Transcription.objects.filter(id__in=tr_ids).prefetch_related('source') } elts_tr_ids = { @@ -78,11 +79,15 @@ def search_elements_post(data): for result in data } - elts = list(Element.objects.filter(id__in=elt_ids).prefetch_related('corpus', 'type')) + elts = list(Element.objects.filter(id__in=elt_ids).prefetch_related('corpus', 'type', 'zone__image__server')) # Preserve the ordering given by ElasticSearch ordered_elts = list(filter(None, map(lambda eid: next((e for e in elts if e.id == eid), None), elt_ids))) all_paths = Element.objects.get_ascendings_paths(*(e.id for e in ordered_elts)) + prefetch_related_objects( + [element for paths in all_paths.values() for path in paths for element in path], + 'type', + ) for elt in ordered_elts: elt.transcriptions_results = list(filter(None, [transcriptions.get(tid) for tid in elts_tr_ids[elt.id]])) diff --git a/arkindex/documents/serializers/iiif/annotations.py b/arkindex/documents/serializers/iiif/annotations.py index 9b21d19f1c87d8532f8ae274691041aefc011d3b..0fdbb9fa99bf3dbfbc2efc970caddb1e421e2d51 100644 --- a/arkindex/documents/serializers/iiif/annotations.py +++ b/arkindex/documents/serializers/iiif/annotations.py @@ -1,5 +1,6 @@ from abc import ABC, abstractmethod from django.conf import settings +from django.db.models import Q from rest_framework import serializers from arkindex.documents.models import Element, Transcription from arkindex.project.tools import build_absolute_url, bounding_box @@ -77,7 +78,7 @@ class TranscriptionSearchAnnotationSerializer(TranscriptionAnnotationSerializer) def get_target(self, ts): assert 
isinstance(ts, Transcription) url = build_absolute_url(ts.element, self.context['request'], 'api:iiif-canvas') - x, y, w, h = bounding_box(ts.zone.polygon) + x, y, w, h = bounding_box(ts.element.zone.polygon) return f'{url}#xywh={x},{y},{w},{h}' @@ -103,8 +104,11 @@ class AnnotationListSerializer(serializers.BaseSerializer): } def get_elements(self, element): - return element.transcriptions.all() "Get a list of elements to serialize as annotations." + return Transcription.objects.filter( + Q(element=element) + | Q(element__in=Element.objects.get_descending(element.id)) + ) class ElementAnnotationListSerializer(AnnotationListSerializer): diff --git a/arkindex/documents/serializers/ml.py b/arkindex/documents/serializers/ml.py index 65a34c6e7d267d75baddd25d5aa2ab8371ecc91d..a72388fb09beea768adbf1072fee2730a917cb38 100644 --- a/arkindex/documents/serializers/ml.py +++ b/arkindex/documents/serializers/ml.py @@ -10,7 +10,6 @@ from arkindex.documents.models import ( Corpus, Element, ElementType, Transcription, DataSource, MLClass, Classification, ClassificationState ) from arkindex.project.serializer_fields import EnumField, LinearRingField -from arkindex.images.serializers import ZoneSerializer from arkindex.documents.serializers.light import ElementZoneSerializer import uuid @@ -232,18 +231,16 @@ class TranscriptionSerializer(serializers.ModelSerializer): Serialises a Transcription """ type = EnumField(TranscriptionType, read_only=True) - zone = ZoneSerializer(read_only=True) source = DataSourceSerializer(read_only=True) class Meta: model = Transcription - read_only_fields = ('id', 'type', 'score', 'zone', 'source') + read_only_fields = ('id', 'type', 'score', 'source') fields = ( 'id', 'type', 'text', 'score', - 'zone', 'source', 'worker_version_id', ) diff --git a/arkindex/documents/serializers/search.py b/arkindex/documents/serializers/search.py index 9bd27fbbe31554d82febad4050b17102c551e920..3b6a6dfce020d6f07a9e5a34ab92a170b1a486a2 100644 --- 
a/arkindex/documents/serializers/search.py +++ b/arkindex/documents/serializers/search.py @@ -2,10 +2,10 @@ from django.conf import settings from rest_framework import serializers from arkindex_common.enums import EntityType, TranscriptionType from arkindex.project.serializer_fields import EnumField +from arkindex.images.serializers import ZoneSerializer from arkindex.documents.date_parser import parse_date from arkindex.documents.models import Element, ElementType, Entity -from arkindex.documents.serializers.light import CorpusLightSerializer -from arkindex.documents.serializers.elements import ElementSlimSerializer +from arkindex.documents.serializers.light import CorpusLightSerializer, ElementLightSerializer from arkindex.documents.serializers.ml import TranscriptionSerializer import math @@ -84,16 +84,16 @@ class IIIFSearchQuerySerializer(serializers.Serializer): q = serializers.CharField(source='query', max_length=settings.ES_QUERY_STRING_MAX_LENGTH) -class ElementSearchResultSerializer(serializers.ModelSerializer): +class ElementSearchResultSerializer(ElementLightSerializer): """ A page search result with nested transcriptions """ - type = serializers.SlugRelatedField(slug_field='slug', read_only=True) + zone = ZoneSerializer() transcriptions = TranscriptionSerializer(many=True, source='transcriptions_results') total_transcriptions = serializers.IntegerField() parent_paths = serializers.ListField( child=serializers.ListField( - child=ElementSlimSerializer() + child=ElementLightSerializer() ), ) corpus = CorpusLightSerializer() @@ -106,6 +106,7 @@ class ElementSearchResultSerializer(serializers.ModelSerializer): 'id', 'name', 'type', + 'zone', 'transcriptions', 'total_transcriptions', 'parent_paths', diff --git a/arkindex/documents/tests/consumers/test_corpus_consumer.py b/arkindex/documents/tests/consumers/test_corpus_consumer.py index f9252283e0cef1b5c2a86779f28f057a199f888d..e677e2c4f2e0159452c97502bf2adf9674592f7e 100644 --- 
a/arkindex/documents/tests/consumers/test_corpus_consumer.py +++ b/arkindex/documents/tests/consumers/test_corpus_consumer.py @@ -71,7 +71,6 @@ class TestDeleteCorpus(FixtureTestCase): revision='-1', internal=False, ), - zone=cls.zone, type=TranscriptionType.Word, text='hi', score=0.75, @@ -122,7 +121,6 @@ class TestDeleteCorpus(FixtureTestCase): ts = self.page.transcriptions.get() self.assertEqual(ts.source.slug, 'reco') - self.assertEqual(ts.zone, self.zone) self.assertEqual(ts.type, TranscriptionType.Word) self.assertEqual(ts.text, 'hi') self.assertEqual(ts.score, 0.75) diff --git a/arkindex/documents/tests/consumers/test_ml_results_consumer.py b/arkindex/documents/tests/consumers/test_ml_results_consumer.py index c66f8524e031742f7d680b7fd26146cabf1842cc..1612370bffddb3ab3ec5ffcfe28271acef3ed7fe 100644 --- a/arkindex/documents/tests/consumers/test_ml_results_consumer.py +++ b/arkindex/documents/tests/consumers/test_ml_results_consumer.py @@ -64,7 +64,6 @@ class TestMLResultsConsumer(FixtureTestCase): text='some text', type=TranscriptionType.Line, source=source, - zone=cls.page2.zone, ).transcription_entities.create( offset=0, length=1, diff --git a/arkindex/documents/tests/consumers/test_reindex_consumer.py b/arkindex/documents/tests/consumers/test_reindex_consumer.py index 784908176fbed4ab38e4f3d91c804179bf11e288..bd5acb84965ae98a8863ccf9d870e9c0dcbab796 100644 --- a/arkindex/documents/tests/consumers/test_reindex_consumer.py +++ b/arkindex/documents/tests/consumers/test_reindex_consumer.py @@ -41,7 +41,6 @@ class TestReindexConsumer(FixtureTestCase): text='something', type=TranscriptionType.Word, source=source, - zone=zone, ) ts.transcription_entities.create( entity=corpus2.entities.create( @@ -74,8 +73,7 @@ class TestReindexConsumer(FixtureTestCase): (queryset, ), kwargs = call_args self.assertQuerysetEqual( queryset, - # Only transcriptions with a zone may be added to transcriptions index - Transcription.objects.filter(zone__isnull=False) + 
Transcription.objects.all() ) self.assertDictEqual(kwargs, {'bulk_size': 400}) @@ -150,7 +148,7 @@ class TestReindexConsumer(FixtureTestCase): (queryset, ), kwargs = ts_call self.assertQuerysetEqual( queryset, - Transcription.objects.filter(element__corpus_id=self.corpus.id, zone__isnull=False) + Transcription.objects.filter(element__corpus_id=self.corpus.id) ) self.assertDictEqual(kwargs, {'bulk_size': 400}) @@ -160,7 +158,7 @@ class TestReindexConsumer(FixtureTestCase): }) self.assertEqual(mock().drop_index.call_count, 0) self.assertEqual(mock().run_index.call_count, 3) - entities_call, ts_call, elements_call = sorted(mock().run_index.call_args_list, key=repr) + elements_call, entities_call, ts_call = sorted(mock().run_index.call_args_list, key=repr) elements_list = list(Element.objects.get_descending(self.folder.id)) elements_list.append(self.folder) @@ -182,21 +180,6 @@ class TestReindexConsumer(FixtureTestCase): (queryset, ), kwargs = ts_call self.assertQuerysetEqual( queryset, - Transcription.objects.filter(element__in=elements_list, zone__isnull=False) + Transcription.objects.filter(element__in=elements_list) ) self.assertDictEqual(kwargs, {'bulk_size': 400}) - - def test_reindex_transcriptions_without_zone(self, mock): - """ - Transcriptions with no zone may not be indexed in transcriptions index - """ - ReindexConsumer({}).reindex_start({ - 'transcriptions': True, - 'entities': False, - 'elements': False, - }) - transcription = Transcription.objects.filter(zone__isnull=True).first() - self.assertNotEqual(transcription, None) - self.assertEqual(mock().run_index.call_count, 1) - (queryset, ), kwargs = mock().run_index.call_args - self.assertFalse(queryset.filter(id=transcription.id).exists()) diff --git a/arkindex/documents/tests/test_bulk_element_transcriptions.py b/arkindex/documents/tests/test_bulk_element_transcriptions.py index ee81ca3fc470a29667573ede1164a810e283c9e4..94f66aad33c783b9dc76f58881729142098f0df5 100644 --- 
a/arkindex/documents/tests/test_bulk_element_transcriptions.py +++ b/arkindex/documents/tests/test_bulk_element_transcriptions.py @@ -7,8 +7,7 @@ from rest_framework import status from arkindex.project.tests import FixtureAPITestCase from arkindex_common.enums import TranscriptionType from arkindex.dataimport.models import WorkerVersion -from arkindex.documents.models import Element, Corpus, DataSource -from arkindex_common.ml_tool import MLToolType +from arkindex.documents.models import Element, Corpus import uuid @@ -22,7 +21,6 @@ class TestBulkElementTranscriptions(FixtureAPITestCase): super().setUpTestData() cls.page = cls.corpus.elements.get(name='Volume 1, page 1r') cls.vol = cls.corpus.elements.get(name='Volume 1') - cls.src = DataSource.objects.get(slug='test', type=MLToolType.Recognizer) cls.line = cls.corpus.elements.filter(type__slug='text_line').first() cls.private_corpus = Corpus.objects.create(name='Private') cls.private_page = cls.private_corpus.elements.create(type=cls.page.type) @@ -34,8 +32,16 @@ class TestBulkElementTranscriptions(FixtureAPITestCase): Bulk creates a list of element with an attached transcription generated by a worker_version """ get_layer_mock.return_value.send = AsyncMock() - self.src.internal = True - self.src.save() + + # Create a manual transcription on the element + self.line.transcriptions.create( + text='A manual transcription', + worker_version=self.worker_version, + type=TranscriptionType.Line + ) + self.assertEqual(self.line.transcriptions.count(), 1) + + existing_element_ids = list(Element.objects.get_descending(self.page.id).values_list('id', flat=True)) transcriptions = [ ([[13, 37], [133, 37], [133, 137], [13, 137], [13, 37]], 'Hello world !', 0.1337), @@ -59,7 +65,7 @@ class TestBulkElementTranscriptions(FixtureAPITestCase): ) self.assertEqual(response.status_code, status.HTTP_201_CREATED) - created_elts = Element.objects.get_descending(self.page.id).exclude(id=self.line.id) + created_elts = 
Element.objects.get_descending(self.page.id).exclude(id__in=existing_element_ids) self.assertEqual(created_elts.count(), 2) self.assertTrue(all(map(lambda elt: elt.zone.image == self.page.zone.image, created_elts))) self.assertListEqual( @@ -73,10 +79,10 @@ class TestBulkElementTranscriptions(FixtureAPITestCase): ] ) self.assertCountEqual( - created_elts.values_list('transcriptions__type', 'transcriptions__text', 'transcriptions__zone', 'transcriptions__source', 'transcriptions__worker_version'), + created_elts.values_list('transcriptions__type', 'transcriptions__text', 'transcriptions__source', 'transcriptions__worker_version'), [ - (TranscriptionType.Line, ('Hello world !'), None, None, self.worker_version.id), - (TranscriptionType.Line, ('I <3 JavaScript'), None, None, self.worker_version.id) + (TranscriptionType.Line, ('Hello world !'), None, self.worker_version.id), + (TranscriptionType.Line, ('I <3 JavaScript'), None, self.worker_version.id) ] ) get_layer_mock().send.assert_called_once_with('reindex', { @@ -95,9 +101,6 @@ class TestBulkElementTranscriptions(FixtureAPITestCase): """ Sending 100 transcriptions should not increase the number of DB queries """ - self.src.internal = True - self.src.save() - # Create 100 transcriptions transcriptions = [( [[i, i], [i, i + 20], [i + 20, i + 20], [i + 20, i], [i, i]], @@ -139,8 +142,6 @@ class TestBulkElementTranscriptions(FixtureAPITestCase): Does not erase present transcriptions, even from the same worker """ get_layer_mock.return_value.send = AsyncMock() - self.src.internal = True - self.src.save() # Create a manual transcription on the element self.line.transcriptions.create( @@ -150,6 +151,7 @@ class TestBulkElementTranscriptions(FixtureAPITestCase): ) self.assertEqual(self.line.transcriptions.count(), 1) + existing_element_ids = list(Element.objects.get_descending(self.page.id).values_list('id', flat=True)) transcriptions = [ ([[13, 37], [133, 37], [133, 137], [13, 137], [13, 37]], 'Hello world !', 0.1337), # 
Use line zone to create the second transcription @@ -175,13 +177,12 @@ class TestBulkElementTranscriptions(FixtureAPITestCase): ) self.assertEqual(response.status_code, status.HTTP_201_CREATED) - page_elts = Element.objects.get_descending(self.page.id) + created_elts = Element.objects.get_descending(self.page.id).exclude(id__in=existing_element_ids) # The existing text line has been reused - self.assertEqual(page_elts.count(), 2) - # There are now two transcriptions on the line - self.assertCountEqual(self.line.transcriptions.values_list('type', 'text', 'zone'), [ - (TranscriptionType.Line, 'A manual transcription', None), - (TranscriptionType.Line, 'I <3 JavaScript', None) + self.assertEqual(created_elts.count(), 1) + self.assertCountEqual(self.line.transcriptions.values_list('type', 'text'), [ + (TranscriptionType.Line, 'A manual transcription'), + (TranscriptionType.Line, 'I <3 JavaScript') ]) def test_bulk_transcriptions_requires_verified(self): @@ -268,14 +269,9 @@ class TestBulkElementTranscriptions(FixtureAPITestCase): def test_bulk_transcriptions_return_ordered_elements(self): """ return_elements field allows to serialize an ordered list of sub-elements holding the transcriptions - Returned elements may be duplicated in case the same zone is used for multiple transcriptions """ - self.src.internal = True - self.src.save() - transcriptions = [ ([[13, 37], [133, 37], [133, 137], [13, 137], [13, 37]], 'stuck', 0.13), - ([[13, 37], [133, 37], [133, 137], [13, 137], [13, 37]], 'stick', 0.33), ([[24, 42], [64, 42], [64, 142], [24, 142], [24, 42]], 'stock', 0.37), # The existing line has the smallest ordering (self.line.zone.polygon.coords, 'stack', 0.42), @@ -302,16 +298,13 @@ class TestBulkElementTranscriptions(FixtureAPITestCase): self.assertEqual(response.status_code, status.HTTP_201_CREATED) created_elts = Element.objects.get_descending(self.page.id).exclude(id=self.line.id) - self.assertEqual(created_elts.count(), 2) + self.assertEqual(created_elts.count(), 
5) first_elt = created_elts.get(transcriptions__text='stuck') second_elt = created_elts.get(transcriptions__text='stock') self.assertListEqual(response.json(), [{ 'id': str(first_elt.id), 'created': True - }, { - 'id': str(first_elt.id), - 'created': False }, { 'id': str(second_elt.id), 'created': True diff --git a/arkindex/documents/tests/test_children_elements.py b/arkindex/documents/tests/test_children_elements.py index 2423ae98229de620251581715943d1b0613ca58e..f46f6faeec64a92bb7c80c43317a931156937814 100644 --- a/arkindex/documents/tests/test_children_elements.py +++ b/arkindex/documents/tests/test_children_elements.py @@ -12,11 +12,11 @@ class TestChildrenElements(FixtureAPITestCase): def setUpTestData(cls): super().setUpTestData() cls.vol = cls.corpus.elements.get(name='Volume 1') - cls.element = Element.objects.get(name='Volume 1, page 2r') cls.worker_version = WorkerVersion.objects.get(worker__slug='dla') def test_element_children(self): - response = self.client.get(reverse('api:elements-children', kwargs={'pk': str(self.element.id)})) + lonely_element = Element.objects.get(name='Volume 2, page 2r') + response = self.client.get(reverse('api:elements-children', kwargs={'pk': str(lonely_element.id)})) self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertListEqual(response.json()['results'], []) @@ -47,10 +47,10 @@ class TestChildrenElements(FixtureAPITestCase): response = self.client.get(reverse('api:elements-children', kwargs={'pk': str(self.vol.id)}) + '?recursive') self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(len(response.json()['results']), 15) + self.assertEqual(len(response.json()['results']), 20) self.assertSetEqual( {r['type'] for r in response.json()['results']}, - {'act', 'page', 'surface', 'text_line'}, + {'act', 'page', 'surface', 'text_line', 'word'}, ) response = self.client.get(reverse('api:elements-children', kwargs={'pk': str(self.vol.id)}) + '?type=page') @@ -154,6 +154,13 @@ class 
TestChildrenElements(FixtureAPITestCase): ) def test_children_with_has_children(self): + # Create a childless element + element = self.corpus.elements.create( + name='Spearow', + type=self.corpus.types.get(slug='surface') + ) + element.add_parent(self.vol) + with self.assertNumQueries(9): response = self.client.get( reverse('api:elements-children', kwargs={'pk': str(self.vol.id)}), @@ -169,8 +176,9 @@ class TestChildrenElements(FixtureAPITestCase): 'Act 4': True, 'Act 5': True, 'Volume 1, page 1r': True, - 'Volume 1, page 1v': False, - 'Volume 1, page 2r': False, + 'Volume 1, page 1v': True, + 'Volume 1, page 2r': True, + 'Spearow': False, } ) diff --git a/arkindex/documents/tests/test_corpus.py b/arkindex/documents/tests/test_corpus.py index 8552c664339c8209c95af5c78f4279b2c36993a1..f46df8feb0d0725726e25ae427be00902775a4fb 100644 --- a/arkindex/documents/tests/test_corpus.py +++ b/arkindex/documents/tests/test_corpus.py @@ -15,6 +15,45 @@ FAKE_NOW = datetime.datetime.now() # Fake DB fixtures creation date DB_CREATED = '2020-02-02T01:23:45.678000Z' +EXPECTED_CORPUS_TYPES = [ + { + 'slug': 'volume', + 'display_name': 'Volume', + 'folder': True, + 'allowed_transcription': None + }, + { + 'slug': 'page', + 'display_name': 'Page', + 'folder': False, + 'allowed_transcription': None + }, + { + 'slug': 'act', + 'display_name': 'Act', + 'folder': False, + 'allowed_transcription': None + }, + { + 'slug': 'surface', + 'display_name': 'Surface', + 'folder': False, + 'allowed_transcription': None + }, + { + 'slug': 'text_line', + 'display_name': 'Line', + 'folder': False, + 'allowed_transcription': 'line' + }, + { + 'slug': 'word', + 'display_name': 'Word', + 'folder': False, + 'allowed_transcription': 'word' + } +] + class TestCorpus(FixtureAPITestCase): @@ -43,38 +82,7 @@ class TestCorpus(FixtureAPITestCase): types = data[0].pop('types') for t in types: del t['id'] - self.assertCountEqual(types, [ - { - 'slug': 'volume', - 'display_name': 'Volume', - 'folder': True, - 
'allowed_transcription': None - }, - { - 'slug': 'page', - 'display_name': 'Page', - 'folder': False, - 'allowed_transcription': None - }, - { - 'slug': 'act', - 'display_name': 'Act', - 'folder': False, - 'allowed_transcription': None - }, - { - 'slug': 'surface', - 'display_name': 'Surface', - 'folder': False, - 'allowed_transcription': None - }, - { - 'slug': 'text_line', - 'display_name': 'Line', - 'folder': False, - 'allowed_transcription': 'line' - } - ]) + self.assertCountEqual(types, EXPECTED_CORPUS_TYPES) self.assertListEqual( data, @@ -103,38 +111,7 @@ class TestCorpus(FixtureAPITestCase): types = data[1].pop('types') for t in types: del t['id'] - self.assertCountEqual(types, [ - { - 'slug': 'volume', - 'display_name': 'Volume', - 'folder': True, - 'allowed_transcription': None - }, - { - 'slug': 'page', - 'display_name': 'Page', - 'folder': False, - 'allowed_transcription': None - }, - { - 'slug': 'act', - 'display_name': 'Act', - 'folder': False, - 'allowed_transcription': None - }, - { - 'slug': 'surface', - 'display_name': 'Surface', - 'folder': False, - 'allowed_transcription': None - }, - { - 'slug': 'text_line', - 'display_name': 'Line', - 'folder': False, - 'allowed_transcription': 'line' - } - ]) + self.assertCountEqual(types, EXPECTED_CORPUS_TYPES) self.assertListEqual( data, @@ -173,38 +150,7 @@ class TestCorpus(FixtureAPITestCase): types = data[2].pop('types') for t in types: del t['id'] - self.assertCountEqual(types, [ - { - 'slug': 'volume', - 'display_name': 'Volume', - 'folder': True, - 'allowed_transcription': None - }, - { - 'slug': 'page', - 'display_name': 'Page', - 'folder': False, - 'allowed_transcription': None - }, - { - 'slug': 'act', - 'display_name': 'Act', - 'folder': False, - 'allowed_transcription': None - }, - { - 'slug': 'surface', - 'display_name': 'Surface', - 'folder': False, - 'allowed_transcription': None - }, - { - 'slug': 'text_line', - 'display_name': 'Line', - 'folder': False, - 'allowed_transcription': 'line' - } 
- ]) + self.assertCountEqual(types, EXPECTED_CORPUS_TYPES) self.assertListEqual( data, @@ -336,38 +282,7 @@ class TestCorpus(FixtureAPITestCase): types = data.pop('types') for t in types: del t['id'] - self.assertCountEqual(types, [ - { - 'slug': 'volume', - 'display_name': 'Volume', - 'folder': True, - 'allowed_transcription': None - }, - { - 'slug': 'page', - 'display_name': 'Page', - 'folder': False, - 'allowed_transcription': None - }, - { - 'slug': 'act', - 'display_name': 'Act', - 'folder': False, - 'allowed_transcription': None - }, - { - 'slug': 'surface', - 'display_name': 'Surface', - 'folder': False, - 'allowed_transcription': None - }, - { - 'slug': 'text_line', - 'display_name': 'Line', - 'folder': False, - 'allowed_transcription': 'line' - } - ]) + self.assertCountEqual(types, EXPECTED_CORPUS_TYPES) self.assertDictEqual(data, { 'id': str(self.corpus_public.id), 'name': self.corpus_public.name, @@ -491,14 +406,14 @@ class TestCorpus(FixtureAPITestCase): Ensure corpus deletion deletes elements first to prevent a ProtectedError """ self.assertEqual(Corpus.objects.count(), 3) - self.assertEqual(Element.objects.count(), 20) + self.assertEqual(Element.objects.count(), 29) # Use Django's default deletion method and expect a ProtectedError with self.assertRaises(ProtectedError): Model.delete(self.corpus) self.assertEqual(Corpus.objects.count(), 3) - self.assertEqual(Element.objects.count(), 20) + self.assertEqual(Element.objects.count(), 29) # Use the corpus deletion method and it works! 
self.corpus.delete() diff --git a/arkindex/documents/tests/test_corpus_elements.py b/arkindex/documents/tests/test_corpus_elements.py index e8dedd55c88ac95ff418c34e1af6ca17d13a5d27..2a348376fff5294d6d1ad2e78919a92107b63a4a 100644 --- a/arkindex/documents/tests/test_corpus_elements.py +++ b/arkindex/documents/tests/test_corpus_elements.py @@ -68,7 +68,6 @@ class TestListElements(FixtureAPITestCase): ) self.assertEqual(response.status_code, status.HTTP_200_OK) data = response.json() - self.assertIsNone(data['next']) self.assertDictEqual( {element['name']: element['has_children'] for element in data['results']}, @@ -87,8 +86,8 @@ class TestListElements(FixtureAPITestCase): 'Surface F': False, 'Volume 1': True, 'Volume 1, page 1r': True, - 'Volume 1, page 1v': False, - 'Volume 1, page 2r': False, + 'Volume 1, page 1v': True, + 'Volume 1, page 2r': True, 'Volume 2': True, 'Volume 2, page 1r': False, 'Volume 2, page 1v': False, diff --git a/arkindex/documents/tests/test_create_transcriptions.py b/arkindex/documents/tests/test_create_transcriptions.py index 999fc147aa70bd419a38c9fa09af249de36d07eb..f3415b7063d5b0b72646971a64def664ae180b57 100644 --- a/arkindex/documents/tests/test_create_transcriptions.py +++ b/arkindex/documents/tests/test_create_transcriptions.py @@ -81,11 +81,9 @@ class TestTranscriptionCreate(FixtureAPITestCase): 'type': 'line', 'source': None, 'worker_version_id': None, - 'zone': None }) - new_ts = Transcription.objects.get(text='A perfect day in a perfect place', type=TranscriptionType.Line.value) - self.assertIsNone(new_ts.zone) + new_ts = Transcription.objects.get(text='A perfect day in a perfect place', type=TranscriptionType.Line) self.assertIsNone(new_ts.score) self.assertEqual(new_ts.worker_version, None) self.assertTrue(self.line.transcriptions.filter(pk=new_ts.id).exists()) @@ -101,28 +99,6 @@ class TestTranscriptionCreate(FixtureAPITestCase): 'drop': False, }) - @patch('arkindex.project.triggers.get_channel_layer') - def 
test_create_transcription_takes_no_zone(self, get_layer_mock): - """ - TranscriptionCreate endpoint do not create any zone for the transcription - """ - get_layer_mock.return_value.send = AsyncMock() - - self.client.force_login(self.user) - response = self.client.post( - reverse('api:transcription-create', kwargs={'pk': self.line.id}), - format='json', - data={ - 'type': 'line', - 'polygon': [(0, 0), (42, 0), (42, 42), (0, 42), (0, 0)], - 'text': 'SQUARE', - } - ) - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - - new_ts = Transcription.objects.get(text='SQUARE', type=TranscriptionType.Line.value) - self.assertEqual(new_ts.zone, None) - @patch('arkindex.project.triggers.get_channel_layer') def test_create_duplicated_transcription(self, get_layer_mock): """ @@ -222,7 +198,6 @@ class TestTranscriptionCreate(FixtureAPITestCase): 'text': 'NEKUDOTAYIM', 'type': 'word', 'worker_version_id': str(self.worker_version.id), - 'zone': None }) get_layer_mock().send.assert_called_once_with('reindex', { diff --git a/arkindex/documents/tests/test_edit_transcriptions.py b/arkindex/documents/tests/test_edit_transcriptions.py index 37b88f76f51cb51998f39b52dd3305620c029075..d2f0678a06712a10a25de1f7f20d3799d868fee2 100644 --- a/arkindex/documents/tests/test_edit_transcriptions.py +++ b/arkindex/documents/tests/test_edit_transcriptions.py @@ -2,7 +2,7 @@ from django.urls import reverse from rest_framework import status from arkindex.project.tests import FixtureAPITestCase from arkindex_common.enums import TranscriptionType -from arkindex.documents.models import Corpus, Transcription, DataSource +from arkindex.documents.models import Corpus, Element, Transcription, DataSource from arkindex.users.models import User from arkindex_common.ml_tool import MLToolType from uuid import uuid4 @@ -16,16 +16,17 @@ class TestEditTranscription(FixtureAPITestCase): @classmethod def setUpTestData(cls): super().setUpTestData() - cls.page = cls.corpus.elements.get(name='Volume 1, page 
1r') + page = cls.corpus.elements.get(name='Volume 1, page 1r') + element = Element.objects.get_descending(page.id).filter(type__slug='word', name='PARIS').get() cls.line = cls.corpus.elements.filter(type__slug='text_line').first() - cls.ml_transcription = cls.page.transcriptions.get(text='PARIS') - cls.private_corpus = Corpus.objects.create(name='Private') - cls.private_page = cls.private_corpus.elements.create(type=cls.page.type) + cls.ml_transcription = element.transcriptions.get(text='PARIS') + private_corpus = Corpus.objects.create(name='Private') + cls.private_page = private_corpus.elements.create(type=page.type) # Create an user with a read right only on the private corpus cls.private_read_user = User.objects.create_user('a@bc.de', 'a') cls.private_read_user.verified_email = True cls.private_read_user.save() - cls.private_corpus.corpus_right.create(user=cls.private_read_user) + private_corpus.corpus_right.create(user=cls.private_read_user) def setUp(self): self.manual_source = DataSource.objects.create(type=MLToolType.Recognizer, slug='manual', internal=False) @@ -63,7 +64,6 @@ class TestEditTranscription(FixtureAPITestCase): 'text': 'A manual transcription', 'type': 'line', 'worker_version_id': None, - 'zone': None }) def test_transcription_retrieve_ml(self): @@ -135,7 +135,6 @@ class TestEditTranscription(FixtureAPITestCase): 'text': 'a knight was living lonely', 'type': 'line', 'worker_version_id': None, - 'zone': None }) def test_transcription_patch_write_right(self): diff --git a/arkindex/documents/tests/test_entities_api.py b/arkindex/documents/tests/test_entities_api.py index b538a335a94eb484f4327d28300791827f10d2c1..b6e201e63a416ad39efeff54091ba853f4d29a8d 100644 --- a/arkindex/documents/tests/test_entities_api.py +++ b/arkindex/documents/tests/test_entities_api.py @@ -30,10 +30,11 @@ class TestEntitiesAPI(FixtureAPITestCase): cls.source = DataSource.objects.get(slug='test', type=MLToolType.Recognizer) cls.private_corpus = 
Corpus.objects.create(name='private') cls.worker_version = WorkerVersion.objects.get(worker__slug='reco') + cls.page = cls.corpus.elements.get(name='Volume 1, page 1r') + cls.element_type = cls.corpus.types.get(slug='text_line') def setUp(self): super().setUp() - self.page = self.corpus.elements.get(name='Volume 1, page 1r') self.entity = Entity.objects.create( type=EntityType.Person, corpus=self.corpus, @@ -56,17 +57,22 @@ class TestEntitiesAPI(FixtureAPITestCase): zone, _ = self.page.zone.image.zones.get_or_create( polygon=LinearRing((0, 0), (0, 42), (42, 42), (42, 0), (0, 0)) ) - self.transcription = self.page.transcriptions.create( - text='Some transcribed text', + self.element = self.corpus.elements.create( + type=self.element_type, + name='Transcription', zone=zone, - source_id=self.source.id, + source=self.source, + ) + self.transcription = self.element.transcriptions.create( + text='Some transcribed text', + source=self.source, type=TranscriptionType.Line, ) - self.metadata = self.page.metadatas.create( + self.metadata = self.element.metadatas.create( name='test 1', type=MetaType.Text, value='Blah', - element=self.page, + element=self.element, entity=self.entity, ) self.transcriptionentity = TranscriptionEntity.objects.create( @@ -91,19 +97,19 @@ class TestEntitiesAPI(FixtureAPITestCase): self.assertEqual(data['name'], self.entity.name) def test_get_entity_elements(self): - with self.assertNumQueries(9): + with self.assertNumQueries(8): response = self.client.get(reverse('api:entity-elements', kwargs={'pk': str(self.entity.id)})) self.assertEqual(response.status_code, status.HTTP_200_OK) data = response.json() results = data['results'] self.assertEqual(len(results), 1) - self.assertEqual(results[0]['id'], str(self.page.id)) - self.assertEqual(results[0]['name'], self.page.name) + self.assertEqual(results[0]['id'], str(self.element.id)) + self.assertEqual(results[0]['name'], self.element.name) def test_get_entity_elements_corpus_acl(self): 
self.client.force_login(self.user) - self.page.corpus = self.private_corpus - self.page.save() + self.element.corpus = self.private_corpus + self.element.save() response = self.client.get(reverse('api:entity-elements', kwargs={'pk': str(self.entity.id)})) self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertListEqual(response.json().get('results'), []) @@ -115,10 +121,11 @@ class TestEntitiesAPI(FixtureAPITestCase): elt = self.corpus.elements.create( type=self.corpus.types.get(slug='act'), name='001', + zone=zone, ) - elt_tr = elt.transcriptions.create(source_id=self.source.id, text='goodbye', zone=zone) + elt_tr = elt.transcriptions.create(source_id=self.source.id, text='goodbye') TranscriptionEntity.objects.create(transcription=elt_tr, entity=self.entity, offset=42, length=7) - with self.assertNumQueries(9): + with self.assertNumQueries(8): response = self.client.get(reverse('api:entity-elements', kwargs={'pk': str(self.entity.id)})) self.assertEqual(response.status_code, status.HTTP_200_OK) data = response.json() @@ -160,8 +167,8 @@ class TestEntitiesAPI(FixtureAPITestCase): 'url': 'http://server', } } - } if e.zone else None, - } for e in (elt, self.page)] + }, + } for e in (elt, self.element)] ) def test_get_role_in_corpus(self): @@ -405,8 +412,8 @@ class TestEntitiesAPI(FixtureAPITestCase): def test_create_transcription_entity_wrong_acl(self): self.client.force_login(self.user) - self.page.corpus = self.private_corpus - self.page.save() + self.element.corpus = self.private_corpus + self.element.save() response = self.client.post( reverse('api:transcription-entity-create', kwargs={'pk': str(self.transcription.id)}), data=self.tr_entities_sample, @@ -559,19 +566,19 @@ class TestEntitiesAPI(FixtureAPITestCase): """ List an element's entities sorted depending on their origin (metadata or transcriptions) """ - md = self.page.metadatas.create(name='some_metadata', type=MetaType.Location, value='something') + md = 
self.element.metadatas.create(name='some_metadata', type=MetaType.Location, value='something') md.entity = self.entity_bis md.save() with self.assertNumQueries(9): - response = self.client.get(reverse('api:element-entities', kwargs={'pk': str(self.page.id)})) + response = self.client.get(reverse('api:element-entities', kwargs={'pk': str(self.element.id)})) self.assertEqual(response.status_code, status.HTTP_200_OK) data = response.json() self.assertDictEqual( data, { - 'id': str(self.page.id), - 'type': self.page.type.slug, - 'name': self.page.name, + 'id': str(self.element.id), + 'type': self.element.type.slug, + 'name': self.element.name, 'transcriptions': [{ 'entity': { 'id': str(t.entity.id), @@ -617,14 +624,14 @@ class TestEntitiesAPI(FixtureAPITestCase): 'value': m.value, 'revision': m.revision, 'dates': [] - } for m in self.page.metadatas.exclude(entity=None)] + } for m in self.element.metadatas.exclude(entity=None)] } ) def test_list_element_entities_worker_version_validation(self): with self.assertNumQueries(1): response = self.client.get( - reverse('api:element-entities', kwargs={'pk': str(self.page.id)}), + reverse('api:element-entities', kwargs={'pk': str(self.element.id)}), data={'worker_version': 'blah'} ) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) @@ -632,7 +639,7 @@ class TestEntitiesAPI(FixtureAPITestCase): with self.assertNumQueries(2): response = self.client.get( - reverse('api:element-entities', kwargs={'pk': str(self.page.id)}), + reverse('api:element-entities', kwargs={'pk': str(self.element.id)}), data={'worker_version': str(uuid.uuid4())} ) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) @@ -645,7 +652,7 @@ class TestEntitiesAPI(FixtureAPITestCase): with self.assertNumQueries(6): response = self.client.get( - reverse('api:element-entities', kwargs={'pk': str(self.page.id)}), + reverse('api:element-entities', kwargs={'pk': str(self.element.id)}), data={'worker_version': str(self.worker_version.id)} 
) self.assertEqual(response.status_code, status.HTTP_200_OK) @@ -653,9 +660,9 @@ class TestEntitiesAPI(FixtureAPITestCase): self.assertDictEqual( response.json(), { - 'id': str(self.page.id), - 'name': 'Volume 1, page 1r', - 'type': 'page', + 'id': str(self.element.id), + 'name': 'Transcription', + 'type': 'text_line', 'transcriptions': [], 'metadata': [ { @@ -681,9 +688,9 @@ class TestEntitiesAPI(FixtureAPITestCase): ) def test_list_element_entities_wrong_acl(self): - self.page.corpus = self.private_corpus - self.page.save() - response = self.client.get(reverse('api:element-entities', kwargs={'pk': str(self.page.id)})) + self.element.corpus = self.private_corpus + self.element.save() + response = self.client.get(reverse('api:element-entities', kwargs={'pk': str(self.element.id)})) self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) @patch('arkindex.documents.api.entities.ESEntity') @@ -765,7 +772,7 @@ class TestEntitiesAPI(FixtureAPITestCase): def test_list_element_links(self): link = EntityLink.objects.create(parent=self.entity, child=self.entity_bis, role=self.role) - response = self.client.get(reverse('api:element-links', kwargs={'pk': str(self.page.id)})) + response = self.client.get(reverse('api:element-links', kwargs={'pk': str(self.element.id)})) self.assertEqual(response.status_code, status.HTTP_200_OK) data = response.json() self.assertListEqual( diff --git a/arkindex/documents/tests/test_ml_results.py b/arkindex/documents/tests/test_ml_results.py index e166cac4f377319adccb8214eb3facb0ef887b95..fa8ddd48c8c181407c65653e0179c291339f4ca0 100644 --- a/arkindex/documents/tests/test_ml_results.py +++ b/arkindex/documents/tests/test_ml_results.py @@ -49,7 +49,7 @@ class TestMLResults(FixtureTestCase): def test_element_stats(self): self.client.force_login(self.superuser) - self.assertEqual(self.page.transcriptions.count(), 4) + self.assertEqual(self.page.transcriptions.count(), 1) self.assertEqual(self.page.classifications.count(), 1) 
self.assertEqual(Entity.objects.filter(transcriptions__element=self.page).count(), 1) self.assertEqual(Entity.objects.filter(metadatas__element=self.page).count(), 1) @@ -66,7 +66,7 @@ class TestMLResults(FixtureTestCase): 'revision': '4.2', 'internal': False, 'classifications_count': 1, - 'transcriptions_count': 4, + 'transcriptions_count': 1, 'entities_count': 2, } ]) diff --git a/arkindex/documents/tests/test_search.py b/arkindex/documents/tests/test_search.py index d30b8bf5105eed5d09e086e229c0ea1a4656ef11..26839b442a56988da09828f9745555876d87f860 100644 --- a/arkindex/documents/tests/test_search.py +++ b/arkindex/documents/tests/test_search.py @@ -98,7 +98,7 @@ class TestSearchAPI(FixtureAPITestCase): def test_element_transcription_search(self): elt = Element.objects.get(name="Volume 1, page 1r") - ts = Transcription.objects.filter(text="PARIS", zone__image__path='img1') + ts = Transcription.objects.filter(text="PARIS", element__zone__image__path='img1') self.es_mock.count.return_value = {'count': 1} self.es_mock.search.return_value = self.build_es_response( @@ -190,7 +190,7 @@ class TestSearchAPI(FixtureAPITestCase): def test_iiif_transcription_search(self): # Filter to only get transcriptions from volume 1 unfiltered = Transcription.objects.filter(text="PARIS") - expected = Transcription.objects.filter(text="PARIS", zone__image__path__in=['img1', 'img2', 'img3']) + expected = Transcription.objects.filter(text="PARIS", element__zone__image__path__in=['img1', 'img2', 'img3']) vol = Element.objects.get(name='Volume 1') self.es_mock.count.return_value = {'count': len(unfiltered)} diff --git a/arkindex/documents/tests/test_transcriptions.py b/arkindex/documents/tests/test_transcriptions.py index 2024377959a673cca1a94bcb54853db34cbe6cd0..e1d08bc7e2a6801ca55e0b7a2f7d94e4535438c2 100644 --- a/arkindex/documents/tests/test_transcriptions.py +++ b/arkindex/documents/tests/test_transcriptions.py @@ -1,7 +1,6 @@ from django.urls import reverse from rest_framework import 
status from arkindex.project.tests import FixtureAPITestCase -from django.contrib.gis.geos import LinearRing from arkindex_common.enums import TranscriptionType from arkindex_common.ml_tool import MLToolType from arkindex.documents.models import Corpus, DataSource @@ -52,116 +51,123 @@ class TestTranscriptions(FixtureAPITestCase): self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) def test_list_element_transcriptions(self): - self.page.transcriptions.all().delete() - transcriptions = [] - for i in range(1, 11): - zone, _ = self.page.zone.image.zones.get_or_create( - polygon=LinearRing((0, 0), (0, i), (i, i), (i, 0), (0, 0)) - ) - # Create transcriptions on the page with their own zones - transcriptions.append(self.page.transcriptions.create( - source_id=self.src.id, type=TranscriptionType.Word, zone=zone, - )) + tr1 = self.page.transcriptions.get() + tr2 = self.page.transcriptions.create( + type=TranscriptionType.Word, + text='something', + worker_version=self.worker_version, + score=0.369, + ) self.client.force_login(self.user) - with self.assertNumQueries(10): + with self.assertNumQueries(11): response = self.client.get(reverse('api:element-transcriptions', kwargs={'pk': str(self.page.id)})) self.assertEqual(response.status_code, status.HTTP_200_OK) - results = response.json()['results'] - self.assertEqual(len(results), 10) - self.assertCountEqual( - [(tr['id'], tr['element']) for tr in results], - # Element should not be serialized in case recursive parameter is not set - [(str(tr.id), None) for tr in transcriptions] - ) + self.assertCountEqual(response.json()['results'], [ + { + 'id': str(tr1.id), + 'type': 'page', + 'text': 'Lorem ipsum dolor sit amet', + 'score': 1.0, + 'source': { + 'id': str(self.src.id), + 'type': 'recognizer', + 'slug': 'test', + 'name': 'Test Recognizer', + 'revision': '4.2', + 'internal': False, + }, + 'worker_version_id': None, + 'element': None, + }, + { + 'id': str(tr2.id), + 'type': 'word', + 'text': 'something', + 
'score': 0.369, + 'source': None, + 'worker_version_id': str(self.worker_version.id), + 'element': None, + } + ]) def test_list_transcriptions_recursive(self): - for i in range(1, 5): - # Add 4 transcriptions on the page line - self.line.transcriptions.create(worker_version=self.worker_version, type=TranscriptionType.Line, text=f'Text {i}') - for i in range(1, 5): - # Add 4 transcribed line children - zone, _ = self.page.zone.image.zones.get_or_create( - polygon=LinearRing((0, 0), (0, i + 1), (i + 1, i + 1), (i + 1, 0), (0, 0)) - ) - line = self.page.corpus.elements.create(zone=zone, type=self.line.type, name=f'Added line {i}') - line.transcriptions.create(source_id=self.src.id, type=TranscriptionType.Line, text=f'Added text {i}') - line.add_parent(self.page) - self.client.force_login(self.user) - with self.assertNumQueries(14): + with self.assertNumQueries(13): response = self.client.get( reverse('api:element-transcriptions', kwargs={'pk': str(self.page.id)}), data={'recursive': 'true'} ) self.assertEqual(response.status_code, status.HTTP_200_OK) - results = response.json()['results'] - self.assertEqual(len(results), 12) - - page_polygon = [[0, 0], [0, 1000], [1000, 1000], [1000, 0], [0, 0]] - line_polygon = [[400, 400], [400, 500], [500, 500], [500, 400], [400, 400]] self.assertCountEqual( [ - (data['element']['type'], data['worker_version_id'], data['element']['zone']['polygon'], data['text']) - for data in results + (data['element']['type'], data['element']['zone']['polygon'], data['text']) + for data in response.json()['results'] ], [ - ('page', None, page_polygon, 'PARIS'), - ('page', None, page_polygon, 'ROY'), - ('page', None, page_polygon, 'Lorem ipsum dolor sit amet'), - ('page', None, page_polygon, 'DATUM'), - ('text_line', str(self.worker_version.id), line_polygon, 'Text 1'), - ('text_line', str(self.worker_version.id), line_polygon, 'Text 2'), - ('text_line', str(self.worker_version.id), line_polygon, 'Text 3'), - ('text_line', 
str(self.worker_version.id), line_polygon, 'Text 4'), - ('text_line', None, [[0, 0], [0, 2], [2, 2], [2, 0], [0, 0]], 'Added text 1'), - ('text_line', None, [[0, 0], [0, 3], [3, 3], [3, 0], [0, 0]], 'Added text 2'), - ('text_line', None, [[0, 0], [0, 4], [4, 4], [4, 0], [0, 0]], 'Added text 3'), - ('text_line', None, [[0, 0], [0, 5], [5, 5], [5, 0], [0, 0]], 'Added text 4') + ('page', [[0, 0], [0, 1000], [1000, 1000], [1000, 0], [0, 0]], 'Lorem ipsum dolor sit amet'), + ('word', [[100, 100], [100, 200], [200, 200], [200, 100], [100, 100]], 'PARIS'), + ('word', [[400, 400], [400, 500], [500, 500], [500, 400], [400, 400]], 'ROY'), + ('word', [[700, 700], [700, 800], [800, 800], [800, 700], [700, 700]], 'DATUM'), ] ) def test_list_transcriptions_recursive_filtered(self): - for i in range(1, 5): - # Add 4 transcriptions on the page line - self.line.transcriptions.create(source_id=self.src.id, type=TranscriptionType.Line, text=f'Text {i}') - self.client.force_login(self.user) - with self.assertNumQueries(12): + with self.assertNumQueries(13): response = self.client.get( reverse('api:element-transcriptions', kwargs={'pk': str(self.page.id)}), - data={'recursive': 'true', 'type': 'line'} + data={'recursive': 'true', 'type': 'word'} ) self.assertEqual(response.status_code, status.HTTP_200_OK) - results = response.json()['results'] - self.assertEqual(len(results), 4) - for tr in results: - self.assertEqual(tr.get('type'), 'line') + self.assertCountEqual( + [ + (data['element']['type'], data['element']['zone']['polygon'], data['text']) + for data in response.json()['results'] + ], [ + ('word', [[100, 100], [100, 200], [200, 200], [200, 100], [100, 100]], 'PARIS'), + ('word', [[400, 400], [400, 500], [500, 500], [500, 400], [400, 400]], 'ROY'), + ('word', [[700, 700], [700, 800], [800, 800], [800, 700], [700, 700]], 'DATUM'), + ] + ) def test_list_worker_version_transcriptions(self): - - for i in range(1, 5): - # Add 4 transcriptions on the page line with a specific 
worker_version - self.line.transcriptions.create( - type=TranscriptionType.Line, - text=f'Text {i}', - worker_version=self.worker_version - ) + worker_transcription = self.page.transcriptions.create( + type=TranscriptionType.Word, + text='something', + worker_version=self.worker_version, + score=0.369, + ) self.client.force_login(self.user) - with self.assertNumQueries(12): + with self.assertNumQueries(13): response = self.client.get( reverse('api:element-transcriptions', kwargs={'pk': str(self.page.id)}), data={'recursive': 'true', 'worker_version': str(self.worker_version.id)} ) self.assertEqual(response.status_code, status.HTTP_200_OK) - results = response.json()['results'] - self.assertEqual(len(results), 4) - for tr in results: - self.assertEqual(tr.get('worker_version_id'), str(self.worker_version.id)) + self.assertListEqual(response.json()['results'], [ + { + 'id': str(worker_transcription.id), + 'type': 'word', + 'text': 'something', + 'score': 0.369, + 'source': None, + 'worker_version_id': str(self.worker_version.id), + 'element': { + 'id': str(self.page.id), + 'name': 'Volume 1, page 1r', + 'type': 'page', + 'zone': { + 'id': str(self.page.zone_id), + 'polygon': [[0, 0], [0, 1000], [1000, 1000], [1000, 0], [0, 0]], + }, + }, + } + ]) diff --git a/arkindex/images/tests/test_image_elements.py b/arkindex/images/tests/test_image_elements.py index 9b5a138cba8f8d548218590a4f046b499cf60441..1aeb63f8276a26a93dbf2c96ab50a1d707aa8987 100644 --- a/arkindex/images/tests/test_image_elements.py +++ b/arkindex/images/tests/test_image_elements.py @@ -16,7 +16,7 @@ class TestImageElements(FixtureTestCase): data = response.json() self.assertListEqual( [element['name'] for element in data['results']], - ['Volume 1, page 1r', 'Surface A', 'Surface B', 'Text line'], + ['Volume 1, page 1r', 'Surface A', 'Surface B', 'Text line', 'DATUM', 'PARIS', 'ROY'], ) def test_image_elements_type_filter(self): diff --git a/arkindex/project/elastic.py b/arkindex/project/elastic.py index 
8b8430c84ba0fe83cfa87a41f73272152c8b850f..b92db343c83548843061d91ca4c15936936069d9 100644 --- a/arkindex/project/elastic.py +++ b/arkindex/project/elastic.py @@ -107,22 +107,7 @@ class ESElement(Document): @classmethod def from_model(cls, instance): - from arkindex.documents.models import Element, Transcription - - if instance.zone: - transcriptions = instance.transcriptions.all() - else: - children = Element.objects \ - .get_descending(instance.id) \ - .filter(zone__isnull=False) \ - .prefetch_related('zone') - transcriptions = chain(*[ - Transcription.objects.filter( - zone__image_id=elt.zone.image_id, - zone__polygon__within=elt.zone.polygon - ) - for elt in children - ]) + from arkindex.documents.models import Element interpreted_dates = chain(*[md.get_dates() for md in instance.metadatas.all()]) date_range = { @@ -142,13 +127,16 @@ class ESElement(Document): element.name for element in Element.objects.get_ascending(instance.id) ], + # Filter using Python here as metadatas are prefetched entirely + # and applying .filter would make an unnecessary SQL query references=[ md.value.lower() - for md in instance.metadatas.filter(type=MetaType.Reference) + for md in instance.metadatas.all() + if md.type == MetaType.Reference ], transcriptions=list(map( ESTranscriptionInnerDoc.from_model, - transcriptions, + instance.transcriptions.all(), )), date_range=date_range, ) diff --git a/arkindex/project/tests/test_elastic.py b/arkindex/project/tests/test_elastic.py index 9db3b5a55b13e5b070a30217cd2032ecfd77451d..9f38e1d749192e63b1ad9727f6676f05111c23ff 100644 --- a/arkindex/project/tests/test_elastic.py +++ b/arkindex/project/tests/test_elastic.py @@ -61,7 +61,6 @@ class TestESDocuments(FixtureAPITestCase): surface.transcriptions.create( type=TranscriptionType.Word, text='invisible transcription', - zone=page.zone.image.zones.create(polygon=[(0, 0), (0, 1), (1, 1), (1, 0), (0, 0)]), source=DataSource.objects.get(slug='test', type=MLToolType.Recognizer), ) texts = [tr['text'] 
for tr in ESElement.from_model(page).to_dict()['transcriptions']]