From dcfe32caccabc76e1d76f611799ce92abea21c11 Mon Sep 17 00:00:00 2001 From: Erwan Rouchet <rouchet@teklia.com> Date: Fri, 27 Nov 2020 17:21:00 +0100 Subject: [PATCH] Drop DataSource --- README.md | 1 - .../migrations/0007_worker_workerversion.py | 5 +- .../dataimport/tests/test_process_elements.py | 38 +- arkindex/documents/admin.py | 11 +- arkindex/documents/api/elements.py | 5 +- arkindex/documents/api/ml.py | 138 +- arkindex/documents/api/search.py | 2 +- arkindex/documents/fixtures/data.json | 1481 ++++++++--------- .../management/commands/build_fixtures.py | 113 +- arkindex/documents/migrations/0001_initial.py | 4 +- .../migrations/0013_datasource_type_length.py | 6 +- .../migrations/0024_migrate_datasource.py | 113 ++ .../migrations/0025_drop_datasource.py | 52 + arkindex/documents/models.py | 59 +- arkindex/documents/search.py | 4 +- arkindex/documents/serializers/elements.py | 5 +- arkindex/documents/serializers/entities.py | 4 - arkindex/documents/serializers/ml.py | 49 +- arkindex/documents/tasks.py | 78 - .../documents/tests/commands/test_reindex.py | 7 +- .../tests/tasks/test_corpus_delete.py | 25 +- .../tests/tasks/test_ml_results_delete.py | 169 -- .../documents/tests/tasks/test_reindex.py | 13 +- .../tests/test_bulk_classification.py | 21 +- .../tests/test_bulk_element_transcriptions.py | 12 +- arkindex/documents/tests/test_classes.py | 71 +- .../documents/tests/test_create_elements.py | 4 - .../tests/test_create_transcriptions.py | 6 +- arkindex/documents/tests/test_datasource.py | 17 - .../tests/test_edit_transcriptions.py | 32 +- arkindex/documents/tests/test_entities.py | 14 +- arkindex/documents/tests/test_entities_api.py | 136 +- arkindex/documents/tests/test_indexer.py | 10 +- arkindex/documents/tests/test_manifest.py | 10 +- arkindex/documents/tests/test_metadata.py | 36 +- arkindex/documents/tests/test_ml_results.py | 225 --- arkindex/documents/tests/test_moderation.py | 159 +- .../documents/tests/test_parents_elements.py | 4 +- .../documents/tests/test_patch_elements.py | 4 +- .../documents/tests/test_retrieve_elements.py | 21 +- arkindex/documents/tests/test_search.py | 9 +- .../documents/tests/test_transcriptions.py | 29 +- arkindex/project/api_v1.py | 4 - arkindex/project/openapi/patch.yml | 10 - arkindex/project/tests/test_elastic.py | 5 +- arkindex/project/triggers.py | 40 - .../sql_validation/element_trash_children.sql | 3 - .../sql_validation/element_trash_deep.sql | 5 - .../sql_validation/element_trash_ml_class.sql | 4 +- .../element_trash_no_children.sql | 1 - 50 files changed, 1164 insertions(+), 2110 deletions(-) create mode 100644 arkindex/documents/migrations/0024_migrate_datasource.py create mode 100644 arkindex/documents/migrations/0025_drop_datasource.py delete mode 100644 arkindex/documents/tests/tasks/test_ml_results_delete.py delete mode 100644 arkindex/documents/tests/test_datasource.py delete mode 100644 arkindex/documents/tests/test_ml_results.py diff --git a/README.md b/README.md index 6173d8b887..715d73f69d 100644 --- a/README.md +++ b/README.md @@ -177,7 +177,6 @@ You may want to also uninstall `django-nose`, as it is an optional test runner t We use [rq](https://python-rq.org/), integrated via [django-rq](https://pypi.org/project/django-rq/), to run tasks without blocking an API request or causing timeouts. To call them in Python code, you should use the trigger methods in `arkindex.project.triggers`; those will do some safety checks to make catching some errors easier in dev. The actual tasks are in `arkindex.documents.tasks`. The following tasks exist: -* Delete ML results from a corpus or an element and its children: `ml_results_delete` * Delete a corpus: `corpus_delete` * Reindex elements, transcriptions or entities into ElasticSearch: `reindex_start` diff --git a/arkindex/dataimport/migrations/0007_worker_workerversion.py b/arkindex/dataimport/migrations/0007_worker_workerversion.py index 0f7d0fc81d..8190e417d9 100644 --- a/arkindex/dataimport/migrations/0007_worker_workerversion.py +++ b/arkindex/dataimport/migrations/0007_worker_workerversion.py @@ -1,14 +1,13 @@ # Generated by Django 2.2.11 on 2020-05-19 14:55 import uuid +from enum import Enum import django.contrib.postgres.fields.jsonb import django.db.models.deletion import enumfields.fields from django.db import migrations, models -import arkindex_common.ml_tool - class Migration(migrations.Migration): @@ -23,7 +22,7 @@ class Migration(migrations.Migration): ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)), ('name', models.CharField(max_length=100)), ('slug', models.CharField(max_length=100)), - ('type', enumfields.fields.EnumField(enum=arkindex_common.ml_tool.MLToolType, max_length=50)), + ('type', enumfields.fields.EnumField(enum=Enum('MLToolType', ''), max_length=50)), ('repository', models.ForeignKey( on_delete=django.db.models.deletion.CASCADE, related_name='workers', diff --git a/arkindex/dataimport/tests/test_process_elements.py b/arkindex/dataimport/tests/test_process_elements.py index c0bc419ebe..bfc367349c 100644 --- a/arkindex/dataimport/tests/test_process_elements.py +++ b/arkindex/dataimport/tests/test_process_elements.py @@ -3,8 +3,8 @@ import uuid from django.urls import reverse from rest_framework import status -from arkindex.dataimport.models import DataImport, DataImportMode -from arkindex.documents.models import Classification, ClassificationState, Corpus, DataSource, Element, MLClass +from arkindex.dataimport.models import DataImport, DataImportMode, WorkerVersion +from arkindex.documents.models import Classification, ClassificationState, Corpus, Element, MLClass from arkindex.project.tests import FixtureAPITestCase @@ -127,44 +127,44 @@ class TestProcessElements(FixtureAPITestCase): cls.line_5.add_parent(cls.page_5) # Create best classes - source = DataSource.objects.first() - cls.coffee_source = MLClass.objects.create(name='C0FFEE', corpus=cls.private_corpus) - cls.food_source = MLClass.objects.create(name='F00D', corpus=cls.private_corpus) + worker_version = WorkerVersion.objects.get(worker__slug='reco') + cls.coffee_class = MLClass.objects.create(name='C0FFEE', corpus=cls.private_corpus) + cls.food_class = MLClass.objects.create(name='F00D', corpus=cls.private_corpus) Classification.objects.create( element=cls.folder_2, state=ClassificationState.Validated, - ml_class=cls.food_source, - source=source + ml_class=cls.food_class, + worker_version=worker_version, ) Classification.objects.create( element=cls.page_1, state=ClassificationState.Validated, - ml_class=cls.coffee_source, - source=source + ml_class=cls.coffee_class, + worker_version=worker_version, ) Classification.objects.create( element=cls.page_2, high_confidence=True, - ml_class=cls.food_source, - source=source + ml_class=cls.food_class, + worker_version=worker_version, ) Classification.objects.create( element=cls.page_3, state=ClassificationState.Validated, - ml_class=cls.food_source, - source=source + ml_class=cls.food_class, + worker_version=worker_version, ) Classification.objects.create( element=cls.page_5, high_confidence=True, - ml_class=cls.food_source, - source=source + ml_class=cls.food_class, + worker_version=worker_version, ) Classification.objects.create( element=cls.page_5, state=ClassificationState.Validated, - ml_class=cls.coffee_source , - source=source + ml_class=cls.coffee_class, + worker_version=worker_version, ) def setUp(self): @@ -323,7 +323,7 @@ class TestProcessElements(FixtureAPITestCase): ]) def test_filter_best_class_by_id(self): - self.dataimport.best_class = self.food_source.id + self.dataimport.best_class = self.food_class.id self.dataimport.save() elements = [self.page_5, self.page_3, self.folder_2, self.page_2] @@ -472,7 +472,7 @@ class TestProcessElements(FixtureAPITestCase): ]) def test_load_children_and_filter_best_class_by_id(self): - self.dataimport.best_class = self.food_source.id + self.dataimport.best_class = self.food_class.id self.dataimport.load_children = True self.dataimport.save() elements = [self.folder_2, self.page_2, self.page_3, self.page_5] diff --git a/arkindex/documents/admin.py b/arkindex/documents/admin.py index 3286682ba9..d4aa5ebdf0 100644 --- a/arkindex/documents/admin.py +++ b/arkindex/documents/admin.py @@ -7,7 +7,6 @@ from arkindex.documents.models import ( AllowedMetaData, Classification, Corpus, - DataSource, Element, ElementType, Entity, @@ -34,12 +33,6 @@ class CorpusAdmin(admin.ModelAdmin): return False -class DataSourceAdmin(admin.ModelAdmin): - list_display = ('id', 'type', 'slug', 'revision', 'internal') - list_filter = [('type', EnumFieldListFilter), 'internal'] - readonly_fields = ('id', ) - - class ClassificationInline(admin.TabularInline): model = Classification readonly_fields = ('confidence', 'high_confidence', ) @@ -85,8 +78,7 @@ class ElementAdmin(admin.ModelAdmin): class TranscriptionAdmin(admin.ModelAdmin): list_display = ('id', 'text', 'score', 'element', ) - list_filter = ['source'] - fields = ('id', 'text', 'score', 'element', 'source', ) + fields = ('id', 'text', 'score', 'element', ) readonly_fields = ('id', ) raw_id_fields = ('element', ) @@ -125,7 +117,6 @@ class EntityRoleAdmin(admin.ModelAdmin): admin.site.register(Corpus, CorpusAdmin) -admin.site.register(DataSource, DataSourceAdmin) admin.site.register(Element, ElementAdmin) admin.site.register(Transcription, TranscriptionAdmin) admin.site.register(MLClass, MLClassAdmin) diff --git a/arkindex/documents/api/elements.py b/arkindex/documents/api/elements.py index 5ed7d8dd3c..db1092b952 100644 --- a/arkindex/documents/api/elements.py +++ b/arkindex/documents/api/elements.py @@ -58,7 +58,7 @@ from arkindex.project.permissions import IsAuthenticated, IsVerified, IsVerified from arkindex.project.tools import BulkMap from arkindex.project.triggers import corpus_delete, element_trash -classifications_queryset = Classification.objects.select_related('ml_class', 'source').order_by('-confidence') +classifications_queryset = Classification.objects.select_related('ml_class', 'worker_version').order_by('-confidence') best_classifications_prefetch = Prefetch( 'classifications', @@ -884,8 +884,9 @@ class ElementTranscriptions(ListAPIView): self.check_object_permissions(self.request, element) # ORDER BY casting IDs as char to avoid the PostgreSQL optimizer's inefficient scan + # TODO: See if select_related is faster than a prefetch on this endpoint queryset = Transcription.objects \ - .prefetch_related('element__zone__image__server', 'source') \ + .prefetch_related('element__zone__image__server', 'worker_version') \ .annotate(char_id=Cast('id', output_field=CharField())) \ .order_by('char_id') diff --git a/arkindex/documents/api/ml.py b/arkindex/documents/api/ml.py index bd46eb8a71..ce49c4f205 100644 --- a/arkindex/documents/api/ml.py +++ b/arkindex/documents/api/ml.py @@ -11,7 +11,6 @@ from rest_framework.generics import ( GenericAPIView, ListAPIView, ListCreateAPIView, - RetrieveDestroyAPIView, RetrieveUpdateDestroyAPIView, ) from rest_framework.response import Response @@ -20,10 +19,8 @@ from arkindex.documents.models import ( Classification, ClassificationState, Corpus, - DataSource, Element, ElementPath, - Entity, MLClass, Right, Transcription, @@ -36,7 +33,6 @@ from arkindex.documents.serializers.ml import ( ClassificationsSelectionSerializer, ClassificationsSerializer, CountMLClassSerializer, - DataSourceStatsSerializer, ElementTranscriptionsBulkSerializer, TranscriptionBulkSerializer, TranscriptionCreateSerializer, @@ -45,9 +41,8 @@ from arkindex.documents.serializers.ml import ( from arkindex.images.models import Zone from arkindex.project.filters import SafeSearchFilter from arkindex.project.mixins import CorpusACLMixin, DeprecatedMixin, SelectionMixin -from arkindex.project.permissions import IsAdminUser, IsVerified, IsVerifiedOrReadOnly -from arkindex.project.triggers import ml_results_delete, reindex_start -from arkindex_common.ml_tool import MLToolType +from arkindex.project.permissions import IsVerified, IsVerifiedOrReadOnly +from arkindex.project.triggers import reindex_start logger = logging.getLogger(__name__) @@ -134,10 +129,9 @@ class TranscriptionEdit(RetrieveUpdateDestroyAPIView): rights = transcription.element.corpus.get_acl_rights(request.user) errors = defaultdict(list) - non_manual_transcription = bool(transcription.worker_version or transcription.source and transcription.source.slug != 'manual') if Right.Write not in rights: errors['__all__'].append('A write access to transcription element corpus is required.') - if Right.Admin not in rights and non_manual_transcription: + if Right.Admin not in rights and transcription.worker_version_id: errors['__all__'].append('Only admins can edit non-manual transcription.') if (errors): raise PermissionDenied(errors) @@ -460,20 +454,12 @@ class ManageClassificationsSelection(SelectionMixin, CorpusACLMixin, CreateAPIVi def create(self, corpus, request, *args, **kwargs): ml_class = MLClass.objects.filter(id=request.data['ml_class']).first() - data_source, _ = DataSource.objects.get_or_create( - type=MLToolType.Classifier, - slug='manual', - defaults={ - 'revision': '', - 'internal': False, - } - ) elements = self.get_selection(corpus.id) existing_element_ids = set(Classification.objects.filter( element_id__in=elements, - source_id=data_source.id, - ml_class_id=ml_class.id + ml_class_id=ml_class.id, + worker_version_id=None, ).values_list('element_id', flat=True)) classifications = [] @@ -481,7 +467,6 @@ class ManageClassificationsSelection(SelectionMixin, CorpusACLMixin, CreateAPIVi classifications.append(Classification( element=element, ml_class=ml_class, - source=data_source, moderator=self.request.user, state=ClassificationState.Validated, high_confidence=False, @@ -535,11 +520,7 @@ class ClassificationReject(ClassificationModerationActionsMixin): def put(self, request, *args, **kwargs): instance = self.get_object() - manual = ( - instance.source and instance.source.slug == 'manual' - or not instance.source and not instance.worker_version - ) - if manual: + if not instance.worker_version_id: # Delete manual classifications upon rejection instance.delete() return Response(None, status=status.HTTP_204_NO_CONTENT) @@ -549,110 +530,3 @@ class ClassificationReject(ClassificationModerationActionsMixin): instance.save(update_fields=['moderator', 'state']) serializer = self.get_serializer(instance) return Response(serializer.data, status=status.HTTP_200_OK) - - -class MLStatsBase(object): - serializer_class = DataSourceStatsSerializer - permission_classes = (IsAdminUser, ) - # Make DRF understand we return an unpaginated list, for OpenAPI schema generation - action = 'list' - pagination_class = None - - def get_count_querysets(self, instance): - """ - Given an object returned by Django REST Framework's get_object, - should return a dict mapping attribute names to querysets - """ - if isinstance(instance, Element): - if not instance.type.folder: - return { - 'transcriptions_count': Transcription.objects.filter(element_id=instance.id), - 'entities_count': Entity.objects.filter( - Q(transcriptions__element_id=instance.id) - | Q(metadatas__element_id=instance.id) - ), - 'classifications_count': Classification.objects.filter(element_id=instance.id), - } - # The folder AND its children - elements = Element.objects.filter(id=instance.id).values('id').union( - # Disable ordering here because we do not need it and it adds an extra column, - # causing the UNION to fail - Element.objects.get_descending(instance.id).order_by().values('id') - ) - elif isinstance(instance, Corpus): - elements = instance.elements.all() - else: - raise ValueError('Instance is not a corpus or an element: {}'.format(instance)) - - return { - 'transcriptions_count': Transcription.objects.filter(element__in=elements), - 'entities_count': Entity.objects.filter( - Q(transcriptions__element__in=elements) - | Q(metadatas__element__in=elements) - ), - 'classifications_count': Classification.objects.filter(element__in=elements), - } - - def get_counts(self): - count_querysets = self.get_count_querysets(self.get_object()) - - # A dict that links source IDs to another dict holding their stats: - # {id: {transcriptions_count: 42, …}, …} - counts = defaultdict(dict) - - # Request statistics for each kind of ML result, grouped by source ID: lowers the need for any joins or unions - for field_name, queryset in count_querysets.items(): - queryset = queryset.values('source_id').annotate(count=Count('id')).values_list('source_id', 'count') - for source_id, count in queryset: - counts[source_id][field_name] = count - - # Fetch the source IDs returned by the previous queries, - # and set the counts as if they were usual queryset annotations. - sources = list(DataSource.objects.filter(id__in=counts.keys()).order_by('name')) - for source in sources: - for name, value in counts[source.id].items(): - setattr(source, name, value) - - return sources - - def get_serializer(self, *args, **kwargs): - # Force the serializer to work as a list, except when get_serializer is called without arguments - # because the OpenAPI schema generator needs a normal serializer - if args or kwargs: - kwargs['many'] = True - return super().get_serializer(*args, **kwargs) - - def retrieve(self, *args, **kwargs): - serializer = self.get_serializer(self.get_counts()) - return Response(serializer.data) - - -class ElementMLStats(MLStatsBase, RetrieveDestroyAPIView): - openapi_overrides = { - 'operationId': 'RetrieveElementMLStats', - 'description': 'List machine learning result sources along with their result counts for an element', - 'tags': ['ml'], - } - - def get_queryset(self): - return Element.objects.filter(corpus__in=Corpus.objects.readable(self.request.user)).select_related('type') - - def destroy(self, *args, **kwargs): - ml_results_delete(element=self.get_object(), user_id=self.request.user.id) - return Response(status=status.HTTP_204_NO_CONTENT) - - -class CorpusMLStats(MLStatsBase, RetrieveDestroyAPIView): - openapi_overrides = { - 'operationId': 'RetrieveCorpusMLStats', - 'description': 'List machine learning results sources along with their result counts ' - 'for all elements in a corpus', - 'tags': ['ml'], - } - - def get_queryset(self): - return Corpus.objects.readable(self.request.user).only('id') - - def destroy(self, *args, **kwargs): - ml_results_delete(corpus=self.get_object(), user_id=self.request.user.id) - return Response(status=status.HTTP_204_NO_CONTENT) diff --git a/arkindex/documents/api/search.py b/arkindex/documents/api/search.py index 756f020c0c..eeda073222 100644 --- a/arkindex/documents/api/search.py +++ b/arkindex/documents/api/search.py @@ -25,7 +25,7 @@ class ElementSearch(SearchAPIView): 'operationId': 'SearchElements', 'security': [], 'description': 'Get a list of elements with their parents, the total number of transcriptions ' - 'in each element, and a few (not all) of their transcriptions, with their source, ' + 'in each element, and a few (not all) of their transcriptions, with their worker version, ' 'type, zone and image, for a given query.', 'tags': ['search'], } diff --git a/arkindex/documents/fixtures/data.json b/arkindex/documents/fixtures/data.json index 70becd0f67..215284260f 100644 --- a/arkindex/documents/fixtures/data.json +++ b/arkindex/documents/fixtures/data.json @@ -1,986 +1,925 @@ [ { "model": "dataimport.repository", - "pk": "32416126-e127-4b7a-8042-ad9a47eb9d78", + "pk": "85c68a03-b624-4e39-93c0-4499b3308e00", "fields": { - "url": "http://gitlab/repo", - "type": "iiif", - "hook_token": "hook-token", - "credentials": "e0910bb1-91f7-4672-a51f-888ad5173fdd", + "url": "http://my_repo.fake/workers/worker", + "type": "worker", + "hook_token": "worker-hook-token", + "credentials": "8d60ea1e-7df3-4d07-9f1f-422e9b0d0bba", "provider_name": "GitLabProvider" } }, { "model": "dataimport.repository", - "pk": "4730656e-3cda-4bd7-b6f8-73f8aec667c4", + "pk": "e2bc896a-699d-419c-b6c1-ddf5abee8c65", "fields": { - "url": "http://my_repo.fake/workers/worker", - "type": "worker", - "hook_token": "worker-hook-token", - "credentials": "e0910bb1-91f7-4672-a51f-888ad5173fdd", + "url": "http://gitlab/repo", + "type": "iiif", + "hook_token": "hook-token", + "credentials": "8d60ea1e-7df3-4d07-9f1f-422e9b0d0bba", "provider_name": "GitLabProvider" } }, { "model": "dataimport.revision", - "pk": "a71737e4-3b10-4c2f-92ba-4d23c99459e5", + "pk": "002c185c-c6ce-4aa0-b5ee-a894588ae4d2", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "repo": "32416126-e127-4b7a-8042-ad9a47eb9d78", - "hash": "42", - "message": "a", - "author": "me" + "repo": "85c68a03-b624-4e39-93c0-4499b3308e00", + "hash": "1337", + "message": "My w0rk3r", + "author": "Test user" } }, { "model": "dataimport.revision", - "pk": "fe788ef9-0e48-4a1e-90e8-e2cbbfe3367f", + "pk": "4b14fe17-c4a7-4a5a-b494-329ff26ec0b4", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "repo": "4730656e-3cda-4bd7-b6f8-73f8aec667c4", - "hash": "1337", - "message": "My w0rk3r", - "author": "Test user" + "repo": "e2bc896a-699d-419c-b6c1-ddf5abee8c65", + "hash": "42", + "message": "a", + "author": "me" } }, { "model": "dataimport.worker", - "pk": "1af12340-dd0c-4cc4-8bb6-b0d9da01e58b", + "pk": "7bb37217-42dc-4e41-bebc-41b008c2ea4b", "fields": { - "name": "Document layout analyser", - "slug": "dla", - "type": "dla", - "repository": "4730656e-3cda-4bd7-b6f8-73f8aec667c4" + "name": "Recognizer", + "slug": "reco", + "type": "recognizer", + "repository": "85c68a03-b624-4e39-93c0-4499b3308e00" } }, { "model": "dataimport.worker", - "pk": "29026552-c9ed-4c8b-8b5f-84cc5ae18d2b", + "pk": "8bd87722-06cb-461d-95cd-0b6a3eb7d23b", "fields": { - "name": "Recognizer", - "slug": "reco", - "type": "recognizer", - "repository": "4730656e-3cda-4bd7-b6f8-73f8aec667c4" + "name": "Document layout analyser", + "slug": "dla", + "type": "dla", + "repository": "85c68a03-b624-4e39-93c0-4499b3308e00" } }, { "model": "dataimport.workerversion", - "pk": "9ce4bb8a-37ce-4a1d-84c8-55b23da22377", + "pk": "65ef6400-06c1-4d81-a8fb-11f3de0cc9ed", "fields": { - "worker": "1af12340-dd0c-4cc4-8bb6-b0d9da01e58b", - "revision": "fe788ef9-0e48-4a1e-90e8-e2cbbfe3367f", + "worker": "8bd87722-06cb-461d-95cd-0b6a3eb7d23b", + "revision": "002c185c-c6ce-4aa0-b5ee-a894588ae4d2", "configuration": { "test": 42 }, "state": "available", - "docker_image": "2f612d65-6b97-4f7d-a91b-a901a88624e1", + "docker_image": "2a2343d0-4535-4439-8685-07811b1e4e7a", "docker_image_iid": null } }, { "model": "dataimport.workerversion", - "pk": "ae5a0896-a8d2-427b-be14-fd9130823851", + "pk": "a21e5a40-1ad3-4c44-ade6-0a812b5afd21", "fields": { - "worker": "29026552-c9ed-4c8b-8b5f-84cc5ae18d2b", - "revision": "fe788ef9-0e48-4a1e-90e8-e2cbbfe3367f", + "worker": "7bb37217-42dc-4e41-bebc-41b008c2ea4b", + "revision": "002c185c-c6ce-4aa0-b5ee-a894588ae4d2", "configuration": { "test": 42 }, "state": "available", - "docker_image": "2f612d65-6b97-4f7d-a91b-a901a88624e1", + "docker_image": "2a2343d0-4535-4439-8685-07811b1e4e7a", "docker_image_iid": null } }, { "model": "documents.corpus", - "pk": "3ea05d94-46b0-4270-a0be-2dfa7983edc5", + "pk": "f0c8bfd9-da72-4219-ad07-e9391735af2f", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", "name": "Unit Tests", "description": "", - "repository": "32416126-e127-4b7a-8042-ad9a47eb9d78", + "repository": "e2bc896a-699d-419c-b6c1-ddf5abee8c65", "public": true } }, { "model": "documents.elementtype", - "pk": "6b750f47-efa2-4a49-9f9f-7f28474fc3e0", + "pk": "276c67e2-5400-46c1-a001-5ed5bb2848ec", "fields": { - "corpus": "3ea05d94-46b0-4270-a0be-2dfa7983edc5", - "slug": "word", - "display_name": "Word", + "corpus": "f0c8bfd9-da72-4219-ad07-e9391735af2f", + "slug": "page", + "display_name": "Page", "folder": false } }, { "model": "documents.elementtype", - "pk": "8bc8de12-809d-4f2e-acd8-fad0aafa7428", + "pk": "318a7ab3-c1d8-458a-be1f-9802cbfdbbfa", "fields": { - "corpus": "3ea05d94-46b0-4270-a0be-2dfa7983edc5", - "slug": "surface", - "display_name": "Surface", + "corpus": "f0c8bfd9-da72-4219-ad07-e9391735af2f", + "slug": "text_line", + "display_name": "Line", "folder": false } }, { "model": "documents.elementtype", - "pk": "d64f4c8d-2d15-4965-b0e2-54397dfcc0cb", + "pk": "7d154447-65c6-4050-895f-e76bdc847fb0", "fields": { - "corpus": "3ea05d94-46b0-4270-a0be-2dfa7983edc5", - "slug": "volume", - "display_name": "Volume", - "folder": true + "corpus": "f0c8bfd9-da72-4219-ad07-e9391735af2f", + "slug": "word", + "display_name": "Word", + "folder": false } }, { "model": "documents.elementtype", - "pk": "baef17a0-86b7-4777-8f9c-c1a2512c42c5", + "pk": "9a72b609-e7b4-48e9-9f9b-cd9f6a548855", "fields": { - "corpus": "3ea05d94-46b0-4270-a0be-2dfa7983edc5", - "slug": "text_line", - "display_name": "Line", - "folder": false + "corpus": "f0c8bfd9-da72-4219-ad07-e9391735af2f", + "slug": "volume", + "display_name": "Volume", + "folder": true } }, { "model": "documents.elementtype", - "pk": "c5d070a5-7344-4de4-812b-28f2e3d8a970", + "pk": "c13ef184-cb9e-4078-978f-bc9e6e9471ee", "fields": { - "corpus": "3ea05d94-46b0-4270-a0be-2dfa7983edc5", - "slug": "page", - "display_name": "Page", + "corpus": "f0c8bfd9-da72-4219-ad07-e9391735af2f", + "slug": "act", + "display_name": "Act", "folder": false } }, { "model": "documents.elementtype", - "pk": "ec70e16c-4d4a-4108-8b86-1a9edceb9e87", + "pk": "c7203498-3b3b-4e6a-a4c5-2a868085c29a", "fields": { - "corpus": "3ea05d94-46b0-4270-a0be-2dfa7983edc5", - "slug": "act", - "display_name": "Act", + "corpus": "f0c8bfd9-da72-4219-ad07-e9391735af2f", + "slug": "surface", + "display_name": "Surface", "folder": false } }, { "model": "documents.elementpath", - "pk": "14cbb024-f6b2-4371-8933-42f6d17a86b5", + "pk": "07f0e206-6c31-409d-8b2e-95a9bc4432d3", "fields": { - "element": "8200fd63-c7e8-4724-8dc4-04b8168dd243", - "path": "[\"83a3df5a-25f0-4c6c-aae9-4d90eaf2f6fa\"]", + "element": "8f4cebfe-268b-447b-bb4b-090639f0aa53", + "path": "[\"8a25ab8a-a0ae-40f2-b256-6a67d6638797\"]", "ordering": 0 } }, { "model": "documents.elementpath", - "pk": "16c1910f-64ae-40bc-92d4-0f2f537ced90", + "pk": "0bf44a5f-74c6-4bdf-be9a-3f72339c5d09", "fields": { - "element": "4a780492-5a8b-437e-a002-f1572dd7520d", - "path": "[\"83a3df5a-25f0-4c6c-aae9-4d90eaf2f6fa\"]", - "ordering": 0 + "element": "2b4f0ab6-9651-4357-b79a-7a5f07a95445", + "path": "[\"8a25ab8a-a0ae-40f2-b256-6a67d6638797\", \"c82a340b-fadb-43c2-b601-46fc33afe3f3\"]", + "ordering": 2 } }, { "model": "documents.elementpath", - "pk": "18c9eecf-1b1b-4dd6-be56-cacde3f44dab", + "pk": "24f3e858-ff59-4b51-927b-37fa1e960f77", "fields": { - "element": "2a5a1104-09a6-4189-9a7e-e306de03834a", - "path": "[\"83a3df5a-25f0-4c6c-aae9-4d90eaf2f6fa\", \"8200fd63-c7e8-4724-8dc4-04b8168dd243\"]", - "ordering": 2 + "element": "ea77ce48-4060-4f92-996a-3fcb19ea65c0", + "path": "[\"8a25ab8a-a0ae-40f2-b256-6a67d6638797\", \"8f4cebfe-268b-447b-bb4b-090639f0aa53\"]", + "ordering": 1 } }, { "model": "documents.elementpath", - "pk": "2840f5ec-5b0c-4d3e-bd16-41e77f1e39b5", + "pk": "2a980695-ad68-47e5-bf6a-b23f1a609705", "fields": { - "element": "c66b0527-75ee-455e-a280-075bae7fb61a", - "path": "[\"83a3df5a-25f0-4c6c-aae9-4d90eaf2f6fa\", \"8200fd63-c7e8-4724-8dc4-04b8168dd243\"]", + "element": "977dfca9-9a51-4ce1-bdb8-9e6ebfe4777b", + "path": "[\"8a25ab8a-a0ae-40f2-b256-6a67d6638797\", \"1cd34035-2869-473f-9d5f-9fd57b69172e\"]", "ordering": 0 } }, { "model": "documents.elementpath", - "pk": "287df024-4b40-43c1-b38c-b1c4dcb37884", + "pk": "33226b07-b4c2-453d-ac23-392b42966c05", "fields": { - "element": "13cf31d6-5d9a-43c0-ade0-05fe0933cfcc", - "path": "[\"83a3df5a-25f0-4c6c-aae9-4d90eaf2f6fa\", \"127daf48-f035-446f-bc3c-926b386c1fd5\"]", + "element": "f3045910-bae8-480f-ac94-663d04d6668b", + "path": "[\"8a25ab8a-a0ae-40f2-b256-6a67d6638797\"]", "ordering": 1 } }, { "model": "documents.elementpath", - "pk": "304d552e-687c-4d84-b640-4f18787f67cf", + "pk": "3deffa63-7f80-4447-b74f-15565b27c635", "fields": { - "element": "c3fd6c14-6714-4e39-bff5-21def7c76065", - "path": "[\"83a3df5a-25f0-4c6c-aae9-4d90eaf2f6fa\", \"8200fd63-c7e8-4724-8dc4-04b8168dd243\"]", - "ordering": 0 + "element": "400c09f2-90ca-4894-aff2-a1b02fd6fd71", + "path": "[\"8a25ab8a-a0ae-40f2-b256-6a67d6638797\"]", + "ordering": 2 } }, { "model": "documents.elementpath", - "pk": "31459c9e-21d3-418e-8ab6-38627d61a100", + "pk": "4a9ba149-88a9-4087-bcf2-79e5e6fae1c3", "fields": { - "element": "829eec34-a36f-4cf1-a385-27dfc59efbeb", - "path": "[\"83a3df5a-25f0-4c6c-aae9-4d90eaf2f6fa\"]", - "ordering": 3 + "element": "c82a340b-fadb-43c2-b601-46fc33afe3f3", + "path": "[\"8a25ab8a-a0ae-40f2-b256-6a67d6638797\"]", + "ordering": 2 } }, { "model": "documents.elementpath", - "pk": "338f4573-6cd3-4b60-bf8d-cfea7e113423", + "pk": "4ad41a30-33cc-41dc-9a82-8ec8bce98247", "fields": { - "element": "ffa127a6-82f4-41f6-9041-2b6cb3bd27d6", - "path": "[\"83a3df5a-25f0-4c6c-aae9-4d90eaf2f6fa\", \"b2401d97-cff8-45fd-a859-9ff72f5af322\"]", - "ordering": 0 + "element": "f1124a61-1c88-48f2-842d-d20fb61480c2", + "path": "[\"8a25ab8a-a0ae-40f2-b256-6a67d6638797\", \"8f4cebfe-268b-447b-bb4b-090639f0aa53\"]", + "ordering": 2 } }, { "model": "documents.elementpath", - "pk": "39d1c344-1fc2-48b1-8be1-53882a0abef3", + "pk": "4ffa6f17-9d7c-46a3-b79b-099cc9c73399", "fields": { - "element": "127daf48-f035-446f-bc3c-926b386c1fd5", - "path": "[\"83a3df5a-25f0-4c6c-aae9-4d90eaf2f6fa\"]", - "ordering": 2 + "element": "c62a1c87-dfd3-405c-87b6-2a975965937c", + "path": "[\"8a25ab8a-a0ae-40f2-b256-6a67d6638797\", \"400c09f2-90ca-4894-aff2-a1b02fd6fd71\"]", + "ordering": 0 } }, { "model": "documents.elementpath", - "pk": "3d431978-8257-406d-8dce-ca7b766d2d0b", + "pk": "5c2b96ad-7e5c-4407-bbfc-aba07bbf7d7d", "fields": { - "element": "d843555c-4b00-486d-8fca-8f2bcf22bf51", - "path": "[\"83a3df5a-25f0-4c6c-aae9-4d90eaf2f6fa\", \"17770006-9a53-410c-b2e8-4fd662d97f39\"]", - "ordering": 0 + "element": "87a88cea-b07b-4a9b-b2d1-236fb522c61e", + "path": "[\"8a25ab8a-a0ae-40f2-b256-6a67d6638797\", \"f3045910-bae8-480f-ac94-663d04d6668b\"]", + "ordering": 1 } }, { "model": "documents.elementpath", - "pk": "4df49c4c-c08e-4872-9a71-f1eed968cadb", + "pk": "6ad8eba7-9072-49a2-ab47-6b3a8462f697", "fields": { - "element": "30dda265-4638-4e93-9ca9-c18da4b4b067", - "path": "[\"b1ef0e22-d9e5-4506-b9b8-ae04c0f0e9c9\"]", + "element": "f70ccd9d-c6e6-48da-9418-38df1684c73a", + "path": "[\"8a25ab8a-a0ae-40f2-b256-6a67d6638797\", \"f3045910-bae8-480f-ac94-663d04d6668b\"]", "ordering": 0 } }, { "model": "documents.elementpath", - "pk": "52cc2f64-afd4-44b4-a6ef-03b0c0b99f19", + "pk": "82712ef3-4f56-4281-92c0-6034109ad574", "fields": { - "element": "fa3cfa49-01ea-4ab7-a5e0-dcc129289864", - "path": "[\"83a3df5a-25f0-4c6c-aae9-4d90eaf2f6fa\", \"e0b3c9f2-753a-402a-8923-dfae7c630406\"]", - "ordering": 2 + "element": "c26ee257-d271-48ff-a623-f31b2f2d126d", + "path": "[\"8a25ab8a-a0ae-40f2-b256-6a67d6638797\", \"b241b2ad-b7c1-460a-94aa-2e15b294f68d\"]", + "ordering": 0 } }, { "model": "documents.elementpath", - "pk": "57b40d4a-41fd-49e8-8183-b5f8218c4040", + "pk": "a39fb463-b693-4a92-a3a3-d79844750b21", "fields": { - "element": "a43644ab-cd3d-4463-b237-f54b642fb747", - "path": "[\"83a3df5a-25f0-4c6c-aae9-4d90eaf2f6fa\", \"17770006-9a53-410c-b2e8-4fd662d97f39\"]", - "ordering": 1 + "element": "1cd34035-2869-473f-9d5f-9fd57b69172e", + "path": "[\"8a25ab8a-a0ae-40f2-b256-6a67d6638797\"]", + "ordering": 4 } }, { "model": "documents.elementpath", - "pk": "5ab81995-b3af-4b59-b327-11ae4510b81b", + "pk": "a4d45dfb-94e0-437d-9fb4-ab78b88eee5a", "fields": { - "element": "074c4f63-f6e2-4f0f-a5b3-8c9afc896a91", - "path": "[\"83a3df5a-25f0-4c6c-aae9-4d90eaf2f6fa\", \"4a780492-5a8b-437e-a002-f1572dd7520d\"]", + "element": "806badbb-ac1b-42c1-8ca7-2c5bc02284ac", + "path": "[\"8a25ab8a-a0ae-40f2-b256-6a67d6638797\", \"8f4cebfe-268b-447b-bb4b-090639f0aa53\"]", "ordering": 0 } }, { "model": "documents.elementpath", - "pk": "7229f4d8-8082-4bb4-aaf0-0afc9b9f63a1", + "pk": "aa9d9a37-fca5-48e8-98e0-fa883fde13ee", "fields": { - "element": "de95d376-b6a6-4145-8741-d22867d8f4ad", - "path": "[\"83a3df5a-25f0-4c6c-aae9-4d90eaf2f6fa\", \"e0b3c9f2-753a-402a-8923-dfae7c630406\"]", - "ordering": 1 + "element": "5d827922-3335-4aa4-8f77-08d393caa177", + "path": "[\"8a25ab8a-a0ae-40f2-b256-6a67d6638797\", \"72acbc89-a61e-430c-beca-c81e3fd5394f\"]", + "ordering": 0 } }, { "model": "documents.elementpath", - "pk": "7674d247-d576-49fa-a5bf-ee217b38acc9", + "pk": "bf641e06-5886-47b9-b47c-8c5b36ed1488", "fields": { - "element": "791f6f70-022c-4675-835b-d1b42a74ebca", - "path": "[\"83a3df5a-25f0-4c6c-aae9-4d90eaf2f6fa\", \"8200fd63-c7e8-4724-8dc4-04b8168dd243\"]", - "ordering": 1 + "element": "2fe024a5-7fa3-421f-a01e-5f6958d11c86", + "path": "[\"8a25ab8a-a0ae-40f2-b256-6a67d6638797\", \"8f4cebfe-268b-447b-bb4b-090639f0aa53\"]", + "ordering": 0 } }, { "model": "documents.elementpath", - "pk": "837569e5-bace-4e37-a0e4-4f35db89e8bb", + "pk": "c70e78af-c7f9-4570-8943-4366e1ad4731", "fields": { - "element": "83e43013-f987-4374-ad1b-4187245846e5", - "path": "[\"83a3df5a-25f0-4c6c-aae9-4d90eaf2f6fa\"]", - "ordering": 2 + "element": "7a10db08-4bd0-4fc6-b3ed-7a797287fdfc", + "path": "[\"a38bdddb-77c2-4a3e-906e-5a58e2cb6d10\"]", + "ordering": 0 } }, { "model": "documents.elementpath", - "pk": "899b8423-5c2f-418a-967f-d4ab41b27e09", + "pk": "c77ff79f-830b-4a40-989a-aab4f1b027b9", "fields": { - "element": "d638b6a9-33fe-47dc-9bd4-f985d912fd20", - "path": "[\"b1ef0e22-d9e5-4506-b9b8-ae04c0f0e9c9\"]", + "element": "9611a73c-efcc-4725-918b-00789a8b1dea", + "path": "[\"8a25ab8a-a0ae-40f2-b256-6a67d6638797\", \"b241b2ad-b7c1-460a-94aa-2e15b294f68d\"]", "ordering": 2 } }, { "model": "documents.elementpath", - "pk": "a437870d-8142-438c-8702-b15e9d3db734", + "pk": "cf00a83b-5a1c-4974-a917-edadb05b9786", "fields": { - "element": "b2401d97-cff8-45fd-a859-9ff72f5af322", - "path": "[\"83a3df5a-25f0-4c6c-aae9-4d90eaf2f6fa\"]", - "ordering": 4 + "element": "49712988-9240-4cc8-8107-9c5162e69247", + "path": "[\"8a25ab8a-a0ae-40f2-b256-6a67d6638797\", \"c82a340b-fadb-43c2-b601-46fc33afe3f3\"]", + "ordering": 1 } }, { "model": "documents.elementpath", - "pk": "ac98ff1a-1985-48c0-9474-0c4f959d04f2", + "pk": "d0cc98f6-59ec-409f-a3da-e6e5c7bcde70", "fields": { - "element": "17770006-9a53-410c-b2e8-4fd662d97f39", - "path": "[\"83a3df5a-25f0-4c6c-aae9-4d90eaf2f6fa\"]", + "element": "bd75bf54-7f70-4e38-a43b-9a051f0a235d", + "path": "[\"8a25ab8a-a0ae-40f2-b256-6a67d6638797\", \"b241b2ad-b7c1-460a-94aa-2e15b294f68d\"]", "ordering": 1 } }, { "model": "documents.elementpath", - "pk": "af214327-54bd-406f-92b4-6d42b9b9b504", + "pk": "def02316-8b8d-4d4d-965d-b8aba1756903", "fields": { - "element": "857c1df7-e89b-4329-a3d5-01585dd4aa94", - "path": "[\"83a3df5a-25f0-4c6c-aae9-4d90eaf2f6fa\", \"83e43013-f987-4374-ad1b-4187245846e5\"]", - "ordering": 0 + "element": "90e40bfb-e603-425f-934a-f890d3318ff2", + "path": "[\"a38bdddb-77c2-4a3e-906e-5a58e2cb6d10\"]", + "ordering": 2 } }, { "model": "documents.elementpath", - "pk": "bd13c121-11f3-4812-aeaa-17b6d16221dc", + "pk": "e5cc04eb-1484-4070-aec1-353a4a8fcd31", "fields": { - "element": "1d5f2b0b-4c6e-47f4-a51b-6a349e9edecb", - "path": "[\"83a3df5a-25f0-4c6c-aae9-4d90eaf2f6fa\", \"127daf48-f035-446f-bc3c-926b386c1fd5\"]", - "ordering": 0 + "element": "b241b2ad-b7c1-460a-94aa-2e15b294f68d", + "path": "[\"8a25ab8a-a0ae-40f2-b256-6a67d6638797\"]", + "ordering": 1 } }, { "model": "documents.elementpath", - "pk": "c39a75e9-b229-4bf4-81e7-4cc6e6e1ac83", + "pk": "e6444c26-5019-4f32-bec5-107f625f4654", "fields": { - "element": "e0b3c9f2-753a-402a-8923-dfae7c630406", - "path": "[\"83a3df5a-25f0-4c6c-aae9-4d90eaf2f6fa\"]", - "ordering": 1 + "element": "72acbc89-a61e-430c-beca-c81e3fd5394f", + "path": "[\"8a25ab8a-a0ae-40f2-b256-6a67d6638797\"]", + "ordering": 3 } }, { "model": "documents.elementpath", - "pk": "cf020c8f-e78d-4360-9978-5ae68f5a2788", + "pk": "e9b44c87-35cd-4b5a-8c1b-8e5b8a989d77", "fields": { - "element": "e0f42ad4-6c5c-4ee7-a50e-e0394fb864fa", - "path": "[\"83a3df5a-25f0-4c6c-aae9-4d90eaf2f6fa\", \"127daf48-f035-446f-bc3c-926b386c1fd5\"]", - "ordering": 2 + "element": "2573bf00-57c7-4556-8a3f-dbc04a5268a0", + "path": "[\"8a25ab8a-a0ae-40f2-b256-6a67d6638797\", \"6793a264-486c-4df1-86c2-5d465114a684\"]", + "ordering": 0 } }, { "model": "documents.elementpath", - "pk": "cfe2f0bd-0bdc-43ec-8bb2-2d54761a1cba", + "pk": "f50ad9f5-b3c3-404c-806b-b0675ddf4d62", "fields": { - "element": "f082c5a6-5080-413b-a25a-9f5e44ffde36", - "path": "[\"83a3df5a-25f0-4c6c-aae9-4d90eaf2f6fa\", \"e0b3c9f2-753a-402a-8923-dfae7c630406\"]", - "ordering": 0 + "element": "7931eee8-ad36-4757-85bf-560f30c4e055", + "path": "[\"a38bdddb-77c2-4a3e-906e-5a58e2cb6d10\"]", + "ordering": 1 } }, { "model": "documents.elementpath", - "pk": "e613a821-c8dc-4339-a426-aa857c29f894", + "pk": "f73eb37e-43ff-48ac-a217-d79a0224155b", "fields": { - "element": "7feab108-63d6-49ba-9d58-56305cff534a", - "path": "[\"83a3df5a-25f0-4c6c-aae9-4d90eaf2f6fa\", \"829eec34-a36f-4cf1-a385-27dfc59efbeb\"]", + "element": "6793a264-486c-4df1-86c2-5d465114a684", + "path": "[\"8a25ab8a-a0ae-40f2-b256-6a67d6638797\"]", "ordering": 0 } }, { "model": "documents.elementpath", - "pk": "fa8f0af2-ea9e-4b4e-88a7-6c4f1b695d9b", + "pk": "fcd41524-621b-49c2-b8b1-f8bf21ee18b8", "fields": { - "element": "fb79b21b-76fb-4aa4-a80a-5ad3253343d8", - "path": "[\"b1ef0e22-d9e5-4506-b9b8-ae04c0f0e9c9\"]", - "ordering": 1 + "element": "fe9529e5-5bc3-44d4-bfb0-3b592ec79c53", + "path": "[\"8a25ab8a-a0ae-40f2-b256-6a67d6638797\", \"c82a340b-fadb-43c2-b601-46fc33afe3f3\"]", + "ordering": 0 } }, { "model": "documents.element", - "pk": "074c4f63-f6e2-4f0f-a5b3-8c9afc896a91", + "pk": "1cd34035-2869-473f-9d5f-9fd57b69172e", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "3ea05d94-46b0-4270-a0be-2dfa7983edc5", - "type": "8bc8de12-809d-4f2e-acd8-fad0aafa7428", - "name": "Surface A", - "zone": "cb095815-8630-42f0-b9c8-2bc7d4f171a9", - "source": null, + "corpus": "f0c8bfd9-da72-4219-ad07-e9391735af2f", + "type": "c13ef184-cb9e-4078-978f-bc9e6e9471ee", + "name": "Act 5", + "zone": null, "worker_version": null } }, { "model": "documents.element", - "pk": "127daf48-f035-446f-bc3c-926b386c1fd5", + "pk": "2573bf00-57c7-4556-8a3f-dbc04a5268a0", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "3ea05d94-46b0-4270-a0be-2dfa7983edc5", - "type": "c5d070a5-7344-4de4-812b-28f2e3d8a970", - "name": "Volume 1, page 2r", - "zone": "9e353033-4865-43ee-970c-111eb2711c22", - "source": null, + "corpus": "f0c8bfd9-da72-4219-ad07-e9391735af2f", + "type": "c7203498-3b3b-4e6a-a4c5-2a868085c29a", + "name": "Surface A", + "zone": "be65ea52-22fe-4258-8eaf-1ff69f212e05", "worker_version": null } }, { "model": "documents.element", - "pk": "13cf31d6-5d9a-43c0-ade0-05fe0933cfcc", + "pk": "2b4f0ab6-9651-4357-b79a-7a5f07a95445", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "3ea05d94-46b0-4270-a0be-2dfa7983edc5", - "type": "6b750f47-efa2-4a49-9f9f-7f28474fc3e0", - "name": "ROY", - "zone": "856e0865-1bde-4dee-89be-3cf5372bbbab", - "source": null, + "corpus": "f0c8bfd9-da72-4219-ad07-e9391735af2f", + "type": "7d154447-65c6-4050-895f-e76bdc847fb0", + "name": "DATUM", + "zone": "ab9be037-dbed-4060-b6c9-49350c41e08b", "worker_version": null } }, { "model": "documents.element", - "pk": "17770006-9a53-410c-b2e8-4fd662d97f39", + "pk": "2fe024a5-7fa3-421f-a01e-5f6958d11c86", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "3ea05d94-46b0-4270-a0be-2dfa7983edc5", - "type": "ec70e16c-4d4a-4108-8b86-1a9edceb9e87", - "name": "Act 2", - "zone": null, - "source": null, + "corpus": "f0c8bfd9-da72-4219-ad07-e9391735af2f", + "type": "318a7ab3-c1d8-458a-be1f-9802cbfdbbfa", + "name": "Text line", + "zone": "6879216d-f399-4346-9d6a-d99b1d11a02c", "worker_version": null } }, { "model": "documents.element", - "pk": "1d5f2b0b-4c6e-47f4-a51b-6a349e9edecb", + "pk": "400c09f2-90ca-4894-aff2-a1b02fd6fd71", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "3ea05d94-46b0-4270-a0be-2dfa7983edc5", - "type": "6b750f47-efa2-4a49-9f9f-7f28474fc3e0", - "name": "PARIS", - "zone": "f507c59b-7f7d-4acf-9125-5d880836c90e", - "source": null, + "corpus": "f0c8bfd9-da72-4219-ad07-e9391735af2f", + "type": "c13ef184-cb9e-4078-978f-bc9e6e9471ee", + "name": "Act 3", + "zone": null, "worker_version": null } }, { "model": "documents.element", - "pk": "2a5a1104-09a6-4189-9a7e-e306de03834a", + "pk": "49712988-9240-4cc8-8107-9c5162e69247", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "3ea05d94-46b0-4270-a0be-2dfa7983edc5", - "type": "6b750f47-efa2-4a49-9f9f-7f28474fc3e0", - "name": "DATUM", - "zone": "678785d2-17f6-42bb-964a-3ad45da5d949", - "source": null, + "corpus": "f0c8bfd9-da72-4219-ad07-e9391735af2f", + "type": "7d154447-65c6-4050-895f-e76bdc847fb0", + "name": "ROY", + "zone": "d6aa344f-0936-4763-8f47-22015f19d8cc", "worker_version": null } }, { "model": "documents.element", - "pk": "30dda265-4638-4e93-9ca9-c18da4b4b067", + "pk": "5d827922-3335-4aa4-8f77-08d393caa177", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "3ea05d94-46b0-4270-a0be-2dfa7983edc5", - "type": "c5d070a5-7344-4de4-812b-28f2e3d8a970", - "name": "Volume 2, page 1r", - "zone": "e06803f1-fd2b-4429-bfde-a143d561c20e", - "source": null, + "corpus": "f0c8bfd9-da72-4219-ad07-e9391735af2f", + "type": "c7203498-3b3b-4e6a-a4c5-2a868085c29a", + "name": "Surface E", + "zone": "de5d7d7b-6ed2-4041-906a-bf43cbae1290", "worker_version": null } }, { "model": "documents.element", - "pk": "4a780492-5a8b-437e-a002-f1572dd7520d", + "pk": "6793a264-486c-4df1-86c2-5d465114a684", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "3ea05d94-46b0-4270-a0be-2dfa7983edc5", - "type": "ec70e16c-4d4a-4108-8b86-1a9edceb9e87", + "corpus": "f0c8bfd9-da72-4219-ad07-e9391735af2f", + "type": "c13ef184-cb9e-4078-978f-bc9e6e9471ee", "name": "Act 1", "zone": null, - "source": null, "worker_version": null } }, { "model": "documents.element", - "pk": "791f6f70-022c-4675-835b-d1b42a74ebca", + "pk": "72acbc89-a61e-430c-beca-c81e3fd5394f", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "3ea05d94-46b0-4270-a0be-2dfa7983edc5", - "type": "6b750f47-efa2-4a49-9f9f-7f28474fc3e0", - "name": "ROY", - "zone": "dd3a3989-caac-42f2-b046-5835195b55d0", - "source": null, + "corpus": "f0c8bfd9-da72-4219-ad07-e9391735af2f", + "type": "c13ef184-cb9e-4078-978f-bc9e6e9471ee", + "name": "Act 4", + "zone": null, "worker_version": null } }, { "model": "documents.element", - "pk": "7feab108-63d6-49ba-9d58-56305cff534a", + "pk": "7931eee8-ad36-4757-85bf-560f30c4e055", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "3ea05d94-46b0-4270-a0be-2dfa7983edc5", - "type": "8bc8de12-809d-4f2e-acd8-fad0aafa7428", - "name": "Surface E", - "zone": "17c8ea0f-ed35-4bb0-ae27-c2b9468fc704", - "source": null, + "corpus": "f0c8bfd9-da72-4219-ad07-e9391735af2f", + "type": "276c67e2-5400-46c1-a001-5ed5bb2848ec", + "name": "Volume 2, page 1v", + "zone": "6a08c63d-4036-4ece-a2b4-f87d9703e707", "worker_version": null } }, { "model": "documents.element", - "pk": "8200fd63-c7e8-4724-8dc4-04b8168dd243", + "pk": "7a10db08-4bd0-4fc6-b3ed-7a797287fdfc", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "3ea05d94-46b0-4270-a0be-2dfa7983edc5", - "type": "c5d070a5-7344-4de4-812b-28f2e3d8a970", - "name": "Volume 1, page 1r", - "zone": "a715c81a-3010-428e-8e37-77701048de8e", - "source": null, + "corpus": "f0c8bfd9-da72-4219-ad07-e9391735af2f", + "type": "276c67e2-5400-46c1-a001-5ed5bb2848ec", + "name": "Volume 2, page 1r", + "zone": "3a05974f-408e-4302-a064-31b3e57e99bf", "worker_version": null } }, { "model": "documents.element", - "pk": "829eec34-a36f-4cf1-a385-27dfc59efbeb", + "pk": "806badbb-ac1b-42c1-8ca7-2c5bc02284ac", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "3ea05d94-46b0-4270-a0be-2dfa7983edc5", - "type": "ec70e16c-4d4a-4108-8b86-1a9edceb9e87", - "name": "Act 4", - "zone": null, - "source": null, + "corpus": "f0c8bfd9-da72-4219-ad07-e9391735af2f", + "type": "7d154447-65c6-4050-895f-e76bdc847fb0", + "name": "PARIS", + "zone": "fda24726-7298-452a-b672-0691a3347f3d", "worker_version": null } }, { "model": "documents.element", - "pk": "83a3df5a-25f0-4c6c-aae9-4d90eaf2f6fa", + "pk": "87a88cea-b07b-4a9b-b2d1-236fb522c61e", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "3ea05d94-46b0-4270-a0be-2dfa7983edc5", - "type": "d64f4c8d-2d15-4965-b0e2-54397dfcc0cb", - "name": "Volume 1", - "zone": null, - "source": null, + "corpus": "f0c8bfd9-da72-4219-ad07-e9391735af2f", + "type": "c7203498-3b3b-4e6a-a4c5-2a868085c29a", + "name": "Surface C", + "zone": "f249a7f3-b187-4ec3-a150-c572f58d4681", "worker_version": null } }, { "model": "documents.element", - "pk": "83e43013-f987-4374-ad1b-4187245846e5", + "pk": "8a25ab8a-a0ae-40f2-b256-6a67d6638797", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "3ea05d94-46b0-4270-a0be-2dfa7983edc5", - "type": "ec70e16c-4d4a-4108-8b86-1a9edceb9e87", - "name": "Act 3", + "corpus": "f0c8bfd9-da72-4219-ad07-e9391735af2f", + "type": "9a72b609-e7b4-48e9-9f9b-cd9f6a548855", + "name": "Volume 1", "zone": null, - "source": null, "worker_version": null } }, { "model": "documents.element", - "pk": "857c1df7-e89b-4329-a3d5-01585dd4aa94", + "pk": "8f4cebfe-268b-447b-bb4b-090639f0aa53", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "3ea05d94-46b0-4270-a0be-2dfa7983edc5", - "type": "8bc8de12-809d-4f2e-acd8-fad0aafa7428", - "name": "Surface D", - "zone": "a35697ac-d9ba-4d50-ae0c-caf98f2ffa79", - "source": null, + "corpus": "f0c8bfd9-da72-4219-ad07-e9391735af2f", + "type": "276c67e2-5400-46c1-a001-5ed5bb2848ec", + "name": "Volume 1, page 1r", + "zone": "af8aff0e-885e-4aad-8773-e40a7760d80c", "worker_version": null } }, { "model": "documents.element", - "pk": "a43644ab-cd3d-4463-b237-f54b642fb747", + "pk": "90e40bfb-e603-425f-934a-f890d3318ff2", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "3ea05d94-46b0-4270-a0be-2dfa7983edc5", - "type": "8bc8de12-809d-4f2e-acd8-fad0aafa7428", - "name": "Surface C", - "zone": "323e029a-1d5b-4cef-8776-0ad51266d7ed", - "source": null, + "corpus": "f0c8bfd9-da72-4219-ad07-e9391735af2f", + "type": "276c67e2-5400-46c1-a001-5ed5bb2848ec", + "name": "Volume 2, page 2r", + "zone": "28183cfe-98a0-4b9a-901c-c1c96de85dfc", "worker_version": null } }, { "model": "documents.element", - "pk": "b1ef0e22-d9e5-4506-b9b8-ae04c0f0e9c9", + "pk": "9611a73c-efcc-4725-918b-00789a8b1dea", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "3ea05d94-46b0-4270-a0be-2dfa7983edc5", - "type": "d64f4c8d-2d15-4965-b0e2-54397dfcc0cb", - "name": "Volume 2", - "zone": null, - "source": null, + "corpus": "f0c8bfd9-da72-4219-ad07-e9391735af2f", + "type": "7d154447-65c6-4050-895f-e76bdc847fb0", + "name": "DATUM", + "zone": "12ece286-d176-4443-9709-9a92b323718e", "worker_version": null } }, { "model": "documents.element", - "pk": "b2401d97-cff8-45fd-a859-9ff72f5af322", + "pk": "977dfca9-9a51-4ce1-bdb8-9e6ebfe4777b", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "3ea05d94-46b0-4270-a0be-2dfa7983edc5", - "type": "ec70e16c-4d4a-4108-8b86-1a9edceb9e87", - "name": "Act 5", - "zone": null, - "source": null, + "corpus": "f0c8bfd9-da72-4219-ad07-e9391735af2f", + "type": "c7203498-3b3b-4e6a-a4c5-2a868085c29a", + "name": "Surface F", + "zone": "058e79a3-47ea-4451-b055-3993406b5d9a", "worker_version": null } }, { "model": "documents.element", - "pk": "c3fd6c14-6714-4e39-bff5-21def7c76065", + "pk": "a38bdddb-77c2-4a3e-906e-5a58e2cb6d10", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "3ea05d94-46b0-4270-a0be-2dfa7983edc5", - "type": "baef17a0-86b7-4777-8f9c-c1a2512c42c5", - "name": "Text line", - "zone": "dd3a3989-caac-42f2-b046-5835195b55d0", - "source": null, + "corpus": "f0c8bfd9-da72-4219-ad07-e9391735af2f", + "type": "9a72b609-e7b4-48e9-9f9b-cd9f6a548855", + "name": "Volume 2", + "zone": null, "worker_version": null } }, { "model": "documents.element", - "pk": "c66b0527-75ee-455e-a280-075bae7fb61a", + "pk": "b241b2ad-b7c1-460a-94aa-2e15b294f68d", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "3ea05d94-46b0-4270-a0be-2dfa7983edc5", - "type": "6b750f47-efa2-4a49-9f9f-7f28474fc3e0", - "name": "PARIS", - "zone": "fd25cbf5-7bbd-4411-b8b1-f1815474a09a", - "source": null, + "corpus": "f0c8bfd9-da72-4219-ad07-e9391735af2f", + "type": "276c67e2-5400-46c1-a001-5ed5bb2848ec", + "name": "Volume 1, page 1v", + "zone": "f249a7f3-b187-4ec3-a150-c572f58d4681", "worker_version": null } }, { "model": "documents.element", - "pk": "d638b6a9-33fe-47dc-9bd4-f985d912fd20", + "pk": "bd75bf54-7f70-4e38-a43b-9a051f0a235d", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "3ea05d94-46b0-4270-a0be-2dfa7983edc5", - "type": "c5d070a5-7344-4de4-812b-28f2e3d8a970", - "name": "Volume 2, page 2r", - "zone": "289df828-c2b7-4bb1-94a8-200c2b352df5", - "source": null, + "corpus": "f0c8bfd9-da72-4219-ad07-e9391735af2f", + "type": "7d154447-65c6-4050-895f-e76bdc847fb0", + "name": "ROY", + "zone": "56b3a3a4-fbec-4f48-8c88-aa2087388b8e", "worker_version": null } }, { "model": "documents.element", - "pk": "d843555c-4b00-486d-8fca-8f2bcf22bf51", + "pk": "c26ee257-d271-48ff-a623-f31b2f2d126d", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "3ea05d94-46b0-4270-a0be-2dfa7983edc5", - "type": "8bc8de12-809d-4f2e-acd8-fad0aafa7428", - "name": "Surface B", - "zone": "14858536-cb2f-44ec-9aaf-f954ed1a34ce", - "source": null, + "corpus": "f0c8bfd9-da72-4219-ad07-e9391735af2f", + "type": "7d154447-65c6-4050-895f-e76bdc847fb0", + "name": "PARIS", + "zone": "e4163c96-f320-4c19-82d3-05b4c88f6e89", "worker_version": null } }, { "model": "documents.element", - "pk": "de95d376-b6a6-4145-8741-d22867d8f4ad", + "pk": "c62a1c87-dfd3-405c-87b6-2a975965937c", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "3ea05d94-46b0-4270-a0be-2dfa7983edc5", - "type": "6b750f47-efa2-4a49-9f9f-7f28474fc3e0", - "name": "ROY", - "zone": "afe78b66-3b80-4431-9e30-b0f08691a366", - "source": null, + "corpus": "f0c8bfd9-da72-4219-ad07-e9391735af2f", + "type": "c7203498-3b3b-4e6a-a4c5-2a868085c29a", + "name": "Surface D", + "zone": "4451bfdc-7423-47cf-af3d-8f68da44b455", "worker_version": null } }, { "model": "documents.element", - "pk": "e0b3c9f2-753a-402a-8923-dfae7c630406", + "pk": "c82a340b-fadb-43c2-b601-46fc33afe3f3", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "3ea05d94-46b0-4270-a0be-2dfa7983edc5", - "type": "c5d070a5-7344-4de4-812b-28f2e3d8a970", - "name": "Volume 1, page 1v", - "zone": "323e029a-1d5b-4cef-8776-0ad51266d7ed", - "source": null, + "corpus": "f0c8bfd9-da72-4219-ad07-e9391735af2f", + "type": "276c67e2-5400-46c1-a001-5ed5bb2848ec", + "name": "Volume 1, page 2r", + "zone": "b55c4678-b402-4d51-a4e8-394cca874557", "worker_version": null } }, { "model": "documents.element", - "pk": "e0f42ad4-6c5c-4ee7-a50e-e0394fb864fa", + "pk": "ea77ce48-4060-4f92-996a-3fcb19ea65c0", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "3ea05d94-46b0-4270-a0be-2dfa7983edc5", - "type": "6b750f47-efa2-4a49-9f9f-7f28474fc3e0", - "name": "DATUM", - "zone": "b21b6855-6e47-4b89-9f7c-7611e7c857f3", - "source": null, + "corpus": "f0c8bfd9-da72-4219-ad07-e9391735af2f", + "type": "7d154447-65c6-4050-895f-e76bdc847fb0", + "name": "ROY", + "zone": "6879216d-f399-4346-9d6a-d99b1d11a02c", "worker_version": null } }, { "model": "documents.element", - "pk": "f082c5a6-5080-413b-a25a-9f5e44ffde36", + "pk": "f1124a61-1c88-48f2-842d-d20fb61480c2", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "3ea05d94-46b0-4270-a0be-2dfa7983edc5", - "type": "6b750f47-efa2-4a49-9f9f-7f28474fc3e0", - "name": "PARIS", - "zone": "deefcad2-6b09-4214-90d4-3dc1e35b350b", - "source": null, + "corpus": "f0c8bfd9-da72-4219-ad07-e9391735af2f", + "type": "7d154447-65c6-4050-895f-e76bdc847fb0", + "name": "DATUM", + "zone": "f1208c77-7d53-4c2e-8259-a6aa34fb81ea", "worker_version": null } }, { "model": "documents.element", - "pk": "fa3cfa49-01ea-4ab7-a5e0-dcc129289864", + "pk": "f3045910-bae8-480f-ac94-663d04d6668b", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "3ea05d94-46b0-4270-a0be-2dfa7983edc5", - "type": "6b750f47-efa2-4a49-9f9f-7f28474fc3e0", - "name": "DATUM", - "zone": "1f834158-327d-4eb0-9707-081c563142b0", - "source": null, + "corpus": "f0c8bfd9-da72-4219-ad07-e9391735af2f", + "type": "c13ef184-cb9e-4078-978f-bc9e6e9471ee", + "name": "Act 2", + "zone": null, "worker_version": null } }, { "model": "documents.element", - "pk": "fb79b21b-76fb-4aa4-a80a-5ad3253343d8", + "pk": "f70ccd9d-c6e6-48da-9418-38df1684c73a", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "3ea05d94-46b0-4270-a0be-2dfa7983edc5", - "type": "c5d070a5-7344-4de4-812b-28f2e3d8a970", - "name": "Volume 2, page 1v", - "zone": "38b0be9b-d2e7-40ce-926e-29b7a9451a1a", - "source": null, + "corpus": "f0c8bfd9-da72-4219-ad07-e9391735af2f", + "type": "c7203498-3b3b-4e6a-a4c5-2a868085c29a", + "name": "Surface B", + "zone": "f0579930-52e6-41a3-925b-ba99e22c8c5e", "worker_version": null } }, { "model": "documents.element", - "pk": "ffa127a6-82f4-41f6-9041-2b6cb3bd27d6", + "pk": "fe9529e5-5bc3-44d4-bfb0-3b592ec79c53", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "3ea05d94-46b0-4270-a0be-2dfa7983edc5", - "type": "8bc8de12-809d-4f2e-acd8-fad0aafa7428", - "name": "Surface F", - "zone": "d2fec326-095d-4394-ba5b-063e03b4a1c6", - "source": null, + "corpus": "f0c8bfd9-da72-4219-ad07-e9391735af2f", + "type": "7d154447-65c6-4050-895f-e76bdc847fb0", + "name": "PARIS", + "zone": "2cc0bb68-218d-4bfb-9656-c2489498fe42", "worker_version": null } }, { - "model": "documents.datasource", - "pk": "772fd44c-5ea2-4af9-8172-03272a3ccef6", - "fields": { - "type": "classifier", - "slug": "test", - "name": "Test Classifier", - "revision": "5.1", - "internal": false - } -}, -{ - "model": "documents.datasource", - "pk": "e313bb2d-e535-48aa-858c-10728ae3ef26", + "model": "documents.transcription", + "pk": "019c7917-bdf6-4454-ad21-4073cbe062a1", "fields": { - "type": "recognizer", - "slug": "test", - "name": "Test Recognizer", - "revision": "4.2", - "internal": false + "element": "fe9529e5-5bc3-44d4-bfb0-3b592ec79c53", + "worker_version": "a21e5a40-1ad3-4c44-ade6-0a812b5afd21", + "text": "PARIS", + "score": 1.0 } }, { "model": "documents.transcription", - "pk": "24927df2-8c29-4850-be42-916332c62e13", + "pk": "0be5032b-559d-4481-b574-ea5c4478c8f2", "fields": { - "element": "8200fd63-c7e8-4724-8dc4-04b8168dd243", - "source": "e313bb2d-e535-48aa-858c-10728ae3ef26", - "worker_version": null, - "text": "Lorem ipsum dolor sit amet", + "element": "f1124a61-1c88-48f2-842d-d20fb61480c2", + "worker_version": "a21e5a40-1ad3-4c44-ade6-0a812b5afd21", + "text": "DATUM", "score": 1.0 } }, { "model": "documents.transcription", - "pk": "277bcdb9-a9b4-4b99-81fc-7bf35dafc99a", + "pk": "54ffeb9c-9b27-47d9-bd82-ae2bf8a16b14", "fields": { - "element": "de95d376-b6a6-4145-8741-d22867d8f4ad", - "source": "e313bb2d-e535-48aa-858c-10728ae3ef26", - "worker_version": null, + "element": "bd75bf54-7f70-4e38-a43b-9a051f0a235d", + "worker_version": "a21e5a40-1ad3-4c44-ade6-0a812b5afd21", "text": "ROY", "score": 1.0 } }, { "model": "documents.transcription", - "pk": "6dba36d2-5a56-41e5-90e0-b930454f995f", + "pk": "67e36646-b894-44fc-99cd-2ebbe56a01e9", "fields": { - "element": "e0f42ad4-6c5c-4ee7-a50e-e0394fb864fa", - "source": "e313bb2d-e535-48aa-858c-10728ae3ef26", - "worker_version": null, - "text": "DATUM", + "element": "49712988-9240-4cc8-8107-9c5162e69247", + "worker_version": "a21e5a40-1ad3-4c44-ade6-0a812b5afd21", + "text": "ROY", "score": 1.0 } }, { "model": "documents.transcription", - "pk": "83832e84-8a84-491b-a84e-6dde77cb44ac", + "pk": "852744ca-dade-4289-93b2-e013ba69ff98", "fields": { - "element": "1d5f2b0b-4c6e-47f4-a51b-6a349e9edecb", - "source": "e313bb2d-e535-48aa-858c-10728ae3ef26", - "worker_version": null, - "text": "PARIS", + "element": "9611a73c-efcc-4725-918b-00789a8b1dea", + "worker_version": "a21e5a40-1ad3-4c44-ade6-0a812b5afd21", + "text": "DATUM", "score": 1.0 } }, { "model": "documents.transcription", - "pk": "8448ea5d-0935-40a6-991f-7f79f1d4b697", + "pk": "90caeb9f-9e81-4721-8f1b-377e0c2ec433", "fields": { - "element": "f082c5a6-5080-413b-a25a-9f5e44ffde36", - "source": "e313bb2d-e535-48aa-858c-10728ae3ef26", - "worker_version": null, + "element": "c26ee257-d271-48ff-a623-f31b2f2d126d", + "worker_version": "a21e5a40-1ad3-4c44-ade6-0a812b5afd21", "text": "PARIS", "score": 1.0 } }, { "model": "documents.transcription", - "pk": "9f0daddd-3423-4298-b7f1-fa8f19806f15", + "pk": "9e9663d8-386e-4318-9b54-bc7d8199fb01", "fields": { - "element": "13cf31d6-5d9a-43c0-ade0-05fe0933cfcc", - "source": "e313bb2d-e535-48aa-858c-10728ae3ef26", - "worker_version": null, - "text": "ROY", + "element": "2b4f0ab6-9651-4357-b79a-7a5f07a95445", + "worker_version": "a21e5a40-1ad3-4c44-ade6-0a812b5afd21", + "text": "DATUM", "score": 1.0 } }, { "model": "documents.transcription", - "pk": "adbcf82d-7d7e-4bfa-95e4-cf3ba9f44222", + "pk": "a8219dae-10cc-4f0a-a3f2-8e32a44194a8", "fields": { - "element": "c66b0527-75ee-455e-a280-075bae7fb61a", - "source": "e313bb2d-e535-48aa-858c-10728ae3ef26", - "worker_version": null, - "text": "PARIS", + "element": "ea77ce48-4060-4f92-996a-3fcb19ea65c0", + "worker_version": "a21e5a40-1ad3-4c44-ade6-0a812b5afd21", + "text": "ROY", "score": 1.0 } }, { "model": "documents.transcription", - "pk": "ca4659bb-6456-47d8-bd58-68e6f77e6058", + "pk": "d0acf347-3668-4bfe-a78f-887ac7669e00", "fields": { - "element": "2a5a1104-09a6-4189-9a7e-e306de03834a", - "source": "e313bb2d-e535-48aa-858c-10728ae3ef26", - "worker_version": null, - "text": "DATUM", + "element": "8f4cebfe-268b-447b-bb4b-090639f0aa53", + "worker_version": "a21e5a40-1ad3-4c44-ade6-0a812b5afd21", + "text": "Lorem ipsum dolor sit amet", "score": 1.0 } }, { "model": "documents.transcription", - "pk": "ec4bb95f-4a37-467e-acdc-e166822f5f7b", + "pk": "d77a1d0e-6f38-4a23-92ec-1664a2c9613b", "fields": { - "element": "791f6f70-022c-4675-835b-d1b42a74ebca", - "source": "e313bb2d-e535-48aa-858c-10728ae3ef26", - "worker_version": null, - "text": "ROY", + "element": "806badbb-ac1b-42c1-8ca7-2c5bc02284ac", + "worker_version": "a21e5a40-1ad3-4c44-ade6-0a812b5afd21", + "text": "PARIS", "score": 1.0 } }, { - "model": "documents.transcription", - "pk": "ff16ef70-d666-4c15-a483-d75f8d23531c", + "model": "documents.allowedmetadata", + "pk": "0bbba40d-9089-4e70-8214-f5b27e66e8f8", "fields": { - "element": "fa3cfa49-01ea-4ab7-a5e0-dcc129289864", - "source": "e313bb2d-e535-48aa-858c-10728ae3ef26", - "worker_version": null, - "text": "DATUM", - "score": 1.0 + "corpus": "f0c8bfd9-da72-4219-ad07-e9391735af2f", + "type": "text", + "name": "folio" } }, { "model": "documents.allowedmetadata", - "pk": "5baf09ed-27d4-49f4-b824-4699db956f9f", + "pk": "93ea2d2e-9646-403a-a365-9e2599fe8c13", "fields": { - "corpus": "3ea05d94-46b0-4270-a0be-2dfa7983edc5", + "corpus": "f0c8bfd9-da72-4219-ad07-e9391735af2f", "type": "location", "name": "location" } }, { "model": "documents.allowedmetadata", - "pk": "6ecef8ac-a3bb-44de-9fa2-60171029fecf", + "pk": "fc4e2faa-5e68-40ce-88c0-aaec19dfcac8", "fields": { - "corpus": "3ea05d94-46b0-4270-a0be-2dfa7983edc5", + "corpus": "f0c8bfd9-da72-4219-ad07-e9391735af2f", "type": "date", "name": "date" } }, -{ - "model": "documents.allowedmetadata", - "pk": "745a55fa-e849-4aa8-9308-27bcc7970f57", - "fields": { - "corpus": "3ea05d94-46b0-4270-a0be-2dfa7983edc5", - "type": "text", - "name": "folio" - } -}, { "model": "documents.metadata", - "pk": "0f691996-a08f-4a9f-af74-fe8220f9abb9", + "pk": "533218ea-1a43-4289-84b3-c120549ab01d", "fields": { - "element": "d638b6a9-33fe-47dc-9bd4-f985d912fd20", - "name": "folio", + "element": "6793a264-486c-4df1-86c2-5d465114a684", + "name": "number", "type": "text", - "value": "2r", + "value": "1", "revision": null, "index": 0, "entity": null @@ -988,12 +927,12 @@ }, { "model": "documents.metadata", - "pk": "6768f88e-111f-45e1-9074-b0c1a0dea583", + "pk": "54a5ae9c-aac2-466d-8cb7-d0b32b48dc2c", "fields": { - "element": "4a780492-5a8b-437e-a002-f1572dd7520d", + "element": "400c09f2-90ca-4894-aff2-a1b02fd6fd71", "name": "number", "type": "text", - "value": "1", + "value": "3", "revision": null, "index": 0, "entity": null @@ -1001,12 +940,12 @@ }, { "model": "documents.metadata", - "pk": "7aae934a-2829-4a06-8364-cc0132551bbf", + "pk": "57d7120e-5b27-4445-a13c-f8d1f5464caf", "fields": { - "element": "b2401d97-cff8-45fd-a859-9ff72f5af322", - "name": "number", + "element": "7931eee8-ad36-4757-85bf-560f30c4e055", + "name": "folio", "type": "text", - "value": "5", + "value": "1v", "revision": null, "index": 0, "entity": null @@ -1014,12 +953,12 @@ }, { "model": "documents.metadata", - "pk": "8c82f260-b23e-4f80-bb64-d19d1d4858de", + "pk": "74f7ea3a-7c28-4bf3-bdce-8e9bac10b51a", "fields": { - "element": "fb79b21b-76fb-4aa4-a80a-5ad3253343d8", + "element": "c82a340b-fadb-43c2-b601-46fc33afe3f3", "name": "folio", "type": "text", - "value": "1v", + "value": "2r", "revision": null, "index": 0, "entity": null @@ -1027,9 +966,9 @@ }, { "model": "documents.metadata", - "pk": "9d783eca-9432-4ab5-85d8-2a35e20e92f5", + "pk": "784237b0-d428-416a-8968-819f03eb8a5a", "fields": { - "element": "17770006-9a53-410c-b2e8-4fd662d97f39", + "element": "f3045910-bae8-480f-ac94-663d04d6668b", "name": "number", "type": "text", "value": "2", @@ -1040,12 +979,12 @@ }, { "model": "documents.metadata", - "pk": "c733a7d2-5e55-4c36-a7a0-6cf9f79fe7cd", + "pk": "85641c49-a61d-4a27-b428-17f40340af60", "fields": { - "element": "e0b3c9f2-753a-402a-8923-dfae7c630406", - "name": "folio", + "element": "1cd34035-2869-473f-9d5f-9fd57b69172e", + "name": "number", "type": "text", - "value": "1v", + "value": "5", "revision": null, "index": 0, "entity": null @@ -1053,12 +992,12 @@ }, { "model": "documents.metadata", - "pk": "c783d362-df59-4042-802b-354c637ac771", + "pk": "886fde80-7702-495c-a3e5-c2db02a4b88c", "fields": { - "element": "8200fd63-c7e8-4724-8dc4-04b8168dd243", - "name": "folio", + "element": "72acbc89-a61e-430c-beca-c81e3fd5394f", + "name": "number", "type": "text", - "value": "1r", + "value": "4", "revision": null, "index": 0, "entity": null @@ -1066,12 +1005,12 @@ }, { "model": "documents.metadata", - "pk": "d4bca037-d70f-4e70-9462-f61b346a2078", + "pk": "92322669-10c6-47d9-9727-5be1a4fd0a79", "fields": { - "element": "83e43013-f987-4374-ad1b-4187245846e5", - "name": "number", + "element": "7a10db08-4bd0-4fc6-b3ed-7a797287fdfc", + "name": "folio", "type": "text", - "value": "3", + "value": "1r", "revision": null, "index": 0, "entity": null @@ -1079,12 +1018,12 @@ }, { "model": "documents.metadata", - "pk": "e4aba6d5-735c-4cbf-9a51-0310cc6fe587", + "pk": "a9d70691-c8d7-4770-be5b-b88cee5173c1", "fields": { - "element": "829eec34-a36f-4cf1-a385-27dfc59efbeb", - "name": "number", + "element": "b241b2ad-b7c1-460a-94aa-2e15b294f68d", + "name": "folio", "type": "text", - "value": "4", + "value": "1v", "revision": null, "index": 0, "entity": null @@ -1092,12 +1031,12 @@ }, { "model": "documents.metadata", - "pk": "e8f3731e-5731-443f-a32a-0c7d5e978d72", + "pk": "b1dd3026-fd30-481d-a673-17930f428104", "fields": { - "element": "30dda265-4638-4e93-9ca9-c18da4b4b067", + "element": "90e40bfb-e603-425f-934a-f890d3318ff2", "name": "folio", "type": "text", - "value": "1r", + "value": "2r", "revision": null, "index": 0, "entity": null @@ -1105,12 +1044,12 @@ }, { "model": "documents.metadata", - "pk": "f1bbb0fc-f377-42cf-9738-2cb9fac64fa6", + "pk": "e7370fc4-857d-4492-a113-444de3215dc8", "fields": { - "element": "127daf48-f035-446f-bc3c-926b386c1fd5", + "element": "8f4cebfe-268b-447b-bb4b-090639f0aa53", "name": "folio", "type": "text", - "value": "2r", + "value": "1r", "revision": null, "index": 0, "entity": null @@ -1134,7 +1073,7 @@ }, { "model": "images.image", - "pk": "015c8492-5aca-474f-961a-e7c8ad3466a3", + "pk": "21891c68-bdd5-4fb0-95b0-3eb5ba318cf8", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", @@ -1148,7 +1087,7 @@ }, { "model": "images.image", - "pk": "5bcdc8df-671c-43a0-a644-0ee48dbd4ca2", + "pk": "3d343607-6532-410f-9e86-4fdbdf36d671", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", @@ -1162,12 +1101,12 @@ }, { "model": "images.image", - "pk": "6766383c-c549-4506-b2f2-3f167e04252b", + "pk": "6cce17b5-3a2f-4a74-8ee6-15d3599f8062", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", "server": 1, - "path": "img6", + "path": "img5", "width": 1000, "height": 1000, "hash": null, @@ -1176,12 +1115,12 @@ }, { "model": "images.image", - "pk": "9f0f0803-389d-4daf-bf5f-512fab2d4274", + "pk": "73c6faec-3118-42bd-9452-d24c1b637931", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", "server": 1, - "path": "img4", + "path": "img1", "width": 1000, "height": 1000, "hash": null, @@ -1190,12 +1129,12 @@ }, { "model": "images.image", - "pk": "e5f640a5-3782-4780-8f4c-0f7f4985246e", + "pk": "8b483ece-8acb-4aa9-b5ff-4242fadd02c7", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", "server": 1, - "path": "img5", + "path": "img6", "width": 1000, "height": 1000, "hash": null, @@ -1204,12 +1143,12 @@ }, { "model": "images.image", - "pk": "ff1ff29f-5f94-4880-a318-dd864546f195", + "pk": "b4be1aa3-8421-46bc-ba69-f18093568089", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", "server": 1, - "path": "img1", + "path": "img4", "width": 1000, "height": 1000, "hash": null, @@ -1218,242 +1157,242 @@ }, { "model": "images.zone", - "pk": "14858536-cb2f-44ec-9aaf-f954ed1a34ce", + "pk": "058e79a3-47ea-4451-b055-3993406b5d9a", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "image": "ff1ff29f-5f94-4880-a318-dd864546f195", + "image": "3d343607-6532-410f-9e86-4fdbdf36d671", "polygon": "LINEARRING (600 600, 600 1000, 1000 1000, 1000 600, 600 600)" } }, { "model": "images.zone", - "pk": "17c8ea0f-ed35-4bb0-ae27-c2b9468fc704", + "pk": "12ece286-d176-4443-9709-9a92b323718e", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "image": "5bcdc8df-671c-43a0-a644-0ee48dbd4ca2", - "polygon": "LINEARRING (300 300, 300 600, 600 600, 600 300, 300 300)" + "image": "21891c68-bdd5-4fb0-95b0-3eb5ba318cf8", + "polygon": "LINEARRING (700 700, 700 800, 800 800, 800 700, 700 700)" } }, { "model": "images.zone", - "pk": "1f834158-327d-4eb0-9707-081c563142b0", + "pk": "28183cfe-98a0-4b9a-901c-c1c96de85dfc", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "image": "015c8492-5aca-474f-961a-e7c8ad3466a3", - "polygon": "LINEARRING (700 700, 700 800, 800 800, 800 700, 700 700)" + "image": "8b483ece-8acb-4aa9-b5ff-4242fadd02c7", + "polygon": "LINEARRING (0 0, 0 1000, 1000 1000, 1000 0, 0 0)" } }, { "model": "images.zone", - "pk": "289df828-c2b7-4bb1-94a8-200c2b352df5", + "pk": "2cc0bb68-218d-4bfb-9656-c2489498fe42", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "image": "6766383c-c549-4506-b2f2-3f167e04252b", - "polygon": "LINEARRING (0 0, 0 1000, 1000 1000, 1000 0, 0 0)" + "image": "3d343607-6532-410f-9e86-4fdbdf36d671", + "polygon": "LINEARRING (100 100, 100 200, 200 200, 200 100, 100 100)" } }, { "model": "images.zone", - "pk": "323e029a-1d5b-4cef-8776-0ad51266d7ed", + "pk": "3a05974f-408e-4302-a064-31b3e57e99bf", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "image": "015c8492-5aca-474f-961a-e7c8ad3466a3", + "image": "b4be1aa3-8421-46bc-ba69-f18093568089", "polygon": "LINEARRING (0 0, 0 1000, 1000 1000, 1000 0, 0 0)" } }, { "model": "images.zone", - "pk": "38b0be9b-d2e7-40ce-926e-29b7a9451a1a", + "pk": "4451bfdc-7423-47cf-af3d-8f68da44b455", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "image": "e5f640a5-3782-4780-8f4c-0f7f4985246e", - "polygon": "LINEARRING (0 0, 0 1000, 1000 1000, 1000 0, 0 0)" + "image": "3d343607-6532-410f-9e86-4fdbdf36d671", + "polygon": "LINEARRING (0 0, 0 300, 300 300, 300 0, 0 0)" } }, { "model": "images.zone", - "pk": "678785d2-17f6-42bb-964a-3ad45da5d949", + "pk": "56b3a3a4-fbec-4f48-8c88-aa2087388b8e", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "image": "ff1ff29f-5f94-4880-a318-dd864546f195", - "polygon": "LINEARRING (700 700, 700 800, 800 800, 800 700, 700 700)" + "image": "21891c68-bdd5-4fb0-95b0-3eb5ba318cf8", + "polygon": "LINEARRING (400 400, 400 500, 500 500, 500 400, 400 400)" } }, { "model": "images.zone", - "pk": "856e0865-1bde-4dee-89be-3cf5372bbbab", + "pk": "6879216d-f399-4346-9d6a-d99b1d11a02c", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "image": "5bcdc8df-671c-43a0-a644-0ee48dbd4ca2", + "image": "73c6faec-3118-42bd-9452-d24c1b637931", "polygon": "LINEARRING (400 400, 400 500, 500 500, 500 400, 400 400)" } }, { "model": "images.zone", - "pk": "9e353033-4865-43ee-970c-111eb2711c22", + "pk": "6a08c63d-4036-4ece-a2b4-f87d9703e707", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "image": "5bcdc8df-671c-43a0-a644-0ee48dbd4ca2", + "image": "6cce17b5-3a2f-4a74-8ee6-15d3599f8062", "polygon": "LINEARRING (0 0, 0 1000, 1000 1000, 1000 0, 0 0)" } }, { "model": "images.zone", - "pk": "a35697ac-d9ba-4d50-ae0c-caf98f2ffa79", + "pk": "ab9be037-dbed-4060-b6c9-49350c41e08b", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "image": "5bcdc8df-671c-43a0-a644-0ee48dbd4ca2", - "polygon": "LINEARRING (0 0, 0 300, 300 300, 300 0, 0 0)" + "image": "3d343607-6532-410f-9e86-4fdbdf36d671", + "polygon": "LINEARRING (700 700, 700 800, 800 800, 800 700, 700 700)" } }, { "model": "images.zone", - "pk": "a715c81a-3010-428e-8e37-77701048de8e", + "pk": "af8aff0e-885e-4aad-8773-e40a7760d80c", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "image": "ff1ff29f-5f94-4880-a318-dd864546f195", + "image": "73c6faec-3118-42bd-9452-d24c1b637931", "polygon": "LINEARRING (0 0, 0 1000, 1000 1000, 1000 0, 0 0)" } }, { "model": "images.zone", - "pk": "afe78b66-3b80-4431-9e30-b0f08691a366", + "pk": "b55c4678-b402-4d51-a4e8-394cca874557", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "image": "015c8492-5aca-474f-961a-e7c8ad3466a3", - "polygon": "LINEARRING (400 400, 400 500, 500 500, 500 400, 400 400)" + "image": "3d343607-6532-410f-9e86-4fdbdf36d671", + "polygon": "LINEARRING (0 0, 0 1000, 1000 1000, 1000 0, 0 0)" } }, { "model": "images.zone", - "pk": "b21b6855-6e47-4b89-9f7c-7611e7c857f3", + "pk": "be65ea52-22fe-4258-8eaf-1ff69f212e05", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "image": "5bcdc8df-671c-43a0-a644-0ee48dbd4ca2", - "polygon": "LINEARRING (700 700, 700 800, 800 800, 800 700, 700 700)" + "image": "73c6faec-3118-42bd-9452-d24c1b637931", + "polygon": "LINEARRING (0 0, 0 600, 600 600, 600 0, 0 0)" } }, { "model": "images.zone", - "pk": "cb095815-8630-42f0-b9c8-2bc7d4f171a9", + "pk": "d6aa344f-0936-4763-8f47-22015f19d8cc", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "image": "ff1ff29f-5f94-4880-a318-dd864546f195", - "polygon": "LINEARRING (0 0, 0 600, 600 600, 600 0, 0 0)" + "image": "3d343607-6532-410f-9e86-4fdbdf36d671", + "polygon": "LINEARRING (400 400, 400 500, 500 500, 500 400, 400 400)" } }, { "model": "images.zone", - "pk": "d2fec326-095d-4394-ba5b-063e03b4a1c6", + "pk": "de5d7d7b-6ed2-4041-906a-bf43cbae1290", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "image": "5bcdc8df-671c-43a0-a644-0ee48dbd4ca2", - "polygon": "LINEARRING (600 600, 600 1000, 1000 1000, 1000 600, 600 600)" + "image": "3d343607-6532-410f-9e86-4fdbdf36d671", + "polygon": "LINEARRING (300 300, 300 600, 600 600, 600 300, 300 300)" } }, { "model": "images.zone", - "pk": "dd3a3989-caac-42f2-b046-5835195b55d0", + "pk": "e4163c96-f320-4c19-82d3-05b4c88f6e89", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "image": "ff1ff29f-5f94-4880-a318-dd864546f195", - "polygon": "LINEARRING (400 400, 400 500, 500 500, 500 400, 400 400)" + "image": "21891c68-bdd5-4fb0-95b0-3eb5ba318cf8", + "polygon": "LINEARRING (100 100, 100 200, 200 200, 200 100, 100 100)" } }, { "model": "images.zone", - "pk": "deefcad2-6b09-4214-90d4-3dc1e35b350b", + "pk": "f0579930-52e6-41a3-925b-ba99e22c8c5e", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "image": "015c8492-5aca-474f-961a-e7c8ad3466a3", - "polygon": "LINEARRING (100 100, 100 200, 200 200, 200 100, 100 100)" + "image": "73c6faec-3118-42bd-9452-d24c1b637931", + "polygon": "LINEARRING (600 600, 600 1000, 1000 1000, 1000 600, 600 600)" } }, { "model": "images.zone", - "pk": "e06803f1-fd2b-4429-bfde-a143d561c20e", + "pk": "f1208c77-7d53-4c2e-8259-a6aa34fb81ea", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "image": "9f0f0803-389d-4daf-bf5f-512fab2d4274", - "polygon": "LINEARRING (0 0, 0 1000, 1000 1000, 1000 0, 0 0)" + "image": "73c6faec-3118-42bd-9452-d24c1b637931", + "polygon": "LINEARRING (700 700, 700 800, 800 800, 800 700, 700 700)" } }, { "model": "images.zone", - "pk": "f507c59b-7f7d-4acf-9125-5d880836c90e", + "pk": "f249a7f3-b187-4ec3-a150-c572f58d4681", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "image": "5bcdc8df-671c-43a0-a644-0ee48dbd4ca2", - "polygon": "LINEARRING (100 100, 100 200, 200 200, 200 100, 100 100)" + "image": "21891c68-bdd5-4fb0-95b0-3eb5ba318cf8", + "polygon": "LINEARRING (0 0, 0 1000, 1000 1000, 1000 0, 0 0)" } }, { "model": "images.zone", - "pk": "fd25cbf5-7bbd-4411-b8b1-f1815474a09a", + "pk": "fda24726-7298-452a-b672-0691a3347f3d", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "image": "ff1ff29f-5f94-4880-a318-dd864546f195", + "image": "73c6faec-3118-42bd-9452-d24c1b637931", "polygon": "LINEARRING (100 100, 100 200, 200 200, 200 100, 100 100)" } }, { "model": "users.right", - "pk": "2353198d-8b85-4e0e-9cb0-143677816c46", + "pk": "90d268a8-6058-44cd-b547-01fc377e9a7c", "fields": { - "user": 5, + "user": 4, "group": null, - "content_type": 37, - "content_id": "ed2836d2-7576-403b-837e-3af7e8b1a8ae", - "level": 10 + "content_type": 36, + "content_id": "9fe31c1d-f9fb-46c5-a7cd-04db402cb36d", + "level": 50 } }, { "model": "users.right", - "pk": "261bee50-3f5c-45b4-954e-94324f2c89cc", + "pk": "bf256d99-faf1-473f-be39-c612eff28c0b", "fields": { - "user": 3, + "user": 5, "group": null, - "content_type": 37, - "content_id": "ed2836d2-7576-403b-837e-3af7e8b1a8ae", - "level": 100 + "content_type": 36, + "content_id": "9fe31c1d-f9fb-46c5-a7cd-04db402cb36d", + "level": 10 } }, { "model": "users.right", - "pk": "26c45a40-240b-4090-92df-e0443f1297b8", + "pk": "ede6add6-a325-4ff7-8727-81e8340a356e", "fields": { - "user": 4, + "user": 3, "group": null, - "content_type": 37, - "content_id": "ed2836d2-7576-403b-837e-3af7e8b1a8ae", - "level": 50 + "content_type": 36, + "content_id": "9fe31c1d-f9fb-46c5-a7cd-04db402cb36d", + "level": 100 } }, { "model": "users.user", "pk": 1, "fields": { - "password": "pbkdf2_sha256$216000$LuIx8UPn35hP$IIrSaRj9KbHz6cF4AGvPPMmgif0ctNrlDz9JKW+lTNI=", + "password": "pbkdf2_sha256$216000$zEfaR6PKemZw$9zgI2RuHubcbm/+6tY2I+COugAPHStwbEB9P1CCkRMg=", "last_login": null, "email": "root@root.fr", "display_name": "Admin", @@ -1468,7 +1407,7 @@ "model": "users.user", "pk": 2, "fields": { - "password": "pbkdf2_sha256$216000$g6La1np2jsjr$K7sK1pAk8oed4EFCYpni6zpeTyM7vD+C95G4zgtAEq8=", + "password": "pbkdf2_sha256$216000$4pgkzfbLZ1nS$bc/zQiDTHy/gOOAfKJmOw1b2qRZ+CeqhXmP0yy/TxSE=", "last_login": null, "email": "internal@internal.fr", "display_name": "Internal user", @@ -1483,7 +1422,7 @@ "model": "users.user", "pk": 3, "fields": { - "password": "pbkdf2_sha256$216000$5qVMEjobtNAi$f9GwxVUGxEnPzK7KSJvp2j7oY8DFs24tmCWBr/otIPc=", + "password": "pbkdf2_sha256$216000$dbd6KhiXq4B6$SSVRbZkcRZceLn0CsmM2H/TbWsSmJS62Vf6pzMcWHUo=", "last_login": null, "email": "user@user.fr", "display_name": "Test user", @@ -1526,7 +1465,7 @@ }, { "model": "users.group", - "pk": "ed2836d2-7576-403b-837e-3af7e8b1a8ae", + "pk": "9fe31c1d-f9fb-46c5-a7cd-04db402cb36d", "fields": { "name": "User group", "public": false @@ -1534,7 +1473,7 @@ }, { "model": "users.oauthcredentials", - "pk": "e0910bb1-91f7-4672-a51f-888ad5173fdd", + "pk": "8d60ea1e-7df3-4d07-9f1f-422e9b0d0bba", "fields": { "user": 3, "provider_name": "gitlab", @@ -1551,7 +1490,7 @@ "pk": 1, "fields": { "user": 3, - "corpus": "3ea05d94-46b0-4270-a0be-2dfa7983edc5", + "corpus": "f0c8bfd9-da72-4219-ad07-e9391735af2f", "can_write": true, "can_admin": true } @@ -2279,978 +2218,942 @@ { "model": "auth.permission", "pk": 81, - "fields": { - "name": "Can add data source", - "content_type": 21, - "codename": "add_datasource" - } -}, -{ - "model": "auth.permission", - "pk": 82, - "fields": { - "name": "Can change data source", - "content_type": 21, - "codename": "change_datasource" - } -}, -{ - "model": "auth.permission", - "pk": 83, - "fields": { - "name": "Can delete data source", - "content_type": 21, - "codename": "delete_datasource" - } -}, -{ - "model": "auth.permission", - "pk": 84, - "fields": { - "name": "Can view data source", - "content_type": 21, - "codename": "view_datasource" - } -}, -{ - "model": "auth.permission", - "pk": 85, "fields": { "name": "Can add element", - "content_type": 22, + "content_type": 21, "codename": "add_element" } }, { "model": "auth.permission", - "pk": 86, + "pk": 82, "fields": { "name": "Can change element", - "content_type": 22, + "content_type": 21, "codename": "change_element" } }, { "model": "auth.permission", - "pk": 87, + "pk": 83, "fields": { "name": "Can delete element", - "content_type": 22, + "content_type": 21, "codename": "delete_element" } }, { "model": "auth.permission", - "pk": 88, + "pk": 84, "fields": { "name": "Can view element", - "content_type": 22, + "content_type": 21, "codename": "view_element" } }, { "model": "auth.permission", - "pk": 89, + "pk": 85, "fields": { "name": "Can add element path", - "content_type": 23, + "content_type": 22, "codename": "add_elementpath" } }, { "model": "auth.permission", - "pk": 90, + "pk": 86, "fields": { "name": "Can change element path", - "content_type": 23, + "content_type": 22, "codename": "change_elementpath" } }, { "model": "auth.permission", - "pk": 91, + "pk": 87, "fields": { "name": "Can delete element path", - "content_type": 23, + "content_type": 22, "codename": "delete_elementpath" } }, { "model": "auth.permission", - "pk": 92, + "pk": 88, "fields": { "name": "Can view element path", - "content_type": 23, + "content_type": 22, "codename": "view_elementpath" } }, { "model": "auth.permission", - "pk": 93, + "pk": 89, "fields": { "name": "Can add element type", - "content_type": 24, + "content_type": 23, "codename": "add_elementtype" } }, { "model": "auth.permission", - "pk": 94, + "pk": 90, "fields": { "name": "Can change element type", - "content_type": 24, + "content_type": 23, "codename": "change_elementtype" } }, { "model": "auth.permission", - "pk": 95, + "pk": 91, "fields": { "name": "Can delete element type", - "content_type": 24, + "content_type": 23, "codename": "delete_elementtype" } }, { "model": "auth.permission", - "pk": 96, + "pk": 92, "fields": { "name": "Can view element type", - "content_type": 24, + "content_type": 23, "codename": "view_elementtype" } }, { "model": "auth.permission", - "pk": 97, + "pk": 93, "fields": { "name": "Can add entity", - "content_type": 25, + "content_type": 24, "codename": "add_entity" } }, { "model": "auth.permission", - "pk": 98, + "pk": 94, "fields": { "name": "Can change entity", - "content_type": 25, + "content_type": 24, "codename": "change_entity" } }, { "model": "auth.permission", - "pk": 99, + "pk": 95, "fields": { "name": "Can delete entity", - "content_type": 25, + "content_type": 24, "codename": "delete_entity" } }, { "model": "auth.permission", - "pk": 100, + "pk": 96, "fields": { "name": "Can view entity", - "content_type": 25, + "content_type": 24, "codename": "view_entity" } }, { "model": "auth.permission", - "pk": 101, + "pk": 97, "fields": { "name": "Can add ml class", - "content_type": 26, + "content_type": 25, "codename": "add_mlclass" } }, { "model": "auth.permission", - "pk": 102, + "pk": 98, "fields": { "name": "Can change ml class", - "content_type": 26, + "content_type": 25, "codename": "change_mlclass" } }, { "model": "auth.permission", - "pk": 103, + "pk": 99, "fields": { "name": "Can delete ml class", - "content_type": 26, + "content_type": 25, "codename": "delete_mlclass" } }, { "model": "auth.permission", - "pk": 104, + "pk": 100, "fields": { "name": "Can view ml class", - "content_type": 26, + "content_type": 25, "codename": "view_mlclass" } }, { "model": "auth.permission", - "pk": 105, + "pk": 101, "fields": { "name": "Can add transcription", - "content_type": 27, + "content_type": 26, "codename": "add_transcription" } }, { "model": "auth.permission", - "pk": 106, + "pk": 102, "fields": { "name": "Can change transcription", - "content_type": 27, + "content_type": 26, "codename": "change_transcription" } }, { "model": "auth.permission", - "pk": 107, + "pk": 103, "fields": { "name": "Can delete transcription", - "content_type": 27, + "content_type": 26, "codename": "delete_transcription" } }, { "model": "auth.permission", - "pk": 108, + "pk": 104, "fields": { "name": "Can view transcription", - "content_type": 27, + "content_type": 26, "codename": "view_transcription" } }, { "model": "auth.permission", - "pk": 109, + "pk": 105, "fields": { "name": "Can add transcription entity", - "content_type": 28, + "content_type": 27, "codename": "add_transcriptionentity" } }, { "model": "auth.permission", - "pk": 110, + "pk": 106, "fields": { "name": "Can change transcription entity", - "content_type": 28, + "content_type": 27, "codename": "change_transcriptionentity" } }, { "model": "auth.permission", - "pk": 111, + "pk": 107, "fields": { "name": "Can delete transcription entity", - "content_type": 28, + "content_type": 27, "codename": "delete_transcriptionentity" } }, { "model": "auth.permission", - "pk": 112, + "pk": 108, "fields": { "name": "Can view transcription entity", - "content_type": 28, + "content_type": 27, "codename": "view_transcriptionentity" } }, { "model": "auth.permission", - "pk": 113, + "pk": 109, "fields": { "name": "Can add meta data", - "content_type": 29, + "content_type": 28, "codename": "add_metadata" } }, { "model": "auth.permission", - "pk": 114, + "pk": 110, "fields": { "name": "Can change meta data", - "content_type": 29, + "content_type": 28, "codename": "change_metadata" } }, { "model": "auth.permission", - "pk": 115, + "pk": 111, "fields": { "name": "Can delete meta data", - "content_type": 29, + "content_type": 28, "codename": "delete_metadata" } }, { "model": "auth.permission", - "pk": 116, + "pk": 112, "fields": { "name": "Can view meta data", - "content_type": 29, + "content_type": 28, "codename": "view_metadata" } }, { "model": "auth.permission", - "pk": 117, + "pk": 113, "fields": { "name": "Can add entity role", - "content_type": 30, + "content_type": 29, "codename": "add_entityrole" } }, { "model": "auth.permission", - "pk": 118, + "pk": 114, "fields": { "name": "Can change entity role", - "content_type": 30, + "content_type": 29, "codename": "change_entityrole" } }, { "model": "auth.permission", - "pk": 119, + "pk": 115, "fields": { "name": "Can delete entity role", - "content_type": 30, + "content_type": 29, "codename": "delete_entityrole" } }, { "model": "auth.permission", - "pk": 120, + "pk": 116, "fields": { "name": "Can view entity role", - "content_type": 30, + "content_type": 29, "codename": "view_entityrole" } }, { "model": "auth.permission", - "pk": 121, + "pk": 117, "fields": { "name": "Can add entity link", - "content_type": 31, + "content_type": 30, "codename": "add_entitylink" } }, { "model": "auth.permission", - "pk": 122, + "pk": 118, "fields": { "name": "Can change entity link", - "content_type": 31, + "content_type": 30, "codename": "change_entitylink" } }, { "model": "auth.permission", - "pk": 123, + "pk": 119, "fields": { "name": "Can delete entity link", - "content_type": 31, + "content_type": 30, "codename": "delete_entitylink" } }, { "model": "auth.permission", - "pk": 124, + "pk": 120, "fields": { "name": "Can view entity link", - "content_type": 31, + "content_type": 30, "codename": "view_entitylink" } }, { "model": "auth.permission", - "pk": 125, + "pk": 121, "fields": { "name": "Can add selection", - "content_type": 32, + "content_type": 31, "codename": "add_selection" } }, { "model": "auth.permission", - "pk": 126, + "pk": 122, "fields": { "name": "Can change selection", - "content_type": 32, + "content_type": 31, "codename": "change_selection" } }, { "model": "auth.permission", - "pk": 127, + "pk": 123, "fields": { "name": "Can delete selection", - "content_type": 32, + "content_type": 31, "codename": "delete_selection" } }, { "model": "auth.permission", - "pk": 128, + "pk": 124, "fields": { "name": "Can view selection", - "content_type": 32, + "content_type": 31, "codename": "view_selection" } }, { "model": "auth.permission", - "pk": 129, + "pk": 125, "fields": { "name": "Can add user", - "content_type": 33, + "content_type": 32, "codename": "add_user" } }, { "model": "auth.permission", - "pk": 130, + "pk": 126, "fields": { "name": "Can change user", - "content_type": 33, + "content_type": 32, "codename": "change_user" } }, { "model": "auth.permission", - "pk": 131, + "pk": 127, "fields": { "name": "Can delete user", - "content_type": 33, + "content_type": 32, "codename": "delete_user" } }, { "model": "auth.permission", - "pk": 132, + "pk": 128, "fields": { "name": "Can view user", - "content_type": 33, + "content_type": 32, "codename": "view_user" } }, { "model": "auth.permission", - "pk": 133, + "pk": 129, "fields": { "name": "Can add o auth credentials", - "content_type": 34, + "content_type": 33, "codename": "add_oauthcredentials" } }, { "model": "auth.permission", - "pk": 134, + "pk": 130, "fields": { "name": "Can change o auth credentials", - "content_type": 34, + "content_type": 33, "codename": "change_oauthcredentials" } }, { "model": "auth.permission", - "pk": 135, + "pk": 131, "fields": { "name": "Can delete o auth credentials", - "content_type": 34, + "content_type": 33, "codename": "delete_oauthcredentials" } }, { "model": "auth.permission", - "pk": 136, + "pk": 132, "fields": { "name": "Can view o auth credentials", - "content_type": 34, + "content_type": 33, "codename": "view_oauthcredentials" } }, { "model": "auth.permission", - "pk": 137, + "pk": 133, "fields": { "name": "Can add corpus right", - "content_type": 35, + "content_type": 34, "codename": "add_corpusright" } }, { "model": "auth.permission", - "pk": 138, + "pk": 134, "fields": { "name": "Can change corpus right", - "content_type": 35, + "content_type": 34, "codename": "change_corpusright" } }, { "model": "auth.permission", - "pk": 139, + "pk": 135, "fields": { "name": "Can delete corpus right", - "content_type": 35, + "content_type": 34, "codename": "delete_corpusright" } }, { "model": "auth.permission", - "pk": 140, + "pk": 136, "fields": { "name": "Can view corpus right", - "content_type": 35, + "content_type": 34, "codename": "view_corpusright" } }, { "model": "auth.permission", - "pk": 141, + "pk": 137, "fields": { "name": "Can add user scope", - "content_type": 36, + "content_type": 35, "codename": "add_userscope" } }, { "model": "auth.permission", - "pk": 142, + "pk": 138, "fields": { "name": "Can change user scope", - "content_type": 36, + "content_type": 35, "codename": "change_userscope" } }, { "model": "auth.permission", - "pk": 143, + "pk": 139, "fields": { "name": "Can delete user scope", - "content_type": 36, + "content_type": 35, "codename": "delete_userscope" } }, { "model": "auth.permission", - "pk": 144, + "pk": 140, "fields": { "name": "Can view user scope", - "content_type": 36, + "content_type": 35, "codename": "view_userscope" } }, { "model": "auth.permission", - "pk": 145, + "pk": 141, "fields": { "name": "Can add group", - "content_type": 37, + "content_type": 36, "codename": "add_group" } }, { "model": "auth.permission", - "pk": 146, + "pk": 142, "fields": { "name": "Can change group", - "content_type": 37, + "content_type": 36, "codename": "change_group" } }, { "model": "auth.permission", - "pk": 147, + "pk": 143, "fields": { "name": "Can delete group", - "content_type": 37, + "content_type": 36, "codename": "delete_group" } }, { "model": "auth.permission", - "pk": 148, + "pk": 144, "fields": { "name": "Can view group", - "content_type": 37, + "content_type": 36, "codename": "view_group" } }, { "model": "auth.permission", - "pk": 149, + "pk": 145, "fields": { "name": "Can add right", - "content_type": 38, + "content_type": 37, "codename": "add_right" } }, { "model": "auth.permission", - "pk": 150, + "pk": 146, "fields": { "name": "Can change right", - "content_type": 38, + "content_type": 37, "codename": "change_right" } }, { "model": "auth.permission", - "pk": 151, + "pk": 147, "fields": { "name": "Can delete right", - "content_type": 38, + "content_type": 37, "codename": "delete_right" } }, { "model": "auth.permission", - "pk": 152, + "pk": 148, "fields": { "name": "Can view right", - "content_type": 38, + "content_type": 37, "codename": "view_right" } }, { "model": "auth.permission", - "pk": 153, + "pk": 149, "fields": { "name": "Can add data file", - "content_type": 39, + "content_type": 38, "codename": "add_datafile" } }, { "model": "auth.permission", - "pk": 154, + "pk": 150, "fields": { "name": "Can change data file", - "content_type": 39, + "content_type": 38, "codename": "change_datafile" } }, { "model": "auth.permission", - "pk": 155, + "pk": 151, "fields": { "name": "Can delete data file", - "content_type": 39, + "content_type": 38, "codename": "delete_datafile" } }, { "model": "auth.permission", - "pk": 156, + "pk": 152, "fields": { "name": "Can view data file", - "content_type": 39, + "content_type": 38, "codename": "view_datafile" } }, { "model": "auth.permission", - "pk": 157, + "pk": 153, "fields": { "name": "Can add data import", - "content_type": 40, + "content_type": 39, "codename": "add_dataimport" } }, { "model": "auth.permission", - "pk": 158, + "pk": 154, "fields": { "name": "Can change data import", - "content_type": 40, + "content_type": 39, "codename": "change_dataimport" } }, { "model": "auth.permission", - "pk": 159, + "pk": 155, "fields": { "name": "Can delete data import", - "content_type": 40, + "content_type": 39, "codename": "delete_dataimport" } }, { "model": "auth.permission", - "pk": 160, + "pk": 156, "fields": { "name": "Can view data import", - "content_type": 40, + "content_type": 39, "codename": "view_dataimport" } }, { "model": "auth.permission", - "pk": 161, + "pk": 157, "fields": { "name": "Can add repository", - "content_type": 41, + "content_type": 40, "codename": "add_repository" } }, { "model": "auth.permission", - "pk": 162, + "pk": 158, "fields": { "name": "Can change repository", - "content_type": 41, + "content_type": 40, "codename": "change_repository" } }, { "model": "auth.permission", - "pk": 163, + "pk": 159, "fields": { "name": "Can delete repository", - "content_type": 41, + "content_type": 40, "codename": "delete_repository" } }, { "model": "auth.permission", - "pk": 164, + "pk": 160, "fields": { "name": "Can view repository", - "content_type": 41, + "content_type": 40, "codename": "view_repository" } }, { "model": "auth.permission", - "pk": 165, + "pk": 161, "fields": { "name": "Can add revision", - "content_type": 42, + "content_type": 41, "codename": "add_revision" } }, { "model": "auth.permission", - "pk": 166, + "pk": 162, "fields": { "name": "Can change revision", - "content_type": 42, + "content_type": 41, "codename": "change_revision" } }, { "model": "auth.permission", - "pk": 167, + "pk": 163, "fields": { "name": "Can delete revision", - "content_type": 42, + "content_type": 41, "codename": "delete_revision" } }, { "model": "auth.permission", - "pk": 168, + "pk": 164, "fields": { "name": "Can view revision", - "content_type": 42, + "content_type": 41, "codename": "view_revision" } }, { "model": "auth.permission", - "pk": 169, + "pk": 165, "fields": { "name": "Can add worker", - "content_type": 43, + "content_type": 42, "codename": "add_worker" } }, { "model": "auth.permission", - "pk": 170, + "pk": 166, "fields": { "name": "Can change worker", - "content_type": 43, + "content_type": 42, "codename": "change_worker" } }, { "model": "auth.permission", - "pk": 171, + "pk": 167, "fields": { "name": "Can delete worker", - "content_type": 43, + "content_type": 42, "codename": "delete_worker" } }, { "model": "auth.permission", - "pk": 172, + "pk": 168, "fields": { "name": "Can view worker", - "content_type": 43, + "content_type": 42, "codename": "view_worker" } }, { "model": "auth.permission", - "pk": 173, + "pk": 169, "fields": { "name": "Can add worker version", - "content_type": 44, + "content_type": 43, "codename": "add_workerversion" } }, { "model": "auth.permission", - "pk": 174, + "pk": 170, "fields": { "name": "Can change worker version", - "content_type": 44, + "content_type": 43, "codename": "change_workerversion" } }, { "model": "auth.permission", - "pk": 175, + "pk": 171, "fields": { "name": "Can delete worker version", - "content_type": 44, + "content_type": 43, "codename": "delete_workerversion" } }, { "model": "auth.permission", - "pk": 176, + "pk": 172, "fields": { "name": "Can view worker version", - "content_type": 44, + "content_type": 43, "codename": "view_workerversion" } }, { "model": "auth.permission", - "pk": 177, + "pk": 173, "fields": { "name": "Can add git ref", - "content_type": 45, + "content_type": 44, "codename": "add_gitref" } }, { "model": "auth.permission", - "pk": 178, + "pk": 174, "fields": { "name": "Can change git ref", - "content_type": 45, + "content_type": 44, "codename": "change_gitref" } }, { "model": "auth.permission", - "pk": 179, + "pk": 175, "fields": { "name": "Can delete git ref", - "content_type": 45, + "content_type": 44, "codename": "delete_gitref" } }, { "model": "auth.permission", - "pk": 180, + "pk": 176, "fields": { "name": "Can view git ref", - "content_type": 45, + "content_type": 44, "codename": "view_gitref" } }, { "model": "auth.permission", - "pk": 181, + "pk": 177, "fields": { "name": "Can add worker run", - "content_type": 46, + "content_type": 45, "codename": "add_workerrun" } }, { "model": "auth.permission", - "pk": 182, + "pk": 178, "fields": { "name": "Can change worker run", - "content_type": 46, + "content_type": 45, "codename": "change_workerrun" } }, { "model": "auth.permission", - "pk": 183, + "pk": 179, "fields": { "name": "Can delete worker run", - "content_type": 46, + "content_type": 45, "codename": "delete_workerrun" } }, { "model": "auth.permission", - "pk": 184, + "pk": 180, "fields": { "name": "Can view worker run", - "content_type": 46, + "content_type": 45, "codename": "view_workerrun" } }, { "model": "auth.permission", - "pk": 185, + "pk": 181, "fields": { "name": "Can add data import element", - "content_type": 47, + "content_type": 46, "codename": "add_dataimportelement" } }, { "model": "auth.permission", - "pk": 186, + "pk": 182, "fields": { "name": "Can change data import element", - "content_type": 47, + "content_type": 46, "codename": "change_dataimportelement" } }, { "model": "auth.permission", - "pk": 187, + "pk": 183, "fields": { "name": "Can delete data import element", - "content_type": 47, + "content_type": 46, "codename": "delete_dataimportelement" } }, { "model": "auth.permission", - "pk": 188, + "pk": 184, "fields": { "name": "Can view data import element", - "content_type": 47, + "content_type": 46, "codename": "view_dataimportelement" } }, { "model": "ponos.workflow", - "pk": "21cc38da-a445-458c-8d09-c6a1c2f37eee", + "pk": "7e46a158-97c2-4e77-a731-a7aed864b225", "fields": { "recipe": "tasks:\n docker_build:\n image: reco", "created": "2020-02-02T01:23:45.678Z", @@ -3259,7 +3162,7 @@ }, { "model": "ponos.task", - "pk": "ce2b799e-2c3b-4c1e-80e6-9833c4c67cae", + "pk": "3865b3ad-7887-4db9-bc4a-c323ed1fbee9", "fields": { "run": 0, "depth": 0, @@ -3273,7 +3176,7 @@ "image_artifact": null, "agent": null, "gpu": null, - "workflow": "21cc38da-a445-458c-8d09-c6a1c2f37eee", + "workflow": "7e46a158-97c2-4e77-a731-a7aed864b225", "container": null, "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", @@ -3282,9 +3185,9 @@ }, { "model": "ponos.artifact", - "pk": "2f612d65-6b97-4f7d-a91b-a901a88624e1", + "pk": "2a2343d0-4535-4439-8685-07811b1e4e7a", "fields": { - "task": "ce2b799e-2c3b-4c1e-80e6-9833c4c67cae", + "task": "3865b3ad-7887-4db9-bc4a-c323ed1fbee9", "path": "/path/to/docker_build", "size": 42000, "content_type": "application/octet-stream", diff --git a/arkindex/documents/management/commands/build_fixtures.py b/arkindex/documents/management/commands/build_fixtures.py index 18e76690ad..a643b54929 100644 --- a/arkindex/documents/management/commands/build_fixtures.py +++ b/arkindex/documents/management/commands/build_fixtures.py @@ -7,11 +7,10 @@ from django.core.management.base import BaseCommand from django.utils import timezone as DjangoTimeZone from arkindex.dataimport.models import RepositoryType, WorkerVersion, WorkerVersionState, Workflow -from arkindex.documents.models import Corpus, DataSource, Element, MetaData +from arkindex.documents.models import Corpus, Element, MetaData from arkindex.images.models import Image, ImageServer, Zone from arkindex.users.models import CorpusRight, Group, User from arkindex_common.enums import MetaType -from arkindex_common.ml_tool import MLToolType from ponos.models import State @@ -90,24 +89,6 @@ class Command(BaseCommand): level=10 ) - # Create 1 data source for transcriptions - recognizer_source = DataSource.objects.create( - type=MLToolType.Recognizer, - slug='test', - name='Test Recognizer', - revision='4.2', - internal=False, - ) - - # Create 1 data source for classifications - DataSource.objects.create( - type=MLToolType.Classifier, - slug='test', - name='Test Classifier', - revision='5.1', - internal=False, - ) - # Create OAuth credentials for a user creds = user.credentials.create( provider_name='gitlab', @@ -115,6 +96,50 @@ class Command(BaseCommand): token='oauth-token', ) + # Create a worker repository + worker_repo = creds.repos.create( + type=RepositoryType.Worker, + url="http://my_repo.fake/workers/worker", + hook_token='worker-hook-token', + provider_name='GitLabProvider' + ) + + # Create a revision on this repository + revision = worker_repo.revisions.create( + hash="1337", + message="My w0rk3r", + author="Test user" + ) + + # Create a fake docker build with a docker image task + workflow = Workflow.objects.create(recipe='tasks:\n docker_build:\n image: reco') + build_task = workflow.tasks.create(run=0, depth=0, slug='docker_build', state=State.Completed) + docker_image = build_task.artifacts.create(size=42_000, path='/path/to/docker_build') + + # Create two workers for the repository with their available version + recognizer_worker = WorkerVersion.objects.create( + worker=worker_repo.workers.create( + name='Recognizer', + slug='reco', + type='recognizer', + ), + revision=revision, + configuration={'test': 42}, + state=WorkerVersionState.Available, + docker_image=docker_image + ) + WorkerVersion.objects.create( + worker=worker_repo.workers.create( + name='Document layout analyser', + slug='dla', + type='dla', + ), + revision=revision, + configuration={'test': 42}, + state=WorkerVersionState.Available, + docker_image=docker_image + ) + # Create a IIIF repository repo = creds.repos.create( url='http://gitlab/repo', @@ -245,14 +270,14 @@ class Command(BaseCommand): ) element.add_parent(page) element.transcriptions.create( - source=recognizer_source, + worker_version=recognizer_worker, text=word, score=1.0, ) # Create a page transcription on page 1 p1_1.transcriptions.create( - source=recognizer_source, + worker_version=recognizer_worker, text='Lorem ipsum dolor sit amet', score=1.0, ) @@ -328,47 +353,3 @@ class Command(BaseCommand): sd.add_parent(act3) se.add_parent(act4) sf.add_parent(act5) - - # Create a worker repository - worker_repo = creds.repos.create( - type=RepositoryType.Worker, - url="http://my_repo.fake/workers/worker", - hook_token='worker-hook-token', - provider_name='GitLabProvider' - ) - - # Create a revision on this repository - revision = worker_repo.revisions.create( - hash="1337", - message="My w0rk3r", - author="Test user" - ) - - # Create a fake docker build with a docker image task - workflow = Workflow.objects.create(recipe='tasks:\n docker_build:\n image: reco') - build_task = workflow.tasks.create(run=0, depth=0, slug='docker_build', state=State.Completed) - docker_image = build_task.artifacts.create(size=42_000, path='/path/to/docker_build') - - # Create two workers for the repository with their available version - WorkerVersion.objects.create( - worker=worker_repo.workers.create( - name='Recognizer', - slug='reco', - type='recognizer' - ), - revision=revision, - configuration={'test': 42}, - state=WorkerVersionState.Available, - docker_image=docker_image - ) - WorkerVersion.objects.create( - worker=worker_repo.workers.create( - name='Document layout analyser', - slug='dla', - type='dla' - ), - revision=revision, - configuration={'test': 42}, - state=WorkerVersionState.Available, - docker_image=docker_image - ) diff --git a/arkindex/documents/migrations/0001_initial.py b/arkindex/documents/migrations/0001_initial.py index 81c04e49b9..3e3e3aaac5 100644 --- a/arkindex/documents/migrations/0001_initial.py +++ b/arkindex/documents/migrations/0001_initial.py @@ -6,12 +6,12 @@ import django.contrib.postgres.fields.hstore import django.db.models.deletion import enumfields.fields from django.db import migrations, models +from enumfields import Enum import arkindex.documents.dates import arkindex.documents.models import arkindex.project.fields import arkindex_common.enums -import arkindex_common.ml_tool class Migration(migrations.Migration): @@ -66,7 +66,7 @@ class Migration(migrations.Migration): name='DataSource', fields=[ ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)), - ('type', enumfields.fields.EnumField(enum=arkindex_common.ml_tool.MLToolType, max_length=10)), + ('type', enumfields.fields.EnumField(enum=Enum('MLToolType', ''), max_length=10)), ('slug', models.CharField(max_length=100)), ('name', models.CharField(max_length=100)), ('revision', models.CharField(max_length=100)), diff --git a/arkindex/documents/migrations/0013_datasource_type_length.py b/arkindex/documents/migrations/0013_datasource_type_length.py index 93ff7fdd96..d9b10a58b7 100644 --- a/arkindex/documents/migrations/0013_datasource_type_length.py +++ b/arkindex/documents/migrations/0013_datasource_type_length.py @@ -1,10 +1,10 @@ # Generated by Django 2.2.10 on 2020-05-19 14:59 +from enum import Enum + import enumfields.fields from django.db import migrations -import arkindex_common.ml_tool - class Migration(migrations.Migration): @@ -16,6 +16,6 @@ class Migration(migrations.Migration): migrations.AlterField( model_name='datasource', name='type', - field=enumfields.fields.EnumField(enum=arkindex_common.ml_tool.MLToolType, max_length=50), + field=enumfields.fields.EnumField(enum=Enum('MLToolType', ''), max_length=50), ), ] diff --git a/arkindex/documents/migrations/0024_migrate_datasource.py b/arkindex/documents/migrations/0024_migrate_datasource.py new file mode 100644 index 0000000000..b164ee3621 --- /dev/null +++ b/arkindex/documents/migrations/0024_migrate_datasource.py @@ -0,0 +1,113 @@ +# Generated by Django 3.1.3 on 2020-11-30 10:40 + +import os +import uuid + +from django.db import migrations, models + +from arkindex.dataimport.models import RepositoryType + + +def migrate_data_sources(apps, schema_editor): + DataSource = apps.get_model('documents', 'DataSource') + if not DataSource.objects.exists(): + return + + repo_prefix = os.environ.get('REPOSITORY_PREFIX', 'https://gitlab.com/teklia/workers') + + Element = apps.get_model('documents', 'Element') + Classification = apps.get_model('documents', 'Classification') + Transcription = apps.get_model('documents', 'Transcription') + Entity = apps.get_model('documents', 'Entity') + Repository = apps.get_model('dataimport', 'Repository') + Worker = apps.get_model('dataimport', 'Worker') + + # Start by just removing the manual source, since a manual worker version is just None + print('Migrating manual sources…') + Element.objects.filter(source__slug='manual').update(source=None) + Classification.objects.filter(source__slug='manual').update(source=None) + Transcription.objects.filter(source__slug='manual').update(source=None) + Entity.objects.filter(source__slug='manual').update(source=None) + + # Only migrate sources that have related objects + to_migrate = DataSource.objects.filter( + id__in=Element.objects.values('source_id').union( + Classification.objects.values('source_id') + ).union( + Transcription.objects.values('source_id') + ).union( + Entity.objects.values('source_id') + ) + ) + for source in to_migrate: + print(f'Migrating {source.name} {source.revision} ({source.id})…') + repo, _ = Repository.objects.get_or_create( + type=RepositoryType.Worker, + url=f'{repo_prefix}/{source.slug}', + defaults={ + # This is supposed to be unique + 'hook_token': str(source.id), + }, + ) + worker, _ = Worker.objects.get_or_create( + repository=repo, + slug=source.slug, + defaults={ + 'name': source.name, + 'type': str(source.type), + } + ) + + # To ensure we cannot get duplicate worker version IDs when re-applying + # unique constraints later, we just keep on trying to get a new revision. + created = False + while not created: + revision, created = repo.revisions.get_or_create( + hash=uuid.uuid4().hex, + message='Migrated DataSource', + author='Arkindex', + ) + + version = worker.workerversion_set.create( + revision=revision, + configuration={}, + ) + + source.elements.update(source=None, worker_version=version) + source.classifications.update(source=None, worker_version=version) + source.transcriptions.update(source=None, worker_version=version) + source.entities.update(source=None, worker_version=version) + + +class Migration(migrations.Migration): + + dependencies = [ + ('documents', '0023_remove_transcription_type'), + ] + + operations = [ + migrations.AlterField( + model_name='DataSource', + name='type', + field=models.CharField(max_length=50), + ), + migrations.RemoveConstraint( + model_name='classification', + name='classification_unique_manual', + ), + migrations.RemoveConstraint( + model_name='classification', + name='classification_unique_worker_version', + ), + migrations.RemoveConstraint( + model_name='transcription', + name='transcription_source_not_worker_version', + ), + migrations.RunPython( + code=migrate_data_sources, + reverse_code=migrations.RunPython.noop, + elidable=True, + ), + # Deletion happens in another migration, since updating data then trying to update + # the structure causes errors with 'pending trigger events' + ] diff --git a/arkindex/documents/migrations/0025_drop_datasource.py b/arkindex/documents/migrations/0025_drop_datasource.py new file mode 100644 index 0000000000..2f0bd0c090 --- /dev/null +++ b/arkindex/documents/migrations/0025_drop_datasource.py @@ -0,0 +1,52 @@ +# Generated by Django 3.1.3 on 2020-12-10 13:57 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('documents', '0024_migrate_datasource'), + ] + + operations = [ + migrations.AlterUniqueTogether( + name='datasource', + unique_together=None, + ), + migrations.RemoveField( + model_name='classification', + name='source', + ), + migrations.RemoveField( + model_name='element', + name='source', + ), + migrations.RemoveField( + model_name='entity', + name='source', + ), + migrations.RemoveField( + model_name='transcription', + name='source', + ), + migrations.AddConstraint( + model_name='classification', + constraint=models.UniqueConstraint( + condition=models.Q(worker_version_id__isnull=True), + fields=('element', 'ml_class'), + name='classification_unique_manual', + ), + ), + migrations.AddConstraint( + model_name='classification', + constraint=models.UniqueConstraint( + condition=models.Q(worker_version_id__isnull=False), + fields=('element', 'ml_class', 'worker_version'), + name='classification_unique_worker_version', + ), + ), + migrations.DeleteModel( + name='DataSource', + ), + ] diff --git a/arkindex/documents/models.py b/arkindex/documents/models.py index 599cf9e219..2f9e3cd5c1 100644 --- a/arkindex/documents/models.py +++ b/arkindex/documents/models.py @@ -19,7 +19,6 @@ from arkindex.project.elastic import ESElement, ESEntity, ESTranscription from arkindex.project.fields import ArrayField from arkindex.project.models import IndexableModel from arkindex_common.enums import EntityType, MetaType -from arkindex_common.ml_tool import MLToolType logger = logging.getLogger(__name__) @@ -148,13 +147,6 @@ class Element(IndexableModel): null=True, blank=True, ) - source = models.ForeignKey( - 'documents.DataSource', - on_delete=models.SET_NULL, - related_name='elements', - null=True, - blank=True, - ) worker_version = models.ForeignKey( 'dataimport.WorkerVersion', on_delete=models.SET_NULL, @@ -302,23 +294,6 @@ class Element(IndexableModel): return '{}: {}'.format(self.type.display_name, self.name) -class DataSource(models.Model): - id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) - type = EnumField(MLToolType, max_length=50) - slug = models.CharField(max_length=100) - name = models.CharField(max_length=100) - revision = models.CharField(max_length=100) - internal = models.BooleanField() - - class Meta: - unique_together = ( - ('type', 'slug', 'revision'), - ) - - def __str__(self): - return '{} {}'.format(self.slug, self.revision) - - class Entity(InterpretedDateMixin, models.Model): """ Semantic object in arkindex @@ -338,13 +313,6 @@ class Entity(InterpretedDateMixin, models.Model): null=True, blank=True ) - source = models.ForeignKey( - DataSource, - on_delete=models.CASCADE, - related_name='entities', - null=True, - blank=True, - ) worker_version = models.ForeignKey( 'dataimport.WorkerVersion', on_delete=models.CASCADE, @@ -436,13 +404,6 @@ class Transcription(models.Model): on_delete=models.CASCADE, related_name='transcriptions', ) - source = models.ForeignKey( - DataSource, - on_delete=models.CASCADE, - related_name='transcriptions', - null=True, - blank=True, - ) worker_version = models.ForeignKey( 'dataimport.WorkerVersion', on_delete=models.CASCADE, @@ -461,15 +422,6 @@ class Transcription(models.Model): def __str__(self): return 'Transcription: {}'.format(self.text[:20]) - class Meta: - constraints = [ - # Require either a source, a worker version, or none (manual), but not both at once - models.CheckConstraint( - check=~Q(source_id__isnull=False, worker_version_id__isnull=False), - name='transcription_source_not_worker_version', - ) - ] - class TranscriptionEntity(models.Model): """ @@ -536,13 +488,6 @@ class Classification(models.Model): on_delete=models.CASCADE, related_name='classifications', ) - source = models.ForeignKey( - DataSource, - on_delete=models.CASCADE, - related_name='classifications', - null=True, - blank=True, - ) worker_version = models.ForeignKey( 'dataimport.WorkerVersion', on_delete=models.CASCADE, @@ -573,12 +518,12 @@ class Classification(models.Model): models.UniqueConstraint( fields=['element', 'ml_class'], name='classification_unique_manual', - condition=Q(worker_version_id__isnull=True, source_id__isnull=True), + condition=Q(worker_version_id__isnull=True), ), models.UniqueConstraint( fields=['element', 'ml_class', 'worker_version'], name='classification_unique_worker_version', - condition=Q(worker_version_id__isnull=False, source_id__isnull=True), + condition=Q(worker_version_id__isnull=False), ) ] diff --git a/arkindex/documents/search.py b/arkindex/documents/search.py index b96cb85edc..a905f2dabc 100644 --- a/arkindex/documents/search.py +++ b/arkindex/documents/search.py @@ -15,7 +15,7 @@ def search_transcriptions_post(data): ts = Transcription.objects \ .filter(id__in=transcription_ids) \ .order_by('-score') \ - .prefetch_related('element__zone__image__server', 'source') + .select_related('element__zone__image__server', 'worker_version') element_ids = list(ts.values_list('element_id', flat=True)) all_parent_paths = Element.objects.get_ascendings_paths(*element_ids) for trans in ts: @@ -65,7 +65,7 @@ def search_elements_post(data): transcriptions = { t.id: t - for t in Transcription.objects.filter(id__in=tr_ids).prefetch_related('source') + for t in Transcription.objects.filter(id__in=tr_ids).select_related('worker_version') } elts_tr_ids = { diff --git a/arkindex/documents/serializers/elements.py b/arkindex/documents/serializers/elements.py index 89897319fd..31f2dcf8a2 100644 --- a/arkindex/documents/serializers/elements.py +++ b/arkindex/documents/serializers/elements.py @@ -24,7 +24,7 @@ from arkindex.documents.serializers.light import ( ElementTypeLightSerializer, MetaDataLightSerializer, ) -from arkindex.documents.serializers.ml import ClassificationSerializer, DataSourceSerializer +from arkindex.documents.serializers.ml import ClassificationSerializer from arkindex.images.models import Image, Zone from arkindex.images.serializers import ZoneSerializer from arkindex.project.serializer_fields import LinearRingField @@ -273,7 +273,6 @@ class ElementSerializer(ElementSlimSerializer): help_text='Set the polygon linking this element to the image. ' '`image` must be set when this field is set and there was no image or polygon defined before.', ) - source = DataSourceSerializer(read_only=True, required=False) class Meta: model = Element @@ -282,13 +281,11 @@ class ElementSerializer(ElementSlimSerializer): 'classifications', 'image', 'polygon', - 'source', 'worker_version' ) read_only_fields = ElementSlimSerializer.Meta.read_only_fields + ( 'metadata', 'classifications', - 'source', 'worker_version' ) diff --git a/arkindex/documents/serializers/entities.py b/arkindex/documents/serializers/entities.py index 8deca6f31a..932cdf7550 100644 --- a/arkindex/documents/serializers/entities.py +++ b/arkindex/documents/serializers/entities.py @@ -3,7 +3,6 @@ from rest_framework import serializers from arkindex.dataimport.models import WorkerVersion from arkindex.documents.models import Corpus, Entity, EntityLink, EntityRole, TranscriptionEntity from arkindex.documents.serializers.light import CorpusLightSerializer, InterpretedDateSerializer -from arkindex.documents.serializers.ml import DataSourceSerializer from arkindex.project.serializer_fields import EnumField from arkindex.project.triggers import reindex_start from arkindex_common.enums import EntityType @@ -16,7 +15,6 @@ class BaseEntitySerializer(serializers.ModelSerializer): type = EnumField(EntityType) dates = InterpretedDateSerializer(many=True, source='get_dates', read_only=True) metas = serializers.HStoreField(child=serializers.CharField(), required=False, allow_null=True) - source = DataSourceSerializer(read_only=True) worker_version_id = serializers.PrimaryKeyRelatedField(read_only=True) class Meta: @@ -28,13 +26,11 @@ class BaseEntitySerializer(serializers.ModelSerializer): 'metas', 'validated', 'dates', - 'source', 'worker_version_id', ) read_only_fields = ( 'id', 'dates', - 'source', 'worker_version_id', ) diff --git a/arkindex/documents/serializers/ml.py b/arkindex/documents/serializers/ml.py index d3e4326665..dfabfdac86 100644 --- a/arkindex/documents/serializers/ml.py +++ b/arkindex/documents/serializers/ml.py @@ -11,7 +11,6 @@ from arkindex.documents.models import ( Classification, ClassificationState, Corpus, - DataSource, Element, ElementType, MLClass, @@ -19,7 +18,6 @@ from arkindex.documents.models import ( ) from arkindex.documents.serializers.light import ElementZoneSerializer from arkindex.project.serializer_fields import EnumField, LinearRingField -from arkindex_common.ml_tool import MLToolType class ClassificationMode(Enum): @@ -30,41 +28,6 @@ class ClassificationMode(Enum): Create = "create" -class DataSourceSerializer(serializers.ModelSerializer): - """ - Serialize a data source for transcriptions and classifications - """ - - type = EnumField(MLToolType) - - class Meta: - model = DataSource - fields = ( - 'id', - 'type', - 'slug', - 'name', - 'revision', - 'internal', - ) - - -class DataSourceStatsSerializer(DataSourceSerializer): - """ - A data source, but including their result counts - """ - classifications_count = serializers.IntegerField(default=0) - transcriptions_count = serializers.IntegerField(default=0) - entities_count = serializers.IntegerField(default=0) - - class Meta(DataSourceSerializer.Meta): - fields = DataSourceSerializer.Meta.fields + ( - 'classifications_count', - 'transcriptions_count', - 'entities_count', - ) - - class MLClassSerializer(serializers.ModelSerializer): """ Serializer for MLClass instances @@ -98,7 +61,6 @@ class ClassificationSerializer(serializers.ModelSerializer): Serialize a classification on an Element """ - source = DataSourceSerializer() ml_class = MLClassSerializer() state = EnumField(ClassificationState) @@ -107,7 +69,6 @@ class ClassificationSerializer(serializers.ModelSerializer): read_only_fields = ('id', 'confidence', 'high_confidence') fields = ( 'id', - 'source', 'ml_class', 'state', 'confidence', @@ -153,11 +114,11 @@ class ClassificationCreateSerializer(serializers.ModelSerializer): read_only_fields = ('id', 'state') validators = [ UniqueTogetherValidator( - queryset=Classification.objects.using('default').filter(worker_version__isnull=False, source_id__isnull=True), + queryset=Classification.objects.using('default').filter(worker_version__isnull=False), fields=['element', 'worker_version', 'ml_class'] ), UniqueTogetherValidator( - queryset=Classification.objects.using('default').filter(worker_version__isnull=True, source_id__isnull=True), + queryset=Classification.objects.using('default').filter(worker_version__isnull=True), fields=['element', 'ml_class'] ) ] @@ -239,18 +200,16 @@ class ClassificationsSelectionSerializer(serializers.ModelSerializer): class TranscriptionSerializer(serializers.ModelSerializer): """ - Serialises a Transcription + Serializes a Transcription """ - source = DataSourceSerializer(read_only=True) class Meta: model = Transcription - read_only_fields = ('id', 'score', 'source') + read_only_fields = ('id', 'score') fields = ( 'id', 'text', 'score', - 'source', 'worker_version_id', ) diff --git a/arkindex/documents/tasks.py b/arkindex/documents/tasks.py index 599cb6af67..064afbb36a 100644 --- a/arkindex/documents/tasks.py +++ b/arkindex/documents/tasks.py @@ -1,9 +1,7 @@ import logging -from math import ceil from typing import Optional from django.db.models import Q -from django.db.models.deletion import Collector from django_rq import job from rq import get_current_job @@ -26,37 +24,6 @@ from arkindex.documents.models import ( logger = logging.getLogger(__name__) -def _delete_queryset(queryset, batch_size=1000): - """ - Helper to delete large querysets with as little SQL queries and memory footprint as possible. - """ - count = queryset.count() - logger.info('Deleting {} {}'.format(count, queryset.model.__name__)) - - if not count: - return - - if Collector(using=queryset.db).can_fast_delete(queryset.all()): - # If a single DELETE statement can be used, - # bypass both the batched deletion and Django's related objects checks. - logger.debug('Using single-query deletion') - queryset._raw_delete(using=queryset.db) - return - - if count <= batch_size: - # If there is a single batch, just delete. - queryset.delete() - return - - for i in range(ceil(count / batch_size)): - logger.debug('Deleting batch {}'.format(i + 1)) - # Deleting a slice is not allowed; - # we use a sliced subquery instead and still delete in a single query. - # DELETE FROM … WHERE id IN (SELECT id FROM … LIMIT [batch_size]) - ids = queryset[:batch_size].values('id') - queryset.model.objects.filter(id__in=ids).delete() - - @job def reindex_start(corpus_id: Optional[str] = None, element_id: Optional[str] = None, @@ -110,51 +77,6 @@ def reindex_start(corpus_id: Optional[str] = None, indexer.run_index(entities_queryset, bulk_size=400) -@job('high') -def ml_results_delete(corpus_id: Optional[str] = None, - element_id: Optional[str] = None, - batch_size: int = 1000) -> None: - assert corpus_id or element_id, 'Missing element or corpus IDs' - - if element_id: - logger.info('Deleting ML results on element {}'.format(element_id)) - element = Element.objects.get(id=element_id) - if element.type.folder: - # The folder AND its children - elements = Element.objects.filter(id=element_id).values('id').union( - # Disable ordering here because we do not need it and it adds an extra column, - # causing the UNION to fail - Element.objects.get_descending(element_id).order_by().values('id') - ) - else: - elements = [element] - - if not corpus_id: - # The corpus ID is still used in some deletions; deduce it from the element. - corpus_id = Element.objects.get(id=element_id).corpus_id - elif corpus_id: - logger.info('Deleting ML results on corpus {}'.format(corpus_id)) - elements = Element.objects.filter(corpus_id=corpus_id) - - # Simple deletions for classifications and transcriptions. - _delete_queryset(Classification.objects.filter(element__in=elements).exclude(source__slug='manual'), batch_size) - _delete_queryset(Transcription.objects.filter(element__in=elements).exclude(source__slug='manual'), batch_size) - - # Entity deletion is complex: they can be linked to different elements both on transcriptions and metadata. - # Metadata are not considered ML results so we need to keep them: update them to unlink entities. - logger.info('Updating element metadata') - MetaData.objects.filter(element__in=elements).update(entity_id=None) - - # We removed transcriptions earlier, which implies removing the links with entities. - # All is left is to remove 'lonely' entities. - # Note: __isnull's implementation will fetch all element IDs into a list before deleting—use batches! - _delete_queryset(Entity.objects.filter( - corpus_id=corpus_id, - metadatas__isnull=True, - transcriptions__isnull=True, - ), batch_size) - - @job('high') def corpus_delete(corpus_id: str) -> None: # Note that this can be None when the task is run outside of a RQ worker (e.g. unit test) diff --git a/arkindex/documents/tests/commands/test_reindex.py b/arkindex/documents/tests/commands/test_reindex.py index 0531056f53..ff2d90d73e 100644 --- a/arkindex/documents/tests/commands/test_reindex.py +++ b/arkindex/documents/tests/commands/test_reindex.py @@ -3,7 +3,8 @@ from unittest.mock import call, patch from django.core.management import CommandError, call_command from django.test import override_settings -from arkindex.documents.models import DataSource, Element, Entity, EntityType, MLToolType, Transcription +from arkindex.dataimport.models import WorkerVersion +from arkindex.documents.models import Element, Entity, EntityType, Transcription from arkindex.project.elastic import ESElement, ESEntity, ESTranscription from arkindex.project.tests import FixtureTestCase from arkindex_common.enums import MetaType @@ -14,10 +15,10 @@ class TestReindexCommand(FixtureTestCase): @classmethod def setUpTestData(cls): super().setUpTestData() - source = DataSource.objects.create(type=MLToolType.NER, slug='entity', internal=True) cls.indexer_patch = patch('arkindex.documents.management.commands.reindex.Indexer') cls.vol = cls.corpus.elements.get(name="Volume 1") - cls.entity = cls.corpus.entities.create(type=EntityType.Misc, name='Dummy entity', source=source) + worker_version = WorkerVersion.objects.first() + cls.entity = cls.corpus.entities.create(type=EntityType.Misc, name='Dummy entity', worker_version=worker_version) page = cls.corpus.elements.get(name='Volume 1, page 1r') page.metadatas.create(name='Dummy metadata', value='Dummy', type=MetaType.Text, entity=cls.entity) diff --git a/arkindex/documents/tests/tasks/test_corpus_delete.py b/arkindex/documents/tests/tasks/test_corpus_delete.py index 53c2cd99c5..db15d2c640 100644 --- a/arkindex/documents/tests/tasks/test_corpus_delete.py +++ b/arkindex/documents/tests/tasks/test_corpus_delete.py @@ -1,11 +1,10 @@ from django.db.models.signals import pre_delete from arkindex.dataimport.models import Repository, RepositoryType, WorkerVersion -from arkindex.documents.models import Corpus, DataSource, Element, Transcription +from arkindex.documents.models import Corpus, Element, Transcription from arkindex.documents.tasks import corpus_delete from arkindex.project.tests import FixtureTestCase from arkindex_common.enums import DataImportMode, EntityType, MetaType -from arkindex_common.ml_tool import MLToolType class TestDeleteCorpus(FixtureTestCase): @@ -32,12 +31,14 @@ class TestDeleteCorpus(FixtureTestCase): ) file_import.build_workflow() + cls.worker_version = WorkerVersion.objects.get(worker__slug='reco') + element_import = cls.corpus.imports.create( creator=cls.user, mode=DataImportMode.Workers, ) element_import.elements.add(element) - element_import.worker_runs.create(version=WorkerVersion.objects.first(), parents=[]) + element_import.worker_runs.create(version=cls.worker_version, parents=[]) entity1 = cls.corpus.entities.create(name='Magnemite', type=EntityType.Person) entity2 = cls.corpus.entities.create(name='Magneton', type=EntityType.Person) @@ -98,24 +99,14 @@ class TestDeleteCorpus(FixtureTestCase): name='A page', ) cls.page.classifications.create( - source=DataSource.objects.create( - type=MLToolType.Classifier, - slug='classeur', - revision='Early Access', - internal=False, - ), + worker_version=cls.worker_version, ml_class=cls.corpus2.ml_classes.create( name='klass', ), confidence=0.5, ) cls.page.transcriptions.create( - source=DataSource.objects.create( - type=MLToolType.Recognizer, - slug='reco', - revision='-1', - internal=False, - ), + worker_version=cls.worker_version, text='hi', score=0.75, ) @@ -159,11 +150,11 @@ class TestDeleteCorpus(FixtureTestCase): self.assertEqual(md.value, 'data') cl = self.page.classifications.get() - self.assertEqual(cl.source.slug, 'classeur') + self.assertEqual(cl.worker_version, self.worker_version) self.assertEqual(cl.ml_class.name, 'klass') self.assertEqual(cl.confidence, 0.5) ts = self.page.transcriptions.get() - self.assertEqual(ts.source.slug, 'reco') + self.assertEqual(ts.worker_version, self.worker_version) self.assertEqual(ts.text, 'hi') self.assertEqual(ts.score, 0.75) diff --git a/arkindex/documents/tests/tasks/test_ml_results_delete.py b/arkindex/documents/tests/tasks/test_ml_results_delete.py deleted file mode 100644 index 0c7a8b57ce..0000000000 --- a/arkindex/documents/tests/tasks/test_ml_results_delete.py +++ /dev/null @@ -1,169 +0,0 @@ -from arkindex.documents.models import Classification, DataSource, Element, Entity, Transcription -from arkindex.documents.tasks import ml_results_delete -from arkindex.project.tests import FixtureTestCase -from arkindex_common.enums import EntityType, MetaType -from arkindex_common.ml_tool import MLToolType - - -class TestMLResultsDelete(FixtureTestCase): - - @classmethod - def setUpTestData(cls): - super().setUpTestData() - cls.folder1 = cls.corpus.elements.get(name='Volume 1') - cls.folder2 = cls.corpus.elements.get(name='Volume 2') - - source = DataSource.objects.get(slug='test', type=MLToolType.Recognizer) - ml_class = cls.corpus.ml_classes.create(name='Some class') - - cls.page1 = cls.corpus.elements.get(name='Volume 1, page 1r') - cls.page1.classifications.create( - ml_class=ml_class, - confidence=0.42, - source=source, - ) - entity1 = cls.corpus.entities.create( - type=EntityType.Person, - name='Some entity 1', - source=source, - ) - cls.page1.metadatas.create( - type=MetaType.Text, - name='something', - value='Some entity 1', - entity=entity1, - ) - cls.page1.transcriptions.first().transcription_entities.create( - offset=0, - length=1, - entity=entity1, - ) - - cls.page2 = cls.corpus.elements.get(name='Volume 2, page 1r') - cls.page2.classifications.create( - ml_class=ml_class, - confidence=0.42, - source=source, - ) - entity2 = cls.corpus.entities.create( - type=EntityType.Person, - name='Some entity 2', - source=source, - ) - cls.page2.metadatas.create( - type=MetaType.Text, - name='something', - value='Some entity 2', - entity=entity2, - ) - cls.page2.transcriptions.create( - score=0.74, - text='some text', - source=source, - ).transcription_entities.create( - offset=0, - length=1, - entity=entity2, - ) - - def _get_querysets(self, elements): - return [ - Transcription.objects.filter(element__in=elements), - Entity.objects.filter(transcriptions__element__in=elements), - Entity.objects.filter(metadatas__element__in=elements), - Classification.objects.filter(element__in=elements), - ] - - def test_delete_missing_parameters(self): - with self.assertRaises(AssertionError): - ml_results_delete() - - def test_delete_corpus(self): - querysets = self._get_querysets(self.corpus.elements.all()) - - for queryset in querysets: - self.assertTrue(queryset.exists()) - - self.assertEqual(self.page1.metadatas.count(), 2) - self.assertEqual(self.page2.metadatas.count(), 2) - - with self.assertNumQueries(13): - ml_results_delete(corpus_id=self.corpus.id) - - for queryset in querysets: - self.assertFalse(queryset.exists()) - - self.assertEqual(self.page1.metadatas.count(), 2) - self.assertEqual(self.page2.metadatas.count(), 2) - metadata = self.page1.metadatas.get(value='Some entity 1') - self.assertEqual(metadata.type, MetaType.Text) - self.assertIsNone(metadata.entity) - metadata = self.page2.metadatas.get(value='Some entity 2') - self.assertEqual(metadata.type, MetaType.Text) - self.assertIsNone(metadata.entity) - - def test_delete_folder(self): - folder1_querysets = self._get_querysets( - Element.objects.filter(id=self.folder1.id).values('id').union( - Element.objects.get_descending(self.folder1.id).order_by().values('id') - ) - ) - folder2_querysets = self._get_querysets( - Element.objects.filter(id=self.folder2.id).values('id').union( - Element.objects.get_descending(self.folder2.id).order_by().values('id') - ) - ) - - for queryset in folder1_querysets: - self.assertTrue(queryset.exists()) - for queryset in folder2_querysets: - self.assertTrue(queryset.exists()) - - with self.assertNumQueries(16): - ml_results_delete(element_id=self.folder1.id) - - for queryset in folder1_querysets: - self.assertFalse(queryset.exists()) - for queryset in folder2_querysets: # This folder should not change - self.assertTrue(queryset.exists()) - - self.assertEqual(self.page1.metadatas.count(), 2) - self.assertEqual(self.page2.metadatas.count(), 2) - metadata = self.page1.metadatas.get(value='Some entity 1') - self.assertEqual(metadata.type, MetaType.Text) - self.assertIsNone(metadata.entity) - metadata = self.page2.metadatas.get(value='Some entity 2') - self.assertEqual(metadata.type, MetaType.Text) - self.assertIsNotNone(metadata.entity) - - def test_delete_page(self): - page1_querysets = self._get_querysets( - Element.objects.filter(id=self.page1.id).values('id') - ) - folder2_querysets = self._get_querysets( - Element.objects.filter(id=self.folder2.id).values('id').union( - Element.objects.get_descending(self.folder2.id).order_by().values('id') - ) - ) - - for queryset in page1_querysets: - self.assertTrue(queryset.exists()) - for queryset in folder2_querysets: - self.assertTrue(queryset.exists()) - - with self.assertNumQueries(16): - ml_results_delete(element_id=self.page1.id) - - for queryset in page1_querysets: - self.assertFalse(queryset.exists()) - for queryset in folder2_querysets: # This folder should not change - self.assertTrue(queryset.exists()) - - self.assertEqual(self.page1.metadatas.count(), 2) - self.assertEqual(self.page2.metadatas.count(), 2) - metadata = self.page1.metadatas.get(value='Some entity 1') - self.assertEqual(metadata.type, MetaType.Text) - self.assertIsNone(metadata.entity) - metadata = self.page2.metadatas.get(value='Some entity 2') - self.assertEqual(metadata.type, MetaType.Text) - self.assertIsNotNone(metadata.entity) diff --git a/arkindex/documents/tests/tasks/test_reindex.py b/arkindex/documents/tests/tasks/test_reindex.py index 014d09795e..0f5f3fccfe 100644 --- a/arkindex/documents/tests/tasks/test_reindex.py +++ b/arkindex/documents/tests/tasks/test_reindex.py @@ -3,11 +3,11 @@ from unittest.mock import patch from django.contrib.gis.geos import LinearRing from django.db.models import Q -from arkindex.documents.models import Corpus, DataSource, Element, Entity, Transcription +from arkindex.dataimport.models import WorkerVersion +from arkindex.documents.models import Corpus, Element, Entity, Transcription from arkindex.documents.tasks import reindex_start from arkindex.project.tests import FixtureTestCase from arkindex_common.enums import EntityType, MetaType -from arkindex_common.ml_tool import MLToolType @patch('arkindex.documents.tasks.Indexer') @@ -16,8 +16,7 @@ class TestReindex(FixtureTestCase): @classmethod def setUpTestData(cls): super().setUpTestData() - source = DataSource.objects.get(slug='test', type=MLToolType.Recognizer) - + cls.worker_version = WorkerVersion.objects.first() cls.folder = cls.corpus.elements.get(name='Volume 1') cls.folder.metadatas.create( type=MetaType.Text, @@ -26,7 +25,7 @@ class TestReindex(FixtureTestCase): entity=cls.corpus.entities.create( type=EntityType.Person, name='Some entity', - source=source, + worker_version=cls.worker_version, ) ) @@ -41,13 +40,13 @@ class TestReindex(FixtureTestCase): ts = element2.transcriptions.create( score=0.8, text='something', - source=source, + worker_version=cls.worker_version, ) ts.transcription_entities.create( entity=corpus2.entities.create( type=EntityType.Misc, name='Some other entity', - source=source, + worker_version=cls.worker_version, ), offset=0, length=1, diff --git a/arkindex/documents/tests/test_bulk_classification.py b/arkindex/documents/tests/test_bulk_classification.py index 1ab1261240..e834fe55d9 100644 --- a/arkindex/documents/tests/test_bulk_classification.py +++ b/arkindex/documents/tests/test_bulk_classification.py @@ -2,9 +2,8 @@ from django.urls import reverse from rest_framework import status from arkindex.dataimport.models import WorkerVersion -from arkindex.documents.models import Corpus, DataSource, MLClass +from arkindex.documents.models import Corpus, MLClass from arkindex.project.tests import FixtureAPITestCase -from arkindex_common.ml_tool import MLToolType class TestBulkClassification(FixtureAPITestCase): @@ -13,22 +12,9 @@ class TestBulkClassification(FixtureAPITestCase): def setUpTestData(cls): super().setUpTestData() cls.page = cls.corpus.elements.get(name='Volume 1, page 2r') - cls.src = DataSource.objects.get(slug='test', type=MLToolType.Classifier) cls.private_corpus = Corpus.objects.create(name='private', public=False) cls.worker_version = WorkerVersion.objects.get(worker__slug='reco') - @classmethod - def setUpClass(cls): - super().setUpClass() - cls.src.internal = True - cls.src.save() - - @classmethod - def tearDownClass(cls): - super().tearDownClass() - cls.src.internal = False - cls.src.save() - def create_classifications_data(self, classifications, parent=None): return { "parent": parent or str(self.page.id), @@ -90,12 +76,11 @@ class TestBulkClassification(FixtureAPITestCase): 'ml_class__name', 'confidence', 'high_confidence', - 'source', 'worker_version' )), [ - ('dog', 0.99, True, None, self.worker_version.id), - ('cat', 0.42, False, None, self.worker_version.id) + ('dog', 0.99, True, self.worker_version.id), + ('cat', 0.42, False, self.worker_version.id) ], ) diff --git a/arkindex/documents/tests/test_bulk_element_transcriptions.py b/arkindex/documents/tests/test_bulk_element_transcriptions.py index b07e965e4d..71a2abb020 100644 --- a/arkindex/documents/tests/test_bulk_element_transcriptions.py +++ b/arkindex/documents/tests/test_bulk_element_transcriptions.py @@ -75,10 +75,10 @@ class TestBulkElementTranscriptions(FixtureAPITestCase): ] ) self.assertCountEqual( - created_elts.values_list('transcriptions__text', 'transcriptions__source', 'transcriptions__worker_version'), + created_elts.values_list('transcriptions__text', 'transcriptions__worker_version'), [ - ('Hello world !', None, self.worker_version.id), - ('I <3 JavaScript', None, self.worker_version.id) + ('Hello world !', self.worker_version.id), + ('I <3 JavaScript', self.worker_version.id) ] ) self.assertEqual(delay_mock.call_count, 1) @@ -359,10 +359,10 @@ class TestBulkElementTranscriptions(FixtureAPITestCase): ] ) self.assertCountEqual( - created_elts.values_list('transcriptions__text', 'transcriptions__source', 'transcriptions__worker_version'), + created_elts.values_list('transcriptions__text', 'transcriptions__worker_version'), [ - ('Hello world !', None, self.worker_version.id), - ('I <3 JavaScript', None, self.worker_version.id) + ('Hello world !', self.worker_version.id), + ('I <3 JavaScript', self.worker_version.id) ] ) self.assertEqual(delay_mock.call_count, 1) diff --git a/arkindex/documents/tests/test_classes.py b/arkindex/documents/tests/test_classes.py index 9f6bfa282a..8b731dc2a3 100644 --- a/arkindex/documents/tests/test_classes.py +++ b/arkindex/documents/tests/test_classes.py @@ -2,9 +2,9 @@ from django.test import override_settings from django.urls import reverse from rest_framework import status -from arkindex.documents.models import Classification, ClassificationState, Corpus, DataSource, Element, MLClass +from arkindex.dataimport.models import WorkerVersion +from arkindex.documents.models import Classification, ClassificationState, Corpus, Element, MLClass from arkindex.project.tests import FixtureAPITestCase -from arkindex_common.ml_tool import MLToolType class TestClasses(FixtureAPITestCase): @@ -21,14 +21,8 @@ class TestClasses(FixtureAPITestCase): self.parent = self.corpus.elements.create(type=self.folder_type) self.common_children = self.corpus.elements.create(type=self.folder_type) - source1 = DataSource.objects.get(slug='test', type=MLToolType.Recognizer) - source2 = DataSource.objects.create( - type=MLToolType.Classifier, - slug='source2', - name='classifier', - revision='123', - internal=False, - ) + self.version1 = WorkerVersion.objects.get(worker__slug='reco') + self.version2 = WorkerVersion.objects.get(worker__slug='dla') for elt_num in range(1, 13): elt = Element.objects.create( name='elt_{}'.format(elt_num), @@ -38,9 +32,9 @@ class TestClasses(FixtureAPITestCase): elt.add_parent(self.parent) self.common_children.add_parent(elt) for ml_class, score in zip((self.text, self.cover), (.7, .99)): - for source in (source1, source2): + for worker_version in (self.version1, self.version2): elt.classifications.create( - source_id=source.id, + worker_version=worker_version, ml_class_id=ml_class.id, confidence=score, high_confidence=bool(score == .99) @@ -245,8 +239,8 @@ class TestClasses(FixtureAPITestCase): self.assertEqual(data['count'], 12) for elt in data['results']: self.assertCountEqual( - list(map(lambda c: (c['source']['slug'], c['confidence']), elt['best_classes'])), - [('source2', .99), ('test', .99)] + list(map(lambda c: (c['worker_version'], c['confidence']), elt['best_classes'])), + [(str(self.version1.id), .99), (str(self.version2.id), .99)] ) def test_list_elements_best_classes_false(self): @@ -275,8 +269,8 @@ class TestClasses(FixtureAPITestCase): self.assertEqual(data['count'], 12) for elt in data['results']: self.assertCountEqual( - list(map(lambda c: (c['source']['slug'], c['confidence']), elt['best_classes'])), - [('source2', .99), ('test', .99)] + list(map(lambda c: (c['worker_version'], c['confidence']), elt['best_classes'])), + [(str(self.version1.id), .99), (str(self.version2.id), .99)] ) def test_element_children_best_classes(self): @@ -291,8 +285,8 @@ class TestClasses(FixtureAPITestCase): self.assertEqual(data['count'], 12) for elt in data['results']: self.assertCountEqual( - list(map(lambda c: (c['source']['slug'], c['confidence']), elt['best_classes'])), - [('source2', .99), ('test', .99)] + list(map(lambda c: (c['worker_version'], c['confidence']), elt['best_classes'])), + [(str(self.version1.id), .99), (str(self.version2.id), .99)] ) def test_rejected_best_classes(self): @@ -330,13 +324,18 @@ class TestClasses(FixtureAPITestCase): for elt in response.json()['results']: if elt['id'] == str(parent.id): self.assertCountEqual( - list(map(lambda c: (c['source']['slug'], c['confidence']), elt['best_classes'])), - [('source2', .99), ('test', .99), ('source2', .7), ('test', .7)] + list(map(lambda c: (c['worker_version'], c['confidence']), elt['best_classes'])), + [ + (str(self.version1.id), .99), + (str(self.version2.id), .99), + (str(self.version1.id), .7), + (str(self.version2.id), .7), + ] ) continue self.assertCountEqual( - list(map(lambda c: (c['source']['slug'], c['confidence']), elt['best_classes'])), - [('source2', .99), ('test', .99)] + list(map(lambda c: (c['worker_version'], c['confidence']), elt['best_classes'])), + [(str(self.version1.id), .99), (str(self.version2.id), .99)] ) def test_rejected_human_class(self): @@ -344,10 +343,8 @@ class TestClasses(FixtureAPITestCase): A manual classification rejected by a human may not appear in best classes """ self.populate_classified_elements() - data_source, _ = DataSource.objects.get_or_create(type=MLToolType.NER, slug="manual", internal=False) element = Element.objects.filter(type=self.classified.id).first() classif = element.classifications.create( - source_id=data_source.id, ml_class_id=self.text.id, confidence=1, high_confidence=True, @@ -360,13 +357,20 @@ class TestClasses(FixtureAPITestCase): for elt in response.json()['results']: if elt['id'] == str(element.id): self.assertCountEqual( - list(map(lambda c: (c['source']['slug'], c['confidence']), elt['best_classes'])), - [('manual', 1.0), ('test', .99), ('source2', .99)] + list(map(lambda c: (c['worker_version'], c['confidence']), elt['best_classes'])), + [ + (None, 1.0), + (str(self.version1.id), .99), + (str(self.version2.id), .99), + ] ) continue self.assertCountEqual( - list(map(lambda c: (c['source']['slug'], c['confidence']), elt['best_classes'])), - [('source2', .99), ('test', .99)] + list(map(lambda c: (c['worker_version'], c['confidence']), elt['best_classes'])), + [ + (str(self.version1.id), .99), + (str(self.version2.id), .99), + ] ) # Reject the manual classification classif.state = ClassificationState.Rejected @@ -378,16 +382,15 @@ class TestClasses(FixtureAPITestCase): self.assertEqual(response.status_code, status.HTTP_200_OK) for elt in response.json()['results']: self.assertCountEqual( - list(map(lambda c: (c['source']['slug'], c['confidence']), elt['best_classes'])), - [('source2', .99), ('test', .99)] + list(map(lambda c: (c['worker_version'], c['confidence']), elt['best_classes'])), + [(str(self.version1.id), .99), (str(self.version2.id), .99)] ) def test_class_filter_list_elements(self): self.populate_classified_elements() element = Element.objects.filter(type=self.classified.id).first() element.classifications.create( - source_id=DataSource.objects.create(type=MLToolType.NER, slug='ner', internal=False).id, - ml_class_id=self.text.id, + ml_class=self.text, confidence=.1337, high_confidence=True, ) @@ -482,7 +485,7 @@ class TestClasses(FixtureAPITestCase): element = Element.objects.filter(type=self.classified.id).first() element.classifications.all().delete() element.classifications.create( - source_id=DataSource.objects.create(type=MLToolType.NER, slug='ner', internal=False).id, + worker_version=self.version2, ml_class_id=self.text.id, confidence=.1337, high_confidence=True, @@ -507,7 +510,7 @@ class TestClasses(FixtureAPITestCase): element = Element.objects.filter(type=self.classified.id).first() element.classifications.all().delete() element.classifications.create( - source_id=DataSource.objects.create(type=MLToolType.NER, slug='ner', internal=False).id, + worker_version=self.version2, ml_class_id=self.text.id, confidence=.1337, high_confidence=False, diff --git a/arkindex/documents/tests/test_create_elements.py b/arkindex/documents/tests/test_create_elements.py index b1209a6f9a..13f4c6920a 100644 --- a/arkindex/documents/tests/test_create_elements.py +++ b/arkindex/documents/tests/test_create_elements.py @@ -65,7 +65,6 @@ class TestCreateElements(FixtureAPITestCase): 'type': volume.type.slug, 'thumbnail_put_url': None, 'thumbnail_url': volume.thumbnail.s3_url, - 'source': None, 'worker_version': None, 'classifications': [], 'metadata': None, @@ -104,7 +103,6 @@ class TestCreateElements(FixtureAPITestCase): 'type': page.type.slug, 'thumbnail_put_url': None, 'thumbnail_url': None, - 'source': None, 'worker_version': None, 'classifications': [], 'metadata': None, @@ -147,7 +145,6 @@ class TestCreateElements(FixtureAPITestCase): act = Element.objects.get(id=response.json()['id']) self.assertEqual(act.name, 'Castle story') self.assertEqual(act.type, self.act_type) - self.assertEqual(act.source, None) self.assertEqual(act.worker_version, self.worker_version) def test_create_element_polygon(self): @@ -176,7 +173,6 @@ class TestCreateElements(FixtureAPITestCase): 'type': page.type.slug, 'thumbnail_put_url': None, 'thumbnail_url': None, - 'source': None, 'worker_version': None, 'classifications': [], 'metadata': None, diff --git a/arkindex/documents/tests/test_create_transcriptions.py b/arkindex/documents/tests/test_create_transcriptions.py index fcc5f14034..d48b2125a1 100644 --- a/arkindex/documents/tests/test_create_transcriptions.py +++ b/arkindex/documents/tests/test_create_transcriptions.py @@ -6,10 +6,9 @@ from django.urls import reverse from rest_framework import status from arkindex.dataimport.models import WorkerVersion -from arkindex.documents.models import Corpus, DataSource, Transcription +from arkindex.documents.models import Corpus, Transcription from arkindex.project.tests import FixtureAPITestCase from arkindex.users.models import User -from arkindex_common.ml_tool import MLToolType class TestTranscriptionCreate(FixtureAPITestCase): @@ -21,7 +20,6 @@ class TestTranscriptionCreate(FixtureAPITestCase): def setUpTestData(cls): super().setUpTestData() cls.page = cls.corpus.elements.get(name='Volume 1, page 1r') - cls.src = DataSource.objects.get(slug='test', type=MLToolType.Recognizer) cls.line = cls.corpus.elements.filter(type__slug='text_line').first() cls.private_corpus = Corpus.objects.create(name='Private') cls.private_page = cls.private_corpus.elements.create(type=cls.page.type) @@ -77,7 +75,6 @@ class TestTranscriptionCreate(FixtureAPITestCase): 'id': str(tr.id), 'score': None, 'text': 'A perfect day in a perfect place', - 'source': None, 'worker_version_id': None, }) @@ -174,7 +171,6 @@ class TestTranscriptionCreate(FixtureAPITestCase): self.assertDictEqual(response.json(), { 'id': str(tr.id), 'score': .42, - 'source': None, 'text': 'NEKUDOTAYIM', 'worker_version_id': str(self.worker_version.id), }) diff --git a/arkindex/documents/tests/test_datasource.py b/arkindex/documents/tests/test_datasource.py deleted file mode 100644 index 2730d7ccaa..0000000000 --- a/arkindex/documents/tests/test_datasource.py +++ /dev/null @@ -1,17 +0,0 @@ -from django.test import TestCase - -from arkindex.documents.models import DataSource -from arkindex_common.ml_tool import MLToolType - - -class TestDataSource(TestCase): - - def test_str(self): - ds = DataSource( - type=MLToolType.Classifier, - slug='something', - name='some classifier', - revision='1.2.3', - internal=False, - ) - self.assertEqual(str(ds), 'something 1.2.3') diff --git a/arkindex/documents/tests/test_edit_transcriptions.py b/arkindex/documents/tests/test_edit_transcriptions.py index 1e5f1bd3f2..15147f17fd 100644 --- a/arkindex/documents/tests/test_edit_transcriptions.py +++ b/arkindex/documents/tests/test_edit_transcriptions.py @@ -3,10 +3,9 @@ from uuid import uuid4 from django.urls import reverse from rest_framework import status -from arkindex.documents.models import Corpus, DataSource, Element, Transcription +from arkindex.documents.models import Corpus, Element, Transcription from arkindex.project.tests import FixtureAPITestCase from arkindex.users.models import User -from arkindex_common.ml_tool import MLToolType class TestEditTranscription(FixtureAPITestCase): @@ -35,15 +34,8 @@ class TestEditTranscription(FixtureAPITestCase): cls.corpus.corpus_right.create(user=cls.write_user, can_write=True) def setUp(self): - self.manual_source = DataSource.objects.create(type=MLToolType.Recognizer, slug='manual', internal=False) - self.manual_transcription = self.line.transcriptions.create( - text='A manual transcription', - source=self.manual_source, - ) - self.private_transcription = self.private_page.transcriptions.create( - text='PEPE', - source=self.manual_source - ) + self.manual_transcription = self.line.transcriptions.create(text='A manual transcription') + self.private_transcription = self.private_page.transcriptions.create(text='PEPE') def test_transcription_retrieve(self): """ @@ -53,18 +45,9 @@ class TestEditTranscription(FixtureAPITestCase): response = self.client.get(reverse('api:transcription-edit', kwargs={'pk': self.manual_transcription.id})) self.assertEqual(response.status_code, status.HTTP_200_OK) tr = Transcription.objects.get(id=response.json()['id']) - self.manual_source.refresh_from_db() self.assertDictEqual(response.json(), { 'id': str(tr.id), 'score': None, - 'source': { - 'id': str(self.manual_source.id), - 'internal': False, - 'name': '', - 'revision': '', - 'slug': 'manual', - 'type': 'recognizer' - }, 'text': 'A manual transcription', 'worker_version_id': None, }) @@ -146,7 +129,6 @@ class TestEditTranscription(FixtureAPITestCase): 'score': .42, 'zone': {'polygon': [[4, 2], [2, 4], [3, 3]], 'center': [42, 1337]}, 'image': {'status': 'checked'}, - 'source': 'castle_source', 'element': 'water' } ) @@ -155,14 +137,6 @@ class TestEditTranscription(FixtureAPITestCase): self.assertDictEqual(response.json(), { 'id': str(manual_tr_id), 'score': None, - 'source': { - 'id': str(self.manual_source.id), - 'internal': False, - 'name': '', - 'revision': '', - 'slug': 'manual', - 'type': 'recognizer' - }, 'text': 'a knight was living lonely', 'worker_version_id': None, }) diff --git a/arkindex/documents/tests/test_entities.py b/arkindex/documents/tests/test_entities.py index ba6efbf003..1f72233cc5 100644 --- a/arkindex/documents/tests/test_entities.py +++ b/arkindex/documents/tests/test_entities.py @@ -1,6 +1,7 @@ from django.core.exceptions import ValidationError -from arkindex.documents.models import Corpus, DataSource, Entity, EntityLink, EntityRole, MetaData, MLToolType +from arkindex.dataimport.models import WorkerVersion +from arkindex.documents.models import Corpus, Entity, EntityLink, EntityRole, MetaData from arkindex.project.tests import FixtureTestCase from arkindex_common.enums import EntityType, MetaType @@ -10,16 +11,21 @@ class TestSaveEntities(FixtureTestCase): @classmethod def setUpTestData(cls): super().setUpTestData() - source = DataSource.objects.create(type=MLToolType.NER, slug='entity', internal=False) + worker_version = WorkerVersion.objects.first() cls.corpus1 = Corpus.objects.create(name='corpus 1') cls.corpus2 = Corpus.objects.create(name='corpus 2') cls.parent = Entity.objects.create( name='parent', type=EntityType.Organization, corpus=cls.corpus1, - source=source + worker_version=worker_version, + ) + cls.child = Entity.objects.create( + type=EntityType.Person, + corpus=cls.corpus1, + name="child", + worker_version=worker_version, ) - cls.child = Entity.objects.create(type=EntityType.Person, corpus=cls.corpus1, name="child", source=source) cls.role = EntityRole.objects.create( parent_name='organization', child_name='person', diff --git a/arkindex/documents/tests/test_entities_api.py b/arkindex/documents/tests/test_entities_api.py index 3910f54e78..c1e945977c 100644 --- a/arkindex/documents/tests/test_entities_api.py +++ b/arkindex/documents/tests/test_entities_api.py @@ -8,17 +8,7 @@ from elasticsearch.exceptions import NotFoundError from rest_framework import status from arkindex.dataimport.models import WorkerVersion -from arkindex.documents.models import ( - Corpus, - DataSource, - Element, - Entity, - EntityLink, - EntityRole, - EntityType, - MLToolType, - TranscriptionEntity, -) +from arkindex.documents.models import Corpus, Element, Entity, EntityLink, EntityRole, EntityType, TranscriptionEntity from arkindex.project.tests import FixtureAPITestCase from arkindex_common.enums import MetaType @@ -28,15 +18,9 @@ class TestEntitiesAPI(FixtureAPITestCase): @classmethod def setUpTestData(cls): super().setUpTestData() - cls.entity_source = DataSource.objects.create( - type=MLToolType.NER, - slug='entity', - name='Test NER', - internal=True, - ) - cls.source = DataSource.objects.get(slug='test', type=MLToolType.Recognizer) cls.private_corpus = Corpus.objects.create(name='private') - cls.worker_version = WorkerVersion.objects.get(worker__slug='reco') + cls.worker_version_1 = WorkerVersion.objects.get(worker__slug='reco') + cls.worker_version_2 = WorkerVersion.objects.get(worker__slug='dla') cls.page = cls.corpus.elements.get(name='Volume 1, page 1r') cls.element_type = cls.corpus.types.get(slug='text_line') @@ -46,13 +30,13 @@ class TestEntitiesAPI(FixtureAPITestCase): type=EntityType.Person, corpus=self.corpus, name='entity 1', - source_id=self.source.id + worker_version=self.worker_version_1, ) self.entity_bis = Entity.objects.create( type=EntityType.Location, corpus=self.corpus, name='entity 2', - source_id=self.source.id + worker_version=self.worker_version_2, ) self.role = EntityRole.objects.create( parent_name="parent", @@ -68,11 +52,11 @@ class TestEntitiesAPI(FixtureAPITestCase): type=self.element_type, name='Transcription', zone=zone, - source=self.source, + worker_version=self.worker_version_1, ) self.transcription = self.element.transcriptions.create( text='Some transcribed text', - source=self.source, + worker_version=self.worker_version_1, ) self.metadata = self.element.metadatas.create( name='test 1', @@ -129,7 +113,7 @@ class TestEntitiesAPI(FixtureAPITestCase): name='001', zone=zone, ) - elt_tr = elt.transcriptions.create(source_id=self.source.id, text='goodbye') + elt_tr = elt.transcriptions.create(worker_version=self.worker_version_1, text='goodbye') TranscriptionEntity.objects.create(transcription=elt_tr, entity=self.entity, offset=42, length=7) with self.assertNumQueries(8): response = self.client.get(reverse('api:entity-elements', kwargs={'pk': str(self.entity.id)})) @@ -239,8 +223,6 @@ class TestEntitiesAPI(FixtureAPITestCase): @patch('arkindex.project.triggers.tasks.reindex_start.delay') def test_create_entity_person(self, delay_mock): - self.entity_source.internal = True - self.entity_source.save() data = { 'name': 'entity', 'type': EntityType.Person.value, @@ -249,7 +231,7 @@ class TestEntitiesAPI(FixtureAPITestCase): 'key': 'value', 'other key': 'other value' }, - 'worker_version': str(self.worker_version.id) + 'worker_version': str(self.worker_version_1.id) } self.client.force_login(self.user) response = self.client.post(reverse('api:entity-create'), data=data, format='json') @@ -257,7 +239,7 @@ class TestEntitiesAPI(FixtureAPITestCase): entity = Entity.objects.get(id=response.json()['id']) self.assertEqual(entity.name, 'entity') self.assertEqual(entity.raw_dates, None) - self.assertEqual(entity.worker_version, self.worker_version) + self.assertEqual(entity.worker_version, self.worker_version_1) self.assertEqual(delay_mock.call_count, 1) self.assertEqual(delay_mock.call_args, call( corpus_id=None, @@ -273,8 +255,6 @@ class TestEntitiesAPI(FixtureAPITestCase): @patch('arkindex.project.triggers.tasks.reindex_start.delay') def test_create_entity_number(self, delay_mock): - self.entity_source.internal = True - self.entity_source.save() data = { 'name': '300g', 'type': EntityType.Number.value, @@ -283,7 +263,7 @@ class TestEntitiesAPI(FixtureAPITestCase): 'key': 'value', 'other key': 'other value' }, - 'worker_version': str(self.worker_version.id) + 'worker_version': str(self.worker_version_1.id) } self.client.force_login(self.user) response = self.client.post(reverse('api:entity-create'), data=data, format='json') @@ -291,7 +271,7 @@ class TestEntitiesAPI(FixtureAPITestCase): entity = Entity.objects.get(id=response.json()['id']) self.assertEqual(entity.name, '300g') self.assertEqual(entity.raw_dates, None) - self.assertEqual(entity.worker_version, self.worker_version) + self.assertEqual(entity.worker_version, self.worker_version_1) self.assertEqual(delay_mock.call_count, 1) self.assertEqual(delay_mock.call_args, call( corpus_id=None, @@ -307,8 +287,6 @@ class TestEntitiesAPI(FixtureAPITestCase): @patch('arkindex.project.triggers.tasks.reindex_start.delay') def test_create_entity_date(self, delay_mock): - self.entity_source.internal = True - self.entity_source.save() data = { 'name': '1789', 'type': EntityType.Date.value, @@ -317,7 +295,7 @@ class TestEntitiesAPI(FixtureAPITestCase): 'key': 'value', 'other key': 'other value' }, - 'worker_version': str(self.worker_version.id) + 'worker_version': str(self.worker_version_1.id) } self.client.force_login(self.user) response = self.client.post(reverse('api:entity-create'), data=data, format='json') @@ -325,7 +303,7 @@ class TestEntitiesAPI(FixtureAPITestCase): entity = Entity.objects.get(id=response.json()['id']) self.assertEqual(entity.name, '1789') self.assertEqual(entity.raw_dates, entity.name) - self.assertEqual(entity.worker_version, self.worker_version) + self.assertEqual(entity.worker_version, self.worker_version_1) self.assertEqual(delay_mock.call_count, 1) self.assertEqual(delay_mock.call_args, call( corpus_id=None, @@ -348,7 +326,7 @@ class TestEntitiesAPI(FixtureAPITestCase): 'key': 'value', 'other key': 'other value' }, - 'ner': self.entity_source.slug + 'worker_version': str(self.worker_version_1.id) } response = self.client.post(reverse('api:entity-create'), data=data, format='json') self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) @@ -363,15 +341,14 @@ class TestEntitiesAPI(FixtureAPITestCase): 'key': 'value', 'other key': 'other value' }, - 'worker_version': str(self.worker_version.id) + 'worker_version': str(self.worker_version_1.id) } self.client.force_login(self.user) response = self.client.post(reverse('api:entity-create'), data=data, format='json') self.assertEqual(response.status_code, status.HTTP_201_CREATED) entity = Entity.objects.get(id=response.json()['id']) self.assertEqual(entity.name, '1789') - self.assertEqual(entity.source, None) - self.assertEqual(entity.worker_version, self.worker_version) + self.assertEqual(entity.worker_version, self.worker_version_1) self.assertEqual(delay_mock.call_count, 1) self.assertEqual(delay_mock.call_args, call( corpus_id=None, @@ -390,7 +367,7 @@ class TestEntitiesAPI(FixtureAPITestCase): type=EntityType.Location, corpus=self.corpus, name="child", - source_id=self.source.id + worker_version=self.worker_version_1, ) data = { 'parent': str(self.entity.id), @@ -410,7 +387,7 @@ class TestEntitiesAPI(FixtureAPITestCase): type=EntityType.Location, corpus=self.corpus, name="child", - source_id=self.source.id + worker_version=self.worker_version_1, ) data = { 'parent': str(self.entity.id), @@ -425,7 +402,7 @@ class TestEntitiesAPI(FixtureAPITestCase): type=EntityType.Person, corpus=self.corpus, name="child", - source_id=self.source.id + worker_version=self.worker_version_1, ) data = { 'parent': str(self.entity.id), @@ -489,7 +466,7 @@ class TestEntitiesAPI(FixtureAPITestCase): type=EntityType.Person, corpus=self.private_corpus, name="a private entity", - source_id=self.source.id + worker_version=self.worker_version_1, ) self.tr_entities_sample.update({'entity': ent.id}) response = self.client.post( @@ -563,15 +540,7 @@ class TestEntitiesAPI(FixtureAPITestCase): 'metas': None, 'validated': self.entity_bis.validated, 'dates': [], - 'source': { - 'id': str(self.source.id), - 'type': self.source.type.value, - 'slug': self.source.slug, - 'name': self.source.name, - 'revision': self.source.revision, - 'internal': self.source.internal, - }, - 'worker_version_id': None, + 'worker_version_id': str(self.worker_version_2.id), }, 'length': self.transcriptionentity.length, 'offset': self.transcriptionentity.offset @@ -579,10 +548,6 @@ class TestEntitiesAPI(FixtureAPITestCase): ) def test_list_transcription_entities_worker_version(self): - self.entity_bis.source = None - self.entity_bis.worker_version = self.worker_version - self.entity_bis.save() - response = self.client.get(reverse('api:transcription-entities', kwargs={'pk': str(self.transcription.id)})) self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertListEqual( @@ -595,8 +560,7 @@ class TestEntitiesAPI(FixtureAPITestCase): 'metas': None, 'validated': False, 'dates': [], - 'source': None, - 'worker_version_id': str(self.worker_version.id), + 'worker_version_id': str(self.worker_version_2.id), }, 'length': 8, 'offset': 2 @@ -627,10 +591,11 @@ class TestEntitiesAPI(FixtureAPITestCase): md = self.element.metadatas.create(name='some_metadata', type=MetaType.Location, value='something') md.entity = self.entity_bis md.save() - with self.assertNumQueries(9): + with self.assertNumQueries(6): response = self.client.get(reverse('api:element-entities', kwargs={'pk': str(self.element.id)})) self.assertEqual(response.status_code, status.HTTP_200_OK) data = response.json() + self.maxDiff = None self.assertDictEqual( data, { @@ -645,15 +610,7 @@ class TestEntitiesAPI(FixtureAPITestCase): 'metas': None, 'validated': t.entity.validated, 'dates': [], - 'source': { - 'id': str(self.source.id), - 'type': self.source.type.value, - 'slug': self.source.slug, - 'name': self.source.name, - 'revision': self.source.revision, - 'internal': self.source.internal, - }, - 'worker_version_id': None, + 'worker_version_id': str(self.worker_version_2.id), }, 'offset': t.offset, 'length': t.length @@ -666,15 +623,7 @@ class TestEntitiesAPI(FixtureAPITestCase): 'metas': None, 'validated': m.entity.validated, 'dates': [], - 'source': { - 'id': str(self.source.id), - 'type': self.source.type.value, - 'slug': self.source.slug, - 'name': self.source.name, - 'revision': self.source.revision, - 'internal': self.source.internal, - }, - 'worker_version_id': None, + 'worker_version_id': str(m.entity.worker_version_id), }, 'id': str(m.id), 'type': m.type.value, @@ -704,14 +653,10 @@ class TestEntitiesAPI(FixtureAPITestCase): self.assertEqual(response.json(), {'worker_version': ['This worker version does not exist.']}) def test_list_element_entities_worker_version(self): - self.entity.source = None - self.entity.worker_version = self.worker_version - self.entity.save() - with self.assertNumQueries(6): response = self.client.get( reverse('api:element-entities', kwargs={'pk': str(self.element.id)}), - data={'worker_version': str(self.worker_version.id)} + data={'worker_version': str(self.worker_version_1.id)} ) self.assertEqual(response.status_code, status.HTTP_200_OK) @@ -737,8 +682,7 @@ class TestEntitiesAPI(FixtureAPITestCase): 'validated': False, 'dates': [], 'metas': None, - 'source': None, - 'worker_version_id': str(self.worker_version.id), + 'worker_version_id': str(self.worker_version_1.id), }, } ], @@ -883,15 +827,7 @@ class TestEntitiesAPI(FixtureAPITestCase): 'metas': None, 'validated': self.entity.validated, 'dates': [], - 'source': { - 'id': str(self.source.id), - 'type': self.source.type.value, - 'slug': self.source.slug, - 'name': self.source.name, - 'revision': self.source.revision, - 'internal': self.source.internal, - }, - 'worker_version_id': None, + 'worker_version_id': str(self.worker_version_1.id), }, 'child': { 'id': str(self.entity_bis.id), @@ -900,15 +836,7 @@ class TestEntitiesAPI(FixtureAPITestCase): 'metas': None, 'validated': self.entity_bis.validated, 'dates': [], - 'source': { - 'id': str(self.source.id), - 'type': self.source.type.value, - 'slug': self.source.slug, - 'name': self.source.name, - 'revision': self.source.revision, - 'internal': self.source.internal, - }, - 'worker_version_id': None, + 'worker_version_id': str(self.worker_version_2.id), }, 'role': { 'id': self.role.id, @@ -934,7 +862,7 @@ class TestEntitiesAPI(FixtureAPITestCase): 'name': 'entity', 'type': EntityType.Person.value, 'corpus': str(self.corpus.id), - 'worker_version': str(self.worker_version.id) + 'worker_version': str(self.worker_version_1.id) } self.client.force_login(self.user) response = self.client.post(reverse('api:entity-create'), data=data, format='json') diff --git a/arkindex/documents/tests/test_indexer.py b/arkindex/documents/tests/test_indexer.py index 52a7d9a9a2..3981f62a9a 100644 --- a/arkindex/documents/tests/test_indexer.py +++ b/arkindex/documents/tests/test_indexer.py @@ -3,8 +3,8 @@ from unittest.mock import MagicMock, call, patch from elasticsearch import Elasticsearch from elasticsearch.exceptions import NotFoundError +from arkindex.dataimport.models import WorkerVersion from arkindex.documents.indexer import Indexer -from arkindex.documents.models import DataSource, MLToolType from arkindex.project.tests import FixtureTestCase from arkindex_common.enums import EntityType @@ -14,9 +14,13 @@ class TestIndexer(FixtureTestCase): @classmethod def setUpTestData(cls): super().setUpTestData() - source = DataSource.objects.create(type=MLToolType.NER, slug='entity', internal=True) + worker_version = WorkerVersion.objects.first() for i in range(10): - cls.corpus.entities.create(name='ES Dummy {}'.format(i), type=EntityType.Misc, source=source) + cls.corpus.entities.create( + name=f'ES Dummy {i}', + type=EntityType.Misc, + worker_version=worker_version, + ) @patch('arkindex.documents.indexer.Elasticsearch') def test_drop_index(self, es_mock): diff --git a/arkindex/documents/tests/test_manifest.py b/arkindex/documents/tests/test_manifest.py index 659e5d3517..4eaafa5699 100644 --- a/arkindex/documents/tests/test_manifest.py +++ b/arkindex/documents/tests/test_manifest.py @@ -4,10 +4,10 @@ from django.urls import reverse from rest_framework import status from tripoli import IIIFValidator -from arkindex.documents.models import DataSource, Element +from arkindex.dataimport.models import WorkerVersion +from arkindex.documents.models import Element from arkindex.project.tests import FixtureAPITestCase from arkindex_common.enums import MetaType -from arkindex_common.ml_tool import MLToolType class TestFolderManifestSerializer(FixtureAPITestCase): @@ -135,11 +135,11 @@ class TestFolderManifestSerializer(FixtureAPITestCase): def test_with_classification(self): self.assertFalse(self.page.classifications.exists()) - source = DataSource.objects.get(slug='test', type=MLToolType.Recognizer) + worker_version = WorkerVersion.objects.first() text_class = self.corpus.ml_classes.create(name='text') cover_class = self.corpus.ml_classes.create(name='cover') - self.page.classifications.create(ml_class=text_class, confidence=0.42, source=source) - self.page.classifications.create(ml_class=cover_class, confidence=0.12, source=source) + self.page.classifications.create(ml_class=text_class, confidence=0.42, worker_version=worker_version) + self.page.classifications.create(ml_class=cover_class, confidence=0.12, worker_version=worker_version) response = self.client.get(reverse('api:folder-manifest', kwargs={'pk': self.vol.id})) self.assertEqual(response.status_code, status.HTTP_200_OK) diff --git a/arkindex/documents/tests/test_metadata.py b/arkindex/documents/tests/test_metadata.py index 9fee602c1a..df87f60ef1 100644 --- a/arkindex/documents/tests/test_metadata.py +++ b/arkindex/documents/tests/test_metadata.py @@ -4,11 +4,11 @@ from django.test import override_settings from django.urls import reverse from rest_framework import status -from arkindex.documents.models import AllowedMetaData, Corpus, DataSource, MetaData +from arkindex.dataimport.models import WorkerVersion +from arkindex.documents.models import AllowedMetaData, Corpus, MetaData from arkindex.project.tests import FixtureAPITestCase from arkindex.users.models import User from arkindex_common.enums import EntityType, MetaType -from arkindex_common.ml_tool import MLToolType class TestMetaData(FixtureAPITestCase): @@ -31,7 +31,7 @@ class TestMetaData(FixtureAPITestCase): (MetaType.Reference, '_id'), ) ) - cls.source = DataSource.objects.get(slug='test', type=MLToolType.Recognizer) + cls.worker_version = WorkerVersion.objects.first() def setUp(self): super().setUp() @@ -480,7 +480,11 @@ class TestMetaData(FixtureAPITestCase): def test_create_metadata_entity(self): self.client.force_login(self.superuser) - entity = self.corpus.entities.create(name='Texas', type=EntityType.Location, source=self.source) + entity = self.corpus.entities.create( + name='Texas', + type=EntityType.Location, + worker_version=self.worker_version, + ) response = self.client.post( reverse('api:element-metadata', kwargs={'pk': str(self.vol.id)}), data={'type': 'location', 'name': 'location', 'value': 'Texas', 'entity': entity.id} @@ -490,7 +494,11 @@ class TestMetaData(FixtureAPITestCase): def test_patch_metadata_entity(self): self.client.force_login(self.superuser) - entity = self.corpus.entities.create(name='Texas', type=EntityType.Location, source=self.source) + entity = self.corpus.entities.create( + name='Texas', + type=EntityType.Location, + worker_version=self.worker_version, + ) metadata = self.vol.metadatas.create(type=MetaType.Location, name='location', value='Texas') response = self.client.patch( reverse('api:metadata-edit', kwargs={'pk': str(metadata.id)}), @@ -502,7 +510,11 @@ class TestMetaData(FixtureAPITestCase): def test_patch_metadata_entity_none(self): self.client.force_login(self.superuser) - entity = self.corpus.entities.create(name='Texas', type=EntityType.Location, source=self.source) + entity = self.corpus.entities.create( + name='Texas', + type=EntityType.Location, + worker_version=self.worker_version, + ) metadata = self.vol.metadatas.create(type=MetaType.Location, name='location', value='Texas', entity=entity) response = self.client.patch( reverse('api:metadata-edit', kwargs={'pk': str(metadata.id)}), @@ -515,7 +527,11 @@ class TestMetaData(FixtureAPITestCase): def test_create_metadata_entity_corpus_check(self): self.client.force_login(self.superuser) - entity = self.private_corpus.entities.create(name='Texas', type=EntityType.Location, source=self.source) + entity = self.private_corpus.entities.create( + name='Texas', + type=EntityType.Location, + worker_version=self.worker_version, + ) response = self.client.post( reverse('api:element-metadata', kwargs={'pk': str(self.vol.id)}), data={'type': 'location', 'name': 'location', 'value': 'Texas', 'entity': entity.id} @@ -524,7 +540,11 @@ class TestMetaData(FixtureAPITestCase): def test_patch_metadata_entity_corpus_check(self): self.client.force_login(self.superuser) - entity = self.private_corpus.entities.create(name='Texas', type=EntityType.Location, source=self.source) + entity = self.private_corpus.entities.create( + name='Texas', + type=EntityType.Location, + worker_version=self.worker_version, + ) metadata = self.vol.metadatas.create(type=MetaType.Location, name='location', value='Texas') response = self.client.patch( reverse('api:metadata-edit', kwargs={'pk': str(metadata.id)}), diff --git a/arkindex/documents/tests/test_ml_results.py b/arkindex/documents/tests/test_ml_results.py deleted file mode 100644 index 83e932fc02..0000000000 --- a/arkindex/documents/tests/test_ml_results.py +++ /dev/null @@ -1,225 +0,0 @@ -from unittest.mock import call, patch - -from django.test import override_settings -from django.urls import reverse -from rest_framework import status - -from arkindex.documents.models import DataSource, Entity, Transcription -from arkindex.project.tests import FixtureTestCase -from arkindex_common.enums import EntityType, MetaType - - -class TestMLResults(FixtureTestCase): - """ - Tests for ML results-related APIs - """ - - @classmethod - def setUpTestData(cls): - super().setUpTestData() - cls.recognizer = DataSource.objects.get(name='Test Recognizer') - cls.folder = cls.corpus.elements.get(name='Volume 1', type__folder=True) - cls.page = cls.corpus.elements.get(name='Volume 1, page 1r') - - entity = cls.corpus.entities.create( - name='An entity', - type=EntityType.Misc, - source=cls.recognizer, - ) - entity2 = cls.corpus.entities.create( - name='Another entity', - type=EntityType.Misc, - source=cls.recognizer, - ) - - ml_class = cls.corpus.ml_classes.create(name='Some class') - cls.folder.classifications.create(ml_class=ml_class, source=cls.recognizer, confidence=0.42) - cls.page.classifications.create(ml_class=ml_class, source=cls.recognizer, confidence=0.85) - cls.page.metadatas.create(name='key', value='value', type=MetaType.Text, entity=entity) - cls.page.transcriptions.first().transcription_entities.create( - entity=entity2, offset=0, length=1, - ) - - def test_element_stats_requires_login(self): - response = self.client.get(reverse('api:element-ml-stats', kwargs={'pk': str(self.page.id)})) - self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) - - def test_element_stats_requires_admin(self): - self.client.force_login(self.user) - response = self.client.get(reverse('api:element-ml-stats', kwargs={'pk': str(self.page.id)})) - self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) - - def test_element_stats(self): - self.client.force_login(self.superuser) - self.assertEqual(self.page.transcriptions.count(), 1) - self.assertEqual(self.page.classifications.count(), 1) - self.assertEqual(Entity.objects.filter(transcriptions__element=self.page).count(), 1) - self.assertEqual(Entity.objects.filter(metadatas__element=self.page).count(), 1) - - with self.assertNumQueries(7): - response = self.client.get(reverse('api:element-ml-stats', kwargs={'pk': str(self.page.id)})) - self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertListEqual(response.json(), [ - { - 'id': str(self.recognizer.id), - 'type': 'recognizer', - 'slug': 'test', - 'name': 'Test Recognizer', - 'revision': '4.2', - 'internal': False, - 'classifications_count': 1, - 'transcriptions_count': 1, - 'entities_count': 2, - } - ]) - - def test_element_stats_folder(self): - self.client.force_login(self.superuser) - with self.assertNumQueries(7): - response = self.client.get(reverse('api:element-ml-stats', kwargs={'pk': str(self.folder.id)})) - self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertListEqual(response.json(), [ - { - 'id': str(self.recognizer.id), - 'type': 'recognizer', - 'slug': 'test', - 'name': 'Test Recognizer', - 'revision': '4.2', - 'internal': False, - 'classifications_count': 2, - 'transcriptions_count': 10, - 'entities_count': 2, - } - ]) - - def test_corpus_stats_requires_login(self): - response = self.client.get(reverse('api:corpus-ml-stats', kwargs={'pk': str(self.corpus.id)})) - self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) - - def test_corpus_stats_requires_admin(self): - self.client.force_login(self.user) - response = self.client.get(reverse('api:corpus-ml-stats', kwargs={'pk': str(self.corpus.id)})) - self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) - - def test_corpus_stats(self): - self.client.force_login(self.superuser) - self.assertEqual(self.folder.classifications.count(), 1) - self.assertEqual(self.page.classifications.count(), 1) - self.assertEqual(Entity.objects.filter(transcriptions__element=self.page).count(), 1) - self.assertEqual(Entity.objects.filter(metadatas__element=self.page).count(), 1) - self.assertEqual(Transcription.objects.filter(element__corpus=self.corpus).count(), 10) - - with self.assertNumQueries(7): - response = self.client.get(reverse('api:corpus-ml-stats', kwargs={'pk': str(self.corpus.id)})) - self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertListEqual(response.json(), [ - { - 'id': str(self.recognizer.id), - 'type': 'recognizer', - 'slug': 'test', - 'name': 'Test Recognizer', - 'revision': '4.2', - 'internal': False, - 'classifications_count': 2, - 'transcriptions_count': 10, - 'entities_count': 2, - } - ]) - - def test_corpus_destroy_results_requires_login(self): - response = self.client.delete(reverse('api:corpus-ml-stats', kwargs={'pk': str(self.corpus.id)})) - self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) - - def test_corpus_destroy_results_requires_admin(self): - self.client.force_login(self.user) - response = self.client.delete(reverse('api:corpus-ml-stats', kwargs={'pk': str(self.corpus.id)})) - self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) - - @patch('arkindex.project.triggers.tasks.ml_results_delete.delay') - @patch('arkindex.project.triggers.tasks.reindex_start.delay') - def test_corpus_destroy_results(self, reindex_delay_mock, delete_delay_mock): - self.client.force_login(self.superuser) - delete_delay_mock.return_value = 'a' - response = self.client.delete(reverse('api:corpus-ml-stats', kwargs={'pk': str(self.corpus.id)})) - self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) - self.assertEqual(delete_delay_mock.call_count, 1) - self.assertEqual(delete_delay_mock.call_args, call( - corpus_id=str(self.corpus.id), - element_id=None, - batch_size=1000, - user_id=self.superuser.id, - description=f'ML results deletion on corpus {self.corpus.id}', - )) - self.assertEqual(reindex_delay_mock.call_count, 1) - self.assertEqual(reindex_delay_mock.call_args, call( - corpus_id=str(self.corpus.id), - element_id=None, - depends_on='a', - )) - - @override_settings(ARKINDEX_FEATURES={'search': False}) - @patch('arkindex.project.triggers.tasks.ml_results_delete.delay') - @patch('arkindex.project.triggers.tasks.reindex_start.delay') - def test_corpus_destroy_results_no_search(self, reindex_delay_mock, delete_delay_mock): - self.client.force_login(self.superuser) - delete_delay_mock.return_value = 'a' - response = self.client.delete(reverse('api:corpus-ml-stats', kwargs={'pk': str(self.corpus.id)})) - self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) - self.assertEqual(delete_delay_mock.call_count, 1) - self.assertEqual(delete_delay_mock.call_args, call( - corpus_id=str(self.corpus.id), - element_id=None, - batch_size=1000, - user_id=self.superuser.id, - description=f'ML results deletion on corpus {self.corpus.id}', - )) - self.assertFalse(reindex_delay_mock.called) - - def test_element_destroy_results_requires_login(self): - response = self.client.delete(reverse('api:element-ml-stats', kwargs={'pk': str(self.page.id)})) - self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) - - def test_element_destroy_results_requires_admin(self): - self.client.force_login(self.user) - response = self.client.delete(reverse('api:element-ml-stats', kwargs={'pk': str(self.page.id)})) - self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) - - @patch('arkindex.project.triggers.tasks.ml_results_delete.delay') - @patch('arkindex.project.triggers.tasks.reindex_start.delay') - def test_element_destroy_results(self, reindex_delay_mock, delete_delay_mock): - self.client.force_login(self.superuser) - delete_delay_mock.return_value = 'a' - response = self.client.delete(reverse('api:element-ml-stats', kwargs={'pk': str(self.page.id)})) - self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) - self.assertEqual(delete_delay_mock.call_count, 1) - self.assertEqual(delete_delay_mock.call_args, call( - corpus_id=None, - element_id=str(self.page.id), - batch_size=1000, - user_id=self.superuser.id, - description=f'ML results deletion on element {self.page.id}', - )) - self.assertEqual(reindex_delay_mock.call_count, 1) - self.assertEqual(reindex_delay_mock.call_args, call( - corpus_id=None, - element_id=str(self.page.id), - depends_on='a', - )) - - @override_settings(ARKINDEX_FEATURES={'search': False}) - @patch('arkindex.project.triggers.tasks.ml_results_delete.delay') - @patch('arkindex.project.triggers.tasks.reindex_start.delay') - def test_element_destroy_results_no_search(self, reindex_delay_mock, delete_delay_mock): - self.client.force_login(self.superuser) - delete_delay_mock.return_value = 'a' - response = self.client.delete(reverse('api:element-ml-stats', kwargs={'pk': str(self.page.id)})) - self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) - self.assertEqual(delete_delay_mock.call_count, 1) - self.assertEqual(delete_delay_mock.call_args, call( - corpus_id=None, - element_id=str(self.page.id), - batch_size=1000, - user_id=self.superuser.id, - description=f'ML results deletion on element {self.page.id}', - )) - self.assertFalse(reindex_delay_mock.called) diff --git a/arkindex/documents/tests/test_moderation.py b/arkindex/documents/tests/test_moderation.py index c8819aeaf4..bc0a9b222e 100644 --- a/arkindex/documents/tests/test_moderation.py +++ b/arkindex/documents/tests/test_moderation.py @@ -3,15 +3,7 @@ from django.urls import reverse from rest_framework import status from arkindex.dataimport.models import WorkerVersion -from arkindex.documents.models import ( - Classification, - ClassificationState, - Corpus, - DataSource, - Element, - MLClass, - MLToolType, -) +from arkindex.documents.models import Classification, ClassificationState, Corpus, Element, MLClass from arkindex.project.tests import FixtureAPITestCase from arkindex.users.models import User @@ -26,31 +18,9 @@ class TestClasses(FixtureAPITestCase): cls.act_type = cls.corpus.types.get(slug='act') cls.element = Element.objects.get(name='Volume 1, page 1v') cls.folder = cls.corpus.elements.get(name='Volume 1') - cls.worker_version = WorkerVersion.objects.get(worker__slug='dla') + cls.worker_version_1 = WorkerVersion.objects.get(worker__slug='dla') + cls.worker_version_2 = WorkerVersion.objects.get(worker__slug='reco') cls.internal_user = User.objects.get_by_natural_key('internal@internal.fr') - cls.classifier_source = DataSource.objects.create( - type=MLToolType.Classifier, - slug='some_classifier', - revision='1.3.3.7', - internal=False, - ) - - def _create_classification_from_source(self): - return self.element.classifications.create( - source=self.classifier_source, - ml_class=self.text, - confidence=.5, - ) - - def _serialized_source(self, classification): - return { - 'id': str(classification.source.id), - 'type': classification.source.type.value, - 'slug': classification.source.slug, - 'name': classification.source.name, - 'revision': classification.source.revision, - 'internal': classification.source.internal - } def test_manual_classification_creation(self): """ @@ -192,7 +162,7 @@ class TestClasses(FixtureAPITestCase): response = self.client.post(reverse('api:classification-create'), { 'element': str(self.element.id), 'ml_class': str(self.text.id), - 'worker_version': str(self.worker_version.id) + 'worker_version': str(self.worker_version_1.id) }) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) @@ -210,12 +180,12 @@ class TestClasses(FixtureAPITestCase): 'ml_class': str(self.text.id), 'confidence': 0.42, 'high_confidence': False, - 'worker_version': str(self.worker_version.id) + 'worker_version': str(self.worker_version_1.id) }) self.assertEqual(response.status_code, status.HTTP_201_CREATED) classification = self.element.classifications.get() - self.assertEqual(classification.worker_version, self.worker_version) + self.assertEqual(classification.worker_version, self.worker_version_1) self.assertEqual(classification.ml_class, self.text) self.assertEqual(classification.state, ClassificationState.Pending) self.assertEqual(classification.confidence, 0.42) @@ -227,15 +197,14 @@ class TestClasses(FixtureAPITestCase): response = self.client.post(reverse('api:classification-create'), { 'element': str(self.element.id), 'ml_class': str(self.text.id), - 'worker_version': str(self.worker_version.id), + 'worker_version': str(self.worker_version_1.id), 'confidence': 0.42, 'high_confidence': False, }) self.assertEqual(response.status_code, status.HTTP_201_CREATED) classification = self.element.classifications.get() - self.assertEqual(classification.source, None) - self.assertEqual(classification.worker_version, self.worker_version) + self.assertEqual(classification.worker_version, self.worker_version_1) self.assertEqual(classification.ml_class, self.text) self.assertEqual(classification.state, ClassificationState.Pending) self.assertEqual(classification.confidence, 0.42) @@ -250,14 +219,14 @@ class TestClasses(FixtureAPITestCase): response = self.client.post(reverse('api:classification-create'), { 'element': str(self.element.id), 'ml_class': str(self.text.id), - 'worker_version': str(self.worker_version.id), + 'worker_version': str(self.worker_version_1.id), 'confidence': 0, 'high_confidence': False, }) self.assertEqual(response.status_code, status.HTTP_201_CREATED) classification = self.element.classifications.get() - self.assertEqual(classification.worker_version, self.worker_version) + self.assertEqual(classification.worker_version, self.worker_version_1) self.assertEqual(classification.ml_class, self.text) self.assertEqual(classification.state, ClassificationState.Pending) self.assertEqual(classification.confidence, 0) @@ -265,7 +234,7 @@ class TestClasses(FixtureAPITestCase): def test_classification_validate(self): classification = self.element.classifications.create( - worker_version=self.worker_version, + worker_version=self.worker_version_1, ml_class=self.text, confidence=.1 ) @@ -277,8 +246,7 @@ class TestClasses(FixtureAPITestCase): self.assertDictEqual(response.json(), { 'id': str(classification.id), - 'source': None, - 'worker_version': str(self.worker_version.id), + 'worker_version': str(self.worker_version_1.id), 'ml_class': { 'id': str(classification.ml_class.id), 'name': classification.ml_class.name @@ -293,52 +261,29 @@ class TestClasses(FixtureAPITestCase): self.assertEqual(classification.moderator, self.user) def test_classification_validate_without_permissions(self): - classification = self._create_classification_from_source() + classification = self.element.classifications.create( + ml_class=self.text, + confidence=.5, + ) with self.assertNumQueries(0): response = self.client.put(reverse('api:classification-validate', kwargs={'pk': classification.id})) self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) - def test_source_classification_reject(self): - self.client.force_login(self.user) - classification = self._create_classification_from_source() - - with self.assertNumQueries(6): - response = self.client.put(reverse('api:classification-reject', kwargs={'pk': classification.id})) - self.assertEqual(response.status_code, status.HTTP_200_OK) - - self.assertDictEqual(response.json(), { - 'id': str(classification.id), - 'source': self._serialized_source(classification), - 'worker_version': None, - 'ml_class': { - 'id': str(classification.ml_class.id), - 'name': classification.ml_class.name - }, - 'state': ClassificationState.Rejected.value, - 'confidence': classification.confidence, - 'high_confidence': False - }) - - # Ensure moderator has been set - classification.refresh_from_db() - self.assertEqual(classification.moderator, self.user) - def test_worker_classification_reject(self): self.client.force_login(self.user) classification = self.element.classifications.create( - worker_version=self.worker_version, + worker_version=self.worker_version_1, ml_class=self.text, confidence=.1, ) - with self.assertNumQueries(6): + with self.assertNumQueries(5): response = self.client.put(reverse('api:classification-reject', kwargs={'pk': classification.id})) self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertDictEqual(response.json(), { 'id': str(classification.id), - 'source': None, - 'worker_version': str(self.worker_version.id), + 'worker_version': str(self.worker_version_1.id), 'ml_class': { 'id': str(classification.ml_class.id), 'name': classification.ml_class.name @@ -352,24 +297,6 @@ class TestClasses(FixtureAPITestCase): classification.refresh_from_db() self.assertEqual(classification.moderator, self.user) - def test_classification_reject_manual_source_delete(self): - """ - A rejected classifications from a manual source should be automatically deleted - """ - self.client.force_login(self.user) - classification = self.element.classifications.create( - source=DataSource.objects.create(slug='manual', type=MLToolType.Classifier, internal=False), - ml_class=self.text, - confidence=.42, - ) - - with self.assertNumQueries(5): - response = self.client.put(reverse('api:classification-reject', kwargs={'pk': classification.id})) - self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) - - with self.assertRaises(Classification.DoesNotExist): - classification.refresh_from_db() - def test_manual_classification_reject_delete(self): """ A classifications with no worker version should be deleted when rejected @@ -385,26 +312,28 @@ class TestClasses(FixtureAPITestCase): classification.refresh_from_db() def test_classification_reject_without_permissions(self): - classification = self._create_classification_from_source() + classification = self.element.classifications.create(ml_class=self.text, confidence=.42) with self.assertNumQueries(0): response = self.client.put(reverse('api:classification-reject', kwargs={'pk': classification.id})) self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) def test_classification_can_still_be_moderated(self): self.client.force_login(self.user) - classification = self._create_classification_from_source() - classification.moderator = self.user - classification.state = ClassificationState.Validated.value - classification.save() + classification = self.element.classifications.create( + ml_class=self.text, + confidence=.5, + moderator=self.user, + state=ClassificationState.Validated, + worker_version=self.worker_version_2, + ) # First try to reject - with self.assertNumQueries(6): + with self.assertNumQueries(5): response = self.client.put(reverse('api:classification-reject', kwargs={'pk': classification.id})) self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertDictEqual(response.json(), { 'id': str(classification.id), - 'source': self._serialized_source(classification), 'ml_class': { 'id': str(classification.ml_class.id), 'name': classification.ml_class.name @@ -412,17 +341,16 @@ class TestClasses(FixtureAPITestCase): 'state': ClassificationState.Rejected.value, 'confidence': classification.confidence, 'high_confidence': False, - 'worker_version': None + 'worker_version': str(self.worker_version_2.id), }) # Then try to validate - with self.assertNumQueries(6): + with self.assertNumQueries(5): response = self.client.put(reverse('api:classification-validate', kwargs={'pk': classification.id})) self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertDictEqual(response.json(), { 'id': str(classification.id), - 'source': self._serialized_source(classification), 'ml_class': { 'id': str(classification.ml_class.id), 'name': classification.ml_class.name @@ -430,7 +358,7 @@ class TestClasses(FixtureAPITestCase): 'state': ClassificationState.Validated.value, 'confidence': classification.confidence, 'high_confidence': False, - 'worker_version': None + 'worker_version': str(self.worker_version_2.id) }) def test_classification_selection_requires_login(self): @@ -535,7 +463,7 @@ class TestClasses(FixtureAPITestCase): [str(self.element.id), str(self.folder.id), str(act_x.id), str(act_y.id)], ) - with self.assertNumQueries(14): + with self.assertNumQueries(10): response = self.client.post( reverse('api:classification-selection'), data={'corpus_id': self.corpus.id, 'ml_class': self.text.id, 'mode': 'create'} @@ -546,26 +474,17 @@ class TestClasses(FixtureAPITestCase): classification = self.folder.classifications.get() self.assertEqual(classification.ml_class, self.text) self.assertEqual(classification.state, ClassificationState.Validated) - self.assertEqual(classification.source.slug, 'manual') + self.assertIsNone(classification.worker_version) self.assertEqual(classification.confidence, 1) self.assertTrue(self.element.classifications.exists()) classification = self.element.classifications.get() self.assertEqual(classification.ml_class, self.text) self.assertEqual(classification.state, ClassificationState.Validated) - self.assertEqual(classification.source.slug, 'manual') + self.assertIsNone(classification.worker_version) self.assertEqual(classification.confidence, 1) def test_classifications_selection_validate(self): - source_1 = DataSource.objects.get(slug='some_classifier') - source_2 = DataSource.objects.create( - type=MLToolType.NER, - slug='test', - name='Test NER', - revision='4.2', - internal=False, - ) - line = MLClass.objects.create(name='line', corpus=self.private_corpus) act_x = Element.objects.create( type=self.act_type, @@ -574,7 +493,7 @@ class TestClasses(FixtureAPITestCase): ) Classification.objects.create( element=act_x, - source=source_1, + worker_version=self.worker_version_1, state=ClassificationState.Pending, high_confidence=True, ml_class=line @@ -583,14 +502,14 @@ class TestClasses(FixtureAPITestCase): for e in [self.folder, self.element]: Classification.objects.create( element=e, - source=source_1, + worker_version=self.worker_version_1, state=ClassificationState.Pending, high_confidence=True, ml_class=self.text ) Classification.objects.create( element=e, - source=source_2, + worker_version=self.worker_version_2, state=ClassificationState.Pending, high_confidence=False, ml_class=self.text @@ -613,11 +532,11 @@ class TestClasses(FixtureAPITestCase): for e in [self.folder, self.element]: classification = e.classifications.get(state=ClassificationState.Validated) self.assertTrue(classification.high_confidence) - self.assertEqual(classification.source, source_1) + self.assertEqual(classification.worker_version, self.worker_version_1) classification = e.classifications.get(state=ClassificationState.Pending) self.assertFalse(classification.high_confidence) - self.assertEqual(classification.source, source_2) + self.assertEqual(classification.worker_version, self.worker_version_2) classification = act_x.classifications.get() self.assertEqual(classification.state, ClassificationState.Pending) diff --git a/arkindex/documents/tests/test_parents_elements.py b/arkindex/documents/tests/test_parents_elements.py index e0a5a797c6..3d79100b54 100644 --- a/arkindex/documents/tests/test_parents_elements.py +++ b/arkindex/documents/tests/test_parents_elements.py @@ -4,9 +4,8 @@ from django.urls import reverse from rest_framework import status from arkindex.dataimport.models import WorkerVersion -from arkindex.documents.models import Corpus, DataSource, Element +from arkindex.documents.models import Corpus, Element from arkindex.project.tests import FixtureAPITestCase -from arkindex_common.ml_tool import MLToolType class TestParentsElements(FixtureAPITestCase): @@ -17,7 +16,6 @@ class TestParentsElements(FixtureAPITestCase): cls.vol = cls.corpus.elements.get(name='Volume 1') cls.private_corpus = Corpus.objects.create(name='private', public=False) cls.private_elt = cls.private_corpus.elements.create(type=cls.private_corpus.types.create(slug='type')) - cls.manual_source = DataSource.objects.create(type=MLToolType.Recognizer, slug='manual', internal=True) cls.worker_version = WorkerVersion.objects.get(worker__slug='reco') def setUp(self): diff --git a/arkindex/documents/tests/test_patch_elements.py b/arkindex/documents/tests/test_patch_elements.py index 2ea0260f6a..132918b892 100644 --- a/arkindex/documents/tests/test_patch_elements.py +++ b/arkindex/documents/tests/test_patch_elements.py @@ -1,12 +1,11 @@ from django.urls import reverse from rest_framework import status -from arkindex.documents.models import Corpus, DataSource, Element +from arkindex.documents.models import Corpus, Element from arkindex.images.models import ImageServer from arkindex.project.aws import S3FileStatus from arkindex.project.tests import FixtureAPITestCase from arkindex.users.models import User -from arkindex_common.ml_tool import MLToolType class TestPatchElements(FixtureAPITestCase): @@ -28,7 +27,6 @@ class TestPatchElements(FixtureAPITestCase): ) cls.private_corpus = Corpus.objects.create(name='private', public=False) cls.private_elt = cls.private_corpus.elements.create(type=cls.private_corpus.types.create(slug='type')) - cls.manual_source = DataSource.objects.create(type=MLToolType.Recognizer, slug='manual', internal=True) def test_patch_element_unverified(self): """ diff --git a/arkindex/documents/tests/test_retrieve_elements.py b/arkindex/documents/tests/test_retrieve_elements.py index 2729710372..d80d626d51 100644 --- a/arkindex/documents/tests/test_retrieve_elements.py +++ b/arkindex/documents/tests/test_retrieve_elements.py @@ -1,10 +1,10 @@ from django.urls import reverse from rest_framework import status -from arkindex.documents.models import Classification, Corpus, DataSource, Entity, MLClass +from arkindex.dataimport.models import WorkerVersion +from arkindex.documents.models import Corpus, Entity, MLClass from arkindex.project.tests import FixtureAPITestCase from arkindex_common.enums import EntityType, MetaType -from arkindex_common.ml_tool import MLToolType class TestRetrieveElements(FixtureAPITestCase): @@ -14,6 +14,7 @@ class TestRetrieveElements(FixtureAPITestCase): super().setUpTestData() cls.vol = cls.corpus.elements.get(name='Volume 1') cls.private_corpus = Corpus.objects.create(name='private', public=False) + cls.worker_version = WorkerVersion.objects.get(worker__slug='reco') def setUp(self): self.page = self.corpus.elements.get(name='Volume 1, page 1r') @@ -25,9 +26,8 @@ class TestRetrieveElements(FixtureAPITestCase): ) def test_get_element(self): - data_source = DataSource.objects.get(slug='test', type=MLToolType.Recognizer) ml_class = MLClass.objects.create(name='text', corpus=self.corpus) - classification = Classification.objects.create(element=self.vol, source=data_source, ml_class=ml_class) + classification = self.vol.classifications.create(worker_version=self.worker_version, ml_class=ml_class) response = self.client.get(reverse('api:element-retrieve', kwargs={'pk': str(self.vol.id)})) self.assertEqual(response.status_code, status.HTTP_200_OK) @@ -42,7 +42,6 @@ class TestRetrieveElements(FixtureAPITestCase): }, 'thumbnail_url': self.vol.thumbnail.s3_url, 'thumbnail_put_url': None, - 'source': None, 'worker_version': None, 'zone': None, 'metadata': 0, @@ -52,15 +51,7 @@ class TestRetrieveElements(FixtureAPITestCase): 'confidence': None, 'high_confidence': False, 'state': 'pending', - 'worker_version': None, - 'source': { - 'id': str(data_source.id), - 'slug': 'test', - 'type': 'recognizer', - 'name': 'Test Recognizer', - 'revision': '4.2', - 'internal': False, - }, + 'worker_version': str(self.worker_version.id), 'ml_class': { 'id': str(ml_class.id), 'name': 'text', @@ -157,7 +148,7 @@ class TestRetrieveElements(FixtureAPITestCase): type=EntityType.Person, corpus=self.corpus, name='Marc', - source_id=DataSource.objects.get(slug='test', type=MLToolType.Recognizer).id + worker_version=self.worker_version, ) self.metadata.entity = entity self.metadata.save() diff --git a/arkindex/documents/tests/test_search.py b/arkindex/documents/tests/test_search.py index dd001a2ef4..46d0702c9a 100644 --- a/arkindex/documents/tests/test_search.py +++ b/arkindex/documents/tests/test_search.py @@ -6,7 +6,8 @@ from django.urls import reverse from elasticsearch_dsl.connections import connections from rest_framework import status -from arkindex.documents.models import Corpus, DataSource, Element, MLToolType, Transcription +from arkindex.dataimport.models import WorkerVersion +from arkindex.documents.models import Corpus, Element, Transcription from arkindex.project.elastic import ESTranscription from arkindex.project.tests import FixtureAPITestCase from arkindex_common.enums import EntityType, MetaType @@ -237,9 +238,9 @@ class TestSearchAPI(FixtureAPITestCase): self.assertListEqual(conditions, [{'match': {'text': 'paris'}}]) def test_entity_search(self): - source = DataSource.objects.create(type=MLToolType.NER, slug='entity source', internal=True) - entity_1 = self.corpus.entities.create(type=EntityType.Person, name="an entity", source=source) - entity_2 = self.corpus.entities.create(type=EntityType.Location, name="somewhere", source=source) + worker_version = WorkerVersion.objects.first() + entity_1 = self.corpus.entities.create(type=EntityType.Person, name="an entity", worker_version=worker_version) + entity_2 = self.corpus.entities.create(type=EntityType.Location, name="somewhere", worker_version=worker_version) self.es_mock.count.return_value = {'count': 2} self.es_mock.search.return_value = self.build_es_response([ # Test the ES ordering is preserved by returning entities in non-alphabetical order diff --git a/arkindex/documents/tests/test_transcriptions.py b/arkindex/documents/tests/test_transcriptions.py index 7ff9d3a1e0..619d9ae08c 100644 --- a/arkindex/documents/tests/test_transcriptions.py +++ b/arkindex/documents/tests/test_transcriptions.py @@ -2,10 +2,9 @@ from django.urls import reverse from rest_framework import status from arkindex.dataimport.models import WorkerVersion -from arkindex.documents.models import Corpus, DataSource +from arkindex.documents.models import Corpus from arkindex.project.tests import FixtureAPITestCase from arkindex.users.models import User -from arkindex_common.ml_tool import MLToolType class TestTranscriptions(FixtureAPITestCase): @@ -21,12 +20,12 @@ class TestTranscriptions(FixtureAPITestCase): cls.line = cls.corpus.elements.get(name='Text line') cls.private_corpus = Corpus.objects.create(name='Private') cls.private_page = cls.private_corpus.elements.create(type=cls.page.type) - cls.src = DataSource.objects.get(slug='test', type=MLToolType.Recognizer) # Create an user with a read right only on the private corpus cls.private_read_user = User.objects.create_user('a@bc.de', 'a') cls.private_read_user.verified_email = True cls.private_read_user.save() - cls.worker_version = WorkerVersion.objects.get(worker__slug='reco') + cls.worker_version_1 = WorkerVersion.objects.get(worker__slug='reco') + cls.worker_version_2 = WorkerVersion.objects.get(worker__slug='dla') def test_list_transcriptions_read_right(self): # A read right on the element corpus is required to access transcriptions @@ -39,7 +38,7 @@ class TestTranscriptions(FixtureAPITestCase): tr1 = self.page.transcriptions.get() tr2 = self.page.transcriptions.create( text='something', - worker_version=self.worker_version, + worker_version=self.worker_version_2, score=0.369, ) self.client.force_login(self.user) @@ -53,23 +52,14 @@ class TestTranscriptions(FixtureAPITestCase): 'id': str(tr1.id), 'text': 'Lorem ipsum dolor sit amet', 'score': 1.0, - 'source': { - 'id': str(self.src.id), - 'type': 'recognizer', - 'slug': 'test', - 'name': 'Test Recognizer', - 'revision': '4.2', - 'internal': False, - }, - 'worker_version_id': None, + 'worker_version_id': str(self.worker_version_1.id), 'element': None, }, { 'id': str(tr2.id), 'text': 'something', 'score': 0.369, - 'source': None, - 'worker_version_id': str(self.worker_version.id), + 'worker_version_id': str(self.worker_version_2.id), 'element': None, } ]) @@ -99,7 +89,7 @@ class TestTranscriptions(FixtureAPITestCase): def test_list_worker_version_transcriptions(self): worker_transcription = self.page.transcriptions.create( text='something', - worker_version=self.worker_version, + worker_version=self.worker_version_2, score=0.369, ) @@ -108,7 +98,7 @@ class TestTranscriptions(FixtureAPITestCase): with self.assertNumQueries(12): response = self.client.get( reverse('api:element-transcriptions', kwargs={'pk': str(self.page.id)}), - data={'recursive': 'true', 'worker_version': str(self.worker_version.id)} + data={'recursive': 'true', 'worker_version': str(self.worker_version_2.id)} ) self.assertEqual(response.status_code, status.HTTP_200_OK) @@ -117,8 +107,7 @@ class TestTranscriptions(FixtureAPITestCase): 'id': str(worker_transcription.id), 'text': 'something', 'score': 0.369, - 'source': None, - 'worker_version_id': str(self.worker_version.id), + 'worker_version_id': str(self.worker_version_2.id), 'element': { 'id': str(self.page.id), 'name': 'Volume 1, page 1r', diff --git a/arkindex/project/api_v1.py b/arkindex/project/api_v1.py index 386d8cf4a9..e7e87313d9 100644 --- a/arkindex/project/api_v1.py +++ b/arkindex/project/api_v1.py @@ -71,8 +71,6 @@ from arkindex.documents.api.ml import ( ClassificationReject, ClassificationValidate, CorpusMLClassList, - CorpusMLStats, - ElementMLStats, ElementTranscriptionsBulk, ManageClassificationsSelection, MLClassList, @@ -131,7 +129,6 @@ api = [ ElementTranscriptionsBulk.as_view(), name='element-transcriptions-bulk' ), - path('element/<uuid:pk>/ml-stats/', ElementMLStats.as_view(), name='element-ml-stats'), path('element/<uuid:child>/parent/<uuid:parent>/', ElementParent.as_view(), name='element-parent'), # Corpora @@ -141,7 +138,6 @@ api = [ path('corpus/<uuid:corpus>/elements/', CorpusElements.as_view(), name='corpus-elements'), path('corpus/<uuid:pk>/classes/', CorpusMLClassList.as_view(), name='corpus-classes'), path('corpus/<uuid:pk>/roles/', CorpusRoles.as_view(), name='corpus-roles'), - path('corpus/<uuid:pk>/ml-stats/', CorpusMLStats.as_view(), name='corpus-ml-stats'), path('corpus/<uuid:pk>/allowed-metadata/', CorpusAllowedMetaData.as_view(), name='corpus-allowed-metadata'), path('corpus/<uuid:pk>/versions/', CorpusWorkerVersionList.as_view(), name='corpus-versions'), path('corpus/<uuid:pk>/selection/', CorpusDeleteSelection.as_view(), name='corpus-delete-selection'), diff --git a/arkindex/project/openapi/patch.yml b/arkindex/project/openapi/patch.yml index 7ad3e945e0..b4974619f9 100644 --- a/arkindex/project/openapi/patch.yml +++ b/arkindex/project/openapi/patch.yml @@ -100,11 +100,6 @@ paths: id: 55cd009d-cd4b-4ec2-a475-b060f98f9138 corpus: - Role already exists in this corpus - /api/v1/corpus/{id}/ml-stats/: - delete: - # Will need https://gitlab.com/arkindex/backend/-/issues/86 to be removed - operationId: DestroyCorpusMLResults - description: Delete machine learning results on all elements of a corpus. /api/v1/element/{id}/: get: description: Retrieve a single element's informations and metadata @@ -115,11 +110,6 @@ paths: description: Rename an element delete: description: Delete a childless element - /api/v1/element/{id}/ml-stats/: - delete: - # Will need https://gitlab.com/arkindex/backend/-/issues/86 to be removed - operationId: DestroyElementMLResults - description: Delete machine learning results on an element and its direct children. /api/v1/element/{id}/metadata/: get: operationId: ListElementMetaData diff --git a/arkindex/project/tests/test_elastic.py b/arkindex/project/tests/test_elastic.py index 05565df4ec..3a9da527cd 100644 --- a/arkindex/project/tests/test_elastic.py +++ b/arkindex/project/tests/test_elastic.py @@ -1,11 +1,10 @@ from unittest.mock import patch +from arkindex.dataimport.models import WorkerVersion from arkindex.documents.dates import DateType, InterpretedDate -from arkindex.documents.models import DataSource from arkindex.project.elastic import ESElement from arkindex.project.tests import FixtureAPITestCase from arkindex_common.enums import MetaType -from arkindex_common.ml_tool import MLToolType class TestESDocuments(FixtureAPITestCase): @@ -61,7 +60,7 @@ class TestESDocuments(FixtureAPITestCase): surface.add_parent(page) surface.transcriptions.create( text='invisible transcription', - source=DataSource.objects.get(slug='test', type=MLToolType.Recognizer), + worker_version=WorkerVersion.objects.get(worker__slug='reco'), ) texts = [tr['text'] for tr in ESElement.from_model(page).to_dict()['transcriptions']] self.assertNotIn('invisible transcription', texts) diff --git a/arkindex/project/triggers.py b/arkindex/project/triggers.py index 736a7f44b1..a97585f846 100644 --- a/arkindex/project/triggers.py +++ b/arkindex/project/triggers.py @@ -76,46 +76,6 @@ def reindex_start(*, ) -def ml_results_delete(*, - element: Union[Element, UUID, str] = None, - corpus: Union[Corpus, UUID, str] = None, - batch_size: int = 1000, - user_id: Optional[int] = None) -> None: - """ - Delete all ML results from all sources on a corpus - or an element and its *direct* (non-recursive) children. - """ - element_id = None - corpus_id = None - if isinstance(element, Element): - element_id = str(element.id) - elif element: - element_id = str(element) - - if isinstance(corpus, Corpus): - corpus_id = str(corpus.id) - elif corpus: - corpus_id = str(corpus) - - assert element_id or corpus_id, 'Missing element or corpus ID' - - if element_id: - description = f'ML results deletion on element {element_id}' - else: - description = f'ML results deletion on corpus {corpus_id}' - - job = tasks.ml_results_delete.delay( - corpus_id=corpus_id, - element_id=element_id, - batch_size=batch_size, - description=description, - user_id=user_id, - ) - if settings.ARKINDEX_FEATURES['search']: - # Trigger a reindex afterwards to cleanup the deleted results - tasks.reindex_start.delay(corpus_id=corpus_id, element_id=element_id, depends_on=job) - - def corpus_delete(corpus: Union[Corpus, UUID, str], user_id: Optional[int] = None) -> None: """ Delete a whole corpus without killing a server by removing all related diff --git a/arkindex/sql_validation/element_trash_children.sql b/arkindex/sql_validation/element_trash_children.sql index e80795b9f4..e03f175df9 100644 --- a/arkindex/sql_validation/element_trash_children.sql +++ b/arkindex/sql_validation/element_trash_children.sql @@ -112,7 +112,6 @@ SELECT "documents_element"."id", "documents_element"."type_id", "documents_element"."name", "documents_element"."zone_id", - "documents_element"."source_id", "documents_element"."worker_version_id" FROM "documents_element" INNER JOIN "documents_elementpath" ON ("documents_element"."id" = "documents_elementpath"."element_id") @@ -224,7 +223,6 @@ SELECT "documents_element"."id", "documents_element"."type_id", "documents_element"."name", "documents_element"."zone_id", - "documents_element"."source_id", "documents_element"."worker_version_id" FROM "documents_element" INNER JOIN "documents_elementpath" ON ("documents_element"."id" = "documents_elementpath"."element_id") @@ -304,7 +302,6 @@ SELECT "documents_element"."id", "documents_element"."type_id", "documents_element"."name", "documents_element"."zone_id", - "documents_element"."source_id", "documents_element"."worker_version_id" FROM "documents_element" WHERE "documents_element"."id" = '{id}'::uuid; diff --git a/arkindex/sql_validation/element_trash_deep.sql b/arkindex/sql_validation/element_trash_deep.sql index fdc4ce048f..dd5e994626 100644 --- a/arkindex/sql_validation/element_trash_deep.sql +++ b/arkindex/sql_validation/element_trash_deep.sql @@ -111,7 +111,6 @@ SELECT "documents_element"."id", "documents_element"."type_id", "documents_element"."name", "documents_element"."zone_id", - "documents_element"."source_id", "documents_element"."worker_version_id" FROM "documents_element" INNER JOIN "documents_elementpath" ON ("documents_element"."id" = "documents_elementpath"."element_id") @@ -223,7 +222,6 @@ SELECT "documents_element"."id", "documents_element"."type_id", "documents_element"."name", "documents_element"."zone_id", - "documents_element"."source_id", "documents_element"."worker_version_id" FROM "documents_element" INNER JOIN "documents_elementpath" ON ("documents_element"."id" = "documents_elementpath"."element_id") @@ -335,7 +333,6 @@ SELECT "documents_element"."id", "documents_element"."type_id", "documents_element"."name", "documents_element"."zone_id", - "documents_element"."source_id", "documents_element"."worker_version_id" FROM "documents_element" INNER JOIN "documents_elementpath" ON ("documents_element"."id" = "documents_elementpath"."element_id") @@ -447,7 +444,6 @@ SELECT "documents_element"."id", "documents_element"."type_id", "documents_element"."name", "documents_element"."zone_id", - "documents_element"."source_id", "documents_element"."worker_version_id" FROM "documents_element" INNER JOIN "documents_elementpath" ON ("documents_element"."id" = "documents_elementpath"."element_id") @@ -527,7 +523,6 @@ SELECT "documents_element"."id", "documents_element"."type_id", "documents_element"."name", "documents_element"."zone_id", - "documents_element"."source_id", "documents_element"."worker_version_id" FROM "documents_element" WHERE "documents_element"."id" = '{id}'::uuid; diff --git a/arkindex/sql_validation/element_trash_ml_class.sql b/arkindex/sql_validation/element_trash_ml_class.sql index cfbbec2ecf..88be7fe325 100644 --- a/arkindex/sql_validation/element_trash_ml_class.sql +++ b/arkindex/sql_validation/element_trash_ml_class.sql @@ -120,7 +120,6 @@ SELECT "documents_element"."id", "documents_element"."type_id", "documents_element"."name", "documents_element"."zone_id", - "documents_element"."source_id", "documents_element"."worker_version_id" FROM "documents_element" INNER JOIN "documents_elementpath" ON ("documents_element"."id" = "documents_elementpath"."element_id") @@ -209,8 +208,7 @@ SELECT "documents_element"."id", "documents_element"."type_id", "documents_element"."name", "documents_element"."zone_id", - "documents_element"."source_id", "documents_element"."worker_version_id" FROM "documents_element" INNER JOIN "documents_classification" ON ("documents_element"."id" = "documents_classification"."element_id") -WHERE "documents_classification"."id" IS NOT NULL \ No newline at end of file +WHERE "documents_classification"."id" IS NOT NULL diff --git a/arkindex/sql_validation/element_trash_no_children.sql b/arkindex/sql_validation/element_trash_no_children.sql index 41b889a7bb..66f880696c 100644 --- a/arkindex/sql_validation/element_trash_no_children.sql +++ b/arkindex/sql_validation/element_trash_no_children.sql @@ -69,7 +69,6 @@ SELECT "documents_element"."id", "documents_element"."type_id", "documents_element"."name", "documents_element"."zone_id", - "documents_element"."source_id", "documents_element"."worker_version_id" FROM "documents_element" WHERE "documents_element"."id" = '{id}'::uuid; -- GitLab