diff --git a/VERSION b/VERSION index ec70f75560773b1b4dc68dfd565d8308fb17bedc..bd8bf882d06184bb54615a59477e3c5e35f522fc 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.6.6 +1.7.0 diff --git a/arkindex/documents/api/entities.py b/arkindex/documents/api/entities.py index 68234eeb51ea61351873ef1ed9d3a2d03974cdd4..7c60ae8f439c61c71247fd45fe1dd0fa6a0276c8 100644 --- a/arkindex/documents/api/entities.py +++ b/arkindex/documents/api/entities.py @@ -3,8 +3,10 @@ from textwrap import dedent from uuid import UUID from django.core.exceptions import ValidationError as DjangoValidationError +from django.db.utils import OperationalError from django.shortcuts import get_object_or_404 from drf_spectacular.utils import OpenApiParameter, OpenApiResponse, extend_schema, extend_schema_view +from psycopg2.errors import ProgramLimitExceeded from rest_framework import permissions, serializers, status from rest_framework.exceptions import NotFound, PermissionDenied, ValidationError from rest_framework.generics import CreateAPIView, ListAPIView, RetrieveUpdateDestroyAPIView @@ -199,6 +201,41 @@ class EntityCreate(CreateAPIView): permission_classes = (IsVerified, ) serializer_class = EntityCreateSerializer + def handle_creation( + self, + *, + use_existing, + name, + type, + corpus, + metas, + worker_version_id, + worker_run, + ): + status_code = status.HTTP_201_CREATED + if use_existing: + entity, created = Entity.objects.get_or_create(name=name, corpus=corpus, type=type, defaults={ + "metas": metas, + "worker_version_id": worker_version_id, + "worker_run": worker_run + }) + # When using the "use_existing" option, we return a 200_OK instead of a 201_CREATED status code + if not created: + status_code = status.HTTP_200_OK + else: + entity = Entity.objects.create( + name=name, + type=type, + corpus=corpus, + metas=metas, + worker_version_id=worker_version_id, + worker_run=worker_run + ) + + entity_serializer = EntitySerializer(entity) + headers = self.get_success_headers(entity_serializer.data) + return Response(entity_serializer.data, status=status_code, headers=headers) + def create(self, request, *args, **kwargs): # Overriding create in order to return EntitySerializer, not EntityCreateSerializer serializer = self.get_serializer(data=request.data) @@ -211,30 +248,21 @@ class EntityCreate(CreateAPIView): worker_run = serializer.validated_data["worker_run"] worker_version_id = worker_run.version_id if worker_run else None - # When using the "use_existing" option, we return a 200_OK instead of a 201_CREATED status code - if request.data.get("use_existing"): - entity, created = Entity.objects.get_or_create(name=name, corpus=corpus, type=type, defaults={ - "metas": metas, - "worker_version_id": worker_version_id, - "worker_run": worker_run - }) - entity = EntitySerializer(entity) - if created: - status_code = status.HTTP_201_CREATED - else: - status_code = status.HTTP_200_OK - else: - entity = EntitySerializer(Entity.objects.create( + try: + return self.handle_creation( + use_existing=request.data.get("use_existing"), name=name, type=type, corpus=corpus, metas=metas, worker_version_id=worker_version_id, - worker_run=worker_run - )) - status_code = status.HTTP_201_CREATED - headers = self.get_success_headers(serializer.data) - return Response(entity.data, status=status_code, headers=headers) + worker_run=worker_run, + ) + except OperationalError as e: + if isinstance(getattr(e, "__cause__", None), ProgramLimitExceeded): + # As the max length is dynamic and depending on content, we cannot just limit on a specific length + raise ValidationError({"name": ["Value is too long for this field."]}) + raise e @extend_schema_view(post=extend_schema( diff --git a/arkindex/documents/fixtures/data.json b/arkindex/documents/fixtures/data.json index b945f7526fd12581d5e75cea1a270a19082a19bb..90686f494024882ac97cbabad74c34fb5025b7d8 100644 --- a/arkindex/documents/fixtures/data.json +++ b/arkindex/documents/fixtures/data.json @@ -1,11 +1,11 @@ [ { "model": "process.worker", - "pk": "008fd560-4b34-45bd-8a4c-2b4be6ff94f0", + "pk": "272af429-810f-4318-b126-06b915720023", "fields": { - "name": "Recognizer", - "slug": "reco", - "type": "8bb21ad8-8e5c-4c4c-8539-78d27bb9a658", + "name": "PDF export worker", + "slug": "pdf_export", + "type": "29d1e754-ae0c-43e1-a48d-ed678dbd27e1", "description": "", "public": false, "archived": null, @@ -14,11 +14,24 @@ }, { "model": "process.worker", - "pk": "4a32f3f4-31bf-4abc-a7de-d8dbc322cc1b", + "pk": "62ff0bf9-18da-44d4-8e87-2ceb477dbc32", + "fields": { + "name": "Elements Initialisation Worker", + "slug": "initialisation", + "type": "d2fe0ad7-9825-465a-8a42-fa40395cdd11", + "description": "", + "public": true, + "archived": null, + "repository_url": null + } +}, +{ + "model": "process.worker", + "pk": "6e08ebb4-7dee-437b-8d26-a91e99ba1c98", "fields": { "name": "Worker requiring a GPU", "slug": "worker-gpu", - "type": "e7d696c4-fd99-4737-a6c7-f36c8fd91052", + "type": "f5906e41-731b-419c-a0a5-bae201ca4ccf", "description": "", "public": false, "archived": null, @@ -27,11 +40,11 @@ }, { "model": "process.worker", - "pk": "4daaeda1-1aed-4147-b5b5-724d7e96047a", + "pk": "7e9cc0b9-4c40-4125-be12-8bfea80a4282", "fields": { "name": "Custom worker", "slug": "custom", - "type": "bb54b04a-41fb-470a-a82e-b9f8ad10f1f2", + "type": "298b2655-611a-493a-a6ea-31ae163549a1", "description": "", "public": false, "archived": null, @@ -40,37 +53,37 @@ }, { "model": "process.worker", - "pk": "56f4410a-3ac1-4940-8b8b-2d46430a06e0", + "pk": "ab57913d-701f-4cc4-9d04-83f04b8c7a8a", "fields": { - "name": "Generic worker with a Model", - "slug": "generic", - "type": "8bb21ad8-8e5c-4c4c-8539-78d27bb9a658", + "name": "File import", + "slug": "file_import", + "type": "151d56fa-329b-44f2-83e1-22ba873bd436", "description": "", - "public": false, + "public": true, "archived": null, "repository_url": null } }, { "model": "process.worker", - "pk": "bf56381e-a669-4e61-8d19-fc0e23171463", + "pk": "c13d16fe-d9cc-4fde-8c0a-48a6388551c2", "fields": { - "name": "Elements Initialisation Worker", - "slug": "initialisation", - "type": "2bc36b5e-3ce8-4a4c-a4b0-49198356b251", + "name": "Recognizer", + "slug": "reco", + "type": "a739a233-a46b-4b0c-8ac3-c221abfaa4c9", "description": "", - "public": true, + "public": false, "archived": null, "repository_url": null } }, { "model": "process.worker", - "pk": "dd99bb98-ca21-4f62-90a0-e15ed088de3b", + "pk": "c2722649-ffb7-4a18-b0f2-a1f6be9064d4", "fields": { "name": "Document layout analyser", "slug": "dla", - "type": "397f85ef-37fe-4f9e-9dbd-b02f4879db68", + "type": "24ca70b2-4856-4162-b130-b48354c25acb", "description": "", "public": false, "archived": null, @@ -79,30 +92,30 @@ }, { "model": "process.worker", - "pk": "f0f5787d-3e41-4d16-aa56-b84f64103c12", + "pk": "cd35eb01-10fd-4f79-9153-c1e76a3012c7", "fields": { - "name": "File import", - "slug": "file_import", - "type": "736e9c86-b137-4933-b4f4-18d9e2f3bb8c", + "name": "Generic worker with a Model", + "slug": "generic", + "type": "a739a233-a46b-4b0c-8ac3-c221abfaa4c9", "description": "", - "public": true, + "public": false, "archived": null, "repository_url": null } }, { "model": "process.workertype", - "pk": "2bc36b5e-3ce8-4a4c-a4b0-49198356b251", + "pk": "151d56fa-329b-44f2-83e1-22ba873bd436", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "slug": "init", - "display_name": "Elements Initialisation" + "slug": "import", + "display_name": "Import" } }, { "model": "process.workertype", - "pk": "397f85ef-37fe-4f9e-9dbd-b02f4879db68", + "pk": "24ca70b2-4856-4162-b130-b48354c25acb", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", @@ -112,17 +125,27 @@ }, { "model": "process.workertype", - "pk": "736e9c86-b137-4933-b4f4-18d9e2f3bb8c", + "pk": "298b2655-611a-493a-a6ea-31ae163549a1", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "slug": "import", - "display_name": "Import" + "slug": "custom", + "display_name": "Custom" + } +}, +{ + "model": "process.workertype", + "pk": "29d1e754-ae0c-43e1-a48d-ed678dbd27e1", + "fields": { + "created": "2020-02-02T01:23:45.678Z", + "updated": "2020-02-02T01:23:45.678Z", + "slug": "export", + "display_name": "Document export" } }, { "model": "process.workertype", - "pk": "8bb21ad8-8e5c-4c4c-8539-78d27bb9a658", + "pk": "a739a233-a46b-4b0c-8ac3-c221abfaa4c9", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", @@ -132,17 +155,17 @@ }, { "model": "process.workertype", - "pk": "bb54b04a-41fb-470a-a82e-b9f8ad10f1f2", + "pk": "d2fe0ad7-9825-465a-8a42-fa40395cdd11", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "slug": "custom", - "display_name": "Custom" + "slug": "init", + "display_name": "Elements Initialisation" } }, { "model": "process.workertype", - "pk": "e7d696c4-fd99-4737-a6c7-f36c8fd91052", + "pk": "f5906e41-731b-419c-a0a5-bae201ca4ccf", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", @@ -152,53 +175,74 @@ }, { "model": "process.workerversion", - "pk": "3a1d6aec-4b4f-4618-baa4-a0943e515729", + "pk": "54f3c9eb-86af-4308-a3f2-d20e023e2afa", "fields": { - "worker": "bf56381e-a669-4e61-8d19-fc0e23171463", + "worker": "cd35eb01-10fd-4f79-9153-c1e76a3012c7", "version": 1, "configuration": { - "docker": { - "command": "worker-init-elements" - } + "test": 42 }, "state": "available", "gpu_usage": "disabled", + "model_usage": "required", + "docker_image_iid": "registry.somewhere.com/something:latest", + "revision_url": null, + "branch": null, + "tag": null, + "feature": null, + "created": "2020-02-02T01:23:45.678Z", + "updated": "2020-02-02T01:23:45.678Z" + } +}, +{ + "model": "process.workerversion", + "pk": "82b37102-f4c1-44d3-9404-743aaca249fd", + "fields": { + "worker": "7e9cc0b9-4c40-4125-be12-8bfea80a4282", + "version": 1, + "configuration": { + "custom": "value" + }, + "state": "created", + "gpu_usage": "disabled", "model_usage": "disabled", - "docker_image_iid": "registry.gitlab.teklia.com/arkindex/workers/init-elements:latest", + "docker_image_iid": null, "revision_url": null, "branch": null, "tag": null, - "feature": "init_elements", + "feature": null, "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z" } }, { "model": "process.workerversion", - "pk": "52ea454c-52ea-4dfd-bc5d-b3e46ab6540d", + "pk": "a78fb453-e707-4d7e-896b-d33027898b02", "fields": { - "worker": "008fd560-4b34-45bd-8a4c-2b4be6ff94f0", + "worker": "62ff0bf9-18da-44d4-8e87-2ceb477dbc32", "version": 1, "configuration": { - "test": 42 + "docker": { + "command": "worker-init-elements" + } }, "state": "available", "gpu_usage": "disabled", "model_usage": "disabled", - "docker_image_iid": "registry.somewhere.com/something:latest", + "docker_image_iid": "registry.gitlab.teklia.com/arkindex/workers/init-elements:latest", "revision_url": null, "branch": null, "tag": null, - "feature": null, + "feature": "init_elements", "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z" } }, { "model": "process.workerversion", - "pk": "5c2651c8-6179-4f7e-a0f4-be2c66293f0a", + "pk": "ad8121f6-f012-43a7-93a9-d5367dd2e6f0", "fields": { - "worker": "f0f5787d-3e41-4d16-aa56-b84f64103c12", + "worker": "ab57913d-701f-4cc4-9d04-83f04b8c7a8a", "version": 1, "configuration": {}, "state": "available", @@ -215,37 +259,35 @@ }, { "model": "process.workerversion", - "pk": "a86c20e8-9a55-45c1-bfa8-ce53fc01ad00", + "pk": "b315c4b9-3120-48c4-8d90-785355f60ed0", "fields": { - "worker": "dd99bb98-ca21-4f62-90a0-e15ed088de3b", + "worker": "272af429-810f-4318-b126-06b915720023", "version": 1, - "configuration": { - "test": 42 - }, + "configuration": {}, "state": "available", "gpu_usage": "disabled", "model_usage": "disabled", - "docker_image_iid": "registry.somewhere.com/something:latest", + "docker_image_iid": "registry.example.com/pdf-export:latest", "revision_url": null, "branch": null, "tag": null, - "feature": null, + "feature": "pdf_export", "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z" } }, { "model": "process.workerversion", - "pk": "c5614a43-4131-4630-a69e-24d6a0b31e35", + "pk": "b7f533de-d429-47f7-96ec-34b8cbe89c16", "fields": { - "worker": "56f4410a-3ac1-4940-8b8b-2d46430a06e0", + "worker": "6e08ebb4-7dee-437b-8d26-a91e99ba1c98", "version": 1, "configuration": { "test": 42 }, "state": "available", - "gpu_usage": "disabled", - "model_usage": "required", + "gpu_usage": "required", + "model_usage": "disabled", "docker_image_iid": "registry.somewhere.com/something:latest", "revision_url": null, "branch": null, @@ -257,15 +299,15 @@ }, { "model": "process.workerversion", - "pk": "e0f56d55-5492-49c0-8988-7aa8f9049d54", + "pk": "c32136a0-257a-46e4-9cd0-2016a6150c85", "fields": { - "worker": "4a32f3f4-31bf-4abc-a7de-d8dbc322cc1b", + "worker": "c2722649-ffb7-4a18-b0f2-a1f6be9064d4", "version": 1, "configuration": { "test": 42 }, "state": "available", - "gpu_usage": "required", + "gpu_usage": "disabled", "model_usage": "disabled", "docker_image_iid": "registry.somewhere.com/something:latest", "revision_url": null, @@ -278,17 +320,17 @@ }, { "model": "process.workerversion", - "pk": "e75d4a04-b0ee-4627-bee6-f6e648a85d4b", + "pk": "e08651f4-11ad-4dbe-ba3e-7d4ec8de0892", "fields": { - "worker": "4daaeda1-1aed-4147-b5b5-724d7e96047a", + "worker": "c13d16fe-d9cc-4fde-8c0a-48a6388551c2", "version": 1, "configuration": { - "custom": "value" + "test": 42 }, - "state": "created", + "state": "available", "gpu_usage": "disabled", "model_usage": "disabled", - "docker_image_iid": null, + "docker_image_iid": "registry.somewhere.com/something:latest", "revision_url": null, "branch": null, "tag": null, @@ -299,10 +341,10 @@ }, { "model": "process.workerrun", - "pk": "a89bf3d0-f4e2-4cdc-a0f8-beff2775cd1f", + "pk": "f179c779-54e9-4431-8f91-59356af8faa0", "fields": { - "process": "6ceedc11-f36b-418f-a19a-7c6b3dd96a82", - "version": "3a1d6aec-4b4f-4618-baa4-a0943e515729", + "process": "23b5ee11-b4b9-41dc-9b2b-83024404d0f0", + "version": "a78fb453-e707-4d7e-896b-d33027898b02", "model_version": null, "parents": "[]", "configuration": null, @@ -310,47 +352,50 @@ "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", "has_results": false, - "use_gpu": false + "use_gpu": false, + "ttl": 3600 } }, { "model": "process.workerrun", - "pk": "1f4f2186-93c9-4fc9-9e20-f4a790c30fa8", + "pk": "38e26faa-8609-4a29-a4c7-67b7c95d9845", "fields": { - "process": "6ceedc11-f36b-418f-a19a-7c6b3dd96a82", - "version": "a86c20e8-9a55-45c1-bfa8-ce53fc01ad00", + "process": "23b5ee11-b4b9-41dc-9b2b-83024404d0f0", + "version": "c32136a0-257a-46e4-9cd0-2016a6150c85", "model_version": null, - "parents": "[\"a89bf3d0-f4e2-4cdc-a0f8-beff2775cd1f\"]", + "parents": "[\"f179c779-54e9-4431-8f91-59356af8faa0\"]", "configuration": null, "summary": "Worker Document layout analyser @ version 1", "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", "has_results": false, - "use_gpu": false + "use_gpu": false, + "ttl": 3600 } }, { "model": "process.workerrun", - "pk": "cc5dbf61-20f3-4286-8a82-e303a58783a0", + "pk": "0239ce26-d3f8-44ed-b38d-f6c792e049e6", "fields": { - "process": "6ceedc11-f36b-418f-a19a-7c6b3dd96a82", - "version": "52ea454c-52ea-4dfd-bc5d-b3e46ab6540d", + "process": "23b5ee11-b4b9-41dc-9b2b-83024404d0f0", + "version": "e08651f4-11ad-4dbe-ba3e-7d4ec8de0892", "model_version": null, - "parents": "[\"1f4f2186-93c9-4fc9-9e20-f4a790c30fa8\"]", + "parents": "[\"38e26faa-8609-4a29-a4c7-67b7c95d9845\"]", "configuration": null, "summary": "Worker Recognizer @ version 1", "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", "has_results": false, - "use_gpu": false + "use_gpu": false, + "ttl": 3600 } }, { "model": "process.workerrun", - "pk": "e1322c97-67ad-4763-ae1a-88b7cdf447b3", + "pk": "313b7366-f528-42d0-822a-a7501da8208f", "fields": { - "process": "9767c1c7-cc6a-4a69-96b1-d8c0e7ee4c33", - "version": "e75d4a04-b0ee-4627-bee6-f6e648a85d4b", + "process": "da20071c-7c8e-4111-a1df-02f71f0531d2", + "version": "82b37102-f4c1-44d3-9404-743aaca249fd", "model_version": null, "parents": "[]", "configuration": null, @@ -358,15 +403,16 @@ "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", "has_results": false, - "use_gpu": false + "use_gpu": false, + "ttl": 0 } }, { "model": "process.workerrun", - "pk": "f3a31547-c102-454d-96f2-3eb5296662f5", + "pk": "52bd0035-6ea3-496b-85e8-37290a8758e1", "fields": { - "process": "49bef54f-c7cf-4c97-bd06-633ecf77efcf", - "version": "e75d4a04-b0ee-4627-bee6-f6e648a85d4b", + "process": "b6d2604e-6cf3-44cd-9b4a-4d7dff2ce040", + "version": "82b37102-f4c1-44d3-9404-743aaca249fd", "model_version": null, "parents": "[]", "configuration": null, @@ -374,12 +420,13 @@ "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", "has_results": false, - "use_gpu": false + "use_gpu": false, + "ttl": 0 } }, { "model": "documents.corpus", - "pk": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c", + "pk": "63951d56-ce50-4d2d-9d11-cee0da8dffba", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", @@ -387,16 +434,17 @@ "description": "", "top_level_type": null, "public": true, - "indexable": false + "indexable": false, + "maximum_task_ttl": null } }, { "model": "documents.elementtype", - "pk": "2b310b2a-33bd-42fe-bfb1-2915928b00cc", + "pk": "1d078b43-bb0d-44e6-a667-fec714dbf057", "fields": { - "corpus": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c", - "slug": "surface", - "display_name": "Surface", + "corpus": "63951d56-ce50-4d2d-9d11-cee0da8dffba", + "slug": "word", + "display_name": "Word", "folder": false, "indexable": false, "color": "28b62c" @@ -404,11 +452,11 @@ }, { "model": "documents.elementtype", - "pk": "324f4646-138e-425b-bb6c-76cbdd8c038c", + "pk": "3be8f2aa-3e04-4a25-80af-7f92c39c057f", "fields": { - "corpus": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c", - "slug": "page", - "display_name": "Page", + "corpus": "63951d56-ce50-4d2d-9d11-cee0da8dffba", + "slug": "text_line", + "display_name": "Line", "folder": false, "indexable": false, "color": "28b62c" @@ -416,11 +464,11 @@ }, { "model": "documents.elementtype", - "pk": "9d74a557-1c41-4fcb-9ac4-706fc8438a01", + "pk": "53d093a9-3d20-4fcf-9b55-c77e788e19a9", "fields": { - "corpus": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c", - "slug": "text_line", - "display_name": "Line", + "corpus": "63951d56-ce50-4d2d-9d11-cee0da8dffba", + "slug": "page", + "display_name": "Page", "folder": false, "indexable": false, "color": "28b62c" @@ -428,23 +476,23 @@ }, { "model": "documents.elementtype", - "pk": "b445d965-667a-4690-8732-98659d8645dd", + "pk": "748cabf2-e002-431f-b582-5c39ca57b44e", "fields": { - "corpus": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c", - "slug": "volume", - "display_name": "Volume", - "folder": true, + "corpus": "63951d56-ce50-4d2d-9d11-cee0da8dffba", + "slug": "surface", + "display_name": "Surface", + "folder": false, "indexable": false, "color": "28b62c" } }, { "model": "documents.elementtype", - "pk": "b61111fd-61e7-45f1-aa99-20c5f105357a", + "pk": "805d2171-94f5-45e3-ae85-fe511c88c141", "fields": { - "corpus": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c", - "slug": "word", - "display_name": "Word", + "corpus": "63951d56-ce50-4d2d-9d11-cee0da8dffba", + "slug": "act", + "display_name": "Act", "folder": false, "indexable": false, "color": "28b62c" @@ -452,329 +500,329 @@ }, { "model": "documents.elementtype", - "pk": "b71d6bbe-b2b0-46a8-a983-e34cab59aa1e", + "pk": "acd7e2f0-e4c6-48e6-9bf5-e9be06e1ae6c", "fields": { - "corpus": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c", - "slug": "act", - "display_name": "Act", - "folder": false, + "corpus": "63951d56-ce50-4d2d-9d11-cee0da8dffba", + "slug": "volume", + "display_name": "Volume", + "folder": true, "indexable": false, "color": "28b62c" } }, { "model": "documents.elementpath", - "pk": "05184aee-6dd7-4cfc-b4bc-87624c301c3b", + "pk": "06002271-45e8-4cb6-b8f1-b94e6dd7ae8a", "fields": { - "element": "d462dd86-bd44-4e74-95a3-44fefd397df8", - "path": "[\"1b829e8b-8ebe-4e86-a257-b7aaa2f7a22d\", \"bcd1fb0f-f910-4509-ba41-bf9fe67e67eb\"]", - "ordering": 1 + "element": "c49d9815-f0c8-471e-8e23-d24976505129", + "path": "[\"f3837e86-33a2-4d0d-bfe5-b407712f8e9b\", \"2cdad071-77b8-4e74-b05a-d87317cfbcc5\"]", + "ordering": 0 } }, { "model": "documents.elementpath", - "pk": "063a30c0-e3ed-4044-a914-17500916509b", + "pk": "12ad22b7-e8f5-4f7a-b2fb-648949221b1b", "fields": { - "element": "bc662943-dfa5-4696-8480-fc236ff3651e", - "path": "[\"1b829e8b-8ebe-4e86-a257-b7aaa2f7a22d\", \"b3433c27-0c42-4013-a645-67a4388cdd94\"]", - "ordering": 0 + "element": "6497f558-1aa8-4f51-a10e-d359a8d06c0b", + "path": "[\"f3837e86-33a2-4d0d-bfe5-b407712f8e9b\", \"dbffd4c4-a4f9-415f-b2e0-2fdab56a6358\"]", + "ordering": 1 } }, { "model": "documents.elementpath", - "pk": "0ac80cc9-e9e2-4cec-bebf-7c5ada86eeab", + "pk": "1d7efeec-d38d-4765-8b35-e0b1e5be1fd7", "fields": { - "element": "aa918643-b18a-4970-a5c4-f18ff3594462", - "path": "[\"1b829e8b-8ebe-4e86-a257-b7aaa2f7a22d\", \"98a67dc8-76d6-4a2c-931b-60a11ef601d9\"]", - "ordering": 2 + "element": "7047387b-146e-4735-a3d7-eefa9b4a64a9", + "path": "[\"f3837e86-33a2-4d0d-bfe5-b407712f8e9b\", \"57227959-07f9-43ea-a7ea-e05c808f13ca\"]", + "ordering": 0 } }, { "model": "documents.elementpath", - "pk": "17cd8a3f-a530-4fbc-8630-a39228d2bf15", + "pk": "1eed5fee-883a-40a0-8a5c-efe027b2068a", "fields": { - "element": "37ae14da-4051-43f7-ba68-2ed3014f508f", - "path": "[\"6db35fcd-3c50-4a16-b204-568850b6dbb0\"]", - "ordering": 1 + "element": "6508eb1d-b22d-4cf0-a94d-c32c9b614396", + "path": "[\"f3837e86-33a2-4d0d-bfe5-b407712f8e9b\", \"39d41f9e-c8eb-47c2-89b0-c43272a171ac\"]", + "ordering": 2 } }, { "model": "documents.elementpath", - "pk": "1eb521f5-5c2b-4579-b3cf-29a52c579d26", + "pk": "1f26d9d7-1b94-4367-a2f2-353b2e1cf85c", "fields": { - "element": "35886170-a511-424b-a1d5-984731ab4aed", - "path": "[\"1b829e8b-8ebe-4e86-a257-b7aaa2f7a22d\"]", - "ordering": 6 + "element": "2cdad071-77b8-4e74-b05a-d87317cfbcc5", + "path": "[\"f3837e86-33a2-4d0d-bfe5-b407712f8e9b\"]", + "ordering": 7 } }, { "model": "documents.elementpath", - "pk": "1f182eb0-2c92-4ff2-a6d1-052b47ee9fe4", + "pk": "2b4dd691-c560-4467-9910-fd7c1021f5f2", "fields": { - "element": "4e0e24e6-52a3-4623-b8da-c6fb7471c861", - "path": "[\"6db35fcd-3c50-4a16-b204-568850b6dbb0\"]", - "ordering": 2 + "element": "831b2e1f-01eb-4724-978b-608b7d587f7c", + "path": "[\"f3837e86-33a2-4d0d-bfe5-b407712f8e9b\", \"39d41f9e-c8eb-47c2-89b0-c43272a171ac\"]", + "ordering": 0 } }, { "model": "documents.elementpath", - "pk": "2116ba43-b157-4357-bbc0-292813e67b0a", + "pk": "437ee573-04e1-4ec2-aa49-c9dc8377174e", "fields": { - "element": "e792fe53-f1ed-459d-8b49-c7dae948c0db", - "path": "[\"1b829e8b-8ebe-4e86-a257-b7aaa2f7a22d\", \"bcd1fb0f-f910-4509-ba41-bf9fe67e67eb\"]", - "ordering": 0 + "element": "6cd4aba2-e49f-4153-8112-9eb561ac8d3b", + "path": "[\"4ac58be9-28bc-43da-bb6a-8e3eb09c2e14\"]", + "ordering": 2 } }, { "model": "documents.elementpath", - "pk": "5886e82a-089c-4e25-8e41-168d7bd5c33b", + "pk": "44ca9c72-57ba-430a-870b-ec01458b1570", "fields": { - "element": "99b4ca3d-834f-4010-b94f-ea02e307ceb4", - "path": "[\"1b829e8b-8ebe-4e86-a257-b7aaa2f7a22d\", \"98a67dc8-76d6-4a2c-931b-60a11ef601d9\"]", - "ordering": 3 + "element": "f3837e86-33a2-4d0d-bfe5-b407712f8e9b", + "path": "[]", + "ordering": 0 } }, { "model": "documents.elementpath", - "pk": "5ca9c492-2bf3-4a88-b845-658cec2965d7", + "pk": "579f577e-add9-4657-9219-f3f58e438c6b", "fields": { - "element": "99b51971-fbff-45c6-9e89-8bd0a598e1a4", - "path": "[\"1b829e8b-8ebe-4e86-a257-b7aaa2f7a22d\", \"bcd1fb0f-f910-4509-ba41-bf9fe67e67eb\"]", + "element": "9d9b6a50-5f16-480d-989b-0032ac1087e4", + "path": "[\"f3837e86-33a2-4d0d-bfe5-b407712f8e9b\", \"dbffd4c4-a4f9-415f-b2e0-2fdab56a6358\"]", "ordering": 2 } }, { "model": "documents.elementpath", - "pk": "605fe08f-7145-43ad-8882-1a9773d953c9", + "pk": "587a691e-681b-4700-95ac-d182cc8aad09", "fields": { - "element": "aa9cdf0c-7583-400e-8432-f3e388f5cf0d", - "path": "[\"1b829e8b-8ebe-4e86-a257-b7aaa2f7a22d\"]", - "ordering": 1 + "element": "39d41f9e-c8eb-47c2-89b0-c43272a171ac", + "path": "[\"f3837e86-33a2-4d0d-bfe5-b407712f8e9b\"]", + "ordering": 2 } }, { "model": "documents.elementpath", - "pk": "69c85528-1bcf-424a-af26-6ab3ca1bfaf0", + "pk": "620b7a22-1921-4b82-b8ab-66833eec1116", "fields": { - "element": "98a67dc8-76d6-4a2c-931b-60a11ef601d9", - "path": "[\"1b829e8b-8ebe-4e86-a257-b7aaa2f7a22d\"]", - "ordering": 0 + "element": "a463d187-d339-4f02-8307-420c5f19ac37", + "path": "[\"f3837e86-33a2-4d0d-bfe5-b407712f8e9b\", \"de0f901f-fa4e-4756-8e87-1ca20222bd07\"]", + "ordering": 2 } }, { "model": "documents.elementpath", - "pk": "737c7f51-ed49-48bb-9b8d-197125b56724", + "pk": "6f302a2f-a625-45ec-ba09-bd628889d22e", "fields": { - "element": "75db941d-00a5-4c9a-aa11-3594181b1cb2", - "path": "[\"1b829e8b-8ebe-4e86-a257-b7aaa2f7a22d\", \"98a67dc8-76d6-4a2c-931b-60a11ef601d9\"]", + "element": "4ac58be9-28bc-43da-bb6a-8e3eb09c2e14", + "path": "[]", "ordering": 0 } }, { "model": "documents.elementpath", - "pk": "76136a2d-5996-41ca-9687-8713e849a261", + "pk": "701fdd59-68d5-4af7-b891-2897a472a265", "fields": { - "element": "95181a58-4d6f-45fc-881a-fc42e8b7c721", - "path": "[\"6db35fcd-3c50-4a16-b204-568850b6dbb0\"]", - "ordering": 0 + "element": "bc176daf-0894-4560-abaa-5b74ee8a3426", + "path": "[\"4ac58be9-28bc-43da-bb6a-8e3eb09c2e14\"]", + "ordering": 1 } }, { "model": "documents.elementpath", - "pk": "7c87b38d-2b17-481d-bed7-f4f347845444", + "pk": "724be080-f751-4e91-84e7-f43e18f4899b", "fields": { - "element": "b64fbadd-68b6-488c-89be-715ddb28590c", - "path": "[\"1b829e8b-8ebe-4e86-a257-b7aaa2f7a22d\"]", + "element": "b5af9a42-dcc1-408c-802d-e123cfc02180", + "path": "[\"f3837e86-33a2-4d0d-bfe5-b407712f8e9b\"]", "ordering": 4 } }, { "model": "documents.elementpath", - "pk": "8095ebf7-780f-40bc-8bfa-fade20d53e85", + "pk": "7e76ec75-5a02-4930-bad1-2f7901da7423", "fields": { - "element": "6db35fcd-3c50-4a16-b204-568850b6dbb0", - "path": "[]", + "element": "50e1ccf3-dbeb-4c29-9d77-85e05f90c426", + "path": "[\"f3837e86-33a2-4d0d-bfe5-b407712f8e9b\", \"8de507e6-e85c-41d1-9ef4-361fe218f9bb\"]", "ordering": 0 } }, { "model": "documents.elementpath", - "pk": "90875c20-4ac6-42f2-b02e-7b89bfef12f5", + "pk": "7ef587b9-9a90-4d91-80d0-910fd09fb893", "fields": { - "element": "cf7188f3-98f6-49ae-ab3f-d3ab8b53608e", - "path": "[\"1b829e8b-8ebe-4e86-a257-b7aaa2f7a22d\", \"b64fbadd-68b6-488c-89be-715ddb28590c\"]", + "element": "60e09255-6a01-47e1-b6ba-f7ca8e657b25", + "path": "[\"f3837e86-33a2-4d0d-bfe5-b407712f8e9b\", \"de0f901f-fa4e-4756-8e87-1ca20222bd07\"]", "ordering": 0 } }, { "model": "documents.elementpath", - "pk": "97bb4ec1-2531-4546-b5d6-fc66ee23fbee", + "pk": "8064af41-fc33-4324-aca8-8e5c881ced15", "fields": { - "element": "1b829e8b-8ebe-4e86-a257-b7aaa2f7a22d", - "path": "[]", - "ordering": 0 + "element": "1c20d68c-ac53-4910-8e30-9e38e2cfa002", + "path": "[\"f3837e86-33a2-4d0d-bfe5-b407712f8e9b\"]", + "ordering": 5 } }, { "model": "documents.elementpath", - "pk": "9e145e20-f2ee-49b6-bb64-1c456cc1045b", + "pk": "8d3652c2-018d-4de7-8a38-302ec99d2029", "fields": { - "element": "2331d5fb-fac5-431f-b313-9f0aa817c95b", - "path": "[\"1b829e8b-8ebe-4e86-a257-b7aaa2f7a22d\"]", - "ordering": 7 + "element": "1becb3e4-c8c0-47e7-a2b2-402446d7280e", + "path": "[\"f3837e86-33a2-4d0d-bfe5-b407712f8e9b\", \"39d41f9e-c8eb-47c2-89b0-c43272a171ac\"]", + "ordering": 1 } }, { "model": "documents.elementpath", - "pk": "a15ff9e8-a500-4cd1-a000-c47797f3ed59", + "pk": "a4ae4629-73b3-42b2-baf9-cfdaa3f61345", "fields": { - "element": "2d08b6c7-2d0e-4d51-b989-a724ce6f40c7", - "path": "[\"1b829e8b-8ebe-4e86-a257-b7aaa2f7a22d\", \"bfa8252b-2859-4180-8358-fdcfedf82d7c\"]", + "element": "dbffd4c4-a4f9-415f-b2e0-2fdab56a6358", + "path": "[\"f3837e86-33a2-4d0d-bfe5-b407712f8e9b\"]", "ordering": 0 } }, { "model": "documents.elementpath", - "pk": "b6a2884c-c6c5-4ec1-b393-8d7d42b7371b", + "pk": "a74529cf-b361-46cf-84fc-9ea8d22b7be6", "fields": { - "element": "f1fbbe29-65c8-4a47-b912-6d4e04b39c01", - "path": "[\"1b829e8b-8ebe-4e86-a257-b7aaa2f7a22d\", \"2331d5fb-fac5-431f-b313-9f0aa817c95b\"]", - "ordering": 0 + "element": "8de507e6-e85c-41d1-9ef4-361fe218f9bb", + "path": "[\"f3837e86-33a2-4d0d-bfe5-b407712f8e9b\"]", + "ordering": 3 } }, { "model": "documents.elementpath", - "pk": "bcb1cb9d-9157-4fed-8009-800546932474", + "pk": "a88a155b-f39c-4ee1-a820-4d9c471d8b6c", "fields": { - "element": "bfa8252b-2859-4180-8358-fdcfedf82d7c", - "path": "[\"1b829e8b-8ebe-4e86-a257-b7aaa2f7a22d\"]", - "ordering": 5 + "element": "57227959-07f9-43ea-a7ea-e05c808f13ca", + "path": "[\"f3837e86-33a2-4d0d-bfe5-b407712f8e9b\"]", + "ordering": 6 } }, { "model": "documents.elementpath", - "pk": "bddb93de-1872-4fe0-b675-874da068ded1", + "pk": "afdc2429-f5c7-4a27-8beb-ab8bc45f47b9", "fields": { - "element": "aeed5364-c2b1-45d9-81e8-f1591a1dc59d", - "path": "[\"1b829e8b-8ebe-4e86-a257-b7aaa2f7a22d\", \"b64fbadd-68b6-488c-89be-715ddb28590c\"]", + "element": "e0bdfb09-1e15-4ec2-a2dd-4c05714af2fd", + "path": "[\"f3837e86-33a2-4d0d-bfe5-b407712f8e9b\", \"b5af9a42-dcc1-408c-802d-e123cfc02180\"]", "ordering": 1 } }, { "model": "documents.elementpath", - "pk": "bfa4dd50-deba-4b73-b238-edcc6b7a5250", + "pk": "b185b192-db9c-40bb-9b22-054713fe69a7", "fields": { - "element": "f4427695-7181-43d7-b5f8-803846bd4493", - "path": "[\"1b829e8b-8ebe-4e86-a257-b7aaa2f7a22d\", \"98a67dc8-76d6-4a2c-931b-60a11ef601d9\"]", - "ordering": 1 + "element": "459ecf7a-849f-4105-b371-9cad0078031b", + "path": "[\"f3837e86-33a2-4d0d-bfe5-b407712f8e9b\", \"b5af9a42-dcc1-408c-802d-e123cfc02180\"]", + "ordering": 0 } }, { "model": "documents.elementpath", - "pk": "c9955f41-6d4b-41a3-8120-c63d3b160666", + "pk": "c57c4dad-b747-445a-8564-6bed39a49253", "fields": { - "element": "04a1537a-35c2-4426-9533-63817890b5f5", - "path": "[\"1b829e8b-8ebe-4e86-a257-b7aaa2f7a22d\", \"aa9cdf0c-7583-400e-8432-f3e388f5cf0d\"]", + "element": "7d767a0e-c46d-4f6d-9364-2ee0c1cc5df4", + "path": "[\"f3837e86-33a2-4d0d-bfe5-b407712f8e9b\", \"1c20d68c-ac53-4910-8e30-9e38e2cfa002\"]", "ordering": 0 } }, { "model": "documents.elementpath", - "pk": "d1acaf03-1549-4c19-9942-911242fb3ff3", + "pk": "c9a0d158-ff25-474f-8b56-9991f9cf9de1", "fields": { - "element": "bcd1fb0f-f910-4509-ba41-bf9fe67e67eb", - "path": "[\"1b829e8b-8ebe-4e86-a257-b7aaa2f7a22d\"]", - "ordering": 2 + "element": "a4258522-c766-4fa9-b6ff-78c78f03573b", + "path": "[\"f3837e86-33a2-4d0d-bfe5-b407712f8e9b\", \"dbffd4c4-a4f9-415f-b2e0-2fdab56a6358\"]", + "ordering": 0 } }, { "model": "documents.elementpath", - "pk": "d46f91b2-6251-4a52-807e-9baea5792f05", + "pk": "e2d2d30f-8f1a-4066-8ff9-f2f93cb7bd24", "fields": { - "element": "dca5cf68-56fb-48df-b54a-3c09f3030a37", - "path": "[\"1b829e8b-8ebe-4e86-a257-b7aaa2f7a22d\", \"aa9cdf0c-7583-400e-8432-f3e388f5cf0d\"]", + "element": "59010638-140d-49e8-b123-114f5b3729b8", + "path": "[\"f3837e86-33a2-4d0d-bfe5-b407712f8e9b\", \"de0f901f-fa4e-4756-8e87-1ca20222bd07\"]", "ordering": 1 } }, { "model": "documents.elementpath", - "pk": "dc4903c5-7e9a-4c6b-9035-3634c86b6e8c", + "pk": "f154a8ff-857c-40d0-bf9e-48bc88a44fa3", "fields": { - "element": "b3433c27-0c42-4013-a645-67a4388cdd94", - "path": "[\"1b829e8b-8ebe-4e86-a257-b7aaa2f7a22d\"]", - "ordering": 3 + "element": "f6a605c8-3020-486f-9b41-81f8aed93f8d", + "path": "[\"4ac58be9-28bc-43da-bb6a-8e3eb09c2e14\"]", + "ordering": 0 } }, { "model": "documents.elementpath", - "pk": "f3473681-ccc0-4d68-9094-3470b59af917", + "pk": "fe830d8a-0a19-462f-b973-dfa83adeff48", "fields": { - "element": "8bff15a3-9f39-4111-947b-3e97b5605140", - "path": "[\"1b829e8b-8ebe-4e86-a257-b7aaa2f7a22d\", \"aa9cdf0c-7583-400e-8432-f3e388f5cf0d\"]", - "ordering": 2 + "element": "de0f901f-fa4e-4756-8e87-1ca20222bd07", + "path": "[\"f3837e86-33a2-4d0d-bfe5-b407712f8e9b\"]", + "ordering": 1 } }, { "model": "documents.elementpath", - "pk": "ff35ba64-2da7-443a-acea-0e3bfba0c575", + "pk": "ff39b5b8-ad00-46a3-89de-14a48502543c", "fields": { - "element": "3597272d-3226-4f06-9a2f-6b479d247060", - "path": "[\"1b829e8b-8ebe-4e86-a257-b7aaa2f7a22d\", \"35886170-a511-424b-a1d5-984731ab4aed\"]", - "ordering": 0 + "element": "f2fa1e76-9784-49e9-91ba-9b53f19523dd", + "path": "[\"f3837e86-33a2-4d0d-bfe5-b407712f8e9b\", \"dbffd4c4-a4f9-415f-b2e0-2fdab56a6358\"]", + "ordering": 3 } }, { "model": "documents.entitytype", - "pk": "2af0ca14-15d5-466a-848a-43203617bb7c", + "pk": "490dd132-94e7-4f36-954f-23b558b9bcc2", "fields": { - "name": "person", + "name": "date", "color": "ff0000", - "corpus": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c" + "corpus": "63951d56-ce50-4d2d-9d11-cee0da8dffba" } }, { "model": "documents.entitytype", - "pk": "4792dc93-c1cf-4667-9bbb-b3fcbcad4cc1", + "pk": "56a84ca9-0dbd-4324-8ce9-ec54a39033f2", "fields": { - "name": "organization", + "name": "number", "color": "ff0000", - "corpus": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c" + "corpus": "63951d56-ce50-4d2d-9d11-cee0da8dffba" } }, { "model": "documents.entitytype", - "pk": "9ae7659b-4dfc-4ea0-8056-af415f0638e4", + "pk": "745dfa3a-dabf-4c0a-8307-b1b78b79b7cd", "fields": { - "name": "date", + "name": "person", "color": "ff0000", - "corpus": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c" + "corpus": "63951d56-ce50-4d2d-9d11-cee0da8dffba" } }, { "model": "documents.entitytype", - "pk": "b50c5cd1-d945-4f23-904f-ad3267bb8ca1", + "pk": "81aa65d0-9091-4ba9-a52c-35c7d6bb0ca7", "fields": { "name": "location", "color": "ff0000", - "corpus": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c" + "corpus": "63951d56-ce50-4d2d-9d11-cee0da8dffba" } }, { "model": "documents.entitytype", - "pk": "ca93998c-c334-444f-bde6-16824b3dcc25", + "pk": "9cb63e11-6047-4d46-a416-583422506c3a", "fields": { - "name": "number", + "name": "organization", "color": "ff0000", - "corpus": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c" + "corpus": "63951d56-ce50-4d2d-9d11-cee0da8dffba" } }, { "model": "documents.transcription", - "pk": "11d60d63-d5e7-4620-a7e8-5da8c5319617", + "pk": "0757b713-8e2f-46a0-9c7f-9ca670b40656", "fields": { - "element": "aa918643-b18a-4970-a5c4-f18ff3594462", - "worker_version": "52ea454c-52ea-4dfd-bc5d-b3e46ab6540d", - "worker_run": "cc5dbf61-20f3-4286-8a82-e303a58783a0", + "element": "9d9b6a50-5f16-480d-989b-0032ac1087e4", + "worker_version": "e08651f4-11ad-4dbe-ba3e-7d4ec8de0892", + "worker_run": "0239ce26-d3f8-44ed-b38d-f6c792e049e6", "text": "DATUM", "orientation": "horizontal-lr", "confidence": 1.0 @@ -782,11 +830,11 @@ }, { "model": "documents.transcription", - "pk": "1954413e-298a-4495-b681-b9c70bf653c4", + "pk": "14549604-fcdd-478e-a3dd-42fcb6f1fc40", "fields": { - "element": "dca5cf68-56fb-48df-b54a-3c09f3030a37", - "worker_version": "52ea454c-52ea-4dfd-bc5d-b3e46ab6540d", - "worker_run": "cc5dbf61-20f3-4286-8a82-e303a58783a0", + "element": "1becb3e4-c8c0-47e7-a2b2-402446d7280e", + "worker_version": "e08651f4-11ad-4dbe-ba3e-7d4ec8de0892", + "worker_run": "0239ce26-d3f8-44ed-b38d-f6c792e049e6", "text": "ROY", "orientation": "horizontal-lr", "confidence": 1.0 @@ -794,11 +842,11 @@ }, { "model": "documents.transcription", - "pk": "5541ecd7-eb79-4673-97ea-d47200b2f92f", + "pk": "35bad62f-185b-4e81-b232-41cf20d480fa", "fields": { - "element": "f4427695-7181-43d7-b5f8-803846bd4493", - "worker_version": "52ea454c-52ea-4dfd-bc5d-b3e46ab6540d", - "worker_run": "cc5dbf61-20f3-4286-8a82-e303a58783a0", + "element": "6497f558-1aa8-4f51-a10e-d359a8d06c0b", + "worker_version": "e08651f4-11ad-4dbe-ba3e-7d4ec8de0892", + "worker_run": "0239ce26-d3f8-44ed-b38d-f6c792e049e6", "text": "ROY", "orientation": "horizontal-lr", "confidence": 1.0 @@ -806,47 +854,47 @@ }, { "model": "documents.transcription", - "pk": "55d5a414-29b7-4c41-92cd-558a0d1a4f24", + "pk": "37d96a70-7000-4478-bc0f-bbdb6ed31307", "fields": { - "element": "98a67dc8-76d6-4a2c-931b-60a11ef601d9", - "worker_version": "52ea454c-52ea-4dfd-bc5d-b3e46ab6540d", - "worker_run": "cc5dbf61-20f3-4286-8a82-e303a58783a0", - "text": "Lorem ipsum dolor sit amet", + "element": "a4258522-c766-4fa9-b6ff-78c78f03573b", + "worker_version": "e08651f4-11ad-4dbe-ba3e-7d4ec8de0892", + "worker_run": "0239ce26-d3f8-44ed-b38d-f6c792e049e6", + "text": "PARIS", "orientation": "horizontal-lr", "confidence": 1.0 } }, { "model": "documents.transcription", - "pk": "7b4ea9fc-c18c-4f17-9c9c-ec8327722182", + "pk": "40383971-e7b7-4e0a-9cee-32c597f82764", "fields": { - "element": "d462dd86-bd44-4e74-95a3-44fefd397df8", - "worker_version": "52ea454c-52ea-4dfd-bc5d-b3e46ab6540d", - "worker_run": "cc5dbf61-20f3-4286-8a82-e303a58783a0", - "text": "ROY", + "element": "6508eb1d-b22d-4cf0-a94d-c32c9b614396", + "worker_version": "e08651f4-11ad-4dbe-ba3e-7d4ec8de0892", + "worker_run": "0239ce26-d3f8-44ed-b38d-f6c792e049e6", + "text": "DATUM", "orientation": "horizontal-lr", "confidence": 1.0 } }, { "model": "documents.transcription", - "pk": "7c87229d-63f8-4bae-8c8c-cad1827a0cfa", + "pk": "813e947e-b96c-41ee-9c11-5f1f9512c6c7", "fields": { - "element": "75db941d-00a5-4c9a-aa11-3594181b1cb2", - "worker_version": "52ea454c-52ea-4dfd-bc5d-b3e46ab6540d", - "worker_run": "cc5dbf61-20f3-4286-8a82-e303a58783a0", - "text": "PARIS", + "element": "59010638-140d-49e8-b123-114f5b3729b8", + "worker_version": "e08651f4-11ad-4dbe-ba3e-7d4ec8de0892", + "worker_run": "0239ce26-d3f8-44ed-b38d-f6c792e049e6", + "text": "ROY", "orientation": "horizontal-lr", "confidence": 1.0 } }, { "model": "documents.transcription", - "pk": "861c51ac-cec3-428c-9395-2a08ab02cbb8", + "pk": "d4b642bf-ac25-4254-8b88-aa09bc0be2f6", "fields": { - "element": "e792fe53-f1ed-459d-8b49-c7dae948c0db", - "worker_version": "52ea454c-52ea-4dfd-bc5d-b3e46ab6540d", - "worker_run": "cc5dbf61-20f3-4286-8a82-e303a58783a0", + "element": "831b2e1f-01eb-4724-978b-608b7d587f7c", + "worker_version": "e08651f4-11ad-4dbe-ba3e-7d4ec8de0892", + "worker_run": "0239ce26-d3f8-44ed-b38d-f6c792e049e6", "text": "PARIS", "orientation": "horizontal-lr", "confidence": 1.0 @@ -854,11 +902,11 @@ }, { "model": "documents.transcription", - "pk": "8847e7ba-719e-4241-9008-3558640e7d21", + "pk": "d76419d5-2bbc-48d2-b7ac-d84ad2b8801b", "fields": { - "element": "04a1537a-35c2-4426-9533-63817890b5f5", - "worker_version": "52ea454c-52ea-4dfd-bc5d-b3e46ab6540d", - "worker_run": "cc5dbf61-20f3-4286-8a82-e303a58783a0", + "element": "60e09255-6a01-47e1-b6ba-f7ca8e657b25", + "worker_version": "e08651f4-11ad-4dbe-ba3e-7d4ec8de0892", + "worker_run": "0239ce26-d3f8-44ed-b38d-f6c792e049e6", "text": "PARIS", "orientation": "horizontal-lr", "confidence": 1.0 @@ -866,11 +914,11 @@ }, { "model": "documents.transcription", - "pk": "88aaf736-a27c-4d27-b179-547e3951d2d3", + "pk": "ee4dfbbd-686b-471e-b6c3-79ecf504ffe9", "fields": { - "element": "8bff15a3-9f39-4111-947b-3e97b5605140", - "worker_version": "52ea454c-52ea-4dfd-bc5d-b3e46ab6540d", - "worker_run": "cc5dbf61-20f3-4286-8a82-e303a58783a0", + "element": "a463d187-d339-4f02-8307-420c5f19ac37", + "worker_version": "e08651f4-11ad-4dbe-ba3e-7d4ec8de0892", + "worker_run": "0239ce26-d3f8-44ed-b38d-f6c792e049e6", "text": "DATUM", "orientation": "horizontal-lr", "confidence": 1.0 @@ -878,51 +926,51 @@ }, { "model": "documents.transcription", - "pk": "f213292c-3072-4a40-b1c3-738d69d9eb7b", + "pk": "f9b6e10e-9599-4377-aac6-67c8137ce406", "fields": { - "element": "99b51971-fbff-45c6-9e89-8bd0a598e1a4", - "worker_version": "52ea454c-52ea-4dfd-bc5d-b3e46ab6540d", - "worker_run": "cc5dbf61-20f3-4286-8a82-e303a58783a0", - "text": "DATUM", + "element": "dbffd4c4-a4f9-415f-b2e0-2fdab56a6358", + "worker_version": "e08651f4-11ad-4dbe-ba3e-7d4ec8de0892", + "worker_run": "0239ce26-d3f8-44ed-b38d-f6c792e049e6", + "text": "Lorem ipsum dolor sit amet", "orientation": "horizontal-lr", "confidence": 1.0 } }, { "model": "documents.allowedmetadata", - "pk": "3d117229-fa3d-4065-916d-9fae54cd65bf", + "pk": "06f5f06c-b76b-4739-8e6a-6a647db98c1d", "fields": { - "corpus": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c", - "type": "date", - "name": "date" + "corpus": "63951d56-ce50-4d2d-9d11-cee0da8dffba", + "type": "location", + "name": "location" } }, { "model": "documents.allowedmetadata", - "pk": "d57bb037-f571-4cef-9638-13cf1b7f991c", + "pk": "4adfbe49-4703-48ab-a297-8a69062f4a72", "fields": { - "corpus": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c", - "type": "location", - "name": "location" + "corpus": "63951d56-ce50-4d2d-9d11-cee0da8dffba", + "type": "date", + "name": "date" } }, { "model": "documents.allowedmetadata", - "pk": "feaacc29-a0e1-4aa3-8db6-7f4368706dc2", + "pk": "8dd06811-8220-4cce-a5d5-421781f6767a", "fields": { - "corpus": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c", + "corpus": "63951d56-ce50-4d2d-9d11-cee0da8dffba", "type": "text", "name": "folio" } }, { "model": "documents.metadata", - "pk": "125b87e7-7cad-4ca9-8470-eaf7e77749f1", + "pk": "007ec832-989b-494f-8f79-8e1d6115aac1", "fields": { - "element": "aa9cdf0c-7583-400e-8432-f3e388f5cf0d", + "element": "6cd4aba2-e49f-4153-8112-9eb561ac8d3b", "name": "folio", "type": "text", - "value": "1v", + "value": "2r", "entity": null, "worker_version": null, "worker_run": null @@ -930,12 +978,12 @@ }, { "model": "documents.metadata", - "pk": "14f0de67-d225-49d0-81d9-b46d729c8e64", + "pk": "16a2140f-e31e-4b31-b0b8-9b8dfb59fe30", "fields": { - "element": "2331d5fb-fac5-431f-b313-9f0aa817c95b", - "name": "number", + "element": "bc176daf-0894-4560-abaa-5b74ee8a3426", + "name": "folio", "type": "text", - "value": "5", + "value": "1v", "entity": null, "worker_version": null, "worker_run": null @@ -943,12 +991,12 @@ }, { "model": "documents.metadata", - "pk": "1ee373d0-163b-4955-8656-8746c9f68390", + "pk": "407a698a-d6e0-45e8-8055-422756a3df60", "fields": { - "element": "4e0e24e6-52a3-4623-b8da-c6fb7471c861", - "name": "folio", + "element": "b5af9a42-dcc1-408c-802d-e123cfc02180", + "name": "number", "type": "text", - "value": "2r", + "value": "2", "entity": null, "worker_version": null, "worker_run": null @@ -956,12 +1004,12 @@ }, { "model": "documents.metadata", - "pk": "26ade477-4d41-4c93-9104-2baca79bc85a", + "pk": "70425669-b8d1-406e-b08f-8d5466caceff", "fields": { - "element": "bfa8252b-2859-4180-8358-fdcfedf82d7c", - "name": "number", + "element": "f6a605c8-3020-486f-9b41-81f8aed93f8d", + "name": "folio", "type": "text", - "value": "3", + "value": "1r", "entity": null, "worker_version": null, "worker_run": null @@ -969,12 +1017,12 @@ }, { "model": "documents.metadata", - "pk": "2eb35126-4dc5-468d-9e43-5d77fe7c16d7", + "pk": "858aa785-fd62-494b-bee0-fd40f3b6c9a2", "fields": { - "element": "bcd1fb0f-f910-4509-ba41-bf9fe67e67eb", - "name": "folio", + "element": "8de507e6-e85c-41d1-9ef4-361fe218f9bb", + "name": "number", "type": "text", - "value": "2r", + "value": "1", "entity": null, "worker_version": null, "worker_run": null @@ -982,12 +1030,12 @@ }, { "model": "documents.metadata", - "pk": "4e266775-04d5-48ff-95ce-f5c685e418e7", + "pk": "96633ee7-dd8f-420c-8932-d28cea115a9b", "fields": { - "element": "98a67dc8-76d6-4a2c-931b-60a11ef601d9", + "element": "de0f901f-fa4e-4756-8e87-1ca20222bd07", "name": "folio", "type": "text", - "value": "1r", + "value": "1v", "entity": null, "worker_version": null, "worker_run": null @@ -995,12 +1043,12 @@ }, { "model": "documents.metadata", - "pk": "8f57cc96-498f-4a69-a2b4-0bc31ca1345c", + "pk": "a0526316-88c3-4378-adbc-e872df4fa064", "fields": { - "element": "b64fbadd-68b6-488c-89be-715ddb28590c", + "element": "1c20d68c-ac53-4910-8e30-9e38e2cfa002", "name": "number", "type": "text", - "value": "2", + "value": "3", "entity": null, "worker_version": null, "worker_run": null @@ -1008,12 +1056,12 @@ }, { "model": "documents.metadata", - "pk": "960ecad0-1398-4800-984d-9b088568b8bd", + "pk": "aaafdad3-22a7-428f-9dbb-4938cbfcc26e", "fields": { - "element": "b3433c27-0c42-4013-a645-67a4388cdd94", + "element": "2cdad071-77b8-4e74-b05a-d87317cfbcc5", "name": "number", "type": "text", - "value": "1", + "value": "5", "entity": null, "worker_version": null, "worker_run": null @@ -1021,12 +1069,12 @@ }, { "model": "documents.metadata", - "pk": "c39cfe86-2cb8-4ada-81f3-5530597d5c62", + "pk": "c45df60f-e4c4-40ed-b742-8557ad83791d", "fields": { - "element": "95181a58-4d6f-45fc-881a-fc42e8b7c721", + "element": "39d41f9e-c8eb-47c2-89b0-c43272a171ac", "name": "folio", "type": "text", - "value": "1r", + "value": "2r", "entity": null, "worker_version": null, "worker_run": null @@ -1034,9 +1082,9 @@ }, { "model": "documents.metadata", - "pk": "f1497e1a-f9c7-4da9-aa53-72529d9b1ffe", + "pk": "de267c67-57f8-4bcb-995d-5920d2b6f19a", "fields": { - "element": "35886170-a511-424b-a1d5-984731ab4aed", + "element": "57227959-07f9-43ea-a7ea-e05c808f13ca", "name": "number", "type": "text", "value": "4", @@ -1047,12 +1095,12 @@ }, { "model": "documents.metadata", - "pk": "fe2703f5-5acd-4631-a12e-15c88b078898", + "pk": "f36cd04d-85dc-45c6-8155-14fd39c69387", "fields": { - "element": "37ae14da-4051-43f7-ba68-2ed3014f508f", + "element": "dbffd4c4-a4f9-415f-b2e0-2fdab56a6358", "name": "folio", "type": "text", - "value": "1v", + "value": "1r", "entity": null, "worker_version": null, "worker_run": null @@ -1075,12 +1123,12 @@ }, { "model": "images.image", - "pk": "2da54dd3-7b52-4398-9c8c-a693d831799a", + "pk": "1f1ba4d8-9828-48b5-9229-7fff0905168e", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", "server": 1, - "path": "img1", + "path": "img6", "width": 1000, "height": 1000, "hash": null, @@ -1089,12 +1137,12 @@ }, { "model": "images.image", - "pk": "6a72cd19-bfb5-41e4-984d-cd4430a9ae82", + "pk": "6f7e9ea3-06b3-4b7f-9900-9676d71f5bc1", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", "server": 1, - "path": "img6", + "path": "img4", "width": 1000, "height": 1000, "hash": null, @@ -1103,12 +1151,12 @@ }, { "model": "images.image", - "pk": "705621ae-c0f0-421d-bf76-b36e96334f2e", + "pk": "819f362b-9955-4d9f-9095-1c1ecf893753", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", "server": 1, - "path": "img4", + "path": "img2", "width": 1000, "height": 1000, "hash": null, @@ -1117,12 +1165,12 @@ }, { "model": "images.image", - "pk": "76cbee11-d210-4113-a491-65160eb43e18", + "pk": "ab1312d6-3c70-4e31-a440-fdbdb2acf4be", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", "server": 1, - "path": "img5", + "path": "img3", "width": 1000, "height": 1000, "hash": null, @@ -1131,12 +1179,12 @@ }, { "model": "images.image", - "pk": "9afed6a2-17ef-4d8a-8047-f7d8807c6fa2", + "pk": "d09bfb8d-888d-4899-ad03-2133d2efcbf3", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", "server": 1, - "path": "img3", + "path": "img5", "width": 1000, "height": 1000, "hash": null, @@ -1145,12 +1193,12 @@ }, { "model": "images.image", - "pk": "b33648fa-233d-4599-825a-2f8f87ac16d7", + "pk": "fc31a9cc-53bc-4519-96b1-7cbbf5dc50a2", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", "server": 1, - "path": "img2", + "path": "img1", "width": 1000, "height": 1000, "hash": null, @@ -1160,7 +1208,7 @@ { "model": "users.user", "fields": { - "password": "pbkdf2_sha256$720000$70GuFXETxOwQQSz9Aib3x0$HOlbbzXT4WIAExYuxRM181VO89uzRtNBInwT5Obm7d0=", + "password": "pbkdf2_sha256$720000$ApixV1UnAaQVRQq31fCgBR$GWHNyINgNtYkKdkIGkg0/g+ZQbJu+a4wEO4eLACL+oo=", "last_login": null, "email": "root@root.fr", "display_name": "Admin", @@ -1179,7 +1227,7 @@ { "model": "users.user", "fields": { - "password": "pbkdf2_sha256$720000$1kDQjHwH1uaBm6iQiD3l4T$DsGoCMTWCnmz6Mukwifc7ysuW9ro8Mwxc1MMARCujiQ=", + "password": "pbkdf2_sha256$720000$WHwscCnLna7TwRWdIog60g$4HHqZt1C4yZlCZhblkCQuF0iU68iI9EMz/jWkO21lR4=", "last_login": null, "email": "user@user.fr", "display_name": "Test user", @@ -1235,7 +1283,7 @@ }, { "model": "users.group", - "pk": "f26f4984-c439-474c-a28a-35ab207c4eea", + "pk": "ebc7b824-5b6e-474f-8afc-4c68686e997d", "fields": { "name": "User group", "public": false, @@ -1244,19 +1292,19 @@ }, { "model": "ponos.farm", - "pk": "79314671-52bc-42c7-aaa1-ace2aefc73a1", + "pk": "273e9f9c-a7bc-447c-ae13-94242dae89f1", "fields": { "name": "Wheat farm", - "seed": "4b0eb4d32ede0a4e9cf0a3190485fe461a7d4a14fb823d27bc513491ac5472b4" + "seed": "b3d8fe2c0d2edac0d775c82d09bc519245fc676e2025efd53c273b154c9ea417" } }, { "model": "training.dataset", - "pk": "99363540-0f19-4319-b3a2-f88583dc3add", + "pk": "7002fe39-5bac-4414-a773-4b877dd5f03d", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c", + "corpus": "63951d56-ce50-4d2d-9d11-cee0da8dffba", "creator": [ "user@user.fr" ], @@ -1269,11 +1317,11 @@ }, { "model": "training.dataset", - "pk": "a672f8a6-85d6-407f-a58a-809da093931e", + "pk": "74abd114-dea6-4ca3-a6c9-6cf0169bccb2", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c", + "corpus": "63951d56-ce50-4d2d-9d11-cee0da8dffba", "creator": [ "user@user.fr" ], @@ -1286,55 +1334,55 @@ }, { "model": "training.datasetset", - "pk": "1eee70f7-2a92-4e8d-b14f-c9bbc89c2aa2", + "pk": "09bc5aa0-ed44-413b-b792-357d2fb24f66", "fields": { - "name": "dev", - "dataset": "a672f8a6-85d6-407f-a58a-809da093931e" + "name": "test", + "dataset": "7002fe39-5bac-4414-a773-4b877dd5f03d" } }, { "model": "training.datasetset", - "pk": "1f7310d0-6618-4f1a-a43e-f7e7ddfed296", + "pk": "579e630a-63f6-449a-bb36-688b924f34d2", "fields": { "name": "test", - "dataset": "99363540-0f19-4319-b3a2-f88583dc3add" + "dataset": "74abd114-dea6-4ca3-a6c9-6cf0169bccb2" } }, { "model": "training.datasetset", - "pk": "32ce4cbc-a76f-4b96-8ee8-a817257ea4c1", + "pk": "a26c2ecd-f3d3-4d69-ac6d-df1343ef27ab", "fields": { - "name": "test", - "dataset": "a672f8a6-85d6-407f-a58a-809da093931e" + "name": "dev", + "dataset": "74abd114-dea6-4ca3-a6c9-6cf0169bccb2" } }, { "model": "training.datasetset", - "pk": "678e7414-489d-4418-a3b2-824c632b2aea", + "pk": "eb9207ec-6838-483d-8cf3-8c5a974f54dd", "fields": { "name": "train", - "dataset": "99363540-0f19-4319-b3a2-f88583dc3add" + "dataset": "7002fe39-5bac-4414-a773-4b877dd5f03d" } }, { "model": "training.datasetset", - "pk": "b4dcf070-bde8-4a44-8ea9-7556f3f163eb", + "pk": "f56d9cf0-19f7-4d77-a6ec-091180501a31", "fields": { - "name": "dev", - "dataset": "99363540-0f19-4319-b3a2-f88583dc3add" + "name": "train", + "dataset": "74abd114-dea6-4ca3-a6c9-6cf0169bccb2" } }, { "model": "training.datasetset", - "pk": "d3da05ed-e913-4616-9301-52c86729f5de", + "pk": "f87270ea-711d-4021-846a-179c19ccdb94", "fields": { - "name": "train", - "dataset": "a672f8a6-85d6-407f-a58a-809da093931e" + "name": "dev", + "dataset": "7002fe39-5bac-4414-a773-4b877dd5f03d" } }, { "model": "users.right", - "pk": "0b83b838-8bfb-4e6b-8f04-cc86340a622d", + "pk": "4e60a35c-50aa-4d57-af7e-ff3d79370aab", "fields": { "user": [ "user@user.fr" @@ -1344,88 +1392,88 @@ "users", "group" ], - "content_id": "f26f4984-c439-474c-a28a-35ab207c4eea", + "content_id": "ebc7b824-5b6e-474f-8afc-4c68686e997d", "level": 100 } }, { "model": "users.right", - "pk": "3f5c89f0-1d8b-4d00-b3a7-e48895299586", + "pk": "636f4fba-e823-408c-b272-e18a6b716d8c", "fields": { "user": [ - "user@user.fr" + "user3@user.fr" ], "group": null, "content_type": [ - "ponos", - "farm" + "users", + "group" ], - "content_id": "79314671-52bc-42c7-aaa1-ace2aefc73a1", + "content_id": "ebc7b824-5b6e-474f-8afc-4c68686e997d", "level": 10 } }, { "model": "users.right", - "pk": "504868f8-4faa-47a5-9bb3-2744b88b1615", + "pk": "8419d643-13ab-4851-b087-4cf5ebee0ab1", "fields": { "user": [ - "user3@user.fr" + "user2@user.fr" ], "group": null, "content_type": [ "users", "group" ], - "content_id": "f26f4984-c439-474c-a28a-35ab207c4eea", - "level": 10 + "content_id": "ebc7b824-5b6e-474f-8afc-4c68686e997d", + "level": 50 } }, { "model": "users.right", - "pk": "50c7f9d3-e6ac-4a88-8bff-d21600924f55", + "pk": "9974ded0-a68b-46c0-98c1-a65e0883b342", "fields": { "user": [ "user@user.fr" ], "group": null, "content_type": [ - "documents", - "corpus" + "ponos", + "farm" ], - "content_id": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c", - "level": 100 + "content_id": "273e9f9c-a7bc-447c-ae13-94242dae89f1", + "level": 10 } }, { "model": "users.right", - "pk": "d8e17a82-ad52-42c1-9713-eb94832a8095", + "pk": "b4d389d9-a17f-4237-a7d9-db9aec6ffe43", "fields": { "user": [ - "user2@user.fr" + "user@user.fr" ], "group": null, "content_type": [ - "users", - "group" + "documents", + "corpus" ], - "content_id": "f26f4984-c439-474c-a28a-35ab207c4eea", - "level": 50 + "content_id": "63951d56-ce50-4d2d-9d11-cee0da8dffba", + "level": 100 } }, { "model": "documents.element", - "pk": "04a1537a-35c2-4426-9533-63817890b5f5", + "pk": "1becb3e4-c8c0-47e7-a2b2-402446d7280e", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c", - "type": "b61111fd-61e7-45f1-aa99-20c5f105357a", - "name": "PARIS", + "corpus": "63951d56-ce50-4d2d-9d11-cee0da8dffba", + "type": "1d078b43-bb0d-44e6-a667-fec714dbf057", + "name": "ROY", "creator": null, "worker_version": null, "worker_run": null, - "image": "b33648fa-233d-4599-825a-2f8f87ac16d7", - "polygon": "LINEARRING (100 100, 100 200, 200 200, 200 100, 100 100)", + "image": "ab1312d6-3c70-4e31-a440-fdbdb2acf4be", + "polygon": "LINEARRING (400 400, 400 500, 500 500, 500 400, 400 400)", "rotation_angle": 0, "mirrored": false, "confidence": null @@ -1433,13 +1481,13 @@ }, { "model": "documents.element", - "pk": "1b829e8b-8ebe-4e86-a257-b7aaa2f7a22d", + "pk": "1c20d68c-ac53-4910-8e30-9e38e2cfa002", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c", - "type": "b445d965-667a-4690-8732-98659d8645dd", - "name": "Volume 1", + "corpus": "63951d56-ce50-4d2d-9d11-cee0da8dffba", + "type": "805d2171-94f5-45e3-ae85-fe511c88c141", + "name": "Act 3", "creator": null, "worker_version": null, "worker_run": null, @@ -1452,12 +1500,12 @@ }, { "model": "documents.element", - "pk": "2331d5fb-fac5-431f-b313-9f0aa817c95b", + "pk": "2cdad071-77b8-4e74-b05a-d87317cfbcc5", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c", - "type": "b71d6bbe-b2b0-46a8-a983-e34cab59aa1e", + "corpus": "63951d56-ce50-4d2d-9d11-cee0da8dffba", + "type": "805d2171-94f5-45e3-ae85-fe511c88c141", "name": "Act 5", "creator": null, "worker_version": null, @@ -1471,18 +1519,18 @@ }, { "model": "documents.element", - "pk": "2d08b6c7-2d0e-4d51-b989-a724ce6f40c7", + "pk": "39d41f9e-c8eb-47c2-89b0-c43272a171ac", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c", - "type": "2b310b2a-33bd-42fe-bfb1-2915928b00cc", - "name": "Surface D", + "corpus": "63951d56-ce50-4d2d-9d11-cee0da8dffba", + "type": "53d093a9-3d20-4fcf-9b55-c77e788e19a9", + "name": "Volume 1, page 2r", "creator": null, "worker_version": null, "worker_run": null, - "image": "9afed6a2-17ef-4d8a-8047-f7d8807c6fa2", - "polygon": "LINEARRING (0 0, 0 300, 300 300, 300 0, 0 0)", + "image": "ab1312d6-3c70-4e31-a440-fdbdb2acf4be", + "polygon": "LINEARRING (0 0, 0 1000, 1000 1000, 1000 0, 0 0)", "rotation_angle": 0, "mirrored": false, "confidence": null @@ -1490,18 +1538,18 @@ }, { "model": "documents.element", - "pk": "35886170-a511-424b-a1d5-984731ab4aed", + "pk": "459ecf7a-849f-4105-b371-9cad0078031b", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c", - "type": "b71d6bbe-b2b0-46a8-a983-e34cab59aa1e", - "name": "Act 4", + "corpus": "63951d56-ce50-4d2d-9d11-cee0da8dffba", + "type": "748cabf2-e002-431f-b582-5c39ca57b44e", + "name": "Surface B", "creator": null, "worker_version": null, "worker_run": null, - "image": null, - "polygon": null, + "image": "fc31a9cc-53bc-4519-96b1-7cbbf5dc50a2", + "polygon": "LINEARRING (600 600, 600 1000, 1000 1000, 1000 600, 600 600)", "rotation_angle": 0, "mirrored": false, "confidence": null @@ -1509,18 +1557,18 @@ }, { "model": "documents.element", - "pk": "3597272d-3226-4f06-9a2f-6b479d247060", + "pk": "4ac58be9-28bc-43da-bb6a-8e3eb09c2e14", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c", - "type": "2b310b2a-33bd-42fe-bfb1-2915928b00cc", - "name": "Surface E", + "corpus": "63951d56-ce50-4d2d-9d11-cee0da8dffba", + "type": "acd7e2f0-e4c6-48e6-9bf5-e9be06e1ae6c", + "name": "Volume 2", "creator": null, "worker_version": null, "worker_run": null, - "image": "9afed6a2-17ef-4d8a-8047-f7d8807c6fa2", - "polygon": "LINEARRING (300 300, 300 600, 600 600, 600 300, 300 300)", + "image": null, + "polygon": null, "rotation_angle": 0, "mirrored": false, "confidence": null @@ -1528,18 +1576,18 @@ }, { "model": "documents.element", - "pk": "37ae14da-4051-43f7-ba68-2ed3014f508f", + "pk": "50e1ccf3-dbeb-4c29-9d77-85e05f90c426", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c", - "type": "324f4646-138e-425b-bb6c-76cbdd8c038c", - "name": "Volume 2, page 1v", + "corpus": "63951d56-ce50-4d2d-9d11-cee0da8dffba", + "type": "748cabf2-e002-431f-b582-5c39ca57b44e", + "name": "Surface A", "creator": null, "worker_version": null, "worker_run": null, - "image": "76cbee11-d210-4113-a491-65160eb43e18", - "polygon": "LINEARRING (0 0, 0 1000, 1000 1000, 1000 0, 0 0)", + "image": "fc31a9cc-53bc-4519-96b1-7cbbf5dc50a2", + "polygon": "LINEARRING (0 0, 0 600, 600 600, 600 0, 0 0)", "rotation_angle": 0, "mirrored": false, "confidence": null @@ -1547,18 +1595,18 @@ }, { "model": "documents.element", - "pk": "4e0e24e6-52a3-4623-b8da-c6fb7471c861", + "pk": "57227959-07f9-43ea-a7ea-e05c808f13ca", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c", - "type": "324f4646-138e-425b-bb6c-76cbdd8c038c", - "name": "Volume 2, page 2r", + "corpus": "63951d56-ce50-4d2d-9d11-cee0da8dffba", + "type": "805d2171-94f5-45e3-ae85-fe511c88c141", + "name": "Act 4", "creator": null, "worker_version": null, "worker_run": null, - "image": "6a72cd19-bfb5-41e4-984d-cd4430a9ae82", - "polygon": "LINEARRING (0 0, 0 1000, 1000 1000, 1000 0, 0 0)", + "image": null, + "polygon": null, "rotation_angle": 0, "mirrored": false, "confidence": null @@ -1566,18 +1614,18 @@ }, { "model": "documents.element", - "pk": "6db35fcd-3c50-4a16-b204-568850b6dbb0", + "pk": "59010638-140d-49e8-b123-114f5b3729b8", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c", - "type": "b445d965-667a-4690-8732-98659d8645dd", - "name": "Volume 2", + "corpus": "63951d56-ce50-4d2d-9d11-cee0da8dffba", + "type": "1d078b43-bb0d-44e6-a667-fec714dbf057", + "name": "ROY", "creator": null, "worker_version": null, "worker_run": null, - "image": null, - "polygon": null, + "image": "819f362b-9955-4d9f-9095-1c1ecf893753", + "polygon": "LINEARRING (400 400, 400 500, 500 500, 500 400, 400 400)", "rotation_angle": 0, "mirrored": false, "confidence": null @@ -1585,17 +1633,17 @@ }, { "model": "documents.element", - "pk": "75db941d-00a5-4c9a-aa11-3594181b1cb2", + "pk": "60e09255-6a01-47e1-b6ba-f7ca8e657b25", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c", - "type": "b61111fd-61e7-45f1-aa99-20c5f105357a", + "corpus": "63951d56-ce50-4d2d-9d11-cee0da8dffba", + "type": "1d078b43-bb0d-44e6-a667-fec714dbf057", "name": "PARIS", "creator": null, "worker_version": null, "worker_run": null, - "image": "2da54dd3-7b52-4398-9c8c-a693d831799a", + "image": "819f362b-9955-4d9f-9095-1c1ecf893753", "polygon": "LINEARRING (100 100, 100 200, 200 200, 200 100, 100 100)", "rotation_angle": 0, "mirrored": false, @@ -1604,18 +1652,18 @@ }, { "model": "documents.element", - "pk": "8bff15a3-9f39-4111-947b-3e97b5605140", + "pk": "6497f558-1aa8-4f51-a10e-d359a8d06c0b", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c", - "type": "b61111fd-61e7-45f1-aa99-20c5f105357a", - "name": "DATUM", + "corpus": "63951d56-ce50-4d2d-9d11-cee0da8dffba", + "type": "1d078b43-bb0d-44e6-a667-fec714dbf057", + "name": "ROY", "creator": null, "worker_version": null, "worker_run": null, - "image": "b33648fa-233d-4599-825a-2f8f87ac16d7", - "polygon": "LINEARRING (700 700, 700 800, 800 800, 800 700, 700 700)", + "image": "fc31a9cc-53bc-4519-96b1-7cbbf5dc50a2", + "polygon": "LINEARRING (400 400, 400 500, 500 500, 500 400, 400 400)", "rotation_angle": 0, "mirrored": false, "confidence": null @@ -1623,18 +1671,18 @@ }, { "model": "documents.element", - "pk": "95181a58-4d6f-45fc-881a-fc42e8b7c721", + "pk": "6508eb1d-b22d-4cf0-a94d-c32c9b614396", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c", - "type": "324f4646-138e-425b-bb6c-76cbdd8c038c", - "name": "Volume 2, page 1r", + "corpus": "63951d56-ce50-4d2d-9d11-cee0da8dffba", + "type": "1d078b43-bb0d-44e6-a667-fec714dbf057", + "name": "DATUM", "creator": null, "worker_version": null, "worker_run": null, - "image": "705621ae-c0f0-421d-bf76-b36e96334f2e", - "polygon": "LINEARRING (0 0, 0 1000, 1000 1000, 1000 0, 0 0)", + "image": "ab1312d6-3c70-4e31-a440-fdbdb2acf4be", + "polygon": "LINEARRING (700 700, 700 800, 800 800, 800 700, 700 700)", "rotation_angle": 0, "mirrored": false, "confidence": null @@ -1642,17 +1690,17 @@ }, { "model": "documents.element", - "pk": "98a67dc8-76d6-4a2c-931b-60a11ef601d9", + "pk": "6cd4aba2-e49f-4153-8112-9eb561ac8d3b", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c", - "type": "324f4646-138e-425b-bb6c-76cbdd8c038c", - "name": "Volume 1, page 1r", + "corpus": "63951d56-ce50-4d2d-9d11-cee0da8dffba", + "type": "53d093a9-3d20-4fcf-9b55-c77e788e19a9", + "name": "Volume 2, page 2r", "creator": null, "worker_version": null, "worker_run": null, - "image": "2da54dd3-7b52-4398-9c8c-a693d831799a", + "image": "1f1ba4d8-9828-48b5-9229-7fff0905168e", "polygon": "LINEARRING (0 0, 0 1000, 1000 1000, 1000 0, 0 0)", "rotation_angle": 0, "mirrored": false, @@ -1661,18 +1709,18 @@ }, { "model": "documents.element", - "pk": "99b4ca3d-834f-4010-b94f-ea02e307ceb4", + "pk": "7047387b-146e-4735-a3d7-eefa9b4a64a9", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c", - "type": "9d74a557-1c41-4fcb-9ac4-706fc8438a01", - "name": "Text line", + "corpus": "63951d56-ce50-4d2d-9d11-cee0da8dffba", + "type": "748cabf2-e002-431f-b582-5c39ca57b44e", + "name": "Surface E", "creator": null, "worker_version": null, "worker_run": null, - "image": "2da54dd3-7b52-4398-9c8c-a693d831799a", - "polygon": "LINEARRING (400 400, 400 500, 500 500, 500 400, 400 400)", + "image": "ab1312d6-3c70-4e31-a440-fdbdb2acf4be", + "polygon": "LINEARRING (300 300, 300 600, 600 600, 600 300, 300 300)", "rotation_angle": 0, "mirrored": false, "confidence": null @@ -1680,18 +1728,18 @@ }, { "model": "documents.element", - "pk": "99b51971-fbff-45c6-9e89-8bd0a598e1a4", + "pk": "7d767a0e-c46d-4f6d-9364-2ee0c1cc5df4", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c", - "type": "b61111fd-61e7-45f1-aa99-20c5f105357a", - "name": "DATUM", + "corpus": "63951d56-ce50-4d2d-9d11-cee0da8dffba", + "type": "748cabf2-e002-431f-b582-5c39ca57b44e", + "name": "Surface D", "creator": null, "worker_version": null, "worker_run": null, - "image": "9afed6a2-17ef-4d8a-8047-f7d8807c6fa2", - "polygon": "LINEARRING (700 700, 700 800, 800 800, 800 700, 700 700)", + "image": "ab1312d6-3c70-4e31-a440-fdbdb2acf4be", + "polygon": "LINEARRING (0 0, 0 300, 300 300, 300 0, 0 0)", "rotation_angle": 0, "mirrored": false, "confidence": null @@ -1699,18 +1747,18 @@ }, { "model": "documents.element", - "pk": "aa918643-b18a-4970-a5c4-f18ff3594462", + "pk": "831b2e1f-01eb-4724-978b-608b7d587f7c", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c", - "type": "b61111fd-61e7-45f1-aa99-20c5f105357a", - "name": "DATUM", + "corpus": "63951d56-ce50-4d2d-9d11-cee0da8dffba", + "type": "1d078b43-bb0d-44e6-a667-fec714dbf057", + "name": "PARIS", "creator": null, "worker_version": null, "worker_run": null, - "image": "2da54dd3-7b52-4398-9c8c-a693d831799a", - "polygon": "LINEARRING (700 700, 700 800, 800 800, 800 700, 700 700)", + "image": "ab1312d6-3c70-4e31-a440-fdbdb2acf4be", + "polygon": "LINEARRING (100 100, 100 200, 200 200, 200 100, 100 100)", "rotation_angle": 0, "mirrored": false, "confidence": null @@ -1718,18 +1766,18 @@ }, { "model": "documents.element", - "pk": "aa9cdf0c-7583-400e-8432-f3e388f5cf0d", + "pk": "8de507e6-e85c-41d1-9ef4-361fe218f9bb", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c", - "type": "324f4646-138e-425b-bb6c-76cbdd8c038c", - "name": "Volume 1, page 1v", + "corpus": "63951d56-ce50-4d2d-9d11-cee0da8dffba", + "type": "805d2171-94f5-45e3-ae85-fe511c88c141", + "name": "Act 1", "creator": null, "worker_version": null, "worker_run": null, - "image": "b33648fa-233d-4599-825a-2f8f87ac16d7", - "polygon": "LINEARRING (0 0, 0 1000, 1000 1000, 1000 0, 0 0)", + "image": null, + "polygon": null, "rotation_angle": 0, "mirrored": false, "confidence": null @@ -1737,18 +1785,18 @@ }, { "model": "documents.element", - "pk": "aeed5364-c2b1-45d9-81e8-f1591a1dc59d", + "pk": "9d9b6a50-5f16-480d-989b-0032ac1087e4", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c", - "type": "2b310b2a-33bd-42fe-bfb1-2915928b00cc", - "name": "Surface C", + "corpus": "63951d56-ce50-4d2d-9d11-cee0da8dffba", + "type": "1d078b43-bb0d-44e6-a667-fec714dbf057", + "name": "DATUM", "creator": null, "worker_version": null, "worker_run": null, - "image": "b33648fa-233d-4599-825a-2f8f87ac16d7", - "polygon": "LINEARRING (0 0, 0 1000, 1000 1000, 1000 0, 0 0)", + "image": "fc31a9cc-53bc-4519-96b1-7cbbf5dc50a2", + "polygon": "LINEARRING (700 700, 700 800, 800 800, 800 700, 700 700)", "rotation_angle": 0, "mirrored": false, "confidence": null @@ -1756,18 +1804,18 @@ }, { "model": "documents.element", - "pk": "b3433c27-0c42-4013-a645-67a4388cdd94", + "pk": "a4258522-c766-4fa9-b6ff-78c78f03573b", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c", - "type": "b71d6bbe-b2b0-46a8-a983-e34cab59aa1e", - "name": "Act 1", + "corpus": "63951d56-ce50-4d2d-9d11-cee0da8dffba", + "type": "1d078b43-bb0d-44e6-a667-fec714dbf057", + "name": "PARIS", "creator": null, "worker_version": null, "worker_run": null, - "image": null, - "polygon": null, + "image": "fc31a9cc-53bc-4519-96b1-7cbbf5dc50a2", + "polygon": "LINEARRING (100 100, 100 200, 200 200, 200 100, 100 100)", "rotation_angle": 0, "mirrored": false, "confidence": null @@ -1775,18 +1823,18 @@ }, { "model": "documents.element", - "pk": "b64fbadd-68b6-488c-89be-715ddb28590c", + "pk": "a463d187-d339-4f02-8307-420c5f19ac37", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c", - "type": "b71d6bbe-b2b0-46a8-a983-e34cab59aa1e", - "name": "Act 2", + "corpus": "63951d56-ce50-4d2d-9d11-cee0da8dffba", + "type": "1d078b43-bb0d-44e6-a667-fec714dbf057", + "name": "DATUM", "creator": null, "worker_version": null, "worker_run": null, - "image": null, - "polygon": null, + "image": "819f362b-9955-4d9f-9095-1c1ecf893753", + "polygon": "LINEARRING (700 700, 700 800, 800 800, 800 700, 700 700)", "rotation_angle": 0, "mirrored": false, "confidence": null @@ -1794,18 +1842,18 @@ }, { "model": "documents.element", - "pk": "bc662943-dfa5-4696-8480-fc236ff3651e", + "pk": "b5af9a42-dcc1-408c-802d-e123cfc02180", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c", - "type": "2b310b2a-33bd-42fe-bfb1-2915928b00cc", - "name": "Surface A", + "corpus": "63951d56-ce50-4d2d-9d11-cee0da8dffba", + "type": "805d2171-94f5-45e3-ae85-fe511c88c141", + "name": "Act 2", "creator": null, "worker_version": null, "worker_run": null, - "image": "2da54dd3-7b52-4398-9c8c-a693d831799a", - "polygon": "LINEARRING (0 0, 0 600, 600 600, 600 0, 0 0)", + "image": null, + "polygon": null, "rotation_angle": 0, "mirrored": false, "confidence": null @@ -1813,17 +1861,17 @@ }, { "model": "documents.element", - "pk": "bcd1fb0f-f910-4509-ba41-bf9fe67e67eb", + "pk": "bc176daf-0894-4560-abaa-5b74ee8a3426", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c", - "type": "324f4646-138e-425b-bb6c-76cbdd8c038c", - "name": "Volume 1, page 2r", + "corpus": "63951d56-ce50-4d2d-9d11-cee0da8dffba", + "type": "53d093a9-3d20-4fcf-9b55-c77e788e19a9", + "name": "Volume 2, page 1v", "creator": null, "worker_version": null, "worker_run": null, - "image": "9afed6a2-17ef-4d8a-8047-f7d8807c6fa2", + "image": "d09bfb8d-888d-4899-ad03-2133d2efcbf3", "polygon": "LINEARRING (0 0, 0 1000, 1000 1000, 1000 0, 0 0)", "rotation_angle": 0, "mirrored": false, @@ -1832,18 +1880,18 @@ }, { "model": "documents.element", - "pk": "bfa8252b-2859-4180-8358-fdcfedf82d7c", + "pk": "c49d9815-f0c8-471e-8e23-d24976505129", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c", - "type": "b71d6bbe-b2b0-46a8-a983-e34cab59aa1e", - "name": "Act 3", + "corpus": "63951d56-ce50-4d2d-9d11-cee0da8dffba", + "type": "748cabf2-e002-431f-b582-5c39ca57b44e", + "name": "Surface F", "creator": null, "worker_version": null, "worker_run": null, - "image": null, - "polygon": null, + "image": "ab1312d6-3c70-4e31-a440-fdbdb2acf4be", + "polygon": "LINEARRING (600 600, 600 1000, 1000 1000, 1000 600, 600 600)", "rotation_angle": 0, "mirrored": false, "confidence": null @@ -1851,18 +1899,18 @@ }, { "model": "documents.element", - "pk": "cf7188f3-98f6-49ae-ab3f-d3ab8b53608e", + "pk": "dbffd4c4-a4f9-415f-b2e0-2fdab56a6358", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c", - "type": "2b310b2a-33bd-42fe-bfb1-2915928b00cc", - "name": "Surface B", + "corpus": "63951d56-ce50-4d2d-9d11-cee0da8dffba", + "type": "53d093a9-3d20-4fcf-9b55-c77e788e19a9", + "name": "Volume 1, page 1r", "creator": null, "worker_version": null, "worker_run": null, - "image": "2da54dd3-7b52-4398-9c8c-a693d831799a", - "polygon": "LINEARRING (600 600, 600 1000, 1000 1000, 1000 600, 600 600)", + "image": "fc31a9cc-53bc-4519-96b1-7cbbf5dc50a2", + "polygon": "LINEARRING (0 0, 0 1000, 1000 1000, 1000 0, 0 0)", "rotation_angle": 0, "mirrored": false, "confidence": null @@ -1870,18 +1918,18 @@ }, { "model": "documents.element", - "pk": "d462dd86-bd44-4e74-95a3-44fefd397df8", + "pk": "de0f901f-fa4e-4756-8e87-1ca20222bd07", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c", - "type": "b61111fd-61e7-45f1-aa99-20c5f105357a", - "name": "ROY", + "corpus": "63951d56-ce50-4d2d-9d11-cee0da8dffba", + "type": "53d093a9-3d20-4fcf-9b55-c77e788e19a9", + "name": "Volume 1, page 1v", "creator": null, "worker_version": null, "worker_run": null, - "image": "9afed6a2-17ef-4d8a-8047-f7d8807c6fa2", - "polygon": "LINEARRING (400 400, 400 500, 500 500, 500 400, 400 400)", + "image": "819f362b-9955-4d9f-9095-1c1ecf893753", + "polygon": "LINEARRING (0 0, 0 1000, 1000 1000, 1000 0, 0 0)", "rotation_angle": 0, "mirrored": false, "confidence": null @@ -1889,18 +1937,18 @@ }, { "model": "documents.element", - "pk": "dca5cf68-56fb-48df-b54a-3c09f3030a37", + "pk": "e0bdfb09-1e15-4ec2-a2dd-4c05714af2fd", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c", - "type": "b61111fd-61e7-45f1-aa99-20c5f105357a", - "name": "ROY", + "corpus": "63951d56-ce50-4d2d-9d11-cee0da8dffba", + "type": "748cabf2-e002-431f-b582-5c39ca57b44e", + "name": "Surface C", "creator": null, "worker_version": null, "worker_run": null, - "image": "b33648fa-233d-4599-825a-2f8f87ac16d7", - "polygon": "LINEARRING (400 400, 400 500, 500 500, 500 400, 400 400)", + "image": "819f362b-9955-4d9f-9095-1c1ecf893753", + "polygon": "LINEARRING (0 0, 0 1000, 1000 1000, 1000 0, 0 0)", "rotation_angle": 0, "mirrored": false, "confidence": null @@ -1908,18 +1956,18 @@ }, { "model": "documents.element", - "pk": "e792fe53-f1ed-459d-8b49-c7dae948c0db", + "pk": "f2fa1e76-9784-49e9-91ba-9b53f19523dd", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c", - "type": "b61111fd-61e7-45f1-aa99-20c5f105357a", - "name": "PARIS", + "corpus": "63951d56-ce50-4d2d-9d11-cee0da8dffba", + "type": "3be8f2aa-3e04-4a25-80af-7f92c39c057f", + "name": "Text line", "creator": null, "worker_version": null, "worker_run": null, - "image": "9afed6a2-17ef-4d8a-8047-f7d8807c6fa2", - "polygon": "LINEARRING (100 100, 100 200, 200 200, 200 100, 100 100)", + "image": "fc31a9cc-53bc-4519-96b1-7cbbf5dc50a2", + "polygon": "LINEARRING (400 400, 400 500, 500 500, 500 400, 400 400)", "rotation_angle": 0, "mirrored": false, "confidence": null @@ -1927,18 +1975,18 @@ }, { "model": "documents.element", - "pk": "f1fbbe29-65c8-4a47-b912-6d4e04b39c01", + "pk": "f3837e86-33a2-4d0d-bfe5-b407712f8e9b", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c", - "type": "2b310b2a-33bd-42fe-bfb1-2915928b00cc", - "name": "Surface F", + "corpus": "63951d56-ce50-4d2d-9d11-cee0da8dffba", + "type": "acd7e2f0-e4c6-48e6-9bf5-e9be06e1ae6c", + "name": "Volume 1", "creator": null, "worker_version": null, "worker_run": null, - "image": "9afed6a2-17ef-4d8a-8047-f7d8807c6fa2", - "polygon": "LINEARRING (600 600, 600 1000, 1000 1000, 1000 600, 600 600)", + "image": null, + "polygon": null, "rotation_angle": 0, "mirrored": false, "confidence": null @@ -1946,18 +1994,18 @@ }, { "model": "documents.element", - "pk": "f4427695-7181-43d7-b5f8-803846bd4493", + "pk": "f6a605c8-3020-486f-9b41-81f8aed93f8d", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c", - "type": "b61111fd-61e7-45f1-aa99-20c5f105357a", - "name": "ROY", + "corpus": "63951d56-ce50-4d2d-9d11-cee0da8dffba", + "type": "53d093a9-3d20-4fcf-9b55-c77e788e19a9", + "name": "Volume 2, page 1r", "creator": null, "worker_version": null, "worker_run": null, - "image": "2da54dd3-7b52-4398-9c8c-a693d831799a", - "polygon": "LINEARRING (400 400, 400 500, 500 500, 500 400, 400 400)", + "image": "6f7e9ea3-06b3-4b7f-9900-9676d71f5bc1", + "polygon": "LINEARRING (0 0, 0 1000, 1000 1000, 1000 0, 0 0)", "rotation_angle": 0, "mirrored": false, "confidence": null @@ -1965,20 +2013,20 @@ }, { "model": "process.process", - "pk": "49bef54f-c7cf-4c97-bd06-633ecf77efcf", + "pk": "23b5ee11-b4b9-41dc-9b2b-83024404d0f0", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "name": null, + "name": "Process fixture", "creator": [ "user@user.fr" ], - "corpus": null, - "mode": "local", + "corpus": "63951d56-ce50-4d2d-9d11-cee0da8dffba", + "mode": "workers", "activity_state": "disabled", "started": null, "finished": null, - "farm": null, + "farm": "273e9f9c-a7bc-447c-ae13-94242dae89f1", "element": null, "folder_type": null, "element_type": null, @@ -1995,20 +2043,20 @@ }, { "model": "process.process", - "pk": "6ceedc11-f36b-418f-a19a-7c6b3dd96a82", + "pk": "b6d2604e-6cf3-44cd-9b4a-4d7dff2ce040", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "name": "Process fixture", + "name": null, "creator": [ - "user@user.fr" + "root@root.fr" ], - "corpus": "9b13ef45-5eb4-48a7-92c4-4e2766f8ec3c", - "mode": "workers", + "corpus": null, + "mode": "local", "activity_state": "disabled", "started": null, "finished": null, - "farm": "79314671-52bc-42c7-aaa1-ace2aefc73a1", + "farm": null, "element": null, "folder_type": null, "element_type": null, @@ -2025,13 +2073,13 @@ }, { "model": "process.process", - "pk": "9767c1c7-cc6a-4a69-96b1-d8c0e7ee4c33", + "pk": "da20071c-7c8e-4111-a1df-02f71f0531d2", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", "name": null, "creator": [ - "root@root.fr" + "user@user.fr" ], "corpus": null, "mode": "local", diff --git a/arkindex/documents/management/commands/build_fixtures.py b/arkindex/documents/management/commands/build_fixtures.py index afda9fe8e5ab802d6c35903cbf33b18ee5a49662..18d9ee1b4cf341215b127cff967f5b5f946dc173 100644 --- a/arkindex/documents/management/commands/build_fixtures.py +++ b/arkindex/documents/management/commands/build_fixtures.py @@ -76,6 +76,7 @@ class Command(BaseCommand): import_worker_type = WorkerType.objects.create(slug="import", display_name="Import") custom_worker_type = WorkerType.objects.create(slug="custom", display_name="Custom") init_type = WorkerType.objects.create(slug="init", display_name="Elements Initialisation") + export_type = WorkerType.objects.create(slug="export", display_name="Document export") farm = Farm.objects.create(name="Wheat farm") farm.memberships.create(user=user, level=Role.Guest.value) @@ -99,6 +100,19 @@ class Command(BaseCommand): feature=ArkindexFeature.InitElements, ) + # Create the PDF export worker version + WorkerVersion.objects.create( + worker=Worker.objects.create( + name="PDF export worker", + slug="pdf_export", + type=export_type + ), + version=1, + docker_image_iid="registry.example.com/pdf-export:latest", + state=WorkerVersionState.Available, + feature=ArkindexFeature.ExportPDF + ) + # Create some workers with available versions recognizer_worker = WorkerVersion.objects.create( worker=Worker.objects.create( @@ -192,11 +206,11 @@ class Command(BaseCommand): ) user_local_process.worker_runs.create( version=custom_version, - parents=[], + ttl=0, ) superuser_local_process.worker_runs.create( version=custom_version, - parents=[], + ttl=0, ) # Create a corpus @@ -218,14 +232,16 @@ class Command(BaseCommand): ) init_worker_run = process.worker_runs.create( version=init_worker, - parents=[] + ttl=3600, ) dla_worker_run = process.worker_runs.create( version=dla_worker, + ttl=3600, parents=[init_worker_run.id], ) reco_run = process.worker_runs.create( version=recognizer_worker, + ttl=3600, parents=[dla_worker_run.id], ) diff --git a/arkindex/documents/management/commands/load_export.py b/arkindex/documents/management/commands/load_export.py index d93cc0426950a7d2a96bac0b028f7978321e8b3f..9b2fad0ee55dea000fbd0a93ec471f18b63e0648 100644 --- a/arkindex/documents/management/commands/load_export.py +++ b/arkindex/documents/management/commands/load_export.py @@ -444,7 +444,7 @@ class Command(BaseCommand): version_id=worker_version_id, model_version=model_version, configuration=configuration, - defaults={"parents": []}, + defaults={"ttl": 0}, ) def create_image_server(self, row): diff --git a/arkindex/documents/managers.py b/arkindex/documents/managers.py index 9ccb6494b871c79f1e221b5f1569de4b0ce62015..887f0d0a40105d7369a5a5f2e98ab34736c7d3a1 100644 --- a/arkindex/documents/managers.py +++ b/arkindex/documents/managers.py @@ -1,7 +1,11 @@ +from datetime import timedelta from itertools import chain import django +from django.conf import settings +from django.core.exceptions import ValidationError from django.db import DJANGO_VERSION_PICKLE_KEY, connections, models +from django.utils import timezone from arkindex.project.fields import Unnest from arkindex.users.managers import BaseACLManager @@ -231,3 +235,23 @@ class CorpusManager(BaseACLManager): return super().get_queryset().filter( id__in=(self.filter_rights(user, self.model, Role.Admin.value).values("id")) ) + + +class CorpusExportManager(models.Manager): + def validate_creatable(self, corpus, source): + """ + Check if a new corpus export can be created (no export currently running or recent completed export) + """ + from arkindex.documents.models import CorpusExportState + # Check that there is no export already running for this corpus + if corpus.exports.filter(state=CorpusExportState.Running).exists(): + raise ValidationError("An export is already running for this corpus.") + # Check that there is no available completed export from the same source created less than {EXPORT_TTL_SECONDS} + # ago for this corpus + available_exports = corpus.exports.filter( + state=CorpusExportState.Done, + source=source, + created__gte=timezone.now() - timedelta(seconds=settings.EXPORT_TTL_SECONDS) + ) + if available_exports.exists(): + raise ValidationError(f"An export has already been made for this corpus in the last {settings.EXPORT_TTL_SECONDS} seconds.") diff --git a/arkindex/documents/models.py b/arkindex/documents/models.py index 0353cd106e08c2921124f3034439d45d9844ac95..6ea6ee7745c9cba8008808ddf6b669ae2ea007b9 100644 --- a/arkindex/documents/models.py +++ b/arkindex/documents/models.py @@ -20,7 +20,7 @@ from enumfields import Enum, EnumField import pgtrigger from arkindex.documents.dates import InterpretedDateMixin from arkindex.documents.deletion import delete_element -from arkindex.documents.managers import CorpusManager, ElementManager +from arkindex.documents.managers import CorpusExportManager, CorpusManager, ElementManager from arkindex.project.aws import S3FileMixin from arkindex.project.default_corpus import DEFAULT_CORPUS_TYPES from arkindex.project.fields import ArrayConcat, ArrayField, LinearRingField @@ -1252,6 +1252,7 @@ class CorpusExport(S3FileMixin, IndexableModel): source = models.CharField(max_length=50, default="default", choices=[(source, source) for source in settings.EXPORT_SOURCES]) s3_bucket = settings.AWS_EXPORT_BUCKET + objects = CorpusExportManager() @property def s3_key(self) -> str: diff --git a/arkindex/documents/serializers/entities.py b/arkindex/documents/serializers/entities.py index 2b5d3fc2be0e239b668a1944c22d2cefdfdfec92..bcc27783682a8a19bbf85198d0e1168983d13ac5 100644 --- a/arkindex/documents/serializers/entities.py +++ b/arkindex/documents/serializers/entities.py @@ -2,6 +2,8 @@ from collections import defaultdict from textwrap import dedent from django.db import transaction +from django.db.utils import OperationalError +from psycopg2.errors import ProgramLimitExceeded from rest_framework import serializers from rest_framework.exceptions import ValidationError @@ -373,7 +375,7 @@ class TranscriptionEntitiesBulkSerializer(serializers.Serializer): @transaction.atomic def save(self): - entities = Entity.objects.bulk_create([ + entities_to_create = [ Entity( corpus=self.transcription.element.corpus, name=item["name"], @@ -383,7 +385,14 @@ class TranscriptionEntitiesBulkSerializer(serializers.Serializer): worker_version_id=self.validated_data["worker_run"].version_id, ) for item in self.validated_data["entities"] - ]) + ] + try: + entities = Entity.objects.bulk_create(entities_to_create) + except OperationalError as e: + if isinstance(getattr(e, "__cause__", None), ProgramLimitExceeded): + # As the max length is dynamic and depending on content, we cannot just limit on a specific length + raise ValidationError({"entities": {"name": ["Value is too long for this field."]}}) + raise e transcription_entities = TranscriptionEntity.objects.bulk_create([ TranscriptionEntity( diff --git a/arkindex/documents/serializers/export.py b/arkindex/documents/serializers/export.py index 6a7605e3b1381b8eaa81a9e24ba7dcd8b1d408a9..bc202cff9b057473f2fa815b0ceb312a0604e765 100644 --- a/arkindex/documents/serializers/export.py +++ b/arkindex/documents/serializers/export.py @@ -1,9 +1,5 @@ -from datetime import timedelta -from django.conf import settings -from django.utils import timezone from rest_framework import serializers -from rest_framework.exceptions import ValidationError from arkindex.documents.models import CorpusExport, CorpusExportState from arkindex.project.serializer_fields import EnumField @@ -21,19 +17,7 @@ class CorpusExportSerializer(serializers.ModelSerializer): def validate(self, data): corpus = self.context["corpus"] source = data.get("source", "default") - # Check that there is no export already running for this corpus - if corpus.exports.filter(state__in=(CorpusExportState.Created, CorpusExportState.Running)).exists(): - raise ValidationError("An export is already running for this corpus.") - # Check that there is no available completed export from the same source created less than {EXPORT_TTL_SECONDS} - # ago for this corpus - available_exports = corpus.exports.filter( - state=CorpusExportState.Done, - source=source, - created__gte=timezone.now() - timedelta(seconds=settings.EXPORT_TTL_SECONDS) - ) - if available_exports.exists(): - raise ValidationError(f"An export has already been made for this corpus in the last {settings.EXPORT_TTL_SECONDS} seconds.") - + CorpusExport.objects.validate_creatable(corpus, source) data["corpus"] = corpus data["source"] = source return data diff --git a/arkindex/documents/tests/commands/test_cleanup.py b/arkindex/documents/tests/commands/test_cleanup.py index e7e342cfceb4b0457c4fb853b71e0763fa200d4c..16fe3290c65e199dafe8a8ca5adf51c3ce68d78c 100644 --- a/arkindex/documents/tests/commands/test_cleanup.py +++ b/arkindex/documents/tests/commands/test_cleanup.py @@ -61,6 +61,7 @@ class TestCleanupCommand(FixtureTestCase): depth=0, slug="task", expiry=datetime(1970, 1, 1, tzinfo=timezone.utc), + ttl=0, ) return task.artifacts.create(path="artichoke", size=99999) @@ -394,7 +395,7 @@ class TestCleanupCommand(FixtureTestCase): corpus=self.corpus, creator=self.superuser, ) - task = process.tasks.create(run=0, depth=0, slug="task") + task = process.tasks.create(run=0, depth=0, slug="task", ttl=0) good_s3_artifact = MagicMock() good_s3_artifact.key = f"{task.id}/path/to/thing.txt" @@ -487,6 +488,7 @@ class TestCleanupCommand(FixtureTestCase): depth=0, slug="task", expiry=datetime(1970, 1, 1, tzinfo=timezone.utc), + ttl=0, ) expired_artifact = expired_task.artifacts.create(path="nope.txt", size=256) @@ -500,6 +502,7 @@ class TestCleanupCommand(FixtureTestCase): depth=0, slug="task", expiry=datetime(1970, 1, 1, tzinfo=timezone.utc), + ttl=0, ) expired_task_with_dataset_link.dataset.set([ Dataset.objects.create(name="SEELE", description="Neon Genesis Evangelion", corpus=self.corpus, creator=self.user) @@ -517,6 +520,7 @@ class TestCleanupCommand(FixtureTestCase): slug="task", # Expiration date in the future: not yet expired expiry=datetime.now(timezone.utc) + timedelta(days=99), + ttl=0, ) non_expired_artifact = non_expired_task.artifacts.create(path="artsy-fact", size=1337) @@ -805,7 +809,7 @@ class TestCleanupCommand(FixtureTestCase): corpus=self.corpus, creator=self.superuser, ) - task = process.tasks.create(run=0, depth=0, slug="task") + task = process.tasks.create(run=0, depth=0, slug="task", ttl=0) good_s3_log = MagicMock() good_s3_log.key = f"{task.id}.log" @@ -1159,13 +1163,14 @@ class TestCleanupCommand(FixtureTestCase): # from the WorkerRuns, so there would be 2 runs with the same version and no configuration when they should be unique process = self.corpus.processes.create(mode=ProcessMode.Workers, creator=self.superuser) version = removable_worker.versions.first() - process.worker_runs.create(version=version) + process.worker_runs.create(version=version, ttl=0) process.worker_runs.create( version=version, configuration=removable_worker.configurations.create( name="Some configuration", configuration={}, ), + ttl=0, ) # This worker cannot be cleaned up because it is used in ML results @@ -1244,6 +1249,7 @@ class TestCleanupCommand(FixtureTestCase): worker_run = process.worker_runs.create( version=worker_version, model_version=used_model.versions.create(), + ttl=0, ) self.corpus.elements.create( type=self.corpus.types.first(), @@ -1311,7 +1317,7 @@ class TestCleanupCommand(FixtureTestCase): self.assertTrue(Worker.objects.filter(type=worker_type).exists()) unused_type = WorkerType.objects.create(slug="unused", display_name="A worker type that no worker uses") self.assertFalse(Worker.objects.filter(type=unused_type).exists()) - self.assertEqual(WorkerType.objects.count(), 7) + self.assertEqual(WorkerType.objects.count(), 8) self.assertEqual( self.cleanup(), @@ -1353,4 +1359,4 @@ class TestCleanupCommand(FixtureTestCase): with self.assertRaises(WorkerType.DoesNotExist): unused_type.refresh_from_db() - self.assertEqual(WorkerType.objects.count(), 6) + self.assertEqual(WorkerType.objects.count(), 7) diff --git a/arkindex/documents/tests/commands/test_load_export.py b/arkindex/documents/tests/commands/test_load_export.py index 383c1bc7ef1eeb733a17a7f60febfefcde60d417..0787a57f666980bdb4eab0240c459c20b6bfbe2e 100644 --- a/arkindex/documents/tests/commands/test_load_export.py +++ b/arkindex/documents/tests/commands/test_load_export.py @@ -39,7 +39,7 @@ class TestLoadExport(FixtureTestCase): "process.workerversion": ["created", "updated", "configuration", "state", "docker_image_iid"], # The WorkerRuns lose their parents, use different worker versions that just got recreated, # are assigned to the user's local process and not the original one - "process.workerrun": ["parents", "version", "process", "summary", "created", "updated"], + "process.workerrun": ["parents", "version", "process", "summary", "created", "updated", "ttl"], "process.workertype": [], "images.imageserver": ["s3_bucket", "s3_region", "created", "updated", "read_only"], "images.image": ["created", "updated", "hash", "status"], diff --git a/arkindex/documents/tests/tasks/test_corpus_delete.py b/arkindex/documents/tests/tasks/test_corpus_delete.py index 099c49309c4dd29c8708f79cff221676c02698f5..7f0ff00642610028dedbe728915196333818e7af 100644 --- a/arkindex/documents/tests/tasks/test_corpus_delete.py +++ b/arkindex/documents/tests/tasks/test_corpus_delete.py @@ -45,7 +45,7 @@ class TestDeleteCorpus(FixtureTestCase): ml_class=cls.corpus.ml_classes.create(name="a class"), ) element_process.elements.add(element) - worker_run = element_process.worker_runs.create(version=cls.worker_version, parents=[]) + worker_run = element_process.worker_runs.create(version=cls.worker_version, ttl=0) task_1, task_2, task_3, task_4 = Task.objects.bulk_create( [ Task( @@ -55,6 +55,7 @@ class TestDeleteCorpus(FixtureTestCase): worker_run=worker_run, slug=f"unscheduled task {i}", state=State.Unscheduled, + ttl=0, ) for i in range(0, 4) ] ) diff --git a/arkindex/documents/tests/tasks/test_export.py b/arkindex/documents/tests/tasks/test_export.py index 9831b8eed4f1f7e73a91d17e3bbb45d855255a97..8b21ea830c002881fa53b6e8b9935f57c407e231 100644 --- a/arkindex/documents/tests/tasks/test_export.py +++ b/arkindex/documents/tests/tasks/test_export.py @@ -82,6 +82,7 @@ class TestExport(FixtureTestCase): model=Model.objects.create(name="Some model"), ), configuration=metadata_version.worker.configurations.create(name="Some configuration"), + ttl=0, ) element.metadatas.create( diff --git a/arkindex/documents/tests/tasks/test_worker_results_delete.py b/arkindex/documents/tests/tasks/test_worker_results_delete.py index 2906bc25b49fd5fdeb1c5ff5ff85b51ad33f4bfa..264d9fa821ab457e0e8996d01f0bf71d9302d563 100644 --- a/arkindex/documents/tests/tasks/test_worker_results_delete.py +++ b/arkindex/documents/tests/tasks/test_worker_results_delete.py @@ -40,7 +40,7 @@ class TestDeleteWorkerResults(FixtureTestCase): version=cls.version_1, model_version=cls.model_version, configuration=cls.configuration, - parents=[], + ttl=0, ) cls.vol = cls.corpus.elements.get(name="Volume 1") diff --git a/arkindex/documents/tests/test_bulk_classification.py b/arkindex/documents/tests/test_bulk_classification.py index 7411a992f93f48fa60694aa947c85baf2becde53..ddcc0d5c71d11584af9fb9c6d98a09f13c9d3399 100644 --- a/arkindex/documents/tests/test_bulk_classification.py +++ b/arkindex/documents/tests/test_bulk_classification.py @@ -287,7 +287,7 @@ class TestBulkClassification(FixtureAPITestCase): mode=ProcessMode.Workers, corpus=self.corpus, ) - other_worker_run = process2.worker_runs.create(version=self.worker_run.version, parents=[]) + other_worker_run = process2.worker_runs.create(version=self.worker_run.version, ttl=0) with patch("arkindex.process.tasks.initialize_activity.delay"): self.worker_run.process.run() task = self.worker_run.process.tasks.first() diff --git a/arkindex/documents/tests/test_bulk_element_transcriptions.py b/arkindex/documents/tests/test_bulk_element_transcriptions.py index 6caf72e472fb9e5fb30635ad996143270103e15a..f4578bdce00ec3c58f24c48039f54ba05d86bc95 100644 --- a/arkindex/documents/tests/test_bulk_element_transcriptions.py +++ b/arkindex/documents/tests/test_bulk_element_transcriptions.py @@ -748,7 +748,7 @@ class TestBulkElementTranscriptions(FixtureAPITestCase): mode=ProcessMode.Workers, corpus=self.corpus, ) - other_worker_run = process2.worker_runs.create(version=self.worker_run.version, parents=[]) + other_worker_run = process2.worker_runs.create(version=self.worker_run.version, ttl=0) with patch("arkindex.process.tasks.initialize_activity.delay"): self.worker_run.process.run() task = self.worker_run.process.tasks.first() diff --git a/arkindex/documents/tests/test_bulk_elements.py b/arkindex/documents/tests/test_bulk_elements.py index 03303ea101bc6b0d7c43a63db56218a5528fa1d9..05d3516d2df3acc5b3950d81de00214711764881 100644 --- a/arkindex/documents/tests/test_bulk_elements.py +++ b/arkindex/documents/tests/test_bulk_elements.py @@ -427,7 +427,7 @@ class TestBulkElements(FixtureAPITestCase): mode=ProcessMode.Workers, corpus=self.corpus, ) - other_worker_run = process2.worker_runs.create(version=self.worker_run.version, parents=[]) + other_worker_run = process2.worker_runs.create(version=self.worker_run.version, ttl=0) with patch("arkindex.process.tasks.initialize_activity.delay"): self.worker_run.process.run() task = self.worker_run.process.tasks.first() diff --git a/arkindex/documents/tests/test_bulk_transcription_entities.py b/arkindex/documents/tests/test_bulk_transcription_entities.py index 773a5cb727570b2812c4a5ed777bd30f889030ee..8c60037671d12e50cfb6b3d08610ff1243f3d991 100644 --- a/arkindex/documents/tests/test_bulk_transcription_entities.py +++ b/arkindex/documents/tests/test_bulk_transcription_entities.py @@ -233,7 +233,7 @@ class TestBulkTranscriptionEntities(FixtureAPITestCase): mode=ProcessMode.Workers, corpus=self.corpus, ) - other_worker_run = process2.worker_runs.create(version=self.worker_run.version, parents=[]) + other_worker_run = process2.worker_runs.create(version=self.worker_run.version, ttl=0) with patch("arkindex.process.tasks.initialize_activity.delay"): self.worker_run.process.run() @@ -416,3 +416,25 @@ class TestBulkTranscriptionEntities(FixtureAPITestCase): ), [("Guzzlord", self.person_ent_type.id, 6, 9, .42, self.local_worker_run.id, self.local_worker_run.id)], ) + + def test_create_name_too_long(self): + self.client.force_login(self.user) + response = self.client.post( + reverse("api:transcription-entities-bulk", kwargs={"pk": str(self.transcription.id)}), + data={ + "entities": [ + { + "name": "A" * 500000, + "type_id": str(self.person_ent_type.id), + "offset": 6, + "length": 9, + "confidence": .42, + }, + ], + "worker_run_id": str(self.local_worker_run.id), + } + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertEqual(response.json(), { + "entities": {"name": ["Value is too long for this field."]}, + }) diff --git a/arkindex/documents/tests/test_bulk_transcriptions.py b/arkindex/documents/tests/test_bulk_transcriptions.py index f5ecd2861cdda5f65a1db5ea7aed28aa54637114..8fbbcdac41d84b2cb57f2221c5b4ea25dea9d38a 100644 --- a/arkindex/documents/tests/test_bulk_transcriptions.py +++ b/arkindex/documents/tests/test_bulk_transcriptions.py @@ -263,7 +263,7 @@ class TestBulkTranscriptions(FixtureAPITestCase): mode=ProcessMode.Workers, corpus=self.corpus, ) - other_worker_run = process2.worker_runs.create(version=self.worker_run.version, parents=[]) + other_worker_run = process2.worker_runs.create(version=self.worker_run.version, ttl=0) with patch("arkindex.process.tasks.initialize_activity.delay"): self.worker_run.process.run() task = self.worker_run.process.tasks.first() diff --git a/arkindex/documents/tests/test_classification.py b/arkindex/documents/tests/test_classification.py index 35ddeb9d1cc8c345df67393dc645263a3602814c..84ccefcfec56ecd8d506049f77db6e3fc7e4b314 100644 --- a/arkindex/documents/tests/test_classification.py +++ b/arkindex/documents/tests/test_classification.py @@ -368,7 +368,7 @@ class TestClassifications(FixtureAPITestCase): mode=ProcessMode.Workers, corpus=self.corpus, ) - other_worker_run = process2.worker_runs.create(version=self.worker_run.version, parents=[]) + other_worker_run = process2.worker_runs.create(version=self.worker_run.version, ttl=0) with patch("arkindex.process.tasks.initialize_activity.delay"): self.worker_run.process.run() task = self.worker_run.process.tasks.first() diff --git a/arkindex/documents/tests/test_create_elements.py b/arkindex/documents/tests/test_create_elements.py index 28b0f2b17276da27a1f27e22671fff076677db51..0cd08a757f2d0f78935cc70555d891cf35e1c54c 100644 --- a/arkindex/documents/tests/test_create_elements.py +++ b/arkindex/documents/tests/test_create_elements.py @@ -720,7 +720,7 @@ class TestCreateElements(FixtureAPITestCase): mode=ProcessMode.Workers, corpus=self.corpus, ) - other_worker_run = process2.worker_runs.create(version=self.worker_run.version, parents=[]) + other_worker_run = process2.worker_runs.create(version=self.worker_run.version, ttl=0) with patch("arkindex.process.tasks.initialize_activity.delay"): self.worker_run.process.run() task = self.worker_run.process.tasks.first() diff --git a/arkindex/documents/tests/test_create_transcriptions.py b/arkindex/documents/tests/test_create_transcriptions.py index 7b37e36012341d7bc832ef55a7240981e5929e00..18c03d0f16a08fd4c353a90d4b514d74ef2ad34e 100644 --- a/arkindex/documents/tests/test_create_transcriptions.py +++ b/arkindex/documents/tests/test_create_transcriptions.py @@ -337,7 +337,7 @@ class TestTranscriptionCreate(FixtureAPITestCase): mode=ProcessMode.Workers, corpus=self.corpus, ) - other_worker_run = process2.worker_runs.create(version=self.worker_run.version, parents=[]) + other_worker_run = process2.worker_runs.create(version=self.worker_run.version, ttl=0) with patch("arkindex.process.tasks.initialize_activity.delay"): self.worker_run.process.run() task = self.worker_run.process.tasks.first() diff --git a/arkindex/documents/tests/test_entities_api.py b/arkindex/documents/tests/test_entities_api.py index d35466755771ec309cb5188c21d847a7665622b8..32b4dffa231488169b0f3f34ac80ad5c08e3524a 100644 --- a/arkindex/documents/tests/test_entities_api.py +++ b/arkindex/documents/tests/test_entities_api.py @@ -388,7 +388,7 @@ class TestEntitiesAPI(FixtureAPITestCase): mode=ProcessMode.Workers, corpus=self.corpus, ) - other_worker_run = process2.worker_runs.create(version=self.worker_version_1, parents=[]) + other_worker_run = process2.worker_runs.create(version=self.worker_version_1, ttl=0) with patch("arkindex.process.tasks.initialize_activity.delay"): self.worker_run_1.process.run() task = self.worker_run_1.process.tasks.first() @@ -534,6 +534,23 @@ class TestEntitiesAPI(FixtureAPITestCase): self.assertEqual(entity.worker_version_id, local_worker_run.version_id) self.assertEqual(entity.worker_run, local_worker_run) + def test_create_entity_name_too_long(self): + self.client.force_login(self.user) + with self.assertNumQueries(6): + response = self.client.post( + reverse("api:entity-create"), + data={ + "name": "A" * 500000, + "type_id": str(self.person_type.id), + "corpus": str(self.corpus.id), + "worker_run_id": str(self.local_worker_run.id), + }, + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertEqual(response.json(), { + "name": ["Value is too long for this field."], + }) + def test_create_transcription_entity(self): self.client.force_login(self.user) with self.assertNumQueries(6): @@ -846,7 +863,7 @@ class TestEntitiesAPI(FixtureAPITestCase): mode=ProcessMode.Workers, corpus=self.corpus, ) - other_worker_run = process2.worker_runs.create(version=self.worker_version_1, parents=[]) + other_worker_run = process2.worker_runs.create(version=self.worker_version_1, ttl=0) with patch("arkindex.process.tasks.initialize_activity.delay"): self.worker_run_1.process.run() task = self.worker_run_1.process.tasks.first() diff --git a/arkindex/documents/tests/test_metadata.py b/arkindex/documents/tests/test_metadata.py index b271170e822ba5c6469b4002ede38b0641e5f6ce..55b026370d0252154f8f913acaf995cc3dcf3f50 100644 --- a/arkindex/documents/tests/test_metadata.py +++ b/arkindex/documents/tests/test_metadata.py @@ -44,7 +44,7 @@ class TestMetaData(FixtureAPITestCase): creator=cls.user, farm=Farm.objects.first(), ) - cls.process.worker_runs.create(version=cls.worker_version, parents=[]) + cls.process.worker_runs.create(version=cls.worker_version, ttl=0) with patch("arkindex.process.tasks.initialize_activity.delay"): cls.process.run() cls.task = cls.process.tasks.first() @@ -463,7 +463,7 @@ class TestMetaData(FixtureAPITestCase): mode=ProcessMode.Workers, corpus=self.corpus, ) - other_worker_run = process2.worker_runs.create(version=self.worker_run.version, parents=[]) + other_worker_run = process2.worker_runs.create(version=self.worker_run.version, ttl=0) with patch("arkindex.process.tasks.initialize_activity.delay"): self.worker_run.process.run() task = self.worker_run.process.tasks.first() @@ -1458,7 +1458,7 @@ class TestMetaData(FixtureAPITestCase): mode=ProcessMode.Workers, corpus=self.corpus, ) - other_worker_run = process2.worker_runs.create(version=self.worker_run.version, parents=[]) + other_worker_run = process2.worker_runs.create(version=self.worker_run.version, ttl=0) with patch("arkindex.process.tasks.initialize_activity.delay"): self.worker_run.process.run() task = self.worker_run.process.tasks.first() diff --git a/arkindex/ponos/admin.py b/arkindex/ponos/admin.py index 84297e00a0c5050f2242128d76c1364633455225..a4b47b391eb3f2dbb0cc7e79036bab522b0584a4 100644 --- a/arkindex/ponos/admin.py +++ b/arkindex/ponos/admin.py @@ -39,6 +39,8 @@ class TaskAdmin(admin.ModelAdmin): "id", "created", "updated", + "started", + "finished", "container", "shm_size", "original_task", @@ -59,7 +61,14 @@ class TaskAdmin(admin.ModelAdmin): ), }, ), - ("Dates", {"fields": ("created", "updated", "expiry")}), + ("Dates", {"fields": ( + "created", + "updated", + "started", + "finished", + "expiry", + "ttl", + )}), ( "Docker", { diff --git a/arkindex/ponos/migrations/0013_task_ttl.py b/arkindex/ponos/migrations/0013_task_ttl.py new file mode 100644 index 0000000000000000000000000000000000000000..1c2c48ab164fb991f319b910352f98e5d0e3a964 --- /dev/null +++ b/arkindex/ponos/migrations/0013_task_ttl.py @@ -0,0 +1,23 @@ +# Generated by Django 5.0.8 on 2024-11-07 13:26 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("ponos", "0012_task_started_finished"), + ] + + operations = [ + migrations.AddField( + model_name="task", + name="ttl", + field=models.PositiveIntegerField( + default=0, + help_text="Maximum time-to-live in seconds. 0 means infinite.", + verbose_name="TTL", + ), + preserve_default=False, + ), + ] diff --git a/arkindex/ponos/models.py b/arkindex/ponos/models.py index cd923158744152370e513b7e7ce54f4e1e264102..c9d3e109f8818d65ee4ed5e03059cf7f51c7a4b4 100644 --- a/arkindex/ponos/models.py +++ b/arkindex/ponos/models.py @@ -172,6 +172,12 @@ class State(Enum): State where a task that entered the Stopping state has successfully stopped. """ + Cancelled = "cancelled" + """ + State where a task has been stopped because it has exceeded a resource limit (time, CPU, etc.). + This limit may have been enforced by Arkindex or by a third party. + """ + # States where a task is considered final. # Once a task reaches a final state, its state should no longer change. @@ -180,6 +186,7 @@ FINAL_STATES = ( State.Failed, State.Error, State.Stopped, + State.Cancelled, ) # States where a task could start, be running, or still be using some resources, on an agent @@ -197,6 +204,7 @@ ACTIVE_STATES = ( # the process is set as running to prevent retrying and allow stopping. STATES_ORDERING = [ State.Running, + State.Cancelled, State.Failed, State.Error, State.Stopping, @@ -340,6 +348,11 @@ class Task(models.Model): finished = models.DateTimeField(blank=True, null=True) expiry = models.DateTimeField(default=expiry_default) + ttl = models.PositiveIntegerField( + verbose_name="TTL", + help_text="Maximum time-to-live in seconds. 0 means infinite.", + ) + # Remote files required to start the container extra_files = HStoreField(default=dict, blank=True) @@ -402,6 +415,10 @@ class Task(models.Model): """ return self.state in FINAL_STATES + @property + def is_over_ttl(self) -> bool: + return self.ttl > 0 and self.started + timedelta(seconds=self.ttl) < timezone.now() + @property def logs(self) -> TaskLogs: return TaskLogs(self) diff --git a/arkindex/ponos/serializers.py b/arkindex/ponos/serializers.py index f172e58ea35b994bd4bf5e0329c212a93c02e513..ce6bd6f6cb160be1312a851357aa8bc0de0a231f 100644 --- a/arkindex/ponos/serializers.py +++ b/arkindex/ponos/serializers.py @@ -36,7 +36,8 @@ class TaskLightSerializer(serializers.ModelSerializer): Pending ⟶ Running ⟶ Completed └⟶ Error ├⟶ Failed - └⟶ Error + ├⟶ Error + └⟶ Cancelled Stopping ⟶ Stopped └⟶ Error @@ -58,6 +59,7 @@ class TaskLightSerializer(serializers.ModelSerializer): "original_task_id", "started", "finished", + "ttl", ) read_only_fields = ( "id", @@ -70,6 +72,7 @@ class TaskLightSerializer(serializers.ModelSerializer): "original_task_id", "started", "finished", + "ttl", ) def validate_state(self, state): @@ -79,7 +82,12 @@ class TaskLightSerializer(serializers.ModelSerializer): allowed_transitions = { State.Unscheduled: [State.Pending], State.Pending: [State.Running, State.Error], - State.Running: [State.Completed, State.Failed, State.Error], + State.Running: [ + State.Completed, + State.Failed, + State.Error, + State.Cancelled, + ], State.Stopping: [State.Stopped, State.Error], } if user.mode == AgentMode.Slurm: diff --git a/arkindex/ponos/tasks.py b/arkindex/ponos/tasks.py index 370245ca922cd645c5e7bb149f15aeb281658e7e..eb44300d1562aa93411ac22484f5a3e8de835ccb 100644 --- a/arkindex/ponos/tasks.py +++ b/arkindex/ponos/tasks.py @@ -16,7 +16,7 @@ from django.template.loader import render_to_string from django_rq import job import docker -from arkindex.ponos.models import State, Task +from arkindex.ponos.models import FINAL_STATES, State, Task from arkindex.ponos.utils import decompress_zst_archive, extract_tar_archive, upload_artifact from arkindex.process.models import Process, WorkerActivityState from arkindex.project.tools import should_verify_cert @@ -211,9 +211,11 @@ def run_docker_task(client, task, temp_dir): previous_logs = b"" while container.status == "running": logs = container.logs() + if logs != previous_logs: upload_logs(task, logs) previous_logs = logs + # Handle a task that is being stopped during execution task.refresh_from_db() if task.state == State.Stopping: @@ -222,10 +224,28 @@ def run_docker_task(client, task, temp_dir): task.finished = datetime.now(timezone.utc) task.save() break + + # If a task has been running for longer than what its TTL allows, cancel it + if task.is_over_ttl: + container.stop() + task.state = State.Cancelled + task.finished = datetime.now(timezone.utc) + task.save() + break + sleep(TASK_DOCKER_POLLING) container.reload() + # Upload logs one last time so we do not miss any data - upload_logs(task, container.logs()) + logs = container.logs() + if task.state == State.Cancelled: + # For cancelled tasks, log the cancellation explicitly to make it more distinguishable from Error or Failed states + if logs: + # Add a line break if there were some existing logs + logs += b"\n" + logs += f"[ERROR] This task has been cancelled because it has exceeded its TTL of {task.ttl} second{'s' if task.ttl != 1 else ''}.".encode() + + upload_logs(task, logs) # 6. Retrieve the state of the container container.reload() @@ -248,9 +268,9 @@ def run_docker_task(client, task, temp_dir): logger.warning( f"Failed uploading artifact {path} for task {task}: {e}" ) - elif task.state != State.Stopped: - # Stopping a task will usually result in a non-zero exit code, - # but we want to report them as Stopped and not Failed so we skip stopped tasks. + elif task.state not in (State.Stopped, State.Cancelled): + # Canceling or stopping a task will usually result in a non-zero exit code, + # but we want to report them as Stopped or Cancelled and not Failed, so we skip those states. logger.info("Task failed") task.state = State.Failed task.finished = datetime.now(timezone.utc) @@ -275,7 +295,7 @@ def run_task_rq(task: Task): # Automatically update children in case an error occurred if (parent_state := next( - (parent.state for parent in parents if parent.state in (State.Stopped, State.Error, State.Failed)), + (parent.state for parent in parents if parent.state in FINAL_STATES and parent.state != State.Completed), None )) is not None: task.state = parent_state diff --git a/arkindex/ponos/tests/rq/test_download_extra_files.py b/arkindex/ponos/tests/rq/test_download_extra_files.py index 00b5bf71dd21bb134473746b5daa5d00d3f68260..17ff6dda37ab0d739fb7e2d92173400cf23f6fb4 100644 --- a/arkindex/ponos/tests/rq/test_download_extra_files.py +++ b/arkindex/ponos/tests/rq/test_download_extra_files.py @@ -32,6 +32,7 @@ class TestDownloadExtraFiles(FixtureTestCase): depth=0, state=State.Pending, extra_files={"something": "http://teklia.com/some/thing"}, + ttl=0, ) @responses.activate diff --git a/arkindex/ponos/tests/rq/test_run_docker_task.py b/arkindex/ponos/tests/rq/test_run_docker_task.py index e3dd85759ff058920747d5975ef7773b0cf74b82..8ab74494683bcdc8aca985d2ce0c9e4c49fa19c2 100644 --- a/arkindex/ponos/tests/rq/test_run_docker_task.py +++ b/arkindex/ponos/tests/rq/test_run_docker_task.py @@ -1,11 +1,12 @@ import tempfile +from datetime import datetime, timedelta, timezone from pathlib import Path from unittest.mock import MagicMock, PropertyMock, call, patch, seal from django.test import override_settings import docker -from arkindex.ponos.models import Farm, State, Task +from arkindex.ponos.models import State, Task from arkindex.ponos.tasks import run_docker_task from arkindex.process.models import ProcessMode from arkindex.project.tests import FixtureTestCase @@ -16,12 +17,10 @@ class TestRunDockerTask(FixtureTestCase): @classmethod def setUpTestData(cls): super().setUpTestData() - cls.farm = Farm.objects.first() cls.process = cls.corpus.processes.create( creator=cls.user, mode=ProcessMode.Workers, corpus=cls.corpus, - farm=cls.farm, ) cls.task = cls.process.tasks.create( slug="something", @@ -29,6 +28,7 @@ class TestRunDockerTask(FixtureTestCase): run=0, depth=0, state=State.Pending, + ttl=0, ) def test_local_image(self): @@ -97,6 +97,7 @@ class TestRunDockerTask(FixtureTestCase): depth=0, run=0, state=State.Completed, + ttl=0, ) self.task.depth = 1 self.task.save() @@ -294,7 +295,7 @@ class TestRunDockerTask(FixtureTestCase): def add_artifacts(*args): # sleep() is called after one iteration of the running task loop is complete. # We use this mock to not wait during tests, check that the task is properly running, - # and create artifacts that the function should upload once the container exists. + # and create artifacts that the function should not upload once the container exists. self.task.refresh_from_db() self.assertEqual(self.task.state, State.Running) self.assertIsNotNone(self.task.started) @@ -360,7 +361,6 @@ class TestRunDockerTask(FixtureTestCase): self.assertEqual(upload_artifact_mock.call_count, 0) - @override_settings(PONOS_DOCKER_AUTO_REMOVE_CONTAINER=True) @patch("arkindex.ponos.utils.upload_artifact") @patch("arkindex.ponos.tasks.upload_logs") @@ -458,3 +458,98 @@ class TestRunDockerTask(FixtureTestCase): self.assertEqual(sleep_mock.call_args, call(1)) self.assertEqual(upload_artifact_mock.call_count, 0) + + @override_settings(PONOS_DOCKER_AUTO_REMOVE_CONTAINER=False) + @patch("arkindex.ponos.utils.upload_artifact") + @patch("arkindex.ponos.tasks.upload_logs") + @patch("arkindex.ponos.tasks.sleep") + def test_cancelled(self, sleep_mock, upload_logs_mock, upload_artifact_mock): + client_mock = MagicMock() + container = client_mock.containers.run.return_value + + # The first two calls occur while the task is running, the third after it has finished. + container.logs.side_effect = [b"Running", b"Running", b"(whilhelm scream)"] + + # This will be accessed after the container has been stopped + container.attrs = {"State": {"ExitCode": 1}} + + def add_artifacts(*args): + # sleep() is called after one iteration of the running task loop is complete. + # We use this mock to not wait during tests, check that the task is properly running, + # configure it to exceed its TTL, and create artifacts that the function should not + # upload after cancelling the task. + self.task.refresh_from_db() + self.assertEqual(self.task.state, State.Running) + self.assertIsNotNone(self.task.started) + self.assertIsNone(self.task.finished) + + self.task.ttl = 1 + self.task.created = self.task.started = datetime.now(timezone.utc) - timedelta(seconds=2) + self.task.save() + + # This artifact should never be uploaded + (Path(temp_dir) / str(self.task.id) / "something.txt").write_text("blah") + + # Set up all the remaining attributes, then seal the mocks so nonexistent attributes can't be accessed + sleep_mock.side_effect = add_artifacts + seal(sleep_mock) + + client_mock.images.pull.return_value = None + client_mock.containers.get.side_effect = docker.errors.NotFound("Not found.") + container.reload.return_value = None + container.stop.return_value = None + # Sealing is not friendly with PropertyMocks, so we put a placeholder first + container.status = None + seal(client_mock) + # Limit the amount of times container.status can be accessed, so we can't get stuck in an infinite loop + type(container).status = PropertyMock(side_effect=[ + "running", # Loop that checks whether the container is `created` and needs to be awaited + "running", # First iteration of the running task loop + "running", # Second iteration where the task should be cancelled + "exited", # This should not be called but protects us from an infinite loop if the test doesn't go as planned + ]) + + upload_logs_mock.return_value = None + seal(upload_logs_mock) + + # We only mock this so that we can make sure we never upload any artifact + seal(upload_artifact_mock) + + with tempfile.TemporaryDirectory() as temp_dir: + run_docker_task(client_mock, self.task, Path(temp_dir)) + + self.task.refresh_from_db() + self.assertEqual(self.task.state, State.Cancelled) + self.assertIsNotNone(self.task.finished) + + self.assertEqual(client_mock.images.pull.call_count, 1) + self.assertEqual(client_mock.images.pull.call_args, call("image")) + self.assertEqual(client_mock.containers.get.call_count, 1) + self.assertEqual(client_mock.containers.get.call_args, call(f"ponos-{self.task.id}")) + self.assertEqual(client_mock.containers.run.call_count, 1) + self.assertEqual(client_mock.containers.run.call_args, call( + "image", + environment={"PONOS_DATA": "/data"}, + detach=True, + network="host", + volumes={temp_dir: {"bind": "/data", "mode": "rw"}}, + name=f"ponos-{self.task.id}", + )) + + self.assertEqual(container.reload.call_count, 2) + self.assertEqual(container.reload.call_args_list, [call(), call()]) + self.assertEqual(container.logs.call_count, 3) + self.assertEqual(container.logs.call_args_list, [call(), call(), call()]) + self.assertEqual(container.stop.call_count, 1) + self.assertEqual(container.stop.call_args, call()) + + self.assertEqual(upload_logs_mock.call_count, 2) + self.assertEqual(upload_logs_mock.call_args_list, [ + call(self.task, b"Running"), + call(self.task, b"(whilhelm scream)\n[ERROR] This task has been cancelled because it has exceeded its TTL of 1 second."), + ]) + + self.assertEqual(sleep_mock.call_count, 1) + self.assertEqual(sleep_mock.call_args, call(1)) + + self.assertEqual(upload_artifact_mock.call_count, 0) diff --git a/arkindex/ponos/tests/rq/test_run_task_rq.py b/arkindex/ponos/tests/rq/test_run_task_rq.py index 3ad16a949a2f2a95f4a6232d242399850719ce96..d94eec25593ce6b0d4c24cb7f9741f13e82b96a7 100644 --- a/arkindex/ponos/tests/rq/test_run_task_rq.py +++ b/arkindex/ponos/tests/rq/test_run_task_rq.py @@ -25,6 +25,7 @@ class TestRunTaskRQ(FixtureTestCase): run=0, depth=0, state=State.Pending, + ttl=0, ) def test_no_image(self): @@ -55,9 +56,16 @@ class TestRunTaskRQ(FixtureTestCase): run=0, depth=0, state=state, + ttl=0, ) - parent = self.task.parents.create(slug="parent4", process=self.process, run=0, depth=0) - for state in {State.Stopped, State.Error, State.Failed}: + parent = self.task.parents.create( + slug="parent4", + process=self.process, + run=0, + depth=0, + ttl=0, + ) + for state in {State.Stopped, State.Error, State.Failed, State.Cancelled}: self.task.state = State.Pending self.task.save() with self.subTest(state=state): diff --git a/arkindex/ponos/tests/rq/test_trigger.py b/arkindex/ponos/tests/rq/test_trigger.py index e0b8caf97b283f929c53664f124314fdf3552808..406dd77a8987f02ae48552a5d71bd11afbf4360e 100644 --- a/arkindex/ponos/tests/rq/test_trigger.py +++ b/arkindex/ponos/tests/rq/test_trigger.py @@ -7,7 +7,7 @@ from arkindex.process.models import ProcessMode, WorkerVersion from arkindex.project.tests import FixtureTestCase -class TestModels(FixtureTestCase): +class TestTrigger(FixtureTestCase): @classmethod def setUpTestData(cls): @@ -21,8 +21,8 @@ class TestModels(FixtureTestCase): ) cls.worker_version1 = WorkerVersion.objects.get(worker__slug="reco") cls.worker_version2 = WorkerVersion.objects.get(worker__slug="dla") - cls.run1 = cls.process.worker_runs.create(version=cls.worker_version1, parents=[]) - cls.run2 = cls.process.worker_runs.create(version=cls.worker_version2, parents=[cls.run1.id]) + cls.run1 = cls.process.worker_runs.create(version=cls.worker_version1, ttl=0) + cls.run2 = cls.process.worker_runs.create(version=cls.worker_version2, parents=[cls.run1.id], ttl=0) @override_settings(PONOS_RQ_EXECUTION=True) @patch("arkindex.ponos.tasks.run_task_rq.delay") diff --git a/arkindex/ponos/tests/tasks/test_partial_update.py b/arkindex/ponos/tests/tasks/test_partial_update.py index 0306ea5ad6fcafbb8b335190a3792ef4ff3ef576..e43cc217ae4d2690c4862846aa66ae0ed79842ed 100644 --- a/arkindex/ponos/tests/tasks/test_partial_update.py +++ b/arkindex/ponos/tests/tasks/test_partial_update.py @@ -49,6 +49,7 @@ class TestTaskPartialUpdate(FixtureAPITestCase): (State.Running, State.Completed), (State.Running, State.Failed), (State.Running, State.Error), + (State.Running, State.Cancelled), (State.Stopping, State.Stopped), (State.Stopping, State.Error), ) diff --git a/arkindex/ponos/tests/tasks/test_restart.py b/arkindex/ponos/tests/tasks/test_restart.py index f89fec15952f89e17a12184126d0c3c37e164569..67623102526118c531f9d4ff7dbf0fbf37d4573f 100644 --- a/arkindex/ponos/tests/tasks/test_restart.py +++ b/arkindex/ponos/tests/tasks/test_restart.py @@ -13,7 +13,7 @@ from arkindex.project.tests import FixtureAPITestCase from arkindex.users.models import Role -@override_settings(PONOS_LOG_TAIL=42) +@override_settings(PONOS_LOG_TAIL=42, PONOS_MAXIMUM_TASK_TTL=3600) class TestTaskRestart(FixtureAPITestCase): @classmethod @@ -145,14 +145,14 @@ class TestTaskRestart(FixtureAPITestCase): } s3_mock.meta.client.generate_presigned_url.return_value = "http://somewhere" - task4 = self.process.tasks.create(run=self.task1.run, depth=1) + task4 = self.process.tasks.create(run=self.task1.run, depth=1, ttl=0) task4.parents.add(self.task2) task4.children.add(self.task3) task_2_slug = self.task2.slug with patch("django.utils.timezone.now") as mock_now: mock_now.return_value = datetime.now(timezone.utc) + timedelta(minutes=1) - old_task_2 = self.process.tasks.create(run=self.task1.run, depth=1, slug=f"{task_2_slug}_old1") + old_task_2 = self.process.tasks.create(run=self.task1.run, depth=1, slug=f"{task_2_slug}_old1", ttl=0) old_task_2.state = State.Error.value old_task_2.original_task_id = self.task1.id old_task_2.save() @@ -206,6 +206,7 @@ class TestTaskRestart(FixtureAPITestCase): "requires_gpu": True, "started": None, "finished": None, + "ttl": 3600, }, ) self.assertQuerySetEqual(self.task2.children.all(), Task.objects.none()) @@ -242,7 +243,7 @@ class TestTaskRestart(FixtureAPITestCase): } s3_mock.meta.client.generate_presigned_url.return_value = "http://somewhere" - task4 = self.process.tasks.create(run=self.task1.run, depth=1) + task4 = self.process.tasks.create(run=self.task1.run, depth=1, ttl=0) task4.parents.add(self.task2) task4.children.add(self.task3) task_2_slug = self.task2.slug @@ -278,6 +279,7 @@ class TestTaskRestart(FixtureAPITestCase): "requires_gpu": False, "started": None, "finished": None, + "ttl": 3600, }, ) self.assertQuerySetEqual(self.task2.children.all(), Task.objects.none()) @@ -315,6 +317,7 @@ class TestTaskRestart(FixtureAPITestCase): depth=0, slug=f"{task_1_slug}_old1", expiry=datetime(1970, 1, 1, tzinfo=timezone.utc), + ttl=0, ) self.task1.state = State.Error.value @@ -350,6 +353,7 @@ class TestTaskRestart(FixtureAPITestCase): "requires_gpu": False, "started": None, "finished": None, + "ttl": 3600, }, ) self.assertQuerySetEqual(self.task1.children.all(), Task.objects.none()) @@ -410,6 +414,7 @@ class TestTaskRestart(FixtureAPITestCase): "requires_gpu": False, "started": None, "finished": None, + "ttl": 3600, }, ) diff --git a/arkindex/ponos/tests/tasks/test_retrieve.py b/arkindex/ponos/tests/tasks/test_retrieve.py index d5f6040420aeaf48ef2bf5ac2bfe10d3bd0c978e..5fe9581a411349d3f0a58d075a9416235a9058a1 100644 --- a/arkindex/ponos/tests/tasks/test_retrieve.py +++ b/arkindex/ponos/tests/tasks/test_retrieve.py @@ -102,6 +102,7 @@ class TestTaskRetrieve(FixtureAPITestCase): "requires_gpu": False, "started": None, "finished": None, + "ttl": 3600, }, ) @@ -189,6 +190,7 @@ class TestTaskRetrieve(FixtureAPITestCase): "requires_gpu": False, "started": None, "finished": None, + "ttl": 3600, }, ) @@ -234,6 +236,7 @@ class TestTaskRetrieve(FixtureAPITestCase): "requires_gpu": False, "started": None, "finished": None, + "ttl": 3600, }, ) diff --git a/arkindex/ponos/tests/tasks/test_update.py b/arkindex/ponos/tests/tasks/test_update.py index dd71c715eec857056f247860040a079eecf6d995..97029d9324ec065e5005327b9975a48cf7aabc22 100644 --- a/arkindex/ponos/tests/tasks/test_update.py +++ b/arkindex/ponos/tests/tasks/test_update.py @@ -59,6 +59,7 @@ class TestTaskUpdate(FixtureAPITestCase): (State.Running, State.Completed), (State.Running, State.Failed), (State.Running, State.Error), + (State.Running, State.Cancelled), (State.Stopping, State.Stopped), (State.Stopping, State.Error), ) @@ -130,8 +131,8 @@ class TestTaskUpdate(FixtureAPITestCase): corpus=self.corpus, activity_state=ActivityState.Ready ) - init_run = test_process.worker_runs.create(version=WorkerVersion.objects.get(worker__slug="initialisation"), parents=[]) - test_run = test_process.worker_runs.create(version=self.recognizer, parents=[init_run.id]) + init_run = test_process.worker_runs.create(version=WorkerVersion.objects.get(worker__slug="initialisation"), ttl=0) + test_run = test_process.worker_runs.create(version=self.recognizer, parents=[init_run.id], ttl=0) test_process.run() @@ -226,8 +227,8 @@ class TestTaskUpdate(FixtureAPITestCase): activity_state=ActivityState.Ready ) init_version = WorkerVersion.objects.get(worker__slug="initialisation") - init_run = test_process.worker_runs.create(version=init_version, parents=[]) - test_run = test_process.worker_runs.create(version=self.recognizer, parents=[init_run.id]) + init_run = test_process.worker_runs.create(version=init_version, ttl=0) + test_run = test_process.worker_runs.create(version=self.recognizer, parents=[init_run.id], ttl=0) test_process.run() @@ -299,7 +300,7 @@ class TestTaskUpdate(FixtureAPITestCase): chunks=2, activity_state=ActivityState.Ready ) - test_run = test_process.worker_runs.create(version=self.recognizer) + test_run = test_process.worker_runs.create(version=self.recognizer, ttl=0) test_process.run() @@ -399,9 +400,21 @@ class TestTaskUpdate(FixtureAPITestCase): corpus=self.corpus, activity_state=ActivityState.Ready ) - test_run_1 = test_process.worker_runs.create(version=self.recognizer) - test_run = test_process.worker_runs.create(version=self.recognizer, model_version_id=test_model_version.id) - test_run_2 = test_process.worker_runs.create(version=self.recognizer, model_version_id=test_model_version.id, configuration_id=test_configuration.id) + test_run_1 = test_process.worker_runs.create( + version=self.recognizer, + ttl=0, + ) + test_run = test_process.worker_runs.create( + version=self.recognizer, + model_version_id=test_model_version.id, + ttl=0, + ) + test_run_2 = test_process.worker_runs.create( + version=self.recognizer, + model_version_id=test_model_version.id, + configuration_id=test_configuration.id, + ttl=0, + ) test_process.run() diff --git a/arkindex/ponos/tests/test_models.py b/arkindex/ponos/tests/test_models.py index 443944d4badb9235b8418c173358890c186f25fe..a94dc7373408de5e8e8fe786756c352c3bc9dc3c 100644 --- a/arkindex/ponos/tests/test_models.py +++ b/arkindex/ponos/tests/test_models.py @@ -65,13 +65,15 @@ class TestModels(FixtureAPITestCase): command="do something --like this", slug="test_task_1", run=1, - depth=0 + depth=0, + ttl=0, ) task2 = self.process.tasks.create( command="do something else --like that", slug="test_task_2", run=1, - depth=0 + depth=0, + ttl=0, ) self.assertEqual(task1.expiry, expected_expiry) diff --git a/arkindex/process/admin.py b/arkindex/process/admin.py index cac9fdfef231e9927b11fbf37204f337d639dae8..d43d5ae745b56e88c9c67af0c944e56eebb25179 100644 --- a/arkindex/process/admin.py +++ b/arkindex/process/admin.py @@ -119,20 +119,6 @@ class WorkerVersionAdmin(admin.ModelAdmin): ) readonly_fields = ("id", ) - def save_model(self, request, obj, form, change): - # When a WorkerVersion is created with a feature, or an existing one has its feature updated, clear the cached versions providing features - if form["feature"]._has_changed(): - # `cache_clear` is a function defined by the `functools.lru_cache` decorator - # on the function itself, not on its return value - WorkerVersion.objects.get_by_feature.cache_clear() - super().save_model(request, obj, form, change) - - def delete_model(self, request, obj): - # When this WorkerVersion provides an Arkindex feature, clear the cached versions providing features - if obj.feature is not None: - WorkerVersion.objects.get_by_feature.cache_clear() - super().delete_model(request, obj) - class WorkerConfigurationAdmin(admin.ModelAdmin): list_display = ("id", "name", "worker") diff --git a/arkindex/process/api.py b/arkindex/process/api.py index 156251d1a8362e5eaeac5c399e67e1281ed90c93..8348ff930641d50335a665336eb1f0b95502208f 100644 --- a/arkindex/process/api.py +++ b/arkindex/process/api.py @@ -74,6 +74,7 @@ from arkindex.process.serializers.imports import ( ApplyProcessTemplateSerializer, CorpusProcessSerializer, CreateProcessTemplateSerializer, + ExportProcessSerializer, FilesProcessSerializer, ProcessDetailsSerializer, ProcessElementLightSerializer, @@ -261,14 +262,17 @@ class ProcessList(ProcessACLMixin, ListAPIView): except ValueError: raise ValidationError({"state": [f"State '{state_value}' does not exist"]}) + last_run_tasks_filter = ( + Q(tasks__run=F("last_run")) + & ~Q(tasks__id__in=Task.objects.filter(process_id=OuterRef("id")).values("original_task_id")) + ) + # Filter out processes which have a task with an incompatible state on their last run excluding_states = STATES_ORDERING[:STATES_ORDERING.index(state)] - excluded_processes = qs.filter( - Q(tasks__run=F("last_run")), - Q(tasks__state__in=excluding_states) - ) + excluded_processes = qs.filter(last_run_tasks_filter & Q(tasks__state__in=excluding_states)) + # Keep non excluded processes matching the state on their last run tasks - state_query = Q(tasks__run=F("last_run")) & Q(tasks__state=state) + state_query = last_run_tasks_filter & Q(tasks__state=state) if state == State.Unscheduled: # Handle the absence of tasks as unscheduled state_query |= Q(tasks__isnull=True) @@ -1487,6 +1491,7 @@ class WorkerRunDetails(ProcessACLMixin, RetrieveUpdateDestroyAPIView): ) queryset = WorkerRun.objects \ + .using("default") \ .filter(run_filters) \ .select_related("process__corpus") \ .annotate(process_has_tasks=Exists(Task.objects.filter(process=OuterRef("process_id")))) @@ -2370,3 +2375,29 @@ class ProcessArtifactDownload(APIView): def get(self, request, *args, **kwargs): artifact = self.get_object(*args, **kwargs) return redirect(artifact.s3_url) + + +@extend_schema_view( + post=extend_schema( + operation_id="CreateExportProcess", + tags=["process"] + ), +) +class ExportProcess(CreateAPIView): + permission_classes = (IsVerified, ) + serializer_class = ExportProcessSerializer + + @cached_property + def corpus(self): + corpus = get_object_or_404(Corpus.objects.readable(self.request.user), id=self.kwargs["corpus_id"]) + if not corpus.is_processable(self.request.user): + raise PermissionDenied(detail="You do not have an admin access to this corpus.") + return corpus + + def get_serializer_context(self): + context = super().get_serializer_context() + # Ignore this step when generating the schema with OpenAPI + if not self.kwargs: + return context + context["corpus"] = self.corpus + return context diff --git a/arkindex/process/builder.py b/arkindex/process/builder.py index 474342ba264e1b8f833d0eec1a587746e56b4f8f..5a1c347a32e95b8bc3fc0234e71338388c860c27 100644 --- a/arkindex/process/builder.py +++ b/arkindex/process/builder.py @@ -102,6 +102,7 @@ class ProcessBuilder: shm_size=shm_size, extra_files=extra_files, worker_run=worker_run, + ttl=worker_run.ttl, ) ) @@ -217,7 +218,10 @@ class ProcessBuilder: from arkindex.process.models import ArkindexFeature, WorkerVersion import_version = WorkerVersion.objects.get_by_feature(ArkindexFeature.FileImport) - worker_run, _ = self.process.worker_runs.get_or_create(version=import_version) + worker_run, _ = self.process.worker_runs.get_or_create( + version=import_version, + defaults={"ttl": self.process.corpus.applied_maximum_task_ttl}, + ) self._build_task( slug="import_files", @@ -246,6 +250,7 @@ class ProcessBuilder: version=ingest_version, model_version=None, configuration=worker_configuration, + defaults={"ttl": self.process.corpus.applied_maximum_task_ttl}, ) env = { @@ -290,7 +295,8 @@ class ProcessBuilder: # If there is no elements initialisation worker run in the process, create one else: initialisation_worker_run = self.process.worker_runs.create( - version=init_elements_version + version=init_elements_version, + ttl=self.process.corpus.applied_maximum_task_ttl, ) # Link all parentless worker runs to the initialisation worker run no_parents = [run for run in worker_runs if not len(run.parents)] @@ -355,9 +361,18 @@ class ProcessBuilder: self.tasks.append(task) self.tasks_parents[task.slug].extend(parent_slugs) + @prefetch_worker_runs def build_export(self): - # The export mode works like the workers mode for the process builder - self.build_workers() + # We do not need elements initialisation for export processes + runs = list(self.process.worker_runs.all()) + for run in runs: + self._build_task( + slug=run.version.worker.slug, + image=run.version.docker_image_iid, + command=run.version.docker_command, + worker_run=run, + ) + self._create_worker_versions_cache([(run.version_id, None, run.configuration_id)]) def build(self, *args, **kwargs): if not (_build := getattr(self, f"build_{self.mode_key}", None)): diff --git a/arkindex/process/management/commands/fake_worker_run.py b/arkindex/process/management/commands/fake_worker_run.py index 30adeb081cd34066c60942b9443ffdcde33b7355..e1e9e1d72fc2ef648f2e4d4c3967507e6401b8d6 100644 --- a/arkindex/process/management/commands/fake_worker_run.py +++ b/arkindex/process/management/commands/fake_worker_run.py @@ -35,7 +35,7 @@ class Command(BaseCommand): worker_run, created = process.worker_runs.get_or_create( version=worker_version, - defaults={"parents": []}, + defaults={"ttl": 0}, ) if created: diff --git a/arkindex/process/managers.py b/arkindex/process/managers.py index 7263aed19fec727a5b57b319f658158132f4988e..8bd8ae9e69e6e841c02dceb66de37b43596df61f 100644 --- a/arkindex/process/managers.py +++ b/arkindex/process/managers.py @@ -1,6 +1,6 @@ import logging import operator -from functools import lru_cache, reduce +from functools import reduce from django.db import connections from django.db.models import Exists, Manager, ManyToOneRel, OuterRef, Q @@ -162,7 +162,6 @@ class WorkerResultSourceQuerySet(QuerySet): class WorkerVersionManager(Manager): - @lru_cache def get_by_feature(self, feature): return self.get_queryset().get_by_feature(feature) diff --git a/arkindex/process/migrations/0046_workerrun_ttl.py b/arkindex/process/migrations/0046_workerrun_ttl.py new file mode 100644 index 0000000000000000000000000000000000000000..29c84a3534d7a6c22b130c7f291562d8d7bafda5 --- /dev/null +++ b/arkindex/process/migrations/0046_workerrun_ttl.py @@ -0,0 +1,23 @@ +# Generated by Django 5.0.8 on 2024-11-07 13:26 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("process", "0045_remove_workerversionstate_processing"), + ] + + operations = [ + migrations.AddField( + model_name="workerrun", + name="ttl", + field=models.PositiveIntegerField( + default=0, + help_text="Maximum time-to-live for tasks created from this WorkerRun, in seconds. 0 means infinite.", + verbose_name="TTL", + ), + preserve_default=False, + ), + ] diff --git a/arkindex/process/models.py b/arkindex/process/models.py index 4dec68a471c76f76d147fec5b032402f22b19da6..b618b6c530dc3ee5f8435a8a31dcc6fe9b9930f9 100644 --- a/arkindex/process/models.py +++ b/arkindex/process/models.py @@ -6,7 +6,7 @@ from django.contrib.contenttypes.fields import GenericRelation from django.core.exceptions import ValidationError from django.core.validators import MinLengthValidator, MinValueValidator from django.db import models, transaction -from django.db.models import F, Q +from django.db.models import Exists, F, OuterRef, Q from django.urls import reverse from django.utils import timezone from django.utils.functional import cached_property @@ -67,6 +67,11 @@ class ProcessMode(Enum): Export = "export" +class ExportFormat(Enum): + PDF = "pdf" + PageXML = "page_xml" + + class Process(IndexableModel): name = models.CharField(null=True, blank=True, max_length=250) @@ -271,10 +276,24 @@ class Process(IndexableModel): # This prevents performing another SQL request when tasks have already been prefetched. # See https://stackoverflow.com/a/19651840/5990435 if self.has_prefetched_tasks: - task_states = set(t.state for t in self.tasks.all() if t.run == run) + restarted_tasks = set(t.original_task_id for t in self.tasks.all()) + task_states = set( + t.state + for t in self.tasks.all() + if t.run == run + and t.id not in restarted_tasks + ) else: task_states = set( - self.tasks.filter(run=run).values_list("state", flat=True) + self.tasks + .filter(run=run) + # Skip tasks that have been restarted + .exclude(Exists(Task.objects.filter( + process_id=OuterRef("process_id"), + run=OuterRef("run"), + original_task_id=OuterRef("pk"), + ))) + .values_list("state", flat=True) ) # This run has no tasks @@ -319,6 +338,19 @@ class Process(IndexableModel): for run in self.worker_runs.all(): if run.version.is_init_elements(): continue + + if new_process.mode == ProcessMode.Template or new_process.corpus.applied_maximum_task_ttl == 0: + # When the destination process is a template, we do not apply any limits and copy the original TTL. + # The limits will be applied only when applying a template. + # With other modes, when the corpus has no limits, we also just use the original TTL. + ttl = run.ttl + elif run.ttl == 0: + # The original TTL was infinite and there is a limit, so we use the corpus' limit + ttl = new_process.corpus.applied_maximum_task_ttl + else: + # Apply the limit normally when no infinity is involved + ttl = min(run.ttl, new_process.corpus.applied_maximum_task_ttl) + # Create a new WorkerRun with same version, configuration and parents. new_run = WorkerRun( process=new_process, @@ -333,7 +365,8 @@ class Process(IndexableModel): if run.version.gpu_usage == FeatureUsage.Supported else run.version.gpu_usage == FeatureUsage.Required ), - summary=run.summary + summary=run.summary, + ttl=ttl, ) # Save the correspondence between this process' worker_run and the new one new_runs[run.id] = new_run @@ -351,7 +384,7 @@ class Process(IndexableModel): """ Return a queryset of elements involved in this process """ - if self.mode != ProcessMode.Workers: + if self.mode not in [ProcessMode.Workers, ProcessMode.Export]: return Element.objects.none() elements = None @@ -666,6 +699,8 @@ class ArkindexFeature(Enum): InitElements = "init_elements" FileImport = "file_import" S3Ingest = "s3_ingest" + # When adding a new export worker, don't forget to also update the ExportFormat enum and the + # FEATURE_FORMAT_MAP dictionary which maps export formats to arkindex features ExportPDF = "pdf_export" ExportPageXML = "pagexml_export" @@ -802,7 +837,7 @@ class WorkerVersion(models.Model): def is_init_elements(self): return ( - self.id == WorkerVersion.objects.get_by_feature(ArkindexFeature.InitElements).id + self.feature == ArkindexFeature.InitElements # Make detection of the init task in case its version changes or self.worker.slug.startswith("init-elements") or self.worker.slug.startswith("init_elements") @@ -864,6 +899,10 @@ class WorkerRun(models.Model): updated = models.DateTimeField(auto_now=True) has_results = models.BooleanField(default=False) use_gpu = models.BooleanField(default=False) + ttl = models.PositiveIntegerField( + verbose_name="TTL", + help_text="Maximum time-to-live for tasks created from this WorkerRun, in seconds. 0 means infinite.", + ) objects = WorkerRunManager() @@ -926,7 +965,8 @@ class WorkerRun(models.Model): .select_related("version__worker") # The IDs are required so that Django understands that everything is prefetched. # WorkerVersion.slug will just require the version ID and worker slug - .only("id", "version_id", "version__worker_id", "version__worker__slug") + # The init_elements checks requires the version's feature field + .only("id", "version_id", "version__worker_id", "version__worker__slug", "version__feature") ) parents = [ worker_run.task_slug + slug_suffix @@ -977,7 +1017,8 @@ class WorkerRun(models.Model): process=process, worker_run=self, extra_files=extra_files, - requires_gpu=requires_gpu + requires_gpu=requires_gpu, + ttl=self.ttl, ) return task, parents @@ -1176,3 +1217,10 @@ class CorpusWorkerVersion(models.Model): name="corpus_workerversion_version_not_null_configuration_not_null", ), ] + + +# A mapping of export formats with their corresponding arkindex feature +FEATURE_FORMAT_MAP = { + ExportFormat.PDF: ArkindexFeature.ExportPDF, + ExportFormat.PageXML: ArkindexFeature.ExportPageXML +} diff --git a/arkindex/process/serializers/imports.py b/arkindex/process/serializers/imports.py index aff2ea91ae0b9710945fa4a2d6f477d442ff9a9c..71bdbf729fdeb74b28596134658d7a946fcdce3c 100644 --- a/arkindex/process/serializers/imports.py +++ b/arkindex/process/serializers/imports.py @@ -2,26 +2,34 @@ from collections import defaultdict from textwrap import dedent from django.conf import settings +from django.core.exceptions import ValidationError as DjangoValidationError +from django.db import transaction +from django.utils.functional import cached_property from django.utils.module_loading import import_string from rest_framework import serializers from rest_framework.exceptions import PermissionDenied, ValidationError -from arkindex.documents.models import Corpus, Element, ElementType, MLClass +from arkindex.documents.models import Corpus, CorpusExport, CorpusExportState, Element, ElementType, MLClass from arkindex.ponos.models import Farm, State from arkindex.ponos.serializers import TaskLightSerializer from arkindex.process.models import ( + FEATURE_FORMAT_MAP, ActivityState, DataFile, + ExportFormat, FeatureUsage, Process, + ProcessElement, ProcessMode, WorkerRun, + WorkerVersion, WorkerVersionState, ) +from arkindex.process.serializers.workers import USER_CONFIGURATION_FIELD_TYPE_DATA_TYPE, UserConfigurationFieldType from arkindex.process.utils import get_default_farm from arkindex.project.mixins import ProcessACLMixin from arkindex.project.serializer_fields import EnumField, LinearRingField -from arkindex.project.validators import MaxValueValidator +from arkindex.project.validators import MaxValueValidator, XorValidator from arkindex.training.models import ModelVersionState from arkindex.users.models import Role from arkindex.users.utils import get_max_level @@ -109,7 +117,7 @@ class ProcessSerializer(ProcessLightSerializer): ) template_id = serializers.PrimaryKeyRelatedField( - queryset=Process.objects.none(), + read_only=True, allow_null=True, source="template", style={"base_template": "input.html"} @@ -177,6 +185,9 @@ class ProcessDetailsSerializer(ProcessSerializer): data = super().validate(data) edited_fields = set(data.keys()) + if not self.instance: + return data + # Fields that can always be edited on any process of any state editable_fields = {"name", "state"} @@ -352,6 +363,262 @@ class FilesProcessSerializer(serializers.ModelSerializer): return process +class ExportProcessSerializer(ProcessDetailsSerializer): + export_id = serializers.PrimaryKeyRelatedField( + write_only=True, + queryset=CorpusExport.objects.none(), + allow_null=True, + source="export", + help_text=dedent(f""" + Optional ID of an existing SQLite corpus export to use. That export must not be in the `failed` state. + + If set to `None`, a new SQLite corpus export will be created, provided that none is already running or was completed less than {settings.EXPORT_TTL_SECONDS} seconds ago. + """) + ) + format = EnumField(ExportFormat, write_only=True) + selection = serializers.BooleanField( + write_only=True, + default=None, + help_text="Set to `True` to export only the elements in the selection (that belong to the target corpus)." + ) + configuration = serializers.DictField( + write_only=True, + default={}, + help_text="The configuration parameters must match the user configuration parameters defined for the worker version used for the selected export mode." + ) + # These fields need to be redefined here because as they are explicitly defined in the serializers this one is based on, putting them in `read_only_fields` + # does not work https://github.com/encode/django-rest-framework/blob/9016efe3fc412488df92912c619f8f24fed2937c/rest_framework/serializers.py#L1092 + element_type = serializers.SlugRelatedField( + slug_field="slug", + allow_null=True, + style={"base_template": "input.html"}, + read_only=True, + ) + element_name_contains = serializers.CharField( + source="name_contains", + allow_null=True, + allow_blank=True, + max_length=250, + read_only=True, + ) + template_id = serializers.PrimaryKeyRelatedField( + allow_null=True, + source="template", + style={"base_template": "input.html"}, + read_only=True, + ) + ml_class_id = serializers.PrimaryKeyRelatedField( + default=None, + allow_null=True, + source="ml_class", + style={"base_template": "input.html"}, + read_only=True, + ) + state = EnumField(State, required=False, read_only=True) + farm_id = serializers.PrimaryKeyRelatedField( + queryset=Farm.objects.all(), + source="farm", + write_only=True, + allow_null=True, + default=None, + ) + + def validate_farm_id(self, farm): + """ + Using the `default=` on the farm field would allow the default farm + to be used without having access rights to it, so we do the ACL checks + here during validation. We thus do not filter the field's queryset, + as we would be duplicating the ACL checks otherwise. + """ + if farm is None: + farm = get_default_farm() + + if farm and not farm.is_available(self.context["request"].user): + raise ValidationError(["You do not have access to this farm."]) + + return farm + + class Meta(ProcessDetailsSerializer.Meta): + fields = ProcessDetailsSerializer.Meta.fields + ( + "export_id", + "format", + "selection", + "configuration", + "farm_id", + ) + read_only_fields = ProcessDetailsSerializer.Meta.read_only_fields + ( + "element_name_contains", + "element_type", + "template_id", + "ml_class", + "ml_class_id", + "name", + "state", + "load_children" + ) + validators = [XorValidator("element", "selection", required=False)] + + def get_arkindex_feature(self, format): + try: + return FEATURE_FORMAT_MAP[format] + except KeyError: + raise ValidationError({"format": [f"{format} is not a supported export format."]}) + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + user = self.context["request"].user + self.fields["element_id"].queryset = Element.objects.filter(corpus__in=Corpus.objects.readable(user)) + self.fields["export_id"].queryset = CorpusExport.objects.filter(corpus__in=Corpus.objects.readable(user)) + + @cached_property + def feature_worker_version(self): + feature = self.get_arkindex_feature(self.format) + return WorkerVersion.objects.get_by_feature(feature) + + def validate_export_id(self, export): + corpus = self.context["corpus"] + user = self.context["request"].user + if export is None: + # When no export_id is set, a new export must be created + CorpusExport.objects.validate_creatable(corpus, "default") + return CorpusExport( + corpus=corpus, + source="default", + user=user, + state=CorpusExportState.Created + ) + + if export is not None and export.corpus_id != corpus.id: + raise ValidationError(f"Export {export.id} does not belong to corpus {corpus.id}.") + if export.state == CorpusExportState.Failed: + raise ValidationError(f"Export {export.id} has failed.") + return export + + def validate_element_id(self, element): + if not element: + return + corpus = self.context["corpus"] + if element.corpus_id != corpus.id: + raise ValidationError(f"Element {element.id} does not belong to corpus {corpus.id}.") + return element + + def validate_selection(self, selection): + corpus = self.context["corpus"] + user = self.context["request"].user + if selection and not user.selected_elements.filter(corpus=corpus).exists(): + raise ValidationError(f"The user selection does not contain any elements from corpus {corpus.id}.") + return selection + + def validate_wv_configuration(self, configuration, format): + errors = defaultdict(list) + missing_required = [] + self.format = format + worker_version = self.feature_worker_version + user_configuration = worker_version.configuration.get("user_configuration", {}) + + for key in worker_version.required_user_configuration_fields: + if configuration.get(key) is None: + errors[key].append("This parameter is required.") + missing_required.append(key) + + # Check for invalid parameters being sent + for key, value in configuration.items(): + if key in missing_required: + continue + if not user_configuration.get(key): + errors[key].append("This is a not a valid parameter for this worker version.") + continue + + user_config_field = user_configuration[key] + + param_type = UserConfigurationFieldType(user_config_field.get("type")) + choices = user_config_field.get("choices", None) + + if choices is not None and value not in choices: + errors[key].append(f"{value} is not an available choice.") + + if param_type != UserConfigurationFieldType.Enum: + data_type, data_class = USER_CONFIGURATION_FIELD_TYPE_DATA_TYPE[param_type] + try: + if not isinstance(value, data_class): + raise ValidationError + data_type.to_internal_value(value) + if param_type == UserConfigurationFieldType.List: + subtype = UserConfigurationFieldType(user_config_field.get("subtype")) + data_type, data_subclass = USER_CONFIGURATION_FIELD_TYPE_DATA_TYPE[subtype] + if any(not isinstance(item, data_subclass) for item in value): + errors[key].append(f"All items must be of type {data_subclass.__name__}.") + except (ValidationError, DjangoValidationError): + errors[key].append(f"This is not a valid value for a field of type {param_type.value}.") + + if errors: + raise ValidationError({"configuration": errors}) + + return { + key: configuration.get(key, user_configuration[key].get("default")) + for key in user_configuration + } + + + def validate(self, data): + super().validate(data) + export_format = data.get("format") + configuration = data.get("configuration") + + if configuration: + data["configuration"] = self.validate_wv_configuration(configuration, export_format) + + return data + + @transaction.atomic + def create(self, validated_data): + user = self.context["request"].user + corpus = self.context["corpus"] + self.format = validated_data["format"] + export = validated_data["export"] + worker_version = self.feature_worker_version + + # Create the process + export_process = Process.objects.create( + mode=ProcessMode.Export, + corpus=corpus, + creator=user, + element=validated_data.get("element"), + farm=validated_data.get("farm"), + ) + # If using the user selection, create process elements + if validated_data.get("selection"): + ProcessElement.objects.bulk_create( + ProcessElement( + process=export_process, + element=e + ) for e in user.selected_elements.filter(corpus=corpus) + ) + # Create and start a new corpus export if required + # The object id is calculated before saving, so we need to use _state.adding to know if the object + # actually already exists in DB or not (_state.adding is True if the object hasn't been saved) + # https://docs.djangoproject.com/en/5.1/ref/models/instances/#state + if export._state.adding: + export.save() + export.start() + # Get or create the worker configuration + worker_configuration, _ = worker_version.worker.configurations.get_or_create( + configuration={**validated_data["configuration"], "export_id": str(export.id)}, + defaults={ + "name": f"Configuration for process {str(export_process.id)}", + }, + ) + # Create a worker run + export_process.worker_runs.create( + version=worker_version, + configuration=worker_configuration, + ttl=corpus.applied_maximum_task_ttl, + ) + # Start the export process + export_process.run() + + return export_process + class StartProcessSerializer(serializers.Serializer): chunks = serializers.IntegerField( min_value=1, diff --git a/arkindex/process/serializers/worker_runs.py b/arkindex/process/serializers/worker_runs.py index e77cfbcff5a0ec75c76c28ead94ab1015d33d1cf..eb6ab16138af0062854135348292a6422ea330b9 100644 --- a/arkindex/process/serializers/worker_runs.py +++ b/arkindex/process/serializers/worker_runs.py @@ -1,6 +1,7 @@ from collections import defaultdict from textwrap import dedent +from django.core.validators import MaxValueValidator, MinValueValidator from rest_framework import serializers from rest_framework.exceptions import ValidationError @@ -23,6 +24,21 @@ from arkindex.training.serializers import ModelVersionLightSerializer # (process, worker version, model version…) with extra GET requests, we # do serialize all the related information on WorkerRun serializers. +def _ttl_from_corpus(serializer_field) -> int: + if isinstance(serializer_field.parent.instance, WorkerRun): + process = serializer_field.parent.instance.process + else: + process = serializer_field.context["process"] + + # This function may be called on a local process, which does not have a corpus, even if the API blocks them later on + if process.mode == ProcessMode.Local: + return 0 + + return process.corpus.applied_maximum_task_ttl + + +_ttl_from_corpus.requires_context = True + class WorkerRunSerializer(serializers.ModelSerializer): @@ -80,6 +96,15 @@ class WorkerRunSerializer(serializers.ModelSerializer): "Only a configuration of the WorkerVersion's worker may be set.", ) + ttl = serializers.IntegerField( + default=_ttl_from_corpus, + help_text=dedent(""" + Maximum time-to-live for tasks created from this WorkerRun, in seconds. `0` means infinite. + + Defaults to, and cannot exceed, the `maximum_task_ttl` on the corpus of the process. + """), + ) + process = ProcessLightSerializer(read_only=True) class Meta: @@ -96,6 +121,7 @@ class WorkerRunSerializer(serializers.ModelSerializer): "model_version", "summary", "use_gpu", + "ttl", ) read_only_fields = ( "id", @@ -113,27 +139,50 @@ class WorkerRunSerializer(serializers.ModelSerializer): user = self.context["request"].user self.fields["model_version_id"].queryset = ModelVersion.objects.executable(user).select_related("model") + @property + def _process(self): + if self.instance: + return self.instance.process + return self.context["process"] + + def validate_ttl(self, value) -> int: + if self._process.mode == ProcessMode.Local: + # Don't validate anything, the endpoint will not work on local processes anyway + return value + + corpus_ttl = self._process.corpus.applied_maximum_task_ttl + if corpus_ttl == 0: + # Allow infinity, and limit to the maximum value of an integer field + min_ttl, max_ttl = 0, 2147483647 + else: + # Restrict the maximum TTL further using the limit + min_ttl, max_ttl = 1, corpus_ttl + + MinValueValidator(min_ttl)(value) + MaxValueValidator(max_ttl)(value) + + return value + def validate(self, data): data = super().validate(data) errors = defaultdict(list) worker_version, model_version, configuration = None, None, None if self.instance: - process = self.instance.process worker_version = self.instance.version model_version = self.instance.model_version configuration = self.instance.configuration else: - data["process"] = process = self.context["process"] + data["process"] = self._process worker_version = data.get("version", worker_version) model_version = data.get("model_version", model_version) configuration = data.get("configuration", configuration) parents = data.get("parents", []) - if process.mode not in (ProcessMode.Workers, ProcessMode.Dataset): + if self._process.mode not in (ProcessMode.Workers, ProcessMode.Dataset): errors["process_id"].append("WorkerRuns can only be created or updated on Workers or Dataset processes.") - if process.has_tasks: + if self._process.has_tasks: errors["process_id"].append("WorkerRuns cannot be added or updated on processes that have already started.") if len(parents) != len(set(parents)): @@ -161,7 +210,16 @@ class WorkerRunSerializer(serializers.ModelSerializer): if worker_version.model_usage == FeatureUsage.Disabled: errors["model_version_id"].append("This worker version does not support models.") - existing_worker_run = process.worker_runs.filter(version=worker_version, model_version_id=model_version, configuration=configuration) + existing_worker_run = ( + self._process + .worker_runs + .using("default") + .filter( + version=worker_version, + model_version_id=model_version, + configuration=configuration, + ) + ) if self.instance: # The current worker run cannot be a duplicate of itself! existing_worker_run = existing_worker_run.exclude(id=self.instance.id) @@ -171,9 +229,9 @@ class WorkerRunSerializer(serializers.ModelSerializer): if errors: raise ValidationError(errors) - # Since we only allow creating a WorkerRun on a process without any task, we know the last run is None, + # Since we only allow creating or editing a WorkerRun on a process without any task, we know the last run is None, # without having to make any extra SQL queries to serialize it in the response. - process.last_run = None + self._process.last_run = None return data @@ -238,6 +296,8 @@ class UserWorkerRunSerializer(serializers.ModelSerializer): queryset=WorkerConfiguration.objects.all(), style={"base_template": "input.html"}, ) + # Default value for the TTL, as the local process does not have a corpus and the run will never actually run + ttl = serializers.HiddenField(default=0) def validate_worker_version_id(self, worker_version_id): # Check that the worker version exists @@ -305,7 +365,12 @@ class UserWorkerRunSerializer(serializers.ModelSerializer): class Meta: model = WorkerRun - fields = ("worker_version_id", "model_version_id", "configuration_id") + fields = ( + "worker_version_id", + "model_version_id", + "configuration_id", + "ttl", + ) class CorpusWorkerRunSerializer(WorkerRunSerializer): diff --git a/arkindex/process/serializers/workers.py b/arkindex/process/serializers/workers.py index bbafa5ca7201d2de1c2238a0e3b4388cc8432f76..e969205e36b0abfb476c013af11a2376ae8a5357 100644 --- a/arkindex/process/serializers/workers.py +++ b/arkindex/process/serializers/workers.py @@ -151,27 +151,30 @@ class UserConfigurationFieldType(Enum): Model = "model" +USER_CONFIGURATION_FIELD_TYPE_DATA_TYPE = { + UserConfigurationFieldType.Int: [serializers.IntegerField(), int], + UserConfigurationFieldType.Float: [serializers.FloatField(), float], + UserConfigurationFieldType.String: [serializers.CharField(), str], + UserConfigurationFieldType.Enum: [serializers.ChoiceField(choices=[]), None], + UserConfigurationFieldType.Boolean: [serializers.BooleanField(), bool], + UserConfigurationFieldType.Dict: [serializers.DictField(child=serializers.CharField()), dict], + UserConfigurationFieldType.List: [serializers.ListField(), list], + UserConfigurationFieldType.Model: [serializers.UUIDField(), str] +} + + class UserConfigurationFieldSerializer(serializers.Serializer): title = serializers.CharField() type = EnumField(UserConfigurationFieldType) subtype = EnumField(UserConfigurationFieldType, required=False) required = serializers.BooleanField(default=False) choices = serializers.ListField(required=False, allow_empty=False, allow_null=True) + multiline = serializers.BooleanField(required=False) def to_internal_value(self, data): errors = defaultdict(list) - allowed_fields = ["title", "type", "required", "default", "choices", "subtype"] - data_types = { - UserConfigurationFieldType.Int: [serializers.IntegerField(), int], - UserConfigurationFieldType.Float: [serializers.FloatField(), float], - UserConfigurationFieldType.String: [serializers.CharField(), str], - UserConfigurationFieldType.Enum: [serializers.ChoiceField(choices=[]), None], - UserConfigurationFieldType.Boolean: [serializers.BooleanField(), bool], - UserConfigurationFieldType.Dict: [serializers.DictField(child=serializers.CharField()), dict], - UserConfigurationFieldType.List: [serializers.ListField(), list], - UserConfigurationFieldType.Model: [serializers.UUIDField(), str] - } + allowed_fields = ("title", "type", "required", "default", "choices", "subtype", "multiline") if not isinstance(data, dict): errors["__all__"] = [f"User configuration field definitions should be of type dict, not {type(data).__name__}."] @@ -180,7 +183,7 @@ class UserConfigurationFieldSerializer(serializers.Serializer): for field in data: if field not in allowed_fields: errors[field].append( - "Configurable properties can only be defined using the following keys: title, type, required, default, subtype, choices." + f"Configurable properties can only be defined using the following keys: {', '.join(allowed_fields)}." ) default_value = data.get("default") @@ -192,12 +195,14 @@ class UserConfigurationFieldSerializer(serializers.Serializer): if field_type == UserConfigurationFieldType.List and not subtype: errors["subtype"].append('The "subtype" field must be set for "list" type properties.') + # Handle subtypes if subtype is not None: if field_type != UserConfigurationFieldType.List: errors["subtype"].append('The "subtype" field can only be set for a "list" type property.') if subtype not in [UserConfigurationFieldType.Int, UserConfigurationFieldType.Float, UserConfigurationFieldType.String, UserConfigurationFieldType.Boolean]: errors["subtype"].append("Subtype can only be int, float, bool or string.") + # Handle enums if choices is not None: if field_type != UserConfigurationFieldType.Enum: @@ -205,17 +210,22 @@ class UserConfigurationFieldSerializer(serializers.Serializer): # If the configuration parameter is of enum type, an eventual default value won't match the field type if default_value and default_value not in choices: errors["default"].append(f"{default_value} is not an available choice.") + + # Only allow the multiline property on string fields + if "multiline" in data and field_type != UserConfigurationFieldType.String: + errors["multiline"].append('The "multiline" field can only be set for a "string" type property.') + # Handle everything else if default_value is not None and field_type != UserConfigurationFieldType.Enum: try: - data_type, data_class = data_types[field_type] + data_type, data_class = USER_CONFIGURATION_FIELD_TYPE_DATA_TYPE[field_type] if not isinstance(default_value, data_class): raise ValidationError # In the case of model fields, the validation error is raised here if the default value is a string but not a UUID data_type.to_internal_value(default_value) # For lists, check that list elements are of given subtype if field_type == UserConfigurationFieldType.List and not errors.get("subtype"): - _, data_subclass = data_types[subtype] + _, data_subclass = USER_CONFIGURATION_FIELD_TYPE_DATA_TYPE[subtype] if any(not isinstance(item, data_subclass) for item in default_value): errors["default"].append(f"All items in the default value must be of type {data_subclass.__name__}.") except (ValidationError, DjangoValidationError): @@ -276,17 +286,16 @@ class WorkerVersionSerializer(serializers.ModelSerializer): return tag def validate_configuration(self, configuration): - errors = defaultdict(list) user_configuration = configuration.get("user_configuration") if not user_configuration: return configuration + field = serializers.DictField(child=UserConfigurationFieldSerializer()) try: field.to_internal_value(user_configuration) except ValidationError as e: - errors["user_configuration"].append(e.detail) - if errors: - raise ValidationError(errors) + raise ValidationError({"user_configuration": e.detail}) + return configuration def validate(self, data): diff --git a/arkindex/process/tasks.py b/arkindex/process/tasks.py index 7079572ea5a440cc310ae4e97b6522cecb197aa7..0ccd2865f4e9e04f70c50b39df423278297d7f2c 100644 --- a/arkindex/process/tasks.py +++ b/arkindex/process/tasks.py @@ -29,7 +29,7 @@ def initialize_activity(process: Process): worker_run for worker_run in ( process.worker_runs .select_related("version__worker") - .only("process_id", "version_id", "configuration_id", "model_version_id", "version__worker__slug") + .only("process_id", "version_id", "configuration_id", "model_version_id", "version__worker__slug", "version__feature") ) # Do not generate worker activities for the elements initialisation task if not worker_run.version.is_init_elements() diff --git a/arkindex/process/tests/commands/test_fake_worker_run.py b/arkindex/process/tests/commands/test_fake_worker_run.py index 6b8e614d8e0999fac6be0889173fc11403fd4242..97055247eafd7d759f77c860d379d89ed153e55e 100644 --- a/arkindex/process/tests/commands/test_fake_worker_run.py +++ b/arkindex/process/tests/commands/test_fake_worker_run.py @@ -64,7 +64,7 @@ class TestFakeWorkerRun(FixtureTestCase): def test_existing_worker_run(self): process = Process.objects.get(mode=ProcessMode.Local, creator=self.user) - worker_run = process.worker_runs.create(version=self.worker_version, parents=[]) + worker_run = process.worker_runs.create(version=self.worker_version, ttl=0) self.assertEqual(process.worker_runs.count(), 2) output = self.fake_worker_run(["--user", str(self.user.id), "--worker-version", str(self.worker_version.id)]) diff --git a/arkindex/process/tests/process/test_clear.py b/arkindex/process/tests/process/test_clear.py index 156330eb97589d351fb21f33a2ac97007a774b45..0593a54ce403a38d4fda7f0e1e42e70d112e9907 100644 --- a/arkindex/process/tests/process/test_clear.py +++ b/arkindex/process/tests/process/test_clear.py @@ -20,8 +20,14 @@ class TestProcessClear(FixtureAPITestCase): mode=ProcessMode.Workers, element_type=cls.corpus.types.get(slug="page") ) - cls.process.worker_runs.create(version=WorkerVersion.objects.get(worker__slug="reco")) - cls.process.worker_runs.create(version=WorkerVersion.objects.get(worker__slug="dla")) + cls.process.worker_runs.create( + version=WorkerVersion.objects.get(worker__slug="reco"), + ttl=0, + ) + cls.process.worker_runs.create( + version=WorkerVersion.objects.get(worker__slug="dla"), + ttl=0, + ) def test_clear(self): self.assertEqual(self.process.worker_runs.count(), 2) diff --git a/arkindex/process/tests/process/test_create.py b/arkindex/process/tests/process/test_create.py index f305c3ea2dea7c2da279612e44eb36fcae62e685..3f1d6aaec3e07f6f23e8d5c263d69f6340a96984 100644 --- a/arkindex/process/tests/process/test_create.py +++ b/arkindex/process/tests/process/test_create.py @@ -488,15 +488,17 @@ class TestCreateProcess(FixtureAPITestCase): ) init_run = process_2.worker_runs.create( version=self.init_elements_version, - parents=[] + ttl=0, ) run_1 = process_2.worker_runs.create( version=self.version_1, parents=[init_run.id], + ttl=0, ) run_2 = process_2.worker_runs.create( version=self.version_2, parents=[run_1.id], + ttl=0, ) self.assertFalse(process_2.tasks.exists()) @@ -587,7 +589,7 @@ class TestCreateProcess(FixtureAPITestCase): dataset = self.corpus.datasets.first() test_set = dataset.sets.get(name="test") ProcessDatasetSet.objects.create(process=process, set=test_set) - process.versions.add(self.version_1) + process.worker_runs.create(version=self.version_1, ttl=0) with self.assertNumQueries(9): response = self.client.post(reverse("api:process-start", kwargs={"pk": str(process.id)})) @@ -603,7 +605,7 @@ class TestCreateProcess(FixtureAPITestCase): self.worker_1.save() process = self.corpus.processes.create(creator=self.user, mode=ProcessMode.Workers) - process.versions.add(self.version_1) + process.worker_runs.create(version=self.version_1, ttl=0) with self.assertNumQueries(9): response = self.client.post(reverse("api:process-start", kwargs={"pk": str(process.id)})) diff --git a/arkindex/process/tests/process/test_default_process_name.py b/arkindex/process/tests/process/test_default_process_name.py index e30d315f63a9246128c6b516ec44effc49a3763d..e496e7ff00af52728120e7d4e9876b8081ce70ca 100644 --- a/arkindex/process/tests/process/test_default_process_name.py +++ b/arkindex/process/tests/process/test_default_process_name.py @@ -48,10 +48,25 @@ class TestProcessName(FixtureAPITestCase): is built from its worker runs. The elements initialisation worker run is ignored. """ init_elements_version = WorkerVersion.objects.get_by_feature(ArkindexFeature.InitElements) - init_run = self.workers_process.worker_runs.create(version=init_elements_version) - self.workers_process.worker_runs.create(version=self.recognizer, parents=[init_run.id]) - dla_run = self.workers_process.worker_runs.create(version=self.dla, parents=[init_run.id]) - self.workers_process.worker_runs.create(version=self.version_gpu, parents=[dla_run.id]) + init_run = self.workers_process.worker_runs.create( + version=init_elements_version, + ttl=0, + ) + self.workers_process.worker_runs.create( + version=self.recognizer, + parents=[init_run.id], + ttl=0, + ) + dla_run = self.workers_process.worker_runs.create( + version=self.dla, + parents=[init_run.id], + ttl=0, + ) + self.workers_process.worker_runs.create( + version=self.version_gpu, + parents=[dla_run.id], + ttl=0, + ) self.workers_process.save() self.workers_process.refresh_from_db() @@ -78,13 +93,44 @@ class TestProcessName(FixtureAPITestCase): dla - reco - dla """ init_elements_version = WorkerVersion.objects.get_by_feature(ArkindexFeature.InitElements) - init_run = self.workers_process.worker_runs.create(version=init_elements_version) - reco_run_1 = self.workers_process.worker_runs.create(version=self.recognizer, parents=[init_run.id]) - reco_run_2 = self.workers_process.worker_runs.create(version=self.recognizer, configuration=self.reco_config_1, parents=[reco_run_1.id]) - self.workers_process.worker_runs.create(version=self.recognizer, configuration=self.reco_config_2, parents=[reco_run_2.id]) - dla_run_1 = self.workers_process.worker_runs.create(version=self.dla, parents=[init_run.id]) - reco_run_4 = self.workers_process.worker_runs.create(version=self.recognizer, configuration=self.reco_config_3, parents=[dla_run_1.id]) - self.workers_process.worker_runs.create(version=self.dla, configuration=self.dla_config, parents=[reco_run_4.id]) + init_run = self.workers_process.worker_runs.create( + version=init_elements_version, + ttl=0, + ) + reco_run_1 = self.workers_process.worker_runs.create( + version=self.recognizer, + parents=[init_run.id], + ttl=0, + ) + reco_run_2 = self.workers_process.worker_runs.create( + version=self.recognizer, + configuration=self.reco_config_1, + parents=[reco_run_1.id], + ttl=0, + ) + self.workers_process.worker_runs.create( + version=self.recognizer, + configuration=self.reco_config_2, + parents=[reco_run_2.id], + ttl=0, + ) + dla_run_1 = self.workers_process.worker_runs.create( + version=self.dla, + parents=[init_run.id], + ttl=0, + ) + reco_run_4 = self.workers_process.worker_runs.create( + version=self.recognizer, + configuration=self.reco_config_3, + parents=[dla_run_1.id], + ttl=0, + ) + self.workers_process.worker_runs.create( + version=self.dla, + configuration=self.dla_config, + parents=[reco_run_4.id], + ttl=0, + ) self.workers_process.save() @@ -104,13 +150,27 @@ class TestProcessName(FixtureAPITestCase): If the default process worker name is too long (len() > 250) it gets truncated """ init_elements_version = WorkerVersion.objects.get_by_feature(ArkindexFeature.InitElements) - init_run = self.workers_process.worker_runs.create(version=init_elements_version) + init_run = self.workers_process.worker_runs.create(version=init_elements_version, ttl=0) # Update the recognizer worker's name so that it is long self.recognizer.worker.name = "animula vagula blandula hospes comesque corporis quae nunc abibis in loca pallidula rigida nudula ne" self.recognizer.worker.save() - reco_run_1 = self.workers_process.worker_runs.create(version=self.recognizer, parents=[init_run.id]) - reco_run_2 = self.workers_process.worker_runs.create(version=self.recognizer, configuration=self.reco_config_1, parents=[reco_run_1.id]) - self.workers_process.worker_runs.create(version=self.recognizer, configuration=self.reco_config_2, parents=[reco_run_2.id]) + reco_run_1 = self.workers_process.worker_runs.create( + version=self.recognizer, + parents=[init_run.id], + ttl=0, + ) + reco_run_2 = self.workers_process.worker_runs.create( + version=self.recognizer, + configuration=self.reco_config_1, + parents=[reco_run_1.id], + ttl=0, + ) + self.workers_process.worker_runs.create( + version=self.recognizer, + configuration=self.reco_config_2, + parents=[reco_run_2.id], + ttl=0, + ) self.workers_process.save() self.workers_process.refresh_from_db() @@ -131,8 +191,8 @@ class TestProcessName(FixtureAPITestCase): self.workers_process.name = "My process" self.workers_process.save() init_elements_version = WorkerVersion.objects.get_by_feature(ArkindexFeature.InitElements) - init_run = self.workers_process.worker_runs.create(version=init_elements_version) - self.workers_process.worker_runs.create(version=self.recognizer, parents=[init_run.id]) + init_run = self.workers_process.worker_runs.create(version=init_elements_version, ttl=0) + self.workers_process.worker_runs.create(version=self.recognizer, parents=[init_run.id], ttl=0) self.assertEqual(self.workers_process.name, "My process") builder = ProcessBuilder(process=self.workers_process) diff --git a/arkindex/process/tests/process/test_destroy.py b/arkindex/process/tests/process/test_destroy.py index 0ce7a8bc8e7b7827029c4d781cbee868d2f61d47..b87856e50cc290f43f37fc638862e8f1f1c2c4d8 100644 --- a/arkindex/process/tests/process/test_destroy.py +++ b/arkindex/process/tests/process/test_destroy.py @@ -111,7 +111,7 @@ class TestProcessDestroy(FixtureAPITestCase): A process with worker runs linked to data cannot be deleted """ self.client.force_login(self.user) - run = self.process.worker_runs.create(version=WorkerVersion.objects.first()) + run = self.process.worker_runs.create(version=WorkerVersion.objects.first(), ttl=0) page = self.corpus.elements.get(name="Volume 1, page 1r") metadata = page.metadatas.get() diff --git a/arkindex/process/tests/process/test_export_process.py b/arkindex/process/tests/process/test_export_process.py new file mode 100644 index 0000000000000000000000000000000000000000..3fd3d5b8eb4a3030a39145bdc06c6c107549dc7c --- /dev/null +++ b/arkindex/process/tests/process/test_export_process.py @@ -0,0 +1,606 @@ +from datetime import datetime, timedelta, timezone +from unittest.mock import call, patch +from uuid import uuid4 + +from django.test import override_settings +from rest_framework import status +from rest_framework.reverse import reverse + +from arkindex.documents.models import Corpus, CorpusExport, CorpusExportState +from arkindex.ponos.models import Farm +from arkindex.process.models import ArkindexFeature, Process, ProcessMode, WorkerVersion +from arkindex.project.tests import FixtureAPITestCase +from arkindex.users.models import Role, User + +DB_CREATED = "2020-02-02T01:23:45.678000Z" + + +@override_settings(EXPORT_SOURCES=["default", "cached_db"], PONOS_MAXIMUM_TASK_TTL=3600) +class TestExportProcess(FixtureAPITestCase): + """ + Test the creation of export processes + """ + @classmethod + def setUpTestData(cls): + super().setUpTestData() + with patch("django.utils.timezone.now") as mock_now: + mock_now.return_value = DB_CREATED + cls.complete_export = CorpusExport.objects.create(corpus=cls.corpus, user=cls.user, state=CorpusExportState.Done, source="default") + cls.other_corpus = Corpus.objects.create(name="Das korpus der anderen", public=True) + cls.corpus_element = cls.corpus.elements.first() + cls.other_element = cls.other_corpus.elements.create( + type=cls.other_corpus.types.create(slug="a_type", display_name="an element type"), + name="alien element", + ) + cls.pdf_export_version = WorkerVersion.objects.get_by_feature(ArkindexFeature.ExportPDF) + cls.pdf_export_version.configuration = { + "user_configuration": { + "page_type": {"title": "Page element type", "type": "string", "default": "page", "required": False}, + "order_by_name": {"title": "Order by name or nah", "type": "bool", "required": False, "default": False}, + "example_int": {"title": "Some integer parameter", "type": "int", "required": False, "default": 2}, + "example_choice": {"title": "Some choice parameter", "type": "enum", "required": False, "choices": ["a", "b", "c"], "default": "a"}, + "example_required": {"title": "Some parameter that is required and has no default", "type": "string", "required": True}, + "example_list": {"title": "Some list parameter", "type": "list", "subtype": "string", "default": ["pomme", "poire"], "required": False}, + "example_list_2": {"title": "Some other list parameter", "type": "list", "subtype": "string", "default": ["prune", "abricot"], "required": False} + } + } + cls.pdf_export_version.save() + + def test_requires_logged_in(self): + with self.assertNumQueries(0): + response = self.client.post( + reverse("api:export-process", kwargs={"corpus_id": str(self.corpus.id)}), + { + "format": "pdf", + "export_id": None, + }, + ) + self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) + self.assertDictEqual(response.json(), {"detail": "Authentication credentials were not provided."}) + + def test_requires_verified(self): + test_user = User.objects.create(email="kaji@seele.co.jp") + self.client.force_login(test_user) + + with self.assertNumQueries(2): + response = self.client.post( + reverse("api:export-process", kwargs={"corpus_id": str(self.corpus.id)}), + { + "format": "pdf", + "export_id": None, + }, + ) + self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) + self.assertDictEqual(response.json(), {"detail": "You do not have permission to perform this action."}) + + def test_export_id_required(self): + self.client.force_login(self.user) + + with self.assertNumQueries(3): + response = self.client.post( + reverse("api:export-process", kwargs={"corpus_id": str(self.other_corpus.id)}), + { + "format": "pdf" + }, + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertDictEqual(response.json(), {"export_id": ["This field is required."]}) + + @patch("arkindex.users.managers.BaseACLManager.filter_rights", return_value=Corpus.objects.none()) + def test_no_corpus_guest_access(self, filter_rights_mock): + test_user = User.objects.create(email="katsuragi@nerv.co.jp", verified_email=True) + self.client.force_login(test_user) + + with self.assertNumQueries(2): + response = self.client.post( + reverse("api:export-process", kwargs={"corpus_id": str(self.corpus.id)}), + { + "format": "pdf", + "export_id": None, + }, + ) + self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) + self.assertDictEqual(response.json(), {"detail": "No Corpus matches the given query."}) + self.assertEqual(filter_rights_mock.call_count, 1) + self.assertEqual(filter_rights_mock.call_args, call(test_user, Corpus, Role.Guest.value)) + + + @patch("arkindex.users.utils.get_max_level") + def test_requires_corpus_admin(self, max_level_mock): + for lvl in [Role.Guest.value, Role.Contributor.value]: + max_level_mock.return_value = lvl + self.client.force_login(self.user) + + with self.assertNumQueries(3): + response = self.client.post( + reverse("api:export-process", kwargs={"corpus_id": str(self.corpus.id)}), + { + "format": "pdf", + "export_id": None, + }, + ) + self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) + self.assertDictEqual(response.json(), {"detail": "You do not have an admin access to this corpus."}) + self.assertEqual(max_level_mock.call_count, 2) + self.assertEqual(max_level_mock.call_args, call(self.user, self.corpus)) + + + def test_export_in_corpus(self): + self.client.force_login(self.user) + + with self.assertNumQueries(4): + response = self.client.post( + reverse("api:export-process", kwargs={"corpus_id": str(self.other_corpus.id)}), + { + "format": "pdf", + "export_id": str(self.complete_export.id), + }, + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertDictEqual(response.json(), {"export_id": [f"Export {str(self.complete_export.id)} does not belong to corpus {str(self.other_corpus.id)}."]}) + + def test_failed_export(self): + failed_export = CorpusExport.objects.create(corpus=self.corpus, user=self.user, state=CorpusExportState.Failed, source="default") + self.client.force_login(self.user) + + with self.assertNumQueries(4): + response = self.client.post( + reverse("api:export-process", kwargs={"corpus_id": str(self.corpus.id)}), + { + "format": "pdf", + "export_id": str(failed_export.id), + }, + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertDictEqual(response.json(), {"export_id": [f"Export {str(failed_export.id)} has failed."]}) + + def test_export_already_running(self): + CorpusExport.objects.create(corpus=self.corpus, user=self.user, state=CorpusExportState.Running, source="default") + self.client.force_login(self.user) + + with self.assertNumQueries(4): + response = self.client.post( + reverse("api:export-process", kwargs={"corpus_id": str(self.corpus.id)}), + { + "format": "pdf", + "export_id": None, + }, + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertDictEqual(response.json(), {"export_id": ["An export is already running for this corpus."]}) + + @override_settings(EXPORT_TTL_SECONDS=420) + def test_recent_export_exists(self): + self.client.force_login(self.user) + with patch("django.utils.timezone.now") as mock_now: + mock_now.return_value = datetime.now(timezone.utc) - timedelta(minutes=2) + CorpusExport.objects.create(corpus=self.corpus, user=self.user, state=CorpusExportState.Done, source="default") + + with self.assertNumQueries(5): + response = self.client.post( + reverse("api:export-process", kwargs={"corpus_id": str(self.corpus.id)}), + { + "format": "pdf", + "export_id": None, + }, + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertDictEqual(response.json(), {"export_id": ["An export has already been made for this corpus in the last 420 seconds."]}) + + def test_element_does_not_exist(self): + self.client.force_login(self.user) + + with self.assertNumQueries(5): + response = self.client.post( + reverse("api:export-process", kwargs={"corpus_id": str(self.corpus.id)}), + { + "format": "pdf", + "export_id": str(self.complete_export.id), + "element_id": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa", + }, + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertDictEqual(response.json(), {"element_id": ['Invalid pk "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa" - object does not exist.']}) + + @patch("arkindex.users.managers.BaseACLManager.filter_rights") + def test_element_non_readable_corpus(self, filter_rights_mock): + filter_rights_mock.return_value = Corpus.objects.filter(pk=self.corpus.id) + self.client.force_login(self.user) + + with self.assertNumQueries(5): + response = self.client.post( + reverse("api:export-process", kwargs={"corpus_id": str(self.corpus.id)}), + { + "format": "pdf", + "export_id": str(self.complete_export.id), + "element_id": str(self.other_element.id), + }, + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertDictEqual(response.json(), {"element_id": [f'Invalid pk "{str(self.other_element.id)}" - object does not exist.']}) + + def test_element_wrong_corpus(self): + self.other_corpus.memberships.create(user=self.user, level=Role.Contributor.value) + self.client.force_login(self.user) + + with self.assertNumQueries(5): + response = self.client.post( + reverse("api:export-process", kwargs={"corpus_id": str(self.corpus.id)}), + { + "format": "pdf", + "export_id": str(self.complete_export.id), + "element_id": str(self.other_element.id), + }, + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertDictEqual(response.json(), {"element_id": [f"Element {str(self.other_element.id)} does not belong to corpus {str(self.corpus.id)}."]}) + + def test_unsupported_format(self): + self.client.force_login(self.user) + + with self.assertNumQueries(4): + response = self.client.post( + reverse("api:export-process", kwargs={"corpus_id": str(self.corpus.id)}), + { + "format": "chaussette", + "export_id": str(self.complete_export.id), + }, + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertDictEqual(response.json(), {"format": ["Value is not of type ExportFormat"]}) + + def test_element_or_selection(self): + self.client.force_login(self.user) + self.user.selected_elements.add(self.corpus_element) + + with self.assertNumQueries(6): + response = self.client.post( + reverse("api:export-process", kwargs={"corpus_id": str(self.corpus.id)}), + { + "format": "pdf", + "export_id": str(self.complete_export.id), + "element_id": str(self.corpus_element.id), + "selection": True + }, + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertDictEqual(response.json(), { + "non_field_errors": ["Only one of `element` and `selection` may be set."] + }) + + def test_empty_selection(self): + self.client.force_login(self.user) + self.user.selected_elements.add(self.other_element) + + with self.assertNumQueries(5): + response = self.client.post( + reverse("api:export-process", kwargs={"corpus_id": str(self.corpus.id)}), + { + "format": "pdf", + "export_id": str(self.complete_export.id), + "selection": True + }, + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertDictEqual(response.json(), {"selection": [f"The user selection does not contain any elements from corpus {str(self.corpus.id)}."]}) + + def test_bad_configuration(self): + self.client.force_login(self.user) + + with self.assertNumQueries(5): + response = self.client.post( + reverse("api:export-process", kwargs={"corpus_id": str(self.corpus.id)}), + { + "format": "pdf", + "export_id": str(self.complete_export.id), + "configuration": { + "page_type": None, + "order_by_name": "nope", + "example_int": "a", + "example_choice": "e", + "example_list": "abricot", + "example_list_2": [12, "prune"], + "bad_param": "hello" + } + }, + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertDictEqual(response.json(), { + "configuration": { + "bad_param": ["This is a not a valid parameter for this worker version."], + "example_required": ["This parameter is required."], + "example_choice": ["e is not an available choice."], + "order_by_name": ["This is not a valid value for a field of type bool."], + "example_int": ["This is not a valid value for a field of type int."], + "example_list": ["This is not a valid value for a field of type list."], + "example_list_2": ["All items must be of type str."], + "page_type": ["This is not a valid value for a field of type string."] + } + }) + + def test_create_export_process(self): + self.client.force_login(self.user) + + with self.assertNumQueries(24): + response = self.client.post( + reverse("api:export-process", kwargs={"corpus_id": str(self.corpus.id)}), + { + "format": "pdf", + "export_id": str(self.complete_export.id), + "configuration": { + "example_required": "something" + } + }, + ) + self.assertEqual(response.status_code, status.HTTP_201_CREATED) + created_process = Process.objects.get(mode=ProcessMode.Export) + created_task = created_process.tasks.get() + created_run = created_process.worker_runs.get() + + self.assertDictEqual(created_run.configuration.configuration, { + "example_choice": "a", + "example_int": 2, + "example_list": ["pomme", "poire"], + "example_list_2": ["prune", "abricot"], + "example_required": "something", + "export_id": str(self.complete_export.id), + "order_by_name": False, + "page_type": "page" + }) + + self.assertDictEqual(response.json(), { + "name": None, + "mode": "export", + "id": str(created_process.id), + "state": "unscheduled", + "corpus": str(self.corpus.id), + "created": created_process.created.isoformat().replace("+00:00", "Z"), + "started": created_process.started.isoformat().replace("+00:00", "Z"), + "updated": created_process.updated.isoformat().replace("+00:00", "Z"), + "finished": None, + "activity_state": "disabled", + "chunks": 1, + "element": None, + "element_name_contains": None, + "element_type": None, + "farm": None, + "files": [], + "folder_type": None, + "load_children": False, + "ml_class_id": None, + "template_id": None, + "use_cache": False, + "tasks": [ + { + "depth": 0, + "finished": None, + "id": str(created_task.id), + "original_task_id": None, + "parents": [], + "requires_gpu": False, + "run": 0, + "shm_size": None, + "slug": "pdf_export", + "started": None, + "state": "unscheduled", + "ttl": 3600, + } + ], + }) + + def test_non_default_configuration(self): + self.client.force_login(self.user) + + with self.assertNumQueries(24): + response = self.client.post( + reverse("api:export-process", kwargs={"corpus_id": str(self.corpus.id)}), + { + "format": "pdf", + "export_id": str(self.complete_export.id), + "configuration": { + "page_type": "double_page", + "order_by_name": True, + "example_int": 54, + "example_choice": "b", + "example_list": ["beurre", "sucre"], + "example_required": "something" + } + }, + ) + self.assertEqual(response.status_code, status.HTTP_201_CREATED) + created_process = Process.objects.get(mode=ProcessMode.Export) + created_run = created_process.worker_runs.get() + + self.assertDictEqual(created_run.configuration.configuration, { + "example_choice": "b", + "example_int": 54, + "example_list": ["beurre", "sucre"], + "example_list_2": ["prune", "abricot"], + "example_required": "something", + "export_id": str(self.complete_export.id), + "order_by_name": True, + "page_type": "double_page" + }) + + @patch("arkindex.project.triggers.export.local_export.delay") + def test_create_export_process_new_sql_export(self, delay_mock): + self.client.force_login(self.user) + + with self.assertNumQueries(26): + response = self.client.post( + reverse("api:export-process", kwargs={"corpus_id": str(self.corpus.id)}), + { + "format": "pdf", + "export_id": None, + "configuration": { + "example_required": "something" + } + }, + ) + self.assertEqual(response.status_code, status.HTTP_201_CREATED) + created_process = Process.objects.get(mode=ProcessMode.Export) + created_run = created_process.worker_runs.get() + new_export = self.corpus.exports.order_by("-created").first() + self.assertEqual(self.corpus.exports.count(), 2) + self.assertNotEqual(new_export.id, self.complete_export.id) + + self.assertDictEqual(created_run.configuration.configuration, { + "example_choice": "a", + "example_int": 2, + "example_list": ["pomme", "poire"], + "example_list_2": ["prune", "abricot"], + "example_required": "something", + "export_id": str(new_export.id), + "order_by_name": False, + "page_type": "page" + }) + + @patch("arkindex.project.triggers.export.local_export.delay") + def test_create_export_process_new_sql_export_with_element(self, delay_mock): + self.client.force_login(self.user) + + with self.assertNumQueries(30): + response = self.client.post( + reverse("api:export-process", kwargs={"corpus_id": str(self.corpus.id)}), + { + "format": "pdf", + "export_id": None, + "configuration": { + "example_required": "something" + }, + "element_id": str(self.corpus_element.id) + }, + ) + self.assertEqual(response.status_code, status.HTTP_201_CREATED) + created_process = Process.objects.get(mode=ProcessMode.Export) + created_run = created_process.worker_runs.get() + new_export = self.corpus.exports.order_by("-created").first() + self.assertEqual(self.corpus.exports.count(), 2) + self.assertNotEqual(new_export.id, self.complete_export.id) + self.assertEqual(created_process.element_id, self.corpus_element.id) + + self.assertDictEqual(created_run.configuration.configuration, { + "example_choice": "a", + "example_int": 2, + "example_list": ["pomme", "poire"], + "example_list_2": ["prune", "abricot"], + "example_required": "something", + "export_id": str(new_export.id), + "order_by_name": False, + "page_type": "page" + }) + + @patch("arkindex.project.triggers.export.local_export.delay") + def test_create_export_process_new_sql_export_with_selection(self, delay_mock): + self.client.force_login(self.user) + self.user.selected_elements.add(self.corpus_element) + self.user.selected_elements.add(self.other_element) + + with self.assertNumQueries(29): + response = self.client.post( + reverse("api:export-process", kwargs={"corpus_id": str(self.corpus.id)}), + { + "format": "pdf", + "export_id": None, + "configuration": { + "example_required": "something" + }, + "selection": True + }, + ) + self.assertEqual(response.status_code, status.HTTP_201_CREATED) + created_process = Process.objects.get(mode=ProcessMode.Export) + created_run = created_process.worker_runs.get() + new_export = self.corpus.exports.order_by("-created").first() + self.assertEqual(self.corpus.exports.count(), 2) + self.assertNotEqual(new_export.id, self.complete_export.id) + self.assertEqual(list(created_process.elements.all()), [self.corpus_element]) + + self.assertDictEqual(created_run.configuration.configuration, { + "example_choice": "a", + "example_int": 2, + "example_list": ["pomme", "poire"], + "example_list_2": ["prune", "abricot"], + "example_required": "something", + "export_id": str(new_export.id), + "order_by_name": False, + "page_type": "page" + }) + + def test_farm(self): + farm = Farm.objects.get() + self.client.force_login(self.user) + + with self.assertNumQueries(25): + response = self.client.post( + reverse("api:export-process", kwargs={"corpus_id": str(self.corpus.id)}), + { + "format": "pdf", + "export_id": str(self.complete_export.id), + "farm_id": str(farm.id), + }, + ) + self.assertEqual(response.status_code, status.HTTP_201_CREATED) + + process = Process.objects.get(id=response.json()["id"]) + self.assertEqual(process.farm, farm) + + @patch("arkindex.process.serializers.imports.get_default_farm") + def test_default_farm(self, get_default_farm_mock): + farm = Farm.objects.get() + get_default_farm_mock.return_value = farm + self.client.force_login(self.user) + + with self.assertNumQueries(24): + response = self.client.post( + reverse("api:export-process", kwargs={"corpus_id": str(self.corpus.id)}), + { + "format": "pdf", + "export_id": str(self.complete_export.id), + }, + ) + self.assertEqual(response.status_code, status.HTTP_201_CREATED) + + process = Process.objects.get(id=response.json()["id"]) + self.assertEqual(process.farm, farm) + + @patch("arkindex.ponos.models.Farm.is_available", return_value=False) + def test_farm_unavailable(self, is_available_mock): + farm = Farm.objects.get() + self.client.force_login(self.user) + + with self.assertNumQueries(5): + response = self.client.post( + reverse("api:export-process", kwargs={"corpus_id": str(self.corpus.id)}), + { + "format": "pdf", + "export_id": str(self.complete_export.id), + "farm_id": farm.id, + }, + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + + self.assertEqual(response.json(), { + "farm_id": ["You do not have access to this farm."], + }) + self.assertFalse(self.corpus.processes.filter(mode=ProcessMode.Export).exists()) + + def test_unknown_farm_id(self): + self.client.force_login(self.user) + + farm_id = str(uuid4()) + with self.assertNumQueries(5): + response = self.client.post( + reverse("api:export-process", kwargs={"corpus_id": str(self.corpus.id)}), + { + "format": "pdf", + "export_id": str(self.complete_export.id), + "farm_id": farm_id, + }, + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + + self.assertEqual(response.json(), { + "farm_id": [f'Invalid pk "{farm_id}" - object does not exist.'], + }) + self.assertFalse(self.corpus.processes.filter(mode=ProcessMode.Export).exists()) diff --git a/arkindex/process/tests/process/test_list.py b/arkindex/process/tests/process/test_list.py index d51b822caf8138ae67ef5050e2f4a696dcb96e41..3becb856e9a8e2afaaa4c3c3a2906b794fe7f26c 100644 --- a/arkindex/process/tests/process/test_list.py +++ b/arkindex/process/tests/process/test_list.py @@ -5,7 +5,7 @@ from django.urls import reverse from rest_framework import status from arkindex.documents.models import Corpus -from arkindex.ponos.models import State +from arkindex.ponos.models import State, Task from arkindex.process.models import Process, ProcessMode from arkindex.project.tests import FixtureAPITestCase from arkindex.users.models import User @@ -222,19 +222,37 @@ class TestProcessList(FixtureAPITestCase): Only last run should be used to filter processes by state """ completed_process = Process.objects.create(mode=ProcessMode.Workers, creator=self.user, corpus=self.corpus) - completed_process.tasks.create(depth=0, run=0, slug="task", state=State.Failed) - completed_process.tasks.create(depth=0, run=1, slug="task", state=State.Completed) + completed_process.tasks.create(depth=0, run=0, slug="task", state=State.Failed, ttl=0) + completed_process.tasks.create(depth=0, run=1, slug="task", state=State.Completed, ttl=0) self.assertEqual(completed_process.state, State.Completed) error_process = Process.objects.create(mode=ProcessMode.Workers, creator=self.user, corpus=self.corpus) - error_process.tasks.create(depth=0, run=0, slug="error_task", state=State.Error) - error_process.tasks.create(depth=0, run=0, slug="completed_task", state=State.Completed) + # `Failed` has a higher priority than `Error`, so it would normally be returned as the process state, + # but it should be ignored on a task that has been restarted in the same run + restarted_task = error_process.tasks.create(depth=0, run=0, slug="restarted_task", state=State.Failed, ttl=0) + error_process.tasks.create( + depth=0, + run=0, + slug="error_task", + state=State.Error, + ttl=0, + original_task=restarted_task, + ) + error_process.tasks.create(depth=0, run=0, slug="completed_task", state=State.Completed, ttl=0) self.assertEqual(error_process.state, State.Error) stopped_process = Process.objects.create(mode=ProcessMode.Workers, creator=self.user, corpus=self.corpus) - stopped_process.tasks.create(depth=0, run=0, slug="completed_task", state=State.Completed) - stopped_process.tasks.create(depth=0, run=1, slug="completed_task", state=State.Completed) - stopped_process.tasks.create(depth=0, run=2, slug="stopped_task", state=State.Stopped) + stopped_process.tasks.create(depth=0, run=0, slug="completed_task", state=State.Completed, ttl=0) + stopped_process.tasks.create(depth=0, run=1, slug="completed_task", state=State.Completed, ttl=0) + restarted_task = stopped_process.tasks.create(depth=0, run=1, slug="restarted_task", state=State.Error, ttl=0) + stopped_process.tasks.create( + depth=0, + run=2, + slug="stopped_task", + state=State.Stopped, + ttl=0, + original_task=restarted_task, + ) self.assertEqual(stopped_process.state, State.Stopped) self.client.force_login(self.user) @@ -246,24 +264,24 @@ class TestProcessList(FixtureAPITestCase): def test_state_filter_stopped(self): stopping_process = Process.objects.create(mode=ProcessMode.Workers, creator=self.user, corpus=self.corpus) - stopping_process.tasks.create(depth=0, run=0, slug="task", state=State.Error) - stopping_process.tasks.create(depth=0, run=1, slug="task", state=State.Stopping) + stopping_process.tasks.create(depth=0, run=0, slug="task", state=State.Error, ttl=0) + stopping_process.tasks.create(depth=0, run=1, slug="task", state=State.Stopping, ttl=0) self.assertEqual(stopping_process.state, State.Stopping) error_process = Process.objects.create(mode=ProcessMode.Workers, creator=self.user, corpus=self.corpus) - error_process.tasks.create(depth=0, run=0, slug="error_task", state=State.Error) - error_process.tasks.create(depth=0, run=0, slug="stopping_task", state=State.Stopping) + error_process.tasks.create(depth=0, run=0, slug="error_task", state=State.Error, ttl=0) + error_process.tasks.create(depth=0, run=0, slug="stopping_task", state=State.Stopping, ttl=0) self.assertEqual(error_process.state, State.Error) stopped_process_1 = Process.objects.create(mode=ProcessMode.Workers, creator=self.user, corpus=self.corpus) - stopped_process_1.tasks.create(depth=0, run=0, slug="unscheduled_task", state=State.Unscheduled) - stopped_process_1.tasks.create(depth=0, run=0, slug="stopped_task", state=State.Stopped) + stopped_process_1.tasks.create(depth=0, run=0, slug="unscheduled_task", state=State.Unscheduled, ttl=0) + stopped_process_1.tasks.create(depth=0, run=0, slug="stopped_task", state=State.Stopped, ttl=0) self.assertEqual(stopped_process_1.state, State.Stopped) stopped_process_2 = Process.objects.create(mode=ProcessMode.Workers, creator=self.user, corpus=self.corpus) - stopped_process_2.tasks.create(depth=0, run=0, slug="failed_task", state=State.Failed) - stopped_process_2.tasks.create(depth=0, run=1, slug="pending_task", state=State.Pending) - stopped_process_2.tasks.create(depth=0, run=1, slug="stopped_task", state=State.Stopped) + stopped_process_2.tasks.create(depth=0, run=0, slug="failed_task", state=State.Failed, ttl=0) + stopped_process_2.tasks.create(depth=0, run=1, slug="pending_task", state=State.Pending, ttl=0) + stopped_process_2.tasks.create(depth=0, run=1, slug="stopped_task", state=State.Stopped, ttl=0) self.assertEqual(stopped_process_2.state, State.Stopped) self.client.force_login(self.user) @@ -285,8 +303,8 @@ class TestProcessList(FixtureAPITestCase): creator=self.user, corpus=self.corpus, ) - unscheduled_with_tasks.tasks.create(depth=0, run=0, slug="task", state=State.Completed) - unscheduled_with_tasks.tasks.create(depth=0, run=1, slug="task", state=State.Unscheduled) + unscheduled_with_tasks.tasks.create(depth=0, run=0, slug="task", state=State.Completed, ttl=0) + unscheduled_with_tasks.tasks.create(depth=0, run=1, slug="task", state=State.Unscheduled, ttl=0) unscheduled_with_tasks.started = datetime.now(timezone.utc) unscheduled_with_tasks.save() self.assertEqual(unscheduled_with_tasks.state, State.Unscheduled) @@ -414,3 +432,23 @@ class TestProcessList(FixtureAPITestCase): }, ] }) + + def test_template_mode_ignores_with_tasks(self): + """ + The 'with_tasks' filter should be ignored and some templates should be returned + when filtering on the template process mode, because templates cannot have tasks + """ + self.corpus.processes.create(mode=ProcessMode.Template, creator=self.user) + self.assertFalse(Task.objects.filter(process__mode=ProcessMode.Template).exists()) + + self.client.force_login(self.user) + with self.assertNumQueries(4): + response = self.client.get( + reverse("api:process-list"), + data={"mode": "template", "with_tasks": True}, + ) + self.assertEqual(response.status_code, status.HTTP_200_OK) + + data = response.json() + self.assertEqual(data["count"], 1) + self.assertNotEqual(data["results"], []) diff --git a/arkindex/process/tests/process/test_process_artifact_download.py b/arkindex/process/tests/process/test_process_artifact_download.py index 8da6836c917873ff6634489a923505e51cfea8b4..004e375f67c2128ae705e54e769c31ac1c7bedf6 100644 --- a/arkindex/process/tests/process/test_process_artifact_download.py +++ b/arkindex/process/tests/process/test_process_artifact_download.py @@ -4,7 +4,12 @@ from django.urls import reverse from rest_framework import status from arkindex.documents.models import Corpus -from arkindex.process.models import Process, ProcessMode, WorkerVersion +from arkindex.process.models import ( + ArkindexFeature, + Process, + ProcessMode, + WorkerVersion, +) from arkindex.project.tests import FixtureAPITestCase from arkindex.users.models import Role, User @@ -14,12 +19,13 @@ class TestProcessArtifactDownload(FixtureAPITestCase): @classmethod def setUpTestData(cls): super().setUpTestData() + cls.pdf_export_version = WorkerVersion.objects.get_by_feature(ArkindexFeature.ExportPDF) cls.process = Process.objects.create(mode=ProcessMode.Export, creator=cls.user, corpus=cls.corpus) - cls.worker_run = cls.process.worker_runs.create(version=WorkerVersion.objects.get(worker__slug="reco")) + cls.worker_run = cls.process.worker_runs.create(version=cls.pdf_export_version, ttl=0) with patch("arkindex.process.tasks.initialize_activity.delay"): cls.process.run() - cls.task1, cls.task2 = cls.process.tasks.all()[:2] - cls.artifact = cls.task2.artifacts.create( + cls.task = cls.process.tasks.get() + cls.artifact = cls.task.artifacts.create( path="path/to/file.json", content_type="application/json", size=42, @@ -73,14 +79,14 @@ class TestProcessArtifactDownload(FixtureAPITestCase): self.assertTrue(response.has_header("Location")) self.assertTrue( response["Location"].startswith( - f"http://s3/ponos-artifacts/{self.task2.id}/path/to/file.json" + f"http://s3/ponos-artifacts/{self.task.id}/path/to/file.json" ) ) @patch("arkindex.process.models.Process.get_last_run", return_value=1) def test_wrong_run(self, last_run_mock): self.client.force_login(self.user) - assert self.task2.run == 0 + assert self.task.run == 0 with self.assertNumQueries(5): response = self.client.get( @@ -95,8 +101,9 @@ class TestProcessArtifactDownload(FixtureAPITestCase): run=0, depth=1, slug="new_task", + ttl=0, ) - self.task2.restarts.set([new_task]) + self.task.restarts.set([new_task]) with self.assertNumQueries(5): response = self.client.get( @@ -107,7 +114,7 @@ class TestProcessArtifactDownload(FixtureAPITestCase): def test_multiple_artifacts_returned(self): self.client.force_login(self.user) - self.task2.artifacts.create( + self.task.artifacts.create( path="path/to/other_file.json", content_type="application/json", size=44, @@ -123,11 +130,20 @@ class TestProcessArtifactDownload(FixtureAPITestCase): def test_wrong_depth(self): self.client.force_login(self.user) self.artifact.delete() - self.task1.artifacts.create( + parent_task = self.task.parents.create( + slug="a_parent_task", + process=self.process, + run=0, + depth=0, + ttl=0, + ) + parent_task.artifacts.create( path="path/to/other_file.json", content_type="application/json", size=44, ) + self.task.depth = 1 + self.task.save() with self.assertNumQueries(5): response = self.client.get( diff --git a/arkindex/process/tests/process/test_retrieve.py b/arkindex/process/tests/process/test_retrieve.py index 7019bc5ce2b2ad58b95cbe2e7e7041b4f14c0208..50630e6579e926da89e80d3f117e4680ad741596 100644 --- a/arkindex/process/tests/process/test_retrieve.py +++ b/arkindex/process/tests/process/test_retrieve.py @@ -1,5 +1,6 @@ from unittest.mock import call, patch, seal +from django.test import override_settings from django.urls import reverse from rest_framework import status @@ -7,6 +8,7 @@ from arkindex.process.models import ProcessMode from arkindex.project.tests import FixtureAPITestCase +@override_settings(PONOS_MAXIMUM_TASK_TTL=3600) class TestProcessRetrieve(FixtureAPITestCase): @classmethod @@ -76,6 +78,7 @@ class TestProcessRetrieve(FixtureAPITestCase): "original_task_id": None, "started": None, "finished": None, + "ttl": 3600, } ], "template_id": None, diff --git a/arkindex/process/tests/process/test_retry.py b/arkindex/process/tests/process/test_retry.py index 85733653c21d33981d26d30d95df8816b5458ac3..9407e2e4c5b3dc82c5a697c9799d7742a0eb01c8 100644 --- a/arkindex/process/tests/process/test_retry.py +++ b/arkindex/process/tests/process/test_retry.py @@ -156,6 +156,7 @@ class TestProcessRetry(FixtureAPITestCase): process.worker_runs.create( version=self.recognizer, model_version=self.model_version, + ttl=0, ) with patch("arkindex.process.tasks.initialize_activity.delay"): process.run() @@ -208,7 +209,7 @@ class TestProcessRetry(FixtureAPITestCase): worker_type, _ = WorkerType.objects.get_or_create(slug=f"type_{slug}", display_name=slug.capitalize()) worker, _ = Worker.objects.get_or_create(slug=slug, defaults={"type": worker_type, "repository_url": "fake"}) version, _ = worker.versions.get_or_create(version=1, defaults={"state": WorkerVersionState.Available, "docker_image_iid": "test"}) - return version.worker_runs.create(process=process) + return version.worker_runs.create(process=process, ttl=0) init_version = WorkerVersion.objects.get_by_feature(feature=ArkindexFeature.InitElements) @@ -230,6 +231,7 @@ class TestProcessRetry(FixtureAPITestCase): process=process, slug=f"any_{i}", state=State.Error, + ttl=0, ) for i, slug in enumerate([*init_workers, "worker_1", "worker_2"]) ) @@ -261,8 +263,8 @@ class TestProcessRetry(FixtureAPITestCase): mode=ProcessMode.Files, creator=self.user, ) - process.worker_runs.create(version=self.recognizer) - process.tasks.create(state=State.Error, run=0, depth=0) + process.worker_runs.create(version=self.recognizer, ttl=0) + process.tasks.create(state=State.Error, run=0, depth=0, ttl=0) self.assertEqual(process.state, State.Error) process.finished = timezone.now() @@ -303,8 +305,9 @@ class TestProcessRetry(FixtureAPITestCase): "iiif_base_url": self.imgsrv.url, }, ), + ttl=0, ) - process.tasks.create(state=State.Error, run=0, depth=0) + process.tasks.create(state=State.Error, run=0, depth=0, ttl=0) self.assertEqual(process.state, State.Error) process.finished = timezone.now() @@ -344,13 +347,15 @@ class TestProcessRetry(FixtureAPITestCase): @patch("arkindex.project.triggers.process_tasks.initialize_activity.delay") def test_export(self, delay_mock): + pdf_export_version = WorkerVersion.objects.get_by_feature(ArkindexFeature.ExportPDF) self.client.force_login(self.user) process = self.corpus.processes.create(mode=ProcessMode.Export, creator=self.user) - process.tasks.create(state=State.Error, run=0, depth=0) + process.worker_runs.create(version=pdf_export_version, ttl=0) + process.tasks.create(state=State.Error, run=0, depth=0, ttl=0) self.assertEqual(process.state, State.Error) process.finished = timezone.now() - with self.assertNumQueries(13): + with self.assertNumQueries(12): response = self.client.post(reverse("api:process-retry", kwargs={"pk": process.id})) self.assertEqual(response.status_code, status.HTTP_200_OK) diff --git a/arkindex/process/tests/process/test_run.py b/arkindex/process/tests/process/test_run.py index 8ca2347b3e8be2a151c655d012d5ff9be4f55697..ef0ba31c9f3e15ecff7b2b368348f0056edbcc3b 100644 --- a/arkindex/process/tests/process/test_run.py +++ b/arkindex/process/tests/process/test_run.py @@ -65,8 +65,8 @@ class TestProcessRun(FixtureTestCase): mode=ProcessMode.Workers, ) token_mock.side_effect = [b"12345", b"78945"] - init_run = process.worker_runs.create(version=self.init_worker_version) - run = process.worker_runs.create(version=self.version_with_model, parents=[init_run.id]) + init_run = process.worker_runs.create(version=self.init_worker_version, ttl=0) + run = process.worker_runs.create(version=self.version_with_model, parents=[init_run.id], ttl=0) run.model_version = self.model_version run.save() with patch("arkindex.process.tasks.initialize_activity.delay"): diff --git a/arkindex/process/tests/process/test_start.py b/arkindex/process/tests/process/test_start.py index aa18dec7bae038bac525fcf698d7d20e31811f35..4d96cadcbbe3cd43171bbd93a8d6809c70409f50 100644 --- a/arkindex/process/tests/process/test_start.py +++ b/arkindex/process/tests/process/test_start.py @@ -117,7 +117,7 @@ class TestProcessStart(FixtureAPITestCase): @override_settings(PUBLIC_HOSTNAME="https://darkindex.lol") def test_without_required_model(self): - self.workers_process.worker_runs.create(version=self.version_with_model) + self.workers_process.worker_runs.create(version=self.version_with_model, ttl=0) self.client.force_login(self.user) @@ -135,7 +135,7 @@ class TestProcessStart(FixtureAPITestCase): @override_settings(PUBLIC_HOSTNAME="https://arkindex.localhost") @patch("arkindex.project.triggers.process_tasks.initialize_activity.delay") def test_with_required_model(self, activities_delay_mock): - self.workers_process.worker_runs.create(version=self.version_with_model, model_version=self.model_version) + self.workers_process.worker_runs.create(version=self.version_with_model, model_version=self.model_version, ttl=0) self.assertFalse(self.workers_process.tasks.exists()) self.client.force_login(self.user) @@ -168,7 +168,7 @@ class TestProcessStart(FixtureAPITestCase): ) def test_unavailable_worker_version(self): - self.workers_process.worker_runs.create(version=self.recognizer) + self.workers_process.worker_runs.create(version=self.recognizer, ttl=0) self.recognizer.state = WorkerVersionState.Error self.recognizer.save() self.assertFalse(self.workers_process.tasks.exists()) @@ -185,7 +185,7 @@ class TestProcessStart(FixtureAPITestCase): ) def test_unavailable_model_version(self): - self.workers_process.worker_runs.create(version=self.recognizer, model_version=self.model_version) + self.workers_process.worker_runs.create(version=self.recognizer, model_version=self.model_version, ttl=0) self.model_version.state = ModelVersionState.Error self.model_version.save() self.assertFalse(self.workers_process.tasks.exists()) @@ -202,7 +202,7 @@ class TestProcessStart(FixtureAPITestCase): ) def test_archived_models(self): - self.workers_process.worker_runs.create(version=self.recognizer, model_version=self.model_version) + self.workers_process.worker_runs.create(version=self.recognizer, model_version=self.model_version, ttl=0) self.model.archived = timezone.now() self.model.save() self.assertFalse(self.workers_process.tasks.exists()) @@ -242,6 +242,7 @@ class TestProcessStart(FixtureAPITestCase): version=self.recognizer, configuration=None, model_version=None, + ttl=0, ) # The other version is used with a configuration missing the required field self.workers_process.worker_runs.create( @@ -253,6 +254,7 @@ class TestProcessStart(FixtureAPITestCase): }, ), model_version=None, + ttl=0, ) self.client.force_login(self.user) @@ -276,13 +278,13 @@ class TestProcessStart(FixtureAPITestCase): Default chunks, thumbnails and farm are used. Cache is disabled, and worker activities are enabled. """ init_elements_version = WorkerVersion.objects.get_by_feature(ArkindexFeature.InitElements) - init_run = self.workers_process.worker_runs.create(version=init_elements_version) - run = self.workers_process.worker_runs.create(version=self.recognizer, parents=[init_run.id]) + init_run = self.workers_process.worker_runs.create(version=init_elements_version, ttl=0) + run = self.workers_process.worker_runs.create(version=self.recognizer, parents=[init_run.id], ttl=0) self.assertFalse(self.workers_process.tasks.exists()) self.client.force_login(self.user) - with self.assertNumQueries(15): + with self.assertNumQueries(16): response = self.client.post( reverse("api:process-start", kwargs={"pk": str(self.workers_process.id)}) ) @@ -309,13 +311,13 @@ class TestProcessStart(FixtureAPITestCase): def test_inconsistent_gpu_usages(self): # The version's gpu_usage is Disabled, so the run's use_gpu is set to False - self.workers_process.worker_runs.create(version=self.recognizer) + self.workers_process.worker_runs.create(version=self.recognizer, ttl=0) self.recognizer.gpu_usage = FeatureUsage.Required self.recognizer.save() self.dla.gpu_usage = FeatureUsage.Required self.dla.save() # The version's gpu_usage is Required, so the run's use_gpu is set to True - self.workers_process.worker_runs.create(version=self.dla) + self.workers_process.worker_runs.create(version=self.dla, ttl=0) self.dla.gpu_usage = FeatureUsage.Disabled self.dla.save() self.assertFalse(self.workers_process.tasks.exists()) @@ -336,7 +338,7 @@ class TestProcessStart(FixtureAPITestCase): ) def test_dataset_requires_datasets(self): - self.dataset_process.worker_runs.create(version=self.recognizer) + self.dataset_process.worker_runs.create(version=self.recognizer, ttl=0) self.assertFalse(self.dataset_process.tasks.exists()) self.client.force_login(self.user) @@ -353,7 +355,7 @@ class TestProcessStart(FixtureAPITestCase): def test_dataset_requires_dataset_in_same_corpus(self): test_set = self.other_dataset.sets.get(name="test") ProcessDatasetSet.objects.create(process=self.dataset_process, set=test_set) - self.dataset_process.worker_runs.create(version=self.recognizer) + self.dataset_process.worker_runs.create(version=self.recognizer, ttl=0) self.assertFalse(self.dataset_process.tasks.exists()) self.client.force_login(self.user) @@ -373,7 +375,7 @@ class TestProcessStart(FixtureAPITestCase): test_set_2 = self.dataset2.sets.get(name="test") ProcessDatasetSet.objects.create(process=self.dataset_process, set=test_set_1) ProcessDatasetSet.objects.create(process=self.dataset_process, set=test_set_2) - self.dataset_process.worker_runs.create(version=self.recognizer) + self.dataset_process.worker_runs.create(version=self.recognizer, ttl=0) self.client.force_login(self.user) @@ -398,7 +400,7 @@ class TestProcessStart(FixtureAPITestCase): test_set_2 = self.other_dataset.sets.get(name="test") ProcessDatasetSet.objects.create(process=self.dataset_process, set=test_set_1) ProcessDatasetSet.objects.create(process=self.dataset_process, set=test_set_2) - run = self.dataset_process.worker_runs.create(version=self.recognizer) + run = self.dataset_process.worker_runs.create(version=self.recognizer, ttl=0) self.assertFalse(self.dataset_process.tasks.exists()) self.client.force_login(self.user) @@ -432,7 +434,7 @@ class TestProcessStart(FixtureAPITestCase): self.recognizer.save() self.assertEqual(self.recognizer.state, WorkerVersionState.Available) - run = self.workers_process.worker_runs.create(version=self.recognizer) + run = self.workers_process.worker_runs.create(version=self.recognizer, ttl=0) self.assertFalse(self.workers_process.tasks.exists()) self.client.force_login(self.user) @@ -461,7 +463,7 @@ class TestProcessStart(FixtureAPITestCase): """ A user can specify a ponos farm to use for a process """ - self.workers_process.worker_runs.create(version=self.recognizer) + self.workers_process.worker_runs.create(version=self.recognizer, ttl=0) farm = Farm.objects.get(name="Wheat farm") self.client.force_login(self.user) @@ -489,7 +491,7 @@ class TestProcessStart(FixtureAPITestCase): farm = Farm.objects.get(name="Wheat farm") get_default_farm_mock.return_value = farm - self.workers_process.worker_runs.create(version=self.recognizer) + self.workers_process.worker_runs.create(version=self.recognizer, ttl=0) self.assertFalse(self.workers_process.tasks.exists()) self.client.force_login(self.user) @@ -516,7 +518,7 @@ class TestProcessStart(FixtureAPITestCase): @patch("arkindex.process.serializers.imports.get_default_farm") def test_default_farm_guest(self, get_default_farm_mock, is_available_mock): get_default_farm_mock.return_value = Farm.objects.first() - self.workers_process.worker_runs.create(version=self.recognizer) + self.workers_process.worker_runs.create(version=self.recognizer, ttl=0) self.client.force_login(self.user) with self.assertNumQueries(5): @@ -537,7 +539,7 @@ class TestProcessStart(FixtureAPITestCase): @patch("arkindex.ponos.models.Farm.is_available", return_value=False) def test_farm_guest(self, is_available_mock): - self.workers_process.worker_runs.create(version=self.recognizer) + self.workers_process.worker_runs.create(version=self.recognizer, ttl=0) self.client.force_login(self.user) with self.assertNumQueries(7): @@ -590,7 +592,7 @@ class TestProcessStart(FixtureAPITestCase): """ StartProcess should restrict the chunks to `settings.MAX_CHUNKS` """ - self.workers_process.worker_runs.create(version=self.recognizer) + self.workers_process.worker_runs.create(version=self.recognizer, ttl=0) self.client.force_login(self.user) with self.assertNumQueries(5): @@ -609,7 +611,7 @@ class TestProcessStart(FixtureAPITestCase): """ It should be possible to pass chunks parameters when starting a workers process """ - run = self.workers_process.worker_runs.create(version=self.recognizer) + run = self.workers_process.worker_runs.create(version=self.recognizer, ttl=0) self.client.force_login(self.user) with self.assertNumQueries(18): @@ -642,7 +644,7 @@ class TestProcessStart(FixtureAPITestCase): test_set_2 = self.dataset2.sets.get(name="test") ProcessDatasetSet.objects.create(process=self.dataset_process, set=test_set_1) ProcessDatasetSet.objects.create(process=self.dataset_process, set=test_set_2) - run = self.dataset_process.worker_runs.create(version=self.recognizer) + run = self.dataset_process.worker_runs.create(version=self.recognizer, ttl=0) self.client.force_login(self.user) with self.assertNumQueries(12): @@ -688,7 +690,7 @@ class TestProcessStart(FixtureAPITestCase): """ self.assertFalse(self.workers_process.use_cache) self.assertEqual(self.workers_process.activity_state, ActivityState.Disabled) - self.workers_process.worker_runs.create(version=self.version_gpu) + self.workers_process.worker_runs.create(version=self.version_gpu, ttl=0) self.client.force_login(self.user) @@ -721,10 +723,12 @@ class TestProcessStart(FixtureAPITestCase): name="some_config", configuration={"a": "b"}, ), + ttl=0, ) run_2 = self.workers_process.worker_runs.create( version=self.recognizer, parents=[run_1.id], + ttl=0, ) self.assertNotEqual(run_1.task_slug, run_2.task_slug) diff --git a/arkindex/process/tests/templates/__init__.py b/arkindex/process/tests/templates/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/arkindex/process/tests/test_templates.py b/arkindex/process/tests/templates/test_apply.py similarity index 58% rename from arkindex/process/tests/test_templates.py rename to arkindex/process/tests/templates/test_apply.py index 2d0c47c3c75eb56b3e87d8b1058010553ef843c0..0f59330309ab6bb8b803884d30bfbd1e85225b3a 100644 --- a/arkindex/process/tests/test_templates.py +++ b/arkindex/process/tests/templates/test_apply.py @@ -1,6 +1,7 @@ from datetime import datetime, timezone from unittest.mock import call, patch +from django.test import override_settings from rest_framework import status from rest_framework.reverse import reverse @@ -8,22 +9,18 @@ from arkindex.documents.models import Corpus from arkindex.process.models import ( ArkindexFeature, FeatureUsage, - Process, ProcessMode, WorkerConfiguration, - WorkerRun, WorkerVersion, WorkerVersionState, ) from arkindex.project.tests import FixtureAPITestCase from arkindex.training.models import Model, ModelVersionState -from arkindex.users.models import Role, User +from arkindex.users.models import Role -class TestTemplates(FixtureAPITestCase): - """ - Test workflows from Arkindex existing elements - """ +@override_settings(PONOS_MAXIMUM_TASK_TTL=3600) +class TestApplyTemplate(FixtureAPITestCase): @classmethod def setUpTestData(cls): @@ -34,9 +31,6 @@ class TestTemplates(FixtureAPITestCase): type=cls.private_corpus.types.create(slug="volume", folder=True), name="Hidden", ) - cls.private_process_template = cls.private_corpus.processes.create( - creator=cls.user, mode=ProcessMode.Workers - ) cls.private_template = cls.private_corpus.processes.create( creator=cls.user, mode=ProcessMode.Template ) @@ -50,9 +44,6 @@ class TestTemplates(FixtureAPITestCase): cls.process = cls.corpus.processes.create( creator=cls.user, mode=ProcessMode.Workers ) - cls.process_template = cls.corpus.processes.create( - creator=cls.user, mode=ProcessMode.Workers - ) cls.template = cls.corpus.processes.create( creator=cls.user, mode=ProcessMode.Template ) @@ -61,242 +52,30 @@ class TestTemplates(FixtureAPITestCase): name="A config", configuration={"param1": "value1"}, ) - cls.run_1 = cls.process_template.worker_runs.create( - version=cls.version_1, parents=[], configuration=cls.worker_configuration - ) - cls.process_template.worker_runs.create( - version=cls.version_2, - parents=[cls.run_1.id], - ) cls.model = Model.objects.create(name="moo") cls.model_version = cls.model.versions.create(state=ModelVersionState.Available) cls.template_run_1 = cls.template.worker_runs.create( - version=cls.version_1, parents=[], configuration=cls.worker_configuration + version=cls.version_1, + configuration=cls.worker_configuration, + ttl=7200, ) cls.template_run_2 = cls.template.worker_runs.create( version=cls.version_2, parents=[cls.template_run_1.id], model_version=cls.model_version, + ttl=0, ) - cls.private_process_template.worker_runs.create( - version=cls.version_1, parents=[], configuration=cls.worker_configuration - ) cls.private_template.worker_runs.create( - version=cls.version_1, parents=[], configuration=cls.worker_configuration - ) - - def test_create(self): - self.client.force_login(self.user) - with self.assertNumQueries(9): - response = self.client.post( - reverse( - "api:create-process-template", kwargs={"pk": str(self.process_template.id)} - ), - {"name": "test_template"}, - ) - - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - self.assertEqual(response.json()["mode"], "template") - - child_run, parent_run = WorkerRun.objects.select_related("version__worker").filter(process__id=response.json()["id"]).order_by("version__worker__slug").all() - # Check dependency - self.assertListEqual(child_run.parents, [parent_run.id]) - - # Check that every new worker_run is the same as one of the template's - self.assertTrue(self.process_template.worker_runs.filter(version=parent_run.version).exists()) - self.assertTrue(self.process_template.worker_runs.filter(version=child_run.version).exists()) - - def test_create_use_gpu(self): - self.client.force_login(self.user) - - # Set invalid values: the version with disabled GPU usage gets a GPU - self.process_template.worker_runs.filter(version=self.version_1).update(use_gpu=True) - # A signal is trying to set use_gpu to the correct values, so we create then update to give no GPU to a version that requires a GPU - self.process_template.worker_runs.create(version=self.version_3) - self.process_template.worker_runs.filter(version=self.version_3).update(use_gpu=False) - - # Have two runs with a version that supports GPU usage, to test that both True and False are copied - self.version_2.gpu_usage = FeatureUsage.Supported - self.version_2.save() - self.process_template.worker_runs.create(version=self.version_2, configuration=self.worker_configuration, use_gpu=True) - - self.assertQuerySetEqual(( - self.process_template.worker_runs - .order_by("version__gpu_usage", "use_gpu") - .values_list("version__gpu_usage", "use_gpu") - ), [ - (FeatureUsage.Disabled, True), - (FeatureUsage.Required, False), - (FeatureUsage.Supported, False), - (FeatureUsage.Supported, True), - ]) - - with self.assertNumQueries(9): - response = self.client.post( - reverse( - "api:create-process-template", kwargs={"pk": str(self.process_template.id)} - ), - {"name": "test_template"}, - ) - - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - - self.assertQuerySetEqual(( - Process.objects.get(id=response.json()["id"]).worker_runs - .order_by("version__gpu_usage", "use_gpu") - .values_list("version__gpu_usage", "use_gpu") - ), [ - (FeatureUsage.Disabled, False), - (FeatureUsage.Required, True), - (FeatureUsage.Supported, False), - (FeatureUsage.Supported, True), - ]) - - def test_create_excludes_init_elements(self): - init_version = WorkerVersion.objects.get_by_feature(ArkindexFeature.InitElements) - init_run = self.process_template.worker_runs.create(version=init_version) - self.run_1.parents = [init_run.id] - self.run_1.save() - - self.client.force_login(self.user) - with self.assertNumQueries(8): - response = self.client.post( - reverse( - "api:create-process-template", kwargs={"pk": str(self.process_template.id)} - ), - {"name": "test_template"}, - ) - - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - self.assertEqual(response.json()["mode"], "template") - - new_process_id = response.json()["id"] - - # Only two worker runs for the created template - self.assertEqual(self.process_template.worker_runs.count(), 3) - self.assertEqual(Process.objects.get(id=new_process_id).worker_runs.count(), 2) - - # No elements initialisation run in the created template - self.assertFalse(WorkerRun.objects.filter(process_id=new_process_id, version=init_version).exists()) - - child_run, parent_run = WorkerRun.objects.select_related("version__worker").filter(process__id=new_process_id).order_by("version__worker__slug").all() - # Check dependencies - self.assertListEqual(parent_run.parents, []) - self.assertListEqual(child_run.parents, [parent_run.id]) - - # Check that every new worker_run is the same as one of the template's - self.assertTrue(self.process_template.worker_runs.filter(version=parent_run.version).exists()) - self.assertTrue(self.process_template.worker_runs.filter(version=child_run.version).exists()) - - def test_create_requires_authentication(self): - response = self.client.post( - reverse("api:create-process-template", kwargs={"pk": str(self.process_template.id)}) - ) - self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) - self.assertDictEqual( - response.json(), {"detail": "Authentication credentials were not provided."} - ) - - def test_create_requires_worker_runs(self): - process_no_worker_runs = self.corpus.processes.create( - creator=self.user, mode=ProcessMode.Workers - ) - self.client.force_login(self.user) - response = self.client.post( - reverse("api:create-process-template", kwargs={"pk": str(process_no_worker_runs.id)}) - ) - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertListEqual( - response.json(), - ["This process does not have any worker runs."], - ) - - def test_create_requires_verified_user(self): - new_user = User.objects.create(email="new@test.fr", verified_email=False) - self.client.force_login(new_user) - response = self.client.post( - reverse("api:create-process-template", kwargs={"pk": str(self.process_template.id)}) - ) - self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) - - @patch("arkindex.project.mixins.get_max_level", return_value=Role.Guest.value) - def test_create_requires_contributor_access_rights_process(self, get_max_level_mock): - self.client.force_login(self.user) - - with self.assertNumQueries(4): - response = self.client.post( - reverse( - "api:create-process-template", - kwargs={"pk": str(self.private_process_template.id)}, - ) - ) - self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) - - self.assertDictEqual( - response.json(), - {"detail": "You do not have a contributor access to this process."}, + version=cls.version_1, + configuration=cls.worker_configuration, + ttl=0, ) - self.assertEqual(get_max_level_mock.call_count, 1) - self.assertEqual(get_max_level_mock.call_args, call(self.user, self.private_corpus)) - - @patch("arkindex.project.mixins.get_max_level", return_value=Role.Contributor.value) - @patch("arkindex.users.utils.get_max_level", return_value=Role.Guest.value) - def test_create_requires_access_rights_all_workers(self, worker_max_level_mock, process_max_level_mock): - self.client.force_login(self.user) - - with self.assertNumQueries(4): - response = self.client.post( - reverse("api:create-process-template", kwargs={"pk": str(self.private_process_template.id)}) - ) - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - - self.assertEqual( - response.json(), - ["You do not have an execution access to every worker of this process."], - ) - self.assertEqual(process_max_level_mock.call_count, 1) - self.assertEqual(process_max_level_mock.call_args, call(self.user, self.private_corpus)) - self.assertEqual(worker_max_level_mock.call_count, 1) - self.assertEqual(worker_max_level_mock.call_args, call(self.user, self.worker_1)) - - def test_create_unsupported_mode(self): - self.client.force_login(self.user) - for mode in set(ProcessMode) - {ProcessMode.Workers, ProcessMode.Dataset, ProcessMode.Local}: - with self.subTest(mode=mode): - self.process.mode = mode - self.process.save() - - with self.assertNumQueries(4): - response = self.client.post( - reverse("api:create-process-template", kwargs={"pk": str(self.template.id)}), - data={"process_id": str(self.process.id)}, - ) - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - - self.assertEqual(response.json(), ["Templates can only be created from Workers or Dataset processes."]) - self.process.refresh_from_db() - self.assertEqual(self.process.template, None) - - def test_create_local(self): - self.client.force_login(self.user) - local_process = self.user.processes.get(mode=ProcessMode.Local) - - with self.assertNumQueries(4): - response = self.client.post( - reverse("api:create-process-template", kwargs={"pk": str(self.template.id)}), - data={"process_id": str(local_process.id)}, - ) - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - - self.assertEqual(response.json(), ["Templates can only be created from Workers or Dataset processes."]) - local_process.refresh_from_db() - self.assertEqual(local_process.template, None) @patch("arkindex.project.mixins.get_max_level", return_value=Role.Guest.value) - def test_apply_requires_contributor_rights_on_template(self, get_max_level_mock): + def test_requires_contributor_rights_on_template(self, get_max_level_mock): """Raise 403 if the user does not have rights on template """ self.client.force_login(self.user) @@ -316,7 +95,7 @@ class TestTemplates(FixtureAPITestCase): self.assertEqual(get_max_level_mock.call_args, call(self.user, self.private_corpus)) @patch("arkindex.project.mixins.get_max_level", side_effect=[Role.Contributor.value, Role.Guest.value]) - def test_apply_requires_contributor_rights_on_process(self, get_max_level_mock): + def test_requires_contributor_rights_on_process(self, get_max_level_mock): """Raise 403 if the user does not have rights on the target process """ self.client.force_login(self.user) @@ -339,7 +118,7 @@ class TestTemplates(FixtureAPITestCase): @patch("arkindex.project.mixins.get_max_level", return_value=Role.Contributor.value) @patch("arkindex.users.utils.get_max_level", return_value=Role.Guest.value) - def test_apply_requires_access_rights_all_workers(self, worker_max_level_mock, process_max_level_mock): + def test_requires_access_rights_all_workers(self, worker_max_level_mock, process_max_level_mock): """ Raise 403 if the user does not have rights on all workers concerned """ @@ -361,7 +140,7 @@ class TestTemplates(FixtureAPITestCase): self.assertEqual(worker_max_level_mock.call_count, 1) self.assertEqual(worker_max_level_mock.call_args, call(self.user, self.worker_1)) - def test_apply_already_applied(self): + def test_already_applied(self): """Raise 400 if the process already has a template attached """ process_with_template = self.corpus.processes.create( @@ -378,7 +157,7 @@ class TestTemplates(FixtureAPITestCase): {"process_id": ["A template has already been applied to this process."]}, ) - def test_apply_without_a_template(self): + def test_without_a_template(self): """ Raise 404 if the template is not a template """ @@ -391,8 +170,9 @@ class TestTemplates(FixtureAPITestCase): def test_apply(self): self.assertIsNotNone(self.version_2.docker_image_iid) + self.assertIsNone(self.corpus.maximum_task_ttl) self.client.force_login(self.user) - with self.assertNumQueries(11): + with self.assertNumQueries(10): response = self.client.post( reverse("api:apply-process-template", kwargs={"pk": str(self.template.id)}), data={"process_id": str(self.process.id)}, @@ -410,16 +190,52 @@ class TestTemplates(FixtureAPITestCase): self.assertIsNone(parent_run.model_version_id) self.assertEqual(parent_run.configuration_id, self.worker_configuration.id) self.assertListEqual(parent_run.parents, []) + # This had a 7200 seconds TTL, but is limited by the instance limit + self.assertEqual(parent_run.ttl, 3600) self.assertEqual(child_run.process_id, self.process.id) self.assertEqual(child_run.version_id, self.version_2.id) self.assertEqual(child_run.model_version_id, self.model_version.id) self.assertIsNone(child_run.configuration_id) self.assertListEqual(child_run.parents, [parent_run.id]) + # This had an infinite TTL, but is limited by the instance limit + self.assertEqual(child_run.ttl, 3600) + + def test_unlimited_ttl(self): + self.corpus.maximum_task_ttl = 0 + self.corpus.save() + self.client.force_login(self.user) - def test_apply_excludes_init_elements(self): + with self.assertNumQueries(10): + response = self.client.post( + reverse("api:apply-process-template", kwargs={"pk": str(self.template.id)}), + data={"process_id": str(self.process.id)}, + ) + self.assertEqual(response.status_code, status.HTTP_200_OK) + + child_run, parent_run = self.process.worker_runs.order_by("version__worker__slug") + self.assertEqual(parent_run.ttl, 7200) + self.assertEqual(child_run.ttl, 0) + + def test_corpus_limited_ttl(self): + self.corpus.maximum_task_ttl = 9000 + self.corpus.save() + self.client.force_login(self.user) + + with self.assertNumQueries(10): + response = self.client.post( + reverse("api:apply-process-template", kwargs={"pk": str(self.template.id)}), + data={"process_id": str(self.process.id)}, + ) + self.assertEqual(response.status_code, status.HTTP_200_OK) + + child_run, parent_run = self.process.worker_runs.order_by("version__worker__slug") + self.assertEqual(parent_run.ttl, 7200) + self.assertEqual(child_run.ttl, 9000) + + def test_excludes_init_elements(self): init_version = WorkerVersion.objects.get_by_feature(ArkindexFeature.InitElements) - init_run = self.template.worker_runs.create(version=init_version) + init_run = self.template.worker_runs.create(version=init_version, ttl=0) self.template_run_1.parents = [init_run.id] self.template_run_1.save() @@ -459,7 +275,7 @@ class TestTemplates(FixtureAPITestCase): # No elements initialisation run in the created process self.assertFalse(self.process.worker_runs.filter(version=init_version).exists()) - def test_apply_delete_previous_worker_runs(self): + def test_delete_previous_worker_runs(self): self.client.force_login(self.user) # Create a process with one worker run already process = self.corpus.processes.create( @@ -467,10 +283,10 @@ class TestTemplates(FixtureAPITestCase): ) process.worker_runs.create( version=self.version_2, - parents=[], + ttl=0, ) # Apply a template that has two other worker runs - with self.assertNumQueries(13): + with self.assertNumQueries(12): response = self.client.post( reverse("api:apply-process-template", kwargs={"pk": str(self.template.id)}), data={"process_id": str(process.id)}, @@ -496,19 +312,19 @@ class TestTemplates(FixtureAPITestCase): self.assertIsNone(child_run.configuration_id) self.assertListEqual(child_run.parents, [parent_run.id]) - def test_apply_use_gpu(self): + def test_use_gpu(self): self.client.force_login(self.user) # Set invalid values: the version with disabled GPU usage gets a GPU self.template.worker_runs.filter(version=self.version_1).update(use_gpu=True) # A signal is trying to set use_gpu to the correct values, so we create then update to give no GPU to a version that requires a GPU - self.template.worker_runs.create(version=self.version_3) + self.template.worker_runs.create(version=self.version_3, ttl=0) self.template.worker_runs.filter(version=self.version_3).update(use_gpu=False) # Have two runs with a version that supports GPU usage, to test that both True and False are copied self.version_2.gpu_usage = FeatureUsage.Supported self.version_2.save() - self.template.worker_runs.create(version=self.version_2, configuration=self.worker_configuration, use_gpu=True) + self.template.worker_runs.create(version=self.version_2, configuration=self.worker_configuration, use_gpu=True, ttl=0) self.assertQuerySetEqual(( self.template.worker_runs @@ -521,7 +337,7 @@ class TestTemplates(FixtureAPITestCase): (FeatureUsage.Supported, True), ]) - with self.assertNumQueries(11): + with self.assertNumQueries(10): response = self.client.post( reverse("api:apply-process-template", kwargs={"pk": str(self.template.id)}), {"process_id": str(self.process.id)}, @@ -541,7 +357,7 @@ class TestTemplates(FixtureAPITestCase): ]) - def test_apply_unavailable_worker_version(self): + def test_unavailable_worker_version(self): self.version_2.state = WorkerVersionState.Error self.version_2.save() self.client.force_login(self.user) @@ -555,7 +371,7 @@ class TestTemplates(FixtureAPITestCase): self.process.refresh_from_db() self.assertEqual(self.process.template, None) - def test_apply_archived_worker(self): + def test_archived_worker(self): self.worker_2.archived = datetime.now(timezone.utc) self.worker_2.save() self.client.force_login(self.user) @@ -569,7 +385,7 @@ class TestTemplates(FixtureAPITestCase): self.process.refresh_from_db() self.assertEqual(self.process.template, None) - def test_apply_unavailable_model_version(self): + def test_unavailable_model_version(self): self.model_version.state = ModelVersionState.Error self.model_version.save() self.client.force_login(self.user) @@ -586,7 +402,7 @@ class TestTemplates(FixtureAPITestCase): self.process.refresh_from_db() self.assertEqual(self.process.template, None) - def test_apply_archived_model(self): + def test_archived_model(self): self.model.archived = datetime.now(timezone.utc) self.model.save() self.client.force_login(self.user) @@ -602,7 +418,7 @@ class TestTemplates(FixtureAPITestCase): self.process.refresh_from_db() self.assertEqual(self.process.template, None) - def test_apply_unsupported_mode(self): + def test_unsupported_mode(self): self.client.force_login(self.user) for mode in set(ProcessMode) - {ProcessMode.Workers, ProcessMode.Dataset, ProcessMode.Local}: with self.subTest(mode=mode): @@ -622,7 +438,7 @@ class TestTemplates(FixtureAPITestCase): self.process.refresh_from_db() self.assertEqual(self.process.template, None) - def test_apply_local(self): + def test_local(self): self.client.force_login(self.user) local_process = self.user.processes.get(mode=ProcessMode.Local) @@ -638,15 +454,3 @@ class TestTemplates(FixtureAPITestCase): }) local_process.refresh_from_db() self.assertEqual(local_process.template, None) - - def test_list_ignores_configuration_filter(self): - self.client.force_login(self.user) - with self.assertNumQueries(4): - response = self.client.get( - reverse("api:process-list"), - data={"mode": "template", "with_tasks": True}, - ) - self.assertEqual(response.status_code, status.HTTP_200_OK) - # The 'with_tasks' filter should be ignored and some templates should be returned - # If it wasn't, no template are returned because none are configured - self.assertTrue(len(response.json()) > 0) diff --git a/arkindex/process/tests/templates/test_create.py b/arkindex/process/tests/templates/test_create.py new file mode 100644 index 0000000000000000000000000000000000000000..8f819de010092c3714c3e15cb3715a7b22fd9ca8 --- /dev/null +++ b/arkindex/process/tests/templates/test_create.py @@ -0,0 +1,309 @@ +from unittest.mock import call, patch + +from django.test import override_settings +from rest_framework import status +from rest_framework.reverse import reverse + +from arkindex.documents.models import Corpus +from arkindex.process.models import ( + ArkindexFeature, + FeatureUsage, + Process, + ProcessMode, + WorkerConfiguration, + WorkerRun, + WorkerVersion, +) +from arkindex.project.tests import FixtureAPITestCase +from arkindex.training.models import Model, ModelVersionState +from arkindex.users.models import Role, User + + +@override_settings(PONOS_MAXIMUM_TASK_TTL=3600) +class TestCreateTemplate(FixtureAPITestCase): + + @classmethod + def setUpTestData(cls): + super().setUpTestData() + cls.private_corpus = Corpus.objects.create(name="private") + cls.private_corpus.memberships.create(user=cls.user, level=Role.Contributor.value) + cls.private_corpus.elements.create( + type=cls.private_corpus.types.create(slug="volume", folder=True), + name="Hidden", + ) + cls.private_process_template = cls.private_corpus.processes.create( + creator=cls.user, mode=ProcessMode.Workers + ) + cls.private_template = cls.private_corpus.processes.create( + creator=cls.user, mode=ProcessMode.Template + ) + # Workers ProcessMode + cls.version_1 = WorkerVersion.objects.get(worker__slug="reco") + cls.worker_1 = cls.version_1.worker + cls.version_2 = WorkerVersion.objects.get(worker__slug="dla") + cls.worker_2 = cls.version_2.worker + cls.version_3 = WorkerVersion.objects.get(worker__slug="worker-gpu") + + cls.process = cls.corpus.processes.create( + creator=cls.user, mode=ProcessMode.Workers + ) + cls.process_template = cls.corpus.processes.create( + creator=cls.user, mode=ProcessMode.Workers + ) + cls.template = cls.corpus.processes.create( + creator=cls.user, mode=ProcessMode.Template + ) + cls.worker_configuration = WorkerConfiguration.objects.create( + worker=cls.version_1.worker, + name="A config", + configuration={"param1": "value1"}, + ) + cls.run_1 = cls.process_template.worker_runs.create( + version=cls.version_1, + configuration=cls.worker_configuration, + ttl=7200, + ) + cls.run_2 = cls.process_template.worker_runs.create( + version=cls.version_2, + parents=[cls.run_1.id], + ttl=0, + ) + + cls.model = Model.objects.create(name="moo") + cls.model_version = cls.model.versions.create(state=ModelVersionState.Available) + + cls.template_run_1 = cls.template.worker_runs.create( + version=cls.version_1, + configuration=cls.worker_configuration, + ttl=0, + ) + cls.template_run_2 = cls.template.worker_runs.create( + version=cls.version_2, + parents=[cls.template_run_1.id], + model_version=cls.model_version, + ttl=0, + ) + + cls.private_process_template.worker_runs.create( + version=cls.version_1, + configuration=cls.worker_configuration, + ttl=0, + ) + cls.private_template.worker_runs.create( + version=cls.version_1, + configuration=cls.worker_configuration, + ttl=0, + ) + + def test_create(self): + self.client.force_login(self.user) + with self.assertNumQueries(8): + response = self.client.post( + reverse( + "api:create-process-template", kwargs={"pk": str(self.process_template.id)} + ), + {"name": "test_template"}, + ) + + self.assertEqual(response.status_code, status.HTTP_201_CREATED) + self.assertEqual(response.json()["mode"], "template") + + template_process = Process.objects.get(id=response.json()["id"]) + self.assertEqual(template_process.mode, ProcessMode.Template) + parent_run = template_process.worker_runs.get(version=self.version_1) + + # Check that every new worker_run is the same as one of the template's + self.assertQuerySetEqual(( + template_process.worker_runs + .order_by("version__worker__slug") + .values_list("version_id", "model_version_id", "configuration_id", "parents", "ttl") + ), [ + (self.version_2.id, None, None, [parent_run.id], 0), + (self.version_1.id, None, self.worker_configuration.id, [], 7200), + ]) + + def test_use_gpu(self): + self.client.force_login(self.user) + + # Set invalid values: the version with disabled GPU usage gets a GPU + self.process_template.worker_runs.filter(version=self.version_1).update(use_gpu=True) + # A signal is trying to set use_gpu to the correct values, so we create then update to give no GPU to a version that requires a GPU + self.process_template.worker_runs.create(version=self.version_3, ttl=0) + self.process_template.worker_runs.filter(version=self.version_3).update(use_gpu=False) + + # Have two runs with a version that supports GPU usage, to test that both True and False are copied + self.version_2.gpu_usage = FeatureUsage.Supported + self.version_2.save() + self.process_template.worker_runs.create(version=self.version_2, configuration=self.worker_configuration, use_gpu=True, ttl=0) + + self.assertQuerySetEqual(( + self.process_template.worker_runs + .order_by("version__gpu_usage", "use_gpu") + .values_list("version__gpu_usage", "use_gpu") + ), [ + (FeatureUsage.Disabled, True), + (FeatureUsage.Required, False), + (FeatureUsage.Supported, False), + (FeatureUsage.Supported, True), + ]) + + with self.assertNumQueries(8): + response = self.client.post( + reverse( + "api:create-process-template", kwargs={"pk": str(self.process_template.id)} + ), + {"name": "test_template"}, + ) + + self.assertEqual(response.status_code, status.HTTP_201_CREATED) + + self.assertQuerySetEqual(( + Process.objects.get(id=response.json()["id"]).worker_runs + .order_by("version__gpu_usage", "use_gpu") + .values_list("version__gpu_usage", "use_gpu") + ), [ + (FeatureUsage.Disabled, False), + (FeatureUsage.Required, True), + (FeatureUsage.Supported, False), + (FeatureUsage.Supported, True), + ]) + + def test_excludes_init_elements(self): + init_version = WorkerVersion.objects.get_by_feature(ArkindexFeature.InitElements) + init_run = self.process_template.worker_runs.create(version=init_version, ttl=0) + self.run_1.parents = [init_run.id] + self.run_1.save() + + self.client.force_login(self.user) + with self.assertNumQueries(8): + response = self.client.post( + reverse( + "api:create-process-template", kwargs={"pk": str(self.process_template.id)} + ), + {"name": "test_template"}, + ) + + self.assertEqual(response.status_code, status.HTTP_201_CREATED) + self.assertEqual(response.json()["mode"], "template") + + new_process_id = response.json()["id"] + + # Only two worker runs for the created template + self.assertEqual(self.process_template.worker_runs.count(), 3) + self.assertEqual(Process.objects.get(id=new_process_id).worker_runs.count(), 2) + + # No elements initialisation run in the created template + self.assertFalse(WorkerRun.objects.filter(process_id=new_process_id, version=init_version).exists()) + + child_run, parent_run = WorkerRun.objects.select_related("version__worker").filter(process__id=new_process_id).order_by("version__worker__slug").all() + # Check dependencies + self.assertListEqual(parent_run.parents, []) + self.assertListEqual(child_run.parents, [parent_run.id]) + + # Check that every new worker_run is the same as one of the template's + self.assertTrue(self.process_template.worker_runs.filter(version=parent_run.version).exists()) + self.assertTrue(self.process_template.worker_runs.filter(version=child_run.version).exists()) + + def test_requires_authentication(self): + response = self.client.post( + reverse("api:create-process-template", kwargs={"pk": str(self.process_template.id)}) + ) + self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) + self.assertDictEqual( + response.json(), {"detail": "Authentication credentials were not provided."} + ) + + def test_requires_worker_runs(self): + process_no_worker_runs = self.corpus.processes.create( + creator=self.user, mode=ProcessMode.Workers + ) + self.client.force_login(self.user) + response = self.client.post( + reverse("api:create-process-template", kwargs={"pk": str(process_no_worker_runs.id)}) + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertListEqual( + response.json(), + ["This process does not have any worker runs."], + ) + + def test_requires_verified_user(self): + new_user = User.objects.create(email="new@test.fr", verified_email=False) + self.client.force_login(new_user) + response = self.client.post( + reverse("api:create-process-template", kwargs={"pk": str(self.process_template.id)}) + ) + self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) + + @patch("arkindex.project.mixins.get_max_level", return_value=Role.Guest.value) + def test_requires_contributor_access_rights_process(self, get_max_level_mock): + self.client.force_login(self.user) + + with self.assertNumQueries(4): + response = self.client.post( + reverse( + "api:create-process-template", + kwargs={"pk": str(self.private_process_template.id)}, + ) + ) + self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) + + self.assertDictEqual( + response.json(), + {"detail": "You do not have a contributor access to this process."}, + ) + self.assertEqual(get_max_level_mock.call_count, 1) + self.assertEqual(get_max_level_mock.call_args, call(self.user, self.private_corpus)) + + @patch("arkindex.project.mixins.get_max_level", return_value=Role.Contributor.value) + @patch("arkindex.users.utils.get_max_level", return_value=Role.Guest.value) + def test_requires_access_rights_all_workers(self, worker_max_level_mock, process_max_level_mock): + self.client.force_login(self.user) + + with self.assertNumQueries(4): + response = self.client.post( + reverse("api:create-process-template", kwargs={"pk": str(self.private_process_template.id)}) + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + + self.assertEqual( + response.json(), + ["You do not have an execution access to every worker of this process."], + ) + self.assertEqual(process_max_level_mock.call_count, 1) + self.assertEqual(process_max_level_mock.call_args, call(self.user, self.private_corpus)) + self.assertEqual(worker_max_level_mock.call_count, 1) + self.assertEqual(worker_max_level_mock.call_args, call(self.user, self.worker_1)) + + def test_unsupported_mode(self): + self.client.force_login(self.user) + for mode in set(ProcessMode) - {ProcessMode.Workers, ProcessMode.Dataset, ProcessMode.Local}: + with self.subTest(mode=mode): + self.process.mode = mode + self.process.save() + + with self.assertNumQueries(4): + response = self.client.post( + reverse("api:create-process-template", kwargs={"pk": str(self.template.id)}), + data={"process_id": str(self.process.id)}, + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + + self.assertEqual(response.json(), ["Templates can only be created from Workers or Dataset processes."]) + self.process.refresh_from_db() + self.assertEqual(self.process.template, None) + + def test_local(self): + self.client.force_login(self.user) + local_process = self.user.processes.get(mode=ProcessMode.Local) + + with self.assertNumQueries(4): + response = self.client.post( + reverse("api:create-process-template", kwargs={"pk": str(self.template.id)}), + data={"process_id": str(local_process.id)}, + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + + self.assertEqual(response.json(), ["Templates can only be created from Workers or Dataset processes."]) + local_process.refresh_from_db() + self.assertEqual(local_process.template, None) diff --git a/arkindex/process/tests/test_corpus_worker_runs.py b/arkindex/process/tests/test_corpus_worker_runs.py index 3e7cadf540c8062a426cc05060e958e269888be3..e93e20bdf282c7f6149e3655b40532a70b2cdee1 100644 --- a/arkindex/process/tests/test_corpus_worker_runs.py +++ b/arkindex/process/tests/test_corpus_worker_runs.py @@ -29,7 +29,7 @@ class TestCorpusWorkerRuns(FixtureAPITestCase): cls.run_1 = WorkerRun.objects.create( process=cls.process, version=cls.dla_worker_version, - parents=[], + ttl=0, has_results=True ) @@ -57,12 +57,12 @@ class TestCorpusWorkerRuns(FixtureAPITestCase): cls.run_2 = WorkerRun.objects.create( process=cls.private_process, version=cls.reco_worker_version, - parents=[], + ttl=0, ) cls.run_3 = WorkerRun.objects.create( process=cls.private_process, version=cls.dla_worker_version, - parents=[], + ttl=0, has_results=True ) @@ -141,6 +141,7 @@ class TestCorpusWorkerRuns(FixtureAPITestCase): } }, "use_gpu": False, + "ttl": 0, }, { "id": str(self.run_3.id), @@ -183,5 +184,6 @@ class TestCorpusWorkerRuns(FixtureAPITestCase): }, "use_gpu": False, "summary": "Worker Document layout analyser @ version 1", + "ttl": 0, } ]) diff --git a/arkindex/process/tests/test_elements_initialisation.py b/arkindex/process/tests/test_elements_initialisation.py index 4d4e5cc13a1bfd63d05b6a4c9240e4290b8f6891..0ec36078b5955b4561276a2b9a24efe37c4693bb 100644 --- a/arkindex/process/tests/test_elements_initialisation.py +++ b/arkindex/process/tests/test_elements_initialisation.py @@ -44,11 +44,12 @@ class TestElementsInit(FixtureAPITestCase): ) init_run = process.worker_runs.create( version=self.init_elements_version, - parents=[] + ttl=0, ) worker_run = process.worker_runs.create( version=self.reco_version, - parents=[init_run.id] + parents=[init_run.id], + ttl=0, ) with self.assertNumQueries(16): @@ -85,8 +86,8 @@ class TestElementsInit(FixtureAPITestCase): then one is created when the process is started. """ self.client.force_login(self.user) - reco_run = self.process.worker_runs.create(version=self.reco_version) - dla_run = self.process.worker_runs.create(version=self.dla_version, parents=[reco_run.id]) + reco_run = self.process.worker_runs.create(version=self.reco_version, ttl=0) + dla_run = self.process.worker_runs.create(version=self.dla_version, parents=[reco_run.id], ttl=0) with self.assertNumQueries(18): response = self.client.post( diff --git a/arkindex/process/tests/test_managers.py b/arkindex/process/tests/test_managers.py index 9072b0ad39e6da2e0851afd0e2635d7bfe4a45d3..aa5c3c12f99c5bd64cfacfb6116494758403e7be 100644 --- a/arkindex/process/tests/test_managers.py +++ b/arkindex/process/tests/test_managers.py @@ -28,19 +28,23 @@ class TestManagers(FixtureTestCase): cls.worker_run_1 = cls.worker_version.worker_runs.create( process=cls.corpus.processes.create(mode=ProcessMode.Workers, creator=cls.user), + ttl=0, ) cls.worker_run_2 = cls.worker_version.worker_runs.create( process=cls.corpus.processes.create(mode=ProcessMode.Workers, creator=cls.user), configuration=cls.worker_configuration, + ttl=0, ) cls.worker_run_3 = cls.worker_version.worker_runs.create( process=cls.corpus.processes.create(mode=ProcessMode.Workers, creator=cls.user), model_version=cls.model_version, + ttl=0, ) cls.worker_run_4 = cls.worker_version.worker_runs.create( process=cls.corpus.processes.create(mode=ProcessMode.Workers, creator=cls.user), model_version=cls.model_version, configuration=cls.worker_configuration, + ttl=0, ) def test_corpus_worker_version_rebuild(self): diff --git a/arkindex/process/tests/test_process_dataset_sets.py b/arkindex/process/tests/test_process_dataset_sets.py index 373dc6dc0a4e04b1fb0d9e72512637b12f24413c..ae7b74de7c0d97a1785af3abb4b51e005b28dd69 100644 --- a/arkindex/process/tests/test_process_dataset_sets.py +++ b/arkindex/process/tests/test_process_dataset_sets.py @@ -270,7 +270,7 @@ class TestProcessDatasetSets(FixtureAPITestCase): def test_create_started(self): self.client.force_login(self.test_user) - self.dataset_process.tasks.create(run=0, depth=0, slug="makrout") + self.dataset_process.tasks.create(run=0, depth=0, slug="makrout", ttl=0) test_set = self.dataset2.sets.get(name="test") with self.assertNumQueries(6): @@ -425,7 +425,7 @@ class TestProcessDatasetSets(FixtureAPITestCase): def test_destroy_started(self): train_set = self.dataset1.sets.get(name="train") self.client.force_login(self.test_user) - self.dataset_process.tasks.create(run=0, depth=0, slug="makrout") + self.dataset_process.tasks.create(run=0, depth=0, slug="makrout", ttl=0) with self.assertNumQueries(4): response = self.client.delete( diff --git a/arkindex/process/tests/test_process_elements.py b/arkindex/process/tests/test_process_elements.py index dc69c9734f620296462c965b3e41ddf9368680aa..0881b4fe1cf77cc96e507d7831c58e916329164d 100644 --- a/arkindex/process/tests/test_process_elements.py +++ b/arkindex/process/tests/test_process_elements.py @@ -537,13 +537,14 @@ class TestProcessElements(FixtureAPITestCase): for element in elements ]) - def test_non_workers(self): + def test_non_supported_modes(self): self.process.load_children = True self.client.force_login(self.superuser) - # Every mode other than Workers never returns elements + # Every mode other than Workers and Export never returns elements modes = list(ProcessMode) modes.remove(ProcessMode.Workers) + modes.remove(ProcessMode.Export) # Local processes do not have a corpus, causing HTTP 404 modes.remove(ProcessMode.Local) @@ -576,33 +577,41 @@ class TestProcessElements(FixtureAPITestCase): self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) self.assertEqual(response.json(), {"detail": "No Process matches the given query."}) - def test_workers_mode(self): - # The Workers mode returns some elements + def test_supported_process_modes(self): + """ + ListProcessElements returns elements when the process is of mode Workers or Export + """ + supported_modes = [ProcessMode.Workers, ProcessMode.Export] + elements = Element.objects.filter(corpus=self.private_corpus).order_by("id") # This tests only supports up to 20 elements self.assertLessEqual(elements.count(), 20) - self.process.corpus = self.private_corpus - self.process.load_children = True - self.process.save() - self.client.force_login(self.superuser) - with self.assertNumQueries(6): - response = self.client.get(reverse("api:process-elements-list", kwargs={"pk": self.process.id})) - self.assertEqual(response.status_code, status.HTTP_200_OK) + for mode in supported_modes: + with self.subTest(mode=mode): + self.process.corpus = self.private_corpus + self.process.load_children = True + self.process.mode = mode + self.process.save() + self.client.force_login(self.superuser) - data = response.json() - self.assertIsNone(data["count"]) - self.assertIsNone(data["previous"]) - self.assertIsNone(data["next"]) - self.assertListEqual(data["results"], [ - { - "id": str(element.id), - "type_id": str(element.type_id), - "name": element.name, - "confidence": element.confidence, - } - for element in elements - ]) + with self.assertNumQueries(6): + response = self.client.get(reverse("api:process-elements-list", kwargs={"pk": self.process.id})) + self.assertEqual(response.status_code, status.HTTP_200_OK) + + data = response.json() + self.assertIsNone(data["count"]) + self.assertIsNone(data["previous"]) + self.assertIsNone(data["next"]) + self.assertListEqual(data["results"], [ + { + "id": str(element.id), + "type_id": str(element.type_id), + "name": element.name, + "confidence": element.confidence, + } + for element in elements + ]) def test_list_elements_cursor_pagination(self): """ diff --git a/arkindex/process/tests/test_signals.py b/arkindex/process/tests/test_signals.py index 69c86d4e0d40809b8d7ae47ca0a9e8f25d7a4e16..1c6323c2a153924f0e80eb473fe9543e5b36b8b2 100644 --- a/arkindex/process/tests/test_signals.py +++ b/arkindex/process/tests/test_signals.py @@ -40,7 +40,7 @@ class TestSignals(FixtureAPITestCase): ) cls.run_1 = cls.process_1.worker_runs.create( version=cls.version_1, - parents=[], + ttl=0, ) cls.process_2 = cls.corpus.processes.create( creator=cls.user, @@ -51,7 +51,7 @@ class TestSignals(FixtureAPITestCase): def test_worker_run_check_parents_recursive(self): run_2 = self.process_1.worker_runs.create( version=self.version_2, - parents=[], + ttl=0, ) self.assertListEqual(run_2.parents, []) @@ -68,6 +68,7 @@ class TestSignals(FixtureAPITestCase): self.process_2.worker_runs.create( version=self.version_2, parents=[str(self.run_1.id)], + ttl=0, ) self.process_1.refresh_from_db() @@ -79,6 +80,7 @@ class TestSignals(FixtureAPITestCase): self.process_2.worker_runs.create( version=self.version_2, parents=["12341234-1234-1234-1234-123412341234"], + ttl=0, ) self.process_1.refresh_from_db() @@ -131,6 +133,7 @@ class TestSignals(FixtureAPITestCase): run_2 = self.process_1.worker_runs.create( version=self.version_2, parents=[self.run_1.id], + ttl=0, ) self.run_1.parents = [run_2.id] @@ -172,18 +175,22 @@ class TestSignals(FixtureAPITestCase): run_2 = self.process_1.worker_runs.create( version=self.version_2, parents=[self.run_1.id], + ttl=0, ) run_3 = self.process_1.worker_runs.create( version=version_3, parents=[run_2.id], + ttl=0, ) run_4 = self.process_1.worker_runs.create( version=version_4, parents=[run_3.id], + ttl=0, ) run_5 = self.process_1.worker_runs.create( version=version_5, parents=[run_4.id], + ttl=0, ) self.run_1.parents = [run_5.id] @@ -204,10 +211,12 @@ class TestSignals(FixtureAPITestCase): run_2 = self.process_1.worker_runs.create( version=self.version_2, parents=[self.run_1.id], + ttl=0, ) run_3 = self.process_1.worker_runs.create( version=version_3, parents=[run_2.id], + ttl=0, ) run_3.parents.append(self.run_1.id) @@ -222,6 +231,7 @@ class TestSignals(FixtureAPITestCase): run_2 = self.process_1.worker_runs.create( version=self.version_2, parents=[self.run_1.id], + ttl=0, ) self.assertEqual(len(self.process_1.worker_runs.all()), 2) @@ -235,6 +245,7 @@ class TestSignals(FixtureAPITestCase): run = self.process_1.worker_runs.create( version=self.version_2, parents=[self.run_1.id], + ttl=0, ) self.assertIsNotNone(run.summary) diff --git a/arkindex/process/tests/test_user_workerruns.py b/arkindex/process/tests/test_user_workerruns.py index fa2956ae7cdbc83d33f49503d82138c70802fe71..f900d5e4f1d5fed78d812078af114fdd9e7774b8 100644 --- a/arkindex/process/tests/test_user_workerruns.py +++ b/arkindex/process/tests/test_user_workerruns.py @@ -1,6 +1,7 @@ from datetime import datetime, timezone from unittest.mock import call, patch +from django.test import override_settings from django.urls import reverse from rest_framework import status @@ -18,6 +19,7 @@ from arkindex.training.models import Model, ModelVersion, ModelVersionState from arkindex.users.models import Right, Role, User +@override_settings(PONOS_MAXIMUM_TASK_TTL=3600) class TestUserWorkerRuns(FixtureAPITestCase): @classmethod def setUpTestData(cls): @@ -65,7 +67,7 @@ class TestUserWorkerRuns(FixtureAPITestCase): test_local_run = WorkerRun.objects.create( process=self.local_process, version=self.version_1, - parents=[], + ttl=0, ) self.client.force_login(self.user) with self.assertNumQueries(5): @@ -112,7 +114,8 @@ class TestUserWorkerRuns(FixtureAPITestCase): "archived": False, } }, - "use_gpu": False + "use_gpu": False, + "ttl": 0, }, { "configuration": None, "id": str(self.local_run.id), @@ -153,7 +156,8 @@ class TestUserWorkerRuns(FixtureAPITestCase): "archived": False, } }, - "use_gpu": False + "use_gpu": False, + "ttl": 0, }]) def test_list_user_runs_only_own_runs(self): @@ -165,7 +169,7 @@ class TestUserWorkerRuns(FixtureAPITestCase): test_local_run = WorkerRun.objects.create( process=test_local_process, version=self.version_1, - parents=[], + ttl=0, ) assert WorkerRun.objects.filter(process__mode=ProcessMode.Local, process__creator=write_user).count() == 1 self.client.force_login(self.user) @@ -250,7 +254,9 @@ class TestUserWorkerRuns(FixtureAPITestCase): "state": "unscheduled", "use_cache": False, }, - "use_gpu": False + "use_gpu": False, + # The TTL is always 0 for user worker runs + "ttl": 0, }) def test_create_user_run_no_local_process(self): @@ -464,7 +470,8 @@ class TestUserWorkerRuns(FixtureAPITestCase): "state": "unscheduled", "use_cache": False, }, - "use_gpu": False + "use_gpu": False, + "ttl": 0, }) def test_create_user_run_duplicate(self): diff --git a/arkindex/process/tests/test_worker_types.py b/arkindex/process/tests/test_worker_types.py index 120d837740f5f3005980332154c4e2c1e424614c..e79e04ebb833fec08634673e3e7b14d728da4b93 100644 --- a/arkindex/process/tests/test_worker_types.py +++ b/arkindex/process/tests/test_worker_types.py @@ -18,6 +18,7 @@ class TestWorkerTypes(FixtureAPITestCase): cls.worker_type_worker = WorkerType.objects.get(slug="worker") cls.worker_type_import = WorkerType.objects.get(slug="import") cls.worker_type_custom = WorkerType.objects.get(slug="custom") + cls.worker_type_export = WorkerType.objects.get(slug="export") cls.init_type = WorkerType.objects.get(slug="init") def test_list_requires_login(self): @@ -50,7 +51,7 @@ class TestWorkerTypes(FixtureAPITestCase): self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertDictEqual(response.json(), { - "count": 6, + "count": 7, "previous": None, "next": None, "number": 1, @@ -60,6 +61,11 @@ class TestWorkerTypes(FixtureAPITestCase): "id": str(self.worker_type_custom.id), "slug": "custom", }, + { + "id": str(self.worker_type_export.id), + "slug": "export", + "display_name": "Document export" + }, { "id": str(self.worker_type_dla.id), "slug": "dla", diff --git a/arkindex/process/tests/worker_activity/test_bulk_insert.py b/arkindex/process/tests/worker_activity/test_bulk_insert.py index 73c0412ee9f33e22f840852f830e0111f74df81c..026292cd5476b6ed60b138c13dbf4dd8de848013 100644 --- a/arkindex/process/tests/worker_activity/test_bulk_insert.py +++ b/arkindex/process/tests/worker_activity/test_bulk_insert.py @@ -26,7 +26,11 @@ class TestWorkerActivityBulkInsert(FixtureAPITestCase): corpus=cls.corpus, farm=Farm.objects.first(), ) - cls.configuration = WorkerConfiguration.objects.create(worker=cls.worker_version.worker, name="A config", configuration={"a": "b"}) + cls.configuration = WorkerConfiguration.objects.create( + worker=cls.worker_version.worker, + name="A config", + configuration={"a": "b"}, + ) cls.model = Model.objects.create(name="Mochi", public=False) cls.model_version = cls.model.versions.create( state=ModelVersionState.Available, @@ -36,6 +40,7 @@ class TestWorkerActivityBulkInsert(FixtureAPITestCase): version=cls.worker_version, configuration=cls.configuration, model_version=cls.model_version, + ttl=0, ) def test_worker_version(self): diff --git a/arkindex/process/tests/worker_activity/test_initialize.py b/arkindex/process/tests/worker_activity/test_initialize.py index caaed902d0c140d5051d4d7685dcbb4cd00f75c9..321f1855f7c8d2e9062538552abb57aceddea987 100644 --- a/arkindex/process/tests/worker_activity/test_initialize.py +++ b/arkindex/process/tests/worker_activity/test_initialize.py @@ -19,8 +19,8 @@ class TestInitializeActivity(FixtureTestCase): element_type=cls.corpus.types.get(slug="volume"), activity_state=ActivityState.Pending, ) - cls.process.worker_runs.create(version=cls.worker_version_1) - cls.process.worker_runs.create(version=cls.worker_version_2) + cls.process.worker_runs.create(version=cls.worker_version_1, ttl=0) + cls.process.worker_runs.create(version=cls.worker_version_2, ttl=0) @patch("arkindex.process.tasks.get_current_job") def test_rq_progress(self, job_mock): diff --git a/arkindex/process/tests/worker_activity/test_list.py b/arkindex/process/tests/worker_activity/test_list.py index d4924f504d36bbabbedffd10a140511299a7eccd..bc980a1189a5738caac9943f165c41a3b160948c 100644 --- a/arkindex/process/tests/worker_activity/test_list.py +++ b/arkindex/process/tests/worker_activity/test_list.py @@ -33,7 +33,11 @@ class TestListWorkerActivities(FixtureAPITestCase): corpus=cls.corpus, farm=Farm.objects.first(), ) - cls.configuration = WorkerConfiguration.objects.create(worker=cls.worker_version.worker, name="A config", configuration={"a": "b"}) + cls.configuration = WorkerConfiguration.objects.create( + worker=cls.worker_version.worker, + name="A config", + configuration={"a": "b"}, + ) cls.model = Model.objects.create(name="Mochi", public=False) cls.model_version = cls.model.versions.create( state=ModelVersionState.Available, @@ -44,6 +48,7 @@ class TestListWorkerActivities(FixtureAPITestCase): version=cls.worker_version, configuration=cls.configuration, model_version=cls.model_version, + ttl=0, ) # Run the process, but skip the real activity initialization so that we can control it ourselves diff --git a/arkindex/process/tests/worker_activity/test_update.py b/arkindex/process/tests/worker_activity/test_update.py index 2f19263f2fc63214910f53b919440e947caa084f..e275425a7e5f933cb2378e9775192038ac2078cd 100644 --- a/arkindex/process/tests/worker_activity/test_update.py +++ b/arkindex/process/tests/worker_activity/test_update.py @@ -35,7 +35,11 @@ class TestUpdateWorkerActivity(FixtureAPITestCase): corpus=cls.corpus, farm=Farm.objects.first(), ) - cls.configuration = WorkerConfiguration.objects.create(worker=cls.worker_version.worker, name="A config", configuration={"a": "b"}) + cls.configuration = WorkerConfiguration.objects.create( + worker=cls.worker_version.worker, + name="A config", + configuration={"a": "b"}, + ) cls.model = Model.objects.create(name="Mochi", public=False) cls.model_version = cls.model.versions.create( state=ModelVersionState.Available, @@ -45,6 +49,7 @@ class TestUpdateWorkerActivity(FixtureAPITestCase): version=cls.worker_version, configuration=cls.configuration, model_version=cls.model_version, + ttl=0, ) # Run the process, but skip the real activity initialization so that we can control it ourselves @@ -196,6 +201,7 @@ class TestUpdateWorkerActivity(FixtureAPITestCase): # Different configuration configuration=None, model_version=self.model_version, + ttl=0, ) with self.assertNumQueries(4): @@ -451,15 +457,17 @@ class TestUpdateWorkerActivity(FixtureAPITestCase): run_2 = self.process.worker_runs.create( version=worker_version_2, parents=[run_1.id], + ttl=0, ) self.process.worker_runs.create( version=worker_version_3, parents=[run_2.id], + ttl=0, ) self.process.worker_runs.create( version=worker_version_4, - parents=[], + ttl=0, ) # Create activities for run_2, run_3 and run_4 diff --git a/arkindex/process/tests/worker_runs/test_build_task.py b/arkindex/process/tests/worker_runs/test_build_task.py index 51c8d672036b28ae34b19aced225b83778c1487a..65ce96c232f643b396f70afcfbc4d21ad0f36400 100644 --- a/arkindex/process/tests/worker_runs/test_build_task.py +++ b/arkindex/process/tests/worker_runs/test_build_task.py @@ -25,7 +25,7 @@ class TestWorkerRunsBuildTask(FixtureAPITestCase): ) cls.version = WorkerVersion.objects.get(worker__slug="reco") cls.worker = cls.version.worker - cls.worker_run = cls.process.worker_runs.create(version=cls.version, parents=[]) + cls.worker_run = cls.process.worker_runs.create(version=cls.version, ttl=0) # Model and Model version setup cls.model_1 = Model.objects.create(name="My model") @@ -52,6 +52,7 @@ class TestWorkerRunsBuildTask(FixtureAPITestCase): "TASK_ELEMENTS": "/data/import/elements.json", "ARKINDEX_WORKER_RUN_ID": str(self.worker_run.id), }) + self.assertEqual(task.ttl, 0) def test_build_task_with_chunk(self): task, parent_slugs = self.worker_run.build_task(self.process, ENV.copy(), "import", "/data/import/elements.json", chunk=4) @@ -80,6 +81,7 @@ class TestWorkerRunsBuildTask(FixtureAPITestCase): run_2 = self.process.worker_runs.create( version=version_2, parents=[self.worker_run.id], + ttl=42, ) task, parent_slugs = run_2.build_task(self.process, ENV.copy(), "import", "/data/import/elements.json") @@ -95,6 +97,7 @@ class TestWorkerRunsBuildTask(FixtureAPITestCase): "TASK_ELEMENTS": "/data/import/elements.json", "ARKINDEX_WORKER_RUN_ID": str(run_2.id), }) + self.assertEqual(task.ttl, 42) def test_build_task_with_parent_and_chunk(self): version_2 = WorkerVersion.objects.create( @@ -107,6 +110,7 @@ class TestWorkerRunsBuildTask(FixtureAPITestCase): run_2 = self.process.worker_runs.create( version=version_2, parents=[self.worker_run.id], + ttl=1000, ) task, parent_slugs = run_2.build_task(self.process, ENV.copy(), "import", "/data/import/elements.json", chunk=4) @@ -123,6 +127,7 @@ class TestWorkerRunsBuildTask(FixtureAPITestCase): "TASK_ELEMENTS": "/data/import/elements.json", "ARKINDEX_WORKER_RUN_ID": str(run_2.id), }) + self.assertEqual(task.ttl, 1000) def test_build_task_shm_size(self): self.version.configuration = { @@ -155,6 +160,7 @@ class TestWorkerRunsBuildTask(FixtureAPITestCase): run_2 = self.process.worker_runs.create( version=version_2, parents=[self.worker_run.id], + ttl=0, ) with self.assertRaisesRegex( diff --git a/arkindex/process/tests/worker_runs/test_create.py b/arkindex/process/tests/worker_runs/test_create.py index 70eba8dbe71d08856a2ac8c813448ddd2143645a..0c1496ad436c2d59fa1d54447279956093552d42 100644 --- a/arkindex/process/tests/worker_runs/test_create.py +++ b/arkindex/process/tests/worker_runs/test_create.py @@ -3,6 +3,7 @@ from datetime import datetime, timezone from unittest.mock import call, patch from django.db import transaction +from django.test import override_settings from django.urls import reverse from rest_framework import status @@ -19,6 +20,7 @@ from arkindex.training.models import Model, ModelVersion, ModelVersionState from arkindex.users.models import Role +@override_settings(PONOS_MAXIMUM_TASK_TTL=3600) class TestWorkerRunsCreate(FixtureAPITestCase): """ Test worker runs create endpoint @@ -36,7 +38,7 @@ class TestWorkerRunsCreate(FixtureAPITestCase): ) cls.version_1 = WorkerVersion.objects.get(worker__slug="reco") cls.worker_1 = cls.version_1.worker - cls.run_1 = cls.process_1.worker_runs.create(version=cls.version_1, parents=[]) + cls.run_1 = cls.process_1.worker_runs.create(version=cls.version_1, ttl=0) cls.configuration_1 = cls.worker_1.configurations.create(name="My config", configuration={"key": "value"}) worker_version = WorkerVersion.objects.exclude(worker=cls.version_1.worker).first() cls.configuration_2 = worker_version.worker.configurations.create(name="Config") @@ -325,6 +327,7 @@ class TestWorkerRunsCreate(FixtureAPITestCase): }, "use_gpu": False, "summary": "Worker Recognizer @ version 1", + "ttl": 3600, }) run = WorkerRun.objects.get(pk=pk) # Check generated summary @@ -406,6 +409,7 @@ class TestWorkerRunsCreate(FixtureAPITestCase): }, "use_gpu": False, "summary": "Worker Recognizer @ version 1 using configuration 'My config'", + "ttl": 3600, }) run = WorkerRun.objects.get(pk=pk) # Check generated summary @@ -549,7 +553,8 @@ class TestWorkerRunsCreate(FixtureAPITestCase): "use_cache": False, }, "summary": f"Worker Recognizer @ version {worker_version.version}", - "use_gpu": use_gpu + "use_gpu": use_gpu, + "ttl": 3600, }) run = WorkerRun.objects.get(pk=pk) self.assertEqual(run.use_gpu, use_gpu) @@ -605,7 +610,8 @@ class TestWorkerRunsCreate(FixtureAPITestCase): "use_cache": False, }, "summary": "Worker Recognizer @ version 2", - "use_gpu": True + "use_gpu": True, + "ttl": 3600, }) run = WorkerRun.objects.get(pk=pk) self.assertEqual(run.use_gpu, True) diff --git a/arkindex/process/tests/worker_runs/test_delete.py b/arkindex/process/tests/worker_runs/test_delete.py index efa4cfc957fa80d3b03529e96c0aa18e83ad4ccd..410e5a3e4422a1d81dbce6fb42ea2d679d94ab34 100644 --- a/arkindex/process/tests/worker_runs/test_delete.py +++ b/arkindex/process/tests/worker_runs/test_delete.py @@ -28,7 +28,7 @@ class TestWorkerRunsDelete(FixtureAPITestCase): cls.version_1 = WorkerVersion.objects.get(worker__slug="reco") cls.worker_1 = cls.version_1.worker cls.version_2 = WorkerVersion.objects.get(worker__slug="dla") - cls.run_1 = cls.process_1.worker_runs.create(version=cls.version_1, parents=[]) + cls.run_1 = cls.process_1.worker_runs.create(version=cls.version_1, ttl=0) cls.agent = Agent.objects.create( farm=cls.farm, @@ -77,7 +77,7 @@ class TestWorkerRunsDelete(FixtureAPITestCase): """ A user cannot delete a worker run on a local process """ - run = self.local_process.worker_runs.create(version=self.version_1, parents=[]) + run = self.local_process.worker_runs.create(version=self.version_1, ttl=0) self.client.force_login(self.user) with self.assertNumQueries(4): @@ -114,10 +114,12 @@ class TestWorkerRunsDelete(FixtureAPITestCase): run_2 = self.process_1.worker_runs.create( version=version_2, parents=[self.run_1.id], + ttl=0, ) run_3 = self.process_1.worker_runs.create( version=version_3, parents=[self.run_1.id, run_2.id], + ttl=0, ) self.assertTrue(self.run_1.id in run_2.parents) @@ -158,7 +160,7 @@ class TestWorkerRunsDelete(FixtureAPITestCase): """ Ponos agents cannot delete WorkerRuns, even when they can access them """ - self.process_1.tasks.create(run=0, depth=0, slug="something", agent=self.agent) + self.process_1.tasks.create(run=0, depth=0, slug="something", agent=self.agent, ttl=0) # Agent auth is not implemented in CE self.client.force_authenticate(user=self.agent) diff --git a/arkindex/process/tests/worker_runs/test_list.py b/arkindex/process/tests/worker_runs/test_list.py index f6acc9b5aef043001d84f9fe61d8933192f9bafa..17ff7eae28bd65058b31bd90156e5c387bf4179d 100644 --- a/arkindex/process/tests/worker_runs/test_list.py +++ b/arkindex/process/tests/worker_runs/test_list.py @@ -22,7 +22,7 @@ class TestWorkerRunsList(FixtureAPITestCase): ) cls.version_1 = WorkerVersion.objects.get(worker__slug="reco") cls.worker_1 = cls.version_1.worker - cls.run_1 = cls.process_1.worker_runs.create(version=cls.version_1, parents=[]) + cls.run_1 = cls.process_1.worker_runs.create(version=cls.version_1, ttl=0) cls.process_2 = cls.corpus.processes.create(creator=cls.user, mode=ProcessMode.Workers) def test_list_requires_login(self): @@ -91,12 +91,13 @@ class TestWorkerRunsList(FixtureAPITestCase): }, "use_gpu": False, "summary": "Worker Recognizer @ version 1", + "ttl": 0, }]) def test_list_filter_process(self): run_2 = self.process_2.worker_runs.create( version=self.version_1, - parents=[], + ttl=0, ) self.client.force_login(self.user) @@ -158,4 +159,5 @@ class TestWorkerRunsList(FixtureAPITestCase): }, "use_gpu": False, "summary": "Worker Recognizer @ version 1", + "ttl": 0, }]) diff --git a/arkindex/process/tests/worker_runs/test_partial_update.py b/arkindex/process/tests/worker_runs/test_partial_update.py index 798f2facb5ed2ad9d6ff5b026b4f6dc738697ad2..fb96d9d4f5b6dbd266302a95eb2c27a2b96a6104 100644 --- a/arkindex/process/tests/worker_runs/test_partial_update.py +++ b/arkindex/process/tests/worker_runs/test_partial_update.py @@ -30,7 +30,7 @@ class TestWorkerRunsPartialUpdate(FixtureAPITestCase): cls.version_1 = WorkerVersion.objects.get(worker__slug="reco") cls.worker_1 = cls.version_1.worker cls.version_2 = WorkerVersion.objects.get(worker__slug="dla") - cls.run_1 = cls.process_1.worker_runs.create(version=cls.version_1, parents=[]) + cls.run_1 = cls.process_1.worker_runs.create(version=cls.version_1, ttl=1000) cls.configuration_1 = cls.worker_1.configurations.create(name="My config", configuration={"key": "value"}) worker_version = WorkerVersion.objects.exclude(worker=cls.version_1.worker).first() cls.configuration_2 = worker_version.worker.configurations.create(name="Config") @@ -105,6 +105,10 @@ class TestWorkerRunsPartialUpdate(FixtureAPITestCase): cls.agent.is_agent = True cls.agent.is_anonymous = False + def setUp(self): + super().setUp() + self.maxDiff = None + def test_partial_update_requires_login(self): version_2 = WorkerVersion.objects.create( worker=self.worker_1, @@ -113,7 +117,7 @@ class TestWorkerRunsPartialUpdate(FixtureAPITestCase): ) run_2 = self.process_1.worker_runs.create( version=version_2, - parents=[], + ttl=0, ) with self.assertNumQueries(0): @@ -152,7 +156,7 @@ class TestWorkerRunsPartialUpdate(FixtureAPITestCase): ) run_2 = self.process_1.worker_runs.create( version=version_2, - parents=[], + ttl=0, ) self.client.force_login(self.user) @@ -169,7 +173,7 @@ class TestWorkerRunsPartialUpdate(FixtureAPITestCase): """ A user cannot update a worker run on a local process """ - run = self.local_process.worker_runs.create(version=self.version_1, parents=[]) + run = self.local_process.worker_runs.create(version=self.version_1, ttl=1000) self.client.force_login(self.user) with self.assertNumQueries(5): @@ -259,6 +263,7 @@ class TestWorkerRunsPartialUpdate(FixtureAPITestCase): } }, "use_gpu": False, + "ttl": 1000, "summary": "Worker Recognizer @ version 1", }) self.run_1.refresh_from_db() @@ -321,6 +326,7 @@ class TestWorkerRunsPartialUpdate(FixtureAPITestCase): } }, "use_gpu": False, + "ttl": 1000, "summary": "Worker Recognizer @ version 1", }) self.run_1.refresh_from_db() @@ -388,6 +394,7 @@ class TestWorkerRunsPartialUpdate(FixtureAPITestCase): } }, "use_gpu": False, + "ttl": 1000, "summary": "Worker Recognizer @ version 1 using configuration 'My config'", }) self.assertEqual(self.run_1.configuration.id, self.configuration_1.id) @@ -441,7 +448,7 @@ class TestWorkerRunsPartialUpdate(FixtureAPITestCase): ) run_2 = self.process_1.worker_runs.create( version=version_no_model, - parents=[], + ttl=1000, ) with self.assertNumQueries(5): @@ -470,7 +477,7 @@ class TestWorkerRunsPartialUpdate(FixtureAPITestCase): ) run_2 = self.process_1.worker_runs.create( version=version_no_model, - parents=[], + ttl=1000, ) random_model_version_uuid = str(uuid.uuid4()) @@ -501,7 +508,7 @@ class TestWorkerRunsPartialUpdate(FixtureAPITestCase): ) run_2 = self.process_1.worker_runs.create( version=version_no_model, - parents=[], + ttl=1000, ) # Create a model version, the user has no access to @@ -541,7 +548,7 @@ class TestWorkerRunsPartialUpdate(FixtureAPITestCase): ) run_2 = self.process_1.worker_runs.create( version=version_no_model, - parents=[], + ttl=1000, ) def filter_rights(user, model, level): @@ -605,7 +612,7 @@ class TestWorkerRunsPartialUpdate(FixtureAPITestCase): ) run = self.process_1.worker_runs.create( version=version, - parents=[], + ttl=1000, ) self.model_version_1.state = ModelVersionState.Error self.model_version_1.save() @@ -632,7 +639,7 @@ class TestWorkerRunsPartialUpdate(FixtureAPITestCase): configuration={"test": "test2"}, model_usage=FeatureUsage.Required ) - run = self.process_1.worker_runs.create(version=version) + run = self.process_1.worker_runs.create(version=version, ttl=1000) self.model_1.archived = datetime.now(timezone.utc) self.model_1.save() @@ -662,7 +669,7 @@ class TestWorkerRunsPartialUpdate(FixtureAPITestCase): ) run = self.process_1.worker_runs.create( version=version_with_model, - parents=[], + ttl=0, ) self.assertIsNone(run.model_version_id) self.assertEqual(run.summary, "Worker Recognizer @ version 2") @@ -739,6 +746,7 @@ class TestWorkerRunsPartialUpdate(FixtureAPITestCase): } }, "use_gpu": False, + "ttl": 0, "summary": f"Worker Recognizer @ version 2 with model {model_version.model.name} @ {str(model_version.id)[:6]}", }) self.assertEqual(run.model_version_id, model_version.id) @@ -758,7 +766,7 @@ class TestWorkerRunsPartialUpdate(FixtureAPITestCase): ) run = self.process_1.worker_runs.create( version=version_with_model, - parents=[], + ttl=0, configuration=self.configuration_1 ) self.assertEqual(run.model_version_id, None) @@ -830,6 +838,7 @@ class TestWorkerRunsPartialUpdate(FixtureAPITestCase): } }, "use_gpu": False, + "ttl": 0, "summary": f"Worker Recognizer @ version 2 with model My model @ {str(self.model_version_1.id)[:6]} using configuration 'My config'", }) self.assertEqual(run.model_version_id, self.model_version_1.id) @@ -843,7 +852,7 @@ class TestWorkerRunsPartialUpdate(FixtureAPITestCase): ) run_2 = self.process_1.worker_runs.create( version=version_2, - parents=[], + ttl=0, ) self.client.force_login(self.user) @@ -899,6 +908,7 @@ class TestWorkerRunsPartialUpdate(FixtureAPITestCase): } }, "use_gpu": False, + "ttl": 1000, "summary": "Worker Recognizer @ version 1", }) @@ -926,6 +936,7 @@ class TestWorkerRunsPartialUpdate(FixtureAPITestCase): version=self.version_1, model_version=None if model_version else self.model_version_1, configuration=None if configuration else self.configuration_1, + ttl=0, ) # Having a model version or a configuration adds one query for each @@ -950,7 +961,7 @@ class TestWorkerRunsPartialUpdate(FixtureAPITestCase): """ Ponos agents cannot update WorkerRuns, even when they can access them """ - self.process_1.tasks.create(run=0, depth=0, slug="something", agent=self.agent) + self.process_1.tasks.create(run=0, depth=0, slug="something", agent=self.agent, ttl=0) # Agent auth is not implemented in CE self.client.force_authenticate(user=self.agent) @@ -976,7 +987,7 @@ class TestWorkerRunsPartialUpdate(FixtureAPITestCase): run = WorkerRun.objects.create( process=self.process_1, version=worker_version, - parents=[] + ttl=0 ) self.assertEqual(run.use_gpu, True if worker_version.gpu_usage == FeatureUsage.Required else False) with self.assertNumQueries(3): @@ -1006,7 +1017,7 @@ class TestWorkerRunsPartialUpdate(FixtureAPITestCase): run = WorkerRun.objects.create( process=self.process_1, version=worker_version, - parents=[] + ttl=0 ) self.assertEqual(run.use_gpu, True if worker_version.gpu_usage == FeatureUsage.Required else False) @@ -1060,7 +1071,8 @@ class TestWorkerRunsPartialUpdate(FixtureAPITestCase): "use_cache": False, }, "summary": f"Worker Recognizer @ version {worker_version.version}", - "use_gpu": use_gpu + "use_gpu": use_gpu, + "ttl": 0, }) run.refresh_from_db() self.assertEqual(run.use_gpu, use_gpu) diff --git a/arkindex/process/tests/worker_runs/test_retrieve.py b/arkindex/process/tests/worker_runs/test_retrieve.py index ef0af8081a543bb0e31a4c34f5feb9274955d5b7..c1f9b4fbfc9e907fc16d4a23a955bef684537ffa 100644 --- a/arkindex/process/tests/worker_runs/test_retrieve.py +++ b/arkindex/process/tests/worker_runs/test_retrieve.py @@ -15,7 +15,7 @@ from arkindex.project.tests import FixtureAPITestCase from arkindex.users.models import Role -class TestWorkerRunsretrieve(FixtureAPITestCase): +class TestWorkerRunsRetrieve(FixtureAPITestCase): """ Test worker runs retrieve endpoint """ @@ -35,7 +35,7 @@ class TestWorkerRunsretrieve(FixtureAPITestCase): cls.worker_custom = Worker.objects.get(slug="custom") cls.version_custom = cls.worker_custom.versions.get() - cls.run_1 = cls.process_1.worker_runs.create(version=cls.version_1, parents=[]) + cls.run_1 = cls.process_1.worker_runs.create(version=cls.version_1, ttl=1000) cls.run_custom = cls.local_process.worker_runs.get(version=cls.version_custom) cls.process_2 = cls.corpus.processes.create(creator=cls.user, mode=ProcessMode.Workers) @@ -111,6 +111,7 @@ class TestWorkerRunsretrieve(FixtureAPITestCase): } }, "use_gpu": False, + "ttl": 1000, "summary": "Worker Recognizer @ version 1", }) @@ -172,6 +173,7 @@ class TestWorkerRunsretrieve(FixtureAPITestCase): } }, "use_gpu": False, + "ttl": 1000, "summary": "Worker Recognizer @ version 1", }) @@ -263,13 +265,14 @@ class TestWorkerRunsretrieve(FixtureAPITestCase): }, "summary": "Worker Custom worker @ version 1", "use_gpu": False, + "ttl": 0, }) def test_retrieve_local(self): """ A user can retrieve a run on their own local process """ - run = self.local_process.worker_runs.create(version=self.version_1, parents=[]) + run = self.local_process.worker_runs.create(version=self.version_1, ttl=0) self.client.force_login(self.user) with self.assertNumQueries(5): @@ -320,6 +323,7 @@ class TestWorkerRunsretrieve(FixtureAPITestCase): "prefix": None, }, "use_gpu": False, + "ttl": 0, "summary": "Worker Recognizer @ version 1", }) @@ -340,7 +344,7 @@ class TestWorkerRunsretrieve(FixtureAPITestCase): """ A Ponos agent can retrieve a WorkerRun on a process where it has some assigned tasks """ - self.process_1.tasks.create(run=0, depth=0, slug="something", agent=self.agent) + self.process_1.tasks.create(run=0, depth=0, slug="something", agent=self.agent, ttl=0) # Agent auth is not implemented in CE self.client.force_authenticate(user=self.agent) @@ -392,6 +396,7 @@ class TestWorkerRunsretrieve(FixtureAPITestCase): "prefix": None, }, "use_gpu": False, + "ttl": 1000, "summary": "Worker Recognizer @ version 1", }) @@ -399,7 +404,7 @@ class TestWorkerRunsretrieve(FixtureAPITestCase): """ A Ponos agent cannot retrieve a WorkerRun on a process where it does not have any assigned tasks """ - self.process_1.tasks.create(run=0, depth=0, slug="something", agent=None) + self.process_1.tasks.create(run=0, depth=0, slug="something", agent=None, ttl=0) # Agent auth is not implemented in CE self.client.force_authenticate(user=self.agent) diff --git a/arkindex/process/tests/worker_runs/test_ttl.py b/arkindex/process/tests/worker_runs/test_ttl.py new file mode 100644 index 0000000000000000000000000000000000000000..edd98b6a9b2b821370a857a6a9022561487e2fa5 --- /dev/null +++ b/arkindex/process/tests/worker_runs/test_ttl.py @@ -0,0 +1,272 @@ +from django.test import override_settings +from django.urls import reverse +from rest_framework import status + +from arkindex.ponos.models import Farm +from arkindex.process.models import ( + ProcessMode, + WorkerVersion, +) +from arkindex.project.tests import FixtureAPITestCase + + +@override_settings(PONOS_MAXIMUM_TASK_TTL=3600) +class TestWorkerRunTTL(FixtureAPITestCase): + + @classmethod + def setUpTestData(cls): + super().setUpTestData() + cls.process = cls.corpus.processes.create( + creator=cls.user, + mode=ProcessMode.Workers, + farm=Farm.objects.first(), + ) + cls.recognizer = WorkerVersion.objects.get(worker__slug="reco") + cls.dla = WorkerVersion.objects.get(worker__slug="dla") + cls.worker_run = cls.process.worker_runs.create(version=cls.dla, ttl=0) + + def test_create_default_ttl(self): + self.client.force_login(self.superuser) + # Corpus TTL / WorkerRun TTL + cases = [ + (0, 0), + (10000, 10000), + # No corpus TTL means the instance wide value should be set + (None, 3600), + ] + + for corpus_ttl, expected_ttl in cases: + with self.subTest(corpus_ttl=corpus_ttl): + self.process.worker_runs.filter(version=self.recognizer).delete() + self.corpus.maximum_task_ttl = corpus_ttl + self.corpus.save() + + with self.assertNumQueries(6): + response = self.client.post( + reverse("api:worker-run-list", kwargs={"pk": str(self.process.id)}), + {"worker_version_id": str(self.recognizer.id)}, + ) + self.assertEqual(response.status_code, status.HTTP_201_CREATED) + + data = response.json() + self.assertEqual(data["ttl"], expected_ttl) + run = self.process.worker_runs.get(id=data["id"]) + self.assertEqual(run.ttl, expected_ttl) + + def test_create_set_ttl(self): + self.client.force_login(self.superuser) + # Corpus TTL / WorkerRun TTL / Expected WorkerRun TTL + cases = [ + (0, 0, 0), + (0, 1000, 1000), + (1800, 1000, 1000), + (1800, 1800, 1800), + # No corpus TTL means the instance wide value is the limit + (None, 600, 600), + (None, 3600, 3600), + ] + + for corpus_ttl, worker_run_ttl, expected_ttl in cases: + with self.subTest(corpus_ttl=corpus_ttl, worker_run_ttl=worker_run_ttl): + self.process.worker_runs.filter(version=self.recognizer).delete() + self.corpus.maximum_task_ttl = corpus_ttl + self.corpus.save() + + with self.assertNumQueries(6): + response = self.client.post( + reverse("api:worker-run-list", kwargs={"pk": str(self.process.id)}), + { + "worker_version_id": str(self.recognizer.id), + "ttl": worker_run_ttl, + }, + ) + self.assertEqual(response.status_code, status.HTTP_201_CREATED) + + data = response.json() + self.assertEqual(data["ttl"], expected_ttl) + run = self.process.worker_runs.get(id=data["id"]) + self.assertEqual(run.ttl, expected_ttl) + + def test_create_invalid_ttl(self): + self.client.force_login(self.superuser) + self.process.worker_runs.filter(version=self.recognizer).delete() + + # Corpus TTL, WorkerRun TTL, error message + cases = [ + (None, "one hour", ["A valid integer is required."]), + (None, -1, ["Ensure this value is greater than or equal to 1."]), + (None, 0, ["Ensure this value is greater than or equal to 1."]), + (None, 1e12, ["Ensure this value is less than or equal to 3600."]), + (0, -1, ["Ensure this value is greater than or equal to 0."]), + (0, 1e12, ["Ensure this value is less than or equal to 2147483647."]), + (1800, -1, ["Ensure this value is greater than or equal to 1."]), + (1800, 0, ["Ensure this value is greater than or equal to 1."]), + (1800, 1e12, ["Ensure this value is less than or equal to 1800."]), + ] + for corpus_ttl, worker_run_ttl, expected_error in cases: + with self.subTest(corpus_ttl=corpus_ttl, worker_run_ttl=worker_run_ttl): + self.corpus.maximum_task_ttl = corpus_ttl + self.corpus.save() + + with self.assertNumQueries(4): + response = self.client.post( + reverse("api:worker-run-list", kwargs={"pk": str(self.process.id)}), + { + "worker_version_id": str(self.recognizer.id), + "ttl": worker_run_ttl, + }, + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + + self.assertEqual(response.json(), { + "ttl": expected_error, + }) + + def test_partial_update_set_ttl(self): + self.client.force_login(self.superuser) + # Corpus TTL / WorkerRun TTL / Expected WorkerRun TTL + cases = [ + (0, 0, 0), + (0, 1000, 1000), + (1800, 1000, 1000), + (1800, 1800, 1800), + # No corpus TTL means the instance wide value is the limit + (None, 600, 600), + (None, 3600, 3600), + ] + + for corpus_ttl, worker_run_ttl, expected_ttl in cases: + with self.subTest(corpus_ttl=corpus_ttl, worker_run_ttl=worker_run_ttl): + self.corpus.maximum_task_ttl = corpus_ttl + self.corpus.save() + + with self.assertNumQueries(5): + response = self.client.patch( + reverse("api:worker-run-details", kwargs={"pk": str(self.worker_run.id)}), + {"ttl": worker_run_ttl}, + ) + self.assertEqual(response.status_code, status.HTTP_200_OK) + + data = response.json() + self.assertEqual(data["ttl"], expected_ttl) + run = self.process.worker_runs.get(id=data["id"]) + self.assertEqual(run.ttl, expected_ttl) + + def test_partial_update_invalid_ttl(self): + self.client.force_login(self.superuser) + + # Corpus TTL, WorkerRun TTL, error message + cases = [ + (None, "one hour", ["A valid integer is required."]), + (None, -1, ["Ensure this value is greater than or equal to 1."]), + (None, 0, ["Ensure this value is greater than or equal to 1."]), + (None, 1e12, ["Ensure this value is less than or equal to 3600."]), + (0, -1, ["Ensure this value is greater than or equal to 0."]), + (0, 1e12, ["Ensure this value is less than or equal to 2147483647."]), + (1800, -1, ["Ensure this value is greater than or equal to 1."]), + (1800, 0, ["Ensure this value is greater than or equal to 1."]), + (1800, 1e12, ["Ensure this value is less than or equal to 1800."]), + ] + for corpus_ttl, worker_run_ttl, expected_error in cases: + with self.subTest(corpus_ttl=corpus_ttl, worker_run_ttl=worker_run_ttl): + self.corpus.maximum_task_ttl = corpus_ttl + self.corpus.save() + + with self.assertNumQueries(3): + response = self.client.patch( + reverse("api:worker-run-details", kwargs={"pk": str(self.worker_run.id)}), + {"ttl": worker_run_ttl}, + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + + self.assertEqual(response.json(), { + "ttl": expected_error, + }) + + + def test_update_default_ttl(self): + self.client.force_login(self.superuser) + # Corpus TTL / WorkerRun TTL + cases = [ + (0, 0), + (10000, 10000), + # No corpus TTL means the instance wide value should be set + (None, 3600), + ] + + for corpus_ttl, expected_ttl in cases: + with self.subTest(corpus_ttl=corpus_ttl): + self.corpus.maximum_task_ttl = corpus_ttl + self.corpus.save() + + with self.assertNumQueries(5): + response = self.client.put( + reverse("api:worker-run-details", kwargs={"pk": str(self.worker_run.id)}), + {}, + ) + self.assertEqual(response.status_code, status.HTTP_200_OK) + + self.assertEqual(response.json()["ttl"], expected_ttl) + self.worker_run.refresh_from_db() + self.assertEqual(self.worker_run.ttl, expected_ttl) + + def test_update_set_ttl(self): + self.client.force_login(self.superuser) + # Corpus TTL / WorkerRun TTL / Expected WorkerRun TTL + cases = [ + (0, 0, 0), + (0, 1000, 1000), + (1800, 1000, 1000), + (1800, 1800, 1800), + # No corpus TTL means the instance wide value is the limit + (None, 600, 600), + (None, 3600, 3600), + ] + + for corpus_ttl, worker_run_ttl, expected_ttl in cases: + with self.subTest(corpus_ttl=corpus_ttl, worker_run_ttl=worker_run_ttl): + self.corpus.maximum_task_ttl = corpus_ttl + self.corpus.save() + + with self.assertNumQueries(5): + response = self.client.put( + reverse("api:worker-run-details", kwargs={"pk": str(self.worker_run.id)}), + {"ttl": worker_run_ttl}, + ) + self.assertEqual(response.status_code, status.HTTP_200_OK) + + data = response.json() + self.assertEqual(data["ttl"], expected_ttl) + run = self.process.worker_runs.get(id=data["id"]) + self.assertEqual(run.ttl, expected_ttl) + + def test_update_invalid_ttl(self): + self.client.force_login(self.superuser) + + # Corpus TTL, WorkerRun TTL, error message + cases = [ + (None, "one hour", ["A valid integer is required."]), + (None, -1, ["Ensure this value is greater than or equal to 1."]), + (None, 0, ["Ensure this value is greater than or equal to 1."]), + (None, 1e12, ["Ensure this value is less than or equal to 3600."]), + (0, -1, ["Ensure this value is greater than or equal to 0."]), + (0, 1e12, ["Ensure this value is less than or equal to 2147483647."]), + (1800, -1, ["Ensure this value is greater than or equal to 1."]), + (1800, 0, ["Ensure this value is greater than or equal to 1."]), + (1800, 1e12, ["Ensure this value is less than or equal to 1800."]), + ] + for corpus_ttl, worker_run_ttl, expected_error in cases: + with self.subTest(corpus_ttl=corpus_ttl, worker_run_ttl=worker_run_ttl): + self.corpus.maximum_task_ttl = corpus_ttl + self.corpus.save() + + with self.assertNumQueries(3): + response = self.client.put( + reverse("api:worker-run-details", kwargs={"pk": str(self.worker_run.id)}), + {"ttl": worker_run_ttl}, + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + + self.assertEqual(response.json(), { + "ttl": expected_error, + }) diff --git a/arkindex/process/tests/worker_runs/test_update.py b/arkindex/process/tests/worker_runs/test_update.py index 2d6a1cc11b9501d319331e7c7ce8237508115591..15f94ec55f06d37e2ec95e35125a3d85c98d1f32 100644 --- a/arkindex/process/tests/worker_runs/test_update.py +++ b/arkindex/process/tests/worker_runs/test_update.py @@ -2,6 +2,7 @@ import uuid from datetime import datetime, timezone from unittest.mock import call, patch +from django.test import override_settings from django.urls import reverse from rest_framework import status from rest_framework.exceptions import ValidationError @@ -13,6 +14,7 @@ from arkindex.training.models import Model, ModelVersion, ModelVersionState from arkindex.users.models import Role +@override_settings(PONOS_MAXIMUM_TASK_TTL=3600) class TestWorkerRunsUpdate(FixtureAPITestCase): """ Test worker runs update endpoint @@ -31,7 +33,7 @@ class TestWorkerRunsUpdate(FixtureAPITestCase): cls.version_1 = WorkerVersion.objects.get(worker__slug="reco") cls.worker_1 = cls.version_1.worker cls.version_2 = WorkerVersion.objects.get(worker__slug="dla") - cls.run_1 = cls.process_1.worker_runs.create(version=cls.version_1, parents=[]) + cls.run_1 = cls.process_1.worker_runs.create(version=cls.version_1, ttl=1000) cls.configuration_1 = cls.worker_1.configurations.create(name="My config", configuration={"key": "value"}) worker_version = WorkerVersion.objects.exclude(worker=cls.version_1.worker).first() cls.configuration_2 = worker_version.worker.configurations.create(name="Config") @@ -97,7 +99,7 @@ class TestWorkerRunsUpdate(FixtureAPITestCase): ) run_2 = self.process_1.worker_runs.create( version=version_2, - parents=[], + ttl=0, ) with self.assertNumQueries(0): @@ -135,7 +137,7 @@ class TestWorkerRunsUpdate(FixtureAPITestCase): """ A user cannot update a worker run on a local process """ - run = self.local_process.worker_runs.create(version=self.version_1, parents=[]) + run = self.local_process.worker_runs.create(version=self.version_1, ttl=1000) self.client.force_login(self.user) with self.assertNumQueries(5): @@ -159,7 +161,7 @@ class TestWorkerRunsUpdate(FixtureAPITestCase): ) run_2 = self.process_1.worker_runs.create( version=version_2, - parents=[], + ttl=0, ) self.client.force_login(self.user) @@ -192,7 +194,7 @@ class TestWorkerRunsUpdate(FixtureAPITestCase): def test_update_duplicate_parents(self): self.client.force_login(self.user) - run_2 = self.process_1.worker_runs.create(version=self.version_2) + run_2 = self.process_1.worker_runs.create(version=self.version_2, ttl=0) with self.assertNumQueries(4): response = self.client.put( @@ -216,7 +218,7 @@ class TestWorkerRunsUpdate(FixtureAPITestCase): """ run_2 = self.process_1.worker_runs.create( version=self.version_2, - parents=[], + ttl=0, ) run_2.parents = [self.run_1.id, self.run_1.id] @@ -281,6 +283,7 @@ class TestWorkerRunsUpdate(FixtureAPITestCase): } }, "use_gpu": False, + "ttl": 3600, "summary": "Worker Recognizer @ version 1", }) self.run_1.refresh_from_db() @@ -343,6 +346,7 @@ class TestWorkerRunsUpdate(FixtureAPITestCase): }, }, "use_gpu": False, + "ttl": 3600, "summary": "Worker Recognizer @ version 1", }) self.run_1.refresh_from_db() @@ -413,6 +417,7 @@ class TestWorkerRunsUpdate(FixtureAPITestCase): } }, "use_gpu": False, + "ttl": 3600, "summary": "Worker Recognizer @ version 1 using configuration 'My config'", }) self.assertEqual(self.run_1.configuration.id, self.configuration_1.id) @@ -467,7 +472,7 @@ class TestWorkerRunsUpdate(FixtureAPITestCase): ) run_2 = self.process_1.worker_runs.create( version=version_no_model, - parents=[], + ttl=0, ) with self.assertNumQueries(5): @@ -497,7 +502,7 @@ class TestWorkerRunsUpdate(FixtureAPITestCase): ) run_2 = self.process_1.worker_runs.create( version=version_no_model, - parents=[], + ttl=0, ) random_model_version_uuid = str(uuid.uuid4()) @@ -529,7 +534,7 @@ class TestWorkerRunsUpdate(FixtureAPITestCase): ) run_2 = self.process_1.worker_runs.create( version=version_no_model, - parents=[], + ttl=0, ) # Create a model version, the user has no access to @@ -576,7 +581,7 @@ class TestWorkerRunsUpdate(FixtureAPITestCase): ) run_2 = self.process_1.worker_runs.create( version=version_no_model, - parents=[], + ttl=0, ) def filter_rights(user, model, level): @@ -641,7 +646,7 @@ class TestWorkerRunsUpdate(FixtureAPITestCase): ) run = self.process_1.worker_runs.create( version=version, - parents=[], + ttl=0, ) self.model_version_1.state = ModelVersionState.Error self.model_version_1.save() @@ -668,7 +673,7 @@ class TestWorkerRunsUpdate(FixtureAPITestCase): configuration={"test": "test2"}, model_usage=FeatureUsage.Required ) - run = self.process_1.worker_runs.create(version=version) + run = self.process_1.worker_runs.create(version=version, ttl=0) self.model_1.archived = datetime.now(timezone.utc) self.model_1.save() @@ -698,7 +703,7 @@ class TestWorkerRunsUpdate(FixtureAPITestCase): ) run = self.process_1.worker_runs.create( version=version_with_model, - parents=[], + ttl=0, ) self.assertEqual(run.model_version, None) # Check generated summary, before updating, there should be only information about the worker version @@ -777,6 +782,7 @@ class TestWorkerRunsUpdate(FixtureAPITestCase): } }, "use_gpu": False, + "ttl": 3600, "summary": f"Worker Recognizer @ version 2 with model {model_version.model.name} @ {str(model_version.id)[:6]}", }) self.assertEqual(run.model_version_id, model_version.id) @@ -795,7 +801,7 @@ class TestWorkerRunsUpdate(FixtureAPITestCase): ) run = self.process_1.worker_runs.create( version=version_with_model, - parents=[], + ttl=0, ) self.assertIsNone(run.model_version) self.assertIsNone(run.configuration) @@ -871,6 +877,7 @@ class TestWorkerRunsUpdate(FixtureAPITestCase): } }, "use_gpu": False, + "ttl": 3600, "summary": f"Worker Recognizer @ version 2 with model My model @ {str(self.model_version_1.id)[:6]} using configuration 'My config'", }) self.assertEqual(run.model_version_id, self.model_version_1.id) @@ -885,7 +892,7 @@ class TestWorkerRunsUpdate(FixtureAPITestCase): ) run_2 = self.process_1.worker_runs.create( version=version_2, - parents=[], + ttl=0, ) self.client.force_login(self.user) @@ -894,6 +901,7 @@ class TestWorkerRunsUpdate(FixtureAPITestCase): reverse("api:worker-run-details", kwargs={"pk": str(self.run_1.id)}), data={ "parents": [str(run_2.id)], + "ttl": 500, }, ) self.assertEqual(response.status_code, status.HTTP_200_OK) @@ -941,6 +949,7 @@ class TestWorkerRunsUpdate(FixtureAPITestCase): } }, "use_gpu": False, + "ttl": 500, "summary": "Worker Recognizer @ version 1", }) @@ -968,6 +977,7 @@ class TestWorkerRunsUpdate(FixtureAPITestCase): version=self.version_1, model_version=None if model_version else self.model_version_1, configuration=None if configuration else self.configuration_1, + ttl=0, ) # Having a model version or a configuration adds one query for each @@ -992,7 +1002,7 @@ class TestWorkerRunsUpdate(FixtureAPITestCase): """ Ponos agents cannot update WorkerRuns, even when they can access them """ - self.process_1.tasks.create(run=0, depth=0, slug="something", agent=self.agent) + self.process_1.tasks.create(run=0, depth=0, slug="something", agent=self.agent, ttl=0) # Agent auth is not implemented in CE self.client.force_authenticate(user=self.agent) diff --git a/arkindex/process/tests/worker_versions/test_create.py b/arkindex/process/tests/worker_versions/test_create.py index 15ca24971d17a2ec6db7a03395095722ca74d5b3..d060bb9e27485a1107c71f0e00d09cf7f366d4a2 100644 --- a/arkindex/process/tests/worker_versions/test_create.py +++ b/arkindex/process/tests/worker_versions/test_create.py @@ -38,7 +38,7 @@ class TestWorkerVersionCreate(FixtureAPITestCase): public=False, ) - def test_create_non_existing_worker(self): + def test_non_existing_worker(self): with self.assertNumQueries(2): response = self.client.post( reverse("api:worker-versions", kwargs={"pk": "12341234-1234-1234-1234-123412341234"}), @@ -49,7 +49,7 @@ class TestWorkerVersionCreate(FixtureAPITestCase): self.assertDictEqual(response.json(), {"detail": "No Worker matches the given query."}) - def test_create_available_requires_docker_image(self): + def test_available_requires_docker_image(self): with self.assertNumQueries(2): response = self.client.post( reverse("api:worker-versions", kwargs={"pk": str(self.worker_reco.id)}), @@ -67,7 +67,7 @@ class TestWorkerVersionCreate(FixtureAPITestCase): ] }) - def test_create_invalid_docker_image(self): + def test_invalid_docker_image(self): with self.assertNumQueries(2): response = self.client.post( reverse("api:worker-versions", kwargs={"pk": str(self.worker_reco.id)}), @@ -86,7 +86,7 @@ class TestWorkerVersionCreate(FixtureAPITestCase): ] }) - def test_create_empty(self): + def test_empty(self): self.user.can_create_worker_version = True self.user.save() self.client.force_login(self.user) @@ -98,7 +98,7 @@ class TestWorkerVersionCreate(FixtureAPITestCase): self.assertDictEqual(response.json(), {"configuration": ["This field is required."]}) - def test_create_archived(self): + def test_archived(self): self.worker_reco.archived = datetime.now(timezone.utc) self.worker_reco.save() @@ -117,7 +117,7 @@ class TestWorkerVersionCreate(FixtureAPITestCase): "worker": ["This worker is archived."], }) - def test_create_null_revision_url_requires_null_worker_repository_url(self): + def test_null_revision_url_requires_null_worker_repository_url(self): self.worker_reco.repository_url = "https://gitlab.com/NERV/eva" self.worker_reco.save() self.user.can_create_worker_version = True @@ -135,7 +135,7 @@ class TestWorkerVersionCreate(FixtureAPITestCase): "revision_url": ["A revision url is required when creating a version for a worker linked to a repository."], }) - def test_create_null_revision_url_forbidden_task_auth(self): + def test_null_revision_url_forbidden_task_auth(self): """ Ponos Task auth cannot create a version on a worker that is not linked to a repository. """ @@ -152,7 +152,7 @@ class TestWorkerVersionCreate(FixtureAPITestCase): }) @patch("arkindex.users.utils.get_max_level", return_value=Role.Contributor.value) - def test_create_user_auth_requires_admin_access_to_worker(self, max_level_mock): + def test_user_auth_requires_admin_access_to_worker(self, max_level_mock): self.user.can_create_worker_version = True self.user.save() self.client.force_login(self.user) @@ -169,7 +169,7 @@ class TestWorkerVersionCreate(FixtureAPITestCase): self.assertEqual(max_level_mock.call_count, 1) self.assertEqual(max_level_mock.call_args, call(self.user, self.worker_reco)) - def test_create_user_auth_with_worker_repository_url_ok(self): + def test_user_auth_with_worker_repository_url_ok(self): self.user.can_create_worker_version = True self.user.save() self.client.force_login(self.user) @@ -208,7 +208,7 @@ class TestWorkerVersionCreate(FixtureAPITestCase): }, }) - def test_create_null_revision_url(self): + def test_null_revision_url(self): """ A worker version can be created with no revision_url through user authentication """ @@ -249,7 +249,7 @@ class TestWorkerVersionCreate(FixtureAPITestCase): }, }) - def test_create_configuration_wrong_type(self): + def test_configuration_wrong_type(self): """ Configuration body must be an object """ @@ -264,7 +264,7 @@ class TestWorkerVersionCreate(FixtureAPITestCase): self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) self.assertEqual(response.json(), {"configuration": ['Expected a dictionary of items but got type "str".']}) - def test_create_requires_permission(self): + def test_requires_permission(self): self.assertFalse(self.user.can_create_worker_version) self.client.force_login(self.user) response = self.client.post( @@ -289,7 +289,7 @@ class TestWorkerVersionCreate(FixtureAPITestCase): ) self.assertEqual(response.status_code, status.HTTP_201_CREATED) - def test_create_task_auth(self): + def test_task_auth(self): response = self.client.post( reverse("api:worker-versions", kwargs={"pk": str(self.worker_reco.id)}), data={ @@ -310,7 +310,7 @@ class TestWorkerVersionCreate(FixtureAPITestCase): self.assertEqual(data["gpu_usage"], "disabled") self.assertEqual(data["model_usage"], FeatureUsage.Required.value) - def test_create_duplicate_revision_url(self): + def test_duplicate_revision_url(self): self.version_1.revision_url = "https://gitlab.com/NERV/eva/commit/eva-01" self.version_1.version = None self.version_1.save() @@ -325,7 +325,7 @@ class TestWorkerVersionCreate(FixtureAPITestCase): self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) self.assertDictEqual(response.json(), {"revision_url": ["A version already exists for this worker with this revision_url."]}) - def test_create_with_tag(self): + def test_with_tag(self): with self.assertNumQueries(7): response = self.client.post( reverse("api:worker-versions", kwargs={"pk": str(self.worker_reco.id)}), @@ -349,7 +349,7 @@ class TestWorkerVersionCreate(FixtureAPITestCase): self.assertEqual(data["model_usage"], FeatureUsage.Required.value) self.assertEqual(data["tag"], "eva-01") - def test_create_unique_tag(self): + def test_unique_tag(self): self.version_1.tag = "eva-01" self.version_1.save() @@ -362,7 +362,7 @@ class TestWorkerVersionCreate(FixtureAPITestCase): self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) self.assertDictEqual(response.json(), {"tag": ["A version already exists for this worker with this tag."]}) - def test_create_unassign_branch(self): + def test_unassign_branch(self): self.version_1.branch = "operation-yashima" self.version_1.save() self.version_2.branch = "operation-yashima" @@ -389,7 +389,7 @@ class TestWorkerVersionCreate(FixtureAPITestCase): self.version_2.refresh_from_db() self.assertEqual(self.version_2.branch, "operation-yashima") - def test_create_empty_tag_branch(self): + def test_empty_tag_branch(self): with self.assertNumQueries(3): response = self.client.post( reverse("api:worker-versions", kwargs={"pk": str(self.worker_reco.id)}), @@ -407,7 +407,7 @@ class TestWorkerVersionCreate(FixtureAPITestCase): "branch": ["This field may not be blank."], }) - def test_create_wrong_gpu_usage(self): + def test_wrong_gpu_usage(self): response = self.client.post( reverse("api:worker-versions", kwargs={"pk": str(self.worker_reco.id)}), data={"configuration": {"test": "test2"}, "revision_url": "https://gitlab.com/NERV/eva/commit/eva-01", "gpu_usage": "not_supported"}, @@ -415,7 +415,7 @@ class TestWorkerVersionCreate(FixtureAPITestCase): ) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - def test_create_no_user_configuration_ok(self): + def test_no_user_configuration_ok(self): response = self.client.post( reverse("api:worker-versions", kwargs={"pk": str(self.worker_dla.id)}), data={ @@ -427,475 +427,192 @@ class TestWorkerVersionCreate(FixtureAPITestCase): ) self.assertEqual(response.status_code, status.HTTP_201_CREATED) - def test_create_valid_user_configuration(self): - response = self.client.post( - reverse("api:worker-versions", kwargs={"pk": str(self.worker_dla.id)}), - data={ - "configuration": { - "user_configuration": { - "demo_integer": {"title": "Demo Integer", "type": "int", "required": True, "default": 1}, - "demo_boolean": {"title": "Demo Boolean", "type": "bool", "required": False, "default": True}, - } - }, - "gpu_usage": "disabled", - "revision_url": "https://gitlab.com/NERV/eva/commit/eva-01", + def test_valid_user_configuration(self): + user_configuration = { + "demo_integer": { + "title": "Demo Integer", + "type": "int", + "required": True, + "default": 1, }, - HTTP_AUTHORIZATION=f"Ponos {self.task.token}", - ) - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - self.assertEqual(response.json()["configuration"], { - "user_configuration": { - "demo_integer": { - "title": "Demo Integer", - "type": "int", - "required": True, - "default": 1 - }, - "demo_boolean": { - "title": "Demo Boolean", - "type": "bool", - "required": False, - "default": True - } - } - }) - - def test_create_valid_user_configuration_dict(self): - response = self.client.post( - reverse("api:worker-versions", kwargs={"pk": str(self.worker_dla.id)}), - data={ - "configuration": { - "user_configuration": { - "demo_dict": {"title": "Demo Dict", "type": "dict", "required": True, "default": {"a": "b", "c": "d"}}, - } - }, - "revision_url": "https://gitlab.com/NERV/eva/commit/eva-01", - "gpu_usage": "disabled", + "demo_boolean": { + "title": "Demo Boolean", + "type": "bool", + "required": False, + "default": True, }, - HTTP_AUTHORIZATION=f"Ponos {self.task.token}", - ) - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - self.assertEqual(response.json()["configuration"], { - "user_configuration": { - "demo_dict": { - "title": "Demo Dict", - "type": "dict", - "required": True, - "default": {"a": "b", "c": "d"} - } - } - }) - - def test_create_user_configuration_dict_strings_only(self): - response = self.client.post( - reverse("api:worker-versions", kwargs={"pk": str(self.worker_dla.id)}), - data={ - "configuration": { - "user_configuration": { - "demo_dict": {"title": "Demo Dict", "type": "dict", "required": True, "default": {"a": ["12", "13"], "c": "d"}}, - } - }, - "gpu_usage": "disabled", + "demo_dict": { + "title": "Demo Dict", + "type": "dict", + "required": True, + "default": {"a": "b", "c": "d"}, }, - HTTP_AUTHORIZATION=f"Ponos {self.task.token}", - ) - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - - def test_create_valid_user_configuration_enum(self): - response = self.client.post( - reverse("api:worker-versions", kwargs={"pk": str(self.worker_dla.id)}), - data={ - "configuration": { - "user_configuration": { - "demo_choice": {"title": "Decisions", "type": "enum", "required": True, "default": 1, "choices": [1, 2, 3]} - } - }, - "revision_url": "https://gitlab.com/NERV/eva/commit/eva-01", - "gpu_usage": "disabled", + "demo_choice": { + "title": "Decisions", + "type": "enum", + "required": True, + "default": 1, + "choices": [1, 2, 3], }, - HTTP_AUTHORIZATION=f"Ponos {self.task.token}", - ) - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - self.assertEqual(response.json()["configuration"], { - "user_configuration": { - "demo_choice": { - "title": "Decisions", - "type": "enum", - "required": True, - "default": 1, - "choices": [1, 2, 3] - } - } - }) - - def test_create_valid_user_configuration_list(self): - response = self.client.post( - reverse("api:worker-versions", kwargs={"pk": str(self.worker_dla.id)}), - data={ - "configuration": { - "user_configuration": { - "demo_list": {"title": "Demo List", "type": "list", "required": True, "subtype": "int", "default": [1, 2, 3, 4]}, - "boolean_list": {"title": "It's a list of booleans", "type": "list", "required": False, "subtype": "bool", "default": [True, False, False]} - } - }, - "revision_url": "https://gitlab.com/NERV/eva/commit/eva-01", - "gpu_usage": "disabled", + "demo_list": { + "title": "Demo List", + "type": "list", + "required": True, + "subtype": "int", + "default": [1, 2, 3, 4], }, - HTTP_AUTHORIZATION=f"Ponos {self.task.token}", - ) - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - self.assertEqual(response.json()["configuration"], { - "user_configuration": { - "demo_list": { - "title": "Demo List", - "type": "list", - "subtype": "int", - "required": True, - "default": [1, 2, 3, 4] - }, - "boolean_list": { - "title": "It's a list of booleans", - "type": "list", - "subtype": "bool", - "required": False, - "default": [True, False, False] - } - } - }) - - def test_create_valid_user_configuration_model(self): - response = self.client.post( - reverse("api:worker-versions", kwargs={"pk": str(self.worker_dla.id)}), - data={ - "configuration": { - "user_configuration": { - "demo_model": {"title": "Model for training", "type": "model", "required": True}, - "other_model": {"title": "Model the second", "type": "model", "default": str(self.model.id)} - } - }, - "revision_url": "https://gitlab.com/NERV/eva/commit/eva-01", - "gpu_usage": "disabled", + "boolean_list": { + "title": "It's a list of booleans", + "type": "list", + "required": False, + "subtype": "bool", + "default": [True, False, False], }, - HTTP_AUTHORIZATION=f"Ponos {self.task.token}", - ) - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - self.assertEqual(response.json()["configuration"], { - "user_configuration": { - "demo_model": { - "title": "Model for training", - "type": "model", - "required": True - }, - "other_model": { - "title": "Model the second", - "type": "model", - "default": str(self.model.id) - } - } - }) - - def test_create_invalid_user_configuration_list_requires_subtype(self): - response = self.client.post( - reverse("api:worker-versions", kwargs={"pk": str(self.worker_dla.id)}), - data={ - "configuration": { - "user_configuration": { - "demo_list": {"title": "Demo List", "type": "list", "required": True, "default": [1, 2, 3, 4]}, - } - }, - "gpu_usage": "disabled", + "demo_model": { + "title": "Model for training", + "type": "model", + "required": True, }, - HTTP_AUTHORIZATION=f"Ponos {self.task.token}", - ) - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual(response.json(), { - "configuration": { - "user_configuration": [{ - "demo_list": { - "subtype": ['The "subtype" field must be set for "list" type properties.'] - } - }] - } - }) - - def test_create_invalid_user_configuration_list_wrong_default(self): - response = self.client.post( - reverse("api:worker-versions", kwargs={"pk": str(self.worker_dla.id)}), - data={ - "configuration": { - "user_configuration": { - "demo_list": {"title": "Demo List", "type": "list", "required": True, "subtype": "int", "default": 12}, - } - }, - "gpu_usage": "disabled", - }, - HTTP_AUTHORIZATION=f"Ponos {self.task.token}", - ) - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual(response.json(), { - "configuration": { - "user_configuration": [{ - "demo_list": { - "default": ["This is not a valid value for a field of type list."] - } - }] - } - }) - - def test_create_invalid_user_configuration_list_wrong_subtype(self): - response = self.client.post( - reverse("api:worker-versions", kwargs={"pk": str(self.worker_dla.id)}), - data={ - "configuration": { - "user_configuration": { - "demo_list": {"title": "Demo List", "type": "list", "required": True, "subtype": "dict", "default": [1, 2, 3, 4]}, - } - }, - "gpu_usage": "disabled", + "other_model": { + "title": "Model the second", + "type": "model", + "default": str(self.model.id), }, - HTTP_AUTHORIZATION=f"Ponos {self.task.token}", - ) - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual(response.json(), { - "configuration": { - "user_configuration": [{ - "demo_list": { - "subtype": ["Subtype can only be int, float, bool or string."] - } - }] + "multiline_string": { + "title": "Multiline string", + "type": "string", + "multiline": True, + "required": True, } - }) + } - def test_create_invalid_user_configuration_list_wrong_default_subtype(self): response = self.client.post( reverse("api:worker-versions", kwargs={"pk": str(self.worker_dla.id)}), data={ "configuration": { - "user_configuration": { - "demo_list": {"title": "Demo List", "type": "list", "required": True, "subtype": "int", "default": [1, 2, "three", 4]}, - } + "user_configuration": user_configuration, }, - "gpu_usage": "disabled", - }, - HTTP_AUTHORIZATION=f"Ponos {self.task.token}", - ) - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual(response.json(), { - "configuration": { - "user_configuration": [{ - "demo_list": { - "default": ["All items in the default value must be of type int."] - } - }] - } - }) - - def test_create_invalid_user_configuration_not_list_choices(self): - response = self.client.post( - reverse("api:worker-versions", kwargs={"pk": str(self.worker_dla.id)}), - data={ - "configuration": { - "user_configuration": { - "demo_choice": {"title": "Decisions", "type": "enum", "required": True, "default": 1, "choices": "eeee"} - } - }, - "gpu_usage": "disabled", - }, - HTTP_AUTHORIZATION=f"Ponos {self.task.token}", - ) - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual(response.json(), { - "configuration": { - "user_configuration": [{ - "demo_choice": { - "choices": ['Expected a list of items but got type "str".'] - } - }] - } - }) - - def test_create_invalid_user_configuration_not_dict(self): - response = self.client.post( - reverse("api:worker-versions", kwargs={"pk": str(self.worker_dla.id)}), - data={ - "configuration": { - "user_configuration": "non" - }, - "gpu_usage": "disabled", + "revision_url": "https://gitlab.com/NERV/eva/commit/eva-01", }, HTTP_AUTHORIZATION=f"Ponos {self.task.token}", ) - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual(response.json(), {"configuration": {"user_configuration": [['Expected a dictionary of items but got type "str".']]}}) + self.assertEqual(response.status_code, status.HTTP_201_CREATED) - def test_create_invalid_user_configuration_item_not_dict(self): - response = self.client.post( - reverse("api:worker-versions", kwargs={"pk": str(self.worker_dla.id)}), - data={ - "configuration": { - "user_configuration": { - "secrets": ["aaaaaa"] - } - }, - "gpu_usage": "disabled", - }, - HTTP_AUTHORIZATION=f"Ponos {self.task.token}", - ) - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual(response.json(), {"configuration": { - "user_configuration": [{ - "secrets": - {"__all__": ["User configuration field definitions should be of type dict, not list."]} - }] - }}) - - def test_create_invalid_user_configuration_wrong_field_type(self): - response = self.client.post( - reverse("api:worker-versions", kwargs={"pk": str(self.worker_dla.id)}), - data={ - "configuration": { - "user_configuration": { - "something": { - "title": "some thing", - "type": "uh oh", - "required": 2 - } - } - }, - "gpu_usage": "disabled", - }, - HTTP_AUTHORIZATION=f"Ponos {self.task.token}", - ) - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual(response.json(), { - "configuration": { - "user_configuration": [{ - "something": { - "required": ["Must be a valid boolean."], - "type": ["Value is not of type UserConfigurationFieldType"] - } - }] - } + data = response.json() + self.assertEqual(data["configuration"], { + "user_configuration": user_configuration, }) - - def test_create_invalid_user_configuration_wrong_default_type(self): - response = self.client.post( - reverse("api:worker-versions", kwargs={"pk": str(self.worker_dla.id)}), - data={ - "configuration": { - "user_configuration": { - "one_float": { - "title": "a float", - "type": "float", - "default": "bonjour", - "required": True - } - } - }, - "gpu_usage": "disabled", - }, - HTTP_AUTHORIZATION=f"Ponos {self.task.token}", - ) - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual(response.json(), { - "configuration": { - "user_configuration": [{ - "one_float": { - "default": ["This is not a valid value for a field of type float."] - } - }] - } + self.assertEqual(WorkerVersion.objects.get(id=data["id"]).configuration, { + "user_configuration": user_configuration, }) - def test_create_invalid_user_configuration_choices_no_enum(self): - response = self.client.post( - reverse("api:worker-versions", kwargs={"pk": str(self.worker_dla.id)}), - data={ - "configuration": { - "user_configuration": { - "something": { - "title": "some thing", - "type": "int", - "required": False, - "choices": [1, 2, 3] - } - } - }, - "gpu_usage": "disabled", - }, - HTTP_AUTHORIZATION=f"Ponos {self.task.token}", - ) - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual(response.json(), { - "configuration": { - "user_configuration": [{ - "something": { - "choices": ['The "choices" field can only be set for an "enum" type property.'] - } - }] - } - }) + def test_invalid_user_configuration(self): + cases = [ + ( + # User configuration must be a dict + "non", + ['Expected a dictionary of items but got type "str".'], + ), + ( + # User a configuration must a dict of dicts + {"something": ["aaaaa"]}, + {"something": {"__all__": ["User configuration field definitions should be of type dict, not list."]}}, + ), + ( + # Fields have required keys + {"something": {}}, + {"something": { + "title": ["This field is required."], + "type": ["This field is required."], + }}, + ), + ( + # Unsupported keys cause errors + {"something": {"title": "some thing", "type": "int", "required": True, "some_key": "some_value"}}, + {"something": { + "some_key": [ + "Configurable properties can only be defined using the following keys: " + "title, type, required, default, choices, subtype, multiline." + ], + }} + ), + ( + # Field types should be valid + {"something": {"title": "some thing", "type": "uh oh", "required": True}}, + {"something": {"type": ["Value is not of type UserConfigurationFieldType"]}}, + ), + ( + # `required` should be a boolean + {"something": {"title": "some thing", "type": "string", "required": "maybe"}}, + {"something": {"required": ["Must be a valid boolean."]}}, + ), + ( + # Field defaults must match their field types + {"something": {"title": "some thing", "type": "float", "required": False, "default": "bonjour"}}, + {"something": {"default": ["This is not a valid value for a field of type float."]}}, + ), + ( + # Dict field defaults can only contain strings + {"demo_dict": {"title": "Demo Dict", "type": "dict", "required": True, "default": {"a": ["12", "13"], "c": "d"}}}, + {"demo_dict": {"default": ["This is not a valid value for a field of type dict."]}}, + ), + ( + # List fields require a subtype + {"demo_list": {"title": "Demo List", "type": "list", "required": True, "default": [1, 2, 3, 4]}}, + {"demo_list": {"subtype": ['The "subtype" field must be set for "list" type properties.']}}, + ), + ( + # List field subtypes are restricted + {"demo_list": {"title": "Demo List", "type": "list", "required": True, "subtype": "dict", "default": [{"a": "b"}]}}, + {"demo_list": {"subtype": ["Subtype can only be int, float, bool or string."]}}, + ), + ( + # List field defaults must match the subtype + {"demo_list": {"title": "Demo List", "type": "list", "required": True, "subtype": "int", "default": [1, 2, "three", 4]}}, + {"demo_list": {"default": ["All items in the default value must be of type int."]}}, + ), + ( + # Choices can only be set on enums + {"something": {"title": "some thing", "type": "int", "required": False, "choices": [1, 2, 3]}}, + {"something": {"choices": ['The "choices" field can only be set for an "enum" type property.']}}, + ), + ( + # Enum field choices must be a list + {"demo_choice": {"title": "Decisions", "type": "enum", "required": True, "default": 1, "choices": "eeee"}}, + {"demo_choice": {"choices": ['Expected a list of items but got type "str".']}}, + ), + ( + # Model field defaults must be existing model IDs + {"model": {"title": "Model to train", "type": "model", "default": "12341234-1234-1234-1234-123412341234"}}, + {"model": {"default": ["Model 12341234-1234-1234-1234-123412341234 not found."]}} + ), + ( + {"stringn't": {"title": "Stringn't", "type": "int", "multiline": True}}, + {"stringn't": {"multiline": ['The "multiline" field can only be set for a "string" type property.']}}, + ), + ( + # `multiline` should be a boolean + {"string": {"title": "String", "type": "string", "multiline": "only during the full moon"}}, + {"string": {"multiline": ["Must be a valid boolean."]}}, + ), + ] - def test_create_invalid_user_configuration_missing_key(self): - response = self.client.post( - reverse("api:worker-versions", kwargs={"pk": str(self.worker_dla.id)}), - data={ - "configuration": { - "user_configuration": { - "demo_integer": {"type": "int", "required": True, "default": 1} - } - }, - "gpu_usage": "disabled", - }, - HTTP_AUTHORIZATION=f"Ponos {self.task.token}", - ) - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual( - response.json(), - { - "configuration": { - "user_configuration": [{ - "demo_integer": { - "title": ["This field is required."] - } - }] - } - } - ) + self.client.force_login(self.superuser) - def test_create_invalid_user_configuration_invalid_key(self): - response = self.client.post( - reverse("api:worker-versions", kwargs={"pk": str(self.worker_dla.id)}), - data={ - "configuration": { - "user_configuration": { - "demo_integer": { - "title": "an integer", - "type": "int", - "required": True, - "default": 1, - "some_key": "oh no", - } - } - }, - "gpu_usage": "disabled", - }, - HTTP_AUTHORIZATION=f"Ponos {self.task.token}", - ) - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual(response.json(), { - "configuration": { - "user_configuration": [ - { - "demo_integer": { - "some_key": ["Configurable properties can only be defined using the following keys: title, type, required, default, subtype, choices."] - } - } - ] - } - }) + for user_configuration, expected_errors in cases: + with self.subTest(user_configuration=user_configuration): + response = self.client.post( + reverse("api:worker-versions", kwargs={"pk": str(self.worker_dla.id)}), + data={ + "configuration": { + "user_configuration": user_configuration, + }, + }, + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertEqual(response.json(), {"configuration": {"user_configuration": expected_errors}}) - def test_create_invalid_user_configuration_default_value(self): + def test_invalid_user_configuration_default_value(self): cases = [ ({"type": "int", "default": False}, "int"), ({"type": "int", "default": True}, "int"), @@ -922,21 +639,12 @@ class TestWorkerVersionCreate(FixtureAPITestCase): HTTP_AUTHORIZATION=f"Ponos {self.task.token}", ) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual(response.json(), {"configuration": {"user_configuration": [{"param": {"default": [f"This is not a valid value for a field of type {expected}."]}}]}}) - - def test_create_user_configuration_model_default_doesnt_exist(self): - response = self.client.post( - reverse("api:worker-versions", kwargs={"pk": str(self.worker_dla.id)}), - data={ - "configuration": { - "user_configuration": { - "param": {"title": "Model to train", "type": "model", "default": "12341234-1234-1234-1234-123412341234"} + self.assertEqual(response.json(), { + "configuration": { + "user_configuration": { + "param": { + "default": [f"This is not a valid value for a field of type {expected}."], + }, + }, } - }, - }, - HTTP_AUTHORIZATION=f"Ponos {self.task.token}", - ) - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual(response.json(), {"configuration": {"user_configuration": [{"param": {"default": [ - "Model 12341234-1234-1234-1234-123412341234 not found." - ]}}]}}) + }) diff --git a/arkindex/process/tests/workers/test_list.py b/arkindex/process/tests/workers/test_list.py index ac7c5519d585a114c8ac511cbf7706b21b536dda..d159139ce644be7102ece52f47d30d00c6d6e7fd 100644 --- a/arkindex/process/tests/workers/test_list.py +++ b/arkindex/process/tests/workers/test_list.py @@ -24,6 +24,7 @@ class TestWorkerList(FixtureAPITestCase): cls.worker_file_import = Worker.objects.get(slug="file_import") cls.worker_custom = Worker.objects.get(slug="custom") cls.init_worker = Worker.objects.get(slug="initialisation") + cls.worker_pdf_export = Worker.objects.get(slug="pdf_export") cls.model = Model.objects.create( name="Generic model", @@ -42,7 +43,7 @@ class TestWorkerList(FixtureAPITestCase): self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertDictEqual(response.json(), { - "count": 7, + "count": 8, "next": None, "number": 1, "previous": None, @@ -92,6 +93,15 @@ class TestWorkerList(FixtureAPITestCase): "type": "recognizer", "archived": False, }, + { + "id": str(self.worker_pdf_export.id), + "repository_url": None, + "name": "PDF export worker", + "slug": "pdf_export", + "description": "", + "type": "export", + "archived": False, + }, { "id": str(self.worker_reco.id), "repository_url": None, @@ -360,7 +370,7 @@ class TestWorkerList(FixtureAPITestCase): { "archived": False, "description": "", - "id": "bf56381e-a669-4e61-8d19-fc0e23171463", + "id": str(self.init_worker.id), "name": "Elements Initialisation Worker", "repository_url": None, "slug": "initialisation", @@ -369,7 +379,7 @@ class TestWorkerList(FixtureAPITestCase): { "archived": False, "description": "", - "id": "f0f5787d-3e41-4d16-aa56-b84f64103c12", + "id": str(self.worker_file_import.id), "name": "File import", "repository_url": None, "slug": "file_import", diff --git a/arkindex/project/api_v1.py b/arkindex/project/api_v1.py index 17e6cb49991d94775faabfdacc5d33845a9f6e06..17c22a99ebae0fec9fcef8e65d6254047f602906 100644 --- a/arkindex/project/api_v1.py +++ b/arkindex/project/api_v1.py @@ -70,6 +70,7 @@ from arkindex.process.api import ( DataFileCreate, DataFileList, DataFileRetrieve, + ExportProcess, FeatureWorkerVersionRetrieve, FilesProcess, ListProcessElements, @@ -271,6 +272,9 @@ api = [ path("process/<uuid:process>/set/<uuid:set>/", ProcessDatasetSetManage.as_view(), name="process-set"), path("process/<uuid:pk>/download/", ProcessArtifactDownload.as_view(), name="process-artifact-download"), + # Export processes + path("process/export/<uuid:corpus_id>/", ExportProcess.as_view(), name="export-process"), + # ML models training path("modelversion/<uuid:pk>/", ModelVersionsRetrieve.as_view(), name="model-version-retrieve"), path("models/", ModelsList.as_view(), name="models"), diff --git a/arkindex/project/aws.py b/arkindex/project/aws.py index ac98c858d6c621278ace564a9d2e674143b78de1..3245ed056e0a6cb50ae67a26e7a3a79eeff2c377 100644 --- a/arkindex/project/aws.py +++ b/arkindex/project/aws.py @@ -30,6 +30,10 @@ def get_s3_resource( signature_version="s3v4", s3={ "addressing_style": "auto" if endpoint else "virtual", + }, + retries={ + "mode": "standard", + "max_attempts": settings.AWS_MAX_RETRIES, } ) @@ -106,6 +110,11 @@ class S3FileMixin: "Key": self.s3_object.key, }) + @property + def s3_filename(self): + """Optionally override the filename for downloading a file from its S3 URL""" + return + @property @requires_s3_object def s3_url(self) -> str: @@ -122,10 +131,13 @@ class S3FileMixin: ) else: client = s3.meta.client - return client.generate_presigned_url("get_object", Params={ + params = { "Bucket": self.s3_object.bucket_name, "Key": self.s3_object.key, - }) + } + if self.s3_filename is not None: + params["ResponseContentDisposition"] = f"attachment; filename={self.s3_filename}" + return client.generate_presigned_url("get_object", Params=params) def exists(self): """ diff --git a/arkindex/project/config.py b/arkindex/project/config.py index 47a70f380101a86397a550a4312a98e49eb46260..edcb4e3a4f736c881cd14a091047bf0707502569 100644 --- a/arkindex/project/config.py +++ b/arkindex/project/config.py @@ -209,6 +209,7 @@ def get_settings_parser(base_dir): parser.add_option("cache", default={}, type=cache_validator) s3_parser = add_s3_parser(parser, "s3") + s3_parser.add_option("max_retries", type=int, default=5) s3_parser.add_option("thumbnails_bucket", type=str, default="thumbnails") s3_parser.add_option("staging_bucket", type=str, default="staging") s3_parser.add_option("export_bucket", type=str, default="export") diff --git a/arkindex/project/settings.py b/arkindex/project/settings.py index 1b2d950cbca60e711c9e2d0427f38fe3c00e1075..d3173a6245291737df01f330537e19716a2d166f 100644 --- a/arkindex/project/settings.py +++ b/arkindex/project/settings.py @@ -528,6 +528,7 @@ AWS_ACCESS_KEY = conf["s3"]["access_key_id"] AWS_SECRET_KEY = conf["s3"]["secret_access_key"] AWS_ENDPOINT = conf["s3"]["endpoint"] AWS_REGION = conf["s3"]["region"] +AWS_MAX_RETRIES = conf["s3"]["max_retries"] PONOS_S3_LOGS_BUCKET = conf["s3"]["ponos_logs_bucket"] PONOS_S3_ARTIFACTS_BUCKET = conf["s3"]["ponos_artifacts_bucket"] diff --git a/arkindex/project/tests/__init__.py b/arkindex/project/tests/__init__.py index 09261a5413c07f333709a585b426bfc6042fcc3c..f1bc9e90bae7adc959beb8f116aec90cfdfaad3d 100644 --- a/arkindex/project/tests/__init__.py +++ b/arkindex/project/tests/__init__.py @@ -13,7 +13,6 @@ from rest_framework.test import APITestCase from arkindex.documents.models import Corpus from arkindex.images.models import ImageServer -from arkindex.process.models import WorkerVersion from arkindex.users.models import Group, User @@ -119,10 +118,6 @@ class ArkindexTestMixin: # Clean content type cache for SQL requests checks consistency ContentType.objects.clear_cache() - # `cache_clear` is a function defined by the `functools.lru_cache` decorator - # on the function itself, not on its return value - WorkerVersion.objects.get_by_feature.cache_clear() - # Clear the local cached properties so that it is re-fetched on each test # to avoid intermittently changing query counts. # Using `del` on a cached property that has not been accessed yet can cause an AttributeError. diff --git a/arkindex/project/tests/config_samples/defaults.yaml b/arkindex/project/tests/config_samples/defaults.yaml index d2e108d675a9157f0a4040e4126485c1fd72e4d6..24aa0d51d4b66cab220df1ad131648ab26039348 100644 --- a/arkindex/project/tests/config_samples/defaults.yaml +++ b/arkindex/project/tests/config_samples/defaults.yaml @@ -81,6 +81,7 @@ s3: access_key_id: null endpoint: null export_bucket: export + max_retries: 5 ponos_artifacts_bucket: ponos-artifacts ponos_logs_bucket: ponos-logs region: null diff --git a/arkindex/project/tests/config_samples/errors.yaml b/arkindex/project/tests/config_samples/errors.yaml index 5925b919da4f933f8c94df033bc368dc81a6e325..fd795eded5eb97f408d21b5263afabbb62e8f64e 100644 --- a/arkindex/project/tests/config_samples/errors.yaml +++ b/arkindex/project/tests/config_samples/errors.yaml @@ -62,6 +62,7 @@ redis: timeout: sauce s3: endpoint: null + max_retries: [1, 2] ponos_artifacts_bucket: {} ponos_logs_bucket: null region: null diff --git a/arkindex/project/tests/config_samples/expected_errors.yaml b/arkindex/project/tests/config_samples/expected_errors.yaml index b9ec8b030b4281343607231044d324cfa23d5dc3..97cd216bd5e9e28b0c3db8db72e49faa75379c7c 100644 --- a/arkindex/project/tests/config_samples/expected_errors.yaml +++ b/arkindex/project/tests/config_samples/expected_errors.yaml @@ -40,6 +40,8 @@ redis: port: "invalid literal for int() with base 10: 'over nine thousand'" db: "invalid literal for int() with base 10: 'idk'" timeout: "invalid literal for int() with base 10: 'sauce'" +s3: + max_retries: "int() argument must be a string, a bytes-like object or a real number, not 'list'" session: cookie_samesite: "'foo' is not a valid CookieSameSiteOption" signup_default_group: "badly formed hexadecimal UUID string" diff --git a/arkindex/project/tests/config_samples/override.yaml b/arkindex/project/tests/config_samples/override.yaml index 7a807a1fef29b69b3b84ce3a76a4bddb4d682e34..1de12bd3e8d5ab4153378b78a765ef46c79f682b 100644 --- a/arkindex/project/tests/config_samples/override.yaml +++ b/arkindex/project/tests/config_samples/override.yaml @@ -98,6 +98,7 @@ s3: access_key_id: abcd endpoint: http://somewhere export_bucket: exbort + max_retries: 99 ponos_artifacts_bucket: zstandardland ponos_logs_bucket: plaintexttown region: middle-earth-1 diff --git a/arkindex/project/triggers.py b/arkindex/project/triggers.py index 8e0014abadde244dc551610a28c3d2aa0f652a31..f2fdee66e994127c405854fa6e22033406d595c6 100644 --- a/arkindex/project/triggers.py +++ b/arkindex/project/triggers.py @@ -244,6 +244,7 @@ def notify_process_completion(process: Process): State.Failed: "with failures", State.Error: "with errors", State.Stopped: "because it was stopped", + State.Cancelled: "because it was cancelled", } if state in state_msg.keys(): process_name = process.name or str(process.id) diff --git a/arkindex/sql_validation/indexer_prefetch.sql b/arkindex/sql_validation/indexer_prefetch.sql index ce597800590c59c75215106ebf6e996cfbf9bf05..ccefaba5202f3df1bfb97d339a7ed8044282a9eb 100644 --- a/arkindex/sql_validation/indexer_prefetch.sql +++ b/arkindex/sql_validation/indexer_prefetch.sql @@ -56,7 +56,8 @@ SELECT "process_workerrun"."id", "process_workerrun"."created", "process_workerrun"."updated", "process_workerrun"."has_results", - "process_workerrun"."use_gpu" + "process_workerrun"."use_gpu", + "process_workerrun"."ttl" FROM "process_workerrun" WHERE "process_workerrun"."id" IN ('{worker_run_id}'::uuid); @@ -148,7 +149,8 @@ SELECT "process_workerrun"."id", "process_workerrun"."created", "process_workerrun"."updated", "process_workerrun"."has_results", - "process_workerrun"."use_gpu" + "process_workerrun"."use_gpu", + "process_workerrun"."ttl" FROM "process_workerrun" WHERE "process_workerrun"."id" IN ('{worker_run_id}'::uuid); diff --git a/arkindex/sql_validation/list_elements.sql b/arkindex/sql_validation/list_elements.sql index b04bd57d0d0afa1b16618f66a96be59d5d5c37b5..98e3284cfa465154415b194f505440a0ae88d3b5 100644 --- a/arkindex/sql_validation/list_elements.sql +++ b/arkindex/sql_validation/list_elements.sql @@ -40,7 +40,8 @@ SELECT "documents_element"."id", "process_workerrun"."created", "process_workerrun"."updated", "process_workerrun"."has_results", - "process_workerrun"."use_gpu" + "process_workerrun"."use_gpu", + "process_workerrun"."ttl" FROM "documents_element" LEFT OUTER JOIN "process_workerrun" ON ("documents_element"."worker_run_id" = "process_workerrun"."id") WHERE ("documents_element"."corpus_id" = '{corpus_id}'::uuid diff --git a/arkindex/system_workers.yml b/arkindex/system_workers.yml index 3d146aa7f65e41a0773b812f00393966ffc70e21..3a94f9635b51bc788ab053da20308d28fb3191ba 100644 --- a/arkindex/system_workers.yml +++ b/arkindex/system_workers.yml @@ -1,19 +1,19 @@ # When releasing Arkindex, check that the Docker images set here are up to date, # then update the `version` to the current Arkindex version as set in the `VERSION` file # to confirm that the images have been manually checked. -version: 1.6.6 +version: 1.7.0 features: file_import: - image: registry.gitlab.teklia.com/arkindex/workers/import/file:0.1.0 + image: registry.gitlab.teklia.com/arkindex/workers/import/file:0.1.1 init_elements: image: registry.gitlab.teklia.com/arkindex/workers/init-elements:0.1.1 command: worker-init-elements s3_ingest: image: registry.gitlab.teklia.com/arkindex/workers/import/s3:0.1.0 pdf_export: - image: registry.gitlab.teklia.com/arkindex/workers/export:0.1.0 + image: registry.gitlab.teklia.com/arkindex/workers/export:0.2.0 command: worker-export-pdf pagexml_export: - image: registry.gitlab.teklia.com/arkindex/workers/export:0.1.0 + image: registry.gitlab.teklia.com/arkindex/workers/export:0.2.0 command: worker-export-pagexml diff --git a/arkindex/training/models.py b/arkindex/training/models.py index 1e08f99d9f27965b213f8045cd5a05d2e86fee39..9181d627a1dd0cb073c611d3691d07200e24a5d6 100644 --- a/arkindex/training/models.py +++ b/arkindex/training/models.py @@ -1,6 +1,7 @@ import logging import uuid from hashlib import sha256 +from pathlib import Path from django.conf import settings from django.contrib.contenttypes.fields import GenericRelation @@ -177,6 +178,11 @@ class ModelVersion(S3FileMixin, IndexableModel): """ return ModelVersion.objects.executable(user).filter(id=self.id).exists() + @property + def s3_filename(self): + stem = Path(self.s3_object.key).stem + return f"{stem}.tar.zst" + def __str__(self): if self.tag: return f"{self.model.name} @ {self.tag} ({self.truncated_id}…)" diff --git a/arkindex/training/tests/test_model_api.py b/arkindex/training/tests/test_model_api.py index 114abe1b9735560bf5e3b033b3021e01fc470674..2ea04c1bdf83d6485c41591b96c862ebd4f78136 100644 --- a/arkindex/training/tests/test_model_api.py +++ b/arkindex/training/tests/test_model_api.py @@ -1,4 +1,5 @@ from unittest.mock import call, patch +from urllib.parse import parse_qs, urlparse from uuid import uuid4 from django.urls import reverse @@ -376,6 +377,10 @@ class TestModelAPI(FixtureAPITestCase): response = self.client.get(reverse("api:model-version-download", kwargs={"pk": str(self.model_version5.id)}), {"token": token}) self.assertEqual(response.status_code, status.HTTP_302_FOUND) self.assertEqual(response.headers["Location"], self.model_version5.s3_url) + query_params = parse_qs(urlparse(response.headers["Location"]).query) + self.assertEqual(query_params.get("response-content-disposition"), [ + f"attachment; filename={self.model_version5.id}.tar.zst" + ]) # RetrieveModel diff --git a/requirements.txt b/requirements.txt index ae8ea01d0c5f3ac781f225c883558f6c75a02c7c..fdfda8d85c4c1f1858e6c14a93588fde9d5aa9b9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ bleach==6.0.0 django-admin-hstore-widget==1.2.1 django-cors-headers==3.14.0 -django-enumfields==2.1.1 +django-enumfields2==3.0.2 django-pgtrigger==4.7.0 django-rq==2.10.1 djangorestframework==3.15.2