diff --git a/arkindex/process/management/commands/update_system_workers.py b/arkindex/process/management/commands/update_system_workers.py index 803a4e3fef4272d7af593deb9aa183a9cbfeb141..49949726e778305b7f38c53beaa7520df947f495 100644 --- a/arkindex/process/management/commands/update_system_workers.py +++ b/arkindex/process/management/commands/update_system_workers.py @@ -90,6 +90,9 @@ class Command(BaseCommand): """ assert worker.archived is None, "Cannot assign a version on an archived worker to a feature" + # Since there can be worker versions using the same Docker image but with different commands, + # we need to filter by command as well as by Docker image. + docker_command_filter = {"configuration__docker__command": docker_command} if docker_command else {"configuration__docker__command__isnull": True} # docker_image_iid is not unique, so we sort by state and take the most recent one. # Sorting by state means we prefer `available` versions first, then `created`, then `error`. worker_version = worker.versions.filter( @@ -99,6 +102,7 @@ class Command(BaseCommand): configuration__user_configuration__isnull=True, gpu_usage=FeatureUsage.Disabled, model_usage=FeatureUsage.Disabled, + **docker_command_filter ).order_by("state", "-updated").first() if worker_version: diff --git a/arkindex/process/models.py b/arkindex/process/models.py index 9d437382c17dd0d2802bec1281a33117bc57dccd..4dec68a471c76f76d147fec5b032402f22b19da6 100644 --- a/arkindex/process/models.py +++ b/arkindex/process/models.py @@ -666,6 +666,8 @@ class ArkindexFeature(Enum): InitElements = "init_elements" FileImport = "file_import" S3Ingest = "s3_ingest" + ExportPDF = "pdf_export" + ExportPageXML = "pagexml_export" class WorkerVersion(models.Model): diff --git a/arkindex/process/tests/commands/test_update_system_workers.py b/arkindex/process/tests/commands/test_update_system_workers.py index ba8ea9c87bb68c79b2583052cb43956813cdee3a..30736c395469d251ef5a4457ec545d29d7600336 100644 --- a/arkindex/process/tests/commands/test_update_system_workers.py +++ b/arkindex/process/tests/commands/test_update_system_workers.py @@ -13,7 +13,9 @@ MOCK_CONFIG = { "file_import": {"image": "registry.example.com/file-import:latest", "command": None}, "init_elements": {"image": "registry.example.com/init-elements:latest", "command": "init-elements"}, - "s3_ingest": {"image": "registry.example.com/s3-ingest:latest", "command": None} + "s3_ingest": {"image": "registry.example.com/s3-ingest:latest", "command": None}, + "pdf_export": {"image": "registry.example.com/file-export:latest", "command": "worker-export-pdf"}, + "pagexml_export": {"image": "registry.example.com/file-export:latest", "command": "worker-export-pagexml"} } } @@ -66,6 +68,12 @@ class TestUpdateSystemWorkers(ArkindexTestCase): s3_ingest_version = WorkerVersion.objects.get_by_feature(ArkindexFeature.S3Ingest) self.check_feature_version(s3_ingest_version, ArkindexFeature.S3Ingest) + pdf_export_version = WorkerVersion.objects.get_by_feature(ArkindexFeature.ExportPDF) + self.check_feature_version(pdf_export_version, ArkindexFeature.ExportPDF) + + pagexml_export_version = WorkerVersion.objects.get_by_feature(ArkindexFeature.ExportPageXML) + self.check_feature_version(pagexml_export_version, ArkindexFeature.ExportPageXML) + # Check the attributes on the new workers self.assertEqual(file_import_version.worker.name, "FileImport") self.assertEqual(file_import_version.worker.slug, "file_import") @@ -76,6 +84,12 @@ class TestUpdateSystemWorkers(ArkindexTestCase): self.assertEqual(s3_ingest_version.worker.name, "S3Ingest") self.assertEqual(s3_ingest_version.worker.slug, "s3_ingest") self.assertEqual(s3_ingest_version.worker.type, worker_type) + self.assertEqual(pdf_export_version.worker.name, "ExportPDF") + self.assertEqual(pdf_export_version.worker.slug, "pdf_export") + self.assertEqual(pdf_export_version.worker.type, worker_type) + self.assertEqual(pagexml_export_version.worker.name, "ExportPageXML") + self.assertEqual(pagexml_export_version.worker.slug, "pagexml_export") + self.assertEqual(pagexml_export_version.worker.type, worker_type) self.assertEqual(output, dedent( f""" @@ -95,6 +109,16 @@ class TestUpdateSystemWorkers(ArkindexTestCase): Created new S3Ingest system worker Creating new worker version Using new worker version {s3_ingest_version.id} + ────────────────────────────────── ExportPDF ─────────────────────────────────── + Using registry.example.com/file-export:latest to provide ExportPDF + Created new ExportPDF system worker + Creating new worker version + Using new worker version {pdf_export_version.id} + ──────────────────────────────── ExportPageXML ───────────────────────────────── + Using registry.example.com/file-export:latest to provide ExportPageXML + Created new ExportPageXML system worker + Creating new worker version + Using new worker version {pagexml_export_version.id} """ ).strip()) @@ -115,6 +139,12 @@ class TestUpdateSystemWorkers(ArkindexTestCase): s3_ingest_version = WorkerVersion.objects.get_by_feature(ArkindexFeature.S3Ingest) self.check_feature_version(s3_ingest_version, ArkindexFeature.S3Ingest) + pdf_export_version = WorkerVersion.objects.get_by_feature(ArkindexFeature.ExportPDF) + self.check_feature_version(pdf_export_version, ArkindexFeature.ExportPDF) + + pagexml_export_version = WorkerVersion.objects.get_by_feature(ArkindexFeature.ExportPageXML) + self.check_feature_version(pagexml_export_version, ArkindexFeature.ExportPageXML) + # Check the attributes on the two new workers self.assertEqual(init_elements_version.worker.name, "InitElements") self.assertEqual(init_elements_version.worker.slug, "init_elements") @@ -122,6 +152,12 @@ class TestUpdateSystemWorkers(ArkindexTestCase): self.assertEqual(s3_ingest_version.worker.name, "S3Ingest") self.assertEqual(s3_ingest_version.worker.slug, "s3_ingest") self.assertEqual(s3_ingest_version.worker.type, worker_type) + self.assertEqual(pdf_export_version.worker.name, "ExportPDF") + self.assertEqual(pdf_export_version.worker.slug, "pdf_export") + self.assertEqual(pdf_export_version.worker.type, worker_type) + self.assertEqual(pagexml_export_version.worker.name, "ExportPageXML") + self.assertEqual(pagexml_export_version.worker.slug, "pagexml_export") + self.assertEqual(pagexml_export_version.worker.type, worker_type) self.assertEqual(output, dedent( f""" @@ -140,6 +176,16 @@ class TestUpdateSystemWorkers(ArkindexTestCase): Created new S3Ingest system worker Creating new worker version Using new worker version {s3_ingest_version.id} + ────────────────────────────────── ExportPDF ─────────────────────────────────── + Using registry.example.com/file-export:latest to provide ExportPDF + Created new ExportPDF system worker + Creating new worker version + Using new worker version {pdf_export_version.id} + ──────────────────────────────── ExportPageXML ───────────────────────────────── + Using registry.example.com/file-export:latest to provide ExportPageXML + Created new ExportPageXML system worker + Creating new worker version + Using new worker version {pagexml_export_version.id} """ ).strip()) @@ -164,11 +210,23 @@ class TestUpdateSystemWorkers(ArkindexTestCase): name="S3, Ingest of the", public=True, ) + pdf_export_worker = Worker.objects.create( + type=worker_type, + slug="pdf_export", + name="PDF, export of the", + public=True + ) + pagexml_export_worker = Worker.objects.create( + type=worker_type, + slug="pagexml_export", + name="Page XML, der Export", + public=True + ) output = self.update_system_workers() self.assertEqual(WorkerType.objects.count(), 1) - self.assertEqual(Worker.objects.count(), 3) + self.assertEqual(Worker.objects.count(), 5) file_import_version = WorkerVersion.objects.get_by_feature(ArkindexFeature.FileImport) self.assertEqual(file_import_version.worker, file_import_worker) @@ -182,6 +240,14 @@ class TestUpdateSystemWorkers(ArkindexTestCase): self.assertEqual(s3_ingest_version.worker, s3_ingest_worker) self.check_feature_version(s3_ingest_version, ArkindexFeature.S3Ingest) + pdf_export_version = WorkerVersion.objects.get_by_feature(ArkindexFeature.ExportPDF) + self.assertEqual(pdf_export_version.worker, pdf_export_worker) + self.check_feature_version(pdf_export_version, ArkindexFeature.ExportPDF) + + pagexml_export_version = WorkerVersion.objects.get_by_feature(ArkindexFeature.ExportPageXML) + self.assertEqual(pagexml_export_version.worker, pagexml_export_worker) + self.check_feature_version(pagexml_export_version, ArkindexFeature.ExportPageXML) + self.assertEqual(output, dedent( f""" ────────────────────────────────── FileImport ────────────────────────────────── @@ -202,6 +268,18 @@ class TestUpdateSystemWorkers(ArkindexTestCase): Worker is up to date Creating new worker version Using new worker version {s3_ingest_version.id} + ────────────────────────────────── ExportPDF ─────────────────────────────────── + Using registry.example.com/file-export:latest to provide ExportPDF + Using existing system worker PDF, export of the + Worker is up to date + Creating new worker version + Using new worker version {pdf_export_version.id} + ──────────────────────────────── ExportPageXML ───────────────────────────────── + Using registry.example.com/file-export:latest to provide ExportPageXML + Using existing system worker Page XML, der Export + Worker is up to date + Creating new worker version + Using new worker version {pagexml_export_version.id} """ ).strip()) @@ -261,7 +339,7 @@ class TestUpdateSystemWorkers(ArkindexTestCase): output = self.update_system_workers() self.assertEqual(WorkerType.objects.count(), 1) - self.assertEqual(Worker.objects.count(), 3) + self.assertEqual(Worker.objects.count(), 5) file_import_worker.refresh_from_db() self.assertEqual(file_import_worker.name, "Sir File-a-Lot") @@ -296,6 +374,12 @@ class TestUpdateSystemWorkers(ArkindexTestCase): self.assertEqual(s3_ingest_version.worker, s3_ingest_worker) self.check_feature_version(s3_ingest_version, ArkindexFeature.S3Ingest) + pdf_export_version = WorkerVersion.objects.get_by_feature(ArkindexFeature.ExportPDF) + self.check_feature_version(pdf_export_version, ArkindexFeature.ExportPDF) + + pagexml_export_version = WorkerVersion.objects.get_by_feature(ArkindexFeature.ExportPageXML) + self.check_feature_version(pagexml_export_version, ArkindexFeature.ExportPageXML) + self.assertEqual(output, dedent( f""" ────────────────────────────────── FileImport ────────────────────────────────── @@ -316,6 +400,16 @@ class TestUpdateSystemWorkers(ArkindexTestCase): Unarchiving worker Creating new worker version Using new worker version {s3_ingest_version.id} + ────────────────────────────────── ExportPDF ─────────────────────────────────── + Using registry.example.com/file-export:latest to provide ExportPDF + Created new ExportPDF system worker + Creating new worker version + Using new worker version {pdf_export_version.id} + ──────────────────────────────── ExportPageXML ───────────────────────────────── + Using registry.example.com/file-export:latest to provide ExportPageXML + Created new ExportPageXML system worker + Creating new worker version + Using new worker version {pagexml_export_version.id} """ ).strip()) @@ -344,6 +438,18 @@ class TestUpdateSystemWorkers(ArkindexTestCase): name="S3, Ingest of the", public=True, ) + pdf_export_worker = Worker.objects.create( + type=worker_type, + slug="pdf_export", + name="PDF, export of the", + public=True + ) + pagexml_export_worker = Worker.objects.create( + type=worker_type, + slug="pagexml_export", + name="Page XML, der Export", + public=True + ) # Those versions use the correct image, but do not have acceptable attributes init_elements_worker.versions.create( @@ -374,6 +480,11 @@ class TestUpdateSystemWorkers(ArkindexTestCase): version=4, model_usage=FeatureUsage.Required, ) + pdf_export_worker.versions.create( + docker_image_iid="registry.example.com/file-export:latest", + version=2, + configuration={"docker": {"command": "something"}} + ) # These versions should be assigned to the features file_import_version = file_import_worker.versions.create( @@ -385,12 +496,23 @@ class TestUpdateSystemWorkers(ArkindexTestCase): docker_image_iid="registry.example.com/init-elements:latest", version=5, state=WorkerVersionState.Available, + configuration={"docker": {"command": "init-elements"}}, ) s3_ingest_version = s3_ingest_worker.versions.create( docker_image_iid="registry.example.com/s3-ingest:latest", version=1, state=WorkerVersionState.Created, ) + pdf_export_version = pdf_export_worker.versions.create( + docker_image_iid="registry.example.com/file-export:latest", + version=1, + configuration={"docker": {"command": "worker-export-pdf"}} + ) + pagexml_export_version = pagexml_export_worker.versions.create( + docker_image_iid="registry.example.com/file-export:latest", + version=1, + configuration={"docker": {"command": "worker-export-pagexml"}} + ) self.assertEqual(self.update_system_workers(), dedent( f""" @@ -414,16 +536,34 @@ class TestUpdateSystemWorkers(ArkindexTestCase): Assigning existing worker version {s3_ingest_version.id} to the feature Marking the worker version as available Using existing worker version {s3_ingest_version.id} + ────────────────────────────────── ExportPDF ─────────────────────────────────── + Using registry.example.com/file-export:latest to provide ExportPDF + Using existing system worker PDF, export of the + Worker is up to date + Assigning existing worker version {pdf_export_version.id} to the feature + Setup docker command: worker-export-pdf + Marking the worker version as available + Using existing worker version {pdf_export_version.id} + ──────────────────────────────── ExportPageXML ───────────────────────────────── + Using registry.example.com/file-export:latest to provide ExportPageXML + Using existing system worker Page XML, der Export + Worker is up to date + Assigning existing worker version {pagexml_export_version.id} to the feature + Setup docker command: worker-export-pagexml + Marking the worker version as available + Using existing worker version {pagexml_export_version.id} """ ).strip()) self.assertEqual(WorkerType.objects.count(), 1) - self.assertEqual(Worker.objects.count(), 3) - self.assertEqual(WorkerVersion.objects.count(), 7) + self.assertEqual(Worker.objects.count(), 5) + self.assertEqual(WorkerVersion.objects.count(), 10) self.assertEqual(WorkerVersion.objects.get_by_feature(ArkindexFeature.FileImport), file_import_version) self.assertEqual(WorkerVersion.objects.get_by_feature(ArkindexFeature.InitElements), init_elements_version) self.assertEqual(WorkerVersion.objects.get_by_feature(ArkindexFeature.S3Ingest), s3_ingest_version) + self.assertEqual(WorkerVersion.objects.get_by_feature(ArkindexFeature.ExportPDF), pdf_export_version) + self.assertEqual(WorkerVersion.objects.get_by_feature(ArkindexFeature.ExportPageXML), pagexml_export_version) file_import_version.refresh_from_db() self.check_feature_version(file_import_version, ArkindexFeature.FileImport) @@ -431,6 +571,10 @@ class TestUpdateSystemWorkers(ArkindexTestCase): self.check_feature_version(init_elements_version, ArkindexFeature.InitElements) s3_ingest_version.refresh_from_db() self.check_feature_version(s3_ingest_version, ArkindexFeature.S3Ingest) + pdf_export_version.refresh_from_db() + self.check_feature_version(pdf_export_version, ArkindexFeature.ExportPDF) + pagexml_export_version.refresh_from_db() + self.check_feature_version(pagexml_export_version, ArkindexFeature.ExportPageXML) @patch("arkindex.process.management.commands.update_system_workers.parse_config", lambda: MOCK_CONFIG) def test_unassigns_incompatible_versions(self): @@ -496,12 +640,12 @@ class TestUpdateSystemWorkers(ArkindexTestCase): self.assertEqual(system_worker_type.slug, "system") self.assertEqual(system_worker_type.display_name, "System") - self.assertEqual(Worker.objects.count(), 4) + self.assertEqual(Worker.objects.count(), 6) new_s3_ingest_worker = Worker.objects.get(slug="s3_ingest") self.assertEqual(new_s3_ingest_worker.type, system_worker_type) self.assertEqual(new_s3_ingest_worker.name, "S3Ingest") - self.assertEqual(WorkerVersion.objects.count(), 6) + self.assertEqual(WorkerVersion.objects.count(), 8) new_file_import_version = file_import_worker.versions.get(version=2) self.check_feature_version(new_file_import_version, ArkindexFeature.FileImport) new_init_elements_version = init_elements_worker.versions.get(version=2) @@ -509,6 +653,9 @@ class TestUpdateSystemWorkers(ArkindexTestCase): new_s3_ingest_version = new_s3_ingest_worker.versions.get() self.check_feature_version(new_s3_ingest_version, ArkindexFeature.S3Ingest) + pdf_export_version = WorkerVersion.objects.get_by_feature(ArkindexFeature.ExportPDF) + pagexml_export_version = WorkerVersion.objects.get_by_feature(ArkindexFeature.ExportPageXML) + self.assertEqual(output, dedent( f""" ────────────────────────────────── FileImport ────────────────────────────────── @@ -538,6 +685,16 @@ class TestUpdateSystemWorkers(ArkindexTestCase): Created new S3Ingest system worker Creating new worker version Using new worker version {new_s3_ingest_version.id} + ────────────────────────────────── ExportPDF ─────────────────────────────────── + Using registry.example.com/file-export:latest to provide ExportPDF + Created new ExportPDF system worker + Creating new worker version + Using new worker version {pdf_export_version.id} + ──────────────────────────────── ExportPageXML ───────────────────────────────── + Using registry.example.com/file-export:latest to provide ExportPageXML + Created new ExportPageXML system worker + Creating new worker version + Using new worker version {pagexml_export_version.id} """ ).strip()) @@ -565,6 +722,20 @@ class TestUpdateSystemWorkers(ArkindexTestCase): version=3, state=WorkerVersionState.Available, ) + pdf_export_version = worker.versions.create( + feature=ArkindexFeature.ExportPDF, + docker_image_iid="registry.example.com/file-export:latest", + configuration={"docker": {"command": "worker-export-pdf"}}, + version=4, + state=WorkerVersionState.Available + ) + pagexml_export_version = worker.versions.create( + feature=ArkindexFeature.ExportPageXML, + docker_image_iid="registry.example.com/file-export:latest", + configuration={"docker": {"command": "worker-export-pagexml"}}, + version=5, + state=WorkerVersionState.Available + ) self.assertEqual(self.update_system_workers(), dedent( f""" @@ -584,16 +755,30 @@ class TestUpdateSystemWorkers(ArkindexTestCase): Current worker version: {s3_ingest_version.id} (registry.example.com/s3-ingest:latest) Worker is up to date Worker version for S3Ingest is up to date + ────────────────────────────────── ExportPDF ─────────────────────────────────── + Using registry.example.com/file-export:latest to provide ExportPDF + Current worker version: {pdf_export_version.id} (registry.example.com/file-export:latest) + This version uses the required Docker command. + Worker is up to date + Worker version for ExportPDF is up to date + ──────────────────────────────── ExportPageXML ───────────────────────────────── + Using registry.example.com/file-export:latest to provide ExportPageXML + Current worker version: {pagexml_export_version.id} (registry.example.com/file-export:latest) + This version uses the required Docker command. + Worker is up to date + Worker version for ExportPageXML is up to date """ ).strip()) self.assertEqual(WorkerType.objects.count(), 1) self.assertEqual(Worker.objects.count(), 1) - self.assertEqual(WorkerVersion.objects.count(), 3) + self.assertEqual(WorkerVersion.objects.count(), 5) self.assertEqual(WorkerVersion.objects.get_by_feature(ArkindexFeature.FileImport), file_import_version) self.assertEqual(WorkerVersion.objects.get_by_feature(ArkindexFeature.InitElements), init_elements_version) self.assertEqual(WorkerVersion.objects.get_by_feature(ArkindexFeature.S3Ingest), s3_ingest_version) + self.assertEqual(WorkerVersion.objects.get_by_feature(ArkindexFeature.ExportPDF), pdf_export_version) + self.assertEqual(WorkerVersion.objects.get_by_feature(ArkindexFeature.ExportPageXML), pagexml_export_version) file_import_version.refresh_from_db() self.check_feature_version(file_import_version, ArkindexFeature.FileImport) @@ -601,3 +786,7 @@ class TestUpdateSystemWorkers(ArkindexTestCase): self.check_feature_version(init_elements_version, ArkindexFeature.InitElements) s3_ingest_version.refresh_from_db() self.check_feature_version(s3_ingest_version, ArkindexFeature.S3Ingest) + pdf_export_version.refresh_from_db() + self.check_feature_version(pdf_export_version, ArkindexFeature.ExportPDF) + pagexml_export_version.refresh_from_db() + self.check_feature_version(pagexml_export_version, ArkindexFeature.ExportPageXML) diff --git a/arkindex/system_workers.yml b/arkindex/system_workers.yml index 1ae20e3a4c9d30f89f7da7e1ce9d237c3a593650..bfccfe16c41d8e4b1ba6758b483e2449b140ffd2 100644 --- a/arkindex/system_workers.yml +++ b/arkindex/system_workers.yml @@ -11,3 +11,9 @@ features: command: worker-init-elements s3_ingest: image: registry.gitlab.teklia.com/arkindex/workers/import/s3:0.1.0 + pdf_export: + image: registry.gitlab.teklia.com/arkindex/workers/export:0.1.0 + command: worker-export-pdf + pagexml_export: + image: registry.gitlab.teklia.com/arkindex/workers/export:0.1.0 + command: worker-export-pagexml