Skip to content
Snippets Groups Projects
Commit 78f4ff80 authored by ml bonhomme, committed by Erwan Rouchet
Browse files

Add PDF and Page XML export Arkindex features

parent 391be3d0
No related branches found
No related tags found
1 merge request: !2473 Add PDF and Page XML export Arkindex features
......@@ -90,6 +90,9 @@ class Command(BaseCommand):
"""
assert worker.archived is None, "Cannot assign a version on an archived worker to a feature"
# Since there can be worker versions using the same Docker image but with different commands,
# we need to filter by command as well as by Docker image.
docker_command_filter = {"configuration__docker__command": docker_command} if docker_command else {"configuration__docker__command__isnull": True}
# docker_image_iid is not unique, so we sort by state and take the most recent one.
# Sorting by state means we prefer `available` versions first, then `created`, then `error`.
worker_version = worker.versions.filter(
......@@ -99,6 +102,7 @@ class Command(BaseCommand):
configuration__user_configuration__isnull=True,
gpu_usage=FeatureUsage.Disabled,
model_usage=FeatureUsage.Disabled,
**docker_command_filter
).order_by("state", "-updated").first()
if worker_version:
......
......@@ -666,6 +666,8 @@ class ArkindexFeature(Enum):
InitElements = "init_elements"
FileImport = "file_import"
S3Ingest = "s3_ingest"
ExportPDF = "pdf_export"
ExportPageXML = "pagexml_export"
class WorkerVersion(models.Model):
......
......@@ -13,7 +13,9 @@ MOCK_CONFIG = {
"file_import": {"image": "registry.example.com/file-import:latest", "command": None},
"init_elements":
{"image": "registry.example.com/init-elements:latest", "command": "init-elements"},
"s3_ingest": {"image": "registry.example.com/s3-ingest:latest", "command": None}
"s3_ingest": {"image": "registry.example.com/s3-ingest:latest", "command": None},
"pdf_export": {"image": "registry.example.com/file-export:latest", "command": "worker-export-pdf"},
"pagexml_export": {"image": "registry.example.com/file-export:latest", "command": "worker-export-pagexml"}
}
}
......@@ -66,6 +68,12 @@ class TestUpdateSystemWorkers(ArkindexTestCase):
s3_ingest_version = WorkerVersion.objects.get_by_feature(ArkindexFeature.S3Ingest)
self.check_feature_version(s3_ingest_version, ArkindexFeature.S3Ingest)
pdf_export_version = WorkerVersion.objects.get_by_feature(ArkindexFeature.ExportPDF)
self.check_feature_version(pdf_export_version, ArkindexFeature.ExportPDF)
pagexml_export_version = WorkerVersion.objects.get_by_feature(ArkindexFeature.ExportPageXML)
self.check_feature_version(pagexml_export_version, ArkindexFeature.ExportPageXML)
# Check the attributes on the new workers
self.assertEqual(file_import_version.worker.name, "FileImport")
self.assertEqual(file_import_version.worker.slug, "file_import")
......@@ -76,6 +84,12 @@ class TestUpdateSystemWorkers(ArkindexTestCase):
self.assertEqual(s3_ingest_version.worker.name, "S3Ingest")
self.assertEqual(s3_ingest_version.worker.slug, "s3_ingest")
self.assertEqual(s3_ingest_version.worker.type, worker_type)
self.assertEqual(pdf_export_version.worker.name, "ExportPDF")
self.assertEqual(pdf_export_version.worker.slug, "pdf_export")
self.assertEqual(pdf_export_version.worker.type, worker_type)
self.assertEqual(pagexml_export_version.worker.name, "ExportPageXML")
self.assertEqual(pagexml_export_version.worker.slug, "pagexml_export")
self.assertEqual(pagexml_export_version.worker.type, worker_type)
self.assertEqual(output, dedent(
f"""
......@@ -95,6 +109,16 @@ class TestUpdateSystemWorkers(ArkindexTestCase):
Created new S3Ingest system worker
Creating new worker version
Using new worker version {s3_ingest_version.id}
────────────────────────────────── ExportPDF ───────────────────────────────────
Using registry.example.com/file-export:latest to provide ExportPDF
Created new ExportPDF system worker
Creating new worker version
Using new worker version {pdf_export_version.id}
──────────────────────────────── ExportPageXML ─────────────────────────────────
Using registry.example.com/file-export:latest to provide ExportPageXML
Created new ExportPageXML system worker
Creating new worker version
Using new worker version {pagexml_export_version.id}
"""
).strip())
......@@ -115,6 +139,12 @@ class TestUpdateSystemWorkers(ArkindexTestCase):
s3_ingest_version = WorkerVersion.objects.get_by_feature(ArkindexFeature.S3Ingest)
self.check_feature_version(s3_ingest_version, ArkindexFeature.S3Ingest)
pdf_export_version = WorkerVersion.objects.get_by_feature(ArkindexFeature.ExportPDF)
self.check_feature_version(pdf_export_version, ArkindexFeature.ExportPDF)
pagexml_export_version = WorkerVersion.objects.get_by_feature(ArkindexFeature.ExportPageXML)
self.check_feature_version(pagexml_export_version, ArkindexFeature.ExportPageXML)
# Check the attributes on the new workers
self.assertEqual(init_elements_version.worker.name, "InitElements")
self.assertEqual(init_elements_version.worker.slug, "init_elements")
......@@ -122,6 +152,12 @@ class TestUpdateSystemWorkers(ArkindexTestCase):
self.assertEqual(s3_ingest_version.worker.name, "S3Ingest")
self.assertEqual(s3_ingest_version.worker.slug, "s3_ingest")
self.assertEqual(s3_ingest_version.worker.type, worker_type)
self.assertEqual(pdf_export_version.worker.name, "ExportPDF")
self.assertEqual(pdf_export_version.worker.slug, "pdf_export")
self.assertEqual(pdf_export_version.worker.type, worker_type)
self.assertEqual(pagexml_export_version.worker.name, "ExportPageXML")
self.assertEqual(pagexml_export_version.worker.slug, "pagexml_export")
self.assertEqual(pagexml_export_version.worker.type, worker_type)
self.assertEqual(output, dedent(
f"""
......@@ -140,6 +176,16 @@ class TestUpdateSystemWorkers(ArkindexTestCase):
Created new S3Ingest system worker
Creating new worker version
Using new worker version {s3_ingest_version.id}
────────────────────────────────── ExportPDF ───────────────────────────────────
Using registry.example.com/file-export:latest to provide ExportPDF
Created new ExportPDF system worker
Creating new worker version
Using new worker version {pdf_export_version.id}
──────────────────────────────── ExportPageXML ─────────────────────────────────
Using registry.example.com/file-export:latest to provide ExportPageXML
Created new ExportPageXML system worker
Creating new worker version
Using new worker version {pagexml_export_version.id}
"""
).strip())
......@@ -164,11 +210,23 @@ class TestUpdateSystemWorkers(ArkindexTestCase):
name="S3, Ingest of the",
public=True,
)
pdf_export_worker = Worker.objects.create(
type=worker_type,
slug="pdf_export",
name="PDF, export of the",
public=True
)
pagexml_export_worker = Worker.objects.create(
type=worker_type,
slug="pagexml_export",
name="Page XML, der Export",
public=True
)
output = self.update_system_workers()
self.assertEqual(WorkerType.objects.count(), 1)
self.assertEqual(Worker.objects.count(), 3)
self.assertEqual(Worker.objects.count(), 5)
file_import_version = WorkerVersion.objects.get_by_feature(ArkindexFeature.FileImport)
self.assertEqual(file_import_version.worker, file_import_worker)
......@@ -182,6 +240,14 @@ class TestUpdateSystemWorkers(ArkindexTestCase):
self.assertEqual(s3_ingest_version.worker, s3_ingest_worker)
self.check_feature_version(s3_ingest_version, ArkindexFeature.S3Ingest)
pdf_export_version = WorkerVersion.objects.get_by_feature(ArkindexFeature.ExportPDF)
self.assertEqual(pdf_export_version.worker, pdf_export_worker)
self.check_feature_version(pdf_export_version, ArkindexFeature.ExportPDF)
pagexml_export_version = WorkerVersion.objects.get_by_feature(ArkindexFeature.ExportPageXML)
self.assertEqual(pagexml_export_version.worker, pagexml_export_worker)
self.check_feature_version(pagexml_export_version, ArkindexFeature.ExportPageXML)
self.assertEqual(output, dedent(
f"""
────────────────────────────────── FileImport ──────────────────────────────────
......@@ -202,6 +268,18 @@ class TestUpdateSystemWorkers(ArkindexTestCase):
Worker is up to date
Creating new worker version
Using new worker version {s3_ingest_version.id}
────────────────────────────────── ExportPDF ───────────────────────────────────
Using registry.example.com/file-export:latest to provide ExportPDF
Using existing system worker PDF, export of the
Worker is up to date
Creating new worker version
Using new worker version {pdf_export_version.id}
──────────────────────────────── ExportPageXML ─────────────────────────────────
Using registry.example.com/file-export:latest to provide ExportPageXML
Using existing system worker Page XML, der Export
Worker is up to date
Creating new worker version
Using new worker version {pagexml_export_version.id}
"""
).strip())
......@@ -261,7 +339,7 @@ class TestUpdateSystemWorkers(ArkindexTestCase):
output = self.update_system_workers()
self.assertEqual(WorkerType.objects.count(), 1)
self.assertEqual(Worker.objects.count(), 3)
self.assertEqual(Worker.objects.count(), 5)
file_import_worker.refresh_from_db()
self.assertEqual(file_import_worker.name, "Sir File-a-Lot")
......@@ -296,6 +374,12 @@ class TestUpdateSystemWorkers(ArkindexTestCase):
self.assertEqual(s3_ingest_version.worker, s3_ingest_worker)
self.check_feature_version(s3_ingest_version, ArkindexFeature.S3Ingest)
pdf_export_version = WorkerVersion.objects.get_by_feature(ArkindexFeature.ExportPDF)
self.check_feature_version(pdf_export_version, ArkindexFeature.ExportPDF)
pagexml_export_version = WorkerVersion.objects.get_by_feature(ArkindexFeature.ExportPageXML)
self.check_feature_version(pagexml_export_version, ArkindexFeature.ExportPageXML)
self.assertEqual(output, dedent(
f"""
────────────────────────────────── FileImport ──────────────────────────────────
......@@ -316,6 +400,16 @@ class TestUpdateSystemWorkers(ArkindexTestCase):
Unarchiving worker
Creating new worker version
Using new worker version {s3_ingest_version.id}
────────────────────────────────── ExportPDF ───────────────────────────────────
Using registry.example.com/file-export:latest to provide ExportPDF
Created new ExportPDF system worker
Creating new worker version
Using new worker version {pdf_export_version.id}
──────────────────────────────── ExportPageXML ─────────────────────────────────
Using registry.example.com/file-export:latest to provide ExportPageXML
Created new ExportPageXML system worker
Creating new worker version
Using new worker version {pagexml_export_version.id}
"""
).strip())
......@@ -344,6 +438,18 @@ class TestUpdateSystemWorkers(ArkindexTestCase):
name="S3, Ingest of the",
public=True,
)
pdf_export_worker = Worker.objects.create(
type=worker_type,
slug="pdf_export",
name="PDF, export of the",
public=True
)
pagexml_export_worker = Worker.objects.create(
type=worker_type,
slug="pagexml_export",
name="Page XML, der Export",
public=True
)
# Those versions use the correct image, but do not have acceptable attributes
init_elements_worker.versions.create(
......@@ -374,6 +480,11 @@ class TestUpdateSystemWorkers(ArkindexTestCase):
version=4,
model_usage=FeatureUsage.Required,
)
pdf_export_worker.versions.create(
docker_image_iid="registry.example.com/file-export:latest",
version=2,
configuration={"docker": {"command": "something"}}
)
# These versions should be assigned to the features
file_import_version = file_import_worker.versions.create(
......@@ -385,12 +496,23 @@ class TestUpdateSystemWorkers(ArkindexTestCase):
docker_image_iid="registry.example.com/init-elements:latest",
version=5,
state=WorkerVersionState.Available,
configuration={"docker": {"command": "init-elements"}},
)
s3_ingest_version = s3_ingest_worker.versions.create(
docker_image_iid="registry.example.com/s3-ingest:latest",
version=1,
state=WorkerVersionState.Created,
)
pdf_export_version = pdf_export_worker.versions.create(
docker_image_iid="registry.example.com/file-export:latest",
version=1,
configuration={"docker": {"command": "worker-export-pdf"}}
)
pagexml_export_version = pagexml_export_worker.versions.create(
docker_image_iid="registry.example.com/file-export:latest",
version=1,
configuration={"docker": {"command": "worker-export-pagexml"}}
)
self.assertEqual(self.update_system_workers(), dedent(
f"""
......@@ -414,16 +536,34 @@ class TestUpdateSystemWorkers(ArkindexTestCase):
Assigning existing worker version {s3_ingest_version.id} to the feature
Marking the worker version as available
Using existing worker version {s3_ingest_version.id}
────────────────────────────────── ExportPDF ───────────────────────────────────
Using registry.example.com/file-export:latest to provide ExportPDF
Using existing system worker PDF, export of the
Worker is up to date
Assigning existing worker version {pdf_export_version.id} to the feature
Setup docker command: worker-export-pdf
Marking the worker version as available
Using existing worker version {pdf_export_version.id}
──────────────────────────────── ExportPageXML ─────────────────────────────────
Using registry.example.com/file-export:latest to provide ExportPageXML
Using existing system worker Page XML, der Export
Worker is up to date
Assigning existing worker version {pagexml_export_version.id} to the feature
Setup docker command: worker-export-pagexml
Marking the worker version as available
Using existing worker version {pagexml_export_version.id}
"""
).strip())
self.assertEqual(WorkerType.objects.count(), 1)
self.assertEqual(Worker.objects.count(), 3)
self.assertEqual(WorkerVersion.objects.count(), 7)
self.assertEqual(Worker.objects.count(), 5)
self.assertEqual(WorkerVersion.objects.count(), 10)
self.assertEqual(WorkerVersion.objects.get_by_feature(ArkindexFeature.FileImport), file_import_version)
self.assertEqual(WorkerVersion.objects.get_by_feature(ArkindexFeature.InitElements), init_elements_version)
self.assertEqual(WorkerVersion.objects.get_by_feature(ArkindexFeature.S3Ingest), s3_ingest_version)
self.assertEqual(WorkerVersion.objects.get_by_feature(ArkindexFeature.ExportPDF), pdf_export_version)
self.assertEqual(WorkerVersion.objects.get_by_feature(ArkindexFeature.ExportPageXML), pagexml_export_version)
file_import_version.refresh_from_db()
self.check_feature_version(file_import_version, ArkindexFeature.FileImport)
......@@ -431,6 +571,10 @@ class TestUpdateSystemWorkers(ArkindexTestCase):
self.check_feature_version(init_elements_version, ArkindexFeature.InitElements)
s3_ingest_version.refresh_from_db()
self.check_feature_version(s3_ingest_version, ArkindexFeature.S3Ingest)
pdf_export_version.refresh_from_db()
self.check_feature_version(pdf_export_version, ArkindexFeature.ExportPDF)
pagexml_export_version.refresh_from_db()
self.check_feature_version(pagexml_export_version, ArkindexFeature.ExportPageXML)
@patch("arkindex.process.management.commands.update_system_workers.parse_config", lambda: MOCK_CONFIG)
def test_unassigns_incompatible_versions(self):
......@@ -496,12 +640,12 @@ class TestUpdateSystemWorkers(ArkindexTestCase):
self.assertEqual(system_worker_type.slug, "system")
self.assertEqual(system_worker_type.display_name, "System")
self.assertEqual(Worker.objects.count(), 4)
self.assertEqual(Worker.objects.count(), 6)
new_s3_ingest_worker = Worker.objects.get(slug="s3_ingest")
self.assertEqual(new_s3_ingest_worker.type, system_worker_type)
self.assertEqual(new_s3_ingest_worker.name, "S3Ingest")
self.assertEqual(WorkerVersion.objects.count(), 6)
self.assertEqual(WorkerVersion.objects.count(), 8)
new_file_import_version = file_import_worker.versions.get(version=2)
self.check_feature_version(new_file_import_version, ArkindexFeature.FileImport)
new_init_elements_version = init_elements_worker.versions.get(version=2)
......@@ -509,6 +653,9 @@ class TestUpdateSystemWorkers(ArkindexTestCase):
new_s3_ingest_version = new_s3_ingest_worker.versions.get()
self.check_feature_version(new_s3_ingest_version, ArkindexFeature.S3Ingest)
pdf_export_version = WorkerVersion.objects.get_by_feature(ArkindexFeature.ExportPDF)
pagexml_export_version = WorkerVersion.objects.get_by_feature(ArkindexFeature.ExportPageXML)
self.assertEqual(output, dedent(
f"""
────────────────────────────────── FileImport ──────────────────────────────────
......@@ -538,6 +685,16 @@ class TestUpdateSystemWorkers(ArkindexTestCase):
Created new S3Ingest system worker
Creating new worker version
Using new worker version {new_s3_ingest_version.id}
────────────────────────────────── ExportPDF ───────────────────────────────────
Using registry.example.com/file-export:latest to provide ExportPDF
Created new ExportPDF system worker
Creating new worker version
Using new worker version {pdf_export_version.id}
──────────────────────────────── ExportPageXML ─────────────────────────────────
Using registry.example.com/file-export:latest to provide ExportPageXML
Created new ExportPageXML system worker
Creating new worker version
Using new worker version {pagexml_export_version.id}
"""
).strip())
......@@ -565,6 +722,20 @@ class TestUpdateSystemWorkers(ArkindexTestCase):
version=3,
state=WorkerVersionState.Available,
)
pdf_export_version = worker.versions.create(
feature=ArkindexFeature.ExportPDF,
docker_image_iid="registry.example.com/file-export:latest",
configuration={"docker": {"command": "worker-export-pdf"}},
version=4,
state=WorkerVersionState.Available
)
pagexml_export_version = worker.versions.create(
feature=ArkindexFeature.ExportPageXML,
docker_image_iid="registry.example.com/file-export:latest",
configuration={"docker": {"command": "worker-export-pagexml"}},
version=5,
state=WorkerVersionState.Available
)
self.assertEqual(self.update_system_workers(), dedent(
f"""
......@@ -584,16 +755,30 @@ class TestUpdateSystemWorkers(ArkindexTestCase):
Current worker version: {s3_ingest_version.id} (registry.example.com/s3-ingest:latest)
Worker is up to date
Worker version for S3Ingest is up to date
────────────────────────────────── ExportPDF ───────────────────────────────────
Using registry.example.com/file-export:latest to provide ExportPDF
Current worker version: {pdf_export_version.id} (registry.example.com/file-export:latest)
This version uses the required Docker command.
Worker is up to date
Worker version for ExportPDF is up to date
──────────────────────────────── ExportPageXML ─────────────────────────────────
Using registry.example.com/file-export:latest to provide ExportPageXML
Current worker version: {pagexml_export_version.id} (registry.example.com/file-export:latest)
This version uses the required Docker command.
Worker is up to date
Worker version for ExportPageXML is up to date
"""
).strip())
self.assertEqual(WorkerType.objects.count(), 1)
self.assertEqual(Worker.objects.count(), 1)
self.assertEqual(WorkerVersion.objects.count(), 3)
self.assertEqual(WorkerVersion.objects.count(), 5)
self.assertEqual(WorkerVersion.objects.get_by_feature(ArkindexFeature.FileImport), file_import_version)
self.assertEqual(WorkerVersion.objects.get_by_feature(ArkindexFeature.InitElements), init_elements_version)
self.assertEqual(WorkerVersion.objects.get_by_feature(ArkindexFeature.S3Ingest), s3_ingest_version)
self.assertEqual(WorkerVersion.objects.get_by_feature(ArkindexFeature.ExportPDF), pdf_export_version)
self.assertEqual(WorkerVersion.objects.get_by_feature(ArkindexFeature.ExportPageXML), pagexml_export_version)
file_import_version.refresh_from_db()
self.check_feature_version(file_import_version, ArkindexFeature.FileImport)
......@@ -601,3 +786,7 @@ class TestUpdateSystemWorkers(ArkindexTestCase):
self.check_feature_version(init_elements_version, ArkindexFeature.InitElements)
s3_ingest_version.refresh_from_db()
self.check_feature_version(s3_ingest_version, ArkindexFeature.S3Ingest)
pdf_export_version.refresh_from_db()
self.check_feature_version(pdf_export_version, ArkindexFeature.ExportPDF)
pagexml_export_version.refresh_from_db()
self.check_feature_version(pagexml_export_version, ArkindexFeature.ExportPageXML)
......@@ -11,3 +11,9 @@ features:
command: worker-init-elements
s3_ingest:
image: registry.gitlab.teklia.com/arkindex/workers/import/s3:0.1.0
pdf_export:
image: registry.gitlab.teklia.com/arkindex/workers/export:0.1.0
command: worker-export-pdf
pagexml_export:
image: registry.gitlab.teklia.com/arkindex/workers/export:0.1.0
command: worker-export-pagexml
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment