Merge branch 'fix-wk-version-ids' into 'main'

Fix worker_run/version IDs

See merge request workers/generic-training-dataset!18

Merge branch 'fix-wk-version-ids' into 'main'
9ca910ea · Eva Bardou · 5003366e · 13b801c7 · 9ca910ea
Commit 9ca910ea authored 1 year ago by Eva Bardou
--- a/worker_generic_training_dataset/worker.py
+++ b/worker_generic_training_dataset/worker.py
@@ -11,7 +11,7 @@ from typing import List, Optional
 from uuid import UUID

 from apistar.exceptions import ErrorResponse
-from arkindex_export import Element, open_database
+from arkindex_export import Element, WorkerRun, WorkerVersion, open_database
 from arkindex_export.queries import list_children
 from arkindex_worker.cache import (
    CachedClassification,
@@ -32,6 +32,7 @@ from arkindex_worker.models import Dataset
 from arkindex_worker.models import Element as WorkerElement
 from arkindex_worker.utils import create_tar_zst_archive
 from arkindex_worker.worker import DatasetWorker
+from peewee import CharField
 from worker_generic_training_dataset.db import (
    list_classifications,
    list_transcription_entities,
@@ -50,6 +51,10 @@ def _format_element(element: WorkerElement) -> Element:
    return retrieve_element(element.id)


+def get_object_id(instance: WorkerVersion | WorkerRun | None) -> CharField | None:
+    return instance.id if instance else None
+
+
 class DatasetExtractor(DatasetWorker):
    def configure(self) -> None:
        self.args: Namespace = self.parser.parse_args()
@@ -147,7 +152,7 @@ class DatasetExtractor(DatasetWorker):
                class_name=classification.class_name,
                confidence=classification.confidence,
                state=classification.state,
-                worker_run_id=classification.worker_run,
+                worker_run_id=get_object_id(classification.worker_run),
            )
            for classification in list_classifications(element.id)
        ]
@@ -170,8 +175,8 @@ class DatasetExtractor(DatasetWorker):
                text=transcription.text,
                confidence=transcription.confidence,
                orientation=DEFAULT_TRANSCRIPTION_ORIENTATION,
-                worker_version_id=transcription.worker_version,
-                worker_run_id=transcription.worker_run,
+                worker_version_id=get_object_id(transcription.worker_version),
+                worker_run_id=get_object_id(transcription.worker_run),
            )
            for transcription in list_transcriptions(element.id)
        ]
@@ -196,7 +201,7 @@ class DatasetExtractor(DatasetWorker):
                    name=transcription_entity.entity.name,
                    validated=transcription_entity.entity.validated,
                    metas=transcription_entity.entity.metas,
-                    worker_run_id=transcription_entity.entity.worker_run,
+                    worker_run_id=get_object_id(transcription_entity.entity.worker_run),
                )
                entities.append(entity)
                transcription_entities.append(
@@ -207,7 +212,7 @@ class DatasetExtractor(DatasetWorker):
                        offset=transcription_entity.offset,
                        length=transcription_entity.length,
                        confidence=transcription_entity.confidence,
-                        worker_run_id=transcription_entity.worker_run,
+                        worker_run_id=get_object_id(transcription_entity.worker_run),
                    )
                )
        if entities:
@@ -279,8 +284,8 @@ class DatasetExtractor(DatasetWorker):
                polygon=element.polygon,
                rotation_angle=element.rotation_angle,
                mirrored=element.mirrored,
-                worker_version_id=element.worker_version,
-                worker_run_id=element.worker_run,
+                worker_version_id=get_object_id(element.worker_version),
+                worker_run_id=get_object_id(element.worker_run),
                confidence=element.confidence,
            )