From 0c2bf11efd39dcb0afe02b5dfdb23af8ce163c8e Mon Sep 17 00:00:00 2001 From: Yoann Schneider <yschneider@teklia.com> Date: Mon, 26 Sep 2022 10:15:39 +0000 Subject: [PATCH] Add worker_run_id on publication fields and in cache --- arkindex_worker/cache.py | 5 ++ arkindex_worker/worker/classification.py | 30 +++++++--- arkindex_worker/worker/element.py | 3 + arkindex_worker/worker/entity.py | 3 + arkindex_worker/worker/metadata.py | 1 + arkindex_worker/worker/transcription.py | 7 +++ tests/test_cache.py | 10 ++-- .../test_classifications.py | 57 ++++++++++++++++++- tests/test_elements_worker/test_elements.py | 12 ++++ tests/test_elements_worker/test_entities.py | 14 +++++ tests/test_elements_worker/test_metadata.py | 5 +- .../test_transcriptions.py | 37 ++++++++++++ 12 files changed, 167 insertions(+), 17 deletions(-) diff --git a/arkindex_worker/cache.py b/arkindex_worker/cache.py index 4baa14e5..fe7a4d62 100644 --- a/arkindex_worker/cache.py +++ b/arkindex_worker/cache.py @@ -95,6 +95,7 @@ class CachedElement(Model): mirrored = BooleanField(default=False) initial = BooleanField(default=False) worker_version_id = UUIDField(null=True) + worker_run_id = UUIDField(null=True) confidence = FloatField(null=True) class Meta: @@ -173,6 +174,7 @@ class CachedTranscription(Model): confidence = FloatField() orientation = CharField(max_length=50) worker_version_id = UUIDField(null=True) + worker_run_id = UUIDField(null=True) class Meta: database = db @@ -190,6 +192,7 @@ class CachedClassification(Model): confidence = FloatField() state = CharField(max_length=10) worker_version_id = UUIDField(null=True) + worker_run_id = UUIDField(null=True) class Meta: database = db @@ -207,6 +210,7 @@ class CachedEntity(Model): validated = BooleanField(default=False) metas = JSONField(null=True) worker_version_id = UUIDField(null=True) + worker_run_id = UUIDField(null=True) class Meta: database = db @@ -225,6 +229,7 @@ class CachedTranscriptionEntity(Model): offset = IntegerField(constraints=[Check("offset >= 0")]) length = IntegerField(constraints=[Check("length > 0")]) worker_version_id = UUIDField(null=True) + worker_run_id = UUIDField(null=True) confidence = FloatField(null=True) class Meta: diff --git a/arkindex_worker/worker/classification.py b/arkindex_worker/worker/classification.py index 53229776..4d9c0b7f 100644 --- a/arkindex_worker/worker/classification.py +++ b/arkindex_worker/worker/classification.py @@ -105,6 +105,7 @@ class ClassificationMixin(object): "element": str(element.id), "ml_class": self.get_ml_class_id(ml_class), "worker_version": self.worker_version_id, + "worker_run_id": self.worker_run_id, "confidence": confidence, "high_confidence": high_confidence, }, @@ -121,6 +122,7 @@ class ClassificationMixin(object): "confidence": created["confidence"], "state": created["state"], "worker_version_id": self.worker_version_id, + "worker_run_id": self.worker_run_id, } ] CachedClassification.insert_many(to_insert).execute() @@ -130,15 +132,23 @@ class ClassificationMixin(object): ) except ErrorResponse as e: # Detect already existing classification - if ( - e.status_code == 400 - and "non_field_errors" in e.content - and "The fields element, worker_version, ml_class must make a unique set." - in e.content["non_field_errors"] - ): - logger.warning( - f"This worker version has already set {ml_class} on element {element.id}" - ) + if e.status_code == 400 and "non_field_errors" in e.content: + if ( + "The fields element, worker_version, ml_class must make a unique set." + in e.content["non_field_errors"] + ): + logger.warning( + f"This worker version has already set {ml_class} on element {element.id}" + ) + elif ( + "The fields element, worker_run, ml_class must make a unique set." + in e.content["non_field_errors"] + ): + logger.warning( + f"This worker run has already set {ml_class} on element {element.id}" + ) + else: + raise return # Propagate any other API error @@ -202,6 +212,7 @@ class ClassificationMixin(object): body={ "parent": str(element.id), "worker_version": self.worker_version_id, + "worker_run_id": self.worker_run_id, "classifications": classifications, }, )["classifications"] @@ -220,6 +231,7 @@ class ClassificationMixin(object): "confidence": created_cl["confidence"], "state": created_cl["state"], "worker_version_id": self.worker_version_id, + "worker_run_id": self.worker_run_id, } for created_cl in created_cls ] diff --git a/arkindex_worker/worker/element.py b/arkindex_worker/worker/element.py index 0a30e8da..4caac206 100644 --- a/arkindex_worker/worker/element.py +++ b/arkindex_worker/worker/element.py @@ -139,6 +139,7 @@ class ElementMixin(object): "polygon": polygon, "parent": element.id, "worker_version": self.worker_version_id, + "worker_run_id": self.worker_run_id, "confidence": confidence, }, ) @@ -234,6 +235,7 @@ class ElementMixin(object): id=parent.id, body={ "worker_version": self.worker_version_id, + "worker_run_id": self.worker_run_id, "elements": elements, }, ) @@ -266,6 +268,7 @@ class ElementMixin(object): "image_id": image_id, "polygon": element["polygon"], "worker_version_id": self.worker_version_id, + "worker_run_id": self.worker_run_id, "confidence": element.get("confidence"), } for idx, element in enumerate(elements) diff --git a/arkindex_worker/worker/entity.py b/arkindex_worker/worker/entity.py index 0ccc7ddc..aa2ab829 100644 --- a/arkindex_worker/worker/entity.py +++ b/arkindex_worker/worker/entity.py @@ -71,6 +71,7 @@ class EntityMixin(object): "validated": validated, "corpus": self.corpus_id, "worker_version": self.worker_version_id, + "worker_run_id": self.worker_run_id, }, ) self.report.add_entity(element.id, entity["id"], type.value, name) @@ -141,6 +142,7 @@ class EntityMixin(object): "length": length, "offset": offset, "worker_version_id": self.worker_version_id, + "worker_run_id": self.worker_run_id, } if confidence is not None: body["confidence"] = confidence @@ -161,6 +163,7 @@ class EntityMixin(object): offset=offset, length=length, worker_version_id=self.worker_version_id, + worker_run_id=self.worker_run_id, confidence=confidence, ) except IntegrityError as e: diff --git a/arkindex_worker/worker/metadata.py b/arkindex_worker/worker/metadata.py index 635ed629..8d0e8f5f 100644 --- a/arkindex_worker/worker/metadata.py +++ b/arkindex_worker/worker/metadata.py @@ -102,6 +102,7 @@ class MetaDataMixin(object): "value": value, "entity_id": entity, "worker_version": self.worker_version_id, + "worker_run_id": self.worker_run_id, }, ) self.report.add_metadata(element.id, metadata["id"], type.value, name) diff --git a/arkindex_worker/worker/transcription.py b/arkindex_worker/worker/transcription.py index 77f9287b..e454e20d 100644 --- a/arkindex_worker/worker/transcription.py +++ b/arkindex_worker/worker/transcription.py @@ -83,6 +83,7 @@ class TranscriptionMixin(object): body={ "text": text, "worker_version": self.worker_version_id, + "worker_run_id": self.worker_run_id, "confidence": confidence, "orientation": orientation.value, }, @@ -101,6 +102,7 @@ class TranscriptionMixin(object): "confidence": created["confidence"], "orientation": created["orientation"], "worker_version_id": self.worker_version_id, + "worker_run_id": self.worker_run_id, } ] CachedTranscription.insert_many(to_insert).execute() @@ -171,6 +173,7 @@ class TranscriptionMixin(object): "CreateTranscriptions", body={ "worker_version": self.worker_version_id, + "worker_run_id": self.worker_run_id, "transcriptions": transcriptions_payload, }, )["transcriptions"] @@ -189,6 +192,7 @@ class TranscriptionMixin(object): "confidence": created_tr["confidence"], "orientation": created_tr["orientation"], "worker_version_id": self.worker_version_id, + "worker_run_id": self.worker_run_id, } for created_tr in created_trs ] @@ -284,6 +288,7 @@ class TranscriptionMixin(object): body={ "element_type": sub_element_type, "worker_version": self.worker_version_id, + "worker_run_id": self.worker_run_id, "transcriptions": transcriptions_payload, "return_elements": True, }, @@ -321,6 +326,7 @@ class TranscriptionMixin(object): "image_id": element.image_id, "polygon": transcription["polygon"], "worker_version_id": self.worker_version_id, + "worker_run_id": self.worker_run_id, } ) @@ -336,6 +342,7 @@ class TranscriptionMixin(object): "orientation", TextOrientation.HorizontalLeftToRight ).value, "worker_version_id": self.worker_version_id, + "worker_run_id": self.worker_run_id, } ) diff --git a/tests/test_cache.py b/tests/test_cache.py index 6cdcd984..41dd82f1 100644 --- a/tests/test_cache.py +++ b/tests/test_cache.py @@ -58,12 +58,12 @@ def test_create_tables(tmp_path): init_cache_db(db_path) create_tables() - expected_schema = """CREATE TABLE "classifications" ("id" TEXT NOT NULL PRIMARY KEY, "element_id" TEXT NOT NULL, "class_name" TEXT NOT NULL, "confidence" REAL NOT NULL, "state" VARCHAR(10) NOT NULL, "worker_version_id" TEXT, FOREIGN KEY ("element_id") REFERENCES "elements" ("id")) -CREATE TABLE "elements" ("id" TEXT NOT NULL PRIMARY KEY, "parent_id" TEXT, "type" VARCHAR(50) NOT NULL, "image_id" TEXT, "polygon" text, "rotation_angle" INTEGER NOT NULL, "mirrored" INTEGER NOT NULL, "initial" INTEGER NOT NULL, "worker_version_id" TEXT, "confidence" REAL, FOREIGN KEY ("image_id") REFERENCES "images" ("id")) -CREATE TABLE "entities" ("id" TEXT NOT NULL PRIMARY KEY, "type" VARCHAR(50) NOT NULL, "name" TEXT NOT NULL, "validated" INTEGER NOT NULL, "metas" text, "worker_version_id" TEXT) + expected_schema = """CREATE TABLE "classifications" ("id" TEXT NOT NULL PRIMARY KEY, "element_id" TEXT NOT NULL, "class_name" TEXT NOT NULL, "confidence" REAL NOT NULL, "state" VARCHAR(10) NOT NULL, "worker_version_id" TEXT, "worker_run_id" TEXT, FOREIGN KEY ("element_id") REFERENCES "elements" ("id")) +CREATE TABLE "elements" ("id" TEXT NOT NULL PRIMARY KEY, "parent_id" TEXT, "type" VARCHAR(50) NOT NULL, "image_id" TEXT, "polygon" text, "rotation_angle" INTEGER NOT NULL, "mirrored" INTEGER NOT NULL, "initial" INTEGER NOT NULL, "worker_version_id" TEXT, "worker_run_id" TEXT, "confidence" REAL, FOREIGN KEY ("image_id") REFERENCES "images" ("id")) +CREATE TABLE "entities" ("id" TEXT NOT NULL PRIMARY KEY, "type" VARCHAR(50) NOT NULL, "name" TEXT NOT NULL, "validated" INTEGER NOT NULL, "metas" text, "worker_version_id" TEXT, "worker_run_id" TEXT) CREATE TABLE "images" ("id" TEXT NOT NULL PRIMARY KEY, "width" INTEGER NOT NULL, "height" INTEGER NOT NULL, "url" TEXT NOT NULL) -CREATE TABLE "transcription_entities" ("transcription_id" TEXT NOT NULL, "entity_id" TEXT NOT NULL, "offset" INTEGER NOT NULL CHECK (offset >= 0), "length" INTEGER NOT NULL CHECK (length > 0), "worker_version_id" TEXT, "confidence" REAL, PRIMARY KEY ("transcription_id", "entity_id"), FOREIGN KEY ("transcription_id") REFERENCES "transcriptions" ("id"), FOREIGN KEY ("entity_id") REFERENCES "entities" ("id")) -CREATE TABLE "transcriptions" ("id" TEXT NOT NULL PRIMARY KEY, "element_id" TEXT NOT NULL, "text" TEXT NOT NULL, "confidence" REAL NOT NULL, "orientation" VARCHAR(50) NOT NULL, "worker_version_id" TEXT, FOREIGN KEY ("element_id") REFERENCES "elements" ("id"))""" +CREATE TABLE "transcription_entities" ("transcription_id" TEXT NOT NULL, "entity_id" TEXT NOT NULL, "offset" INTEGER NOT NULL CHECK (offset >= 0), "length" INTEGER NOT NULL CHECK (length > 0), "worker_version_id" TEXT, "worker_run_id" TEXT, "confidence" REAL, PRIMARY KEY ("transcription_id", "entity_id"), FOREIGN KEY ("transcription_id") REFERENCES "transcriptions" ("id"), FOREIGN KEY ("entity_id") REFERENCES "entities" ("id")) +CREATE TABLE "transcriptions" ("id" TEXT NOT NULL PRIMARY KEY, "element_id" TEXT NOT NULL, "text" TEXT NOT NULL, "confidence" REAL NOT NULL, "orientation" VARCHAR(50) NOT NULL, "worker_version_id" TEXT, "worker_run_id" TEXT, FOREIGN KEY ("element_id") REFERENCES "elements" ("id"))""" actual_schema = "\n".join( [ diff --git a/tests/test_elements_worker/test_classifications.py b/tests/test_elements_worker/test_classifications.py index 32f9d876..5f2be88e 100644 --- a/tests/test_elements_worker/test_classifications.py +++ b/tests/test_elements_worker/test_classifications.py @@ -250,6 +250,7 @@ def test_create_classification_wrong_ml_class(mock_elements_worker, responses): "element": "12341234-1234-1234-1234-123412341234", "ml_class": "new-ml-class-1234", "worker_version": "12341234-1234-1234-1234-123412341234", + "worker_run_id": "56785678-5678-5678-5678-567856785678", "confidence": 0.42, "high_confidence": True, }, @@ -401,6 +402,7 @@ def test_create_classification(responses, mock_elements_worker): "element": "12341234-1234-1234-1234-123412341234", "ml_class": "0000", "worker_version": "12341234-1234-1234-1234-123412341234", + "worker_run_id": "56785678-5678-5678-5678-567856785678", "confidence": 0.42, "high_confidence": True, } @@ -426,6 +428,7 @@ def test_create_classification_with_cache(responses, mock_elements_worker_with_c "element": "12341234-1234-1234-1234-123412341234", "ml_class": "0000", "worker_version": "12341234-1234-1234-1234-123412341234", + "worker_run_id": "56785678-5678-5678-5678-567856785678", "confidence": 0.42, "high_confidence": True, "state": "pending", @@ -450,6 +453,7 @@ def test_create_classification_with_cache(responses, mock_elements_worker_with_c "element": "12341234-1234-1234-1234-123412341234", "ml_class": "0000", "worker_version": "12341234-1234-1234-1234-123412341234", + "worker_run_id": "56785678-5678-5678-5678-567856785678", "confidence": 0.42, "high_confidence": True, } @@ -468,11 +472,14 @@ def test_create_classification_with_cache(responses, mock_elements_worker_with_c confidence=0.42, state="pending", worker_version_id=UUID("12341234-1234-1234-1234-123412341234"), + worker_run_id=UUID("56785678-5678-5678-5678-567856785678"), ) ] -def test_create_classification_duplicate(responses, mock_elements_worker): +def test_create_classification_duplicate_worker_version( + responses, mock_elements_worker +): mock_elements_worker.classes = { "11111111-1111-1111-1111-111111111111": {"a_class": "0000"} } @@ -506,6 +513,50 @@ def test_create_classification_duplicate(responses, mock_elements_worker): "element": "12341234-1234-1234-1234-123412341234", "ml_class": "0000", "worker_version": "12341234-1234-1234-1234-123412341234", + "worker_run_id": "56785678-5678-5678-5678-567856785678", + "confidence": 0.42, + "high_confidence": True, + } + + # Classification has NOT been created + assert mock_elements_worker.report.report_data["elements"] == {} + + +def test_create_classification_duplicate_worker_run(responses, mock_elements_worker): + mock_elements_worker.classes = { + "11111111-1111-1111-1111-111111111111": {"a_class": "0000"} + } + elt = Element({"id": "12341234-1234-1234-1234-123412341234"}) + responses.add( + responses.POST, + "http://testserver/api/v1/classifications/", + status=400, + json={ + "non_field_errors": [ + "The fields element, worker_run, ml_class must make a unique set." + ] + }, + ) + + mock_elements_worker.create_classification( + element=elt, + ml_class="a_class", + confidence=0.42, + high_confidence=True, + ) + + assert len(responses.calls) == len(BASE_API_CALLS) + 1 + assert [ + (call.request.method, call.request.url) for call in responses.calls + ] == BASE_API_CALLS + [ + ("POST", "http://testserver/api/v1/classifications/"), + ] + + assert json.loads(responses.calls[-1].request.body) == { + "element": "12341234-1234-1234-1234-123412341234", + "ml_class": "0000", + "worker_version": "12341234-1234-1234-1234-123412341234", + "worker_run_id": "56785678-5678-5678-5678-567856785678", "confidence": 0.42, "high_confidence": True, } @@ -827,6 +878,7 @@ def test_create_classifications(responses, mock_elements_worker_with_cache): json={ "parent": str(elt.id), "worker_version": "12341234-1234-1234-1234-123412341234", + "worker_run_id": "56785678-5678-5678-5678-567856785678", "classifications": [ { "id": "00000000-0000-0000-0000-000000000000", @@ -860,6 +912,7 @@ def test_create_classifications(responses, mock_elements_worker_with_cache): assert json.loads(responses.calls[-1].request.body) == { "parent": str(elt.id), "worker_version": "12341234-1234-1234-1234-123412341234", + "worker_run_id": "56785678-5678-5678-5678-567856785678", "classifications": classes, } @@ -872,6 +925,7 @@ def test_create_classifications(responses, mock_elements_worker_with_cache): confidence=0.75, state="pending", worker_version_id=UUID("12341234-1234-1234-1234-123412341234"), + worker_run_id=UUID("56785678-5678-5678-5678-567856785678"), ), CachedClassification( id=UUID("11111111-1111-1111-1111-111111111111"), @@ -880,5 +934,6 @@ def test_create_classifications(responses, mock_elements_worker_with_cache): confidence=0.25, state="pending", worker_version_id=UUID("12341234-1234-1234-1234-123412341234"), + worker_run_id=UUID("56785678-5678-5678-5678-567856785678"), ), ] diff --git a/tests/test_elements_worker/test_elements.py b/tests/test_elements_worker/test_elements.py index 46a60630..2d894feb 100644 --- a/tests/test_elements_worker/test_elements.py +++ b/tests/test_elements_worker/test_elements.py @@ -574,6 +574,7 @@ def test_create_sub_element(responses, mock_elements_worker, slim_output): "polygon": [[1, 1], [2, 2], [2, 1], [1, 2]], "parent": "12341234-1234-1234-1234-123412341234", "worker_version": "12341234-1234-1234-1234-123412341234", + "worker_run_id": "56785678-5678-5678-5678-567856785678", "confidence": None, } if slim_output: @@ -619,6 +620,7 @@ def test_create_sub_element_confidence(responses, mock_elements_worker): "polygon": [[1, 1], [2, 2], [2, 1], [1, 2]], "parent": "12341234-1234-1234-1234-123412341234", "worker_version": "12341234-1234-1234-1234-123412341234", + "worker_run_id": "56785678-5678-5678-5678-567856785678", "confidence": 0.42, } assert sub_element_id == "12345678-1234-1234-1234-123456789123" @@ -994,6 +996,7 @@ def test_create_elements_cached_element(responses, mock_elements_worker_with_cac } ], "worker_version": "12341234-1234-1234-1234-123412341234", + "worker_run_id": "56785678-5678-5678-5678-567856785678", } assert created_ids == [{"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08"}] @@ -1007,6 +1010,7 @@ def test_create_elements_cached_element(responses, mock_elements_worker_with_cac image_id="c0fec0fe-c0fe-c0fe-c0fe-c0fec0fec0fe", polygon=[[1, 1], [2, 2], [2, 1], [1, 2]], worker_version_id=UUID("12341234-1234-1234-1234-123412341234"), + worker_run_id=UUID("56785678-5678-5678-5678-567856785678"), ), ] @@ -1061,6 +1065,7 @@ def test_create_elements(responses, mock_elements_worker_with_cache, tmp_path): } ], "worker_version": "12341234-1234-1234-1234-123412341234", + "worker_run_id": "56785678-5678-5678-5678-567856785678", } assert created_ids == [{"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08"}] @@ -1075,6 +1080,7 @@ def test_create_elements(responses, mock_elements_worker_with_cache, tmp_path): image_id="c0fec0fe-c0fe-c0fe-c0fe-c0fec0fec0fe", polygon=[[1, 1], [2, 2], [2, 1], [1, 2]], worker_version_id=UUID("12341234-1234-1234-1234-123412341234"), + worker_run_id=UUID("56785678-5678-5678-5678-567856785678"), confidence=None, ) ] @@ -1134,6 +1140,7 @@ def test_create_elements_confidence( } ], "worker_version": "12341234-1234-1234-1234-123412341234", + "worker_run_id": "56785678-5678-5678-5678-567856785678", } assert created_ids == [{"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08"}] @@ -1148,6 +1155,7 @@ def test_create_elements_confidence( image_id="c0fec0fe-c0fe-c0fe-c0fe-c0fec0fec0fe", polygon=[[1, 1], [2, 2], [2, 1], [1, 2]], worker_version_id=UUID("12341234-1234-1234-1234-123412341234"), + worker_run_id=UUID("56785678-5678-5678-5678-567856785678"), confidence=0.42, ) ] @@ -1392,6 +1400,7 @@ def test_list_element_children(responses, mock_elements_worker): "best_classes": None, "has_children": None, "worker_version_id": None, + "worker_run_id": None, }, { "id": "1111", @@ -1403,6 +1412,7 @@ def test_list_element_children(responses, mock_elements_worker): "best_classes": None, "has_children": None, "worker_version_id": None, + "worker_run_id": None, }, { "id": "2222", @@ -1414,6 +1424,7 @@ def test_list_element_children(responses, mock_elements_worker): "best_classes": None, "has_children": None, "worker_version_id": None, + "worker_run_id": None, }, ] responses.add( @@ -1456,6 +1467,7 @@ def test_list_element_children_manual_worker_version(responses, mock_elements_wo "best_classes": None, "has_children": None, "worker_version_id": None, + "worker_run_id": None, } ] responses.add( diff --git a/tests/test_elements_worker/test_entities.py b/tests/test_elements_worker/test_entities.py index 0df2fe4a..82012dcb 100644 --- a/tests/test_elements_worker/test_entities.py +++ b/tests/test_elements_worker/test_entities.py @@ -187,6 +187,7 @@ def test_create_entity(responses, mock_elements_worker): "validated": None, "corpus": "11111111-1111-1111-1111-111111111111", "worker_version": "12341234-1234-1234-1234-123412341234", + "worker_run_id": "56785678-5678-5678-5678-567856785678", } assert entity_id == "12345678-1234-1234-1234-123456789123" @@ -220,6 +221,7 @@ def test_create_entity_with_cache(responses, mock_elements_worker_with_cache): "validated": None, "corpus": "11111111-1111-1111-1111-111111111111", "worker_version": "12341234-1234-1234-1234-123412341234", + "worker_run_id": "56785678-5678-5678-5678-567856785678", } assert entity_id == "12345678-1234-1234-1234-123456789123" @@ -232,6 +234,7 @@ def test_create_entity_with_cache(responses, mock_elements_worker_with_cache): validated=False, metas={}, worker_version_id=UUID("12341234-1234-1234-1234-123412341234"), + worker_run_id=UUID("56785678-5678-5678-5678-567856785678"), ) ] @@ -419,6 +422,7 @@ def test_create_transcription_entity_no_confidence(responses, mock_elements_work "offset": 5, "length": 10, "worker_version_id": "12341234-1234-1234-1234-123412341234", + "worker_run_id": "56785678-5678-5678-5678-567856785678", } @@ -457,6 +461,7 @@ def test_create_transcription_entity_with_confidence(responses, mock_elements_wo "offset": 5, "length": 10, "worker_version_id": "12341234-1234-1234-1234-123412341234", + "worker_run_id": "56785678-5678-5678-5678-567856785678", "confidence": 0.33, } @@ -496,6 +501,7 @@ def test_create_transcription_entity_confidence_none(responses, mock_elements_wo "offset": 5, "length": 10, "worker_version_id": "12341234-1234-1234-1234-123412341234", + "worker_run_id": "56785678-5678-5678-5678-567856785678", } @@ -513,12 +519,14 @@ def test_create_transcription_entity_with_cache( confidence=0.42, orientation=TextOrientation.HorizontalLeftToRight, worker_version_id=UUID("12341234-1234-1234-1234-123412341234"), + worker_run_id=UUID("56785678-5678-5678-5678-567856785678"), ) CachedEntity.create( id=UUID("11111111-1111-1111-1111-111111111111"), type="person", name="Bob Bob", worker_version_id=UUID("12341234-1234-1234-1234-123412341234"), + worker_run_id=UUID("56785678-5678-5678-5678-567856785678"), ) responses.add( @@ -553,6 +561,7 @@ def test_create_transcription_entity_with_cache( "offset": 5, "length": 10, "worker_version_id": "12341234-1234-1234-1234-123412341234", + "worker_run_id": "56785678-5678-5678-5678-567856785678", } # Check that created transcription entity was properly stored in SQLite cache @@ -563,6 +572,7 @@ def test_create_transcription_entity_with_cache( offset=5, length=10, worker_version_id=UUID("12341234-1234-1234-1234-123412341234"), + worker_run_id=UUID("56785678-5678-5678-5678-567856785678"), ) ] @@ -581,12 +591,14 @@ def test_create_transcription_entity_with_confidence_with_cache( confidence=0.42, orientation=TextOrientation.HorizontalLeftToRight, worker_version_id=UUID("12341234-1234-1234-1234-123412341234"), + worker_run_id=UUID("56785678-5678-5678-5678-567856785678"), ) CachedEntity.create( id=UUID("11111111-1111-1111-1111-111111111111"), type="person", name="Bob Bob", worker_version_id=UUID("12341234-1234-1234-1234-123412341234"), + worker_run_id=UUID("56785678-5678-5678-5678-567856785678"), ) responses.add( @@ -623,6 +635,7 @@ def test_create_transcription_entity_with_confidence_with_cache( "offset": 5, "length": 10, "worker_version_id": "12341234-1234-1234-1234-123412341234", + "worker_run_id": "56785678-5678-5678-5678-567856785678", "confidence": 0.77, } @@ -634,6 +647,7 @@ def test_create_transcription_entity_with_confidence_with_cache( offset=5, length=10, worker_version_id=UUID("12341234-1234-1234-1234-123412341234"), + worker_run_id=UUID("56785678-5678-5678-5678-567856785678"), confidence=0.77, ) ] diff --git a/tests/test_elements_worker/test_metadata.py b/tests/test_elements_worker/test_metadata.py index feb19c6d..40e2b423 100644 --- a/tests/test_elements_worker/test_metadata.py +++ b/tests/test_elements_worker/test_metadata.py @@ -194,6 +194,7 @@ def test_create_metadata(responses, mock_elements_worker): "value": "La Turbine, Grenoble 38000", "entity_id": None, "worker_version": "12341234-1234-1234-1234-123412341234", + "worker_run_id": "56785678-5678-5678-5678-567856785678", } assert metadata_id == "12345678-1234-1234-1234-123456789123" @@ -221,8 +222,8 @@ def test_create_metadatas(responses, mock_elements_worker, metadatas): "http://testserver/api/v1/element/12341234-1234-1234-1234-123412341234/metadata/bulk/", status=201, json={ - "worker_version": mock_elements_worker.worker_version_id, - "worker_run_id": mock_elements_worker.worker_run_id, + "worker_version": "12341234-1234-1234-1234-123412341234", + "worker_run_id": "56785678-5678-5678-5678-567856785678", "metadata_list": [ { "id": "fake_metadata_id", diff --git a/tests/test_elements_worker/test_transcriptions.py b/tests/test_elements_worker/test_transcriptions.py index 0411db22..655df6e4 100644 --- a/tests/test_elements_worker/test_transcriptions.py +++ b/tests/test_elements_worker/test_transcriptions.py @@ -134,6 +134,7 @@ def test_create_transcription_default_orientation(responses, mock_elements_worke "text": "Animula vagula blandula", "confidence": 0.42, "worker_version_id": "12341234-1234-1234-1234-123412341234", + "worker_run_id": "56785678-5678-5678-5678-567856785678", }, ) mock_elements_worker.create_transcription( @@ -144,6 +145,7 @@ def test_create_transcription_default_orientation(responses, mock_elements_worke assert json.loads(responses.calls[-1].request.body) == { "text": "Animula vagula blandula", "worker_version": "12341234-1234-1234-1234-123412341234", + "worker_run_id": "56785678-5678-5678-5678-567856785678", "confidence": 0.42, "orientation": "horizontal-lr", } @@ -160,6 +162,7 @@ def test_create_transcription_orientation(responses, mock_elements_worker): "text": "Animula vagula blandula", "confidence": 0.42, "worker_version_id": "12341234-1234-1234-1234-123412341234", + "worker_run_id": "56785678-5678-5678-5678-567856785678", }, ) mock_elements_worker.create_transcription( @@ -171,6 +174,7 @@ def test_create_transcription_orientation(responses, mock_elements_worker): assert json.loads(responses.calls[-1].request.body) == { "text": "Animula vagula blandula", "worker_version": "12341234-1234-1234-1234-123412341234", + "worker_run_id": "56785678-5678-5678-5678-567856785678", "confidence": 0.42, "orientation": "vertical-lr", } @@ -230,6 +234,7 @@ def test_create_transcription(responses, mock_elements_worker): "text": "i am a line", "confidence": 0.42, "worker_version_id": "12341234-1234-1234-1234-123412341234", + "worker_run_id": "56785678-5678-5678-5678-567856785678", }, ) @@ -249,6 +254,7 @@ def test_create_transcription(responses, mock_elements_worker): assert json.loads(responses.calls[-1].request.body) == { "text": "i am a line", "worker_version": "12341234-1234-1234-1234-123412341234", + "worker_run_id": "56785678-5678-5678-5678-567856785678", "confidence": 0.42, "orientation": "horizontal-lr", } @@ -267,6 +273,7 @@ def test_create_transcription_with_cache(responses, mock_elements_worker_with_ca "confidence": 0.42, "orientation": "horizontal-lr", "worker_version_id": "12341234-1234-1234-1234-123412341234", + "worker_run_id": "56785678-5678-5678-5678-567856785678", }, ) @@ -286,6 +293,7 @@ def test_create_transcription_with_cache(responses, mock_elements_worker_with_ca assert json.loads(responses.calls[-1].request.body) == { "text": "i am a line", "worker_version": "12341234-1234-1234-1234-123412341234", + "worker_run_id": "56785678-5678-5678-5678-567856785678", "orientation": "horizontal-lr", "confidence": 0.42, } @@ -299,6 +307,7 @@ def test_create_transcription_with_cache(responses, mock_elements_worker_with_ca confidence=0.42, orientation=TextOrientation.HorizontalLeftToRight, worker_version_id=UUID("12341234-1234-1234-1234-123412341234"), + worker_run_id=UUID("56785678-5678-5678-5678-567856785678"), ) ] @@ -317,6 +326,7 @@ def test_create_transcription_orientation_with_cache( "confidence": 0.42, "orientation": "vertical-lr", "worker_version_id": "12341234-1234-1234-1234-123412341234", + "worker_run_id": "56785678-5678-5678-5678-567856785678", }, ) mock_elements_worker_with_cache.create_transcription( @@ -328,6 +338,7 @@ def test_create_transcription_orientation_with_cache( assert json.loads(responses.calls[-1].request.body) == { "text": "Animula vagula blandula", "worker_version": "12341234-1234-1234-1234-123412341234", + "worker_run_id": "56785678-5678-5678-5678-567856785678", "orientation": "vertical-lr", "confidence": 0.42, } @@ -345,12 +356,14 @@ def test_create_transcription_orientation_with_cache( "mirrored": False, "initial": False, "worker_version_id": None, + "worker_run_id": None, "confidence": None, }, "text": "Animula vagula blandula", "confidence": 0.42, "orientation": TextOrientation.VerticalLeftToRight.value, "worker_version_id": UUID("12341234-1234-1234-1234-123412341234"), + "worker_run_id": UUID("56785678-5678-5678-5678-567856785678"), } ] @@ -663,6 +676,7 @@ def test_create_transcriptions(responses, mock_elements_worker_with_cache): status=200, json={ "worker_version": "12341234-1234-1234-1234-123412341234", + "worker_run_id": "56785678-5678-5678-5678-567856785678", "transcriptions": [ { "id": "00000000-0000-0000-0000-000000000000", @@ -695,6 +709,7 @@ def test_create_transcriptions(responses, mock_elements_worker_with_cache): assert json.loads(responses.calls[-1].request.body) == { "worker_version": "12341234-1234-1234-1234-123412341234", + "worker_run_id": "56785678-5678-5678-5678-567856785678", "transcriptions": [ { "element_id": "11111111-1111-1111-1111-111111111111", @@ -720,6 +735,7 @@ def test_create_transcriptions(responses, mock_elements_worker_with_cache): confidence=0.75, orientation=TextOrientation.HorizontalLeftToRight, worker_version_id=UUID("12341234-1234-1234-1234-123412341234"), + worker_run_id=UUID("56785678-5678-5678-5678-567856785678"), ), CachedTranscription( id=UUID("11111111-1111-1111-1111-111111111111"), @@ -728,6 +744,7 @@ def test_create_transcriptions(responses, mock_elements_worker_with_cache): confidence=0.42, orientation=TextOrientation.HorizontalLeftToRight, worker_version_id=UUID("12341234-1234-1234-1234-123412341234"), + worker_run_id=UUID("56785678-5678-5678-5678-567856785678"), ), ] @@ -755,6 +772,7 @@ def test_create_transcriptions_orientation(responses, mock_elements_worker_with_ status=200, json={ "worker_version": "12341234-1234-1234-1234-123412341234", + "worker_run_id": "56785678-5678-5678-5678-567856785678", "transcriptions": [ { "id": "00000000-0000-0000-0000-000000000000", @@ -780,6 +798,7 @@ def test_create_transcriptions_orientation(responses, mock_elements_worker_with_ assert json.loads(responses.calls[-1].request.body) == { "worker_version": "12341234-1234-1234-1234-123412341234", + "worker_run_id": "56785678-5678-5678-5678-567856785678", "transcriptions": [ { "element_id": "11111111-1111-1111-1111-111111111111", @@ -810,12 +829,14 @@ def test_create_transcriptions_orientation(responses, mock_elements_worker_with_ "mirrored": False, "initial": False, "worker_version_id": None, + "worker_run_id": None, "confidence": None, }, "text": "Animula vagula blandula", "confidence": 0.12, "orientation": TextOrientation.HorizontalRightToLeft.value, "worker_version_id": UUID("12341234-1234-1234-1234-123412341234"), + "worker_run_id": UUID("56785678-5678-5678-5678-567856785678"), }, { "id": UUID("11111111-1111-1111-1111-111111111111"), @@ -829,12 +850,14 @@ def test_create_transcriptions_orientation(responses, mock_elements_worker_with_ "mirrored": False, "initial": False, "worker_version_id": None, + "worker_run_id": None, "confidence": None, }, "text": "Hospes comesque corporis", "confidence": 0.21, "orientation": TextOrientation.VerticalLeftToRight.value, "worker_version_id": UUID("12341234-1234-1234-1234-123412341234"), + "worker_run_id": UUID("56785678-5678-5678-5678-567856785678"), }, ] @@ -1307,6 +1330,7 @@ def test_create_element_transcriptions(responses, mock_elements_worker): assert json.loads(responses.calls[-1].request.body) == { "element_type": "page", "worker_version": "12341234-1234-1234-1234-123412341234", + "worker_run_id": "56785678-5678-5678-5678-567856785678", "transcriptions": [ { "polygon": [[100, 150], [700, 150], [700, 200], [100, 200]], @@ -1392,6 +1416,7 @@ def test_create_element_transcriptions_with_cache( assert json.loads(responses.calls[-1].request.body) == { "element_type": "page", "worker_version": "12341234-1234-1234-1234-123412341234", + "worker_run_id": "56785678-5678-5678-5678-567856785678", "transcriptions": [ { "polygon": [[100, 150], [700, 150], [700, 200], [100, 200]], @@ -1440,6 +1465,7 @@ def test_create_element_transcriptions_with_cache( type="page", polygon="[[100, 150], [700, 150], [700, 200], [100, 200]]", worker_version_id=UUID("12341234-1234-1234-1234-123412341234"), + worker_run_id=UUID("56785678-5678-5678-5678-567856785678"), ), CachedElement( id=UUID("22222222-2222-2222-2222-222222222222"), @@ -1447,6 +1473,7 @@ def test_create_element_transcriptions_with_cache( type="page", polygon="[[0, 0], [2000, 0], [2000, 3000], [0, 3000]]", worker_version_id=UUID("12341234-1234-1234-1234-123412341234"), + worker_run_id=UUID("56785678-5678-5678-5678-567856785678"), ), ] assert list(CachedTranscription.select()) == [ @@ -1457,6 +1484,7 @@ def test_create_element_transcriptions_with_cache( confidence=0.5, orientation=TextOrientation.HorizontalLeftToRight.value, worker_version_id=UUID("12341234-1234-1234-1234-123412341234"), + worker_run_id=UUID("56785678-5678-5678-5678-567856785678"), ), CachedTranscription( id=UUID("67896789-6789-6789-6789-678967896789"), @@ -1465,6 +1493,7 @@ def test_create_element_transcriptions_with_cache( confidence=0.75, orientation=TextOrientation.HorizontalLeftToRight.value, worker_version_id=UUID("12341234-1234-1234-1234-123412341234"), + worker_run_id=UUID("56785678-5678-5678-5678-567856785678"), ), CachedTranscription( id=UUID("78907890-7890-7890-7890-789078907890"), @@ -1473,6 +1502,7 @@ def test_create_element_transcriptions_with_cache( confidence=0.9, orientation=TextOrientation.HorizontalLeftToRight.value, worker_version_id=UUID("12341234-1234-1234-1234-123412341234"), + worker_run_id=UUID("56785678-5678-5678-5678-567856785678"), ), ] @@ -1534,6 +1564,7 @@ def test_create_transcriptions_orientation_with_cache( assert json.loads(responses.calls[-1].request.body) == { "element_type": "page", "worker_version": "12341234-1234-1234-1234-123412341234", + "worker_run_id": "56785678-5678-5678-5678-567856785678", "transcriptions": [ { "polygon": [[100, 150], [700, 150], [700, 200], [100, 200]], @@ -1588,12 +1619,14 @@ def test_create_transcriptions_orientation_with_cache( "mirrored": False, "initial": False, "worker_version_id": UUID("12341234-1234-1234-1234-123412341234"), + "worker_run_id": UUID("56785678-5678-5678-5678-567856785678"), "confidence": None, }, "text": "Animula vagula blandula", "confidence": 0.5, "orientation": TextOrientation.HorizontalLeftToRight.value, "worker_version_id": UUID("12341234-1234-1234-1234-123412341234"), + "worker_run_id": UUID("56785678-5678-5678-5678-567856785678"), }, { "id": UUID("67896789-6789-6789-6789-678967896789"), @@ -1607,12 +1640,14 @@ def test_create_transcriptions_orientation_with_cache( "mirrored": False, "initial": False, "worker_version_id": UUID("12341234-1234-1234-1234-123412341234"), + "worker_run_id": UUID("56785678-5678-5678-5678-567856785678"), "confidence": None, }, "text": "Hospes comesque corporis", "confidence": 0.75, "orientation": TextOrientation.VerticalLeftToRight.value, "worker_version_id": UUID("12341234-1234-1234-1234-123412341234"), + "worker_run_id": UUID("56785678-5678-5678-5678-567856785678"), }, { "id": UUID("78907890-7890-7890-7890-789078907890"), @@ -1626,12 +1661,14 @@ def test_create_transcriptions_orientation_with_cache( "mirrored": False, "initial": False, "worker_version_id": UUID("12341234-1234-1234-1234-123412341234"), + "worker_run_id": UUID("56785678-5678-5678-5678-567856785678"), "confidence": None, }, "text": "Quae nunc abibis in loca", "confidence": 0.9, "orientation": TextOrientation.HorizontalRightToLeft.value, "worker_version_id": UUID("12341234-1234-1234-1234-123412341234"), + "worker_run_id": UUID("56785678-5678-5678-5678-567856785678"), }, ] -- GitLab