def create_element_transcriptions(
    self, element, sub_element_type, transcription_type, transcriptions
):
    """
    Create multiple sub elements with their transcriptions on the given element through API

    Every argument is validated before the API is reached, so a malformed
    payload never triggers a request.

    :param element: Parent element; must be a non-null ``Element``.
    :param sub_element_type: Non-empty ``str``, sent as ``element_type`` in the request body.
    :param transcription_type: ``TranscriptionType`` member; its ``value`` is sent to the API.
    :param transcriptions: Non-empty ``list`` of dicts. Each dict must provide
        ``text`` (non-empty ``str``), ``score`` (``float`` within [0..1], bounds
        included) and ``polygon`` (``list`` of at least three two-number lists).
    :returns: The response of the ``CreateElementTranscriptions`` request, as returned
        by ``self.api_client.request``.
    :raises AssertionError: If any argument or any transcription fails validation.
    """
    assert element and isinstance(
        element, Element
    ), "element shouldn't be null and should be of type Element"
    assert sub_element_type and isinstance(
        sub_element_type, str
    ), "sub_element_type shouldn't be null and should be of type str"
    assert transcription_type and isinstance(
        transcription_type, TranscriptionType
    ), "transcription_type shouldn't be null and should be of type TranscriptionType"
    assert transcriptions and isinstance(
        transcriptions, list
    ), "transcriptions shouldn't be null and should be of type list"

    for index, transcription in enumerate(transcriptions):
        text = transcription.get("text")
        assert text and isinstance(
            text, str
        ), f"Transcription at index {index} in transcriptions: text shouldn't be null and should be of type str"

        score = transcription.get("score")
        # Do not test truthiness here: 0.0 is falsy but is a valid score.
        # isinstance() alone already rejects None, ints and strings.
        assert (
            isinstance(score, float) and 0 <= score <= 1
        ), f"Transcription at index {index} in transcriptions: score shouldn't be null and should be a float in [0..1] range"

        polygon = transcription.get("polygon")
        assert polygon and isinstance(
            polygon, list
        ), f"Transcription at index {index} in transcriptions: polygon shouldn't be null and should be of type list"
        assert (
            len(polygon) >= 3
        ), f"Transcription at index {index} in transcriptions: polygon should have at least three points"
        assert all(
            isinstance(point, list) and len(point) == 2 for point in polygon
        ), f"Transcription at index {index} in transcriptions: polygon points should be lists of two items"
        assert all(
            isinstance(coord, (int, float)) for point in polygon for coord in point
        ), f"Transcription at index {index} in transcriptions: polygon points should be lists of two numbers"

    annotations = self.api_client.request(
        "CreateElementTranscriptions",
        id=element.id,
        body={
            "element_type": sub_element_type,
            "transcription_type": transcription_type.value,
            "worker_version": self.worker_version_id,
            "transcriptions": transcriptions,
            "return_elements": True,
        },
    )
    for annotation in annotations:
        # Only sub elements flagged as newly created are added to the report.
        if annotation["created"]:
            logger.debug(
                f"A sub_element of {element.id} with type {sub_element_type} was created during transcriptions bulk creation"
            )
            self.report.add_element(element.id, sub_element_type)
        # NOTE(review): assumed to run for every returned annotation, not only
        # the created ones; the flattened patch loses this indentation, confirm.
        self.report.add_transcription(annotation["id"], transcription_type.value)

    return annotations
# Valid payload shared by the create_element_transcriptions tests.
TRANSCRIPTIONS_SAMPLE = [
    {
        "polygon": [[100, 150], [700, 150], [700, 200], [100, 200]],
        "score": 0.5,
        "text": "The",
    },
    {
        "polygon": [[0, 0], [2000, 0], [2000, 3000], [0, 3000]],
        "score": 0.75,
        "text": "first",
    },
    {
        "polygon": [[1000, 300], [1200, 300], [1200, 500], [1000, 500]],
        "score": 0.9,
        "text": "line",
    },
]


def _assert_bulk_creation_rejected(worker, message, **call_kwargs):
    """Call create_element_transcriptions and check it fails with exactly `message`."""
    with pytest.raises(AssertionError) as e:
        worker.create_element_transcriptions(**call_kwargs)
    assert str(e.value) == message


def test_create_element_transcriptions_wrong_element():
    """A null or non-Element `element` is refused."""
    worker = ElementsWorker()
    for bad_element in (None, "not element type"):
        _assert_bulk_creation_rejected(
            worker,
            "element shouldn't be null and should be of type Element",
            element=bad_element,
            sub_element_type="page",
            transcription_type=TranscriptionType.Word,
            transcriptions=TRANSCRIPTIONS_SAMPLE,
        )


def test_create_element_transcriptions_wrong_sub_element_type():
    """A null or non-str `sub_element_type` is refused."""
    worker = ElementsWorker()
    elt = Element({"zone": None})

    for bad_type in (None, 1234):
        _assert_bulk_creation_rejected(
            worker,
            "sub_element_type shouldn't be null and should be of type str",
            element=elt,
            sub_element_type=bad_type,
            transcription_type=TranscriptionType.Word,
            transcriptions=TRANSCRIPTIONS_SAMPLE,
        )


def test_create_element_transcriptions_wrong_transcription_type():
    """A null or non-TranscriptionType `transcription_type` is refused."""
    worker = ElementsWorker()
    elt = Element({"id": "12341234-1234-1234-1234-123412341234"})

    for bad_type in (None, 1234, "not_a_transcription_type"):
        _assert_bulk_creation_rejected(
            worker,
            "transcription_type shouldn't be null and should be of type TranscriptionType",
            element=elt,
            sub_element_type="page",
            transcription_type=bad_type,
            transcriptions=TRANSCRIPTIONS_SAMPLE,
        )


def test_create_element_transcriptions_wrong_transcriptions():
    """Each malformed `transcriptions` payload is refused with its dedicated message."""
    worker = ElementsWorker()
    elt = Element({"zone": None})

    # The container itself is wrong.
    for bad_container in (None, 1234):
        _assert_bulk_creation_rejected(
            worker,
            "transcriptions shouldn't be null and should be of type list",
            element=elt,
            sub_element_type="page",
            transcription_type=TranscriptionType.Word,
            transcriptions=bad_container,
        )

    text_message = "Transcription at index 1 in transcriptions: text shouldn't be null and should be of type str"
    score_message = "Transcription at index 1 in transcriptions: score shouldn't be null and should be a float in [0..1] range"
    polygon_type_message = "Transcription at index 1 in transcriptions: polygon shouldn't be null and should be of type list"
    polygon_length_message = "Transcription at index 1 in transcriptions: polygon should have at least three points"
    point_length_message = "Transcription at index 1 in transcriptions: polygon points should be lists of two items"
    point_number_message = "Transcription at index 1 in transcriptions: polygon points should be lists of two numbers"

    box = [[100, 150], [700, 150], [700, 200], [100, 200]]

    # Each entry is the faulty item placed at index 1, with the expected message.
    faulty_items = [
        # text: missing, null, wrong type
        ({"polygon": box, "score": 0.5}, text_message),
        ({"polygon": box, "score": 0.5, "text": None}, text_message),
        ({"polygon": box, "score": 0.5, "text": 1234}, text_message),
        # score: missing, null, wrong type, int zero, out of range
        ({"polygon": box, "text": "word"}, score_message),
        ({"polygon": box, "score": None, "text": "word"}, score_message),
        ({"polygon": box, "score": "a wrong score", "text": "word"}, score_message),
        ({"polygon": box, "score": 0, "text": "word"}, score_message),
        ({"polygon": box, "score": 2.00, "text": "word"}, score_message),
        # polygon: missing, null, wrong type
        ({"score": 0.5, "text": "word"}, polygon_type_message),
        ({"polygon": None, "score": 0.5, "text": "word"}, polygon_type_message),
        (
            {"polygon": "not a polygon", "score": 0.5, "text": "word"},
            polygon_type_message,
        ),
        # polygon: too few points
        (
            {"polygon": [[1, 1], [2, 2]], "score": 0.5, "text": "word"},
            polygon_length_message,
        ),
        # polygon: points that are not pairs
        (
            {
                "polygon": [[1, 1, 1], [2, 2, 1], [2, 1, 1], [1, 2, 1]],
                "score": 0.5,
                "text": "word",
            },
            point_length_message,
        ),
        (
            {"polygon": [[1], [2], [2], [1]], "score": 0.5, "text": "word"},
            point_length_message,
        ),
        # polygon: non-numeric coordinate
        (
            {
                "polygon": [["not a coord", 1], [2, 2], [2, 1], [1, 2]],
                "score": 0.5,
                "text": "word",
            },
            point_number_message,
        ),
    ]

    for faulty_item, message in faulty_items:
        valid_item = {
            "polygon": [[0, 0], [2000, 0], [2000, 3000], [0, 3000]],
            "score": 0.75,
            "text": "The",
        }
        _assert_bulk_creation_rejected(
            worker,
            message,
            element=elt,
            sub_element_type="page",
            transcription_type=TranscriptionType.Word,
            transcriptions=[valid_item, faulty_item],
        )


def test_create_element_transcriptions_api_error(responses):
    """A 500 from the bulk endpoint surfaces as ErrorResponse after a single call."""
    worker = ElementsWorker()
    worker.configure()
    elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
    bulk_url = (
        f"https://arkindex.teklia.com/api/v1/element/{elt.id}/transcriptions/bulk/"
    )
    responses.add(responses.POST, bulk_url, status=500)

    with pytest.raises(ErrorResponse):
        worker.create_element_transcriptions(
            element=elt,
            sub_element_type="page",
            transcription_type=TranscriptionType.Word,
            transcriptions=TRANSCRIPTIONS_SAMPLE,
        )

    assert len(responses.calls) == 1
    assert responses.calls[0].request.url == bulk_url


def test_create_element_transcriptions(responses):
    """A valid payload is posted once and the API answer is returned as is."""
    worker = ElementsWorker()
    worker.configure()
    elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
    bulk_url = (
        f"https://arkindex.teklia.com/api/v1/element/{elt.id}/transcriptions/bulk/"
    )
    responses.add(
        responses.POST,
        bulk_url,
        status=200,
        json=[
            {"id": "word1_1_1", "created": False},
            {"id": "word1_1_2", "created": False},
            {"id": "word1_1_3", "created": False},
        ],
    )

    annotations = worker.create_element_transcriptions(
        element=elt,
        sub_element_type="page",
        transcription_type=TranscriptionType.Word,
        transcriptions=TRANSCRIPTIONS_SAMPLE,
    )

    assert len(responses.calls) == 1
    sent_request = responses.calls[0].request
    assert sent_request.url == bulk_url
    assert json.loads(sent_request.body) == {
        "element_type": "page",
        "transcription_type": "word",
        "worker_version": "12341234-1234-1234-1234-123412341234",
        "transcriptions": TRANSCRIPTIONS_SAMPLE,
        "return_elements": True,
    }
    assert annotations == [
        {"id": "word1_1_1", "created": False},
        {"id": "word1_1_2", "created": False},
        {"id": "word1_1_3", "created": False},
    ]