From 82c14adaec57dd1031603f2da0b3b670162348af Mon Sep 17 00:00:00 2001 From: Chaza Abdelwahab <abdelwahab@teklia.com> Date: Wed, 28 Sep 2022 08:38:31 +0000 Subject: [PATCH] Report Transcription Entity creation --- arkindex_worker/reporting.py | 27 +++++ arkindex_worker/worker/entity.py | 14 +-- tests/test_elements_worker/test_entities.py | 107 ++++++++++++++++---- tests/test_reporting.py | 38 +++++++ 4 files changed, 162 insertions(+), 24 deletions(-) diff --git a/arkindex_worker/reporting.py b/arkindex_worker/reporting.py index d20c8fae..b9ee03ef 100644 --- a/arkindex_worker/reporting.py +++ b/arkindex_worker/reporting.py @@ -14,6 +14,7 @@ from uuid import UUID from apistar.exceptions import ErrorResponse from arkindex_worker import logger +from arkindex_worker.models import Transcription class Reporter(object): @@ -53,6 +54,8 @@ class Reporter(object): "classifications": {}, # Created entities ({"id": "", "type": "", "name": ""}) from this element "entities": [], + # Created transcription entities ({"transcription_id": "", "entity_id": ""}) from this element + "transcription_entities": [], # Created metadata ({"id": "", "type": "", "name": ""}) from this element "metadata": [], "errors": [], @@ -141,6 +144,30 @@ class Reporter(object): entities = self._get_element(element_id)["entities"] entities.append({"id": entity_id, "type": type, "name": name}) + def add_transcription_entity( + self, + entity_id: Union[str, UUID], + transcription: Transcription, + transcription_entity_id: Union[str, UUID], + ): + """ + Report creating a transcription entity on an element. + + :param entity_id: ID of the entity element. + :param transcription: Transcription to add the entity on + :param transcription_entity_id: ID of the transcription entity that is created. + """ + transcription_entities = self._get_element(transcription.element.id)[ + "transcription_entities" + ] + transcription_entities.append( + { + "transcription_id": transcription.id, + "entity_id": entity_id, + "transcription_entity_id": transcription_entity_id, + } + ) + def add_entity_link(self, *args, **kwargs): """ Report creating an entity link. Not currently supported. diff --git a/arkindex_worker/worker/entity.py b/arkindex_worker/worker/entity.py index 48e60262..5be7824e 100644 --- a/arkindex_worker/worker/entity.py +++ b/arkindex_worker/worker/entity.py @@ -96,7 +96,7 @@ class EntityMixin(object): def create_transcription_entity( self, - transcription: str, + transcription: Transcription, entity: str, offset: int, length: int, @@ -106,7 +106,7 @@ class EntityMixin(object): Create a link between an existing entity and an existing transcription. If cache support is enabled, a `CachedTranscriptionEntity` will also be created. - :param transcription: UUID of the existing transcription. + :param transcription: Transcription to create the entity on. :param entity: UUID of the existing entity. :param offset: Starting position of the entity in the transcription's text, as a 0-based index. @@ -116,8 +116,8 @@ class EntityMixin(object): or None if the worker is in read-only mode. """ assert transcription and isinstance( - transcription, str - ), "transcription shouldn't be null and should be of type str" + transcription, Transcription + ), "transcription shouldn't be null and should be a Transcription" assert entity and isinstance( entity, str ), "entity shouldn't be null and should be of type str" @@ -147,16 +147,16 @@ class EntityMixin(object): transcription_ent = self.request( "CreateTranscriptionEntity", - id=transcription, + id=transcription.id, body=body, ) - # TODO: Report transcription entity creation + self.report.add_transcription_entity(entity, transcription, transcription_ent) if self.use_cache: # Store transcription entity in local cache try: CachedTranscriptionEntity.create( - transcription=transcription, + transcription=transcription.id, entity=entity, offset=offset, length=length, diff --git a/tests/test_elements_worker/test_entities.py b/tests/test_elements_worker/test_entities.py index b0cba5f1..4d9e4459 100644 --- a/tests/test_elements_worker/test_entities.py +++ b/tests/test_elements_worker/test_entities.py @@ -244,7 +244,9 @@ def test_create_transcription_entity_wrong_transcription(mock_elements_worker): offset=5, length=10, ) - assert str(e.value) == "transcription shouldn't be null and should be of type str" + assert ( + str(e.value) == "transcription shouldn't be null and should be a Transcription" + ) with pytest.raises(AssertionError) as e: mock_elements_worker.create_transcription_entity( @@ -253,13 +255,20 @@ def test_create_transcription_entity_wrong_transcription(mock_elements_worker): offset=5, length=10, ) - assert str(e.value) == "transcription shouldn't be null and should be of type str" + assert ( + str(e.value) == "transcription shouldn't be null and should be a Transcription" + ) def test_create_transcription_entity_wrong_entity(mock_elements_worker): with pytest.raises(AssertionError) as e: mock_elements_worker.create_transcription_entity( - transcription="11111111-1111-1111-1111-111111111111", + transcription=Transcription( + { + "id": "11111111-1111-1111-1111-111111111111", + "element": {"id": "myelement"}, + } + ), entity=None, offset=5, length=10, @@ -268,7 +277,12 @@ def test_create_transcription_entity_wrong_entity(mock_elements_worker): with pytest.raises(AssertionError) as e: mock_elements_worker.create_transcription_entity( - transcription="11111111-1111-1111-1111-111111111111", + transcription=Transcription( + { + "id": "11111111-1111-1111-1111-111111111111", + "element": {"id": "myelement"}, + } + ), entity=1234, offset=5, length=10, @@ -279,7 +293,12 @@ def test_create_transcription_entity_wrong_entity(mock_elements_worker): def test_create_transcription_entity_wrong_offset(mock_elements_worker): with pytest.raises(AssertionError) as e: mock_elements_worker.create_transcription_entity( - transcription="11111111-1111-1111-1111-111111111111", + transcription=Transcription( + { + "id": "11111111-1111-1111-1111-111111111111", + "element": {"id": "myelement"}, + } + ), entity="11111111-1111-1111-1111-111111111111", offset=None, length=10, @@ -288,7 +307,12 @@ def test_create_transcription_entity_wrong_offset(mock_elements_worker): with pytest.raises(AssertionError) as e: mock_elements_worker.create_transcription_entity( - transcription="11111111-1111-1111-1111-111111111111", + transcription=Transcription( + { + "id": "11111111-1111-1111-1111-111111111111", + "element": {"id": "myelement"}, + } + ), entity="11111111-1111-1111-1111-111111111111", offset="not an int", length=10, @@ -297,7 +321,12 @@ def test_create_transcription_entity_wrong_offset(mock_elements_worker): with pytest.raises(AssertionError) as e: mock_elements_worker.create_transcription_entity( - transcription="11111111-1111-1111-1111-111111111111", + transcription=Transcription( + { + "id": "11111111-1111-1111-1111-111111111111", + "element": {"id": "myelement"}, + } + ), entity="11111111-1111-1111-1111-111111111111", offset=-1, length=10, @@ -308,7 +337,12 @@ def test_create_transcription_entity_wrong_offset(mock_elements_worker): def test_create_transcription_entity_wrong_length(mock_elements_worker): with pytest.raises(AssertionError) as e: mock_elements_worker.create_transcription_entity( - transcription="11111111-1111-1111-1111-111111111111", + transcription=Transcription( + { + "id": "11111111-1111-1111-1111-111111111111", + "element": {"id": "myelement"}, + } + ), entity="11111111-1111-1111-1111-111111111111", offset=5, length=None, @@ -320,7 +354,12 @@ def test_create_transcription_entity_wrong_length(mock_elements_worker): with pytest.raises(AssertionError) as e: mock_elements_worker.create_transcription_entity( - transcription="11111111-1111-1111-1111-111111111111", + transcription=Transcription( + { + "id": "11111111-1111-1111-1111-111111111111", + "element": {"id": "myelement"}, + } + ), entity="11111111-1111-1111-1111-111111111111", offset=5, length="not an int", @@ -332,7 +371,12 @@ def test_create_transcription_entity_wrong_length(mock_elements_worker): with pytest.raises(AssertionError) as e: mock_elements_worker.create_transcription_entity( - transcription="11111111-1111-1111-1111-111111111111", + transcription=Transcription( + { + "id": "11111111-1111-1111-1111-111111111111", + "element": {"id": "myelement"}, + } + ), entity="11111111-1111-1111-1111-111111111111", offset=5, length=0, @@ -352,7 +396,12 @@ def test_create_transcription_entity_api_error(responses, mock_elements_worker): with pytest.raises(ErrorResponse): mock_elements_worker.create_transcription_entity( - transcription="11111111-1111-1111-1111-111111111111", + transcription=Transcription( + { + "id": "11111111-1111-1111-1111-111111111111", + "element": {"id": "myelement"}, + } + ), entity="11111111-1111-1111-1111-111111111111", offset=5, length=10, @@ -399,7 +448,12 @@ def test_create_transcription_entity_no_confidence(responses, mock_elements_work ) mock_elements_worker.create_transcription_entity( - transcription="11111111-1111-1111-1111-111111111111", + transcription=Transcription( + { + "id": "11111111-1111-1111-1111-111111111111", + "element": {"id": "myelement"}, + } + ), entity="11111111-1111-1111-1111-111111111111", offset=5, length=10, @@ -436,7 +490,12 @@ def test_create_transcription_entity_with_confidence(responses, mock_elements_wo ) mock_elements_worker.create_transcription_entity( - transcription="11111111-1111-1111-1111-111111111111", + transcription=Transcription( + { + "id": "11111111-1111-1111-1111-111111111111", + "element": {"id": "myelement"}, + } + ), entity="11111111-1111-1111-1111-111111111111", offset=5, length=10, @@ -475,7 +534,12 @@ def test_create_transcription_entity_confidence_none(responses, mock_elements_wo ) mock_elements_worker.create_transcription_entity( - transcription="11111111-1111-1111-1111-111111111111", + transcription=Transcription( + { + "id": "11111111-1111-1111-1111-111111111111", + "element": {"id": "myelement"}, + } + ), entity="11111111-1111-1111-1111-111111111111", offset=5, length=10, @@ -533,7 +597,12 @@ def test_create_transcription_entity_with_cache( ) mock_elements_worker_with_cache.create_transcription_entity( - transcription="11111111-1111-1111-1111-111111111111", + transcription=Transcription( + { + "id": "11111111-1111-1111-1111-111111111111", + "element": {"id": "myelement"}, + } + ), entity="11111111-1111-1111-1111-111111111111", offset=5, length=10, @@ -554,7 +623,6 @@ def test_create_transcription_entity_with_cache( "length": 10, "worker_run_id": "56785678-5678-5678-5678-567856785678", } - # Check that created transcription entity was properly stored in SQLite cache assert list(CachedTranscriptionEntity.select()) == [ CachedTranscriptionEntity( @@ -602,7 +670,12 @@ def test_create_transcription_entity_with_confidence_with_cache( ) mock_elements_worker_with_cache.create_transcription_entity( - transcription="11111111-1111-1111-1111-111111111111", + transcription=Transcription( + { + "id": "11111111-1111-1111-1111-111111111111", + "element": {"id": "myelement"}, + } + ), entity="11111111-1111-1111-1111-111111111111", offset=5, length=10, diff --git a/tests/test_reporting.py b/tests/test_reporting.py index 11e45c52..1ef13e85 100644 --- a/tests/test_reporting.py +++ b/tests/test_reporting.py @@ -7,6 +7,7 @@ from tempfile import NamedTemporaryFile import pytest from apistar.exceptions import ErrorResponse +from arkindex_worker.models import Transcription from arkindex_worker.reporting import Reporter @@ -35,6 +36,7 @@ def test_process(): "transcriptions": 0, "classifications": {}, "entities": [], + "transcription_entities": [], "metadata": [], "errors": [], } @@ -51,6 +53,7 @@ def test_add_element(): "transcriptions": 0, "classifications": {}, "entities": [], + "transcription_entities": [], "metadata": [], "errors": [], } @@ -70,6 +73,7 @@ def test_add_element_count(): "transcriptions": 0, "classifications": {}, "entities": [], + "transcription_entities": [], "metadata": [], "errors": [], } @@ -86,6 +90,7 @@ def test_add_classification(): "transcriptions": 0, "classifications": {"three": 1}, "entities": [], + "transcription_entities": [], "metadata": [], "errors": [], } @@ -116,6 +121,7 @@ def test_add_classifications(): "transcriptions": 0, "classifications": {"three": 3, "two": 2}, "entities": [], + "transcription_entities": [], "metadata": [], "errors": [], } @@ -132,6 +138,7 @@ def test_add_transcription(): "transcriptions": 1, "classifications": {}, "entities": [], + "transcription_entities": [], "metadata": [], "errors": [], } @@ -151,6 +158,7 @@ def test_add_transcription_count(): "transcriptions": 1337, "classifications": {}, "entities": [], + "transcription_entities": [], "metadata": [], "errors": [], } @@ -175,6 +183,34 @@ def test_add_entity(): "name": "Bob Bob", } ], + "transcription_entities": [], + "metadata": [], + "errors": [], + } + + +def test_add_transcription_entity(): + reporter = Reporter("worker") + reporter.add_transcription_entity( + "5678", + Transcription({"id": "1234-5678", "element": {"id": "myelement"}}), + "1234", + ) + assert "myelement" in reporter.report_data["elements"] + element_data = reporter.report_data["elements"]["myelement"] + del element_data["started"] + assert element_data == { + "elements": {}, + "transcriptions": 0, + "classifications": {}, + "entities": [], + "transcription_entities": [ + { + "transcription_id": "1234-5678", + "entity_id": "5678", + "transcription_entity_id": "1234", + } + ], "metadata": [], "errors": [], } @@ -193,6 +229,7 @@ def test_add_metadata(): "transcriptions": 0, "classifications": {}, "entities": [], + "transcription_entities": [], "metadata": [ { "id": "12341234-1234-1234-1234-123412341234", @@ -246,6 +283,7 @@ def test_reporter_save(mocker): "classifications": {}, "elements": {"text_line": 4}, "entities": [], + "transcription_entities": [], "errors": [], "metadata": [], "started": "2000-01-01T00:00:00", -- GitLab