From 82c14adaec57dd1031603f2da0b3b670162348af Mon Sep 17 00:00:00 2001
From: Chaza Abdelwahab <abdelwahab@teklia.com>
Date: Wed, 28 Sep 2022 08:38:31 +0000
Subject: [PATCH] Report Transcription Entity creation

---
 arkindex_worker/reporting.py                |  27 +++++
 arkindex_worker/worker/entity.py            |  14 +--
 tests/test_elements_worker/test_entities.py | 107 ++++++++++++++++----
 tests/test_reporting.py                     |  38 +++++++
 4 files changed, 162 insertions(+), 24 deletions(-)

diff --git a/arkindex_worker/reporting.py b/arkindex_worker/reporting.py
index d20c8fae..b9ee03ef 100644
--- a/arkindex_worker/reporting.py
+++ b/arkindex_worker/reporting.py
@@ -14,6 +14,7 @@ from uuid import UUID
 from apistar.exceptions import ErrorResponse
 
 from arkindex_worker import logger
+from arkindex_worker.models import Transcription
 
 
 class Reporter(object):
@@ -53,6 +54,8 @@ class Reporter(object):
                 "classifications": {},
                 # Created entities ({"id": "", "type": "", "name": ""}) from this element
                 "entities": [],
+                # Created transcription entities ({"transcription_id": "", "entity_id": "", "transcription_entity_id": ""}) from this element
+                "transcription_entities": [],
                 # Created metadata ({"id": "", "type": "", "name": ""}) from this element
                 "metadata": [],
                 "errors": [],
@@ -141,6 +144,30 @@ class Reporter(object):
         entities = self._get_element(element_id)["entities"]
         entities.append({"id": entity_id, "type": type, "name": name})
 
+    def add_transcription_entity(
+        self,
+        entity_id: Union[str, UUID],
+        transcription: Transcription,
+        transcription_entity_id: Union[str, UUID],
+    ):
+        """
+        Report creating a transcription entity on an element.
+
+        :param entity_id: ID of the entity linked to the transcription.
+        :param transcription: Transcription the entity is linked to; the entry is reported on its element.
+        :param transcription_entity_id: ID of the transcription entity that was created.
+        """
+        transcription_entities = self._get_element(transcription.element.id)[
+            "transcription_entities"
+        ]
+        transcription_entities.append(
+            {
+                "transcription_id": transcription.id,
+                "entity_id": entity_id,
+                "transcription_entity_id": transcription_entity_id,
+            }
+        )
+
     def add_entity_link(self, *args, **kwargs):
         """
         Report creating an entity link. Not currently supported.
diff --git a/arkindex_worker/worker/entity.py b/arkindex_worker/worker/entity.py
index 48e60262..5be7824e 100644
--- a/arkindex_worker/worker/entity.py
+++ b/arkindex_worker/worker/entity.py
@@ -96,7 +96,7 @@ class EntityMixin(object):
 
     def create_transcription_entity(
         self,
-        transcription: str,
+        transcription: Transcription,
         entity: str,
         offset: int,
         length: int,
@@ -106,7 +106,7 @@ class EntityMixin(object):
         Create a link between an existing entity and an existing transcription.
         If cache support is enabled, a `CachedTranscriptionEntity` will also be created.
 
-        :param transcription: UUID of the existing transcription.
+        :param transcription: Transcription to create the entity on.
         :param entity: UUID of the existing entity.
         :param offset: Starting position of the entity in the transcription's text,
            as a 0-based index.
@@ -116,8 +116,8 @@ class EntityMixin(object):
            or None if the worker is in read-only mode.
         """
         assert transcription and isinstance(
-            transcription, str
-        ), "transcription shouldn't be null and should be of type str"
+            transcription, Transcription
+        ), "transcription shouldn't be null and should be a Transcription"
         assert entity and isinstance(
             entity, str
         ), "entity shouldn't be null and should be of type str"
@@ -147,16 +147,16 @@ class EntityMixin(object):
 
         transcription_ent = self.request(
             "CreateTranscriptionEntity",
-            id=transcription,
+            id=transcription.id,
             body=body,
         )
-        # TODO: Report transcription entity creation
+        self.report.add_transcription_entity(entity, transcription, transcription_ent)
 
         if self.use_cache:
             # Store transcription entity in local cache
             try:
                 CachedTranscriptionEntity.create(
-                    transcription=transcription,
+                    transcription=transcription.id,
                     entity=entity,
                     offset=offset,
                     length=length,
diff --git a/tests/test_elements_worker/test_entities.py b/tests/test_elements_worker/test_entities.py
index b0cba5f1..4d9e4459 100644
--- a/tests/test_elements_worker/test_entities.py
+++ b/tests/test_elements_worker/test_entities.py
@@ -244,7 +244,9 @@ def test_create_transcription_entity_wrong_transcription(mock_elements_worker):
             offset=5,
             length=10,
         )
-    assert str(e.value) == "transcription shouldn't be null and should be of type str"
+    assert (
+        str(e.value) == "transcription shouldn't be null and should be a Transcription"
+    )
 
     with pytest.raises(AssertionError) as e:
         mock_elements_worker.create_transcription_entity(
@@ -253,13 +255,20 @@ def test_create_transcription_entity_wrong_transcription(mock_elements_worker):
             offset=5,
             length=10,
         )
-    assert str(e.value) == "transcription shouldn't be null and should be of type str"
+    assert (
+        str(e.value) == "transcription shouldn't be null and should be a Transcription"
+    )
 
 
 def test_create_transcription_entity_wrong_entity(mock_elements_worker):
     with pytest.raises(AssertionError) as e:
         mock_elements_worker.create_transcription_entity(
-            transcription="11111111-1111-1111-1111-111111111111",
+            transcription=Transcription(
+                {
+                    "id": "11111111-1111-1111-1111-111111111111",
+                    "element": {"id": "myelement"},
+                }
+            ),
             entity=None,
             offset=5,
             length=10,
@@ -268,7 +277,12 @@ def test_create_transcription_entity_wrong_entity(mock_elements_worker):
 
     with pytest.raises(AssertionError) as e:
         mock_elements_worker.create_transcription_entity(
-            transcription="11111111-1111-1111-1111-111111111111",
+            transcription=Transcription(
+                {
+                    "id": "11111111-1111-1111-1111-111111111111",
+                    "element": {"id": "myelement"},
+                }
+            ),
             entity=1234,
             offset=5,
             length=10,
@@ -279,7 +293,12 @@ def test_create_transcription_entity_wrong_entity(mock_elements_worker):
 def test_create_transcription_entity_wrong_offset(mock_elements_worker):
     with pytest.raises(AssertionError) as e:
         mock_elements_worker.create_transcription_entity(
-            transcription="11111111-1111-1111-1111-111111111111",
+            transcription=Transcription(
+                {
+                    "id": "11111111-1111-1111-1111-111111111111",
+                    "element": {"id": "myelement"},
+                }
+            ),
             entity="11111111-1111-1111-1111-111111111111",
             offset=None,
             length=10,
@@ -288,7 +307,12 @@ def test_create_transcription_entity_wrong_offset(mock_elements_worker):
 
     with pytest.raises(AssertionError) as e:
         mock_elements_worker.create_transcription_entity(
-            transcription="11111111-1111-1111-1111-111111111111",
+            transcription=Transcription(
+                {
+                    "id": "11111111-1111-1111-1111-111111111111",
+                    "element": {"id": "myelement"},
+                }
+            ),
             entity="11111111-1111-1111-1111-111111111111",
             offset="not an int",
             length=10,
@@ -297,7 +321,12 @@ def test_create_transcription_entity_wrong_offset(mock_elements_worker):
 
     with pytest.raises(AssertionError) as e:
         mock_elements_worker.create_transcription_entity(
-            transcription="11111111-1111-1111-1111-111111111111",
+            transcription=Transcription(
+                {
+                    "id": "11111111-1111-1111-1111-111111111111",
+                    "element": {"id": "myelement"},
+                }
+            ),
             entity="11111111-1111-1111-1111-111111111111",
             offset=-1,
             length=10,
@@ -308,7 +337,12 @@ def test_create_transcription_entity_wrong_offset(mock_elements_worker):
 def test_create_transcription_entity_wrong_length(mock_elements_worker):
     with pytest.raises(AssertionError) as e:
         mock_elements_worker.create_transcription_entity(
-            transcription="11111111-1111-1111-1111-111111111111",
+            transcription=Transcription(
+                {
+                    "id": "11111111-1111-1111-1111-111111111111",
+                    "element": {"id": "myelement"},
+                }
+            ),
             entity="11111111-1111-1111-1111-111111111111",
             offset=5,
             length=None,
@@ -320,7 +354,12 @@ def test_create_transcription_entity_wrong_length(mock_elements_worker):
 
     with pytest.raises(AssertionError) as e:
         mock_elements_worker.create_transcription_entity(
-            transcription="11111111-1111-1111-1111-111111111111",
+            transcription=Transcription(
+                {
+                    "id": "11111111-1111-1111-1111-111111111111",
+                    "element": {"id": "myelement"},
+                }
+            ),
             entity="11111111-1111-1111-1111-111111111111",
             offset=5,
             length="not an int",
@@ -332,7 +371,12 @@ def test_create_transcription_entity_wrong_length(mock_elements_worker):
 
     with pytest.raises(AssertionError) as e:
         mock_elements_worker.create_transcription_entity(
-            transcription="11111111-1111-1111-1111-111111111111",
+            transcription=Transcription(
+                {
+                    "id": "11111111-1111-1111-1111-111111111111",
+                    "element": {"id": "myelement"},
+                }
+            ),
             entity="11111111-1111-1111-1111-111111111111",
             offset=5,
             length=0,
@@ -352,7 +396,12 @@ def test_create_transcription_entity_api_error(responses, mock_elements_worker):
 
     with pytest.raises(ErrorResponse):
         mock_elements_worker.create_transcription_entity(
-            transcription="11111111-1111-1111-1111-111111111111",
+            transcription=Transcription(
+                {
+                    "id": "11111111-1111-1111-1111-111111111111",
+                    "element": {"id": "myelement"},
+                }
+            ),
             entity="11111111-1111-1111-1111-111111111111",
             offset=5,
             length=10,
@@ -399,7 +448,12 @@ def test_create_transcription_entity_no_confidence(responses, mock_elements_work
     )
 
     mock_elements_worker.create_transcription_entity(
-        transcription="11111111-1111-1111-1111-111111111111",
+        transcription=Transcription(
+            {
+                "id": "11111111-1111-1111-1111-111111111111",
+                "element": {"id": "myelement"},
+            }
+        ),
         entity="11111111-1111-1111-1111-111111111111",
         offset=5,
         length=10,
@@ -436,7 +490,12 @@ def test_create_transcription_entity_with_confidence(responses, mock_elements_wo
     )
 
     mock_elements_worker.create_transcription_entity(
-        transcription="11111111-1111-1111-1111-111111111111",
+        transcription=Transcription(
+            {
+                "id": "11111111-1111-1111-1111-111111111111",
+                "element": {"id": "myelement"},
+            }
+        ),
         entity="11111111-1111-1111-1111-111111111111",
         offset=5,
         length=10,
@@ -475,7 +534,12 @@ def test_create_transcription_entity_confidence_none(responses, mock_elements_wo
     )
 
     mock_elements_worker.create_transcription_entity(
-        transcription="11111111-1111-1111-1111-111111111111",
+        transcription=Transcription(
+            {
+                "id": "11111111-1111-1111-1111-111111111111",
+                "element": {"id": "myelement"},
+            }
+        ),
         entity="11111111-1111-1111-1111-111111111111",
         offset=5,
         length=10,
@@ -533,7 +597,12 @@ def test_create_transcription_entity_with_cache(
     )
 
     mock_elements_worker_with_cache.create_transcription_entity(
-        transcription="11111111-1111-1111-1111-111111111111",
+        transcription=Transcription(
+            {
+                "id": "11111111-1111-1111-1111-111111111111",
+                "element": {"id": "myelement"},
+            }
+        ),
         entity="11111111-1111-1111-1111-111111111111",
         offset=5,
         length=10,
@@ -554,7 +623,6 @@ def test_create_transcription_entity_with_cache(
         "length": 10,
         "worker_run_id": "56785678-5678-5678-5678-567856785678",
     }
-
     # Check that created transcription entity was properly stored in SQLite cache
     assert list(CachedTranscriptionEntity.select()) == [
         CachedTranscriptionEntity(
@@ -602,7 +670,12 @@ def test_create_transcription_entity_with_confidence_with_cache(
     )
 
     mock_elements_worker_with_cache.create_transcription_entity(
-        transcription="11111111-1111-1111-1111-111111111111",
+        transcription=Transcription(
+            {
+                "id": "11111111-1111-1111-1111-111111111111",
+                "element": {"id": "myelement"},
+            }
+        ),
         entity="11111111-1111-1111-1111-111111111111",
         offset=5,
         length=10,
diff --git a/tests/test_reporting.py b/tests/test_reporting.py
index 11e45c52..1ef13e85 100644
--- a/tests/test_reporting.py
+++ b/tests/test_reporting.py
@@ -7,6 +7,7 @@ from tempfile import NamedTemporaryFile
 import pytest
 from apistar.exceptions import ErrorResponse
 
+from arkindex_worker.models import Transcription
 from arkindex_worker.reporting import Reporter
 
 
@@ -35,6 +36,7 @@ def test_process():
         "transcriptions": 0,
         "classifications": {},
         "entities": [],
+        "transcription_entities": [],
         "metadata": [],
         "errors": [],
     }
@@ -51,6 +53,7 @@ def test_add_element():
         "transcriptions": 0,
         "classifications": {},
         "entities": [],
+        "transcription_entities": [],
         "metadata": [],
         "errors": [],
     }
@@ -70,6 +73,7 @@ def test_add_element_count():
         "transcriptions": 0,
         "classifications": {},
         "entities": [],
+        "transcription_entities": [],
         "metadata": [],
         "errors": [],
     }
@@ -86,6 +90,7 @@ def test_add_classification():
         "transcriptions": 0,
         "classifications": {"three": 1},
         "entities": [],
+        "transcription_entities": [],
         "metadata": [],
         "errors": [],
     }
@@ -116,6 +121,7 @@ def test_add_classifications():
         "transcriptions": 0,
         "classifications": {"three": 3, "two": 2},
         "entities": [],
+        "transcription_entities": [],
         "metadata": [],
         "errors": [],
     }
@@ -132,6 +138,7 @@ def test_add_transcription():
         "transcriptions": 1,
         "classifications": {},
         "entities": [],
+        "transcription_entities": [],
         "metadata": [],
         "errors": [],
     }
@@ -151,6 +158,7 @@ def test_add_transcription_count():
         "transcriptions": 1337,
         "classifications": {},
         "entities": [],
+        "transcription_entities": [],
         "metadata": [],
         "errors": [],
     }
@@ -175,6 +183,34 @@ def test_add_entity():
                 "name": "Bob Bob",
             }
         ],
+        "transcription_entities": [],
+        "metadata": [],
+        "errors": [],
+    }
+
+
+def test_add_transcription_entity():
+    reporter = Reporter("worker")
+    reporter.add_transcription_entity(
+        "5678",
+        Transcription({"id": "1234-5678", "element": {"id": "myelement"}}),
+        "1234",
+    )
+    assert "myelement" in reporter.report_data["elements"]
+    element_data = reporter.report_data["elements"]["myelement"]
+    del element_data["started"]
+    assert element_data == {
+        "elements": {},
+        "transcriptions": 0,
+        "classifications": {},
+        "entities": [],
+        "transcription_entities": [
+            {
+                "transcription_id": "1234-5678",
+                "entity_id": "5678",
+                "transcription_entity_id": "1234",
+            }
+        ],
         "metadata": [],
         "errors": [],
     }
@@ -193,6 +229,7 @@ def test_add_metadata():
         "transcriptions": 0,
         "classifications": {},
         "entities": [],
+        "transcription_entities": [],
         "metadata": [
             {
                 "id": "12341234-1234-1234-1234-123412341234",
@@ -246,6 +283,7 @@ def test_reporter_save(mocker):
                 "classifications": {},
                 "elements": {"text_line": 4},
                 "entities": [],
+                "transcription_entities": [],
                 "errors": [],
                 "metadata": [],
                 "started": "2000-01-01T00:00:00",
-- 
GitLab