From 03e666fd23f6ba6e41c127db08c469e1c4e913b1 Mon Sep 17 00:00:00 2001
From: mlbonhomme <bonhomme@teklia.com>
Date: Wed, 10 Nov 2021 18:04:33 +0100
Subject: [PATCH] no more lawlessly mutating things, update tests

---
 .isort.cfg                                    |   2 +-
 arkindex_worker/worker/transcription.py       |  43 ++--
 .../test_transcriptions.py                    | 228 +++++++++---------
 3 files changed, 131 insertions(+), 142 deletions(-)

diff --git a/.isort.cfg b/.isort.cfg
index 2fe5c98a..6d30d8e7 100644
--- a/.isort.cfg
+++ b/.isort.cfg
@@ -8,4 +8,4 @@ line_length = 88
 
 default_section=FIRSTPARTY
 known_first_party = arkindex,arkindex_common
-known_third_party =PIL,apistar,gitlab,gnupg,peewee,pytest,requests,setuptools,sh,tenacity,yaml
+known_third_party =PIL,apistar,gitlab,gnupg,peewee,playhouse,pytest,requests,setuptools,sh,tenacity,yaml
diff --git a/arkindex_worker/worker/transcription.py b/arkindex_worker/worker/transcription.py
index c415beb1..98e3f19a 100644
--- a/arkindex_worker/worker/transcription.py
+++ b/arkindex_worker/worker/transcription.py
@@ -85,7 +85,10 @@ class TranscriptionMixin(object):
             transcriptions, list
         ), "transcriptions shouldn't be null and should be of type list"
 
-        for index, transcription in enumerate(transcriptions):
+        # Create shallow copies of every transcription to avoid mutating the original payload
+        transcriptions_payload = list(map(dict, transcriptions))
+
+        for (index, transcription) in enumerate(transcriptions_payload):
             element_id = transcription.get("element_id")
             assert element_id and isinstance(
                 element_id, str
@@ -107,22 +110,14 @@ class TranscriptionMixin(object):
             assert orientation and isinstance(
                 orientation, TextOrientation
             ), f"Transcription at index {index} in transcriptions: orientation shouldn't be null and should be of type TextOrientation"
-            transcription["orientation"] = orientation
-
-        sent_transcriptions = [
-            {
-                "text": transcription["text"],
-                "score": transcription["score"],
-                "orientation": transcription["orientation"].value,
-            }
-            for transcription in transcriptions
-        ]
+            if orientation:
+                transcription["orientation"] = orientation.value
 
         created_trs = self.request(
             "CreateTranscriptions",
             body={
                 "worker_version": self.worker_version_id,
-                "transcriptions": sent_transcriptions,
+                "transcriptions": transcriptions_payload,
             },
         )["transcriptions"]
 
@@ -165,7 +160,10 @@ class TranscriptionMixin(object):
             transcriptions, list
         ), "transcriptions shouldn't be null and should be of type list"
 
-        for index, transcription in enumerate(transcriptions):
+        # Create shallow copies of every transcription to avoid mutating the original payload
+        transcriptions_payload = list(map(dict, transcriptions))
+
+        for (index, transcription) in enumerate(transcriptions_payload):
             text = transcription.get("text")
             assert text and isinstance(
                 text, str
@@ -182,7 +180,8 @@ class TranscriptionMixin(object):
             assert orientation and isinstance(
                 orientation, TextOrientation
             ), f"Transcription at index {index} in transcriptions: orientation shouldn't be null and should be of type TextOrientation"
-            transcription["orientation"] = orientation
+            if orientation:
+                transcription["orientation"] = orientation.value
 
             polygon = transcription.get("polygon")
             assert polygon and isinstance(
@@ -203,23 +202,13 @@ class TranscriptionMixin(object):
             )
             return
 
-        sent_transcriptions = [
-            {
-                "text": transcription["text"],
-                "score": transcription["score"],
-                "orientation": transcription["orientation"].value,
-                "polygon": transcription["polygon"],
-            }
-            for transcription in transcriptions
-        ]
-
         annotations = self.request(
             "CreateElementTranscriptions",
             id=element.id,
             body={
                 "element_type": sub_element_type,
                 "worker_version": self.worker_version_id,
-                "transcriptions": sent_transcriptions,
+                "transcriptions": transcriptions_payload,
                 "return_elements": True,
             },
         )
@@ -267,7 +256,9 @@ class TranscriptionMixin(object):
                         "element_id": annotation["element_id"],
                         "text": transcription["text"],
                         "confidence": transcription["score"],
-                        "orientation": transcription["orientation"].value,
+                        "orientation": transcription["orientation"].value
+                        if "orientation" in transcription
+                        else TextOrientation.HorizontalLeftToRight.value,
                         "worker_version_id": self.worker_version_id,
                     }
                 )
diff --git a/tests/test_elements_worker/test_transcriptions.py b/tests/test_elements_worker/test_transcriptions.py
index 1583d447..2094568d 100644
--- a/tests/test_elements_worker/test_transcriptions.py
+++ b/tests/test_elements_worker/test_transcriptions.py
@@ -4,6 +4,7 @@ from uuid import UUID
 
 import pytest
 from apistar.exceptions import ErrorResponse
+from playhouse.shortcuts import model_to_dict
 
 from arkindex_worker.cache import CachedElement, CachedTranscription
 from arkindex_worker.models import Element
@@ -127,7 +128,6 @@ def test_create_transcription_default_orientation(responses, mock_elements_worke
         json={
             "id": "56785678-5678-5678-5678-567856785678",
             "text": "Animula vagula blandula",
-            "score": 0.42,
             "confidence": 0.42,
             "worker_version_id": "12341234-1234-1234-1234-123412341234",
         },
@@ -154,7 +154,6 @@ def test_create_transcription_orientation(responses, mock_elements_worker):
         json={
             "id": "56785678-5678-5678-5678-567856785678",
             "text": "Animula vagula blandula",
-            "score": 0.42,
             "confidence": 0.42,
             "worker_version_id": "12341234-1234-1234-1234-123412341234",
         },
@@ -313,7 +312,6 @@ def test_create_transcription_orientation_with_cache(
         json={
             "id": "56785678-5678-5678-5678-567856785678",
             "text": "Animula vagula blandula",
-            "score": 0.42,
             "confidence": 0.42,
             "orientation": "vertical-lr",
             "worker_version_id": "12341234-1234-1234-1234-123412341234",
@@ -332,28 +330,20 @@ def test_create_transcription_orientation_with_cache(
         "score": 0.42,
     }
     # Check that the text orientation was properly stored in SQLite cache
-    assert list(CachedTranscription.select()) == [
-        CachedTranscription(
-            id=UUID("56785678-5678-5678-5678-567856785678"),
-        )
-    ]
-    assert [
-        {
-            k: getattr(transcription, k)
-            for k in [
-                "id",
-                "element_id",
-                "text",
-                "confidence",
-                "orientation",
-                "worker_version_id",
-            ]
-        }
-        for transcription in CachedTranscription.select()
-    ] == [
+    assert list(map(model_to_dict, CachedTranscription.select())) == [
         {
             "id": UUID("56785678-5678-5678-5678-567856785678"),
-            "element_id": UUID("12341234-1234-1234-1234-123412341234"),
+            "element": {
+                "id": UUID("12341234-1234-1234-1234-123412341234"),
+                "parent_id": None,
+                "type": "thing",
+                "image": None,
+                "polygon": None,
+                "rotation_angle": 0,
+                "mirrored": False,
+                "initial": False,
+                "worker_version_id": None,
+            },
             "text": "Animula vagula blandula",
             "confidence": 0.42,
             "orientation": TextOrientation.VerticalLeftToRight.value,
@@ -700,16 +690,17 @@ def test_create_transcriptions(responses, mock_elements_worker_with_cache):
         ("POST", "http://testserver/api/v1/transcription/bulk/"),
     ]
 
+    trans_response = list(map(dict, trans))
+    for transcription in trans_response:
+        transcription["orientation"] = (
+            transcription["orientation"].value
+            if "orientation" in transcription
+            else TextOrientation.HorizontalLeftToRight.value
+        )
+
     assert json.loads(responses.calls[-1].request.body) == {
         "worker_version": "12341234-1234-1234-1234-123412341234",
-        "transcriptions": [
-            {
-                "text": tr["text"],
-                "score": tr["score"],
-                "orientation": tr["orientation"].value,
-            }
-            for tr in trans
-        ],
+        "transcriptions": trans_response,
     }
 
     # Check that created transcriptions were properly stored in SQLite cache
@@ -779,38 +770,34 @@ def test_create_transcriptions_orientation(responses, mock_elements_worker_with_
         transcriptions=trans,
     )
 
+    trans_response = list(map(dict, trans))
+    for transcription in trans_response:
+        transcription["orientation"] = (
+            transcription["orientation"].value
+            if "orientation" in transcription
+            else TextOrientation.HorizontalLeftToRight.value
+        )
+
     assert json.loads(responses.calls[-1].request.body) == {
         "worker_version": "12341234-1234-1234-1234-123412341234",
-        "transcriptions": [
-            {
-                "text": tr["text"],
-                "score": tr["score"],
-                "orientation": tr["orientation"].value,
-            }
-            for tr in trans
-        ],
+        "transcriptions": trans_response,
     }
 
     # Check that oriented transcriptions were properly stored in SQLite cache
-    assert list(CachedTranscription.select()) == [
-        CachedTranscription(id=UUID("00000000-0000-0000-0000-000000000000")),
-        CachedTranscription(id=UUID("11111111-1111-1111-1111-111111111111")),
-    ]
-    assert [
-        {
-            k: getattr(transcription, k)
-            for k in [
-                "id",
-                "text",
-                "confidence",
-                "orientation",
-                "worker_version_id",
-            ]
-        }
-        for transcription in CachedTranscription.select()
-    ] == [
+    assert list(map(model_to_dict, CachedTranscription.select())) == [
         {
             "id": UUID("00000000-0000-0000-0000-000000000000"),
+            "element": {
+                "id": UUID("11111111-1111-1111-1111-111111111111"),
+                "parent_id": None,
+                "type": "thing",
+                "image": None,
+                "polygon": None,
+                "rotation_angle": 0,
+                "mirrored": False,
+                "initial": False,
+                "worker_version_id": None,
+            },
             "text": "Animula vagula blandula",
             "confidence": 0.12,
             "orientation": TextOrientation.HorizontalRightToLeft.value,
@@ -818,6 +805,17 @@ def test_create_transcriptions_orientation(responses, mock_elements_worker_with_
         },
         {
             "id": UUID("11111111-1111-1111-1111-111111111111"),
+            "element": {
+                "id": UUID("11111111-1111-1111-1111-111111111111"),
+                "parent_id": None,
+                "type": "thing",
+                "image": None,
+                "polygon": None,
+                "rotation_angle": 0,
+                "mirrored": False,
+                "initial": False,
+                "worker_version_id": None,
+            },
             "text": "Hospes comesque corporis",
             "confidence": 0.21,
             "orientation": TextOrientation.VerticalLeftToRight.value,
@@ -1291,20 +1289,18 @@ def test_create_element_transcriptions(responses, mock_elements_worker):
         ("POST", f"http://testserver/api/v1/element/{elt.id}/transcriptions/bulk/"),
     ]
 
-    sent_transcriptions_sample = [
-        {
-            "text": tr["text"],
-            "score": tr["score"],
-            "orientation": tr["orientation"].value,
-            "polygon": tr["polygon"],
-        }
-        for tr in TRANSCRIPTIONS_SAMPLE
-    ]
+    transcriptions_sample_response = list(map(dict, TRANSCRIPTIONS_SAMPLE))
+    for transcription in transcriptions_sample_response:
+        transcription["orientation"] = (
+            transcription["orientation"].value
+            if "orientation" in transcription
+            else TextOrientation.HorizontalLeftToRight.value
+        )
 
     assert json.loads(responses.calls[-1].request.body) == {
         "element_type": "page",
         "worker_version": "12341234-1234-1234-1234-123412341234",
-        "transcriptions": sent_transcriptions_sample,
+        "transcriptions": transcriptions_sample_response,
         "return_elements": True,
     }
     assert annotations == [
@@ -1367,20 +1363,18 @@ def test_create_element_transcriptions_with_cache(
         ("POST", f"http://testserver/api/v1/element/{elt.id}/transcriptions/bulk/"),
     ]
 
-    sent_transcriptions_sample = [
-        {
-            "text": tr["text"],
-            "score": tr["score"],
-            "orientation": tr["orientation"].value,
-            "polygon": tr["polygon"],
-        }
-        for tr in TRANSCRIPTIONS_SAMPLE
-    ]
+    transcriptions_sample_response = list(map(dict, TRANSCRIPTIONS_SAMPLE))
+    for transcription in transcriptions_sample_response:
+        transcription["orientation"] = (
+            transcription["orientation"].value
+            if "orientation" in transcription
+            else TextOrientation.HorizontalLeftToRight.value
+        )
 
     assert json.loads(responses.calls[-1].request.body) == {
         "element_type": "page",
         "worker_version": "12341234-1234-1234-1234-123412341234",
-        "transcriptions": sent_transcriptions_sample,
+        "transcriptions": transcriptions_sample_response,
         "return_elements": True,
     }
     assert annotations == [
@@ -1487,7 +1481,7 @@ def test_create_transcriptions_orientation_with_cache(
             "orientation": TextOrientation.VerticalLeftToRight,
         },
         {
-            "polygon": [[1000, 300], [1200, 300], [1200, 500], [1000, 500]],
+            "polygon": [[100, 150], [700, 150], [700, 200], [100, 200]],
             "score": 0.9,
             "text": "Quae nunc abibis in loca",
             "orientation": TextOrientation.HorizontalRightToLeft,
@@ -1500,20 +1494,18 @@ def test_create_transcriptions_orientation_with_cache(
         transcriptions=oriented_transcriptions,
     )
 
-    sent_oriented_transcriptions = [
-        {
-            "text": tr["text"],
-            "score": tr["score"],
-            "orientation": tr["orientation"].value,
-            "polygon": tr["polygon"],
-        }
-        for tr in oriented_transcriptions
-    ]
+    oriented_transcriptions_response = list(map(dict, oriented_transcriptions))
+    for transcription in oriented_transcriptions_response:
+        transcription["orientation"] = (
+            transcription["orientation"].value
+            if "orientation" in transcription
+            else TextOrientation.HorizontalLeftToRight.value
+        )
 
     assert json.loads(responses.calls[-1].request.body) == {
         "element_type": "page",
         "worker_version": "12341234-1234-1234-1234-123412341234",
-        "transcriptions": sent_oriented_transcriptions,
+        "transcriptions": oriented_transcriptions_response,
         "return_elements": True,
     }
     assert annotations == [
@@ -1535,34 +1527,20 @@ def test_create_transcriptions_orientation_with_cache(
     ]
 
     # Check that the text orientation was properly stored in SQLite cache
-    assert list(CachedTranscription.select()) == [
-        CachedTranscription(
-            id=UUID("56785678-5678-5678-5678-567856785678"),
-        ),
-        CachedTranscription(
-            id=UUID("67896789-6789-6789-6789-678967896789"),
-        ),
-        CachedTranscription(
-            id=UUID("78907890-7890-7890-7890-789078907890"),
-        ),
-    ]
-    assert [
-        {
-            k: getattr(transcription, k)
-            for k in [
-                "id",
-                "element_id",
-                "text",
-                "confidence",
-                "orientation",
-                "worker_version_id",
-            ]
-        }
-        for transcription in CachedTranscription.select()
-    ] == [
+    assert list(map(model_to_dict, CachedTranscription.select())) == [
         {
             "id": UUID("56785678-5678-5678-5678-567856785678"),
-            "element_id": UUID("11111111-1111-1111-1111-111111111111"),
+            "element": {
+                "id": UUID("11111111-1111-1111-1111-111111111111"),
+                "parent_id": UUID(elt.id),
+                "type": "page",
+                "image": None,
+                "polygon": [[100, 150], [700, 150], [700, 200], [100, 200]],
+                "rotation_angle": 0,
+                "mirrored": False,
+                "initial": False,
+                "worker_version_id": UUID("12341234-1234-1234-1234-123412341234"),
+            },
             "text": "Animula vagula blandula",
             "confidence": 0.5,
             "orientation": TextOrientation.HorizontalLeftToRight.value,
@@ -1570,7 +1548,17 @@ def test_create_transcriptions_orientation_with_cache(
         },
         {
             "id": UUID("67896789-6789-6789-6789-678967896789"),
-            "element_id": UUID("22222222-2222-2222-2222-222222222222"),
+            "element": {
+                "id": UUID("22222222-2222-2222-2222-222222222222"),
+                "parent_id": UUID(elt.id),
+                "type": "page",
+                "image": None,
+                "polygon": [[0, 0], [2000, 0], [2000, 3000], [0, 3000]],
+                "rotation_angle": 0,
+                "mirrored": False,
+                "initial": False,
+                "worker_version_id": UUID("12341234-1234-1234-1234-123412341234"),
+            },
             "text": "Hospes comesque corporis",
             "confidence": 0.75,
             "orientation": TextOrientation.VerticalLeftToRight.value,
@@ -1578,7 +1566,17 @@ def test_create_transcriptions_orientation_with_cache(
         },
         {
             "id": UUID("78907890-7890-7890-7890-789078907890"),
-            "element_id": UUID("11111111-1111-1111-1111-111111111111"),
+            "element": {
+                "id": UUID("11111111-1111-1111-1111-111111111111"),
+                "parent_id": UUID(elt.id),
+                "type": "page",
+                "image": None,
+                "polygon": [[100, 150], [700, 150], [700, 200], [100, 200]],
+                "rotation_angle": 0,
+                "mirrored": False,
+                "initial": False,
+                "worker_version_id": UUID("12341234-1234-1234-1234-123412341234"),
+            },
             "text": "Quae nunc abibis in loca",
             "confidence": 0.9,
             "orientation": TextOrientation.HorizontalRightToLeft.value,
-- 
GitLab