From 03e666fd23f6ba6e41c127db08c469e1c4e913b1 Mon Sep 17 00:00:00 2001 From: mlbonhomme <bonhomme@teklia.com> Date: Wed, 10 Nov 2021 18:04:33 +0100 Subject: [PATCH] no more lawlessly mutating things, update tests --- .isort.cfg | 2 +- arkindex_worker/worker/transcription.py | 43 ++-- .../test_transcriptions.py | 228 +++++++++--------- 3 files changed, 131 insertions(+), 142 deletions(-) diff --git a/.isort.cfg b/.isort.cfg index 2fe5c98a..6d30d8e7 100644 --- a/.isort.cfg +++ b/.isort.cfg @@ -8,4 +8,4 @@ line_length = 88 default_section=FIRSTPARTY known_first_party = arkindex,arkindex_common -known_third_party =PIL,apistar,gitlab,gnupg,peewee,pytest,requests,setuptools,sh,tenacity,yaml +known_third_party =PIL,apistar,gitlab,gnupg,peewee,playhouse,pytest,requests,setuptools,sh,tenacity,yaml diff --git a/arkindex_worker/worker/transcription.py b/arkindex_worker/worker/transcription.py index c415beb1..98e3f19a 100644 --- a/arkindex_worker/worker/transcription.py +++ b/arkindex_worker/worker/transcription.py @@ -85,7 +85,10 @@ class TranscriptionMixin(object): transcriptions, list ), "transcriptions shouldn't be null and should be of type list" - for index, transcription in enumerate(transcriptions): + # Create shallow copies of every transcription to avoid mutating the original payload + transcriptions_payload = list(map(dict, transcriptions)) + + for (index, transcription) in enumerate(transcriptions_payload): element_id = transcription.get("element_id") assert element_id and isinstance( element_id, str @@ -107,22 +110,14 @@ class TranscriptionMixin(object): assert orientation and isinstance( orientation, TextOrientation ), f"Transcription at index {index} in transcriptions: orientation shouldn't be null and should be of type TextOrientation" - transcription["orientation"] = orientation - - sent_transcriptions = [ - { - "text": transcription["text"], - "score": transcription["score"], - "orientation": transcription["orientation"].value, - } - for transcription in transcriptions - ] + if orientation: + transcription["orientation"] = orientation.value created_trs = self.request( "CreateTranscriptions", body={ "worker_version": self.worker_version_id, - "transcriptions": sent_transcriptions, + "transcriptions": transcriptions_payload, }, )["transcriptions"] @@ -165,7 +160,10 @@ class TranscriptionMixin(object): transcriptions, list ), "transcriptions shouldn't be null and should be of type list" - for index, transcription in enumerate(transcriptions): + # Create shallow copies of every transcription to avoid mutating the original payload + transcriptions_payload = list(map(dict, transcriptions)) + + for (index, transcription) in enumerate(transcriptions_payload): text = transcription.get("text") assert text and isinstance( text, str @@ -182,7 +180,8 @@ class TranscriptionMixin(object): assert orientation and isinstance( orientation, TextOrientation ), f"Transcription at index {index} in transcriptions: orientation shouldn't be null and should be of type TextOrientation" - transcription["orientation"] = orientation + if orientation: + transcription["orientation"] = orientation.value polygon = transcription.get("polygon") assert polygon and isinstance( @@ -203,23 +202,13 @@ class TranscriptionMixin(object): ) return - sent_transcriptions = [ - { - "text": transcription["text"], - "score": transcription["score"], - "orientation": transcription["orientation"].value, - "polygon": transcription["polygon"], - } - for transcription in transcriptions - ] - annotations = self.request( "CreateElementTranscriptions", id=element.id, body={ "element_type": sub_element_type, "worker_version": self.worker_version_id, - "transcriptions": sent_transcriptions, + "transcriptions": transcriptions_payload, "return_elements": True, }, ) @@ -267,7 +256,9 @@ class TranscriptionMixin(object): "element_id": annotation["element_id"], "text": transcription["text"], "confidence": transcription["score"], - "orientation": transcription["orientation"].value, + "orientation": transcription["orientation"].value + if "orientation" in transcription + else TextOrientation.HorizontalLeftToRight.value, "worker_version_id": self.worker_version_id, } ) diff --git a/tests/test_elements_worker/test_transcriptions.py b/tests/test_elements_worker/test_transcriptions.py index 1583d447..2094568d 100644 --- a/tests/test_elements_worker/test_transcriptions.py +++ b/tests/test_elements_worker/test_transcriptions.py @@ -4,6 +4,7 @@ from uuid import UUID import pytest from apistar.exceptions import ErrorResponse +from playhouse.shortcuts import model_to_dict from arkindex_worker.cache import CachedElement, CachedTranscription from arkindex_worker.models import Element @@ -127,7 +128,6 @@ def test_create_transcription_default_orientation(responses, mock_elements_worke json={ "id": "56785678-5678-5678-5678-567856785678", "text": "Animula vagula blandula", - "score": 0.42, "confidence": 0.42, "worker_version_id": "12341234-1234-1234-1234-123412341234", }, @@ -154,7 +154,6 @@ def test_create_transcription_orientation(responses, mock_elements_worker): json={ "id": "56785678-5678-5678-5678-567856785678", "text": "Animula vagula blandula", - "score": 0.42, "confidence": 0.42, "worker_version_id": "12341234-1234-1234-1234-123412341234", }, @@ -313,7 +312,6 @@ def test_create_transcription_orientation_with_cache( json={ "id": "56785678-5678-5678-5678-567856785678", "text": "Animula vagula blandula", - "score": 0.42, "confidence": 0.42, "orientation": "vertical-lr", "worker_version_id": "12341234-1234-1234-1234-123412341234", @@ -332,28 +330,20 @@ def test_create_transcription_orientation_with_cache( "score": 0.42, } # Check that the text orientation was properly stored in SQLite cache - assert list(CachedTranscription.select()) == [ - CachedTranscription( - id=UUID("56785678-5678-5678-5678-567856785678"), - ) - ] - assert [ - { - k: getattr(transcription, k) - for k in [ - "id", - "element_id", - "text", - "confidence", - "orientation", - "worker_version_id", - ] - } - for transcription in CachedTranscription.select() - ] == [ + assert list(map(model_to_dict, CachedTranscription.select())) == [ { "id": UUID("56785678-5678-5678-5678-567856785678"), - "element_id": UUID("12341234-1234-1234-1234-123412341234"), + "element": { + "id": UUID("12341234-1234-1234-1234-123412341234"), + "parent_id": None, + "type": "thing", + "image": None, + "polygon": None, + "rotation_angle": 0, + "mirrored": False, + "initial": False, + "worker_version_id": None, + }, "text": "Animula vagula blandula", "confidence": 0.42, "orientation": TextOrientation.VerticalLeftToRight.value, @@ -700,16 +690,17 @@ def test_create_transcriptions(responses, mock_elements_worker_with_cache): ("POST", "http://testserver/api/v1/transcription/bulk/"), ] + trans_response = list(map(dict, trans)) + for transcription in trans_response: + transcription["orientation"] = ( + transcription["orientation"].value + if "orientation" in transcription + else TextOrientation.HorizontalLeftToRight.value + ) + assert json.loads(responses.calls[-1].request.body) == { "worker_version": "12341234-1234-1234-1234-123412341234", - "transcriptions": [ - { - "text": tr["text"], - "score": tr["score"], - "orientation": tr["orientation"].value, - } - for tr in trans - ], + "transcriptions": trans_response, } # Check that created transcriptions were properly stored in SQLite cache @@ -779,38 +770,34 @@ def test_create_transcriptions_orientation(responses, mock_elements_worker_with_ transcriptions=trans, ) + trans_response = list(map(dict, trans)) + for transcription in trans_response: + transcription["orientation"] = ( + transcription["orientation"].value + if "orientation" in transcription + else TextOrientation.HorizontalLeftToRight.value + ) + assert json.loads(responses.calls[-1].request.body) == { "worker_version": "12341234-1234-1234-1234-123412341234", - "transcriptions": [ - { - "text": tr["text"], - "score": tr["score"], - "orientation": tr["orientation"].value, - } - for tr in trans - ], + "transcriptions": trans_response, } # Check that oriented transcriptions were properly stored in SQLite cache - assert list(CachedTranscription.select()) == [ - CachedTranscription(id=UUID("00000000-0000-0000-0000-000000000000")), - CachedTranscription(id=UUID("11111111-1111-1111-1111-111111111111")), - ] - assert [ - { - k: getattr(transcription, k) - for k in [ - "id", - "text", - "confidence", - "orientation", - "worker_version_id", - ] - } - for transcription in CachedTranscription.select() - ] == [ + assert list(map(model_to_dict, CachedTranscription.select())) == [ { "id": UUID("00000000-0000-0000-0000-000000000000"), + "element": { + "id": UUID("11111111-1111-1111-1111-111111111111"), + "parent_id": None, + "type": "thing", + "image": None, + "polygon": None, + "rotation_angle": 0, + "mirrored": False, + "initial": False, + "worker_version_id": None, + }, "text": "Animula vagula blandula", "confidence": 0.12, "orientation": TextOrientation.HorizontalRightToLeft.value, @@ -818,6 +805,17 @@ def test_create_transcriptions_orientation(responses, mock_elements_worker_with_ }, { "id": UUID("11111111-1111-1111-1111-111111111111"), + "element": { + "id": UUID("11111111-1111-1111-1111-111111111111"), + "parent_id": None, + "type": "thing", + "image": None, + "polygon": None, + "rotation_angle": 0, + "mirrored": False, + "initial": False, + "worker_version_id": None, + }, "text": "Hospes comesque corporis", "confidence": 0.21, "orientation": TextOrientation.VerticalLeftToRight.value, @@ -1291,20 +1289,18 @@ def test_create_element_transcriptions(responses, mock_elements_worker): ("POST", f"http://testserver/api/v1/element/{elt.id}/transcriptions/bulk/"), ] - sent_transcriptions_sample = [ - { - "text": tr["text"], - "score": tr["score"], - "orientation": tr["orientation"].value, - "polygon": tr["polygon"], - } - for tr in TRANSCRIPTIONS_SAMPLE - ] + transcriptions_sample_response = list(map(dict, TRANSCRIPTIONS_SAMPLE)) + for transcription in transcriptions_sample_response: + transcription["orientation"] = ( + transcription["orientation"].value + if "orientation" in transcription + else TextOrientation.HorizontalLeftToRight.value + ) assert json.loads(responses.calls[-1].request.body) == { "element_type": "page", "worker_version": "12341234-1234-1234-1234-123412341234", - "transcriptions": sent_transcriptions_sample, + "transcriptions": transcriptions_sample_response, "return_elements": True, } assert annotations == [ @@ -1367,20 +1363,18 @@ def test_create_element_transcriptions_with_cache( ("POST", f"http://testserver/api/v1/element/{elt.id}/transcriptions/bulk/"), ] - sent_transcriptions_sample = [ - { - "text": tr["text"], - "score": tr["score"], - "orientation": tr["orientation"].value, - "polygon": tr["polygon"], - } - for tr in TRANSCRIPTIONS_SAMPLE - ] + transcriptions_sample_response = list(map(dict, TRANSCRIPTIONS_SAMPLE)) + for transcription in transcriptions_sample_response: + transcription["orientation"] = ( + transcription["orientation"].value + if "orientation" in transcription + else TextOrientation.HorizontalLeftToRight.value + ) assert json.loads(responses.calls[-1].request.body) == { "element_type": "page", "worker_version": "12341234-1234-1234-1234-123412341234", - "transcriptions": sent_transcriptions_sample, + "transcriptions": transcriptions_sample_response, "return_elements": True, } assert annotations == [ @@ -1487,7 +1481,7 @@ def test_create_transcriptions_orientation_with_cache( "orientation": TextOrientation.VerticalLeftToRight, }, { - "polygon": [[1000, 300], [1200, 300], [1200, 500], [1000, 500]], + "polygon": [[100, 150], [700, 150], [700, 200], [100, 200]], "score": 0.9, "text": "Quae nunc abibis in loca", "orientation": TextOrientation.HorizontalRightToLeft, @@ -1500,20 +1494,18 @@ def test_create_transcriptions_orientation_with_cache( transcriptions=oriented_transcriptions, ) - sent_oriented_transcriptions = [ - { - "text": tr["text"], - "score": tr["score"], - "orientation": tr["orientation"].value, - "polygon": tr["polygon"], - } - for tr in oriented_transcriptions - ] + oriented_transcriptions_response = list(map(dict, oriented_transcriptions)) + for transcription in oriented_transcriptions_response: + transcription["orientation"] = ( + transcription["orientation"].value + if "orientation" in transcription + else TextOrientation.HorizontalLeftToRight.value + ) assert json.loads(responses.calls[-1].request.body) == { "element_type": "page", "worker_version": "12341234-1234-1234-1234-123412341234", - "transcriptions": sent_oriented_transcriptions, + "transcriptions": oriented_transcriptions_response, "return_elements": True, } assert annotations == [ @@ -1535,34 +1527,20 @@ def test_create_transcriptions_orientation_with_cache( ] # Check that the text orientation was properly stored in SQLite cache - assert list(CachedTranscription.select()) == [ - CachedTranscription( - id=UUID("56785678-5678-5678-5678-567856785678"), - ), - CachedTranscription( - id=UUID("67896789-6789-6789-6789-678967896789"), - ), - CachedTranscription( - id=UUID("78907890-7890-7890-7890-789078907890"), - ), - ] - assert [ - { - k: getattr(transcription, k) - for k in [ - "id", - "element_id", - "text", - "confidence", - "orientation", - "worker_version_id", - ] - } - for transcription in CachedTranscription.select() - ] == [ + assert list(map(model_to_dict, CachedTranscription.select())) == [ { "id": UUID("56785678-5678-5678-5678-567856785678"), - "element_id": UUID("11111111-1111-1111-1111-111111111111"), + "element": { + "id": UUID("11111111-1111-1111-1111-111111111111"), + "parent_id": UUID(elt.id), + "type": "page", + "image": None, + "polygon": [[100, 150], [700, 150], [700, 200], [100, 200]], + "rotation_angle": 0, + "mirrored": False, + "initial": False, + "worker_version_id": UUID("12341234-1234-1234-1234-123412341234"), + }, "text": "Animula vagula blandula", "confidence": 0.5, "orientation": TextOrientation.HorizontalLeftToRight.value, @@ -1570,7 +1548,17 @@ def test_create_transcriptions_orientation_with_cache( }, { "id": UUID("67896789-6789-6789-6789-678967896789"), - "element_id": UUID("22222222-2222-2222-2222-222222222222"), + "element": { + "id": UUID("22222222-2222-2222-2222-222222222222"), + "parent_id": UUID(elt.id), + "type": "page", + "image": None, + "polygon": [[0, 0], [2000, 0], [2000, 3000], [0, 3000]], + "rotation_angle": 0, + "mirrored": False, + "initial": False, + "worker_version_id": UUID("12341234-1234-1234-1234-123412341234"), + }, "text": "Hospes comesque corporis", "confidence": 0.75, "orientation": TextOrientation.VerticalLeftToRight.value, @@ -1578,7 +1566,17 @@ def test_create_transcriptions_orientation_with_cache( }, { "id": UUID("78907890-7890-7890-7890-789078907890"), - "element_id": UUID("11111111-1111-1111-1111-111111111111"), + "element": { + "id": UUID("11111111-1111-1111-1111-111111111111"), + "parent_id": UUID(elt.id), + "type": "page", + "image": None, + "polygon": [[100, 150], [700, 150], [700, 200], [100, 200]], + "rotation_angle": 0, + "mirrored": False, + "initial": False, + "worker_version_id": UUID("12341234-1234-1234-1234-123412341234"), + }, "text": "Quae nunc abibis in loca", "confidence": 0.9, "orientation": TextOrientation.HorizontalRightToLeft.value, -- GitLab