Skip to content
Snippets Groups Projects
Commit 827abba1 authored by ml bonhomme's avatar ml bonhomme :bee:
Browse files

actually check the text orientation in the cache

parent 82824a6c
No related branches found
No related tags found
1 merge request!139Text orientation in base worker
This commit is part of merge request !139. Comments created here will be created in the context of that merge request.
......@@ -107,12 +107,22 @@ class TranscriptionMixin(object):
assert orientation and isinstance(
orientation, TextOrientation
), f"Transcription at index {index} in transcriptions: orientation shouldn't be null and should be of type TextOrientation"
transcription["orientation"] = orientation
sent_transcriptions = [
{
"text": transcription["text"],
"score": transcription["score"],
"orientation": transcription["orientation"].value,
}
for transcription in transcriptions
]
created_trs = self.request(
"CreateTranscriptions",
body={
"worker_version": self.worker_version_id,
"transcriptions": transcriptions,
"transcriptions": sent_transcriptions,
},
)["transcriptions"]
......@@ -257,7 +267,7 @@ class TranscriptionMixin(object):
"element_id": annotation["element_id"],
"text": transcription["text"],
"confidence": transcription["score"],
"orientation": transcription["orientation"],
"orientation": transcription["orientation"].value,
"worker_version_id": self.worker_version_id,
}
)
......
......@@ -467,6 +467,7 @@ def mock_databases(tmpdir):
element_id=UUID("42424242-4242-4242-4242-424242424242"),
text="Hello!",
confidence=0.42,
orientation=TextOrientation.HorizontalLeftToRight,
worker_version_id=UUID("56785678-5678-5678-5678-567856785678"),
)
......@@ -483,6 +484,7 @@ def mock_databases(tmpdir):
element_id=UUID("42424242-4242-4242-4242-424242424242"),
text="Hello again neighbor !",
confidence=0.42,
orientation=TextOrientation.HorizontalLeftToRight,
worker_version_id=UUID("56785678-5678-5678-5678-567856785678"),
)
......
......@@ -118,6 +118,76 @@ def test_create_transcription_wrong_score(mock_elements_worker):
)
def test_create_transcription_default_orientation(responses, mock_elements_worker):
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
responses.add(
responses.POST,
f"http://testserver/api/v1/element/{elt.id}/transcription/",
status=200,
json={
"id": "56785678-5678-5678-5678-567856785678",
"text": "Animula vagula blandula",
"score": 0.42,
"confidence": 0.42,
"worker_version_id": "12341234-1234-1234-1234-123412341234",
},
)
mock_elements_worker.create_transcription(
element=elt,
text="Animula vagula blandula",
score=0.42,
)
assert json.loads(responses.calls[-1].request.body) == {
"text": "Animula vagula blandula",
"worker_version": "12341234-1234-1234-1234-123412341234",
"score": 0.42,
"orientation": "horizontal-lr",
}
def test_create_transcription_orientation(responses, mock_elements_worker):
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
responses.add(
responses.POST,
f"http://testserver/api/v1/element/{elt.id}/transcription/",
status=200,
json={
"id": "56785678-5678-5678-5678-567856785678",
"text": "Animula vagula blandula",
"score": 0.42,
"confidence": 0.42,
"worker_version_id": "12341234-1234-1234-1234-123412341234",
},
)
mock_elements_worker.create_transcription(
element=elt,
text="Animula vagula blandula",
orientation=TextOrientation.VerticalLeftToRight,
score=0.42,
)
assert json.loads(responses.calls[-1].request.body) == {
"text": "Animula vagula blandula",
"worker_version": "12341234-1234-1234-1234-123412341234",
"score": 0.42,
"orientation": "vertical-lr",
}
def test_create_transcription_wrong_orientation(mock_elements_worker):
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
with pytest.raises(AssertionError) as e:
mock_elements_worker.create_transcription(
element=elt,
text="Animula vagula blandula",
score=0.26,
orientation="eliptical",
)
assert (
str(e.value)
== "orientation shouldn't be null and should be of type TextOrientation"
)
def test_create_transcription_api_error(responses, mock_elements_worker):
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
responses.add(
......@@ -203,7 +273,6 @@ def test_create_transcription_with_cache(responses, mock_elements_worker_with_ca
element=elt,
text="i am a line",
score=0.42,
orientation=TextOrientation.HorizontalLeftToRight,
)
assert len(responses.calls) == len(BASE_API_CALLS) + 1
......@@ -233,6 +302,66 @@ def test_create_transcription_with_cache(responses, mock_elements_worker_with_ca
]
def test_create_transcription_orientation_with_cache(
responses, mock_elements_worker_with_cache
):
elt = CachedElement.create(id="12341234-1234-1234-1234-123412341234", type="thing")
responses.add(
responses.POST,
f"http://testserver/api/v1/element/{elt.id}/transcription/",
status=200,
json={
"id": "56785678-5678-5678-5678-567856785678",
"text": "Animula vagula blandula",
"score": 0.42,
"confidence": 0.42,
"orientation": "vertical-lr",
"worker_version_id": "12341234-1234-1234-1234-123412341234",
},
)
mock_elements_worker_with_cache.create_transcription(
element=elt,
text="Animula vagula blandula",
orientation=TextOrientation.VerticalLeftToRight,
score=0.42,
)
assert json.loads(responses.calls[-1].request.body) == {
"text": "Animula vagula blandula",
"worker_version": "12341234-1234-1234-1234-123412341234",
"orientation": "vertical-lr",
"score": 0.42,
}
# Check that the text orientation was properly stored in SQLite cache
assert list(CachedTranscription.select()) == [
CachedTranscription(
id=UUID("56785678-5678-5678-5678-567856785678"),
)
]
assert [
{
k: getattr(transcription, k)
for k in [
"id",
"element_id",
"text",
"confidence",
"orientation",
"worker_version_id",
]
}
for transcription in CachedTranscription.select()
] == [
{
"id": UUID("56785678-5678-5678-5678-567856785678"),
"element_id": UUID("12341234-1234-1234-1234-123412341234"),
"text": "Animula vagula blandula",
"confidence": 0.42,
"orientation": TextOrientation.VerticalLeftToRight.value,
"worker_version_id": UUID("12341234-1234-1234-1234-123412341234"),
}
]
def test_create_transcriptions_wrong_transcriptions(mock_elements_worker):
with pytest.raises(AssertionError) as e:
mock_elements_worker.create_transcriptions(
......@@ -463,6 +592,27 @@ def test_create_transcriptions_wrong_transcriptions(mock_elements_worker):
== "Transcription at index 1 in transcriptions: score shouldn't be null and should be a float in [0..1] range"
)
with pytest.raises(AssertionError) as e:
mock_elements_worker.create_transcriptions(
transcriptions=[
{
"element_id": "11111111-1111-1111-1111-111111111111",
"text": "The",
"score": 0.75,
},
{
"element_id": "11111111-1111-1111-1111-111111111111",
"text": "word",
"score": 0.28,
"orientation": "wobble",
},
],
)
assert (
str(e.value)
== "Transcription at index 1 in transcriptions: orientation shouldn't be null and should be of type TextOrientation"
)
def test_create_transcriptions_api_error(responses, mock_elements_worker):
responses.add(
......@@ -552,7 +702,14 @@ def test_create_transcriptions(responses, mock_elements_worker_with_cache):
assert json.loads(responses.calls[-1].request.body) == {
"worker_version": "12341234-1234-1234-1234-123412341234",
"transcriptions": trans,
"transcriptions": [
{
"text": tr["text"],
"score": tr["score"],
"orientation": tr["orientation"].value,
}
for tr in trans
],
}
# Check that created transcriptions were properly stored in SQLite cache
......@@ -576,6 +733,65 @@ def test_create_transcriptions(responses, mock_elements_worker_with_cache):
]
def test_create_transcriptions_orientation(responses, mock_elements_worker_with_cache):
CachedElement.create(id="11111111-1111-1111-1111-111111111111", type="thing")
trans = [
{
"element_id": "11111111-1111-1111-1111-111111111111",
"text": "Animula vagula blandula",
"score": 0.12,
"orientation": TextOrientation.HorizontalRightToLeft,
},
{
"element_id": "11111111-1111-1111-1111-111111111111",
"text": "Hospes comesque corporis",
"score": 0.21,
"orientation": TextOrientation.VerticalLeftToRight,
},
]
responses.add(
responses.POST,
"http://testserver/api/v1/transcription/bulk/",
status=200,
json={
"worker_version": "12341234-1234-1234-1234-123412341234",
"transcriptions": [
{
"id": "00000000-0000-0000-0000-000000000000",
"element_id": "11111111-1111-1111-1111-111111111111",
"text": "Animula vagula blandula",
"orientation": "horizontal-rl",
"confidence": 0.12,
},
{
"id": "11111111-1111-1111-1111-111111111111",
"element_id": "11111111-1111-1111-1111-111111111111",
"text": "Hospes comesque corporis",
"orientation": "vertical-lr",
"confidence": 0.21,
},
],
},
)
mock_elements_worker_with_cache.create_transcriptions(
transcriptions=trans,
)
assert json.loads(responses.calls[-1].request.body) == {
"worker_version": "12341234-1234-1234-1234-123412341234",
"transcriptions": [
{
"text": tr["text"],
"score": tr["score"],
"orientation": tr["orientation"].value,
}
for tr in trans
],
}
def test_create_element_transcriptions_wrong_element(mock_elements_worker):
with pytest.raises(AssertionError) as e:
mock_elements_worker.create_element_transcriptions(
......@@ -951,6 +1167,29 @@ def test_create_element_transcriptions_wrong_transcriptions(mock_elements_worker
== "Transcription at index 1 in transcriptions: polygon points should be lists of two numbers"
)
with pytest.raises(AssertionError) as e:
mock_elements_worker.create_element_transcriptions(
element=elt,
sub_element_type="page",
transcriptions=[
{
"polygon": [[0, 0], [2000, 0], [2000, 3000], [0, 3000]],
"score": 0.75,
"text": "The",
},
{
"polygon": [[100, 150], [700, 150], [700, 200], [100, 200]],
"score": 0.35,
"text": "word",
"orientation": "uptown",
},
],
)
assert (
str(e.value)
== "Transcription at index 1 in transcriptions: orientation shouldn't be null and should be of type TextOrientation"
)
def test_create_element_transcriptions_api_error(responses, mock_elements_worker):
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
......@@ -1173,6 +1412,147 @@ def test_create_element_transcriptions_with_cache(
]
def test_create_transcriptions_orientation_with_cache(
responses, mock_elements_worker_with_cache
):
elt = CachedElement(id="12341234-1234-1234-1234-123412341234", type="thing")
responses.add(
responses.POST,
f"http://testserver/api/v1/element/{elt.id}/transcriptions/bulk/",
status=200,
json=[
{
"id": "56785678-5678-5678-5678-567856785678",
"element_id": "11111111-1111-1111-1111-111111111111",
"created": True,
},
{
"id": "67896789-6789-6789-6789-678967896789",
"element_id": "22222222-2222-2222-2222-222222222222",
"created": False,
},
{
"id": "78907890-7890-7890-7890-789078907890",
"element_id": "11111111-1111-1111-1111-111111111111",
"created": True,
},
],
)
oriented_transcriptions = [
{
"polygon": [[100, 150], [700, 150], [700, 200], [100, 200]],
"score": 0.5,
"text": "Animula vagula blandula",
},
{
"polygon": [[0, 0], [2000, 0], [2000, 3000], [0, 3000]],
"score": 0.75,
"text": "Hospes comesque corporis",
"orientation": TextOrientation.VerticalLeftToRight,
},
{
"polygon": [[1000, 300], [1200, 300], [1200, 500], [1000, 500]],
"score": 0.9,
"text": "Quae nunc abibis in loca",
"orientation": TextOrientation.HorizontalRightToLeft,
},
]
annotations = mock_elements_worker_with_cache.create_element_transcriptions(
element=elt,
sub_element_type="page",
transcriptions=oriented_transcriptions,
)
sent_oriented_transcriptions = [
{
"text": tr["text"],
"score": tr["score"],
"orientation": tr["orientation"].value,
"polygon": tr["polygon"],
}
for tr in oriented_transcriptions
]
assert json.loads(responses.calls[-1].request.body) == {
"element_type": "page",
"worker_version": "12341234-1234-1234-1234-123412341234",
"transcriptions": sent_oriented_transcriptions,
"return_elements": True,
}
assert annotations == [
{
"id": "56785678-5678-5678-5678-567856785678",
"element_id": "11111111-1111-1111-1111-111111111111",
"created": True,
},
{
"id": "67896789-6789-6789-6789-678967896789",
"element_id": "22222222-2222-2222-2222-222222222222",
"created": False,
},
{
"id": "78907890-7890-7890-7890-789078907890",
"element_id": "11111111-1111-1111-1111-111111111111",
"created": True,
},
]
# Check that the text orientation was properly stored in SQLite cache
assert list(CachedTranscription.select()) == [
CachedTranscription(
id=UUID("56785678-5678-5678-5678-567856785678"),
),
CachedTranscription(
id=UUID("67896789-6789-6789-6789-678967896789"),
),
CachedTranscription(
id=UUID("78907890-7890-7890-7890-789078907890"),
),
]
assert [
{
k: getattr(transcription, k)
for k in [
"id",
"element_id",
"text",
"confidence",
"orientation",
"worker_version_id",
]
}
for transcription in CachedTranscription.select()
] == [
{
"id": UUID("56785678-5678-5678-5678-567856785678"),
"element_id": UUID("11111111-1111-1111-1111-111111111111"),
"text": "Animula vagula blandula",
"confidence": 0.5,
"orientation": TextOrientation.HorizontalLeftToRight.value,
"worker_version_id": UUID("12341234-1234-1234-1234-123412341234"),
},
{
"id": UUID("67896789-6789-6789-6789-678967896789"),
"element_id": UUID("22222222-2222-2222-2222-222222222222"),
"text": "Hospes comesque corporis",
"confidence": 0.75,
"orientation": TextOrientation.VerticalLeftToRight.value,
"worker_version_id": UUID("12341234-1234-1234-1234-123412341234"),
},
{
"id": UUID("78907890-7890-7890-7890-789078907890"),
"element_id": UUID("11111111-1111-1111-1111-111111111111"),
"text": "Quae nunc abibis in loca",
"confidence": 0.9,
"orientation": TextOrientation.HorizontalRightToLeft.value,
"worker_version_id": UUID("12341234-1234-1234-1234-123412341234"),
},
]
def test_list_transcriptions_wrong_element(mock_elements_worker):
with pytest.raises(AssertionError) as e:
mock_elements_worker.list_transcriptions(element=None)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment