Skip to content
Snippets Groups Projects

Text orientation in base worker

Merged ml bonhomme requested to merge text-orientation into master
All threads resolved!
7 files
+ 553
11
Compare changes
  • Side-by-side
  • Inline
Files
7
# -*- coding: utf-8 -*-
from enum import Enum
from peewee import IntegrityError
from arkindex_worker import logger
@@ -7,8 +9,17 @@ from arkindex_worker.cache import CachedElement, CachedTranscription
from arkindex_worker.models import Element
class TextOrientation(Enum):
    """
    Orientation of the text of a transcription.

    Each member's ``value`` is the string that is placed in the
    ``orientation`` field of transcription payloads; callers pass the
    enum member itself and the ``.value`` is extracted when the request
    body is built.
    """

    # Horizontal text, read from left to right.
    HorizontalLeftToRight = "horizontal-lr"
    # Horizontal text, read from right to left.
    HorizontalRightToLeft = "horizontal-rl"
    # Vertical text, "rl" variant.
    VerticalRightToLeft = "vertical-rl"
    # Vertical text, "lr" variant.
    VerticalLeftToRight = "vertical-lr"
class TranscriptionMixin(object):
def create_transcription(self, element, text, score):
def create_transcription(
self, element, text, score, orientation=TextOrientation.HorizontalLeftToRight
):
"""
Create a transcription on the given element through the API.
"""
@@ -18,7 +29,9 @@ class TranscriptionMixin(object):
assert text and isinstance(
text, str
), "text shouldn't be null and should be of type str"
assert orientation and isinstance(
orientation, TextOrientation
), "orientation shouldn't be null and should be of type TextOrientation"
assert (
isinstance(score, float) and 0 <= score <= 1
), "score shouldn't be null and should be a float in [0..1] range"
@@ -36,6 +49,7 @@ class TranscriptionMixin(object):
"text": text,
"worker_version": self.worker_version_id,
"score": score,
"orientation": orientation.value,
},
)
@@ -50,6 +64,7 @@ class TranscriptionMixin(object):
"element_id": element.id,
"text": created["text"],
"confidence": created["confidence"],
"orientation": created["orientation"],
"worker_version_id": self.worker_version_id,
}
]
@@ -70,7 +85,10 @@ class TranscriptionMixin(object):
transcriptions, list
), "transcriptions shouldn't be null and should be of type list"
for index, transcription in enumerate(transcriptions):
# Create shallow copies of every transcription to avoid mutating the original payload
transcriptions_payload = list(map(dict, transcriptions))
for (index, transcription) in enumerate(transcriptions_payload):
element_id = transcription.get("element_id")
assert element_id and isinstance(
element_id, str
@@ -86,11 +104,20 @@ class TranscriptionMixin(object):
score is not None and isinstance(score, float) and 0 <= score <= 1
), f"Transcription at index {index} in transcriptions: score shouldn't be null and should be a float in [0..1] range"
orientation = transcription.get(
"orientation", TextOrientation.HorizontalLeftToRight
)
assert orientation and isinstance(
orientation, TextOrientation
), f"Transcription at index {index} in transcriptions: orientation shouldn't be null and should be of type TextOrientation"
if orientation:
transcription["orientation"] = orientation.value
created_trs = self.request(
"CreateTranscriptions",
body={
"worker_version": self.worker_version_id,
"transcriptions": transcriptions,
"transcriptions": transcriptions_payload,
},
)["transcriptions"]
@@ -106,6 +133,7 @@ class TranscriptionMixin(object):
"element_id": created_tr["element_id"],
"text": created_tr["text"],
"confidence": created_tr["confidence"],
"orientation": created_tr["orientation"],
"worker_version_id": self.worker_version_id,
}
for created_tr in created_trs
@@ -132,7 +160,10 @@ class TranscriptionMixin(object):
transcriptions, list
), "transcriptions shouldn't be null and should be of type list"
for index, transcription in enumerate(transcriptions):
# Create shallow copies of every transcription to avoid mutating the original payload
transcriptions_payload = list(map(dict, transcriptions))
for (index, transcription) in enumerate(transcriptions_payload):
text = transcription.get("text")
assert text and isinstance(
text, str
@@ -143,6 +174,15 @@ class TranscriptionMixin(object):
score is not None and isinstance(score, float) and 0 <= score <= 1
), f"Transcription at index {index} in transcriptions: score shouldn't be null and should be a float in [0..1] range"
orientation = transcription.get(
"orientation", TextOrientation.HorizontalLeftToRight
)
assert orientation and isinstance(
orientation, TextOrientation
), f"Transcription at index {index} in transcriptions: orientation shouldn't be null and should be of type TextOrientation"
if orientation:
transcription["orientation"] = orientation.value
polygon = transcription.get("polygon")
assert polygon and isinstance(
polygon, list
@@ -168,7 +208,7 @@ class TranscriptionMixin(object):
body={
"element_type": sub_element_type,
"worker_version": self.worker_version_id,
"transcriptions": transcriptions,
"transcriptions": transcriptions_payload,
"return_elements": True,
},
)
@@ -216,6 +256,9 @@ class TranscriptionMixin(object):
"element_id": annotation["element_id"],
"text": transcription["text"],
"confidence": transcription["score"],
"orientation": transcription.get(
"orientation", TextOrientation.HorizontalLeftToRight
).value,
"worker_version_id": self.worker_version_id,
}
)
Loading