Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • workers/base-worker
1 result
Show changes
Commits on Source (2)
# -*- coding: utf-8 -*-
"""
General utility functions and classes.
"""
import datetime
from timeit import default_timer
class Timer(object):
    """
    A context manager to help measure execution times.

    The clock is read when the ``with`` block is entered and read again when
    it exits. The difference is stored as a number of seconds in ``elapsed``
    and as a ``datetime.timedelta`` in ``delta``. ``start``, ``elapsed`` and
    ``delta`` are ``None`` until the corresponding step has happened.

    Example
    ---
    ```
    with Timer() as t:
        # do something interesting
    print(t.delta)  # X days, X:XX:XX
    ```
    """

    def __init__(self):
        self.timer = default_timer
        # Declare every attribute up front so the instance always has a
        # well-defined state, even before the block is entered or exited.
        self.start = None
        self.elapsed = None
        self.delta = None

    def __enter__(self):
        """Record the start time and return the timer itself."""
        self.start = self.timer()
        return self

    def __exit__(self, *args):
        """Compute the elapsed time; exceptions from the block still propagate."""
        end = self.timer()
        self.elapsed = end - self.start
        self.delta = datetime.timedelta(seconds=self.elapsed)
......@@ -227,6 +227,8 @@ class TranscriptionMixin(object):
Required. Confidence score between 0 and 1.
orientation ([TextOrientation][arkindex_worker.worker.transcription.TextOrientation])
Optional. Orientation of the transcription's text.
element_confidence (float)
Optional. Confidence score of the element between 0 and 1.
:returns: A list of dicts as returned by the ``CreateElementTranscriptions`` API endpoint.
"""
......@@ -278,6 +280,12 @@ class TranscriptionMixin(object):
assert all(
isinstance(coord, (int, float)) for point in polygon for coord in point
), f"Transcription at index {index} in transcriptions: polygon points should be lists of two numbers"
element_confidence = transcription.get("element_confidence")
assert element_confidence is None or (
isinstance(element_confidence, float) and 0 <= element_confidence <= 1
), f"Transcription at index {index} in transcriptions: element_confidence should be either null or a float in [0..1] range"
if self.is_read_only:
logger.warning(
"Cannot create transcriptions as this worker is in read-only mode"
......@@ -327,6 +335,7 @@ class TranscriptionMixin(object):
"image_id": element.image_id,
"polygon": transcription["polygon"],
"worker_run_id": self.worker_run_id,
"confidence": transcription.get("element_confidence"),
}
)
......
# Generic Utilities
::: arkindex_worker.utils
......@@ -86,7 +86,6 @@ nav:
- Transcription: ref/api/transcription.md
- WorkerVersion: ref/api/worker_version.md
- Models: ref/models.md
- Generic Utilities: ref/utils.md
- Git & Gitlab support: ref/git.md
- Image utilities: ref/image.md
- Reporting: ref/reporting.md
......
......@@ -22,6 +22,7 @@ TRANSCRIPTIONS_SAMPLE = [
"polygon": [[0, 0], [2000, 0], [2000, 3000], [0, 3000]],
"confidence": 0.75,
"text": "first",
"element_confidence": 0.75,
},
{
"polygon": [[1000, 300], [1200, 300], [1200, 500], [1000, 500]],
......@@ -1244,6 +1245,29 @@ def test_create_element_transcriptions_wrong_transcriptions(mock_elements_worker
== "Transcription at index 1 in transcriptions: orientation shouldn't be null and should be of type TextOrientation"
)
with pytest.raises(AssertionError) as e:
mock_elements_worker.create_element_transcriptions(
element=elt,
sub_element_type="page",
transcriptions=[
{
"polygon": [[0, 0], [2000, 0], [2000, 3000], [0, 3000]],
"confidence": 0.75,
"text": "The",
},
{
"polygon": [[100, 150], [700, 150], [700, 200], [100, 200]],
"confidence": 0.75,
"text": "word",
"element_confidence": "not a confidence",
},
],
)
assert (
str(e.value)
== "Transcription at index 1 in transcriptions: element_confidence should be either null or a float in [0..1] range"
)
def test_create_element_transcriptions_api_error(responses, mock_elements_worker):
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
......@@ -1326,6 +1350,7 @@ def test_create_element_transcriptions(responses, mock_elements_worker):
"confidence": 0.75,
"text": "first",
"orientation": TextOrientation.HorizontalLeftToRight.value,
"element_confidence": 0.75,
},
{
"polygon": [[1000, 300], [1200, 300], [1200, 500], [1000, 500]],
......@@ -1411,6 +1436,7 @@ def test_create_element_transcriptions_with_cache(
"confidence": 0.75,
"text": "first",
"orientation": TextOrientation.HorizontalLeftToRight.value,
"element_confidence": 0.75,
},
{
"polygon": [[1000, 300], [1200, 300], [1200, 500], [1000, 500]],
......@@ -1454,6 +1480,7 @@ def test_create_element_transcriptions_with_cache(
type="page",
polygon="[[0, 0], [2000, 0], [2000, 3000], [0, 3000]]",
worker_run_id=UUID("56785678-5678-5678-5678-567856785678"),
confidence=0.75,
),
]
assert list(CachedTranscription.select()) == [
......
# -*- coding: utf-8 -*-
from datetime import timedelta
from time import sleep
from arkindex_worker.utils import Timer
def test_timer_type():
    """A finished Timer exposes its measurement as a ``timedelta``."""
    with Timer() as timer:
        # Nothing to measure: only the type of the result matters here.
        pass

    measured = timer.delta
    assert isinstance(measured, timedelta)
def test_timer():
    """A Timer wrapped around a short sleep records more time than an empty one."""
    # Baseline: an empty block
    with Timer() as timer:
        pass

    # Comparison: a block that sleeps for a hundredth of a second
    with Timer() as timer2:
        sleep(1 / 100)

    # Assert the second timer has recorded a longer period
    assert timer2.delta > timer.delta