Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • workers/base-worker
1 result
Show changes
Commits on Source (5)
Showing
with 357 additions and 850 deletions
......@@ -25,7 +25,7 @@ lint:
- pre-commit run -a
test:
image: python:3
image: python:3.11
stage: test
cache:
......@@ -55,7 +55,7 @@ test:
- tox -- --junitxml=test-report.xml --durations=50
test-cookiecutter:
image: python:3
image: python:3.11
stage: test
cache:
......@@ -136,7 +136,7 @@ pypi-publication:
- twine upload dist/* -r pypi
.docs:
image: python:3
image: python:3.11
artifacts:
paths:
- public
......
# -*- coding: utf-8 -*-
"""
Generator for the ``ml_report.json`` file, to report created worker results and exceptions.
"""
import json
import traceback
from collections import Counter
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Union
from uuid import UUID
from apistar.exceptions import ErrorResponse
from arkindex_worker import logger
from arkindex_worker.models import Transcription
class Reporter(object):
    """
    Helper to generate an ``ml_report.json`` artifact.

    The report is accumulated in memory in ``report_data`` and written to disk
    as JSON by :meth:`save`. Identifiers given as ``UUID`` are converted to
    strings when recorded, so that the report always stays JSON-serializable.
    """

    def __init__(
        self,
        name: Optional[str] = "Unknown worker",
        slug: Optional[str] = "unknown-slug",
        version: Optional[str] = None,
        **kwargs,
    ):
        """
        Start a new, empty report.

        :param name: Name of the worker, stored in the report.
        :param slug: Slug of the worker, stored in the report and used by ``repr()``.
        :param version: Version of the worker, stored in the report.
        :param kwargs: Any other keyword argument is accepted and ignored.
        """
        self.report_data = {
            "name": name,
            "slug": slug,
            "version": version,
            "started": datetime.utcnow().isoformat(),
            "elements": {},
        }
        logger.info(f"Starting ML report for {name}")

    def __repr__(self):
        return f"{self.__class__.__name__}({self.report_data['slug']})"

    @staticmethod
    def _jsonable_id(value):
        """
        Return ``value`` as a string when it is a ``UUID``, unchanged otherwise.

        ``json.dump`` cannot serialize ``UUID`` instances, so they are converted
        at recording time instead of making :meth:`save` fail later on.
        """
        return str(value) if isinstance(value, UUID) else value

    def _get_element(self, element_id):
        """
        Return the report entry of an element, creating it on first access.

        :param element_id: ID of the element; it is stringified to build the key.
        """
        elements = self.report_data["elements"]
        key = str(element_id)
        # Only build the default entry (and its timestamp) when it is missing
        if key not in elements:
            elements[key] = {
                "started": datetime.utcnow().isoformat(),
                # Created element counts, by type slug
                "elements": {},
                # Created transcriptions count
                "transcriptions": 0,
                # Created classification counts, by class
                "classifications": {},
                # Created entities ({"id": "", "type": "", "name": ""}) from this element
                "entities": [],
                # Created transcription entities
                # ({"transcription_id": "", "entity_id": "", "transcription_entity_id": ""})
                # from this element
                "transcription_entities": [],
                # Created metadata ({"id": "", "type": "", "name": ""}) from this element
                "metadata": [],
                "errors": [],
            }
        return elements[key]

    def process(self, element_id: Union[str, UUID]):
        """
        Report that a specific element ID is being processed.

        :param element_id: ID of the element being processed.
        """
        # Just call the element initializer
        self._get_element(element_id)

    def add_element(self, parent_id: Union[str, UUID], type: str, type_count: int = 1):
        """
        Report creating an element as a child of another.

        :param parent_id: ID of the parent element.
        :param type: Slug of the type of the child element.
        :param type_count: How many elements of this type were created.
        """
        elements = self._get_element(parent_id)["elements"]
        elements[type] = elements.get(type, 0) + type_count

    def add_classification(self, element_id: Union[str, UUID], class_name: str):
        """
        Report creating a classification on an element.

        :param element_id: ID of the element.
        :param class_name: Name of the ML class of the new classification.
        """
        classifications = self._get_element(element_id)["classifications"]
        classifications[class_name] = classifications.get(class_name, 0) + 1

    def add_classifications(
        self, element_id: Union[str, UUID], classifications: List[Dict[str, str]]
    ):
        """
        Report creating one or more classifications at once on an element.

        :param element_id: ID of the element.
        :param classifications: List of classifications.
           Each classification is represented as a ``dict`` with a ``class_name`` key
           holding the name of the ML class being used.
        """
        assert isinstance(
            classifications, list
        ), "A list is required for classifications"
        element = self._get_element(element_id)
        # Start from the previous existing classification counts, if any
        counter = Counter(element["classifications"])
        # Add the new ones
        counter.update(classification["class_name"] for classification in classifications)
        element["classifications"] = dict(counter)

    def add_transcription(self, element_id: Union[str, UUID], count=1):
        """
        Report creating a transcription on an element.

        :param element_id: ID of the element.
        :param count: Number of transcriptions created at once
        """
        self._get_element(element_id)["transcriptions"] += count

    def add_entity(
        self,
        element_id: Union[str, UUID],
        entity_id: Union[str, UUID],
        type: str,
        name: str,
    ):
        """
        Report creating an entity on an element.

        :param element_id: ID of the element.
        :param entity_id: ID of the new entity.
        :param type: Type of the entity.
        :param name: Name of the entity.
        """
        entities = self._get_element(element_id)["entities"]
        entities.append(
            {"id": self._jsonable_id(entity_id), "type": type, "name": name}
        )

    def add_transcription_entity(
        self,
        entity_id: Union[str, UUID],
        transcription: "Transcription",
        transcription_entity_id: Union[str, UUID],
    ):
        """
        Report creating a transcription entity on an element.

        The entry is stored under the element of the transcription
        (``transcription.element.id``).

        :param entity_id: ID of the entity element.
        :param transcription: Transcription to add the entity on
        :param transcription_entity_id: ID of the transcription entity that is created.
        """
        transcription_entities = self._get_element(transcription.element.id)[
            "transcription_entities"
        ]
        transcription_entities.append(
            {
                "transcription_id": self._jsonable_id(transcription.id),
                "entity_id": self._jsonable_id(entity_id),
                "transcription_entity_id": self._jsonable_id(transcription_entity_id),
            }
        )

    def add_entity_link(self, *args, **kwargs):
        """
        Report creating an entity link. Not currently supported.

        :raises NotImplementedError:
        """
        raise NotImplementedError

    def add_entity_role(self, *args, **kwargs):
        """
        Report creating an entity role. Not currently supported.

        :raises NotImplementedError:
        """
        raise NotImplementedError

    def add_metadata(
        self,
        element_id: Union[str, UUID],
        metadata_id: Union[str, UUID],
        type: str,
        name: str,
    ):
        """
        Report creating a metadata from an element.

        :param element_id: ID of the element.
        :param metadata_id: ID of the new metadata.
        :param type: Type of the metadata.
        :param name: Name of the metadata.
        """
        metadata = self._get_element(element_id)["metadata"]
        metadata.append(
            {"id": self._jsonable_id(metadata_id), "type": type, "name": name}
        )

    def error(self, element_id: Union[str, UUID], exception: Exception):
        """
        Report that a Python exception occurred when processing an element.

        :param element_id: ID of the element.
        :param exception: A Python exception.
        """
        error_data = {
            "class": exception.__class__.__name__,
            "message": str(exception),
        }
        if exception.__traceback__ is not None:
            # format_tb() entries already end with a newline; the extra separator
            # is kept so that the report format stays unchanged
            error_data["traceback"] = "\n".join(
                traceback.format_tb(exception.__traceback__)
            )
        if isinstance(exception, ErrorResponse):
            # API errors carry more details than their string representation
            error_data["message"] = exception.title
            error_data["status_code"] = exception.status_code
            error_data["content"] = exception.content
        self._get_element(element_id)["errors"].append(error_data)

    def save(self, path: Union[str, Path]):
        """
        Save the ML report to the specified path.

        :param path: Path to save the ML report to.
        """
        logger.info(f"Saving ML report to {path}")
        with open(path, "w", encoding="utf-8") as f:
            json.dump(self.report_data, f)
......@@ -15,7 +15,6 @@ from apistar.exceptions import ErrorResponse
from arkindex_worker import logger
from arkindex_worker.cache import CachedElement
from arkindex_worker.models import Element
from arkindex_worker.reporting import Reporter
from arkindex_worker.worker.base import BaseWorker
from arkindex_worker.worker.classification import ClassificationMixin
from arkindex_worker.worker.element import ElementMixin
......@@ -156,17 +155,12 @@ class ElementsWorker(
super().configure()
super().configure_cache()
# Add report concerning elements
self.report = Reporter(
**self.worker_details, version=getattr(self, "worker_version_id", None)
)
def run(self):
"""
Implements an Arkindex worker that goes through each element returned by
[list_elements][arkindex_worker.worker.ElementsWorker.list_elements]. It calls [process_element][arkindex_worker.worker.ElementsWorker.process_element], catching exceptions
and reporting them using the [Reporter][arkindex_worker.reporting.Reporter], and handles saving the report
once the process is complete as well as WorkerActivity updates when enabled.
[list_elements][arkindex_worker.worker.ElementsWorker.list_elements].
It calls [process_element][arkindex_worker.worker.ElementsWorker.process_element],
catching exceptions, and handles saving WorkerActivity updates when enabled.
"""
self.configure()
......@@ -232,10 +226,6 @@ class ElementsWorker(
self.update_activity(element.id, ActivityState.Error)
except Exception:
pass
self.report.error(element_id, e)
# Save report as local artifact
self.report.save(self.work_dir / "ml_report.json")
if failed:
logger.error(
......
......@@ -140,7 +140,7 @@ class BaseWorker(object):
self.process_information = None
# corpus_id will be updated in configure() using the worker_run's corpus
# or in configure_for_developers() from the environment
self.corpus_id = None
self._corpus_id = None
self.user_configuration = {}
self.model_configuration = {}
self.support_cache = support_cache
......@@ -155,6 +155,17 @@ class BaseWorker(object):
# Define API Client
self.setup_api_client()
@property
def corpus_id(self) -> str:
    """
    ID of the corpus on which the worker is executed.

    The value is read from the private ``_corpus_id`` attribute. In **read-only**
    mode it has to be provided through the `ARKINDEX_CORPUS_ID` variable.

    :raises Exception: When the corpus ID is accessed while unset or empty.
    """
    current = self._corpus_id
    if current:
        return current
    raise Exception("Missing ARKINDEX_CORPUS_ID environment variable")
@property
def is_read_only(self) -> bool:
"""
......@@ -199,11 +210,7 @@ class BaseWorker(object):
logger.warning("Running without any extra configuration")
# Define corpus_id from environment
self.corpus_id = os.environ.get("ARKINDEX_CORPUS_ID")
if not self.corpus_id:
logger.warning(
"'ARKINDEX_CORPUS_ID' was not set in the environment. Any API request involving a `corpus_id` will fail."
)
self._corpus_id = os.environ.get("ARKINDEX_CORPUS_ID")
# Define model_version_id from environment
self.model_version_id = os.environ.get("ARKINDEX_MODEL_VERSION_ID")
......@@ -229,7 +236,7 @@ class BaseWorker(object):
self.process_information = worker_run["process"]
# Load corpus id
self.corpus_id = worker_run["process"]["corpus"]
self._corpus_id = worker_run["process"]["corpus"]
# Load worker version information
worker_version = worker_run["worker_version"]
......
......@@ -176,8 +176,6 @@ class ClassificationMixin(object):
# Propagate any other API error
raise
self.report.add_classification(element.id, ml_class)
return created
def create_classifications(
......@@ -248,7 +246,6 @@ class ClassificationMixin(object):
for created_cl in created_cls:
created_cl["class_name"] = self.retrieve_ml_class(created_cl["ml_class"])
self.report.add_classification(element.id, created_cl["class_name"])
if self.use_cache:
# Store classifications in local cache
......
......@@ -3,6 +3,7 @@
ElementsWorker methods for elements and element types.
"""
from typing import Dict, Iterable, List, NamedTuple, Optional, Union
from uuid import UUID
from peewee import IntegrityError
......@@ -141,7 +142,6 @@ class ElementMixin(object):
"confidence": confidence,
},
)
self.report.add_element(element.id, type)
return sub_element["id"] if slim_output else sub_element
......@@ -237,9 +237,6 @@ class ElementMixin(object):
},
)
for element in elements:
self.report.add_element(parent.id, element["type"])
if self.use_cache:
# Create the image as needed and handle both an Element and a CachedElement
if isinstance(parent, CachedElement):
......@@ -275,33 +272,39 @@ class ElementMixin(object):
return created_ids
def update_element(
self,
element: Union[Element, CachedElement],
type: Optional[str] = None,
name: Optional[str] = None,
polygon: Optional[List[List[Union[int, float]]]] = None,
confidence: Optional[float] = None,
def partial_update_element(
self, element: Union[Element, CachedElement], **kwargs
) -> dict:
"""
Partially update an element through the API.
Partially updates an element through the API.
:param element: The element to update.
:param type: Optional new slug type of the element.
:param name: Optional new name of the element.
:param polygon: Optional new polygon of the element.
:param confidence: Optional new confidence score, between 0.0 and 1.0.
:param **kwargs:
* *type* (``str``): Optional slug type of the element.
* *name* (``str``): Optional name of the element.
* *polygon* (``list``): Optional polygon for this element
* *confidence* (``float``): Optional confidence score of this element
* *rotation_angle* (``int``): Optional rotation angle of this element
* *mirrored* (``bool``): Optional mirror status of this element
* *image* (``UUID``): Optional ID of the image of this element
:returns: A dict from the ``PartialUpdateElement`` API endpoint,
"""
assert element and isinstance(
element, (Element, CachedElement)
), "element shouldn't be null and should be an Element or CachedElement"
assert type is None or isinstance(type, str), "type should be None or a str"
assert name is None or isinstance(name, str), "name should be None or a str"
assert polygon is None or isinstance(
polygon, list
), "polygon should be None or a list"
if polygon:
if "type" in kwargs:
assert isinstance(kwargs["type"], str), "type should be a str"
if "name" in kwargs:
assert isinstance(kwargs["name"], str), "name should be a str"
if "polygon" in kwargs:
polygon = kwargs["polygon"]
assert isinstance(polygon, list), "polygon should be a list"
assert len(polygon) >= 3, "polygon should have at least three points"
assert all(
isinstance(point, list) and len(point) == 2 for point in polygon
......@@ -309,9 +312,27 @@ class ElementMixin(object):
assert all(
isinstance(coord, (int, float)) for point in polygon for coord in point
), "polygon points should be lists of two numbers"
assert confidence is None or (
isinstance(confidence, float) and 0 <= confidence <= 1
), "confidence should be None or a float in [0..1] range"
if "confidence" in kwargs:
confidence = kwargs["confidence"]
assert confidence is None or (
isinstance(confidence, float) and 0 <= confidence <= 1
), "confidence should be None or a float in [0..1] range"
if "rotation_angle" in kwargs:
rotation_angle = kwargs["rotation_angle"]
assert (
isinstance(rotation_angle, int) and rotation_angle >= 0
), "rotation_angle should be a positive integer"
if "mirrored" in kwargs:
assert isinstance(kwargs["mirrored"], bool), "mirrored should be a boolean"
if "image" in kwargs:
image = kwargs["image"]
assert isinstance(image, UUID), "image should be a UUID"
# Cast to string
kwargs["image"] = str(image)
if self.is_read_only:
logger.warning("Cannot update element as this worker is in read-only mode")
......@@ -320,22 +341,24 @@ class ElementMixin(object):
updated_element = self.request(
"PartialUpdateElement",
id=element.id,
body={
"type": type,
"name": name,
"polygon": polygon,
"confidence": confidence,
},
body=kwargs,
)
if self.use_cache:
CachedElement.update(
{
CachedElement.type: type,
CachedElement.polygon: str(polygon),
CachedElement.confidence: confidence,
}
).where(CachedElement.id == element.id).execute()
# Name is not present in CachedElement model
kwargs.pop("name", None)
# Stringify polygon if present
if "polygon" in kwargs:
kwargs["polygon"] = str(kwargs["polygon"])
# Retrieve the right image
if "image" in kwargs:
kwargs["image"] = CachedImage.get_by_id(kwargs["image"])
CachedElement.update(**kwargs).where(
CachedElement.id == element.id
).execute()
return updated_element
......
......@@ -8,7 +8,7 @@ from typing import Dict, List, Optional, TypedDict, Union
from peewee import IntegrityError
from arkindex_worker import logger
from arkindex_worker.cache import CachedElement, CachedEntity, CachedTranscriptionEntity
from arkindex_worker.cache import CachedEntity, CachedTranscriptionEntity
from arkindex_worker.models import Element, Transcription
Entity = TypedDict(
......@@ -68,7 +68,6 @@ class EntityMixin(object):
def create_entity(
self,
element: Union[Element, CachedElement],
name: str,
type: str,
metas=dict(),
......@@ -78,14 +77,9 @@ class EntityMixin(object):
Create an entity on the given corpus.
If cache support is enabled, a [CachedEntity][arkindex_worker.cache.CachedEntity] will also be created.
:param element: An element on which the entity will be reported with the [Reporter][arkindex_worker.reporting.Reporter].
This does not have any effect on the entity itself.
:param name: Name of the entity.
:param type: Type of the entity.
"""
assert element and isinstance(
element, (Element, CachedElement)
), "element shouldn't be null and should be an Element or CachedElement"
assert name and isinstance(
name, str
), "name shouldn't be null and should be of type str"
......@@ -119,7 +113,6 @@ class EntityMixin(object):
"worker_run_id": self.worker_run_id,
},
)
self.report.add_entity(element.id, entity["id"], entity_type_id, name)
if self.use_cache:
# Store entity in local cache
......@@ -196,7 +189,6 @@ class EntityMixin(object):
id=transcription.id,
body=body,
)
self.report.add_transcription_entity(entity, transcription, transcription_ent)
if self.use_cache:
# Store transcription entity in local cache
......@@ -247,11 +239,6 @@ class EntityMixin(object):
transcription, Transcription
), "transcription shouldn't be null and should be of type Transcription"
# Needed for MLreport
assert (
hasattr(transcription, "element") and transcription.element
), f"No element linked to {transcription}"
assert entities and isinstance(
entities, list
), "entities shouldn't be null and should be of type list"
......@@ -301,22 +288,6 @@ class EntityMixin(object):
},
)
for entity, created_objects in zip(entities, created_ids["entities"]):
# Report entity creation
self.report.add_entity(
transcription.element.id,
created_objects["entity_id"],
entity.get("type_id"),
entity.get("name"),
)
# Report transcription entity creation
self.report.add_transcription_entity(
created_objects["entity_id"],
transcription,
created_objects["transcription_entity_id"],
)
return created_ids["entities"]
def list_transcription_entities(
......
......@@ -105,7 +105,6 @@ class MetaDataMixin(object):
"worker_run_id": self.worker_run_id,
},
)
self.report.add_metadata(element.id, metadata["id"], type.value, name)
return metadata["id"]
......@@ -182,9 +181,6 @@ class MetaDataMixin(object):
},
)["metadata_list"]
for meta in created_metadatas:
self.report.add_metadata(element.id, meta["id"], meta["type"], meta["name"])
return created_metadatas
def list_element_metadata(
......
......@@ -88,8 +88,6 @@ class TranscriptionMixin(object):
},
)
self.report.add_transcription(element.id)
if self.use_cache:
# Store transcription in local cache
try:
......@@ -181,9 +179,6 @@ class TranscriptionMixin(object):
},
)["transcriptions"]
for created_tr in created_trs:
self.report.add_transcription(created_tr["element_id"])
if self.use_cache:
# Store transcriptions in local cache
try:
......@@ -308,8 +303,6 @@ class TranscriptionMixin(object):
logger.debug(
f"A sub_element of {element.id} with type {sub_element_type} was created during transcriptions bulk creation"
)
self.report.add_element(element.id, sub_element_type)
self.report.add_transcription(annotation["element_id"])
if self.use_cache:
# Store transcriptions and their associated element (if created) in local cache
......
......@@ -83,20 +83,6 @@ The multiple configuration sources from the Arkindex-mode are merged into a uniq
One information cannot be retrieved directly from the configuration file and is required in some cases: the ID of the Arkindex corpus which the elements processed belong to. This is retrieved via the `ARKINDEX_CORPUS_ID` environment variable.
## Worker reporter
At the end of a worker execution, a report about the publications done by the worker is generated in JSON format. It lists:
- the starting time,
- the number of elements created, grouped by type,
- the number of transcriptions created,
- the number of classifications created, grouped by class,
- the number of entities created,
- the number of entities created on transcriptions,
- the number of metadatas created,
- the encountered errors' logs.
This is done by the many helpers described in the [reporting module](../../ref/reporting.md). They use the `report` attribute initialized at the configuration stage.
## Setting Debug logging level
There are three ways to activate the debug mode:
......@@ -132,9 +118,6 @@ Many attributes are set on the worker during at the configuration stage. Here is
`process_information`
: The details about the process parent to this worker execution. Only set in Arkindex mode.
`reporter`
: The `Reporter` instance that will generate the `ml_report.json` artifact, which sums up the publications done during this execution and the errors encountered.
`secrets`
: A dictionary mapping the secret name to their parsed content.
......
......@@ -28,7 +28,6 @@ flowchart LR
subgraph id3[Loop over each element]
element_processing --> element_processing
end
element_processing -- Save ML report to disk --> reporting
end
init --> run
end
......
# Reporting
::: arkindex_worker.reporting
......@@ -88,7 +88,6 @@ nav:
- Models: ref/models.md
- Git & Gitlab support: ref/git.md
- Image utilities: ref/image.md
- Reporting: ref/reporting.md
- Cache: ref/cache.md
- Utils: ref/utils.md
- Releases: releases.md
......
arkindex-client==1.0.13
arkindex-client==1.0.14
peewee==3.16.3
Pillow==10.0.0
pymdown-extensions==10.2
......
......@@ -16,6 +16,7 @@ from arkindex_worker.cache import (
MODELS,
SQL_VERSION,
CachedElement,
CachedImage,
CachedTranscription,
Version,
create_version_table,
......@@ -210,6 +211,15 @@ def mock_elements_worker(monkeypatch, mock_worker_run_api):
return worker
@pytest.fixture
def mock_elements_worker_read_only(monkeypatch):
    """
    Provide a configured ElementsWorker started with the ``--dev`` CLI flag.

    ``sys.argv`` is replaced by fixed CLI parameters to avoid issues with pytest.
    """
    dev_argv = ["worker", "--dev"]
    monkeypatch.setattr(sys, "argv", dev_argv)

    read_only_worker = ElementsWorker()
    read_only_worker.configure()
    return read_only_worker
@pytest.fixture
def mock_elements_worker_with_list(monkeypatch, responses, mock_elements_worker):
"""
......@@ -363,6 +373,18 @@ def mock_cached_elements():
assert CachedElement.select().count() == 5
@pytest.fixture
def mock_cached_images():
    """Insert a single image in the local cache"""
    image_fields = {
        "id": UUID("99999999-9999-9999-9999-999999999999"),
        "width": 1250,
        "height": 2500,
        "url": "http://testserver/iiif/3/image",
    }
    CachedImage.create(**image_fields)
    # Sanity check: the cache holds exactly the image created above
    assert CachedImage.select().count() == 1
@pytest.fixture
def mock_cached_transcriptions():
"""Insert few transcriptions in local cache, on a shared element"""
......
......@@ -10,7 +10,7 @@ import pytest
from arkindex.mock import MockApiClient
from arkindex_worker import logger
from arkindex_worker.worker import BaseWorker
from arkindex_worker.worker import BaseWorker, ElementsWorker
from arkindex_worker.worker.base import ModelNotFoundError
from tests.conftest import FIXTURES_DIR
......@@ -739,3 +739,25 @@ def test_extract_parent_archives(tmp_path):
)
mode = "rb" if extracted_file.suffix == ".png" else "r"
assert extracted_file.open(mode).read() == expected_file.open(mode).read()
def test_corpus_id_not_set_read_only_mode(
    mock_elements_worker_read_only: ElementsWorker,
):
    """
    Accessing ``corpus_id`` must raise once the worker is configured without a corpus ID.

    NOTE(review): assumes ``ARKINDEX_CORPUS_ID`` is absent from the test environment.
    """
    worker = mock_elements_worker_read_only
    worker.configure()

    expected_message = "Missing ARKINDEX_CORPUS_ID environment variable"
    with pytest.raises(Exception, match=expected_message):
        worker.corpus_id
def test_corpus_id_set_read_only_mode(
    monkeypatch, mock_elements_worker_read_only: ElementsWorker
):
    """
    ``corpus_id`` must expose the value of ``ARKINDEX_CORPUS_ID`` once the worker is configured.
    """
    expected_corpus_id = str(uuid.uuid4())
    monkeypatch.setenv("ARKINDEX_CORPUS_ID", expected_corpus_id)

    worker = mock_elements_worker_read_only
    worker.configure()

    assert worker.corpus_id == expected_corpus_id
......@@ -423,11 +423,6 @@ def test_create_classification(responses, mock_elements_worker):
"high_confidence": True,
}
# Classification has been created and reported
assert mock_elements_worker.report.report_data["elements"][elt.id][
"classifications"
] == {"a_class": 1}
def test_create_classification_with_cache(responses, mock_elements_worker_with_cache):
mock_elements_worker_with_cache.classes = {"a_class": "0000"}
......@@ -470,11 +465,6 @@ def test_create_classification_with_cache(responses, mock_elements_worker_with_c
"high_confidence": True,
}
# Classification has been created and reported
assert mock_elements_worker_with_cache.report.report_data["elements"][elt.id][
"classifications"
] == {"a_class": 1}
# Check that created classification was properly stored in SQLite cache
assert list(CachedClassification.select()) == [
CachedClassification(
......@@ -524,9 +514,6 @@ def test_create_classification_duplicate_worker_run(responses, mock_elements_wor
"high_confidence": True,
}
# Classification has NOT been created
assert mock_elements_worker.report.report_data["elements"] == {}
def test_create_classifications_wrong_element(mock_elements_worker):
with pytest.raises(AssertionError) as e:
......
......@@ -1210,94 +1210,208 @@ def test_create_elements_integrity_error(
assert list(CachedElement.select()) == []
def test_update_element_wrong_element(mock_elements_worker):
with pytest.raises(AssertionError) as e:
mock_elements_worker.update_element(
element=None,
)
assert (
str(e.value)
== "element shouldn't be null and should be an Element or CachedElement"
)
@pytest.mark.parametrize(
"payload, error",
(
# Element
(
{"element": None},
"element shouldn't be null and should be an Element or CachedElement",
),
(
{"element": "not element type"},
"element shouldn't be null and should be an Element or CachedElement",
),
),
)
def test_partial_update_element_wrong_param_element(
mock_elements_worker, payload, error
):
api_payload = {
"element": Element({"zone": None}),
**payload,
}
with pytest.raises(AssertionError) as e:
mock_elements_worker.update_element(
element="not element type",
mock_elements_worker.partial_update_element(
**api_payload,
)
assert (
str(e.value)
== "element shouldn't be null and should be an Element or CachedElement"
)
assert str(e.value) == error
def test_update_element_wrong_type(mock_elements_worker):
@pytest.mark.parametrize(
"payload, error",
(
# Type
({"type": 1234}, "type should be a str"),
({"type": None}, "type should be a str"),
),
)
def test_partial_update_element_wrong_param_type(mock_elements_worker, payload, error):
api_payload = {
"element": Element({"zone": None}),
**payload,
}
with pytest.raises(AssertionError) as e:
mock_elements_worker.update_element(
element=Element({"zone": None}),
type=1234,
mock_elements_worker.partial_update_element(
**api_payload,
)
assert str(e.value) == "type should be None or a str"
assert str(e.value) == error
def test_update_element_wrong_name(mock_elements_worker):
@pytest.mark.parametrize(
"payload, error",
(
# Name
({"name": 1234}, "name should be a str"),
({"name": None}, "name should be a str"),
),
)
def test_partial_update_element_wrong_param_name(mock_elements_worker, payload, error):
api_payload = {
"element": Element({"zone": None}),
**payload,
}
with pytest.raises(AssertionError) as e:
mock_elements_worker.update_element(
element=Element({"zone": None}),
name=1234,
mock_elements_worker.partial_update_element(
**api_payload,
)
assert str(e.value) == "name should be None or a str"
assert str(e.value) == error
def test_update_element_wrong_polygon(mock_elements_worker):
elt = Element({"zone": None})
@pytest.mark.parametrize(
"payload, error",
(
# Polygon
({"polygon": "not a polygon"}, "polygon should be a list"),
({"polygon": None}, "polygon should be a list"),
({"polygon": [[1, 1], [2, 2]]}, "polygon should have at least three points"),
(
{"polygon": [[1, 1, 1], [2, 2, 1], [2, 1, 1], [1, 2, 1]]},
"polygon points should be lists of two items",
),
(
{"polygon": [[1], [2], [2], [1]]},
"polygon points should be lists of two items",
),
(
{"polygon": [["not a coord", 1], [2, 2], [2, 1], [1, 2]]},
"polygon points should be lists of two numbers",
),
),
)
def test_partial_update_element_wrong_param_polygon(
mock_elements_worker, payload, error
):
api_payload = {
"element": Element({"zone": None}),
**payload,
}
with pytest.raises(AssertionError) as e:
mock_elements_worker.update_element(
element=elt,
polygon="not a polygon",
mock_elements_worker.partial_update_element(
**api_payload,
)
assert str(e.value) == "polygon should be None or a list"
assert str(e.value) == error
with pytest.raises(AssertionError) as e:
mock_elements_worker.update_element(
element=elt,
polygon=[[1, 1], [2, 2]],
)
assert str(e.value) == "polygon should have at least three points"
@pytest.mark.parametrize(
"payload, error",
(
# Confidence
({"confidence": "lol"}, "confidence should be None or a float in [0..1] range"),
({"confidence": "0.2"}, "confidence should be None or a float in [0..1] range"),
({"confidence": -1.0}, "confidence should be None or a float in [0..1] range"),
({"confidence": 1.42}, "confidence should be None or a float in [0..1] range"),
(
{"confidence": float("inf")},
"confidence should be None or a float in [0..1] range",
),
),
)
def test_partial_update_element_wrong_param_conf(mock_elements_worker, payload, error):
api_payload = {
"element": Element({"zone": None}),
**payload,
}
with pytest.raises(AssertionError) as e:
mock_elements_worker.update_element(
element=elt,
polygon=[[1, 1, 1], [2, 2, 1], [2, 1, 1], [1, 2, 1]],
mock_elements_worker.partial_update_element(
**api_payload,
)
assert str(e.value) == "polygon points should be lists of two items"
assert str(e.value) == error
@pytest.mark.parametrize(
"payload, error",
(
# Rotation angle
({"rotation_angle": "lol"}, "rotation_angle should be a positive integer"),
({"rotation_angle": -1}, "rotation_angle should be a positive integer"),
({"rotation_angle": 0.5}, "rotation_angle should be a positive integer"),
({"rotation_angle": None}, "rotation_angle should be a positive integer"),
),
)
def test_partial_update_element_wrong_param_rota(mock_elements_worker, payload, error):
api_payload = {
"element": Element({"zone": None}),
**payload,
}
with pytest.raises(AssertionError) as e:
mock_elements_worker.update_element(
element=elt,
polygon=[[1], [2], [2], [1]],
mock_elements_worker.partial_update_element(
**api_payload,
)
assert str(e.value) == "polygon points should be lists of two items"
assert str(e.value) == error
@pytest.mark.parametrize(
"payload, error",
(
# Mirrored
({"mirrored": "lol"}, "mirrored should be a boolean"),
({"mirrored": 1234}, "mirrored should be a boolean"),
({"mirrored": None}, "mirrored should be a boolean"),
),
)
def test_partial_update_element_wrong_param_mir(mock_elements_worker, payload, error):
api_payload = {
"element": Element({"zone": None}),
**payload,
}
with pytest.raises(AssertionError) as e:
mock_elements_worker.update_element(
element=elt,
polygon=[["not a coord", 1], [2, 2], [2, 1], [1, 2]],
mock_elements_worker.partial_update_element(
**api_payload,
)
assert str(e.value) == "polygon points should be lists of two numbers"
assert str(e.value) == error
@pytest.mark.parametrize("confidence", ["lol", "0.2", -1.0, 1.42, float("inf")])
def test_update_element_wrong_confidence(mock_elements_worker, confidence):
@pytest.mark.parametrize(
"payload, error",
(
# Image
({"image": "lol"}, "image should be a UUID"),
({"image": 1234}, "image should be a UUID"),
({"image": None}, "image should be a UUID"),
),
)
def test_partial_update_element_wrong_param_image(mock_elements_worker, payload, error):
api_payload = {
"element": Element({"zone": None}),
**payload,
}
with pytest.raises(AssertionError) as e:
mock_elements_worker.update_element(
element=Element({"zone": None}),
confidence=confidence,
mock_elements_worker.partial_update_element(
**api_payload,
)
assert str(e.value) == "confidence should be None or a float in [0..1] range"
assert str(e.value) == error
def test_update_element_api_error(responses, mock_elements_worker):
def test_partial_update_element_api_error(responses, mock_elements_worker):
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
responses.add(
responses.PATCH,
......@@ -1306,7 +1420,7 @@ def test_update_element_api_error(responses, mock_elements_worker):
)
with pytest.raises(ErrorResponse):
mock_elements_worker.update_element(
mock_elements_worker.partial_update_element(
element=elt,
type="something",
name="0",
......@@ -1326,26 +1440,56 @@ def test_update_element_api_error(responses, mock_elements_worker):
]
def test_update_element(
responses, mock_elements_worker_with_cache, mock_cached_elements
@pytest.mark.parametrize(
"payload",
(
(
{
"polygon": [[10, 10], [20, 20], [20, 10], [10, 20]],
"confidence": None,
}
),
(
{
"rotation_angle": 45,
"mirrored": False,
}
),
(
{
"polygon": [[10, 10], [20, 20], [20, 10], [10, 20]],
"confidence": None,
"rotation_angle": 45,
"mirrored": False,
}
),
),
)
def test_partial_update_element(
responses,
mock_elements_worker_with_cache,
mock_cached_elements,
mock_cached_images,
payload,
):
elt = CachedElement.select().first()
new_image = CachedImage.select().first()
elt_response = {
"type": "new type",
"name": "new name",
"polygon": [[10, 10], [20, 20], [20, 10], [10, 20]],
"confidence": None,
"image": str(new_image.id),
**payload,
}
responses.add(
responses.PATCH,
f"http://testserver/api/v1/element/{elt.id}/",
status=200,
# UUID not allowed in JSON
json=elt_response,
)
element_update_response = mock_elements_worker_with_cache.update_element(
element_update_response = mock_elements_worker_with_cache.partial_update_element(
element=elt,
**elt_response,
**{**elt_response, "image": new_image.id},
)
assert len(responses.calls) == len(BASE_API_CALLS) + 1
......@@ -1361,20 +1505,25 @@ def test_update_element(
assert element_update_response == elt_response
cached_element = CachedElement.get(CachedElement.id == elt.id)
assert cached_element.type == elt_response["type"]
assert cached_element.polygon == str(elt_response["polygon"])
assert cached_element.confidence == elt_response["confidence"]
# Always present in payload
assert str(cached_element.image_id) == elt_response["image"]
# Optional params
if "polygon" in payload:
# Cast to string as this is the only difference compared to model
elt_response["polygon"] = str(elt_response["polygon"])
for param in payload:
assert getattr(cached_element, param) == elt_response[param]
def test_update_element_confidence(
responses, mock_elements_worker_with_cache, mock_cached_elements
@pytest.mark.parametrize("confidence", (None, 0.42))
def test_partial_update_element_confidence(
responses, mock_elements_worker_with_cache, mock_cached_elements, confidence
):
elt = CachedElement.select().first()
elt_response = {
"type": "new type",
"name": "new name",
"polygon": [[10, 10], [20, 20], [20, 10], [10, 20]],
"confidence": 0.42,
"confidence": confidence,
}
responses.add(
responses.PATCH,
......@@ -1383,7 +1532,7 @@ def test_update_element_confidence(
json=elt_response,
)
element_update_response = mock_elements_worker_with_cache.update_element(
element_update_response = mock_elements_worker_with_cache.partial_update_element(
element=elt,
**elt_response,
)
......@@ -1401,9 +1550,8 @@ def test_update_element_confidence(
assert element_update_response == elt_response
cached_element = CachedElement.get(CachedElement.id == elt.id)
assert cached_element.type == elt_response["type"]
assert cached_element.polygon == str(elt_response["polygon"])
assert cached_element.confidence == elt_response["confidence"]
assert cached_element.confidence == confidence
def test_list_element_children_wrong_element(mock_elements_worker):
......
......@@ -12,43 +12,16 @@ from arkindex_worker.cache import (
CachedTranscription,
CachedTranscriptionEntity,
)
from arkindex_worker.models import Element, Transcription
from arkindex_worker.models import Transcription
from arkindex_worker.worker.entity import MissingEntityType
from arkindex_worker.worker.transcription import TextOrientation
from . import BASE_API_CALLS
def test_create_entity_wrong_element(mock_elements_worker):
    """
    ``create_entity`` must refuse both a null element and a value of the wrong type.
    """
    expected_message = (
        "element shouldn't be null and should be an Element or CachedElement"
    )

    # Same call and same expectation for every invalid element value
    for bad_element in (None, "not element type"):
        with pytest.raises(AssertionError) as excinfo:
            mock_elements_worker.create_entity(
                element=bad_element,
                name="Bob Bob",
                type="person",
            )
        assert str(excinfo.value) == expected_message
def test_create_entity_wrong_name(mock_elements_worker):
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
with pytest.raises(AssertionError) as e:
mock_elements_worker.create_entity(
element=elt,
name=None,
type="person",
)
......@@ -56,7 +29,6 @@ def test_create_entity_wrong_name(mock_elements_worker):
with pytest.raises(AssertionError) as e:
mock_elements_worker.create_entity(
element=elt,
name=1234,
type="person",
)
......@@ -64,11 +36,8 @@ def test_create_entity_wrong_name(mock_elements_worker):
def test_create_entity_wrong_type(mock_elements_worker):
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
with pytest.raises(AssertionError) as e:
mock_elements_worker.create_entity(
element=elt,
name="Bob Bob",
type=None,
)
......@@ -76,7 +45,6 @@ def test_create_entity_wrong_type(mock_elements_worker):
with pytest.raises(AssertionError) as e:
mock_elements_worker.create_entity(
element=elt,
name="Bob Bob",
type=1234,
)
......@@ -84,13 +52,10 @@ def test_create_entity_wrong_type(mock_elements_worker):
def test_create_entity_wrong_corpus(monkeypatch, mock_elements_worker):
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
# Triggering an error on metas param, not giving corpus should work since
# ARKINDEX_CORPUS_ID environment variable is set on mock_elements_worker
with pytest.raises(AssertionError) as e:
mock_elements_worker.create_entity(
element=elt,
name="Bob Bob",
type="person",
metas="wrong metas",
......@@ -99,11 +64,8 @@ def test_create_entity_wrong_corpus(monkeypatch, mock_elements_worker):
def test_create_entity_wrong_metas(mock_elements_worker):
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
with pytest.raises(AssertionError) as e:
mock_elements_worker.create_entity(
element=elt,
name="Bob Bob",
type="person",
metas="wrong metas",
......@@ -112,11 +74,8 @@ def test_create_entity_wrong_metas(mock_elements_worker):
def test_create_entity_wrong_validated(mock_elements_worker):
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
with pytest.raises(AssertionError) as e:
mock_elements_worker.create_entity(
element=elt,
name="Bob Bob",
type="person",
validated="wrong validated",
......@@ -127,7 +86,6 @@ def test_create_entity_wrong_validated(mock_elements_worker):
def test_create_entity_api_error(responses, mock_elements_worker):
# Set one entity type
mock_elements_worker.entity_types = {"person": "person-entity-type-id"}
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
responses.add(
responses.POST,
"http://testserver/api/v1/entity/",
......@@ -136,7 +94,6 @@ def test_create_entity_api_error(responses, mock_elements_worker):
with pytest.raises(ErrorResponse):
mock_elements_worker.create_entity(
element=elt,
name="Bob Bob",
type="person",
)
......@@ -158,7 +115,6 @@ def test_create_entity(responses, mock_elements_worker):
# Set one entity type
mock_elements_worker.entity_types = {"person": "person-entity-type-id"}
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
responses.add(
responses.POST,
"http://testserver/api/v1/entity/",
......@@ -167,7 +123,6 @@ def test_create_entity(responses, mock_elements_worker):
)
entity_id = mock_elements_worker.create_entity(
element=elt,
name="Bob Bob",
type="person",
)
......@@ -193,8 +148,6 @@ def test_create_entity_missing_type(responses, mock_elements_worker):
"""
Create entity with an unknown type will fail.
"""
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
# Call to list entity types
responses.add(
responses.GET,
......@@ -213,7 +166,6 @@ def test_create_entity_missing_type(responses, mock_elements_worker):
AssertionError, match="Entity type `new-entity` not found in the corpus."
):
mock_elements_worker.create_entity(
element=elt,
name="Bob Bob",
type="new-entity",
)
......@@ -232,7 +184,6 @@ def test_create_entity_missing_type(responses, mock_elements_worker):
def test_create_entity_with_cache(responses, mock_elements_worker_with_cache):
# Set one entity type
mock_elements_worker_with_cache.entity_types = {"person": "person-entity-type-id"}
elt = CachedElement.create(id="12341234-1234-1234-1234-123412341234", type="thing")
responses.add(
responses.POST,
"http://testserver/api/v1/entity/",
......@@ -241,7 +192,6 @@ def test_create_entity_with_cache(responses, mock_elements_worker_with_cache):
)
entity_id = mock_elements_worker_with_cache.create_entity(
element=elt,
name="Bob Bob",
type="person",
)
......@@ -910,22 +860,11 @@ def test_create_transcription_entities_wrong_transcription(
)
def test_create_transcription_entities_no_transcription_element(mock_elements_worker):
    """
    A transcription built without an ``element`` must be rejected with an explicit message.
    """
    expected_message = "No element linked to Transcription (transcription_id)"

    with pytest.raises(AssertionError) as excinfo:
        mock_elements_worker.create_transcription_entities(
            entities=[],
            transcription=Transcription(id="transcription_id"),
        )

    assert str(excinfo.value) == expected_message
@pytest.mark.parametrize("entities", (None, "not a list of entities", 1))
def test_create_transcription_entities_wrong_entities(mock_elements_worker, entities):
    """
    Check that ``create_transcription_entities`` rejects an ``entities`` value that is not a list.

    The parametrized values cover ``None``, a string and an integer.
    """
    with pytest.raises(AssertionError) as e:
        mock_elements_worker.create_transcription_entities(
            # Passed once only: a repeated keyword argument is a syntax error
            transcription=Transcription(id="transcription_id"),
            entities=entities,
        )
    assert str(e.value) == "entities shouldn't be null and should be of type list"
......@@ -934,9 +873,7 @@ def test_create_transcription_entities_wrong_entities(mock_elements_worker, enti
def test_create_transcription_entities_wrong_entities_subtype(mock_elements_worker):
    """
    Check that ``create_transcription_entities`` rejects a list whose items are not dicts.

    The error message is expected to point at the index of the offending item.
    """
    with pytest.raises(AssertionError) as e:
        mock_elements_worker.create_transcription_entities(
            # Passed once only: a repeated keyword argument is a syntax error
            transcription=Transcription(id="transcription_id"),
            entities=["not a dict"],
        )
    assert str(e.value) == "Entity at index 0 in entities: Should be of type dict"
......@@ -1030,17 +967,14 @@ def test_create_transcription_entities_wrong_entity(
):
with pytest.raises(AssertionError) as e:
mock_elements_worker.create_transcription_entities(
transcription=Transcription(
id="transcription_id", element={"id": "element_id"}
),
transcription=Transcription(id="transcription_id"),
entities=[entity],
)
assert str(e.value) == error
def test_create_transcription_entities(responses, mock_elements_worker):
element_id = "element_id"
transcription = Transcription(id="transcription-id", element={"id": element_id})
transcription = Transcription(id="transcription-id")
# Call to Transcription entities creation in bulk
responses.add(
responses.POST,
......@@ -1091,35 +1025,6 @@ def test_create_transcription_entities(responses, mock_elements_worker):
assert len(created_objects) == 1
assert element_id in mock_elements_worker.report.report_data["elements"]
ml_report = mock_elements_worker.report.report_data["elements"][element_id]
assert "started" in ml_report
del ml_report["started"]
# Check reporting
assert ml_report == {
"elements": {},
"transcriptions": 0,
"classifications": {},
"entities": [
{
"id": "entity-id",
"type": "22222222-2222-2222-2222-222222222222",
"name": "Teklia",
}
],
"transcription_entities": [
{
"transcription_id": "transcription-id",
"entity_id": "entity-id",
"transcription_entity_id": "transc-entity-id",
}
],
"metadata": [],
"errors": [],
}
assert len(responses.calls) == len(BASE_API_CALLS) + 1
assert [
(call.request.method, call.request.url) for call in responses.calls
......
# -*- coding: utf-8 -*-
import json
import uuid
from datetime import datetime
from tempfile import NamedTemporaryFile
import pytest
from apistar.exceptions import ErrorResponse
from arkindex_worker.models import Transcription
from arkindex_worker.reporting import Reporter
def test_init():
    """
    A freshly built Reporter stores its name, slug and version, and starts with no elements.
    """
    worker_version = str(uuid.uuid4())
    report = Reporter(
        name="Worker", slug="worker-slug", version=worker_version
    ).report_data

    # The start timestamp changes on every run: check it exists, then discard it
    assert "started" in report
    report.pop("started")

    expected = {
        "name": "Worker",
        "slug": "worker-slug",
        "version": worker_version,
        "elements": {},
    }
    assert report == expected
def test_process():
    """
    Processing an element registers an entry for it with empty counters and lists.
    """
    reporter = Reporter("worker")
    reporter.process("myelement")

    elements = reporter.report_data["elements"]
    assert "myelement" in elements

    entry = elements["myelement"]
    # The start timestamp is not deterministic: assert its presence, then drop it
    assert "started" in entry
    entry.pop("started")

    expected = {
        "elements": {},
        "transcriptions": 0,
        "classifications": {},
        "entities": [],
        "transcription_entities": [],
        "metadata": [],
        "errors": [],
    }
    assert entry == expected
def test_add_element():
    """
    Reporting one created element counts it once under its type.
    """
    reporter = Reporter("worker")
    reporter.add_element("myelement", type="text_line")

    elements = reporter.report_data["elements"]
    assert "myelement" in elements

    entry = elements["myelement"]
    # Timestamp is not comparable across runs
    entry.pop("started")

    expected = {
        "elements": {"text_line": 1},
        "transcriptions": 0,
        "classifications": {},
        "entities": [],
        "transcription_entities": [],
        "metadata": [],
        "errors": [],
    }
    assert entry == expected
def test_add_element_count():
    """
    Several elements of one type under the same parent can be reported in a single call.
    """
    reporter = Reporter("worker")
    reporter.add_element("myelement", type="text_line", type_count=42)

    elements = reporter.report_data["elements"]
    assert "myelement" in elements

    entry = elements["myelement"]
    # Timestamp is not comparable across runs
    entry.pop("started")

    expected = {
        "elements": {"text_line": 42},
        "transcriptions": 0,
        "classifications": {},
        "entities": [],
        "transcription_entities": [],
        "metadata": [],
        "errors": [],
    }
    assert entry == expected
def test_add_classification():
    """
    Reporting a single classification counts it once under its class name.
    """
    reporter = Reporter("worker")
    reporter.add_classification("myelement", "three")

    elements = reporter.report_data["elements"]
    assert "myelement" in elements

    entry = elements["myelement"]
    # Timestamp is not comparable across runs
    entry.pop("started")

    expected = {
        "elements": {},
        "transcriptions": 0,
        "classifications": {"three": 1},
        "entities": [],
        "transcription_entities": [],
        "metadata": [],
        "errors": [],
    }
    assert entry == expected
def test_add_classifications():
    """
    Bulk classification reporting requires a list and accumulates counts per class name.
    """
    reporter = Reporter("worker")

    # Anything other than a list is refused
    with pytest.raises(AssertionError):
        reporter.add_classifications("myelement", {"not": "a list"})

    first_batch = [{"class_name": "three"}, {"class_name": "two"}]
    second_batch = [
        {"class_name": "three"},
        {"class_name": "two", "high_confidence": True},
        {"class_name": "three", "confidence": 0.42},
    ]
    reporter.add_classifications("myelement", first_batch)
    reporter.add_classifications("myelement", second_batch)

    elements = reporter.report_data["elements"]
    assert "myelement" in elements

    entry = elements["myelement"]
    # Timestamp is not comparable across runs
    entry.pop("started")

    # Counts add up across both batches
    expected = {
        "elements": {},
        "transcriptions": 0,
        "classifications": {"three": 3, "two": 2},
        "entities": [],
        "transcription_entities": [],
        "metadata": [],
        "errors": [],
    }
    assert entry == expected
def test_add_transcription():
    """
    Reporting a transcription without an explicit count records exactly one.
    """
    reporter = Reporter("worker")
    reporter.add_transcription("myelement")

    elements = reporter.report_data["elements"]
    assert "myelement" in elements

    entry = elements["myelement"]
    # Timestamp is not comparable across runs
    entry.pop("started")

    expected = {
        "elements": {},
        "transcriptions": 1,
        "classifications": {},
        "entities": [],
        "transcription_entities": [],
        "metadata": [],
        "errors": [],
    }
    assert entry == expected
def test_add_transcription_count():
    """
    Passing a count records that many transcriptions on the element in one call.
    """
    reporter = Reporter("worker")
    reporter.add_transcription("myelement", 1337)

    elements = reporter.report_data["elements"]
    assert "myelement" in elements

    entry = elements["myelement"]
    # Timestamp is not comparable across runs
    entry.pop("started")

    expected = {
        "elements": {},
        "transcriptions": 1337,
        "classifications": {},
        "entities": [],
        "transcription_entities": [],
        "metadata": [],
        "errors": [],
    }
    assert entry == expected
def test_add_entity():
    """
    A reported entity is listed on the element with its id, type and name.
    """
    reporter = Reporter("worker")
    reporter.add_entity(
        "myelement",
        "12341234-1234-1234-1234-123412341234",
        "person-entity-type-id",
        "Bob Bob",
    )

    elements = reporter.report_data["elements"]
    assert "myelement" in elements

    entry = elements["myelement"]
    # Timestamp is not comparable across runs
    entry.pop("started")

    reported_entity = {
        "id": "12341234-1234-1234-1234-123412341234",
        "type": "person-entity-type-id",
        "name": "Bob Bob",
    }
    expected = {
        "elements": {},
        "transcriptions": 0,
        "classifications": {},
        "entities": [reported_entity],
        "transcription_entities": [],
        "metadata": [],
        "errors": [],
    }
    assert entry == expected
def test_add_transcription_entity():
    """
    A transcription/entity link is filed under the element referenced by the transcription.
    """
    reporter = Reporter("worker")
    transcription = Transcription(
        {"id": "1234-5678", "element": {"id": "myelement"}}
    )
    reporter.add_transcription_entity("5678", transcription, "1234")

    elements = reporter.report_data["elements"]
    assert "myelement" in elements

    entry = elements["myelement"]
    # Timestamp is not comparable across runs
    entry.pop("started")

    reported_link = {
        "transcription_id": "1234-5678",
        "entity_id": "5678",
        "transcription_entity_id": "1234",
    }
    expected = {
        "elements": {},
        "transcriptions": 0,
        "classifications": {},
        "entities": [],
        "transcription_entities": [reported_link],
        "metadata": [],
        "errors": [],
    }
    assert entry == expected
def test_add_metadata():
    """
    A reported metadata is listed on the element with its id, type and name.
    """
    reporter = Reporter("worker")
    reporter.add_metadata(
        "myelement",
        "12341234-1234-1234-1234-123412341234",
        "location",
        "Teklia",
    )

    elements = reporter.report_data["elements"]
    assert "myelement" in elements

    entry = elements["myelement"]
    # Timestamp is not comparable across runs
    entry.pop("started")

    reported_metadata = {
        "id": "12341234-1234-1234-1234-123412341234",
        "type": "location",
        "name": "Teklia",
    }
    expected = {
        "elements": {},
        "transcriptions": 0,
        "classifications": {},
        "entities": [],
        "transcription_entities": [],
        "metadata": [reported_metadata],
        "errors": [],
    }
    assert entry == expected
def test_error():
    """
    Reported exceptions keep their class name and message; API errors also keep status code and content.
    """
    reporter = Reporter("worker")

    failures = (
        ZeroDivisionError("What have you done"),
        ErrorResponse(
            title="I'm a teapot",
            status_code=418,
            content='{"coffee": "Can\'t touch this"}',
        ),
    )
    for failure in failures:
        reporter.error("myelement", failure)

    # Errors are expected in reporting order
    expected_errors = [
        {"class": "ZeroDivisionError", "message": "What have you done"},
        {
            "class": "ErrorResponse",
            "message": "I'm a teapot",
            "status_code": 418,
            "content": '{"coffee": "Can\'t touch this"}',
        },
    ]
    assert reporter.report_data["elements"]["myelement"]["errors"] == expected_errors
def test_reporter_save(mocker):
    """
    Saving the report writes its full content as JSON to the given path.

    The ``datetime`` name used by the reporting module is replaced with a mock
    so that every ``started`` timestamp is a fixed, comparable value.
    """
    fake_datetime = mocker.MagicMock()
    fake_datetime.utcnow.return_value = datetime(2000, 1, 1)
    mocker.patch("arkindex_worker.reporting.datetime", fake_datetime)

    worker_version = str(uuid.uuid4())
    reporter = Reporter(name="Worker", slug="worker-slug", version=worker_version)
    reporter.add_element("myelement", type="text_line", type_count=4)

    with NamedTemporaryFile() as report_file:
        # The report is written through the path, then read back from the open handle
        reporter.save(report_file.name)
        saved_report = json.load(report_file)

    expected_element = {
        "classifications": {},
        "elements": {"text_line": 4},
        "entities": [],
        "transcription_entities": [],
        "errors": [],
        "metadata": [],
        "started": "2000-01-01T00:00:00",
        "transcriptions": 0,
    }
    assert saved_report == {
        "name": "Worker",
        "slug": "worker-slug",
        "started": "2000-01-01T00:00:00",
        "version": worker_version,
        "elements": {"myelement": expected_element},
    }