diff --git a/arkindex_worker/reporting.py b/arkindex_worker/reporting.py deleted file mode 100644 index b9ee03ef465795cefc377fdaf9f300f3f3d252b1..0000000000000000000000000000000000000000 --- a/arkindex_worker/reporting.py +++ /dev/null @@ -1,236 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Generator for the ``ml_report.json`` file, to report created worker results and exceptions. -""" - -import json -import traceback -from collections import Counter -from datetime import datetime -from pathlib import Path -from typing import Dict, List, Optional, Union -from uuid import UUID - -from apistar.exceptions import ErrorResponse - -from arkindex_worker import logger -from arkindex_worker.models import Transcription - - -class Reporter(object): - """ - Helper to generate an ``ml_report.json`` artifact. - """ - - def __init__( - self, - name: Optional[str] = "Unknown worker", - slug: Optional[str] = "unknown-slug", - version: Optional[str] = None, - **kwargs, - ): - self.report_data = { - "name": name, - "slug": slug, - "version": version, - "started": datetime.utcnow().isoformat(), - "elements": {}, - } - logger.info(f"Starting ML report for {name}") - - def __repr__(self): - return "{}({})".format(self.__class__.__name__, self.report_data["slug"]) - - def _get_element(self, element_id): - return self.report_data["elements"].setdefault( - str(element_id), - { - "started": datetime.utcnow().isoformat(), - # Created element counts, by type slug - "elements": {}, - # Created transcriptions count - "transcriptions": 0, - # Created classification counts, by class - "classifications": {}, - # Created entities ({"id": "", "type": "", "name": ""}) from this element - "entities": [], - # Created transcription entities ({"transcription_id": "", "entity_id": ""}) from this element - "transcription_entities": [], - # Created metadata ({"id": "", "type": "", "name": ""}) from this element - "metadata": [], - "errors": [], - }, - ) - - def process(self, element_id: Union[str, UUID]): - """ - Report that a specific element ID is being processed. - - :param element_id: ID of the element being processed. - """ - # Just call the element initializer - self._get_element(element_id) - - def add_element(self, parent_id: Union[str, UUID], type: str, type_count: int = 1): - """ - Report creating an element as a child of another. - - :param parent_id: ID of the parent element. - :param type: Slug of the type of the child element. - :param type_count: How many elements of this type were created. - """ - elements = self._get_element(parent_id)["elements"] - elements.setdefault(type, 0) - elements[type] += type_count - - def add_classification(self, element_id: Union[str, UUID], class_name: str): - """ - Report creating a classification on an element. - - :param element_id: ID of the element. - :param class_name: Name of the ML class of the new classification. - """ - classifications = self._get_element(element_id)["classifications"] - classifications.setdefault(class_name, 0) - classifications[class_name] += 1 - - def add_classifications( - self, element_id: Union[str, UUID], classifications: List[Dict[str, str]] - ): - """ - Report creating one or more classifications at once on an element. - - :param element_id: ID of the element. - :param classifications: List of classifications. - Each classification is represented as a ``dict`` with a ``class_name`` key - holding the name of the ML class being used. - """ - assert isinstance( - classifications, list - ), "A list is required for classifications" - element = self._get_element(element_id) - # Retrieve the previous existing classification counts, if any - counter = Counter(**element["classifications"]) - # Add the new ones - counter.update( - [classification["class_name"] for classification in classifications] - ) - element["classifications"] = dict(counter) - - def add_transcription(self, element_id: Union[str, UUID], count=1): - """ - Report creating a transcription on an element. - - :param element_id: ID of the element. - :param count: Number of transcriptions created at once - """ - self._get_element(element_id)["transcriptions"] += count - - def add_entity( - self, - element_id: Union[str, UUID], - entity_id: Union[str, UUID], - type: str, - name: str, - ): - """ - Report creating an entity on an element. - - :param element_id: ID of the element. - :param entity_id: ID of the new entity. - :param type: Type of the entity. - :param name: Name of the entity. - """ - entities = self._get_element(element_id)["entities"] - entities.append({"id": entity_id, "type": type, "name": name}) - - def add_transcription_entity( - self, - entity_id: Union[str, UUID], - transcription: Transcription, - transcription_entity_id: Union[str, UUID], - ): - """ - Report creating a transcription entity on an element. - - :param entity_id: ID of the entity element. - :param transcription: Transcription to add the entity on - :param transcription_entity_id: ID of the transcription entity that is created. - """ - transcription_entities = self._get_element(transcription.element.id)[ - "transcription_entities" - ] - transcription_entities.append( - { - "transcription_id": transcription.id, - "entity_id": entity_id, - "transcription_entity_id": transcription_entity_id, - } - ) - - def add_entity_link(self, *args, **kwargs): - """ - Report creating an entity link. Not currently supported. - - :raises NotImplementedError: - """ - raise NotImplementedError - - def add_entity_role(self, *args, **kwargs): - """ - Report creating an entity role. Not currently supported. - - :raises NotImplementedError: - """ - raise NotImplementedError - - def add_metadata( - self, - element_id: Union[str, UUID], - metadata_id: Union[str, UUID], - type: str, - name: str, - ): - """ - Report creating a metadata from an element. - - :param element_id: ID of the element. - :param metadata_id: ID of the new metadata. - :param type: Type of the metadata. - :param name: Name of the metadata. - """ - metadata = self._get_element(element_id)["metadata"] - metadata.append({"id": metadata_id, "type": type, "name": name}) - - def error(self, element_id: Union[str, UUID], exception: Exception): - """ - Report that a Python exception occurred when processing an element. - - :param element_id: ID of the element. - :param exception: A Python exception. - """ - error_data = { - "class": exception.__class__.__name__, - "message": str(exception), - } - if exception.__traceback__ is not None: - error_data["traceback"] = "\n".join( - traceback.format_tb(exception.__traceback__) - ) - - if isinstance(exception, ErrorResponse): - error_data["message"] = exception.title - error_data["status_code"] = exception.status_code - error_data["content"] = exception.content - - self._get_element(element_id)["errors"].append(error_data) - - def save(self, path: Union[str, Path]): - """ - Save the ML report to the specified path. - - :param path: Path to save the ML report to. - """ - logger.info(f"Saving ML report to {path}") - with open(path, "w") as f: - json.dump(self.report_data, f) diff --git a/arkindex_worker/worker/__init__.py b/arkindex_worker/worker/__init__.py index 7ff2f70e4fe6a35e06c773615db03530475fec45..e2a6aa8eb32cb0eebfd4b35e692927934fd7c1c1 100644 --- a/arkindex_worker/worker/__init__.py +++ b/arkindex_worker/worker/__init__.py @@ -15,7 +15,6 @@ from apistar.exceptions import ErrorResponse from arkindex_worker import logger from arkindex_worker.cache import CachedElement from arkindex_worker.models import Element -from arkindex_worker.reporting import Reporter from arkindex_worker.worker.base import BaseWorker from arkindex_worker.worker.classification import ClassificationMixin from arkindex_worker.worker.element import ElementMixin @@ -156,17 +155,12 @@ class ElementsWorker( super().configure() super().configure_cache() - # Add report concerning elements - self.report = Reporter( - **self.worker_details, version=getattr(self, "worker_version_id", None) - ) - def run(self): """ Implements an Arkindex worker that goes through each element returned by - [list_elements][arkindex_worker.worker.ElementsWorker.list_elements]. It calls [process_element][arkindex_worker.worker.ElementsWorker.process_element], catching exceptions - and reporting them using the [Reporter][arkindex_worker.reporting.Reporter], and handles saving the report - once the process is complete as well as WorkerActivity updates when enabled. + [list_elements][arkindex_worker.worker.ElementsWorker.list_elements]. + It calls [process_element][arkindex_worker.worker.ElementsWorker.process_element], + catching exceptions, and handles saving WorkerActivity updates when enabled. """ self.configure() @@ -232,10 +226,6 @@ class ElementsWorker( self.update_activity(element.id, ActivityState.Error) except Exception: pass - self.report.error(element_id, e) - - # Save report as local artifact - self.report.save(self.work_dir / "ml_report.json") if failed: logger.error( diff --git a/arkindex_worker/worker/classification.py b/arkindex_worker/worker/classification.py index 98cc6eaeb6808312d39ae9e63da3d573a0c8cf93..e28cefe7604acc525cea6b9824de683bf222df4e 100644 --- a/arkindex_worker/worker/classification.py +++ b/arkindex_worker/worker/classification.py @@ -176,8 +176,6 @@ class ClassificationMixin(object): # Propagate any other API error raise - self.report.add_classification(element.id, ml_class) - return created def create_classifications( @@ -248,7 +246,6 @@ class ClassificationMixin(object): for created_cl in created_cls: created_cl["class_name"] = self.retrieve_ml_class(created_cl["ml_class"]) - self.report.add_classification(element.id, created_cl["class_name"]) if self.use_cache: # Store classifications in local cache diff --git a/arkindex_worker/worker/element.py b/arkindex_worker/worker/element.py index c9ed56974020c0bf37bb1e5d71d76a6feba0edf8..2dee2fed1ab5428c6201ca69e14fb8d401515f01 100644 --- a/arkindex_worker/worker/element.py +++ b/arkindex_worker/worker/element.py @@ -142,7 +142,6 @@ class ElementMixin(object): "confidence": confidence, }, ) - self.report.add_element(element.id, type) return sub_element["id"] if slim_output else sub_element @@ -238,9 +237,6 @@ class ElementMixin(object): }, ) - for element in elements: - self.report.add_element(parent.id, element["type"]) - if self.use_cache: # Create the image as needed and handle both an Element and a CachedElement if isinstance(parent, CachedElement): diff --git a/arkindex_worker/worker/entity.py b/arkindex_worker/worker/entity.py index 32416263bd356c7086eed4379c8973aeacb822e1..ccdf2267bf3698a46da6a2069386ff4783bace9e 100644 --- a/arkindex_worker/worker/entity.py +++ b/arkindex_worker/worker/entity.py @@ -8,7 +8,7 @@ from typing import Dict, List, Optional, TypedDict, Union from peewee import IntegrityError from arkindex_worker import logger -from arkindex_worker.cache import CachedElement, CachedEntity, CachedTranscriptionEntity +from arkindex_worker.cache import CachedEntity, CachedTranscriptionEntity from arkindex_worker.models import Element, Transcription Entity = TypedDict( @@ -68,7 +68,6 @@ class EntityMixin(object): def create_entity( self, - element: Union[Element, CachedElement], name: str, type: str, metas=dict(), @@ -78,14 +77,9 @@ class EntityMixin(object): Create an entity on the given corpus. If cache support is enabled, a [CachedEntity][arkindex_worker.cache.CachedEntity] will also be created. - :param element: An element on which the entity will be reported with the [Reporter][arkindex_worker.reporting.Reporter]. - This does not have any effect on the entity itself. :param name: Name of the entity. :param type: Type of the entity. """ - assert element and isinstance( - element, (Element, CachedElement) - ), "element shouldn't be null and should be an Element or CachedElement" assert name and isinstance( name, str ), "name shouldn't be null and should be of type str" @@ -119,7 +113,6 @@ class EntityMixin(object): "worker_run_id": self.worker_run_id, }, ) - self.report.add_entity(element.id, entity["id"], entity_type_id, name) if self.use_cache: # Store entity in local cache @@ -196,7 +189,6 @@ class EntityMixin(object): id=transcription.id, body=body, ) - self.report.add_transcription_entity(entity, transcription, transcription_ent) if self.use_cache: # Store transcription entity in local cache @@ -247,11 +239,6 @@ class EntityMixin(object): transcription, Transcription ), "transcription shouldn't be null and should be of type Transcription" - # Needed for MLreport - assert ( - hasattr(transcription, "element") and transcription.element - ), f"No element linked to {transcription}" - assert entities and isinstance( entities, list ), "entities shouldn't be null and should be of type list" @@ -301,22 +288,6 @@ class EntityMixin(object): }, ) - for entity, created_objects in zip(entities, created_ids["entities"]): - # Report entity creation - self.report.add_entity( - transcription.element.id, - created_objects["entity_id"], - entity.get("type_id"), - entity.get("name"), - ) - - # Report transcription entity creation - self.report.add_transcription_entity( - created_objects["entity_id"], - transcription, - created_objects["transcription_entity_id"], - ) - return created_ids["entities"] def list_transcription_entities( diff --git a/arkindex_worker/worker/metadata.py b/arkindex_worker/worker/metadata.py index fca1e92b4650458f32b8c403dce85bf0c927dd42..c6bae33c9d5570a44e7523210db0adf909452ee6 100644 --- a/arkindex_worker/worker/metadata.py +++ b/arkindex_worker/worker/metadata.py @@ -105,7 +105,6 @@ class MetaDataMixin(object): "worker_run_id": self.worker_run_id, }, ) - self.report.add_metadata(element.id, metadata["id"], type.value, name) return metadata["id"] @@ -182,9 +181,6 @@ class MetaDataMixin(object): }, )["metadata_list"] - for meta in created_metadatas: - self.report.add_metadata(element.id, meta["id"], meta["type"], meta["name"]) - return created_metadatas def list_element_metadata( diff --git a/arkindex_worker/worker/transcription.py b/arkindex_worker/worker/transcription.py index 5ddab34c86b950a9ec8796947ed93091e2af106a..7ce96c689596486bdcdab3251d4a73c3c6877a9c 100644 --- a/arkindex_worker/worker/transcription.py +++ b/arkindex_worker/worker/transcription.py @@ -88,8 +88,6 @@ class TranscriptionMixin(object): }, ) - self.report.add_transcription(element.id) - if self.use_cache: # Store transcription in local cache try: @@ -181,9 +179,6 @@ class TranscriptionMixin(object): }, )["transcriptions"] - for created_tr in created_trs: - self.report.add_transcription(created_tr["element_id"]) - if self.use_cache: # Store transcriptions in local cache try: @@ -308,8 +303,6 @@ class TranscriptionMixin(object): logger.debug( f"A sub_element of {element.id} with type {sub_element_type} was created during transcriptions bulk creation" ) - self.report.add_element(element.id, sub_element_type) - self.report.add_transcription(annotation["element_id"]) if self.use_cache: # Store transcriptions and their associated element (if created) in local cache diff --git a/docs/contents/implem/configure.md b/docs/contents/implem/configure.md index 59865d18c530c244812cbaa0907b5a625632c065..4901dbe1f9645f59148183313293dc6e42ddc638 100644 --- a/docs/contents/implem/configure.md +++ b/docs/contents/implem/configure.md @@ -83,20 +83,6 @@ The multiple configuration sources from the Arkindex-mode are merged into a uniq One information cannot be retrieved directly from the configuration file and is required in some cases: the ID of the Arkindex corpus which the elements processed belong to. This is retrieved via the `ARKINDEX_CORPUS_ID` environment variable. -## Worker reporter -At the end of a worker execution, a report about the publication done by the worker is generated in JSON-format. This lists - -- the starting time, -- the number of elements created, grouped by type, -- the number of transcription created, -- the number of classifications created, grouped by class, -- the number of entities created, -- the number of entities created on transcriptions, -- the number of metadatas created, -- the encountered errors' logs. - -This is done by the many helper described in the [reporting module](../../ref/reporting.md). They use the `report` attribute initialized at the configuration stage. - ## Setting Debug logging level There are three ways to activate the debug mode: @@ -132,9 +118,6 @@ Many attributes are set on the worker during at the configuration stage. Here is `process_information` : The details about the process parent to this worker execution. Only set in Arkindex mode. -`reporter` -: The `Reporter` instance that will generate the `ml_report.json` artifacts which sums up the publication done during this execution and the errors encountered. - `secrets` : A dictionary mapping the secret name to their parsed content. diff --git a/docs/contents/implem/index.md b/docs/contents/implem/index.md index df755b4ad3742553de4bb38c7d8acb86e7be3881..03c4c8f1611874c75046f448621ce5e5d28e8743 100644 --- a/docs/contents/implem/index.md +++ b/docs/contents/implem/index.md @@ -28,7 +28,6 @@ flowchart LR subgraph id3[Loop over each element] element_processing --> element_processing end - element_processing -- Save ML report to disk --> reporting end init --> run end diff --git a/docs/ref/reporting.md b/docs/ref/reporting.md deleted file mode 100644 index 039553de0a143495fced1974057d997ecfe2799e..0000000000000000000000000000000000000000 --- a/docs/ref/reporting.md +++ /dev/null @@ -1,3 +0,0 @@ -# Reporting - -::: arkindex_worker.reporting diff --git a/mkdocs.yml b/mkdocs.yml index 59b40f390509ad41cc8c9e6e68d39ccd0c027637..994f2c0b65d74915b6669bdae658bbfc2b98a8f6 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -88,7 +88,6 @@ nav: - Models: ref/models.md - Git & Gitlab support: ref/git.md - Image utilities: ref/image.md - - Reporting: ref/reporting.md - Cache: ref/cache.md - Utils: ref/utils.md - Releases: releases.md diff --git a/tests/test_elements_worker/test_classifications.py b/tests/test_elements_worker/test_classifications.py index 23db74264b268accc1d120ef0e33673ef59f3642..457f3dc7974d63316ab7e3dd5624a8709131267f 100644 --- a/tests/test_elements_worker/test_classifications.py +++ b/tests/test_elements_worker/test_classifications.py @@ -423,11 +423,6 @@ def test_create_classification(responses, mock_elements_worker): "high_confidence": True, } - # Classification has been created and reported - assert mock_elements_worker.report.report_data["elements"][elt.id][ - "classifications" - ] == {"a_class": 1} - def test_create_classification_with_cache(responses, mock_elements_worker_with_cache): mock_elements_worker_with_cache.classes = {"a_class": "0000"} @@ -470,11 +465,6 @@ def test_create_classification_with_cache(responses, mock_elements_worker_with_c "high_confidence": True, } - # Classification has been created and reported - assert mock_elements_worker_with_cache.report.report_data["elements"][elt.id][ - "classifications" - ] == {"a_class": 1} - # Check that created classification was properly stored in SQLite cache assert list(CachedClassification.select()) == [ CachedClassification( @@ -524,9 +514,6 @@ def test_create_classification_duplicate_worker_run(responses, mock_elements_wor "high_confidence": True, } - # Classification has NOT been created - assert mock_elements_worker.report.report_data["elements"] == {} - def test_create_classifications_wrong_element(mock_elements_worker): with pytest.raises(AssertionError) as e: diff --git a/tests/test_elements_worker/test_entities.py b/tests/test_elements_worker/test_entities.py index b989f3b3eede5912db0523b01f911a7fbfe82d08..f64b88659c4707a8b3cb6d03d05acaa537e4b944 100644 --- a/tests/test_elements_worker/test_entities.py +++ b/tests/test_elements_worker/test_entities.py @@ -12,43 +12,16 @@ from arkindex_worker.cache import ( CachedTranscription, CachedTranscriptionEntity, ) -from arkindex_worker.models import Element, Transcription +from arkindex_worker.models import Transcription from arkindex_worker.worker.entity import MissingEntityType from arkindex_worker.worker.transcription import TextOrientation from . import BASE_API_CALLS -def test_create_entity_wrong_element(mock_elements_worker): - with pytest.raises(AssertionError) as e: - mock_elements_worker.create_entity( - element=None, - name="Bob Bob", - type="person", - ) - assert ( - str(e.value) - == "element shouldn't be null and should be an Element or CachedElement" - ) - - with pytest.raises(AssertionError) as e: - mock_elements_worker.create_entity( - element="not element type", - name="Bob Bob", - type="person", - ) - assert ( - str(e.value) - == "element shouldn't be null and should be an Element or CachedElement" - ) - - def test_create_entity_wrong_name(mock_elements_worker): - elt = Element({"id": "12341234-1234-1234-1234-123412341234"}) - with pytest.raises(AssertionError) as e: mock_elements_worker.create_entity( - element=elt, name=None, type="person", ) @@ -56,7 +29,6 @@ def test_create_entity_wrong_name(mock_elements_worker): with pytest.raises(AssertionError) as e: mock_elements_worker.create_entity( - element=elt, name=1234, type="person", ) @@ -64,11 +36,8 @@ def test_create_entity_wrong_name(mock_elements_worker): def test_create_entity_wrong_type(mock_elements_worker): - elt = Element({"id": "12341234-1234-1234-1234-123412341234"}) - with pytest.raises(AssertionError) as e: mock_elements_worker.create_entity( - element=elt, name="Bob Bob", type=None, ) @@ -76,7 +45,6 @@ def test_create_entity_wrong_type(mock_elements_worker): with pytest.raises(AssertionError) as e: mock_elements_worker.create_entity( - element=elt, name="Bob Bob", type=1234, ) @@ -84,13 +52,10 @@ def test_create_entity_wrong_type(mock_elements_worker): def test_create_entity_wrong_corpus(monkeypatch, mock_elements_worker): - elt = Element({"id": "12341234-1234-1234-1234-123412341234"}) - # Triggering an error on metas param, not giving corpus should work since # ARKINDEX_CORPUS_ID environment variable is set on mock_elements_worker with pytest.raises(AssertionError) as e: mock_elements_worker.create_entity( - element=elt, name="Bob Bob", type="person", metas="wrong metas", @@ -99,11 +64,8 @@ def test_create_entity_wrong_corpus(monkeypatch, mock_elements_worker): def test_create_entity_wrong_metas(mock_elements_worker): - elt = Element({"id": "12341234-1234-1234-1234-123412341234"}) - with pytest.raises(AssertionError) as e: mock_elements_worker.create_entity( - element=elt, name="Bob Bob", type="person", metas="wrong metas", @@ -112,11 +74,8 @@ def test_create_entity_wrong_metas(mock_elements_worker): def test_create_entity_wrong_validated(mock_elements_worker): - elt = Element({"id": "12341234-1234-1234-1234-123412341234"}) - with pytest.raises(AssertionError) as e: mock_elements_worker.create_entity( - element=elt, name="Bob Bob", type="person", validated="wrong validated", @@ -127,7 +86,6 @@ def test_create_entity_wrong_validated(mock_elements_worker): def test_create_entity_api_error(responses, mock_elements_worker): # Set one entity type mock_elements_worker.entity_types = {"person": "person-entity-type-id"} - elt = Element({"id": "12341234-1234-1234-1234-123412341234"}) responses.add( responses.POST, "http://testserver/api/v1/entity/", @@ -136,7 +94,6 @@ def test_create_entity_api_error(responses, mock_elements_worker): with pytest.raises(ErrorResponse): mock_elements_worker.create_entity( - element=elt, name="Bob Bob", type="person", ) @@ -158,7 +115,6 @@ def test_create_entity(responses, mock_elements_worker): # Set one entity type mock_elements_worker.entity_types = {"person": "person-entity-type-id"} - elt = Element({"id": "12341234-1234-1234-1234-123412341234"}) responses.add( responses.POST, "http://testserver/api/v1/entity/", @@ -167,7 +123,6 @@ def test_create_entity(responses, mock_elements_worker): ) entity_id = mock_elements_worker.create_entity( - element=elt, name="Bob Bob", type="person", ) @@ -193,8 +148,6 @@ def test_create_entity_missing_type(responses, mock_elements_worker): """ Create entity with an unknown type will fail. """ - elt = Element({"id": "12341234-1234-1234-1234-123412341234"}) - # Call to list entity types responses.add( responses.GET, @@ -213,7 +166,6 @@ def test_create_entity_missing_type(responses, mock_elements_worker): AssertionError, match="Entity type `new-entity` not found in the corpus." ): mock_elements_worker.create_entity( - element=elt, name="Bob Bob", type="new-entity", ) @@ -232,7 +184,6 @@ def test_create_entity_missing_type(responses, mock_elements_worker): def test_create_entity_with_cache(responses, mock_elements_worker_with_cache): # Set one entity type mock_elements_worker_with_cache.entity_types = {"person": "person-entity-type-id"} - elt = CachedElement.create(id="12341234-1234-1234-1234-123412341234", type="thing") responses.add( responses.POST, "http://testserver/api/v1/entity/", @@ -241,7 +192,6 @@ def test_create_entity_with_cache(responses, mock_elements_worker_with_cache): ) entity_id = mock_elements_worker_with_cache.create_entity( - element=elt, name="Bob Bob", type="person", ) @@ -910,22 +860,11 @@ def test_create_transcription_entities_wrong_transcription( ) -def test_create_transcription_entities_no_transcription_element(mock_elements_worker): - with pytest.raises(AssertionError) as e: - mock_elements_worker.create_transcription_entities( - transcription=Transcription(id="transcription_id"), - entities=[], - ) - assert str(e.value) == "No element linked to Transcription (transcription_id)" - - @pytest.mark.parametrize("entities", (None, "not a list of entities", 1)) def test_create_transcription_entities_wrong_entities(mock_elements_worker, entities): with pytest.raises(AssertionError) as e: mock_elements_worker.create_transcription_entities( - transcription=Transcription( - id="transcription_id", element={"id": "element_id"} - ), + transcription=Transcription(id="transcription_id"), entities=entities, ) assert str(e.value) == "entities shouldn't be null and should be of type list" @@ -934,9 +873,7 @@ def test_create_transcription_entities_wrong_entities(mock_elements_worker, enti def test_create_transcription_entities_wrong_entities_subtype(mock_elements_worker): with pytest.raises(AssertionError) as e: mock_elements_worker.create_transcription_entities( - transcription=Transcription( - id="transcription_id", element={"id": "element_id"} - ), + transcription=Transcription(id="transcription_id"), entities=["not a dict"], ) assert str(e.value) == "Entity at index 0 in entities: Should be of type dict" @@ -1030,17 +967,14 @@ def test_create_transcription_entities_wrong_entity( ): with pytest.raises(AssertionError) as e: mock_elements_worker.create_transcription_entities( - transcription=Transcription( - id="transcription_id", element={"id": "element_id"} - ), + transcription=Transcription(id="transcription_id"), entities=[entity], ) assert str(e.value) == error def test_create_transcription_entities(responses, mock_elements_worker): - element_id = "element_id" - transcription = Transcription(id="transcription-id", element={"id": element_id}) + transcription = Transcription(id="transcription-id") # Call to Transcription entities creation in bulk responses.add( responses.POST, @@ -1091,35 +1025,6 @@ def test_create_transcription_entities(responses, mock_elements_worker): assert len(created_objects) == 1 - assert element_id in mock_elements_worker.report.report_data["elements"] - ml_report = mock_elements_worker.report.report_data["elements"][element_id] - - assert "started" in ml_report - del ml_report["started"] - - # Check reporting - assert ml_report == { - "elements": {}, - "transcriptions": 0, - "classifications": {}, - "entities": [ - { - "id": "entity-id", - "type": "22222222-2222-2222-2222-222222222222", - "name": "Teklia", - } - ], - "transcription_entities": [ - { - "transcription_id": "transcription-id", - "entity_id": "entity-id", - "transcription_entity_id": "transc-entity-id", - } - ], - "metadata": [], - "errors": [], - } - assert len(responses.calls) == len(BASE_API_CALLS) + 1 assert [ (call.request.method, call.request.url) for call in responses.calls diff --git a/tests/test_reporting.py b/tests/test_reporting.py deleted file mode 100644 index af214f011978d1df5c0a07b142ace8ab75493acf..0000000000000000000000000000000000000000 --- a/tests/test_reporting.py +++ /dev/null @@ -1,296 +0,0 @@ -# -*- coding: utf-8 -*- -import json -import uuid -from datetime import datetime -from tempfile import NamedTemporaryFile - -import pytest -from apistar.exceptions import ErrorResponse - -from arkindex_worker.models import Transcription -from arkindex_worker.reporting import Reporter - - -def test_init(): - version_id = str(uuid.uuid4()) - reporter = Reporter(name="Worker", slug="worker-slug", version=version_id) - assert "started" in reporter.report_data - del reporter.report_data["started"] - assert reporter.report_data == { - "name": "Worker", - "slug": "worker-slug", - "version": version_id, - "elements": {}, - } - - -def test_process(): - reporter = Reporter("worker") - reporter.process("myelement") - assert "myelement" in reporter.report_data["elements"] - element_data = reporter.report_data["elements"]["myelement"] - assert "started" in element_data - del element_data["started"] - assert element_data == { - "elements": {}, - "transcriptions": 0, - "classifications": {}, - "entities": [], - "transcription_entities": [], - "metadata": [], - "errors": [], - } - - -def test_add_element(): - reporter = Reporter("worker") - reporter.add_element("myelement", type="text_line") - assert "myelement" in reporter.report_data["elements"] - element_data = reporter.report_data["elements"]["myelement"] - del element_data["started"] - assert element_data == { - "elements": {"text_line": 1}, - "transcriptions": 0, - "classifications": {}, - "entities": [], - "transcription_entities": [], - "metadata": [], - "errors": [], - } - - -def test_add_element_count(): - """ - Report multiple elements with the same parent and type - """ - reporter = Reporter("worker") - reporter.add_element("myelement", type="text_line", type_count=42) - assert "myelement" in reporter.report_data["elements"] - element_data = reporter.report_data["elements"]["myelement"] - del element_data["started"] - assert element_data == { - "elements": {"text_line": 42}, - "transcriptions": 0, - "classifications": {}, - "entities": [], - "transcription_entities": [], - "metadata": [], - "errors": [], - } - - -def test_add_classification(): - reporter = Reporter("worker") - reporter.add_classification("myelement", "three") - assert "myelement" in reporter.report_data["elements"] - element_data = reporter.report_data["elements"]["myelement"] - del element_data["started"] - assert element_data == { - "elements": {}, - "transcriptions": 0, - "classifications": {"three": 1}, - "entities": [], - "transcription_entities": [], - "metadata": [], - "errors": [], - } - - -def test_add_classifications(): - reporter = Reporter("worker") - with pytest.raises(AssertionError): - reporter.add_classifications("myelement", {"not": "a list"}) - - reporter.add_classifications( - "myelement", [{"class_name": "three"}, {"class_name": "two"}] - ) - reporter.add_classifications( - "myelement", - [ - {"class_name": "three"}, - {"class_name": "two", "high_confidence": True}, - {"class_name": "three", "confidence": 0.42}, - ], - ) - - assert "myelement" in reporter.report_data["elements"] - element_data = reporter.report_data["elements"]["myelement"] - del element_data["started"] - assert element_data == { - "elements": {}, - "transcriptions": 0, - "classifications": {"three": 3, "two": 2}, - "entities": [], - "transcription_entities": [], - "metadata": [], - "errors": [], - } - - -def test_add_transcription(): - reporter = Reporter("worker") - reporter.add_transcription("myelement") - assert "myelement" in reporter.report_data["elements"] - element_data = reporter.report_data["elements"]["myelement"] - del element_data["started"] - assert element_data == { - "elements": {}, - "transcriptions": 1, - "classifications": {}, - "entities": [], - "transcription_entities": [], - "metadata": [], - "errors": [], - } - - -def test_add_transcription_count(): - """ - Report multiple transcriptions with the same element and type - """ - reporter = Reporter("worker") - reporter.add_transcription("myelement", 1337) - assert "myelement" in reporter.report_data["elements"] - element_data = reporter.report_data["elements"]["myelement"] - del element_data["started"] - assert element_data == { - "elements": {}, - "transcriptions": 1337, - "classifications": {}, - "entities": [], - "transcription_entities": [], - "metadata": [], - "errors": [], - } - - -def test_add_entity(): - reporter = Reporter("worker") - reporter.add_entity( - "myelement", - "12341234-1234-1234-1234-123412341234", - "person-entity-type-id", - "Bob Bob", - ) - assert "myelement" in reporter.report_data["elements"] - element_data = reporter.report_data["elements"]["myelement"] - del element_data["started"] - assert element_data == { - "elements": {}, - "transcriptions": 0, - "classifications": {}, - "entities": [ - { - "id": "12341234-1234-1234-1234-123412341234", - "type": "person-entity-type-id", - "name": "Bob Bob", - } - ], - "transcription_entities": [], - "metadata": [], - "errors": [], - } - - -def test_add_transcription_entity(): - reporter = Reporter("worker") - reporter.add_transcription_entity( - "5678", - Transcription({"id": "1234-5678", "element": {"id": "myelement"}}), - "1234", - ) - assert "myelement" in reporter.report_data["elements"] - element_data = reporter.report_data["elements"]["myelement"] - del element_data["started"] - assert element_data == { - "elements": {}, - "transcriptions": 0, - "classifications": {}, - "entities": [], - "transcription_entities": [ - { - "transcription_id": "1234-5678", - "entity_id": "5678", - "transcription_entity_id": "1234", - } - ], - "metadata": [], - "errors": [], - } - - -def test_add_metadata(): - reporter = Reporter("worker") - reporter.add_metadata( - "myelement", "12341234-1234-1234-1234-123412341234", "location", "Teklia" - ) - assert "myelement" in reporter.report_data["elements"] - element_data = reporter.report_data["elements"]["myelement"] - del element_data["started"] - assert element_data == { - "elements": {}, - "transcriptions": 0, - "classifications": {}, - "entities": [], - "transcription_entities": [], - "metadata": [ - { - "id": "12341234-1234-1234-1234-123412341234", - "type": "location", - "name": "Teklia", - } - ], - "errors": [], - } - - -def test_error(): - reporter = Reporter("worker") - reporter.error("myelement", ZeroDivisionError("What have you done")) - reporter.error( - "myelement", - ErrorResponse( - title="I'm a teapot", - status_code=418, - content='{"coffee": "Can\'t touch this"}', - ), - ) - assert reporter.report_data["elements"]["myelement"]["errors"] == [ - {"class": "ZeroDivisionError", "message": "What have you done"}, - { - "class": "ErrorResponse", - "message": "I'm a teapot", - "status_code": 418, - "content": '{"coffee": "Can\'t touch this"}', - }, - ] - - -def test_reporter_save(mocker): - datetime_mock = mocker.MagicMock() - datetime_mock.utcnow.return_value = datetime(2000, 1, 1) - mocker.patch("arkindex_worker.reporting.datetime", datetime_mock) - version_id = str(uuid.uuid4()) - reporter = Reporter(name="Worker", slug="worker-slug", version=version_id) - reporter.add_element("myelement", type="text_line", type_count=4) - with NamedTemporaryFile() as f: - reporter.save(f.name) - exported_data = json.load(f) - assert exported_data == { - "name": "Worker", - "slug": "worker-slug", - "started": "2000-01-01T00:00:00", - "version": version_id, - "elements": { - "myelement": { - "classifications": {}, - "elements": {"text_line": 4}, - "entities": [], - "transcription_entities": [], - "errors": [], - "metadata": [], - "started": "2000-01-01T00:00:00", - "transcriptions": 0, - } - }, - }