diff --git a/arkindex_worker/worker.py b/arkindex_worker/worker.py index ee8b05692cfd0c36e2fe70d38c3f97480b1f98b2..d6381b752b78137af75bd30ae41ba8538ad48c67 100644 --- a/arkindex_worker/worker.py +++ b/arkindex_worker/worker.py @@ -5,11 +5,11 @@ import logging import os import sys import uuid +from enum import Enum from apistar.exceptions import ErrorResponse from arkindex import ArkindexClient, options_from_env -from arkindex_common.enums import EntityType, TranscriptionType from arkindex_worker import logger from arkindex_worker.models import Element from arkindex_worker.reporting import Reporter @@ -73,6 +73,24 @@ class BaseWorker(object): """Override this method to implement your own process""" +class TranscriptionType(Enum): + Page = "page" + Paragraph = "paragraph" + Line = "line" + Word = "word" + Character = "character" + + +class EntityType(Enum): + Person = "person" + Location = "location" + Subject = "subject" + Organization = "organization" + Misc = "misc" + Number = "number" + Date = "date" + + class ElementsWorker(BaseWorker): def __init__(self, description="Arkindex Elements Worker"): super().__init__(description) diff --git a/tests/test_elements_worker.py b/tests/test_elements_worker.py index b94312fbb2d64218d11a8384a9b6b5f4c41a3263..7f44936ff37a13984a46dfbc7dffe5bfe4c65e17 100644 --- a/tests/test_elements_worker.py +++ b/tests/test_elements_worker.py @@ -379,3 +379,402 @@ def test_create_sub_element(responses): responses.calls[0].request.url == "https://arkindex.teklia.com/api/v1/elements/create/" ) + + +def test_create_transcription_wrong_element(): + worker = ElementsWorker() + with pytest.raises(AssertionError) as e: + worker.create_transcription( + element=None, text="i am a line", type="line", score=0.42, + ) + assert str(e.value) == "element shouldn't be null and should be of type Element" + + with pytest.raises(AssertionError) as e: + worker.create_transcription( + element="not element type", text="i am a line", type="line", score=0.42, + ) + assert str(e.value) == "element shouldn't be null and should be of type Element" + + +def test_create_transcription_wrong_type(): + worker = ElementsWorker() + elt = Element({"id": "12341234-1234-1234-1234-123412341234"}) + + with pytest.raises(AssertionError) as e: + worker.create_transcription( + element=elt, text="i am a line", type=None, score=0.42, + ) + assert str(e.value) == "type shouldn't be null and should be of type str" + + with pytest.raises(AssertionError) as e: + worker.create_transcription( + element=elt, text="i am a line", type=1234, score=0.42, + ) + assert str(e.value) == "type shouldn't be null and should be of type str" + + with pytest.raises(AssertionError) as e: + worker.create_transcription( + element=elt, + text="i am a line", + type="not_a_transcription_type", + score=0.42, + ) + assert str(e.value) == "type should be an allowed transcription type" + + +def test_create_transcription_wrong_text(): + worker = ElementsWorker() + elt = Element({"id": "12341234-1234-1234-1234-123412341234"}) + + with pytest.raises(AssertionError) as e: + worker.create_transcription( + element=elt, text=None, type="line", score=0.42, + ) + assert str(e.value) == "text shouldn't be null and should be of type str" + + with pytest.raises(AssertionError) as e: + worker.create_transcription( + element=elt, text=1234, type="line", score=0.42, + ) + assert str(e.value) == "text shouldn't be null and should be of type str" + + +def test_create_transcription_wrong_score(): + worker = ElementsWorker() + elt = Element({"id": "12341234-1234-1234-1234-123412341234"}) + + with pytest.raises(AssertionError) as e: + worker.create_transcription( + element=elt, text="i am a line", type="line", score=None, + ) + assert ( + str(e.value) == "score shouldn't be null and should be a float in [0..1] range" + ) + + with pytest.raises(AssertionError) as e: + worker.create_transcription( + element=elt, text="i am a line", type="line", score="wrong score", + ) + assert ( + str(e.value) == "score shouldn't be null and should be a float in [0..1] range" + ) + + with pytest.raises(AssertionError) as e: + worker.create_transcription( + element=elt, text="i am a line", type="line", score=0, + ) + assert ( + str(e.value) == "score shouldn't be null and should be a float in [0..1] range" + ) + + with pytest.raises(AssertionError) as e: + worker.create_transcription( + element=elt, text="i am a line", type="line", score=2.00, + ) + assert ( + str(e.value) == "score shouldn't be null and should be a float in [0..1] range" + ) + + +def test_create_transcription_api_error(responses): + worker = ElementsWorker() + worker.configure() + elt = Element({"id": "12341234-1234-1234-1234-123412341234"}) + responses.add( + responses.POST, + f"https://arkindex.teklia.com/api/v1/element/{elt.id}/transcription/", + status=500, + ) + + with pytest.raises(ErrorResponse): + worker.create_transcription( + element=elt, text="i am a line", type="line", score=0.42, + ) + + assert len(responses.calls) == 1 + assert ( + responses.calls[0].request.url + == f"https://arkindex.teklia.com/api/v1/element/{elt.id}/transcription/" + ) + + +def test_create_transcription(responses): + worker = ElementsWorker() + worker.configure() + elt = Element({"id": "12341234-1234-1234-1234-123412341234"}) + responses.add( + responses.POST, + f"https://arkindex.teklia.com/api/v1/element/{elt.id}/transcription/", + status=200, + ) + + worker.create_transcription( + element=elt, text="i am a line", type="line", score=0.42, + ) + + assert len(responses.calls) == 1 + assert ( + responses.calls[0].request.url + == f"https://arkindex.teklia.com/api/v1/element/{elt.id}/transcription/" + ) + + +def test_create_classification_wrong_element(): + worker = ElementsWorker() + with pytest.raises(AssertionError) as e: + worker.create_classification( + element=None, ml_class="a_class", confidence=0.42, high_confidence=True, + ) + assert str(e.value) == "element shouldn't be null and should be of type Element" + + with pytest.raises(AssertionError) as e: + worker.create_classification( + element="not element type", + ml_class="a_class", + confidence=0.42, + high_confidence=True, + ) + assert str(e.value) == "element shouldn't be null and should be of type Element" + + +def test_create_classification_wrong_ml_class(): + worker = ElementsWorker() + elt = Element({"id": "12341234-1234-1234-1234-123412341234"}) + + with pytest.raises(AssertionError) as e: + worker.create_classification( + element=elt, ml_class=None, confidence=0.42, high_confidence=True, + ) + assert str(e.value) == "ml_class shouldn't be null and should be of type str" + + with pytest.raises(AssertionError) as e: + worker.create_classification( + element=elt, ml_class=1234, confidence=0.42, high_confidence=True, + ) + assert str(e.value) == "ml_class shouldn't be null and should be of type str" + + +def test_create_classification_wrong_confidence(): + worker = ElementsWorker() + elt = Element({"id": "12341234-1234-1234-1234-123412341234"}) + + with pytest.raises(AssertionError) as e: + worker.create_classification( + element=elt, ml_class="a_class", confidence=None, high_confidence=True, + ) + assert ( + str(e.value) + == "confidence shouldn't be null and should be a float in [0..1] range" + ) + + with pytest.raises(AssertionError) as e: + worker.create_classification( + element=elt, + ml_class="a_class", + confidence="wrong confidence", + high_confidence=True, + ) + assert ( + str(e.value) + == "confidence shouldn't be null and should be a float in [0..1] range" + ) + + with pytest.raises(AssertionError) as e: + worker.create_classification( + element=elt, ml_class="a_class", confidence=0, high_confidence=True, + ) + assert ( + str(e.value) + == "confidence shouldn't be null and should be a float in [0..1] range" + ) + + with pytest.raises(AssertionError) as e: + worker.create_classification( + element=elt, ml_class="a_class", confidence=2.00, high_confidence=True, + ) + assert ( + str(e.value) + == "confidence shouldn't be null and should be a float in [0..1] range" + ) + + +def test_create_classification_wrong_high_confidence(): + worker = ElementsWorker() + elt = Element({"id": "12341234-1234-1234-1234-123412341234"}) + + with pytest.raises(AssertionError) as e: + worker.create_classification( + element=elt, ml_class="a_class", confidence=0.42, high_confidence=None, + ) + assert ( + str(e.value) == "high_confidence shouldn't be null and should be of type bool" + ) + + with pytest.raises(AssertionError) as e: + worker.create_classification( + element=elt, + ml_class="a_class", + confidence=0.42, + high_confidence="wrong high_confidence", + ) + assert ( + str(e.value) == "high_confidence shouldn't be null and should be of type bool" + ) + + +def test_create_classification_api_error(responses): + worker = ElementsWorker() + worker.configure() + elt = Element({"id": "12341234-1234-1234-1234-123412341234"}) + responses.add( + responses.POST, + "https://arkindex.teklia.com/api/v1/classifications/", + status=500, + ) + + with pytest.raises(ErrorResponse): + worker.create_classification( + element=elt, ml_class="a_class", confidence=0.42, high_confidence=True, + ) + + assert len(responses.calls) == 1 + assert ( + responses.calls[0].request.url + == "https://arkindex.teklia.com/api/v1/classifications/" + ) + + +def test_create_classification(responses): + worker = ElementsWorker() + worker.configure() + elt = Element({"id": "12341234-1234-1234-1234-123412341234"}) + responses.add( + responses.POST, + "https://arkindex.teklia.com/api/v1/classifications/", + status=200, + ) + + worker.create_classification( + element=elt, ml_class="a_class", confidence=0.42, high_confidence=True, + ) + + assert len(responses.calls) == 1 + assert ( + responses.calls[0].request.url + == "https://arkindex.teklia.com/api/v1/classifications/" + ) + + +def test_create_entity_wrong_name(): + worker = ElementsWorker() + with pytest.raises(AssertionError) as e: + worker.create_entity( + name=None, type="person", corpus="12341234-1234-1234-1234-123412341234", + ) + assert str(e.value) == "name shouldn't be null and should be of type str" + + with pytest.raises(AssertionError) as e: + worker.create_entity( + name=1234, type="person", corpus="12341234-1234-1234-1234-123412341234", + ) + assert str(e.value) == "name shouldn't be null and should be of type str" + + +def test_create_entity_wrong_type(): + worker = ElementsWorker() + + with pytest.raises(AssertionError) as e: + worker.create_entity( + name="Bob Bob", type=None, corpus="12341234-1234-1234-1234-123412341234", + ) + assert str(e.value) == "type shouldn't be null and should be of type str" + + with pytest.raises(AssertionError) as e: + worker.create_entity( + name="Bob Bob", type=1234, corpus="12341234-1234-1234-1234-123412341234", + ) + assert str(e.value) == "type shouldn't be null and should be of type str" + + with pytest.raises(AssertionError) as e: + worker.create_entity( + name="Bob Bob", + type="not_an_entity_type", + corpus="12341234-1234-1234-1234-123412341234", + ) + assert str(e.value) == "type should be an allowed entity type" + + +def test_create_entity_wrong_corpus(): + worker = ElementsWorker() + with pytest.raises(AssertionError) as e: + worker.create_entity( + name="Bob Bob", type="person", corpus=None, + ) + assert str(e.value) == "corpus shouldn't be null and should be of type str" + + with pytest.raises(AssertionError) as e: + worker.create_entity( + name="Bob Bob", type="person", corpus=1234, + ) + assert str(e.value) == "corpus shouldn't be null and should be of type str" + + +def test_create_entity_wrong_metas(): + worker = ElementsWorker() + with pytest.raises(AssertionError) as e: + worker.create_entity( + name="Bob Bob", + type="person", + corpus="12341234-1234-1234-1234-123412341234", + metas="wrong metas", + ) + assert str(e.value) == "metas should be of type dict" + + +def test_create_entity_wrong_validated(): + worker = ElementsWorker() + with pytest.raises(AssertionError) as e: + worker.create_entity( + name="Bob Bob", + type="person", + corpus="12341234-1234-1234-1234-123412341234", + validated="wrong validated", + ) + assert str(e.value) == "validated should be of type bool" + + +def test_create_entity_api_error(responses): + worker = ElementsWorker() + worker.configure() + responses.add( + responses.POST, "https://arkindex.teklia.com/api/v1/entity/", status=500, + ) + + with pytest.raises(ErrorResponse): + worker.create_entity( + name="Bob Bob", + type="person", + corpus="12341234-1234-1234-1234-123412341234", + ) + + assert len(responses.calls) == 1 + assert ( + responses.calls[0].request.url == "https://arkindex.teklia.com/api/v1/entity/" + ) + + +def test_create_entity(responses): + worker = ElementsWorker() + worker.configure() + responses.add( + responses.POST, "https://arkindex.teklia.com/api/v1/entity/", status=200, + ) + + worker.create_entity( + name="Bob Bob", type="person", corpus="12341234-1234-1234-1234-123412341234", + ) + + assert len(responses.calls) == 1 + assert ( + responses.calls[0].request.url == "https://arkindex.teklia.com/api/v1/entity/" + )