# -*- coding: utf-8 -*-
import json
import os
import sys
import tempfile
from argparse import Namespace
from uuid import UUID

import pytest
from apistar.exceptions import ErrorResponse

from arkindex_worker.models import Element
from arkindex_worker.worker import ElementsWorker, EntityType, TranscriptionType

# Elements payload shared by the tests that need a well-formed elements list.
ELEMENTS_LIST = [
    {"id": "volumeid", "type": "volume"},
    {"id": "pageid", "type": "page"},
    {"id": "actid", "type": "act"},
    {"id": "surfaceid", "type": "surface"},
]


def _write_temp_json(content):
    """Dump ``content`` as JSON into a new temporary file and return its path.

    ``tempfile.mkstemp`` hands back an already-open OS-level descriptor; it is
    wrapped with ``os.fdopen`` so that it gets closed once the payload is
    written instead of leaking for the rest of the test session.
    The caller stays responsible for removing the file.
    """
    fd, path = tempfile.mkstemp()
    with os.fdopen(fd, "w") as f:
        json.dump(content, f)
    return path


def test_cli_default(monkeypatch):
    """TASK_ELEMENTS is picked up as elements_list when no CLI arg is given."""
    path = _write_temp_json(ELEMENTS_LIST)
    try:
        monkeypatch.setenv("TASK_ELEMENTS", path)
        worker = ElementsWorker()
        worker.configure()

        assert worker.args.elements_list.name == path
        assert not worker.args.element
    finally:
        # Remove the temporary file even when an assertion above fails
        os.unlink(path)


def test_cli_arg_elements_list_given(mocker):
    """--elements-list sets elements_list and leaves element empty."""
    path = _write_temp_json(ELEMENTS_LIST)
    try:
        mocker.patch.object(sys, "argv", ["worker", "--elements-list", path])
        worker = ElementsWorker()
        worker.configure()

        assert worker.args.elements_list.name == path
        assert not worker.args.element
    finally:
        # Remove the temporary file even when an assertion above fails
        os.unlink(path)


def test_cli_arg_element_one_given_not_uuid(mocker):
    """A non-UUID --element value makes configure() exit."""
    mocker.patch.object(sys, "argv", ["worker", "--element", "1234"])
    worker = ElementsWorker()
    with pytest.raises(SystemExit):
        worker.configure()


def test_cli_arg_element_one_given(mocker):
    """A single --element value is parsed into a one-item list of UUID."""
    mocker.patch.object(
        sys, "argv", ["worker", "--element", "12341234-1234-1234-1234-123412341234"]
    )
    worker = ElementsWorker()
    worker.configure()

    assert worker.args.element == [UUID("12341234-1234-1234-1234-123412341234")]
    # elements_list is None because TASK_ELEMENTS environment variable isn't set
    assert not worker.args.elements_list


def test_cli_arg_element_many_given(mocker):
    """Several --element values are parsed into a list of UUID, in order."""
    mocker.patch.object(
        sys,
        "argv",
        [
            "worker",
            "--element",
            "12341234-1234-1234-1234-123412341234",
            "43214321-4321-4321-4321-432143214321",
        ],
    )
    worker = ElementsWorker()
    worker.configure()

    assert worker.args.element == [
        UUID("12341234-1234-1234-1234-123412341234"),
        UUID("43214321-4321-4321-4321-432143214321"),
    ]
    # elements_list is None because TASK_ELEMENTS environment variable isn't set
    assert not worker.args.elements_list


def test_list_elements_elements_list_arg_wrong_type(monkeypatch):
    """list_elements() rejects an elements file whose JSON root is not a list."""
    path = _write_temp_json({})
    monkeypatch.setenv("TASK_ELEMENTS", path)
    worker = ElementsWorker()
    worker.configure()
    os.unlink(path)

    with pytest.raises(AssertionError) as e:
        worker.list_elements()
    assert str(e.value) == "Elements list must be a list"


def test_list_elements_elements_list_arg_empty_list(monkeypatch):
    """list_elements() rejects an empty elements list."""
    path = _write_temp_json([])
    monkeypatch.setenv("TASK_ELEMENTS", path)
    worker = ElementsWorker()
    worker.configure()
    os.unlink(path)

    with pytest.raises(AssertionError) as e:
        worker.list_elements()
    assert str(e.value) == "No elements in elements list"


def test_list_elements_elements_list_arg_missing_id(monkeypatch):
    """Entries without an "id" key are left out of the listed elements."""
    path = _write_temp_json([{"type": "volume"}])
    monkeypatch.setenv("TASK_ELEMENTS", path)
    worker = ElementsWorker()
    worker.configure()
    os.unlink(path)

    elt_list = worker.list_elements()

    assert elt_list == []


def test_list_elements_elements_list_arg(monkeypatch):
    """list_elements() returns the ids found in the elements file, in order."""
    path = _write_temp_json(ELEMENTS_LIST)
    monkeypatch.setenv("TASK_ELEMENTS", path)
    worker = ElementsWorker()
    worker.configure()
    os.unlink(path)

    elt_list = worker.list_elements()

    assert elt_list == ["volumeid", "pageid", "actid", "surfaceid"]


def test_list_elements_element_arg(mocker):
    """list_elements() returns the ids given through the element argument."""
    mocker.patch(
        "arkindex_worker.worker.argparse.ArgumentParser.parse_args",
        return_value=Namespace(
            element=["volumeid", "pageid"], verbose=False, elements_list=None
        ),
    )
    worker = ElementsWorker()
    worker.configure()

    elt_list = worker.list_elements()

    assert elt_list == ["volumeid", "pageid"]


def test_list_elements_both_args_error(mocker):
    """list_elements() refuses to run when both element sources are set."""
    path = _write_temp_json(ELEMENTS_LIST)
    # Keep the handle in a context manager so it is closed when the test ends
    with open(path) as elements_file:
        mocker.patch(
            "arkindex_worker.worker.argparse.ArgumentParser.parse_args",
            return_value=Namespace(
                element=["anotherid", "againanotherid"],
                verbose=False,
                elements_list=elements_file,
            ),
        )
        worker = ElementsWorker()
        worker.configure()
        os.unlink(path)

        with pytest.raises(AssertionError) as e:
            worker.list_elements()
    assert str(e.value) == "elements-list and element CLI args shouldn't be both set"


def test_create_sub_element_wrong_element():
    """create_sub_element() rejects a null or non-Element element."""
    worker = ElementsWorker()
    with pytest.raises(AssertionError) as e:
        worker.create_sub_element(
            element=None,
            type="something",
            name="0",
            polygon=[[1, 1], [2, 2], [2, 1], [1, 2]],
        )
    assert str(e.value) == "element shouldn't be null and should be of type Element"

    with pytest.raises(AssertionError) as e:
        worker.create_sub_element(
            element="not element type",
            type="something",
            name="0",
            polygon=[[1, 1], [2, 2], [2, 1], [1, 2]],
        )
    assert str(e.value) == "element shouldn't be null and should be of type Element"


def test_create_sub_element_wrong_type():
    """create_sub_element() rejects a null or non-str type."""
    worker = ElementsWorker()
    elt = Element({"zone": None})

    with pytest.raises(AssertionError) as e:
        worker.create_sub_element(
            element=elt,
            type=None,
            name="0",
            polygon=[[1, 1], [2, 2], [2, 1], [1, 2]],
        )
    assert str(e.value) == "type shouldn't be null and should be of type str"

    with pytest.raises(AssertionError) as e:
        worker.create_sub_element(
            element=elt,
            type=1234,
            name="0",
            polygon=[[1, 1], [2, 2], [2, 1], [1, 2]],
        )
    assert str(e.value) == "type shouldn't be null and should be of type str"


def test_create_sub_element_wrong_name():
    """create_sub_element() rejects a null or non-str name."""
    worker = ElementsWorker()
    elt = Element({"zone": None})

    with pytest.raises(AssertionError) as e:
        worker.create_sub_element(
            element=elt,
            type="something",
            name=None,
            polygon=[[1, 1], [2, 2], [2, 1], [1, 2]],
        )
    assert str(e.value) == "name shouldn't be null and should be of type str"

    with pytest.raises(AssertionError) as e:
        worker.create_sub_element(
            element=elt,
            type="something",
            name=1234,
            polygon=[[1, 1], [2, 2], [2, 1], [1, 2]],
        )
    assert str(e.value) == "name shouldn't be null and should be of type str"


def test_create_sub_element_wrong_polygon():
    """create_sub_element() rejects each malformed polygon with its own message."""
    worker = ElementsWorker()
    elt = Element({"zone": None})

    with pytest.raises(AssertionError) as e:
        worker.create_sub_element(
            element=elt,
            type="something",
            name="0",
            polygon=None,
        )
    assert str(e.value) == "polygon shouldn't be null and should be of type list"

    with pytest.raises(AssertionError) as e:
        worker.create_sub_element(
            element=elt,
            type="something",
            name="O",
            polygon="not a polygon",
        )
    assert str(e.value) == "polygon shouldn't be null and should be of type list"

    with pytest.raises(AssertionError) as e:
        worker.create_sub_element(
            element=elt,
            type="something",
            name="O",
            polygon=[[1, 1], [2, 2]],
        )
    assert str(e.value) == "polygon should have at least three points"

    with pytest.raises(AssertionError) as e:
        worker.create_sub_element(
            element=elt,
            type="something",
            name="O",
            polygon=[[1, 1, 1], [2, 2, 1], [2, 1, 1], [1, 2, 1]],
        )
    assert str(e.value) == "polygon points should be lists of two items"

    with pytest.raises(AssertionError) as e:
        worker.create_sub_element(
            element=elt,
            type="something",
            name="O",
            polygon=[[1], [2], [2], [1]],
        )
    assert str(e.value) == "polygon points should be lists of two items"

    with pytest.raises(AssertionError) as e:
        worker.create_sub_element(
            element=elt,
            type="something",
            name="O",
            polygon=[["not a coord", 1], [2, 2], [2, 1], [1, 2]],
        )
    assert str(e.value) == "polygon points should be lists of two numbers"


def test_create_sub_element_api_error(responses):
    """A 500 from the elements endpoint surfaces as ErrorResponse."""
    worker = ElementsWorker()
    worker.configure()
    elt = Element(
        {
            "id": "12341234-1234-1234-1234-123412341234",
            "corpus": {"id": "11111111-1111-1111-1111-111111111111"},
            "zone": {"image": {"id": "22222222-2222-2222-2222-222222222222"}},
        }
    )
    responses.add(
        responses.POST,
        "https://arkindex.teklia.com/api/v1/elements/create/",
        status=500,
    )

    with pytest.raises(ErrorResponse):
        worker.create_sub_element(
            element=elt,
            type="something",
            name="0",
            polygon=[[1, 1], [2, 2], [2, 1], [1, 2]],
        )

    assert len(responses.calls) == 1
    assert (
        responses.calls[0].request.url
        == "https://arkindex.teklia.com/api/v1/elements/create/"
    )


def test_create_sub_element(responses):
    """create_sub_element() sends one POST with the expected JSON payload."""
    worker = ElementsWorker()
    worker.configure()
    elt = Element(
        {
            "id": "12341234-1234-1234-1234-123412341234",
            "corpus": {"id": "11111111-1111-1111-1111-111111111111"},
            "zone": {"image": {"id": "22222222-2222-2222-2222-222222222222"}},
        }
    )
    responses.add(
        responses.POST,
        "https://arkindex.teklia.com/api/v1/elements/create/",
        status=200,
    )

    worker.create_sub_element(
        element=elt,
        type="something",
        name="0",
        polygon=[[1, 1], [2, 2], [2, 1], [1, 2]],
    )

    assert len(responses.calls) == 1
    assert (
        responses.calls[0].request.url
        == "https://arkindex.teklia.com/api/v1/elements/create/"
    )
    assert json.loads(responses.calls[0].request.body) == {
        "type": "something",
        "name": "0",
        "image": "22222222-2222-2222-2222-222222222222",
        "corpus": "11111111-1111-1111-1111-111111111111",
        "polygon": [[1, 1], [2, 2], [2, 1], [1, 2]],
        "parent": "12341234-1234-1234-1234-123412341234",
        "worker_version": "12341234-1234-1234-1234-123412341234",
    }


def test_create_transcription_wrong_element():
    """create_transcription() rejects a null or non-Element element."""
    worker = ElementsWorker()
    with pytest.raises(AssertionError) as e:
        worker.create_transcription(
            element=None,
            text="i am a line",
            type=TranscriptionType.Line,
            score=0.42,
        )
    assert str(e.value) == "element shouldn't be null and should be of type Element"

    with pytest.raises(AssertionError) as e:
        worker.create_transcription(
            element="not element type",
            text="i am a line",
            type=TranscriptionType.Line,
            score=0.42,
        )
    assert str(e.value) == "element shouldn't be null and should be of type Element"


def test_create_transcription_wrong_type():
    """create_transcription() rejects a type that is not a TranscriptionType."""
    worker = ElementsWorker()
    elt = Element({"id": "12341234-1234-1234-1234-123412341234"})

    with pytest.raises(AssertionError) as e:
        worker.create_transcription(
            element=elt,
            text="i am a line",
            type=None,
            score=0.42,
        )
    assert (
        str(e.value) == "type shouldn't be null and should be of type TranscriptionType"
    )

    with pytest.raises(AssertionError) as e:
        worker.create_transcription(
            element=elt,
            text="i am a line",
            type=1234,
            score=0.42,
        )
    assert (
        str(e.value) == "type shouldn't be null and should be of type TranscriptionType"
    )

    with pytest.raises(AssertionError) as e:
        worker.create_transcription(
            element=elt,
            text="i am a line",
            type="not_a_transcription_type",
            score=0.42,
        )
    assert (
        str(e.value) == "type shouldn't be null and should be of type TranscriptionType"
    )


def test_create_transcription_wrong_text():
    """create_transcription() rejects a null or non-str text."""
    worker = ElementsWorker()
    elt = Element({"id": "12341234-1234-1234-1234-123412341234"})

    with pytest.raises(AssertionError) as e:
        worker.create_transcription(
            element=elt,
            text=None,
            type=TranscriptionType.Line,
            score=0.42,
        )
    assert str(e.value) == "text shouldn't be null and should be of type str"

    with pytest.raises(AssertionError) as e:
        worker.create_transcription(
            element=elt,
            text=1234,
            type=TranscriptionType.Line,
            score=0.42,
        )
    assert str(e.value) == "text shouldn't be null and should be of type str"


def test_create_transcription_wrong_score():
    """create_transcription() rejects None, str, int 0 and 2.00 as score."""
    worker = ElementsWorker()
    elt = Element({"id": "12341234-1234-1234-1234-123412341234"})

    with pytest.raises(AssertionError) as e:
        worker.create_transcription(
            element=elt,
            text="i am a line",
            type=TranscriptionType.Line,
            score=None,
        )
    assert (
        str(e.value) == "score shouldn't be null and should be a float in [0..1] range"
    )

    with pytest.raises(AssertionError) as e:
        worker.create_transcription(
            element=elt,
            text="i am a line",
            type=TranscriptionType.Line,
            score="wrong score",
        )
    assert (
        str(e.value) == "score shouldn't be null and should be a float in [0..1] range"
    )

    with pytest.raises(AssertionError) as e:
        worker.create_transcription(
            element=elt,
            text="i am a line",
            type=TranscriptionType.Line,
            score=0,
        )
    assert (
        str(e.value) == "score shouldn't be null and should be a float in [0..1] range"
    )

    with pytest.raises(AssertionError) as e:
        worker.create_transcription(
            element=elt,
            text="i am a line",
            type=TranscriptionType.Line,
            score=2.00,
        )
    assert (
        str(e.value) == "score shouldn't be null and should be a float in [0..1] range"
    )


def test_create_transcription_api_error(responses):
    """A 500 from the transcription endpoint surfaces as ErrorResponse."""
    worker = ElementsWorker()
    worker.configure()
    elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
    responses.add(
        responses.POST,
        f"https://arkindex.teklia.com/api/v1/element/{elt.id}/transcription/",
        status=500,
    )

    with pytest.raises(ErrorResponse):
        worker.create_transcription(
            element=elt,
            text="i am a line",
            type=TranscriptionType.Line,
            score=0.42,
        )

    assert len(responses.calls) == 1
    assert (
        responses.calls[0].request.url
        == f"https://arkindex.teklia.com/api/v1/element/{elt.id}/transcription/"
    )


def test_create_transcription(responses):
    """create_transcription() sends one POST with the expected JSON payload."""
    worker = ElementsWorker()
    worker.configure()
    elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
    responses.add(
        responses.POST,
        f"https://arkindex.teklia.com/api/v1/element/{elt.id}/transcription/",
        status=200,
    )

    worker.create_transcription(
        element=elt,
        text="i am a line",
        type=TranscriptionType.Line,
        score=0.42,
    )

    assert len(responses.calls) == 1
    assert (
        responses.calls[0].request.url
        == f"https://arkindex.teklia.com/api/v1/element/{elt.id}/transcription/"
    )
    assert json.loads(responses.calls[0].request.body) == {
        "text": "i am a line",
        "type": "line",
        "worker_version": "12341234-1234-1234-1234-123412341234",
        "score": 0.42,
    }


def test_create_classification_wrong_element():
    """create_classification() rejects a null or non-Element element."""
    worker = ElementsWorker()
    with pytest.raises(AssertionError) as e:
        worker.create_classification(
            element=None,
            ml_class="a_class",
            confidence=0.42,
            high_confidence=True,
        )
    assert str(e.value) == "element shouldn't be null and should be of type Element"

    with pytest.raises(AssertionError) as e:
        worker.create_classification(
            element="not element type",
            ml_class="a_class",
            confidence=0.42,
            high_confidence=True,
        )
    assert str(e.value) == "element shouldn't be null and should be of type Element"


def test_create_classification_wrong_ml_class():
    """create_classification() rejects a null or non-str ml_class."""
    worker = ElementsWorker()
    elt = Element({"id": "12341234-1234-1234-1234-123412341234"})

    with pytest.raises(AssertionError) as e:
        worker.create_classification(
            element=elt,
            ml_class=None,
            confidence=0.42,
            high_confidence=True,
        )
    assert str(e.value) == "ml_class shouldn't be null and should be of type str"

    with pytest.raises(AssertionError) as e:
        worker.create_classification(
            element=elt,
            ml_class=1234,
            confidence=0.42,
            high_confidence=True,
        )
    assert str(e.value) == "ml_class shouldn't be null and should be of type str"


def test_create_classification_wrong_confidence():
    """create_classification() rejects None, str, int 0 and 2.00 as confidence."""
    worker = ElementsWorker()
    elt = Element({"id": "12341234-1234-1234-1234-123412341234"})

    with pytest.raises(AssertionError) as e:
        worker.create_classification(
            element=elt,
            ml_class="a_class",
            confidence=None,
            high_confidence=True,
        )
    assert (
        str(e.value)
        == "confidence shouldn't be null and should be a float in [0..1] range"
    )

    with pytest.raises(AssertionError) as e:
        worker.create_classification(
            element=elt,
            ml_class="a_class",
            confidence="wrong confidence",
            high_confidence=True,
        )
    assert (
        str(e.value)
        == "confidence shouldn't be null and should be a float in [0..1] range"
    )

    with pytest.raises(AssertionError) as e:
        worker.create_classification(
            element=elt,
            ml_class="a_class",
            confidence=0,
            high_confidence=True,
        )
    assert (
        str(e.value)
        == "confidence shouldn't be null and should be a float in [0..1] range"
    )

    with pytest.raises(AssertionError) as e:
        worker.create_classification(
            element=elt,
            ml_class="a_class",
            confidence=2.00,
            high_confidence=True,
        )
    assert (
        str(e.value)
        == "confidence shouldn't be null and should be a float in [0..1] range"
    )


def test_create_classification_wrong_high_confidence():
    """create_classification() rejects a null or non-bool high_confidence."""
    worker = ElementsWorker()
    elt = Element({"id": "12341234-1234-1234-1234-123412341234"})

    with pytest.raises(AssertionError) as e:
        worker.create_classification(
            element=elt,
            ml_class="a_class",
            confidence=0.42,
            high_confidence=None,
        )
    assert (
        str(e.value) == "high_confidence shouldn't be null and should be of type bool"
    )

    with pytest.raises(AssertionError) as e:
        worker.create_classification(
            element=elt,
            ml_class="a_class",
            confidence=0.42,
            high_confidence="wrong high_confidence",
        )
    assert (
        str(e.value) == "high_confidence shouldn't be null and should be of type bool"
    )


def test_create_classification_api_error(responses):
    """A 500 from the classifications endpoint surfaces as ErrorResponse."""
    worker = ElementsWorker()
    worker.configure()
    elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
    responses.add(
        responses.POST,
        "https://arkindex.teklia.com/api/v1/classifications/",
        status=500,
    )

    with pytest.raises(ErrorResponse):
        worker.create_classification(
            element=elt,
            ml_class="a_class",
            confidence=0.42,
            high_confidence=True,
        )

    assert len(responses.calls) == 1
    assert (
        responses.calls[0].request.url
        == "https://arkindex.teklia.com/api/v1/classifications/"
    )


def test_create_classification(responses):
    """create_classification() sends one POST with the expected JSON payload."""
    worker = ElementsWorker()
    worker.configure()
    elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
    responses.add(
        responses.POST,
        "https://arkindex.teklia.com/api/v1/classifications/",
        status=200,
    )

    worker.create_classification(
        element=elt,
        ml_class="a_class",
        confidence=0.42,
        high_confidence=True,
    )

    assert len(responses.calls) == 1
    assert (
        responses.calls[0].request.url
        == "https://arkindex.teklia.com/api/v1/classifications/"
    )
    assert json.loads(responses.calls[0].request.body) == {
        "element": "12341234-1234-1234-1234-123412341234",
        "ml_class": "a_class",
        "worker_version": "12341234-1234-1234-1234-123412341234",
        "confidence": 0.42,
        "high_confidence": True,
    }


def test_create_entity_wrong_element():
    """create_entity() rejects a null or non-Element element."""
    worker = ElementsWorker()
    # First case covers the null element, second one the wrong type
    with pytest.raises(AssertionError) as e:
        worker.create_entity(
            element=None,
            name="Bob Bob",
            type=EntityType.Person,
            corpus="12341234-1234-1234-1234-123412341234",
        )
    assert str(e.value) == "element shouldn't be null and should be of type Element"

    with pytest.raises(AssertionError) as e:
        worker.create_entity(
            element="not element type",
            name="Bob Bob",
            type=EntityType.Person,
            corpus="12341234-1234-1234-1234-123412341234",
        )
    assert str(e.value) == "element shouldn't be null and should be of type Element"


def test_create_entity_wrong_name():
    """create_entity() rejects a null or non-str name."""
    worker = ElementsWorker()
    elt = Element({"id": "12341234-1234-1234-1234-123412341234"})

    with pytest.raises(AssertionError) as e:
        worker.create_entity(
            element=elt,
            name=None,
            type=EntityType.Person,
            corpus="12341234-1234-1234-1234-123412341234",
        )
    assert str(e.value) == "name shouldn't be null and should be of type str"

    with pytest.raises(AssertionError) as e:
        worker.create_entity(
            element=elt,
            name=1234,
            type=EntityType.Person,
            corpus="12341234-1234-1234-1234-123412341234",
        )
    assert str(e.value) == "name shouldn't be null and should be of type str"


def test_create_entity_wrong_type():
    """create_entity() rejects a type that is not an EntityType."""
    worker = ElementsWorker()
    elt = Element({"id": "12341234-1234-1234-1234-123412341234"})

    with pytest.raises(AssertionError) as e:
        worker.create_entity(
            element=elt,
            name="Bob Bob",
            type=None,
            corpus="12341234-1234-1234-1234-123412341234",
        )
    assert str(e.value) == "type shouldn't be null and should be of type EntityType"

    with pytest.raises(AssertionError) as e:
        worker.create_entity(
            element=elt,
            name="Bob Bob",
            type=1234,
            corpus="12341234-1234-1234-1234-123412341234",
        )
    assert str(e.value) == "type shouldn't be null and should be of type EntityType"

    with pytest.raises(AssertionError) as e:
        worker.create_entity(
            element=elt,
            name="Bob Bob",
            type="not_an_entity_type",
            corpus="12341234-1234-1234-1234-123412341234",
        )
    assert str(e.value) == "type shouldn't be null and should be of type EntityType"


def test_create_entity_wrong_corpus():
    """create_entity() rejects a null or non-str corpus."""
    worker = ElementsWorker()
    elt = Element({"id": "12341234-1234-1234-1234-123412341234"})

    with pytest.raises(AssertionError) as e:
        worker.create_entity(
            element=elt,
            name="Bob Bob",
            type=EntityType.Person,
            corpus=None,
        )
    assert str(e.value) == "corpus shouldn't be null and should be of type str"

    with pytest.raises(AssertionError) as e:
        worker.create_entity(
            element=elt,
            name="Bob Bob",
            type=EntityType.Person,
            corpus=1234,
        )
    assert str(e.value) == "corpus shouldn't be null and should be of type str"


def test_create_entity_wrong_metas():
    """create_entity() rejects metas that are not a dict."""
    worker = ElementsWorker()
    elt = Element({"id": "12341234-1234-1234-1234-123412341234"})

    with pytest.raises(AssertionError) as e:
        worker.create_entity(
            element=elt,
            name="Bob Bob",
            type=EntityType.Person,
            corpus="12341234-1234-1234-1234-123412341234",
            metas="wrong metas",
        )
    assert str(e.value) == "metas should be of type dict"


def test_create_entity_wrong_validated():
    """create_entity() rejects a validated flag that is not a bool."""
    worker = ElementsWorker()
    elt = Element({"id": "12341234-1234-1234-1234-123412341234"})

    with pytest.raises(AssertionError) as e:
        worker.create_entity(
            element=elt,
            name="Bob Bob",
            type=EntityType.Person,
            corpus="12341234-1234-1234-1234-123412341234",
            validated="wrong validated",
        )
    assert str(e.value) == "validated should be of type bool"


def test_create_entity_api_error(responses):
    """A 500 from the entity endpoint surfaces as ErrorResponse."""
    worker = ElementsWorker()
    worker.configure()
    elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
    responses.add(
        responses.POST,
        "https://arkindex.teklia.com/api/v1/entity/",
        status=500,
    )

    with pytest.raises(ErrorResponse):
        worker.create_entity(
            element=elt,
            name="Bob Bob",
            type=EntityType.Person,
            corpus="12341234-1234-1234-1234-123412341234",
        )

    assert len(responses.calls) == 1
    assert (
        responses.calls[0].request.url == "https://arkindex.teklia.com/api/v1/entity/"
    )


def test_create_entity(responses):
    """create_entity() sends one POST with the expected JSON payload."""
    worker = ElementsWorker()
    worker.configure()
    elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
    responses.add(
        responses.POST,
        "https://arkindex.teklia.com/api/v1/entity/",
        status=200,
        json={"id": "12345678-1234-1234-1234-123456789123"},
    )

    worker.create_entity(
        element=elt,
        name="Bob Bob",
        type=EntityType.Person,
        corpus="12341234-1234-1234-1234-123412341234",
    )

    assert len(responses.calls) == 1
    assert (
        responses.calls[0].request.url == "https://arkindex.teklia.com/api/v1/entity/"
    )
    assert json.loads(responses.calls[0].request.body) == {
        "name": "Bob Bob",
        "type": "person",
        "metas": None,
        "validated": None,
        "corpus": "12341234-1234-1234-1234-123412341234",
        "worker_version": "12341234-1234-1234-1234-123412341234",
    }