# -*- coding: utf-8 -*-
import json

import pytest
from apistar.exceptions import ErrorResponse

from arkindex_worker.models import Element


def test_get_ml_class_id_load_classes(responses, mock_elements_worker):
    """Classes are fetched from the API when the worker has none cached."""
    corpus_id = "12341234-1234-1234-1234-123412341234"
    responses.add(
        responses.GET,
        f"http://testserver/api/v1/corpus/{corpus_id}/classes/",
        status=200,
        json={
            "count": 1,
            "next": None,
            "results": [
                {
                    "id": "0000",
                    "name": "good",
                    "nb_best": 0,
                }
            ],
        },
    )

    # Nothing is cached before the first lookup
    assert not mock_elements_worker.classes
    ml_class_id = mock_elements_worker.get_ml_class_id(corpus_id, "good")

    # The first two calls are presumably issued while the worker fixture is
    # configured (verify against the fixture); only the third lists classes.
    assert len(responses.calls) == 3
    assert [call.request.url for call in responses.calls] == [
        "http://testserver/api/v1/user/",
        "http://testserver/api/v1/workers/versions/12341234-1234-1234-1234-123412341234/",
        f"http://testserver/api/v1/corpus/{corpus_id}/classes/",
    ]
    assert mock_elements_worker.classes == {
        "12341234-1234-1234-1234-123412341234": {"good": "0000"}
    }
    assert ml_class_id == "0000"


def test_get_ml_class_id_inexistant_class(mock_elements_worker, responses):
    """A class missing from the cache is created through the API and cached."""
    # A missing class is now created automatically
    corpus_id = "12341234-1234-1234-1234-123412341234"
    mock_elements_worker.classes = {
        "12341234-1234-1234-1234-123412341234": {"good": "0000"}
    }

    responses.add(
        responses.POST,
        f"http://testserver/api/v1/corpus/{corpus_id}/classes/",
        status=201,
        json={"id": "new-ml-class-1234"},
    )

    # Missing class at first
    assert mock_elements_worker.classes == {
        "12341234-1234-1234-1234-123412341234": {"good": "0000"}
    }

    ml_class_id = mock_elements_worker.get_ml_class_id(corpus_id, "bad")
    assert ml_class_id == "new-ml-class-1234"

    # Now it's available
    assert mock_elements_worker.classes == {
        "12341234-1234-1234-1234-123412341234": {
            "good": "0000",
            "bad": "new-ml-class-1234",
        }
    }


def test_get_ml_class_id(mock_elements_worker):
    """A class already present in the cache is returned as is."""
    corpus_id = "12341234-1234-1234-1234-123412341234"
    mock_elements_worker.classes = {
        "12341234-1234-1234-1234-123412341234": {"good": "0000"}
    }

    ml_class_id = mock_elements_worker.get_ml_class_id(corpus_id, "good")
    assert ml_class_id == "0000"


def test_get_ml_class_reload(responses, mock_elements_worker):
    """Classes are listed again when creating a missing class is rejected.

    The mocked API first lists only ``class1``, then answers 400 to the
    creation of ``class2``, then lists both classes.
    """
    corpus_id = "12341234-1234-1234-1234-123412341234"

    # Add some initial classes
    responses.add(
        responses.GET,
        f"http://testserver/api/v1/corpus/{corpus_id}/classes/",
        json={
            "count": 1,
            "next": None,
            "results": [
                {
                    "id": "class1_id",
                    "name": "class1",
                }
            ],
        },
    )

    # Invalid response when trying to create class2
    responses.add(
        responses.POST,
        f"http://testserver/api/v1/corpus/{corpus_id}/classes/",
        status=400,
        json={"non_field_errors": "Already exists"},
    )

    # Add both classes (class2 is created by another process)
    responses.add(
        responses.GET,
        f"http://testserver/api/v1/corpus/{corpus_id}/classes/",
        json={
            "count": 2,
            "next": None,
            "results": [
                {
                    "id": "class1_id",
                    "name": "class1",
                },
                {
                    "id": "class2_id",
                    "name": "class2",
                },
            ],
        },
    )

    # Simply request class 2, it should be reloaded
    assert mock_elements_worker.get_ml_class_id(corpus_id, "class2") == "class2_id"
    assert len(responses.calls) == 5
    assert mock_elements_worker.classes == {
        corpus_id: {
            "class1": "class1_id",
            "class2": "class2_id",
        }
    }
    # Expected sequence: initial listing, failed creation, second listing
    assert [(call.request.method, call.request.url) for call in responses.calls] == [
        ("GET", "http://testserver/api/v1/user/"),
        (
            "GET",
            "http://testserver/api/v1/workers/versions/12341234-1234-1234-1234-123412341234/",
        ),
        ("GET", f"http://testserver/api/v1/corpus/{corpus_id}/classes/"),
        ("POST", f"http://testserver/api/v1/corpus/{corpus_id}/classes/"),
        ("GET", f"http://testserver/api/v1/corpus/{corpus_id}/classes/"),
    ]


def test_create_classification_wrong_element(mock_elements_worker):
    """``element`` must be an ``Element``: ``None`` and a str are rejected."""
    with pytest.raises(AssertionError) as e:
        mock_elements_worker.create_classification(
            element=None,
            ml_class="a_class",
            confidence=0.42,
            high_confidence=True,
        )
    assert str(e.value) == "element shouldn't be null and should be of type Element"

    with pytest.raises(AssertionError) as e:
        mock_elements_worker.create_classification(
            element="not element type",
            ml_class="a_class",
            confidence=0.42,
            high_confidence=True,
        )
    assert str(e.value) == "element shouldn't be null and should be of type Element"


def test_create_classification_wrong_ml_class(mock_elements_worker, responses):
    """``ml_class`` must be a str; an unknown class name is created on the fly."""
    elt = Element(
        {
            "id": "12341234-1234-1234-1234-123412341234",
            "corpus": {"id": "11111111-1111-1111-1111-111111111111"},
        }
    )

    with pytest.raises(AssertionError) as e:
        mock_elements_worker.create_classification(
            element=elt,
            ml_class=None,
            confidence=0.42,
            high_confidence=True,
        )
    assert str(e.value) == "ml_class shouldn't be null and should be of type str"

    with pytest.raises(AssertionError) as e:
        mock_elements_worker.create_classification(
            element=elt,
            ml_class=1234,
            confidence=0.42,
            high_confidence=True,
        )
    assert str(e.value) == "ml_class shouldn't be null and should be of type str"

    # Automatically create a missing class !
    responses.add(
        responses.POST,
        "http://testserver/api/v1/corpus/11111111-1111-1111-1111-111111111111/classes/",
        status=201,
        json={"id": "new-ml-class-1234"},
    )
    responses.add(
        responses.POST,
        "http://testserver/api/v1/classifications/",
        status=201,
        json={"id": "new-classification-1234"},
    )
    mock_elements_worker.classes = {
        "11111111-1111-1111-1111-111111111111": {"another_class": "0000"}
    }
    mock_elements_worker.create_classification(
        element=elt,
        ml_class="a_class",
        confidence=0.42,
        high_confidence=True,
    )

    # Check a class & classification has been created
    # Only the last two calls matter: class creation, then classification
    assert [
        (call.request.url, json.loads(call.request.body))
        for call in responses.calls[-2:]
    ] == [
        (
            "http://testserver/api/v1/corpus/11111111-1111-1111-1111-111111111111/classes/",
            {"name": "a_class"},
        ),
        (
            "http://testserver/api/v1/classifications/",
            {
                "element": "12341234-1234-1234-1234-123412341234",
                "ml_class": "new-ml-class-1234",
                "worker_version": "12341234-1234-1234-1234-123412341234",
                "confidence": 0.42,
                "high_confidence": True,
            },
        ),
    ]


def test_create_classification_wrong_confidence(mock_elements_worker):
    """``confidence`` is rejected when it is ``None``, a str, ``0`` or ``2.00``."""
    mock_elements_worker.classes = {
        "11111111-1111-1111-1111-111111111111": {"a_class": "0000"}
    }
    elt = Element(
        {
            "id": "12341234-1234-1234-1234-123412341234",
            "corpus": {"id": "11111111-1111-1111-1111-111111111111"},
        }
    )

    with pytest.raises(AssertionError) as e:
        mock_elements_worker.create_classification(
            element=elt,
            ml_class="a_class",
            confidence=None,
            high_confidence=True,
        )
    assert (
        str(e.value)
        == "confidence shouldn't be null and should be a float in [0..1] range"
    )

    with pytest.raises(AssertionError) as e:
        mock_elements_worker.create_classification(
            element=elt,
            ml_class="a_class",
            confidence="wrong confidence",
            high_confidence=True,
        )
    assert (
        str(e.value)
        == "confidence shouldn't be null and should be a float in [0..1] range"
    )

    # NOTE(review): 0 is presumably rejected because it is an int rather than
    # a float -- confirm against the create_classification implementation.
    with pytest.raises(AssertionError) as e:
        mock_elements_worker.create_classification(
            element=elt,
            ml_class="a_class",
            confidence=0,
            high_confidence=True,
        )
    assert (
        str(e.value)
        == "confidence shouldn't be null and should be a float in [0..1] range"
    )

    # A float above the upper bound of the range
    with pytest.raises(AssertionError) as e:
        mock_elements_worker.create_classification(
            element=elt,
            ml_class="a_class",
            confidence=2.00,
            high_confidence=True,
        )
    assert (
        str(e.value)
        == "confidence shouldn't be null and should be a float in [0..1] range"
    )


def test_create_classification_wrong_high_confidence(mock_elements_worker):
    """``high_confidence`` must be a bool: ``None`` and a str are rejected."""
    mock_elements_worker.classes = {
        "11111111-1111-1111-1111-111111111111": {"a_class": "0000"}
    }
    elt = Element(
        {
            "id": "12341234-1234-1234-1234-123412341234",
            "corpus": {"id": "11111111-1111-1111-1111-111111111111"},
        }
    )

    with pytest.raises(AssertionError) as e:
        mock_elements_worker.create_classification(
            element=elt,
            ml_class="a_class",
            confidence=0.42,
            high_confidence=None,
        )
    assert (
        str(e.value) == "high_confidence shouldn't be null and should be of type bool"
    )

    with pytest.raises(AssertionError) as e:
        mock_elements_worker.create_classification(
            element=elt,
            ml_class="a_class",
            confidence=0.42,
            high_confidence="wrong high_confidence",
        )
    assert (
        str(e.value) == "high_confidence shouldn't be null and should be of type bool"
    )


def test_create_classification_api_error(responses, mock_elements_worker):
    """A persistent HTTP 500 raises ``ErrorResponse`` after five attempts."""
    mock_elements_worker.classes = {
        "11111111-1111-1111-1111-111111111111": {"a_class": "0000"}
    }
    elt = Element(
        {
            "id": "12341234-1234-1234-1234-123412341234",
            "corpus": {"id": "11111111-1111-1111-1111-111111111111"},
        }
    )
    responses.add(
        responses.POST,
        "http://testserver/api/v1/classifications/",
        status=500,
    )

    with pytest.raises(ErrorResponse):
        mock_elements_worker.create_classification(
            element=elt,
            ml_class="a_class",
            confidence=0.42,
            high_confidence=True,
        )

    # Two initial calls plus the five attempts on the failing endpoint
    assert len(responses.calls) == 7
    assert [call.request.url for call in responses.calls] == [
        "http://testserver/api/v1/user/",
        "http://testserver/api/v1/workers/versions/12341234-1234-1234-1234-123412341234/",
        # We retry 5 times the API call
        "http://testserver/api/v1/classifications/",
        "http://testserver/api/v1/classifications/",
        "http://testserver/api/v1/classifications/",
        "http://testserver/api/v1/classifications/",
        "http://testserver/api/v1/classifications/",
    ]


def test_create_classification(responses, mock_elements_worker):
    """A successful creation posts the expected payload and is reported."""
    mock_elements_worker.classes = {
        "11111111-1111-1111-1111-111111111111": {"a_class": "0000"}
    }
    elt = Element(
        {
            "id": "12341234-1234-1234-1234-123412341234",
            "corpus": {"id": "11111111-1111-1111-1111-111111111111"},
        }
    )
    responses.add(
        responses.POST,
        "http://testserver/api/v1/classifications/",
        status=200,
    )

    mock_elements_worker.create_classification(
        element=elt,
        ml_class="a_class",
        confidence=0.42,
        high_confidence=True,
    )

    assert len(responses.calls) == 3
    assert [call.request.url for call in responses.calls] == [
        "http://testserver/api/v1/user/",
        "http://testserver/api/v1/workers/versions/12341234-1234-1234-1234-123412341234/",
        "http://testserver/api/v1/classifications/",
    ]

    # The class name is sent as the ID stored in the worker's class cache
    assert json.loads(responses.calls[2].request.body) == {
        "element": "12341234-1234-1234-1234-123412341234",
        "ml_class": "0000",
        "worker_version": "12341234-1234-1234-1234-123412341234",
        "confidence": 0.42,
        "high_confidence": True,
    }

    # Classification has been created and reported
    assert mock_elements_worker.report.report_data["elements"][elt.id][
        "classifications"
    ] == {"a_class": 1}


def test_create_classification_duplicate(responses, mock_elements_worker):
    """A 400 "unique set" answer does not raise and nothing is reported."""
    mock_elements_worker.classes = {
        "11111111-1111-1111-1111-111111111111": {"a_class": "0000"}
    }
    elt = Element(
        {
            "id": "12341234-1234-1234-1234-123412341234",
            "corpus": {"id": "11111111-1111-1111-1111-111111111111"},
        }
    )
    responses.add(
        responses.POST,
        "http://testserver/api/v1/classifications/",
        status=400,
        json={
            "non_field_errors": [
                "The fields element, worker_version, ml_class must make a unique set."
            ]
        },
    )

    mock_elements_worker.create_classification(
        element=elt,
        ml_class="a_class",
        confidence=0.42,
        high_confidence=True,
    )

    # A single POST: the rejected request is not retried
    assert len(responses.calls) == 3
    assert [call.request.url for call in responses.calls] == [
        "http://testserver/api/v1/user/",
        "http://testserver/api/v1/workers/versions/12341234-1234-1234-1234-123412341234/",
        "http://testserver/api/v1/classifications/",
    ]

    assert json.loads(responses.calls[2].request.body) == {
        "element": "12341234-1234-1234-1234-123412341234",
        "ml_class": "0000",
        "worker_version": "12341234-1234-1234-1234-123412341234",
        "confidence": 0.42,
        "high_confidence": True,
    }

    # Classification has NOT been created
    assert mock_elements_worker.report.report_data["elements"] == {}