class MissingTypeError(Exception):
    """
    A required element type was not found in a corpus.
    """


class ElementMixin(object):
    # NOTE(review): only the method fully visible in this excerpt is
    # reproduced here; the mixin's other methods lie outside this view.

    def check_required_types(self, corpus_id: str, *type_slugs: str) -> bool:
        """
        Check that a corpus has a list of required element types,
        and raise an exception if any of them are missing.

        The corpus is fetched with the ``RetrieveCorpus`` API operation and
        the ``slug`` of each entry in its ``types`` list is compared against
        the requested slugs.

        :param corpus_id: ID of the corpus to inspect.
        :param type_slugs: One or more element type slugs that must exist.
        :returns: ``True`` when every requested slug exists in the corpus.
        :raises AssertionError: If no slug is given, or a slug is not a string.
        :raises MissingTypeError: If at least one slug is absent from the
            corpus. Missing slugs are listed once each, in the order the
            caller passed them, so the message is deterministic.
        """
        assert len(type_slugs), "At least one element type slug is required."
        assert all(
            isinstance(slug, str) for slug in type_slugs
        ), "Element type slugs must be strings."

        corpus = self.request("RetrieveCorpus", id=corpus_id)
        available_slugs = {element_type["slug"] for element_type in corpus["types"]}

        # A set difference would iterate in hash order, which changes between
        # interpreter runs for strings; dict.fromkeys drops duplicates while
        # keeping the caller's order, so the error message stays stable.
        missing_slugs = [
            slug for slug in dict.fromkeys(type_slugs) if slug not in available_slugs
        ]

        if missing_slugs:
            raise MissingTypeError(
                f'Element type(s) {", ".join(missing_slugs)} were not found in the {corpus["name"]} corpus ({corpus["id"]}).'
            )

        return True
def test_check_required_types_argument_types(mock_elements_worker):
    """
    check_required_types rejects a call without slugs and a non-string slug.
    """
    worker = ElementsWorker()
    corpus = "12341234-1234-1234-1234-123412341234"

    # No slug at all
    with pytest.raises(AssertionError) as no_slug_error:
        worker.check_required_types(corpus)
    assert str(no_slug_error.value) == "At least one element type slug is required."

    # One of the slugs is not a string
    with pytest.raises(AssertionError) as bad_slug_error:
        worker.check_required_types(corpus, "lol", 42)
    assert str(bad_slug_error.value) == "Element type slugs must be strings."


def test_check_required_types(monkeypatch, tmp_path, mock_elements_worker, responses):
    """
    check_required_types passes when every slug exists in the corpus and
    raises MissingTypeError naming the absent slugs otherwise.
    """
    # Point TASK_ELEMENTS at a file holding an empty JSON list
    # (presumably read by worker.configure() — verify against the worker).
    task_elements = tmp_path / "elements.json"
    task_elements.write_text("[]")
    monkeypatch.setenv("TASK_ELEMENTS", str(task_elements))

    corpus_id = "12341234-1234-1234-1234-123412341234"
    corpus_payload = {
        "id": corpus_id,
        "name": "Some Corpus",
        "types": [{"slug": "folder"}, {"slug": "page"}],
    }
    # Mocked corpus endpoint exposing only the "folder" and "page" types
    responses.add(
        responses.GET,
        f"http://testserver/api/v1/corpus/{corpus_id}/",
        json=corpus_payload,
    )

    worker = ElementsWorker()
    worker.configure()

    # Every requested type exists
    assert worker.check_required_types(corpus_id, "page")
    assert worker.check_required_types(corpus_id, "page", "folder")

    # "text_line" and "act" are not part of the mocked corpus
    with pytest.raises(MissingTypeError) as missing_error:
        assert worker.check_required_types(corpus_id, "page", "text_line", "act")
    expected_message = "Element type(s) text_line, act were not found in the Some Corpus corpus (12341234-1234-1234-1234-123412341234)."
    assert str(missing_error.value) == expected_message