diff --git a/arkindex_worker/worker/base.py b/arkindex_worker/worker/base.py index f0a571da95e2a209906e361ce498b1d9e4fa505f..a893689c4d51e291a1c6c19ca704f6195f6e0136 100644 --- a/arkindex_worker/worker/base.py +++ b/arkindex_worker/worker/base.py @@ -119,6 +119,7 @@ class BaseWorker(object): logger.info(f"Worker will use {self.work_dir} as working directory") self.process_information = None + self.corpus_id = None self.user_configuration = {} self.support_cache = support_cache # use_cache will be updated in configure() if the cache is supported and if there @@ -188,6 +189,9 @@ class BaseWorker(object): # Load process information self.process_information = worker_run["process"] + # Load corpus id + self.corpus_id = worker_run["process"]["corpus"] + # Load worker version information worker_version = worker_run["worker_version"] self.worker_details = worker_version["worker"] diff --git a/arkindex_worker/worker/classification.py b/arkindex_worker/worker/classification.py index c9fdd086bf33939fefcf9574e64183fe4703c818..5cdb63e669d38ac8c720dc9a0aa068b5c6778933 100644 --- a/arkindex_worker/worker/classification.py +++ b/arkindex_worker/worker/classification.py @@ -2,9 +2,6 @@ """ ElementsWorker methods for classifications and ML classes. """ - -import os - from apistar.exceptions import ErrorResponse from peewee import IntegrityError @@ -45,7 +42,7 @@ class ClassificationMixin(object): :returns str: ID of the retrieved or created MLClass. """ if corpus_id is None: - corpus_id = os.environ.get("ARKINDEX_CORPUS_ID") + corpus_id = self.corpus_id if not self.classes.get(corpus_id): self.load_corpus_classes(corpus_id) diff --git a/arkindex_worker/worker/element.py b/arkindex_worker/worker/element.py index bf263e5c0947d9822def002e15c4d84ff1ee2420..daa53433c1e83e809d9d0884ee878d07351bc350 100644 --- a/arkindex_worker/worker/element.py +++ b/arkindex_worker/worker/element.py @@ -66,6 +66,7 @@ class ElementMixin(object): ), "Element type slugs must be strings." 
corpus = Corpus(self.request("RetrieveCorpus", id=corpus_id)) + # corpus = self.corpus_id available_slugs = {element_type.slug for element_type in corpus.types} missing_slugs = set(type_slugs) - available_slugs diff --git a/arkindex_worker/worker/entity.py b/arkindex_worker/worker/entity.py index 7b561c3ebea5bd566ca378d3275ec015698b0537..b0b529038c35a9a0925b2cf25054aea134741d15 100644 --- a/arkindex_worker/worker/entity.py +++ b/arkindex_worker/worker/entity.py @@ -2,8 +2,6 @@ """ ElementsWorker methods for entities. """ - -import os from enum import Enum from peewee import IntegrityError @@ -49,7 +47,7 @@ class EntityMixin(object): :type corpus: str or None """ if corpus is None: - corpus = os.environ.get("ARKINDEX_CORPUS_ID") + corpus = self.corpus_id assert element and isinstance( element, (Element, CachedElement) diff --git a/tests/conftest.py b/tests/conftest.py index f00ae5e75571a79b20588939cedd1415c407bb25..703575452a5b784f597e4b6a72f83a2be290d866 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -106,7 +106,6 @@ def give_env_variable(request, monkeypatch): """Defines required environment variables""" monkeypatch.setenv("WORKER_VERSION_ID", "12341234-1234-1234-1234-123412341234") monkeypatch.setenv("ARKINDEX_WORKER_RUN_ID", "56785678-5678-5678-5678-567856785678") - monkeypatch.setenv("ARKINDEX_CORPUS_ID", "11111111-1111-1111-1111-111111111111") @pytest.fixture @@ -207,7 +206,6 @@ def mock_activity_calls(responses): def mock_elements_worker(monkeypatch, mock_worker_run_api): """Build and configure an ElementsWorker with fixed CLI parameters to avoid issues with pytest""" monkeypatch.setattr(sys, "argv", ["worker"]) - worker = ElementsWorker() worker.configure() return worker diff --git a/tests/test_base_worker.py b/tests/test_base_worker.py index a3262ee949979b3f8bc5b59fb09c8fbbe079f716..90b89736dd153760e27b5aa392dacc48d433854c 100644 --- a/tests/test_base_worker.py +++ b/tests/test_base_worker.py @@ -194,7 +194,10 @@ def 
test_configure_worker_run(mocker, monkeypatch, responses): "configuration": {"configuration": {}}, }, "configuration": user_configuration, - "process": {"id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff"}, + "process": { + "id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff", + "corpus": "11111111-1111-1111-1111-111111111111", + }, } responses.add( @@ -257,7 +260,10 @@ def test_configure_user_configuration_defaults( "param_5": True, }, }, - "process": {"id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff"}, + "process": { + "id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff", + "corpus": "11111111-1111-1111-1111-111111111111", + }, } responses.add( responses.GET, @@ -310,7 +316,10 @@ def test_configure_user_config_debug(mocker, monkeypatch, responses, debug): "name": "BBB", "configuration": {"debug": debug}, }, - "process": {"id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff"}, + "process": { + "id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff", + "corpus": "11111111-1111-1111-1111-111111111111", + }, } responses.add( responses.GET, @@ -357,7 +366,10 @@ def test_configure_worker_run_missing_conf(mocker, monkeypatch, responses): "configuration": {"configuration": {}}, }, "configuration": {"id": "bbbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb", "name": "BBB"}, - "process": {"id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff"}, + "process": { + "id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff", + "corpus": "11111111-1111-1111-1111-111111111111", + }, } responses.add( responses.GET, @@ -404,7 +416,10 @@ def test_configure_worker_run_no_worker_run_conf(mocker, monkeypatch, responses) "configuration": {}, }, "configuration": None, - "process": {"id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff"}, + "process": { + "id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff", + "corpus": "11111111-1111-1111-1111-111111111111", + }, } responses.add( responses.GET, diff --git a/tests/test_elements_worker/test_entities.py b/tests/test_elements_worker/test_entities.py index 57ea05af869c63bb4859b37dbf536e525d3aa1e0..71ed34ab29e5e6f18ddcab31d94165125e0a23bb 
100644 --- a/tests/test_elements_worker/test_entities.py +++ b/tests/test_elements_worker/test_entities.py @@ -101,7 +101,6 @@ def test_create_entity_wrong_corpus(monkeypatch, mock_elements_worker): elt = Element({"id": "12341234-1234-1234-1234-123412341234"}) # Triggering an error on metas param, not giving corpus should work since - # ARKINDEX_CORPUS_ID environment variable is set on mock_elements_worker with pytest.raises(AssertionError) as e: mock_elements_worker.create_entity( element=elt, @@ -111,8 +110,8 @@ def test_create_entity_wrong_corpus(monkeypatch, mock_elements_worker): ) assert str(e.value) == "metas should be of type dict" - # Removing ARKINDEX_CORPUS_ID environment variable should give an error when corpus=None - monkeypatch.delenv("ARKINDEX_CORPUS_ID") + # Removing corpus_id variable should give an error when corpus=None + mock_elements_worker.corpus_id = None with pytest.raises(AssertionError) as e: mock_elements_worker.create_entity( element=elt, @@ -130,6 +129,7 @@ def test_create_entity_wrong_corpus(monkeypatch, mock_elements_worker): corpus=1234, ) assert str(e.value) == "corpus shouldn't be null and should be of type str" + mock_elements_worker.corpus_id = ("11111111-1111-1111-1111-111111111111",) def test_create_entity_wrong_metas(mock_elements_worker):