From f08a8e4265e9da7c51fbb1ac1ebe99d4eeb29ce8 Mon Sep 17 00:00:00 2001 From: Yoann Schneider <yschneider@teklia.com> Date: Thu, 5 Oct 2023 15:00:59 +0000 Subject: [PATCH] More explicit error message when corpus_id is not set in environment --- arkindex_worker/worker/base.py | 21 ++++++++++++++------- tests/conftest.py | 9 +++++++++ tests/test_base_worker.py | 24 +++++++++++++++++++++++- 3 files changed, 46 insertions(+), 8 deletions(-) diff --git a/arkindex_worker/worker/base.py b/arkindex_worker/worker/base.py index 4a2a93e8..e4252287 100644 --- a/arkindex_worker/worker/base.py +++ b/arkindex_worker/worker/base.py @@ -140,7 +140,7 @@ class BaseWorker(object): self.process_information = None # corpus_id will be updated in configure() using the worker_run's corpus # or in configure_for_developers() from the environment - self.corpus_id = None + self._corpus_id = None self.user_configuration = {} self.model_configuration = {} self.support_cache = support_cache @@ -155,6 +155,17 @@ class BaseWorker(object): # Define API Client self.setup_api_client() + @property + def corpus_id(self) -> str: + """ + ID of the corpus on which the worker is executed. + Has to be set through the `ARKINDEX_CORPUS_ID` variable in **read-only** mode. + Raises an Exception when trying to access when unset. + """ + if not self._corpus_id: + raise Exception("Missing ARKINDEX_CORPUS_ID environment variable") + return self._corpus_id + @property def is_read_only(self) -> bool: """ @@ -199,11 +210,7 @@ class BaseWorker(object): logger.warning("Running without any extra configuration") # Define corpus_id from environment - self.corpus_id = os.environ.get("ARKINDEX_CORPUS_ID") - if not self.corpus_id: - logger.warning( - "'ARKINDEX_CORPUS_ID' was not set in the environment. Any API request involving a `corpus_id` will fail." - ) + self._corpus_id = os.environ.get("ARKINDEX_CORPUS_ID") # Define model_version_id from environment self.model_version_id = os.environ.get("ARKINDEX_MODEL_VERSION_ID") @@ -229,7 +236,7 @@ class BaseWorker(object): self.process_information = worker_run["process"] # Load corpus id - self.corpus_id = worker_run["process"]["corpus"] + self._corpus_id = worker_run["process"]["corpus"] # Load worker version information worker_version = worker_run["worker_version"] diff --git a/tests/conftest.py b/tests/conftest.py index d41e242d..f4209660 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -211,6 +211,15 @@ def mock_elements_worker(monkeypatch, mock_worker_run_api): return worker +@pytest.fixture +def mock_elements_worker_read_only(monkeypatch): + """Build and configure an ElementsWorker with fixed CLI parameters to avoid issues with pytest""" + monkeypatch.setattr(sys, "argv", ["worker", "--dev"]) + worker = ElementsWorker() + worker.configure() + return worker + + @pytest.fixture def mock_elements_worker_with_list(monkeypatch, responses, mock_elements_worker): """ diff --git a/tests/test_base_worker.py b/tests/test_base_worker.py index d229944a..d9fb77e9 100644 --- a/tests/test_base_worker.py +++ b/tests/test_base_worker.py @@ -10,7 +10,7 @@ import pytest from arkindex.mock import MockApiClient from arkindex_worker import logger -from arkindex_worker.worker import BaseWorker +from arkindex_worker.worker import BaseWorker, ElementsWorker from arkindex_worker.worker.base import ModelNotFoundError from tests.conftest import FIXTURES_DIR @@ -739,3 +739,25 @@ def test_extract_parent_archives(tmp_path): ) mode = "rb" if extracted_file.suffix == ".png" else "r" assert extracted_file.open(mode).read() == expected_file.open(mode).read() + + +def test_corpus_id_not_set_read_only_mode( + mock_elements_worker_read_only: ElementsWorker, +): + mock_elements_worker_read_only.configure() + + with pytest.raises( + Exception, match="Missing ARKINDEX_CORPUS_ID environment variable" + ): + mock_elements_worker_read_only.corpus_id + + +def test_corpus_id_set_read_only_mode( + monkeypatch, mock_elements_worker_read_only: ElementsWorker +): + corpus_id = str(uuid.uuid4()) + monkeypatch.setenv("ARKINDEX_CORPUS_ID", corpus_id) + + mock_elements_worker_read_only.configure() + + assert mock_elements_worker_read_only.corpus_id == corpus_id -- GitLab