From f08a8e4265e9da7c51fbb1ac1ebe99d4eeb29ce8 Mon Sep 17 00:00:00 2001
From: Yoann Schneider <yschneider@teklia.com>
Date: Thu, 5 Oct 2023 15:00:59 +0000
Subject: [PATCH] More explicit error message when corpus_id is not set in
 environment

---
 arkindex_worker/worker/base.py | 21 ++++++++++++++-------
 tests/conftest.py              |  9 +++++++++
 tests/test_base_worker.py      | 24 +++++++++++++++++++++++-
 3 files changed, 46 insertions(+), 8 deletions(-)

diff --git a/arkindex_worker/worker/base.py b/arkindex_worker/worker/base.py
index 4a2a93e8..e4252287 100644
--- a/arkindex_worker/worker/base.py
+++ b/arkindex_worker/worker/base.py
@@ -140,7 +140,7 @@ class BaseWorker(object):
         self.process_information = None
         # corpus_id will be updated in configure() using the worker_run's corpus
         # or in configure_for_developers() from the environment
-        self.corpus_id = None
+        self._corpus_id = None
         self.user_configuration = {}
         self.model_configuration = {}
         self.support_cache = support_cache
@@ -155,6 +155,17 @@ class BaseWorker(object):
         # Define API Client
         self.setup_api_client()
 
+    @property
+    def corpus_id(self) -> str:
+        """
+        ID of the corpus on which the worker is executed.
+        In **read-only** mode, it must be provided through the `ARKINDEX_CORPUS_ID` environment variable.
+        Accessing it while it is still unset (`None` or empty) raises an Exception.
+        """
+        if self._corpus_id:
+            return self._corpus_id
+        raise Exception("Missing ARKINDEX_CORPUS_ID environment variable")
+
     @property
     def is_read_only(self) -> bool:
         """
@@ -199,11 +210,7 @@ class BaseWorker(object):
             logger.warning("Running without any extra configuration")
 
         # Define corpus_id from environment
-        self.corpus_id = os.environ.get("ARKINDEX_CORPUS_ID")
-        if not self.corpus_id:
-            logger.warning(
-                "'ARKINDEX_CORPUS_ID' was not set in the environment. Any API request involving a `corpus_id` will fail."
-            )
+        self._corpus_id = os.environ.get("ARKINDEX_CORPUS_ID")
 
         # Define model_version_id from environment
         self.model_version_id = os.environ.get("ARKINDEX_MODEL_VERSION_ID")
@@ -229,7 +236,7 @@ class BaseWorker(object):
         self.process_information = worker_run["process"]
 
         # Load corpus id
-        self.corpus_id = worker_run["process"]["corpus"]
+        self._corpus_id = worker_run["process"]["corpus"]
 
         # Load worker version information
         worker_version = worker_run["worker_version"]
diff --git a/tests/conftest.py b/tests/conftest.py
index d41e242d..f4209660 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -211,6 +211,15 @@ def mock_elements_worker(monkeypatch, mock_worker_run_api):
     return worker
 
 
+@pytest.fixture
+def mock_elements_worker_read_only(monkeypatch):
+    """Return a configured ElementsWorker started with fixed CLI parameters (`--dev`) to avoid issues with pytest"""
+    monkeypatch.setattr(sys, "argv", ["worker", "--dev"])
+    read_only_worker = ElementsWorker()
+    read_only_worker.configure()
+    return read_only_worker
+
+
 @pytest.fixture
 def mock_elements_worker_with_list(monkeypatch, responses, mock_elements_worker):
     """
diff --git a/tests/test_base_worker.py b/tests/test_base_worker.py
index d229944a..d9fb77e9 100644
--- a/tests/test_base_worker.py
+++ b/tests/test_base_worker.py
@@ -10,7 +10,7 @@ import pytest
 
 from arkindex.mock import MockApiClient
 from arkindex_worker import logger
-from arkindex_worker.worker import BaseWorker
+from arkindex_worker.worker import BaseWorker, ElementsWorker
 from arkindex_worker.worker.base import ModelNotFoundError
 from tests.conftest import FIXTURES_DIR
 
@@ -739,3 +739,25 @@ def test_extract_parent_archives(tmp_path):
         )
         mode = "rb" if extracted_file.suffix == ".png" else "r"
         assert extracted_file.open(mode).read() == expected_file.open(mode).read()
+
+
+def test_corpus_id_not_set_read_only_mode(
+    monkeypatch, mock_elements_worker_read_only: ElementsWorker
+):
+    monkeypatch.delenv("ARKINDEX_CORPUS_ID", raising=False)  # ignore ambient value
+    mock_elements_worker_read_only.configure()  # re-read the cleaned environment
+    with pytest.raises(
+        Exception, match="Missing ARKINDEX_CORPUS_ID environment variable"
+    ):
+        mock_elements_worker_read_only.corpus_id  # property access itself must raise
+
+
+def test_corpus_id_set_read_only_mode(
+    monkeypatch, mock_elements_worker_read_only: ElementsWorker
+):
+    """In read-only mode, `corpus_id` returns the value of ARKINDEX_CORPUS_ID."""
+    expected = str(uuid.uuid4())
+    monkeypatch.setenv("ARKINDEX_CORPUS_ID", expected)
+    worker = mock_elements_worker_read_only
+    worker.configure()  # configure again so the freshly set variable is picked up
+    assert worker.corpus_id == expected
-- 
GitLab