From 35caa7ad2bd4e66ec840a4e86343aaa153976c76 Mon Sep 17 00:00:00 2001
From: NolanB <nboukachab@teklia.com>
Date: Fri, 2 Sep 2022 17:26:15 +0200
Subject: [PATCH] Replace the ARKINDEX_CORPUS_ID environment variable with a
 corpus_id attribute on BaseWorker

---
 arkindex_worker/worker/base.py              |  4 ++++
 arkindex_worker/worker/classification.py    |  5 +----
 arkindex_worker/worker/element.py           |  1 +
 arkindex_worker/worker/entity.py            |  4 +---
 tests/conftest.py                           |  2 --
 tests/test_base_worker.py                   | 25 ++++++++++++++++-----
 tests/test_elements_worker/test_entities.py |  6 ++---
 7 files changed, 30 insertions(+), 17 deletions(-)

diff --git a/arkindex_worker/worker/base.py b/arkindex_worker/worker/base.py
index f0a571da..a893689c 100644
--- a/arkindex_worker/worker/base.py
+++ b/arkindex_worker/worker/base.py
@@ -119,6 +119,7 @@ class BaseWorker(object):
         logger.info(f"Worker will use {self.work_dir} as working directory")
 
         self.process_information = None
+        self.corpus_id = None
         self.user_configuration = {}
         self.support_cache = support_cache
         # use_cache will be updated in configure() if the cache is supported and if there
@@ -188,6 +189,9 @@ class BaseWorker(object):
         # Load process information
         self.process_information = worker_run["process"]
 
+        # Load corpus id
+        self.corpus_id = worker_run["process"]["corpus"]
+
         # Load worker version information
         worker_version = worker_run["worker_version"]
         self.worker_details = worker_version["worker"]
diff --git a/arkindex_worker/worker/classification.py b/arkindex_worker/worker/classification.py
index c9fdd086..5cdb63e6 100644
--- a/arkindex_worker/worker/classification.py
+++ b/arkindex_worker/worker/classification.py
@@ -2,9 +2,6 @@
 """
 ElementsWorker methods for classifications and ML classes.
 """
-
-import os
-
 from apistar.exceptions import ErrorResponse
 from peewee import IntegrityError
 
@@ -45,7 +42,7 @@ class ClassificationMixin(object):
         :returns str: ID of the retrieved or created MLClass.
         """
         if corpus_id is None:
-            corpus_id = os.environ.get("ARKINDEX_CORPUS_ID")
+            corpus_id = self.corpus_id
 
         if not self.classes.get(corpus_id):
             self.load_corpus_classes(corpus_id)
diff --git a/arkindex_worker/worker/element.py b/arkindex_worker/worker/element.py
index bf263e5c..daa53433 100644
--- a/arkindex_worker/worker/element.py
+++ b/arkindex_worker/worker/element.py
@@ -66,6 +66,7 @@ class ElementMixin(object):
         ), "Element type slugs must be strings."
 
         corpus = Corpus(self.request("RetrieveCorpus", id=corpus_id))
+        # A full Corpus, not just self.corpus_id, is needed to read corpus.types
         available_slugs = {element_type.slug for element_type in corpus.types}
         missing_slugs = set(type_slugs) - available_slugs
 
diff --git a/arkindex_worker/worker/entity.py b/arkindex_worker/worker/entity.py
index 7b561c3e..b0b52903 100644
--- a/arkindex_worker/worker/entity.py
+++ b/arkindex_worker/worker/entity.py
@@ -2,8 +2,6 @@
 """
 ElementsWorker methods for entities.
 """
-
-import os
 from enum import Enum
 
 from peewee import IntegrityError
@@ -49,7 +47,7 @@ class EntityMixin(object):
         :type corpus: str or None
         """
         if corpus is None:
-            corpus = os.environ.get("ARKINDEX_CORPUS_ID")
+            corpus = self.corpus_id
 
         assert element and isinstance(
             element, (Element, CachedElement)
diff --git a/tests/conftest.py b/tests/conftest.py
index f00ae5e7..70357545 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -106,7 +106,6 @@ def give_env_variable(request, monkeypatch):
     """Defines required environment variables"""
     monkeypatch.setenv("WORKER_VERSION_ID", "12341234-1234-1234-1234-123412341234")
     monkeypatch.setenv("ARKINDEX_WORKER_RUN_ID", "56785678-5678-5678-5678-567856785678")
-    monkeypatch.setenv("ARKINDEX_CORPUS_ID", "11111111-1111-1111-1111-111111111111")
 
 
 @pytest.fixture
@@ -207,7 +206,6 @@ def mock_activity_calls(responses):
 def mock_elements_worker(monkeypatch, mock_worker_run_api):
     """Build and configure an ElementsWorker with fixed CLI parameters to avoid issues with pytest"""
     monkeypatch.setattr(sys, "argv", ["worker"])
-
     worker = ElementsWorker()
     worker.configure()
     return worker
diff --git a/tests/test_base_worker.py b/tests/test_base_worker.py
index a3262ee9..90b89736 100644
--- a/tests/test_base_worker.py
+++ b/tests/test_base_worker.py
@@ -194,7 +194,10 @@ def test_configure_worker_run(mocker, monkeypatch, responses):
             "configuration": {"configuration": {}},
         },
         "configuration": user_configuration,
-        "process": {"id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff"},
+        "process": {
+            "id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
+            "corpus": "11111111-1111-1111-1111-111111111111",
+        },
     }
 
     responses.add(
@@ -257,7 +260,10 @@ def test_configure_user_configuration_defaults(
                 "param_5": True,
             },
         },
-        "process": {"id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff"},
+        "process": {
+            "id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
+            "corpus": "11111111-1111-1111-1111-111111111111",
+        },
     }
     responses.add(
         responses.GET,
@@ -310,7 +316,10 @@ def test_configure_user_config_debug(mocker, monkeypatch, responses, debug):
             "name": "BBB",
             "configuration": {"debug": debug},
         },
-        "process": {"id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff"},
+        "process": {
+            "id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
+            "corpus": "11111111-1111-1111-1111-111111111111",
+        },
     }
     responses.add(
         responses.GET,
@@ -357,7 +366,10 @@ def test_configure_worker_run_missing_conf(mocker, monkeypatch, responses):
             "configuration": {"configuration": {}},
         },
         "configuration": {"id": "bbbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb", "name": "BBB"},
-        "process": {"id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff"},
+        "process": {
+            "id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
+            "corpus": "11111111-1111-1111-1111-111111111111",
+        },
     }
     responses.add(
         responses.GET,
@@ -404,7 +416,10 @@ def test_configure_worker_run_no_worker_run_conf(mocker, monkeypatch, responses)
             "configuration": {},
         },
         "configuration": None,
-        "process": {"id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff"},
+        "process": {
+            "id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
+            "corpus": "11111111-1111-1111-1111-111111111111",
+        },
     }
     responses.add(
         responses.GET,
diff --git a/tests/test_elements_worker/test_entities.py b/tests/test_elements_worker/test_entities.py
index 57ea05af..71ed34ab 100644
--- a/tests/test_elements_worker/test_entities.py
+++ b/tests/test_elements_worker/test_entities.py
@@ -101,7 +101,6 @@ def test_create_entity_wrong_corpus(monkeypatch, mock_elements_worker):
     elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
 
     # Triggering an error on metas param, not giving corpus should work since
-    # ARKINDEX_CORPUS_ID environment variable is set on mock_elements_worker
     with pytest.raises(AssertionError) as e:
         mock_elements_worker.create_entity(
             element=elt,
@@ -111,8 +110,8 @@ def test_create_entity_wrong_corpus(monkeypatch, mock_elements_worker):
         )
     assert str(e.value) == "metas should be of type dict"
 
-    # Removing ARKINDEX_CORPUS_ID environment variable should give an error when corpus=None
-    monkeypatch.delenv("ARKINDEX_CORPUS_ID")
+    # Unsetting the corpus_id attribute should give an error when corpus=None
+    mock_elements_worker.corpus_id = None
     with pytest.raises(AssertionError) as e:
         mock_elements_worker.create_entity(
             element=elt,
@@ -130,6 +129,7 @@ def test_create_entity_wrong_corpus(monkeypatch, mock_elements_worker):
             corpus=1234,
         )
     assert str(e.value) == "corpus shouldn't be null and should be of type str"
+    mock_elements_worker.corpus_id = ("11111111-1111-1111-1111-111111111111",)
 
 
 def test_create_entity_wrong_metas(mock_elements_worker):
-- 
GitLab