diff --git a/arkindex_worker/cache.py b/arkindex_worker/cache.py index 89014daa77e51f1dbff6c376f06bdd3bad2c9c1d..dbcb8dd210af40aae3b17064e58f163b1f02cc4a 100644 --- a/arkindex_worker/cache.py +++ b/arkindex_worker/cache.py @@ -10,12 +10,15 @@ SQL_ELEMENTS_TABLE_CREATION = """CREATE TABLE IF NOT EXISTS elements ( name TEXT NOT NULL, type TEXT NOT NULL, polygon TEXT, + initial BOOLEAN DEFAULT 0 NOT NULL, worker_version_id VARCHAR(32) )""" CachedElement = namedtuple( - "CachedElement", ["id", "parent_id", "name", "type", "polygon", "worker_version_id"] + "CachedElement", + ["id", "parent_id", "name", "type", "polygon", "worker_version_id", "initial"], + defaults=[0], ) diff --git a/arkindex_worker/worker.py b/arkindex_worker/worker.py index 5d32fff5bd981c2c48a8c390f1db237afe26467f..839ee99f9d0514f7732ac5eacd197cd0bdac27c4 100644 --- a/arkindex_worker/worker.py +++ b/arkindex_worker/worker.py @@ -27,7 +27,7 @@ CACHE_DIR = f"/data/{os.environ.get('TASK_ID')}" class BaseWorker(object): - def __init__(self, description="Arkindex Base Worker"): + def __init__(self, description="Arkindex Base Worker", use_cache=False): self.parser = argparse.ArgumentParser(description=description) # Setup workdir either in Ponos environment or on host's home @@ -50,13 +50,16 @@ class BaseWorker(object): logger.info(f"Worker will use {self.work_dir} as working directory") - if os.path.isdir(CACHE_DIR): - cache_path = os.path.join(CACHE_DIR, "db.sqlite") - else: - cache_path = os.path.join(os.getcwd(), "db.sqlite") + self.use_cache = use_cache + + if self.use_cache: + if os.environ.get("TASK_ID") and os.path.isdir(CACHE_DIR): + cache_path = os.path.join(CACHE_DIR, "db.sqlite") + else: + cache_path = os.path.join(os.getcwd(), "db.sqlite") - self.cache = LocalDB(cache_path) - self.cache.create_tables() + self.cache = LocalDB(cache_path) + self.cache.create_tables() @property def is_read_only(self): @@ -214,8 +217,8 @@ class ActivityState(Enum): class ElementsWorker(BaseWorker): - def __init__(self, description="Arkindex Elements Worker"): - super().__init__(description) + def __init__(self, description="Arkindex Elements Worker", use_cache=False): + super().__init__(description, use_cache) # Add report concerning elements self.report = Reporter("unknown worker") @@ -463,24 +466,25 @@ class ElementsWorker(BaseWorker): for element in elements: self.report.add_element(parent.id, element["type"]) - # Store elements in local cache - try: - parent_id_hex = convert_str_uuid_to_hex(parent.id) - worker_version_id_hex = convert_str_uuid_to_hex(self.worker_version_id) - to_insert = [ - CachedElement( - id=convert_str_uuid_to_hex(created_ids[idx]["id"]), - parent_id=parent_id_hex, - name=element["name"], - type=element["type"], - polygon=json.dumps(element["polygon"]), - worker_version_id=worker_version_id_hex, - ) - for idx, element in enumerate(elements) - ] - self.cache.insert("elements", to_insert) - except sqlite3.IntegrityError as e: - logger.warning(f"Couldn't save created elements in local cache: {e}") + if self.use_cache: + # Store elements in local cache + try: + parent_id_hex = convert_str_uuid_to_hex(parent.id) + worker_version_id_hex = convert_str_uuid_to_hex(self.worker_version_id) + to_insert = [ + CachedElement( + id=convert_str_uuid_to_hex(created_ids[idx]["id"]), + parent_id=parent_id_hex, + name=element["name"], + type=element["type"], + polygon=json.dumps(element["polygon"]), + worker_version_id=worker_version_id_hex, + ) + for idx, element in enumerate(elements) + ] + self.cache.insert("elements", to_insert) + except sqlite3.IntegrityError as e: + logger.warning(f"Couldn't save created elements in local cache: {e}") return created_ids diff --git a/tests/conftest.py b/tests/conftest.py index 107a2567db83c8da0c6e2fbd70bcc1babbc5c716..b7ff454b633b0313ce656b48a9fdaa738edbb849 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -164,6 +164,16 @@ def mock_elements_worker(monkeypatch, mock_worker_version_api): return worker +@pytest.fixture +def mock_elements_worker_with_cache(monkeypatch, mock_worker_version_api): + """Build and configure an ElementsWorker using SQLite cache with fixed CLI parameters to avoid issues with pytest""" + monkeypatch.setattr(sys, "argv", ["worker"]) + + worker = ElementsWorker(use_cache=True) + worker.configure() + return worker + + @pytest.fixture def fake_page_element(): with open(FIXTURES_DIR / "page_element.json", "r") as f: diff --git a/tests/data/cache/lines.sqlite b/tests/data/cache/lines.sqlite index 09343ef605e02b268a5e5d7d2de00c244ca1e7db..4294b9b0f3789629fb466debb0d8931f589f9c59 100644 Binary files a/tests/data/cache/lines.sqlite and b/tests/data/cache/lines.sqlite differ diff --git a/tests/data/cache/tables.sqlite b/tests/data/cache/tables.sqlite index 590b560894da841ce30465b76e2e3c11d773820c..ab1d47aeef67ef2c16d6ca3a2aec4b1b2c1cceee 100644 Binary files a/tests/data/cache/tables.sqlite and b/tests/data/cache/tables.sqlite differ diff --git a/tests/test_base_worker.py b/tests/test_base_worker.py index 72029c4685e27b9c7fb0c1ecccb04bd39281baa2..72a0590cbc54b2b71f17652655c7a09fedc82660 100644 --- a/tests/test_base_worker.py +++ b/tests/test_base_worker.py @@ -17,7 +17,6 @@ def test_init_default_local_share(monkeypatch): assert worker.work_dir == os.path.expanduser("~/.local/share/arkindex") assert worker.worker_version_id == "12341234-1234-1234-1234-123412341234" - assert worker.cache def test_init_default_xdg_data_home(monkeypatch): @@ -27,6 +26,14 @@ def test_init_default_xdg_data_home(monkeypatch): assert worker.work_dir == f"{path}/arkindex" assert worker.worker_version_id == "12341234-1234-1234-1234-123412341234" + + +def test_init_with_local_cache(monkeypatch): + worker = BaseWorker(use_cache=True) + + assert worker.work_dir == os.path.expanduser("~/.local/share/arkindex") + assert worker.worker_version_id == "12341234-1234-1234-1234-123412341234" + assert worker.use_cache assert worker.cache @@ -37,7 +44,6 @@ def test_init_var_ponos_data_given(monkeypatch): assert worker.work_dir == f"{path}/current" assert worker.worker_version_id == "12341234-1234-1234-1234-123412341234" - assert worker.cache def test_init_var_worker_version_id_missing(monkeypatch, mock_user_api): @@ -48,7 +54,6 @@ def test_init_var_worker_version_id_missing(monkeypatch, mock_user_api): assert worker.worker_version_id is None assert worker.is_read_only is True assert worker.config == {} # default empty case - assert worker.cache def test_init_var_worker_local_file(monkeypatch, tmp_path, mock_user_api): @@ -63,7 +68,6 @@ def test_init_var_worker_local_file(monkeypatch, tmp_path, mock_user_api): assert worker.worker_version_id is None assert worker.is_read_only is True assert worker.config == {"localKey": "abcdef123"} # Use a local file for devs - assert worker.cache config.unlink() diff --git a/tests/test_elements_worker/test_elements.py b/tests/test_elements_worker/test_elements.py index 83a88bc8cc325be5a7754f65896931bf191fe8be..776885fab29a040d43ec9969f16f9d0e65b66913 100644 --- a/tests/test_elements_worker/test_elements.py +++ b/tests/test_elements_worker/test_elements.py @@ -642,7 +642,7 @@ def test_create_elements_api_error(responses, mock_elements_worker): ] -def test_create_elements(responses, mock_elements_worker): +def test_create_elements(responses, mock_elements_worker_with_cache): elt = Element({"id": "12341234-1234-1234-1234-123412341234"}) responses.add( responses.POST, @@ -651,7 +651,7 @@ def test_create_elements(responses, mock_elements_worker): json=[{"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08"}], ) - created_ids = mock_elements_worker.create_elements( + created_ids = mock_elements_worker_with_cache.create_elements( parent=elt, elements=[ { @@ -684,7 +684,7 @@ def test_create_elements(responses, mock_elements_worker): cache_path = f"{CACHE_DIR}/db.sqlite" assert os.path.isfile(cache_path) - rows = mock_elements_worker.cache.cursor.execute( + rows = mock_elements_worker_with_cache.cache.cursor.execute( "SELECT * FROM elements" ).fetchall() assert [CachedElement(**dict(row)) for row in rows] == [