From 8c61c048662869059f580ab17a64ef5b7ec8aea5 Mon Sep 17 00:00:00 2001
From: Eva Bardou <ebardou@teklia.com>
Date: Fri, 26 Mar 2021 15:53:51 +0100
Subject: [PATCH] Add tests

---
 arkindex_worker/worker.py |   5 +-
 tests/conftest.py         |  50 ++++++-
 tests/test_base_worker.py | 295 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 347 insertions(+), 3 deletions(-)

diff --git a/arkindex_worker/worker.py b/arkindex_worker/worker.py
index 882e2010..ca744349 100644
--- a/arkindex_worker/worker.py
+++ b/arkindex_worker/worker.py
@@ -22,6 +22,7 @@ from arkindex_worker.reporting import Reporter
 from arkindex_worker.utils import convert_str_uuid_to_hex
 
 MANUAL_SLUG = "manual"
+DATA_DIR = "/data"
 CACHE_DIR = f"/data/{os.environ.get('TASK_ID')}"
 
 
@@ -137,7 +138,7 @@ class BaseWorker(object):
 
             parents_cache_paths = []
             for parent in task["parents"]:
-                parent_cache_path = f"/data/{parent}/db.sqlite"
+                parent_cache_path = f"{DATA_DIR}/{parent}/db.sqlite"
                 if os.path.isfile(parent_cache_path):
                     parents_cache_paths.append(parent_cache_path)
 
@@ -150,7 +151,7 @@ class BaseWorker(object):
                     cache_file.write(parent_cache_file.read())
             # Many parents caches, we have to merge all of them in our current task local cache
             elif len(parents_cache_paths) > 1:
-                self.cache.merge_parent_caches(parents_cache_paths)
+                self.cache.merge_parents_caches(parents_cache_paths)
 
     def load_secret(self, name):
         """Load all secrets described in the worker configuration"""
diff --git a/tests/conftest.py b/tests/conftest.py
index b7ff454b..e15ed36c 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -9,8 +9,9 @@ import pytest
 import yaml
 
 from arkindex.mock import MockApiClient
+from arkindex_worker.cache import LocalDB
 from arkindex_worker.git import GitHelper, GitlabHelper
-from arkindex_worker.worker import ElementsWorker
+from arkindex_worker.worker import BaseWorker, ElementsWorker
 
 FIXTURES_DIR = Path(__file__).resolve().parent / "data"
 CACHE_DIR = str(Path(__file__).resolve().parent / "data/cache")
@@ -92,6 +93,42 @@ def handle_cache_file(monkeypatch):
         os.remove(CACHE_FILE)
 
 
+@pytest.fixture
+def first_parent_folder():
+    cache_dir = f"{CACHE_DIR}/first_parent_id"
+    os.makedirs(cache_dir, exist_ok=True)  # tolerate leftovers of an aborted run
+    yield
+    if os.path.isdir(cache_dir):
+        os.rmdir(cache_dir)
+
+
+@pytest.fixture
+def second_parent_folder():
+    cache_dir = f"{CACHE_DIR}/second_parent_id"
+    os.makedirs(cache_dir, exist_ok=True)  # tolerate leftovers of an aborted run
+    yield
+    if os.path.isdir(cache_dir):
+        os.rmdir(cache_dir)
+
+
+@pytest.fixture
+def first_parent_cache(first_parent_folder):
+    parent_cache = LocalDB(f"{CACHE_DIR}/first_parent_id/db.sqlite")
+    parent_cache.create_tables()
+    yield
+    if os.path.isfile(parent_cache.path):
+        os.remove(parent_cache.path)
+
+
+@pytest.fixture
+def second_parent_cache(second_parent_folder):
+    parent_cache = LocalDB(f"{CACHE_DIR}/second_parent_id/db.sqlite")
+    parent_cache.create_tables()
+    yield
+    if os.path.isfile(parent_cache.path):
+        os.remove(parent_cache.path)
+
+
 @pytest.fixture(autouse=True)
 def give_worker_version_id_env_variable(monkeypatch):
     monkeypatch.setenv("WORKER_VERSION_ID", "12341234-1234-1234-1234-123412341234")
@@ -164,6 +201,17 @@ def mock_elements_worker(monkeypatch, mock_worker_version_api):
     return worker
 
 
+@pytest.fixture
+def mock_base_worker_with_cache(mocker, monkeypatch, mock_worker_version_api):
+    """Build a BaseWorker using SQLite cache"""
+    monkeypatch.setattr(sys, "argv", ["worker"])
+
+    worker = BaseWorker(use_cache=True)
+    monkeypatch.setenv("TASK_ID", "my_task")
+    mocker.patch("arkindex_worker.worker.DATA_DIR", CACHE_DIR)
+    return worker
+
+
 @pytest.fixture
 def mock_elements_worker_with_cache(monkeypatch, mock_worker_version_api):
     """Build and configure an ElementsWorker using SQLite cache with fixed CLI parameters to avoid issues with pytest"""
diff --git a/tests/test_base_worker.py b/tests/test_base_worker.py
index 47a7bef9..446c91a7 100644
--- a/tests/test_base_worker.py
+++ b/tests/test_base_worker.py
@@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+import json
 import logging
 import os
 import sys
@@ -9,8 +10,42 @@ import pytest
 
 from arkindex.mock import MockApiClient
 from arkindex_worker import logger
+from arkindex_worker.cache import CachedElement, CachedTranscription, LocalDB
+from arkindex_worker.utils import convert_str_uuid_to_hex
 from arkindex_worker.worker import BaseWorker
 
+CACHE_DIR = str(Path(__file__).resolve().parent / "data/cache")
+FIRST_PARENT_CACHE = f"{CACHE_DIR}/first_parent_id/db.sqlite"
+SECOND_PARENT_CACHE = f"{CACHE_DIR}/second_parent_id/db.sqlite"
+FIRST_ELEM_TO_INSERT = CachedElement(
+    id=convert_str_uuid_to_hex("11111111-1111-1111-1111-111111111111"),
+    parent_id=convert_str_uuid_to_hex("12341234-1234-1234-1234-123412341234"),
+    type="something",
+    polygon=json.dumps([[1, 1], [2, 2], [2, 1], [1, 2]]),
+    worker_version_id=convert_str_uuid_to_hex("56785678-5678-5678-5678-567856785678"),
+)
+SECOND_ELEM_TO_INSERT = CachedElement(
+    id=convert_str_uuid_to_hex("22222222-2222-2222-2222-222222222222"),
+    parent_id=convert_str_uuid_to_hex("12341234-1234-1234-1234-123412341234"),
+    type="something",
+    polygon=json.dumps([[1, 1], [2, 2], [2, 1], [1, 2]]),
+    worker_version_id=convert_str_uuid_to_hex("56785678-5678-5678-5678-567856785678"),
+)
+FIRST_TR_TO_INSERT = CachedTranscription(
+    id=convert_str_uuid_to_hex("11111111-1111-1111-1111-111111111111"),
+    element_id=convert_str_uuid_to_hex("11111111-1111-1111-1111-111111111111"),
+    text="Hello!",
+    confidence=0.42,
+    worker_version_id=convert_str_uuid_to_hex("56785678-5678-5678-5678-567856785678"),
+)
+SECOND_TR_TO_INSERT = CachedTranscription(
+    id=convert_str_uuid_to_hex("22222222-2222-2222-2222-222222222222"),
+    element_id=convert_str_uuid_to_hex("22222222-2222-2222-2222-222222222222"),
+    text="How are you?",
+    confidence=0.42,
+    worker_version_id=convert_str_uuid_to_hex("56785678-5678-5678-5678-567856785678"),
+)
+
 
 def test_init_default_local_share(monkeypatch):
     worker = BaseWorker()
@@ -115,6 +150,266 @@ def test_cli_arg_verbose_given(mocker, mock_worker_version_api, mock_user_api):
     logger.setLevel(logging.NOTSET)
 
 
+def test_configure_cache_merging_no_parent(responses, mock_base_worker_with_cache):
+    responses.add(
+        responses.GET,
+        "http://testserver/ponos/v1/task/my_task/from-agent/",
+        status=200,
+        json={"parents": []},
+    )
+
+    cache_path = mock_base_worker_with_cache.cache.path
+    with open(cache_path, "rb") as before_file:
+        before = before_file.read()
+
+    mock_base_worker_with_cache.configure()
+
+    with open(cache_path, "rb") as after_file:
+        after = after_file.read()
+
+    assert before == after, "Cache was modified"
+
+    assert len(responses.calls) == 3
+    assert [call.request.url for call in responses.calls] == [
+        "http://testserver/api/v1/user/",
+        "http://testserver/api/v1/workers/versions/12341234-1234-1234-1234-123412341234/",
+        "http://testserver/ponos/v1/task/my_task/from-agent/",
+    ]
+
+
+def test_configure_cache_merging_one_parent_without_file(
+    responses, mock_base_worker_with_cache, first_parent_folder
+):
+    responses.add(
+        responses.GET,
+        "http://testserver/ponos/v1/task/my_task/from-agent/",
+        status=200,
+        json={"parents": ["first_parent_id"]},
+    )
+
+    cache_path = mock_base_worker_with_cache.cache.path
+    with open(cache_path, "rb") as before_file:
+        before = before_file.read()
+
+    mock_base_worker_with_cache.configure()
+
+    with open(cache_path, "rb") as after_file:
+        after = after_file.read()
+
+    assert before == after, "Cache was modified"
+
+    assert len(responses.calls) == 3
+    assert [call.request.url for call in responses.calls] == [
+        "http://testserver/api/v1/user/",
+        "http://testserver/api/v1/workers/versions/12341234-1234-1234-1234-123412341234/",
+        "http://testserver/ponos/v1/task/my_task/from-agent/",
+    ]
+
+
+def test_configure_cache_merging_one_parent(
+    responses, mock_base_worker_with_cache, first_parent_cache
+):
+    parent_cache = LocalDB(FIRST_PARENT_CACHE)
+    parent_cache.insert("elements", [FIRST_ELEM_TO_INSERT])
+    parent_cache.insert("transcriptions", [FIRST_TR_TO_INSERT])
+
+    responses.add(
+        responses.GET,
+        "http://testserver/ponos/v1/task/my_task/from-agent/",
+        status=200,
+        json={"parents": ["first_parent_id"]},
+    )
+
+    mock_base_worker_with_cache.configure()
+
+    stored_rows = mock_base_worker_with_cache.cache.cursor.execute(
+        "SELECT * FROM elements"
+    ).fetchall()
+    assert (
+        stored_rows == parent_cache.cursor.execute("SELECT * FROM elements").fetchall()
+    )
+    assert [CachedElement(**dict(row)) for row in stored_rows] == [FIRST_ELEM_TO_INSERT]
+
+    stored_rows = mock_base_worker_with_cache.cache.cursor.execute(
+        "SELECT * FROM transcriptions"
+    ).fetchall()
+    assert (
+        stored_rows
+        == parent_cache.cursor.execute("SELECT * FROM transcriptions").fetchall()
+    )
+    assert [CachedTranscription(**dict(row)) for row in stored_rows] == [
+        FIRST_TR_TO_INSERT
+    ]
+
+    assert len(responses.calls) == 3
+    assert [call.request.url for call in responses.calls] == [
+        "http://testserver/api/v1/user/",
+        "http://testserver/api/v1/workers/versions/12341234-1234-1234-1234-123412341234/",
+        "http://testserver/ponos/v1/task/my_task/from-agent/",
+    ]
+
+
+def test_configure_cache_merging_multiple_parents_one_file(
+    responses, mock_base_worker_with_cache, first_parent_cache, second_parent_folder
+):
+    parent_cache = LocalDB(FIRST_PARENT_CACHE)
+    parent_cache.insert("elements", [FIRST_ELEM_TO_INSERT])
+    parent_cache.insert("transcriptions", [FIRST_TR_TO_INSERT])
+
+    responses.add(
+        responses.GET,
+        "http://testserver/ponos/v1/task/my_task/from-agent/",
+        status=200,
+        json={"parents": ["first_parent_id", "second_parent_id"]},
+    )
+
+    mock_base_worker_with_cache.configure()
+
+    stored_rows = mock_base_worker_with_cache.cache.cursor.execute(
+        "SELECT * FROM elements"
+    ).fetchall()
+    assert (
+        stored_rows == parent_cache.cursor.execute("SELECT * FROM elements").fetchall()
+    )
+    assert [CachedElement(**dict(row)) for row in stored_rows] == [FIRST_ELEM_TO_INSERT]
+
+    stored_rows = mock_base_worker_with_cache.cache.cursor.execute(
+        "SELECT * FROM transcriptions"
+    ).fetchall()
+    assert (
+        stored_rows
+        == parent_cache.cursor.execute("SELECT * FROM transcriptions").fetchall()
+    )
+    assert [CachedTranscription(**dict(row)) for row in stored_rows] == [
+        FIRST_TR_TO_INSERT
+    ]
+
+    assert len(responses.calls) == 3
+    assert [call.request.url for call in responses.calls] == [
+        "http://testserver/api/v1/user/",
+        "http://testserver/api/v1/workers/versions/12341234-1234-1234-1234-123412341234/",
+        "http://testserver/ponos/v1/task/my_task/from-agent/",
+    ]
+
+
+def test_configure_cache_merging_multiple_parents_differing_lines(
+    responses, mock_base_worker_with_cache, first_parent_cache, second_parent_cache
+):
+    # Inserting differing lines in both parents caches: the first parent holds
+    # the first element/transcription, the second parent holds the second ones
+    parent_cache = LocalDB(FIRST_PARENT_CACHE)
+    parent_cache.insert("elements", [FIRST_ELEM_TO_INSERT])
+    parent_cache.insert("transcriptions", [FIRST_TR_TO_INSERT])
+    second_parent_cache = LocalDB(SECOND_PARENT_CACHE)
+    second_parent_cache.insert("elements", [SECOND_ELEM_TO_INSERT])
+    second_parent_cache.insert("transcriptions", [SECOND_TR_TO_INSERT])
+
+    responses.add(
+        responses.GET,
+        "http://testserver/ponos/v1/task/my_task/from-agent/",
+        status=200,
+        json={"parents": ["first_parent_id", "second_parent_id"]},
+    )
+
+    mock_base_worker_with_cache.configure()
+
+    stored_rows = mock_base_worker_with_cache.cache.cursor.execute(
+        "SELECT * FROM elements"
+    ).fetchall()
+    assert (
+        stored_rows
+        == parent_cache.cursor.execute("SELECT * FROM elements").fetchall()
+        + second_parent_cache.cursor.execute("SELECT * FROM elements").fetchall()
+    )
+    assert [CachedElement(**dict(row)) for row in stored_rows] == [
+        FIRST_ELEM_TO_INSERT,
+        SECOND_ELEM_TO_INSERT,
+    ]
+
+    stored_rows = mock_base_worker_with_cache.cache.cursor.execute(
+        "SELECT * FROM transcriptions"
+    ).fetchall()
+    assert (
+        stored_rows
+        == parent_cache.cursor.execute("SELECT * FROM transcriptions").fetchall()
+        + second_parent_cache.cursor.execute("SELECT * FROM transcriptions").fetchall()
+    )
+    assert [CachedTranscription(**dict(row)) for row in stored_rows] == [
+        FIRST_TR_TO_INSERT,
+        SECOND_TR_TO_INSERT,
+    ]
+
+    assert len(responses.calls) == 3
+    assert [call.request.url for call in responses.calls] == [
+        "http://testserver/api/v1/user/",
+        "http://testserver/api/v1/workers/versions/12341234-1234-1234-1234-123412341234/",
+        "http://testserver/ponos/v1/task/my_task/from-agent/",
+    ]
+
+
+def test_configure_cache_merging_multiple_parents_identical_lines(
+    responses, mock_base_worker_with_cache, first_parent_cache, second_parent_cache
+):
+    # Inserting identical lines in both parents caches
+    parent_cache = LocalDB(FIRST_PARENT_CACHE)
+    parent_cache.insert("elements", [FIRST_ELEM_TO_INSERT, SECOND_ELEM_TO_INSERT])
+    parent_cache.insert("transcriptions", [FIRST_TR_TO_INSERT, SECOND_TR_TO_INSERT])
+    second_parent_cache = LocalDB(SECOND_PARENT_CACHE)
+    second_parent_cache.insert(
+        "elements", [FIRST_ELEM_TO_INSERT, SECOND_ELEM_TO_INSERT]
+    )
+    second_parent_cache.insert(
+        "transcriptions", [FIRST_TR_TO_INSERT, SECOND_TR_TO_INSERT]
+    )
+
+    responses.add(
+        responses.GET,
+        "http://testserver/ponos/v1/task/my_task/from-agent/",
+        status=200,
+        json={"parents": ["first_parent_id", "second_parent_id"]},
+    )
+
+    mock_base_worker_with_cache.configure()
+
+    stored_rows = mock_base_worker_with_cache.cache.cursor.execute(
+        "SELECT * FROM elements"
+    ).fetchall()
+    assert (
+        stored_rows == parent_cache.cursor.execute("SELECT * FROM elements").fetchall()
+    )
+    assert (
+        stored_rows
+        == second_parent_cache.cursor.execute("SELECT * FROM elements").fetchall()
+    )
+    assert [CachedElement(**dict(row)) for row in stored_rows] == [
+        FIRST_ELEM_TO_INSERT,
+        SECOND_ELEM_TO_INSERT,
+    ]
+
+    stored_rows = mock_base_worker_with_cache.cache.cursor.execute(
+        "SELECT * FROM transcriptions"
+    ).fetchall()
+    assert (
+        stored_rows
+        == parent_cache.cursor.execute("SELECT * FROM transcriptions").fetchall()
+    )
+    assert (
+        stored_rows
+        == second_parent_cache.cursor.execute("SELECT * FROM transcriptions").fetchall()
+    )
+    assert [CachedTranscription(**dict(row)) for row in stored_rows] == [
+        FIRST_TR_TO_INSERT,
+        SECOND_TR_TO_INSERT,
+    ]
+
+    assert len(responses.calls) == 3
+    assert [call.request.url for call in responses.calls] == [
+        "http://testserver/api/v1/user/",
+        "http://testserver/api/v1/workers/versions/12341234-1234-1234-1234-123412341234/",
+        "http://testserver/ponos/v1/task/my_task/from-agent/",
+    ]
+
+
 def test_load_missing_secret():
     worker = BaseWorker()
     worker.api_client = MockApiClient()
-- 
GitLab