From 8c61c048662869059f580ab17a64ef5b7ec8aea5 Mon Sep 17 00:00:00 2001
From: Eva Bardou <ebardou@teklia.com>
Date: Fri, 26 Mar 2021 15:53:51 +0100
Subject: [PATCH] Add tests

---
 arkindex_worker/worker.py |   5 +-
 tests/conftest.py         |  50 ++++++-
 tests/test_base_worker.py | 294 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 346 insertions(+), 3 deletions(-)

diff --git a/arkindex_worker/worker.py b/arkindex_worker/worker.py
index 882e2010..ca744349 100644
--- a/arkindex_worker/worker.py
+++ b/arkindex_worker/worker.py
@@ -22,6 +22,7 @@ from arkindex_worker.reporting import Reporter
 from arkindex_worker.utils import convert_str_uuid_to_hex
 
 MANUAL_SLUG = "manual"
+DATA_DIR = "/data"
 CACHE_DIR = f"/data/{os.environ.get('TASK_ID')}"
 
 
@@ -137,7 +138,7 @@ class BaseWorker(object):
 
         parents_cache_paths = []
         for parent in task["parents"]:
-            parent_cache_path = f"/data/{parent}/db.sqlite"
+            parent_cache_path = f"{DATA_DIR}/{parent}/db.sqlite"
             if os.path.isfile(parent_cache_path):
                 parents_cache_paths.append(parent_cache_path)
 
@@ -150,7 +151,7 @@ class BaseWorker(object):
                 cache_file.write(parent_cache_file.read())
         # Many parents caches, we have to merge all of them in our current task local cache
         elif len(parents_cache_paths) > 1:
-            self.cache.merge_parent_caches(parents_cache_paths)
+            self.cache.merge_parents_caches(parents_cache_paths)
 
     def load_secret(self, name):
         """Load all secrets described in the worker configuration"""
diff --git a/tests/conftest.py b/tests/conftest.py
index b7ff454b..e15ed36c 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -9,8 +9,9 @@ import pytest
 import yaml
 from arkindex.mock import MockApiClient
 
+from arkindex_worker.cache import LocalDB
 from arkindex_worker.git import GitHelper, GitlabHelper
-from arkindex_worker.worker import ElementsWorker
+from arkindex_worker.worker import BaseWorker, ElementsWorker
 
 FIXTURES_DIR = Path(__file__).resolve().parent / "data"
 CACHE_DIR = str(Path(__file__).resolve().parent / "data/cache")
@@ -92,6 +93,42 @@ def handle_cache_file(monkeypatch):
     os.remove(CACHE_FILE)
 
 
+@pytest.fixture
+def first_parent_folder():
+    cache_dir = f"{CACHE_DIR}/first_parent_id"
+    os.mkdir(cache_dir)
+    yield
+    if os.path.isdir(cache_dir):
+        os.rmdir(cache_dir)
+
+
+@pytest.fixture
+def second_parent_folder():
+    cache_dir = f"{CACHE_DIR}/second_parent_id"
+    os.mkdir(cache_dir)
+    yield
+    if os.path.isdir(cache_dir):
+        os.rmdir(cache_dir)
+
+
+@pytest.fixture
+def first_parent_cache(first_parent_folder):
+    parent_cache = LocalDB(f"{CACHE_DIR}/first_parent_id/db.sqlite")
+    parent_cache.create_tables()
+    yield
+    if os.path.isfile(parent_cache.path):
+        os.remove(parent_cache.path)
+
+
+@pytest.fixture
+def second_parent_cache(second_parent_folder):
+    parent_cache = LocalDB(f"{CACHE_DIR}/second_parent_id/db.sqlite")
+    parent_cache.create_tables()
+    yield
+    if os.path.isfile(parent_cache.path):
+        os.remove(parent_cache.path)
+
+
 @pytest.fixture(autouse=True)
 def give_worker_version_id_env_variable(monkeypatch):
     monkeypatch.setenv("WORKER_VERSION_ID", "12341234-1234-1234-1234-123412341234")
@@ -164,6 +201,17 @@ def mock_elements_worker(monkeypatch, mock_worker_version_api):
     return worker
 
 
+@pytest.fixture
+def mock_base_worker_with_cache(mocker, monkeypatch, mock_worker_version_api):
+    """Build a BaseWorker using SQLite cache"""
+    monkeypatch.setattr(sys, "argv", ["worker"])
+
+    worker = BaseWorker(use_cache=True)
+    monkeypatch.setenv("TASK_ID", "my_task")
+    mocker.patch("arkindex_worker.worker.DATA_DIR", CACHE_DIR)
+    return worker
+
+
 @pytest.fixture
 def mock_elements_worker_with_cache(monkeypatch, mock_worker_version_api):
     """Build and configure an ElementsWorker using SQLite cache with fixed CLI parameters to avoid issues with pytest"""
diff --git a/tests/test_base_worker.py b/tests/test_base_worker.py
index 47a7bef9..446c91a7 100644
--- a/tests/test_base_worker.py
+++ b/tests/test_base_worker.py
@@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+import json
 import logging
 import os
 import sys
@@ -9,8 +10,42 @@ import pytest
 from arkindex.mock import MockApiClient
 
 from arkindex_worker import logger
+from arkindex_worker.cache import CachedElement, CachedTranscription, LocalDB
+from arkindex_worker.utils import convert_str_uuid_to_hex
 from arkindex_worker.worker import BaseWorker
 
+CACHE_DIR = str(Path(__file__).resolve().parent / "data/cache")
+FIRST_PARENT_CACHE = f"{CACHE_DIR}/first_parent_id/db.sqlite"
+SECOND_PARENT_CACHE = f"{CACHE_DIR}/second_parent_id/db.sqlite"
+FIRST_ELEM_TO_INSERT = CachedElement(
+    id=convert_str_uuid_to_hex("11111111-1111-1111-1111-111111111111"),
+    parent_id=convert_str_uuid_to_hex("12341234-1234-1234-1234-123412341234"),
+    type="something",
+    polygon=json.dumps([[1, 1], [2, 2], [2, 1], [1, 2]]),
+    worker_version_id=convert_str_uuid_to_hex("56785678-5678-5678-5678-567856785678"),
+)
+SECOND_ELEM_TO_INSERT = CachedElement(
+    id=convert_str_uuid_to_hex("22222222-2222-2222-2222-222222222222"),
+    parent_id=convert_str_uuid_to_hex("12341234-1234-1234-1234-123412341234"),
+    type="something",
+    polygon=json.dumps([[1, 1], [2, 2], [2, 1], [1, 2]]),
+    worker_version_id=convert_str_uuid_to_hex("56785678-5678-5678-5678-567856785678"),
+)
+FIRST_TR_TO_INSERT = CachedTranscription(
+    id=convert_str_uuid_to_hex("11111111-1111-1111-1111-111111111111"),
+    element_id=convert_str_uuid_to_hex("11111111-1111-1111-1111-111111111111"),
+    text="Hello!",
+    confidence=0.42,
+    worker_version_id=convert_str_uuid_to_hex("56785678-5678-5678-5678-567856785678"),
+)
+SECOND_TR_TO_INSERT = CachedTranscription(
+    id=convert_str_uuid_to_hex("22222222-2222-2222-2222-222222222222"),
+    element_id=convert_str_uuid_to_hex("22222222-2222-2222-2222-222222222222"),
+    text="How are you?",
+    confidence=0.42,
+    worker_version_id=convert_str_uuid_to_hex("56785678-5678-5678-5678-567856785678"),
+)
+
 
 def test_init_default_local_share(monkeypatch):
     worker = BaseWorker()
@@ -115,6 +150,265 @@ def test_cli_arg_verbose_given(mocker, mock_worker_version_api, mock_user_api):
     logger.setLevel(logging.NOTSET)
 
 
+def test_configure_cache_merging_no_parent(responses, mock_base_worker_with_cache):
+    responses.add(
+        responses.GET,
+        "http://testserver/ponos/v1/task/my_task/from-agent/",
+        status=200,
+        json={"parents": []},
+    )
+
+    cache_path = mock_base_worker_with_cache.cache.path
+    with open(cache_path, "rb") as before_file:
+        before = before_file.read()
+
+    mock_base_worker_with_cache.configure()
+
+    with open(cache_path, "rb") as after_file:
+        after = after_file.read()
+
+    assert before == after, "Cache was modified"
+
+    assert len(responses.calls) == 3
+    assert [call.request.url for call in responses.calls] == [
+        "http://testserver/api/v1/user/",
+        "http://testserver/api/v1/workers/versions/12341234-1234-1234-1234-123412341234/",
+        "http://testserver/ponos/v1/task/my_task/from-agent/",
+    ]
+
+
+def test_configure_cache_merging_one_parent_without_file(
+    responses, mock_base_worker_with_cache, first_parent_folder
+):
+    responses.add(
+        responses.GET,
+        "http://testserver/ponos/v1/task/my_task/from-agent/",
+        status=200,
+        json={"parents": ["first_parent_id"]},
+    )
+
+    cache_path = mock_base_worker_with_cache.cache.path
+    with open(cache_path, "rb") as before_file:
+        before = before_file.read()
+
+    mock_base_worker_with_cache.configure()
+
+    with open(cache_path, "rb") as after_file:
+        after = after_file.read()
+
+    assert before == after, "Cache was modified"
+
+    assert len(responses.calls) == 3
+    assert [call.request.url for call in responses.calls] == [
+        "http://testserver/api/v1/user/",
+        "http://testserver/api/v1/workers/versions/12341234-1234-1234-1234-123412341234/",
+        "http://testserver/ponos/v1/task/my_task/from-agent/",
+    ]
+
+
+def test_configure_cache_merging_one_parent(
+    responses, mock_base_worker_with_cache, first_parent_cache
+):
+    parent_cache = LocalDB(FIRST_PARENT_CACHE)
+    parent_cache.insert("elements", [FIRST_ELEM_TO_INSERT])
+    parent_cache.insert("transcriptions", [FIRST_TR_TO_INSERT])
+
+    responses.add(
+        responses.GET,
+        "http://testserver/ponos/v1/task/my_task/from-agent/",
+        status=200,
+        json={"parents": ["first_parent_id"]},
+    )
+
+    mock_base_worker_with_cache.configure()
+
+    stored_rows = mock_base_worker_with_cache.cache.cursor.execute(
+        "SELECT * FROM elements"
+    ).fetchall()
+    assert (
+        stored_rows == parent_cache.cursor.execute("SELECT * FROM elements").fetchall()
+    )
+    assert [CachedElement(**dict(row)) for row in stored_rows] == [FIRST_ELEM_TO_INSERT]
+
+    stored_rows = mock_base_worker_with_cache.cache.cursor.execute(
+        "SELECT * FROM transcriptions"
+    ).fetchall()
+    assert (
+        stored_rows
+        == parent_cache.cursor.execute("SELECT * FROM transcriptions").fetchall()
+    )
+    assert [CachedTranscription(**dict(row)) for row in stored_rows] == [
+        FIRST_TR_TO_INSERT
+    ]
+
+    assert len(responses.calls) == 3
+    assert [call.request.url for call in responses.calls] == [
+        "http://testserver/api/v1/user/",
+        "http://testserver/api/v1/workers/versions/12341234-1234-1234-1234-123412341234/",
+        "http://testserver/ponos/v1/task/my_task/from-agent/",
+    ]
+
+
+def test_configure_cache_merging_multiple_parents_one_file(
+    responses, mock_base_worker_with_cache, first_parent_cache, second_parent_folder
+):
+    parent_cache = LocalDB(FIRST_PARENT_CACHE)
+    parent_cache.insert("elements", [FIRST_ELEM_TO_INSERT])
+    parent_cache.insert("transcriptions", [FIRST_TR_TO_INSERT])
+
+    responses.add(
+        responses.GET,
+        "http://testserver/ponos/v1/task/my_task/from-agent/",
+        status=200,
+        json={"parents": ["first_parent_id", "second_parent_id"]},
+    )
+
+    mock_base_worker_with_cache.configure()
+
+    stored_rows = mock_base_worker_with_cache.cache.cursor.execute(
+        "SELECT * FROM elements"
+    ).fetchall()
+    assert (
+        stored_rows == parent_cache.cursor.execute("SELECT * FROM elements").fetchall()
+    )
+    assert [CachedElement(**dict(row)) for row in stored_rows] == [FIRST_ELEM_TO_INSERT]
+
+    stored_rows = mock_base_worker_with_cache.cache.cursor.execute(
+        "SELECT * FROM transcriptions"
+    ).fetchall()
+    assert (
+        stored_rows
+        == parent_cache.cursor.execute("SELECT * FROM transcriptions").fetchall()
+    )
+    assert [CachedTranscription(**dict(row)) for row in stored_rows] == [
+        FIRST_TR_TO_INSERT
+    ]
+
+    assert len(responses.calls) == 3
+    assert [call.request.url for call in responses.calls] == [
+        "http://testserver/api/v1/user/",
+        "http://testserver/api/v1/workers/versions/12341234-1234-1234-1234-123412341234/",
+        "http://testserver/ponos/v1/task/my_task/from-agent/",
+    ]
+
+
+def test_configure_cache_merging_multiple_parents_differing_lines(
+    responses, mock_base_worker_with_cache, first_parent_cache, second_parent_cache
+):
+    # Inserting differing lines in both parent caches
+    parent_cache = LocalDB(FIRST_PARENT_CACHE)
+    parent_cache.insert("elements", [FIRST_ELEM_TO_INSERT])
+    parent_cache.insert("transcriptions", [FIRST_TR_TO_INSERT])
+    second_parent_cache = LocalDB(SECOND_PARENT_CACHE)
+    second_parent_cache.insert("elements", [SECOND_ELEM_TO_INSERT])
+    second_parent_cache.insert("transcriptions", [SECOND_TR_TO_INSERT])
+
+    responses.add(
+        responses.GET,
+        "http://testserver/ponos/v1/task/my_task/from-agent/",
+        status=200,
+        json={"parents": ["first_parent_id", "second_parent_id"]},
+    )
+
+    mock_base_worker_with_cache.configure()
+
+    stored_rows = mock_base_worker_with_cache.cache.cursor.execute(
+        "SELECT * FROM elements"
+    ).fetchall()
+    assert (
+        stored_rows
+        == parent_cache.cursor.execute("SELECT * FROM elements").fetchall()
+        + second_parent_cache.cursor.execute("SELECT * FROM elements").fetchall()
+    )
+    assert [CachedElement(**dict(row)) for row in stored_rows] == [
+        FIRST_ELEM_TO_INSERT,
+        SECOND_ELEM_TO_INSERT,
+    ]
+
+    stored_rows = mock_base_worker_with_cache.cache.cursor.execute(
+        "SELECT * FROM transcriptions"
+    ).fetchall()
+    assert (
+        stored_rows
+        == parent_cache.cursor.execute("SELECT * FROM transcriptions").fetchall()
+        + second_parent_cache.cursor.execute("SELECT * FROM transcriptions").fetchall()
+    )
+    assert [CachedTranscription(**dict(row)) for row in stored_rows] == [
+        FIRST_TR_TO_INSERT,
+        SECOND_TR_TO_INSERT,
+    ]
+
+    assert len(responses.calls) == 3
+    assert [call.request.url for call in responses.calls] == [
+        "http://testserver/api/v1/user/",
+        "http://testserver/api/v1/workers/versions/12341234-1234-1234-1234-123412341234/",
+        "http://testserver/ponos/v1/task/my_task/from-agent/",
+    ]
+
+
+def test_configure_cache_merging_multiple_parents_identical_lines(
+    responses, mock_base_worker_with_cache, first_parent_cache, second_parent_cache
+):
+    # Inserting identical lines in both parent caches
+    parent_cache = LocalDB(FIRST_PARENT_CACHE)
+    parent_cache.insert("elements", [FIRST_ELEM_TO_INSERT, SECOND_ELEM_TO_INSERT])
+    parent_cache.insert("transcriptions", [FIRST_TR_TO_INSERT, SECOND_TR_TO_INSERT])
+    second_parent_cache = LocalDB(SECOND_PARENT_CACHE)
+    second_parent_cache.insert(
+        "elements", [FIRST_ELEM_TO_INSERT, SECOND_ELEM_TO_INSERT]
+    )
+    second_parent_cache.insert(
+        "transcriptions", [FIRST_TR_TO_INSERT, SECOND_TR_TO_INSERT]
+    )
+
+    responses.add(
+        responses.GET,
+        "http://testserver/ponos/v1/task/my_task/from-agent/",
+        status=200,
+        json={"parents": ["first_parent_id", "second_parent_id"]},
+    )
+
+    mock_base_worker_with_cache.configure()
+
+    stored_rows = mock_base_worker_with_cache.cache.cursor.execute(
+        "SELECT * FROM elements"
+    ).fetchall()
+    assert (
+        stored_rows == parent_cache.cursor.execute("SELECT * FROM elements").fetchall()
+    )
+    assert (
+        stored_rows
+        == second_parent_cache.cursor.execute("SELECT * FROM elements").fetchall()
+    )
+    assert [CachedElement(**dict(row)) for row in stored_rows] == [
+        FIRST_ELEM_TO_INSERT,
+        SECOND_ELEM_TO_INSERT,
+    ]
+
+    stored_rows = mock_base_worker_with_cache.cache.cursor.execute(
+        "SELECT * FROM transcriptions"
+    ).fetchall()
+    assert (
+        stored_rows
+        == parent_cache.cursor.execute("SELECT * FROM transcriptions").fetchall()
+    )
+    assert (
+        stored_rows
+        == second_parent_cache.cursor.execute("SELECT * FROM transcriptions").fetchall()
+    )
+    assert [CachedTranscription(**dict(row)) for row in stored_rows] == [
+        FIRST_TR_TO_INSERT,
+        SECOND_TR_TO_INSERT,
+    ]
+
+    assert len(responses.calls) == 3
+    assert [call.request.url for call in responses.calls] == [
+        "http://testserver/api/v1/user/",
+        "http://testserver/api/v1/workers/versions/12341234-1234-1234-1234-123412341234/",
+        "http://testserver/ponos/v1/task/my_task/from-agent/",
+    ]
+
+
 def test_load_missing_secret():
     worker = BaseWorker()
     worker.api_client = MockApiClient()
-- 
GitLab