Commit 39d57818 authored by Eva Bardou

Add tests

parent 00a573f6
Pipeline #78347 passed
@@ -22,6 +22,7 @@ from arkindex_worker.reporting import Reporter
 from arkindex_worker.utils import convert_str_uuid_to_hex

 MANUAL_SLUG = "manual"
+DATA_DIR = "/data"
 CACHE_DIR = f"/data/{os.environ.get('TASK_ID')}"
@@ -137,7 +138,7 @@ class BaseWorker(object):
         parents_cache_paths = []
         for parent in task["parents"]:
-            parent_cache_path = f"/data/{parent}/db.sqlite"
+            parent_cache_path = f"{DATA_DIR}/{parent}/db.sqlite"
             if os.path.isfile(parent_cache_path):
                 parents_cache_paths.append(parent_cache_path)
@@ -150,7 +151,7 @@ class BaseWorker(object):
                 cache_file.write(parent_cache_file.read())
         # Many parent caches: merge all of them into the current task's local cache
         elif len(parents_cache_paths) > 1:
-            self.cache.merge_parent_caches(parents_cache_paths)
+            self.cache.merge_parents_caches(parents_cache_paths)

     def load_secret(self, name):
         """Load a secret described in the worker configuration"""
...
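Note: the call above is renamed from merge_parent_caches to merge_parents_caches; the LocalDB implementation itself is not part of this diff. As a rough illustration only, such a merge can be done with SQLite's ATTACH, assuming plain sqlite3 connections and primary-key de-duplication (both assumptions, not the actual LocalDB code):

import sqlite3


def merge_parents_caches(connection, parents_cache_paths):
    """Copy the rows of every parent cache into the current task's database."""
    cursor = connection.cursor()
    for i, path in enumerate(parents_cache_paths):
        alias = f"parent_{i}"
        # Attach the parent database file under a unique alias
        cursor.execute(f"ATTACH DATABASE ? AS {alias}", (path,))
        for table in ("elements", "transcriptions"):
            # INSERT OR IGNORE skips rows whose primary key already exists,
            # so lines present in several parents are only stored once
            cursor.execute(
                f"INSERT OR IGNORE INTO {table} SELECT * FROM {alias}.{table}"
            )
        # Commit before detaching, since DETACH fails inside an open transaction
        connection.commit()
        cursor.execute(f"DETACH DATABASE {alias}")

This sketch matches the behaviour the new tests assert below: differing parent rows are concatenated, while identical rows are stored a single time.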
@@ -9,8 +9,9 @@ import pytest
 import yaml

 from arkindex.mock import MockApiClient
+from arkindex_worker.cache import LocalDB
 from arkindex_worker.git import GitHelper, GitlabHelper
-from arkindex_worker.worker import ElementsWorker
+from arkindex_worker.worker import BaseWorker, ElementsWorker

 FIXTURES_DIR = Path(__file__).resolve().parent / "data"
 CACHE_DIR = str(Path(__file__).resolve().parent / "data/cache")
@@ -92,6 +93,42 @@ def handle_cache_file(monkeypatch):
     os.remove(CACHE_FILE)


+@pytest.fixture
+def first_parent_folder():
+    cache_dir = f"{CACHE_DIR}/first_parent_id"
+    os.mkdir(cache_dir)
+    yield
+    if os.path.isdir(cache_dir):
+        os.rmdir(cache_dir)
+
+
+@pytest.fixture
+def second_parent_folder():
+    cache_dir = f"{CACHE_DIR}/second_parent_id"
+    os.mkdir(cache_dir)
+    yield
+    if os.path.isdir(cache_dir):
+        os.rmdir(cache_dir)
+
+
+@pytest.fixture
+def first_parent_cache(first_parent_folder):
+    parent_cache = LocalDB(f"{CACHE_DIR}/first_parent_id/db.sqlite")
+    parent_cache.create_tables()
+    yield
+    if os.path.isfile(parent_cache.path):
+        os.remove(parent_cache.path)
+
+
+@pytest.fixture
+def second_parent_cache(second_parent_folder):
+    parent_cache = LocalDB(f"{CACHE_DIR}/second_parent_id/db.sqlite")
+    parent_cache.create_tables()
+    yield
+    if os.path.isfile(parent_cache.path):
+        os.remove(parent_cache.path)
+
+
+@pytest.fixture(autouse=True)
+def give_worker_version_id_env_variable(monkeypatch):
+    monkeypatch.setenv("WORKER_VERSION_ID", "12341234-1234-1234-1234-123412341234")
@@ -164,6 +201,17 @@ def mock_elements_worker(monkeypatch, mock_worker_version_api):
     return worker


+@pytest.fixture
+def mock_base_worker_with_cache(mocker, monkeypatch, mock_worker_version_api):
+    """Build a BaseWorker using SQLite cache"""
+    monkeypatch.setattr(sys, "argv", ["worker"])
+    worker = BaseWorker(use_cache=True)
+    monkeypatch.setenv("TASK_ID", "my_task")
+    mocker.patch("arkindex_worker.worker.DATA_DIR", CACHE_DIR)
+    return worker
+
+
 @pytest.fixture
 def mock_elements_worker_with_cache(monkeypatch, mock_worker_version_api):
     """Build and configure an ElementsWorker using SQLite cache with fixed CLI parameters to avoid issues with pytest"""
...
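The new fixtures compose through pytest's dependency injection: first_parent_cache requests first_parent_folder, so asking for the cache fixture creates the directory first, then the SQLite file inside it, and each fixture removes what it created on teardown. A hypothetical test (not part of this commit) only needs to name the outermost fixtures it uses:

def test_uses_a_parent_cache(mock_base_worker_with_cache, first_parent_cache):
    # first_parent_folder ran implicitly: data/cache/first_parent_id/ exists,
    # and first_parent_cache created an empty db.sqlite with the cache tables.
    mock_base_worker_with_cache.configure()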
 # -*- coding: utf-8 -*-
+import json
 import logging
 import os
 import sys
@@ -9,8 +10,42 @@ import pytest

 from arkindex.mock import MockApiClient
 from arkindex_worker import logger
+from arkindex_worker.cache import CachedElement, CachedTranscription, LocalDB
+from arkindex_worker.utils import convert_str_uuid_to_hex
 from arkindex_worker.worker import BaseWorker

+CACHE_DIR = str(Path(__file__).resolve().parent / "data/cache")
+FIRST_PARENT_CACHE = f"{CACHE_DIR}/first_parent_id/db.sqlite"
+SECOND_PARENT_CACHE = f"{CACHE_DIR}/second_parent_id/db.sqlite"
+
+FIRST_ELEM_TO_INSERT = CachedElement(
+    id=convert_str_uuid_to_hex("11111111-1111-1111-1111-111111111111"),
+    parent_id=convert_str_uuid_to_hex("12341234-1234-1234-1234-123412341234"),
+    type="something",
+    polygon=json.dumps([[1, 1], [2, 2], [2, 1], [1, 2]]),
+    worker_version_id=convert_str_uuid_to_hex("56785678-5678-5678-5678-567856785678"),
+)
+SECOND_ELEM_TO_INSERT = CachedElement(
+    id=convert_str_uuid_to_hex("22222222-2222-2222-2222-222222222222"),
+    parent_id=convert_str_uuid_to_hex("12341234-1234-1234-1234-123412341234"),
+    type="something",
+    polygon=json.dumps([[1, 1], [2, 2], [2, 1], [1, 2]]),
+    worker_version_id=convert_str_uuid_to_hex("56785678-5678-5678-5678-567856785678"),
+)
+FIRST_TR_TO_INSERT = CachedTranscription(
+    id=convert_str_uuid_to_hex("11111111-1111-1111-1111-111111111111"),
+    element_id=convert_str_uuid_to_hex("11111111-1111-1111-1111-111111111111"),
+    text="Hello!",
+    confidence=0.42,
+    worker_version_id=convert_str_uuid_to_hex("56785678-5678-5678-5678-567856785678"),
+)
+SECOND_TR_TO_INSERT = CachedTranscription(
+    id=convert_str_uuid_to_hex("22222222-2222-2222-2222-222222222222"),
+    element_id=convert_str_uuid_to_hex("22222222-2222-2222-2222-222222222222"),
+    text="How are you?",
+    confidence=0.42,
+    worker_version_id=convert_str_uuid_to_hex("56785678-5678-5678-5678-567856785678"),
+)
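convert_str_uuid_to_hex is imported from arkindex_worker.utils but its body is not part of this diff; the constants above suggest it converts a canonical UUID string into the binary form stored in the SQLite cache. A plausible one-liner using the standard library, assuming raw UUID bytes are what the BLOB columns hold (an assumption, not the actual implementation):

import uuid


def convert_str_uuid_to_hex(str_uuid):
    # "11111111-1111-1111-1111-111111111111" -> 16 raw bytes for a BLOB column
    return uuid.UUID(str_uuid).bytes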
 def test_init_default_local_share(monkeypatch):
     worker = BaseWorker()
@@ -115,6 +150,266 @@ def test_cli_arg_verbose_given(mocker, mock_worker_version_api, mock_user_api):
     logger.setLevel(logging.NOTSET)
+
+
+def test_configure_cache_merging_no_parent(responses, mock_base_worker_with_cache):
+    responses.add(
+        responses.GET,
+        "http://testserver/ponos/v1/task/my_task/from-agent/",
+        status=200,
+        json={"parents": []},
+    )
+
+    cache_path = mock_base_worker_with_cache.cache.path
+    with open(cache_path, "rb") as before_file:
+        before = before_file.read()
+
+    mock_base_worker_with_cache.configure()
+
+    with open(cache_path, "rb") as after_file:
+        after = after_file.read()
+
+    assert before == after, "Cache was modified"
+
+    assert len(responses.calls) == 3
+    assert [call.request.url for call in responses.calls] == [
+        "http://testserver/api/v1/user/",
+        "http://testserver/api/v1/workers/versions/12341234-1234-1234-1234-123412341234/",
+        "http://testserver/ponos/v1/task/my_task/from-agent/",
+    ]
+
+
+def test_configure_cache_merging_one_parent_without_file(
+    responses, mock_base_worker_with_cache, first_parent_folder
+):
+    responses.add(
+        responses.GET,
+        "http://testserver/ponos/v1/task/my_task/from-agent/",
+        status=200,
+        json={"parents": ["first_parent_id"]},
+    )
+
+    cache_path = mock_base_worker_with_cache.cache.path
+    with open(cache_path, "rb") as before_file:
+        before = before_file.read()
+
+    mock_base_worker_with_cache.configure()
+
+    with open(cache_path, "rb") as after_file:
+        after = after_file.read()
+
+    assert before == after, "Cache was modified"
+
+    assert len(responses.calls) == 3
+    assert [call.request.url for call in responses.calls] == [
+        "http://testserver/api/v1/user/",
+        "http://testserver/api/v1/workers/versions/12341234-1234-1234-1234-123412341234/",
+        "http://testserver/ponos/v1/task/my_task/from-agent/",
+    ]
+
+
+def test_configure_cache_merging_one_parent(
+    responses, mock_base_worker_with_cache, first_parent_cache
+):
+    parent_cache = LocalDB(FIRST_PARENT_CACHE)
+    parent_cache.insert("elements", [FIRST_ELEM_TO_INSERT])
+    parent_cache.insert("transcriptions", [FIRST_TR_TO_INSERT])
+
+    responses.add(
+        responses.GET,
+        "http://testserver/ponos/v1/task/my_task/from-agent/",
+        status=200,
+        json={"parents": ["first_parent_id"]},
+    )
+
+    mock_base_worker_with_cache.configure()
+
+    stored_rows = mock_base_worker_with_cache.cache.cursor.execute(
+        "SELECT * FROM elements"
+    ).fetchall()
+    assert (
+        stored_rows == parent_cache.cursor.execute("SELECT * FROM elements").fetchall()
+    )
+    assert [CachedElement(**dict(row)) for row in stored_rows] == [FIRST_ELEM_TO_INSERT]
+
+    stored_rows = mock_base_worker_with_cache.cache.cursor.execute(
+        "SELECT * FROM transcriptions"
+    ).fetchall()
+    assert (
+        stored_rows
+        == parent_cache.cursor.execute("SELECT * FROM transcriptions").fetchall()
+    )
+    assert [CachedTranscription(**dict(row)) for row in stored_rows] == [
+        FIRST_TR_TO_INSERT
+    ]
+
+    assert len(responses.calls) == 3
+    assert [call.request.url for call in responses.calls] == [
+        "http://testserver/api/v1/user/",
+        "http://testserver/api/v1/workers/versions/12341234-1234-1234-1234-123412341234/",
+        "http://testserver/ponos/v1/task/my_task/from-agent/",
+    ]
+
+
+def test_configure_cache_merging_multiple_parents_one_file(
+    responses, mock_base_worker_with_cache, first_parent_cache, second_parent_folder
+):
+    parent_cache = LocalDB(FIRST_PARENT_CACHE)
+    parent_cache.insert("elements", [FIRST_ELEM_TO_INSERT])
+    parent_cache.insert("transcriptions", [FIRST_TR_TO_INSERT])
+
+    responses.add(
+        responses.GET,
+        "http://testserver/ponos/v1/task/my_task/from-agent/",
+        status=200,
+        json={"parents": ["first_parent_id", "second_parent_id"]},
+    )
+
+    mock_base_worker_with_cache.configure()
+
+    stored_rows = mock_base_worker_with_cache.cache.cursor.execute(
+        "SELECT * FROM elements"
+    ).fetchall()
+    assert (
+        stored_rows == parent_cache.cursor.execute("SELECT * FROM elements").fetchall()
+    )
+    assert [CachedElement(**dict(row)) for row in stored_rows] == [FIRST_ELEM_TO_INSERT]
+
+    stored_rows = mock_base_worker_with_cache.cache.cursor.execute(
+        "SELECT * FROM transcriptions"
+    ).fetchall()
+    assert (
+        stored_rows
+        == parent_cache.cursor.execute("SELECT * FROM transcriptions").fetchall()
+    )
+    assert [CachedTranscription(**dict(row)) for row in stored_rows] == [
+        FIRST_TR_TO_INSERT
+    ]
+
+    assert len(responses.calls) == 3
+    assert [call.request.url for call in responses.calls] == [
+        "http://testserver/api/v1/user/",
+        "http://testserver/api/v1/workers/versions/12341234-1234-1234-1234-123412341234/",
+        "http://testserver/ponos/v1/task/my_task/from-agent/",
+    ]
+
+
+def test_configure_cache_merging_multiple_parents_differing_lines(
+    responses, mock_base_worker_with_cache, first_parent_cache, second_parent_cache
+):
+    # Insert differing lines in both parent caches
+    parent_cache = LocalDB(FIRST_PARENT_CACHE)
+    parent_cache.insert("elements", [FIRST_ELEM_TO_INSERT])
+    parent_cache.insert("transcriptions", [FIRST_TR_TO_INSERT])
+    second_parent_cache = LocalDB(SECOND_PARENT_CACHE)
+    second_parent_cache.insert("elements", [SECOND_ELEM_TO_INSERT])
+    second_parent_cache.insert("transcriptions", [SECOND_TR_TO_INSERT])
+
+    responses.add(
+        responses.GET,
+        "http://testserver/ponos/v1/task/my_task/from-agent/",
+        status=200,
+        json={"parents": ["first_parent_id", "second_parent_id"]},
+    )
+
+    mock_base_worker_with_cache.configure()
+
+    stored_rows = mock_base_worker_with_cache.cache.cursor.execute(
+        "SELECT * FROM elements"
+    ).fetchall()
+    assert (
+        stored_rows
+        == parent_cache.cursor.execute("SELECT * FROM elements").fetchall()
+        + second_parent_cache.cursor.execute("SELECT * FROM elements").fetchall()
+    )
+    assert [CachedElement(**dict(row)) for row in stored_rows] == [
+        FIRST_ELEM_TO_INSERT,
+        SECOND_ELEM_TO_INSERT,
+    ]
+
+    stored_rows = mock_base_worker_with_cache.cache.cursor.execute(
+        "SELECT * FROM transcriptions"
+    ).fetchall()
+    assert (
+        stored_rows
+        == parent_cache.cursor.execute("SELECT * FROM transcriptions").fetchall()
+        + second_parent_cache.cursor.execute("SELECT * FROM transcriptions").fetchall()
+    )
+    assert [CachedTranscription(**dict(row)) for row in stored_rows] == [
+        FIRST_TR_TO_INSERT,
+        SECOND_TR_TO_INSERT,
+    ]
+
+    assert len(responses.calls) == 3
+    assert [call.request.url for call in responses.calls] == [
+        "http://testserver/api/v1/user/",
+        "http://testserver/api/v1/workers/versions/12341234-1234-1234-1234-123412341234/",
+        "http://testserver/ponos/v1/task/my_task/from-agent/",
+    ]
+
+
+def test_configure_cache_merging_multiple_parents_identical_lines(
+    responses, mock_base_worker_with_cache, first_parent_cache, second_parent_cache
+):
+    # Insert identical lines in both parent caches
+    parent_cache = LocalDB(FIRST_PARENT_CACHE)
+    parent_cache.insert("elements", [FIRST_ELEM_TO_INSERT, SECOND_ELEM_TO_INSERT])
+    parent_cache.insert("transcriptions", [FIRST_TR_TO_INSERT, SECOND_TR_TO_INSERT])
+    second_parent_cache = LocalDB(SECOND_PARENT_CACHE)
+    second_parent_cache.insert(
+        "elements", [FIRST_ELEM_TO_INSERT, SECOND_ELEM_TO_INSERT]
+    )
+    second_parent_cache.insert(
+        "transcriptions", [FIRST_TR_TO_INSERT, SECOND_TR_TO_INSERT]
+    )
+
+    responses.add(
+        responses.GET,
+        "http://testserver/ponos/v1/task/my_task/from-agent/",
+        status=200,
+        json={"parents": ["first_parent_id", "second_parent_id"]},
+    )
+
+    mock_base_worker_with_cache.configure()
+
+    stored_rows = mock_base_worker_with_cache.cache.cursor.execute(
+        "SELECT * FROM elements"
+    ).fetchall()
+    assert (
+        stored_rows == parent_cache.cursor.execute("SELECT * FROM elements").fetchall()
+    )
+    assert (
+        stored_rows
+        == second_parent_cache.cursor.execute("SELECT * FROM elements").fetchall()
+    )
+    assert [CachedElement(**dict(row)) for row in stored_rows] == [
+        FIRST_ELEM_TO_INSERT,
+        SECOND_ELEM_TO_INSERT,
+    ]
+
+    stored_rows = mock_base_worker_with_cache.cache.cursor.execute(
+        "SELECT * FROM transcriptions"
+    ).fetchall()
+    assert (
+        stored_rows
+        == parent_cache.cursor.execute("SELECT * FROM transcriptions").fetchall()
+    )
+    assert (
+        stored_rows
+        == second_parent_cache.cursor.execute("SELECT * FROM transcriptions").fetchall()
+    )
+    assert [CachedTranscription(**dict(row)) for row in stored_rows] == [
+        FIRST_TR_TO_INSERT,
+        SECOND_TR_TO_INSERT,
+    ]
+
+    assert len(responses.calls) == 3
+    assert [call.request.url for call in responses.calls] == [
+        "http://testserver/api/v1/user/",
+        "http://testserver/api/v1/workers/versions/12341234-1234-1234-1234-123412341234/",
+        "http://testserver/ponos/v1/task/my_task/from-agent/",
+    ]


 def test_load_missing_secret():
     worker = BaseWorker()
     worker.api_client = MockApiClient()
...