Merge parent caches into the current task's cache

Merged Eva Bardou requested to merge merge-parents-cache into master
All threads resolved. 2 files changed: +298 −295 (the file shown below: +0 −295).
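
For context, the merge behaviour these tests exercise can be sketched as follows. This is a minimal, hypothetical sketch, not the MR's actual implementation: the function name, signature, and the ATTACH-based approach are assumptions. It only relies on what the tests show, namely that each parent task may leave a SQLite cache at <cache_dir>/<parent_id>/db.sqlite and that its rows end up in the current task's cache, with identical rows deduplicated.

import sqlite3
from pathlib import Path

def merge_parents_caches(parent_ids, current_db, cache_dir):
    # Hypothetical helper: copy every parent's cached rows into the
    # current task's cache, skipping parents that produced no cache file.
    connection = sqlite3.connect(current_db)
    for parent_id in parent_ids:
        parent_db = Path(cache_dir) / parent_id / "db.sqlite"
        if not parent_db.is_file():
            continue  # parent task without a cache file, nothing to merge
        # ATTACH runs in autocommit mode, outside any transaction
        connection.execute("ATTACH DATABASE ? AS parent", (str(parent_db),))
        with connection:  # one transaction per parent
            for table in ("elements", "transcriptions"):
                # INSERT OR IGNORE keeps identical rows shared by several
                # parents from raising UNIQUE constraint errors
                connection.execute(
                    f"INSERT OR IGNORE INTO {table} SELECT * FROM parent.{table}"
                )
        connection.execute("DETACH DATABASE parent")
    connection.close()

With such a helper, merging two parents would amount to merge_parents_caches(["first_parent_id", "second_parent_id"], current_db_path, CACHE_DIR), which matches the shape of the scenarios asserted below (no parent, one parent without a file, one parent, several parents with differing or identical rows).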
# -*- coding: utf-8 -*-
import json
import logging
import os
import sys
from pathlib import Path
@@ -10,42 +9,8 @@ import pytest
from arkindex.mock import MockApiClient
from arkindex_worker import logger
from arkindex_worker.cache import CachedElement, CachedTranscription, LocalDB
from arkindex_worker.utils import convert_str_uuid_to_hex
from arkindex_worker.worker import BaseWorker
CACHE_DIR = str(Path(__file__).resolve().parent / "data/cache")
FIRST_PARENT_CACHE = f"{CACHE_DIR}/first_parent_id/db.sqlite"
SECOND_PARENT_CACHE = f"{CACHE_DIR}/second_parent_id/db.sqlite"
FIRST_ELEM_TO_INSERT = CachedElement(
    id=convert_str_uuid_to_hex("11111111-1111-1111-1111-111111111111"),
    parent_id=convert_str_uuid_to_hex("12341234-1234-1234-1234-123412341234"),
    type="something",
    polygon=json.dumps([[1, 1], [2, 2], [2, 1], [1, 2]]),
    worker_version_id=convert_str_uuid_to_hex("56785678-5678-5678-5678-567856785678"),
)
SECOND_ELEM_TO_INSERT = CachedElement(
    id=convert_str_uuid_to_hex("22222222-2222-2222-2222-222222222222"),
    parent_id=convert_str_uuid_to_hex("12341234-1234-1234-1234-123412341234"),
    type="something",
    polygon=json.dumps([[1, 1], [2, 2], [2, 1], [1, 2]]),
    worker_version_id=convert_str_uuid_to_hex("56785678-5678-5678-5678-567856785678"),
)
FIRST_TR_TO_INSERT = CachedTranscription(
    id=convert_str_uuid_to_hex("11111111-1111-1111-1111-111111111111"),
    element_id=convert_str_uuid_to_hex("11111111-1111-1111-1111-111111111111"),
    text="Hello!",
    confidence=0.42,
    worker_version_id=convert_str_uuid_to_hex("56785678-5678-5678-5678-567856785678"),
)
SECOND_TR_TO_INSERT = CachedTranscription(
    id=convert_str_uuid_to_hex("22222222-2222-2222-2222-222222222222"),
    element_id=convert_str_uuid_to_hex("22222222-2222-2222-2222-222222222222"),
    text="How are you?",
    confidence=0.42,
    worker_version_id=convert_str_uuid_to_hex("56785678-5678-5678-5678-567856785678"),
)
def test_init_default_local_share(monkeypatch):
    worker = BaseWorker()
@@ -150,266 +115,6 @@ def test_cli_arg_verbose_given(mocker, mock_worker_version_api, mock_user_api):
    logger.setLevel(logging.NOTSET)
def test_configure_cache_merging_no_parent(responses, mock_base_worker_with_cache):
    responses.add(
        responses.GET,
        "http://testserver/ponos/v1/task/my_task/from-agent/",
        status=200,
        json={"parents": []},
    )

    cache_path = mock_base_worker_with_cache.cache.path
    with open(cache_path, "rb") as before_file:
        before = before_file.read()

    mock_base_worker_with_cache.configure()

    with open(cache_path, "rb") as after_file:
        after = after_file.read()

    assert before == after, "Cache was modified"

    assert len(responses.calls) == 3
    assert [call.request.url for call in responses.calls] == [
        "http://testserver/api/v1/user/",
        "http://testserver/api/v1/workers/versions/12341234-1234-1234-1234-123412341234/",
        "http://testserver/ponos/v1/task/my_task/from-agent/",
    ]
def test_configure_cache_merging_one_parent_without_file(
    responses, mock_base_worker_with_cache, first_parent_folder
):
    responses.add(
        responses.GET,
        "http://testserver/ponos/v1/task/my_task/from-agent/",
        status=200,
        json={"parents": ["first_parent_id"]},
    )

    cache_path = mock_base_worker_with_cache.cache.path
    with open(cache_path, "rb") as before_file:
        before = before_file.read()

    mock_base_worker_with_cache.configure()

    with open(cache_path, "rb") as after_file:
        after = after_file.read()

    assert before == after, "Cache was modified"

    assert len(responses.calls) == 3
    assert [call.request.url for call in responses.calls] == [
        "http://testserver/api/v1/user/",
        "http://testserver/api/v1/workers/versions/12341234-1234-1234-1234-123412341234/",
        "http://testserver/ponos/v1/task/my_task/from-agent/",
    ]
def test_configure_cache_merging_one_parent(
    responses, mock_base_worker_with_cache, first_parent_cache
):
    parent_cache = LocalDB(FIRST_PARENT_CACHE)
    parent_cache.insert("elements", [FIRST_ELEM_TO_INSERT])
    parent_cache.insert("transcriptions", [FIRST_TR_TO_INSERT])

    responses.add(
        responses.GET,
        "http://testserver/ponos/v1/task/my_task/from-agent/",
        status=200,
        json={"parents": ["first_parent_id"]},
    )

    mock_base_worker_with_cache.configure()

    stored_rows = mock_base_worker_with_cache.cache.cursor.execute(
        "SELECT * FROM elements"
    ).fetchall()
    assert (
        stored_rows == parent_cache.cursor.execute("SELECT * FROM elements").fetchall()
    )
    assert [CachedElement(**dict(row)) for row in stored_rows] == [FIRST_ELEM_TO_INSERT]

    stored_rows = mock_base_worker_with_cache.cache.cursor.execute(
        "SELECT * FROM transcriptions"
    ).fetchall()
    assert (
        stored_rows
        == parent_cache.cursor.execute("SELECT * FROM transcriptions").fetchall()
    )
    assert [CachedTranscription(**dict(row)) for row in stored_rows] == [
        FIRST_TR_TO_INSERT
    ]

    assert len(responses.calls) == 3
    assert [call.request.url for call in responses.calls] == [
        "http://testserver/api/v1/user/",
        "http://testserver/api/v1/workers/versions/12341234-1234-1234-1234-123412341234/",
        "http://testserver/ponos/v1/task/my_task/from-agent/",
    ]
def test_configure_cache_merging_multiple_parents_one_file(
    responses, mock_base_worker_with_cache, first_parent_cache, second_parent_folder
):
    parent_cache = LocalDB(FIRST_PARENT_CACHE)
    parent_cache.insert("elements", [FIRST_ELEM_TO_INSERT])
    parent_cache.insert("transcriptions", [FIRST_TR_TO_INSERT])

    responses.add(
        responses.GET,
        "http://testserver/ponos/v1/task/my_task/from-agent/",
        status=200,
        json={"parents": ["first_parent_id", "second_parent_id"]},
    )

    mock_base_worker_with_cache.configure()

    stored_rows = mock_base_worker_with_cache.cache.cursor.execute(
        "SELECT * FROM elements"
    ).fetchall()
    assert (
        stored_rows == parent_cache.cursor.execute("SELECT * FROM elements").fetchall()
    )
    assert [CachedElement(**dict(row)) for row in stored_rows] == [FIRST_ELEM_TO_INSERT]

    stored_rows = mock_base_worker_with_cache.cache.cursor.execute(
        "SELECT * FROM transcriptions"
    ).fetchall()
    assert (
        stored_rows
        == parent_cache.cursor.execute("SELECT * FROM transcriptions").fetchall()
    )
    assert [CachedTranscription(**dict(row)) for row in stored_rows] == [
        FIRST_TR_TO_INSERT
    ]

    assert len(responses.calls) == 3
    assert [call.request.url for call in responses.calls] == [
        "http://testserver/api/v1/user/",
        "http://testserver/api/v1/workers/versions/12341234-1234-1234-1234-123412341234/",
        "http://testserver/ponos/v1/task/my_task/from-agent/",
    ]
def test_configure_cache_merging_multiple_parents_differing_lines(
    responses, mock_base_worker_with_cache, first_parent_cache, second_parent_cache
):
    # Insert differing rows in the two parent caches
    parent_cache = LocalDB(FIRST_PARENT_CACHE)
    parent_cache.insert("elements", [FIRST_ELEM_TO_INSERT])
    parent_cache.insert("transcriptions", [FIRST_TR_TO_INSERT])
    second_parent_cache = LocalDB(SECOND_PARENT_CACHE)
    second_parent_cache.insert("elements", [SECOND_ELEM_TO_INSERT])
    second_parent_cache.insert("transcriptions", [SECOND_TR_TO_INSERT])

    responses.add(
        responses.GET,
        "http://testserver/ponos/v1/task/my_task/from-agent/",
        status=200,
        json={"parents": ["first_parent_id", "second_parent_id"]},
    )

    mock_base_worker_with_cache.configure()

    stored_rows = mock_base_worker_with_cache.cache.cursor.execute(
        "SELECT * FROM elements"
    ).fetchall()
    assert (
        stored_rows
        == parent_cache.cursor.execute("SELECT * FROM elements").fetchall()
        + second_parent_cache.cursor.execute("SELECT * FROM elements").fetchall()
    )
    assert [CachedElement(**dict(row)) for row in stored_rows] == [
        FIRST_ELEM_TO_INSERT,
        SECOND_ELEM_TO_INSERT,
    ]

    stored_rows = mock_base_worker_with_cache.cache.cursor.execute(
        "SELECT * FROM transcriptions"
    ).fetchall()
    assert (
        stored_rows
        == parent_cache.cursor.execute("SELECT * FROM transcriptions").fetchall()
        + second_parent_cache.cursor.execute("SELECT * FROM transcriptions").fetchall()
    )
    assert [CachedTranscription(**dict(row)) for row in stored_rows] == [
        FIRST_TR_TO_INSERT,
        SECOND_TR_TO_INSERT,
    ]

    assert len(responses.calls) == 3
    assert [call.request.url for call in responses.calls] == [
        "http://testserver/api/v1/user/",
        "http://testserver/api/v1/workers/versions/12341234-1234-1234-1234-123412341234/",
        "http://testserver/ponos/v1/task/my_task/from-agent/",
    ]
def test_configure_cache_merging_multiple_parents_identical_lines(
    responses, mock_base_worker_with_cache, first_parent_cache, second_parent_cache
):
    # Insert identical rows in both parent caches
    parent_cache = LocalDB(FIRST_PARENT_CACHE)
    parent_cache.insert("elements", [FIRST_ELEM_TO_INSERT, SECOND_ELEM_TO_INSERT])
    parent_cache.insert("transcriptions", [FIRST_TR_TO_INSERT, SECOND_TR_TO_INSERT])
    second_parent_cache = LocalDB(SECOND_PARENT_CACHE)
    second_parent_cache.insert(
        "elements", [FIRST_ELEM_TO_INSERT, SECOND_ELEM_TO_INSERT]
    )
    second_parent_cache.insert(
        "transcriptions", [FIRST_TR_TO_INSERT, SECOND_TR_TO_INSERT]
    )

    responses.add(
        responses.GET,
        "http://testserver/ponos/v1/task/my_task/from-agent/",
        status=200,
        json={"parents": ["first_parent_id", "second_parent_id"]},
    )

    mock_base_worker_with_cache.configure()

    stored_rows = mock_base_worker_with_cache.cache.cursor.execute(
        "SELECT * FROM elements"
    ).fetchall()
    assert (
        stored_rows == parent_cache.cursor.execute("SELECT * FROM elements").fetchall()
    )
    assert (
        stored_rows
        == second_parent_cache.cursor.execute("SELECT * FROM elements").fetchall()
    )
    assert [CachedElement(**dict(row)) for row in stored_rows] == [
        FIRST_ELEM_TO_INSERT,
        SECOND_ELEM_TO_INSERT,
    ]

    stored_rows = mock_base_worker_with_cache.cache.cursor.execute(
        "SELECT * FROM transcriptions"
    ).fetchall()
    assert (
        stored_rows
        == parent_cache.cursor.execute("SELECT * FROM transcriptions").fetchall()
    )
    assert (
        stored_rows
        == second_parent_cache.cursor.execute("SELECT * FROM transcriptions").fetchall()
    )
    assert [CachedTranscription(**dict(row)) for row in stored_rows] == [
        FIRST_TR_TO_INSERT,
        SECOND_TR_TO_INSERT,
    ]

    assert len(responses.calls) == 3
    assert [call.request.url for call in responses.calls] == [
        "http://testserver/api/v1/user/",
        "http://testserver/api/v1/workers/versions/12341234-1234-1234-1234-123412341234/",
        "http://testserver/ponos/v1/task/my_task/from-agent/",
    ]
def test_load_missing_secret():
    worker = BaseWorker()
    worker.api_client = MockApiClient()