Skip to content
Snippets Groups Projects
Verified Commit 82630075 authored by Erwan Rouchet
Browse files

Add database option

parent d2821a65
No related branches found
No related tags found
1 merge request: !82 Add database option
Pipeline #78401 passed
...@@ -57,18 +57,6 @@ class BaseWorker(object): ...@@ -57,18 +57,6 @@ class BaseWorker(object):
logger.info(f"Worker will use {self.work_dir} as working directory") logger.info(f"Worker will use {self.work_dir} as working directory")
self.use_cache = use_cache self.use_cache = use_cache
if self.use_cache is True:
if os.environ.get("TASK_ID"):
cache_dir = f"/data/{os.environ.get('TASK_ID')}"
assert os.path.isdir(cache_dir), f"Missing task cache in {cache_dir}"
self.cache_path = os.path.join(cache_dir, "db.sqlite")
else:
self.cache_path = os.path.join(os.getcwd(), "db.sqlite")
init_cache_db(self.cache_path)
create_tables()
else:
logger.debug("Cache is disabled")
@property @property
def is_read_only(self): def is_read_only(self):
...@@ -85,6 +73,13 @@ class BaseWorker(object): ...@@ -85,6 +73,13 @@ class BaseWorker(object):
help="Alternative configuration file when running without a Worker Version ID", help="Alternative configuration file when running without a Worker Version ID",
type=open, type=open,
) )
self.parser.add_argument(
"-d",
"--database",
help="Alternative SQLite database to use for worker caching",
type=str,
default=None,
)
self.parser.add_argument( self.parser.add_argument(
"-v", "-v",
"--verbose", "--verbose",
...@@ -138,9 +133,32 @@ class BaseWorker(object): ...@@ -138,9 +133,32 @@ class BaseWorker(object):
# Load all required secrets # Load all required secrets
self.secrets = {name: self.load_secret(name) for name in required_secrets} self.secrets = {name: self.load_secret(name) for name in required_secrets}
# Merging parents caches (if there are any) in the current task local cache if self.args.database is not None:
self.use_cache = True
if self.use_cache is True:
if self.args.database is not None:
assert os.path.isfile(
self.args.database
), f"Database in {self.args.database} does not exist"
self.cache_path = self.args.database
elif os.environ.get("TASK_ID"):
cache_dir = os.path.join(
os.environ.get("PONOS_DATA", "/data"), os.environ.get("TASK_ID")
)
assert os.path.isdir(cache_dir), f"Missing task cache in {cache_dir}"
self.cache_path = os.path.join(cache_dir, "db.sqlite")
else:
self.cache_path = os.path.join(os.getcwd(), "db.sqlite")
init_cache_db(self.cache_path)
create_tables()
else:
logger.debug("Cache is disabled")
# Merging parents caches (if there are any) in the current task local cache, unless the database got overridden
task_id = os.environ.get("TASK_ID") task_id = os.environ.get("TASK_ID")
if self.use_cache and task_id is not None: if self.use_cache and self.args.database is None and task_id is not None:
task = self.request("RetrieveTaskFromAgent", id=task_id) task = self.request("RetrieveTaskFromAgent", id=task_id)
merge_parents_cache( merge_parents_cache(
task["parents"], task["parents"],
......
...@@ -85,7 +85,10 @@ def test_list_elements_element_arg(mocker, mock_elements_worker): ...@@ -85,7 +85,10 @@ def test_list_elements_element_arg(mocker, mock_elements_worker):
mocker.patch( mocker.patch(
"arkindex_worker.worker.base.argparse.ArgumentParser.parse_args", "arkindex_worker.worker.base.argparse.ArgumentParser.parse_args",
return_value=Namespace( return_value=Namespace(
element=["volumeid", "pageid"], verbose=False, elements_list=None element=["volumeid", "pageid"],
verbose=False,
elements_list=None,
database=None,
), ),
) )
...@@ -115,6 +118,7 @@ def test_list_elements_both_args_error(mocker, mock_elements_worker): ...@@ -115,6 +118,7 @@ def test_list_elements_both_args_error(mocker, mock_elements_worker):
element=["anotherid", "againanotherid"], element=["anotherid", "againanotherid"],
verbose=False, verbose=False,
elements_list=open(path), elements_list=open(path),
database=None,
), ),
) )
...@@ -127,6 +131,27 @@ def test_list_elements_both_args_error(mocker, mock_elements_worker): ...@@ -127,6 +131,27 @@ def test_list_elements_both_args_error(mocker, mock_elements_worker):
assert str(e.value) == "elements-list and element CLI args shouldn't be both set" assert str(e.value) == "elements-list and element CLI args shouldn't be both set"
def test_database_arg(mocker, mock_elements_worker, tmp_path):
    """Check that passing a database on the command line turns on caching.

    The parsed CLI arguments are replaced with a namespace whose ``database``
    entry points at an existing file; after ``configure()`` the worker must
    report the cache as enabled and use that exact file as its cache path.
    """
    # The database file is created up front, before the worker is configured.
    sqlite_file = tmp_path / "my_database.sqlite"
    sqlite_file.touch()
    expected_path = str(sqlite_file)

    # Stand-in for the parsed command line: only the database option is set
    # to something other than its usual test value.
    cli_args = Namespace(
        element=["volumeid", "pageid"],
        verbose=False,
        elements_list=None,
        database=expected_path,
    )
    mocker.patch(
        "arkindex_worker.worker.base.argparse.ArgumentParser.parse_args",
        return_value=cli_args,
    )

    worker = ElementsWorker()
    worker.configure()

    assert worker.use_cache is True
    assert worker.cache_path == expected_path
def test_load_corpus_classes_api_error(responses, mock_elements_worker): def test_load_corpus_classes_api_error(responses, mock_elements_worker):
corpus_id = "12341234-1234-1234-1234-123412341234" corpus_id = "12341234-1234-1234-1234-123412341234"
responses.add( responses.add(
......
...@@ -152,6 +152,8 @@ def test_merge_from_worker( ...@@ -152,6 +152,8 @@ def test_merge_from_worker(
# Configure worker with a specific data directory # Configure worker with a specific data directory
monkeypatch.setenv("PONOS_DATA", str(tmpdir)) monkeypatch.setenv("PONOS_DATA", str(tmpdir))
# Create the task's output dir, so that it can create its own database
(tmpdir / "my_task").mkdir()
mock_base_worker_with_cache.configure() mock_base_worker_with_cache.configure()
# Then we have 2 elements and a transcription # Then we have 2 elements and a transcription
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment