Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • workers/base-worker
1 result
Show changes
Showing
with 245 additions and 211 deletions
# -*- coding: utf-8 -*-
"""
ElementsWorker methods for transcriptions.
"""
from collections.abc import Iterable
from enum import Enum
from typing import Dict, Iterable, List, Optional, Union
from peewee import IntegrityError
......@@ -40,14 +39,14 @@ class TextOrientation(Enum):
"""
class TranscriptionMixin(object):
class TranscriptionMixin:
def create_transcription(
self,
element: Union[Element, CachedElement],
element: Element | CachedElement,
text: str,
confidence: float,
orientation: TextOrientation = TextOrientation.HorizontalLeftToRight,
) -> Optional[Dict[str, Union[str, float]]]:
) -> dict[str, str | float] | None:
"""
Create a transcription on the given element through the API.
......@@ -59,7 +58,7 @@ class TranscriptionMixin(object):
or None if the worker is in read-only mode.
"""
assert element and isinstance(
element, (Element, CachedElement)
element, Element | CachedElement
), "element shouldn't be null and should be an Element or CachedElement"
assert text and isinstance(
text, str
......@@ -111,8 +110,8 @@ class TranscriptionMixin(object):
def create_transcriptions(
self,
transcriptions: List[Dict[str, Union[str, float, Optional[TextOrientation]]]],
) -> List[Dict[str, Union[str, float]]]:
transcriptions: list[dict[str, str | float | TextOrientation | None]],
) -> list[dict[str, str | float]]:
"""
Create multiple transcriptions at once on existing elements through the API,
and creates [CachedTranscription][arkindex_worker.cache.CachedTranscription] instances if cache support is enabled.
......@@ -140,13 +139,13 @@ class TranscriptionMixin(object):
for index, transcription in enumerate(transcriptions_payload):
element_id = transcription.get("element_id")
assert element_id and isinstance(
element_id, str
assert (
element_id and isinstance(element_id, str)
), f"Transcription at index {index} in transcriptions: element_id shouldn't be null and should be of type str"
text = transcription.get("text")
assert text and isinstance(
text, str
assert (
text and isinstance(text, str)
), f"Transcription at index {index} in transcriptions: text shouldn't be null and should be of type str"
confidence = transcription.get("confidence")
......@@ -159,8 +158,8 @@ class TranscriptionMixin(object):
orientation = transcription.get(
"orientation", TextOrientation.HorizontalLeftToRight
)
assert orientation and isinstance(
orientation, TextOrientation
assert (
orientation and isinstance(orientation, TextOrientation)
), f"Transcription at index {index} in transcriptions: orientation shouldn't be null and should be of type TextOrientation"
if orientation:
transcription["orientation"] = orientation.value
......@@ -203,10 +202,10 @@ class TranscriptionMixin(object):
def create_element_transcriptions(
self,
element: Union[Element, CachedElement],
element: Element | CachedElement,
sub_element_type: str,
transcriptions: List[Dict[str, Union[str, float]]],
) -> Dict[str, Union[str, bool]]:
transcriptions: list[dict[str, str | float]],
) -> dict[str, str | bool]:
"""
Create multiple elements and transcriptions at once on a single parent element through the API.
......@@ -228,7 +227,7 @@ class TranscriptionMixin(object):
:returns: A list of dicts as returned by the ``CreateElementTranscriptions`` API endpoint.
"""
assert element and isinstance(
element, (Element, CachedElement)
element, Element | CachedElement
), "element shouldn't be null and should be an Element or CachedElement"
assert sub_element_type and isinstance(
sub_element_type, str
......@@ -242,8 +241,8 @@ class TranscriptionMixin(object):
for index, transcription in enumerate(transcriptions_payload):
text = transcription.get("text")
assert text and isinstance(
text, str
assert (
text and isinstance(text, str)
), f"Transcription at index {index} in transcriptions: text shouldn't be null and should be of type str"
confidence = transcription.get("confidence")
......@@ -256,15 +255,15 @@ class TranscriptionMixin(object):
orientation = transcription.get(
"orientation", TextOrientation.HorizontalLeftToRight
)
assert orientation and isinstance(
orientation, TextOrientation
assert (
orientation and isinstance(orientation, TextOrientation)
), f"Transcription at index {index} in transcriptions: orientation shouldn't be null and should be of type TextOrientation"
if orientation:
transcription["orientation"] = orientation.value
polygon = transcription.get("polygon")
assert polygon and isinstance(
polygon, list
assert (
polygon and isinstance(polygon, list)
), f"Transcription at index {index} in transcriptions: polygon shouldn't be null and should be of type list"
assert (
len(polygon) >= 3
......@@ -273,12 +272,16 @@ class TranscriptionMixin(object):
isinstance(point, list) and len(point) == 2 for point in polygon
), f"Transcription at index {index} in transcriptions: polygon points should be lists of two items"
assert all(
isinstance(coord, (int, float)) for point in polygon for coord in point
isinstance(coord, int | float) for point in polygon for coord in point
), f"Transcription at index {index} in transcriptions: polygon points should be lists of two numbers"
element_confidence = transcription.get("element_confidence")
assert element_confidence is None or (
isinstance(element_confidence, float) and 0 <= element_confidence <= 1
assert (
element_confidence is None
or (
isinstance(element_confidence, float)
and 0 <= element_confidence <= 1
)
), f"Transcription at index {index} in transcriptions: element_confidence should be either null or a float in [0..1] range"
if self.is_read_only:
......@@ -359,11 +362,11 @@ class TranscriptionMixin(object):
def list_transcriptions(
self,
element: Union[Element, CachedElement],
element_type: Optional[str] = None,
recursive: Optional[bool] = None,
worker_version: Optional[Union[str, bool]] = None,
) -> Union[Iterable[dict], Iterable[CachedTranscription]]:
element: Element | CachedElement,
element_type: str | None = None,
recursive: bool | None = None,
worker_version: str | bool | None = None,
) -> Iterable[dict] | Iterable[CachedTranscription]:
"""
List transcriptions on an element.
......@@ -375,7 +378,7 @@ class TranscriptionMixin(object):
or an iterable of CachedTranscription when cache support is enabled.
"""
assert element and isinstance(
element, (Element, CachedElement)
element, Element | CachedElement
), "element shouldn't be null and should be an Element or CachedElement"
query_params = {}
if element_type:
......@@ -386,7 +389,7 @@ class TranscriptionMixin(object):
query_params["recursive"] = recursive
if worker_version is not None:
assert isinstance(
worker_version, (str, bool)
worker_version, str | bool
), "worker_version should be of type str or bool"
if isinstance(worker_version, bool):
assert (
......
# -*- coding: utf-8 -*-
"""
ElementsWorker methods for worker versions.
"""
class WorkerVersionMixin(object):
class WorkerVersionMixin:
def get_worker_version(self, worker_version_id: str) -> dict:
"""
Retrieve a worker version, using the [ElementsWorker][arkindex_worker.worker.ElementsWorker]'s internal cache when possible.
......
......@@ -3,6 +3,6 @@
"name": "Demo",
"description": "Demo ML worker for Arkindex",
"worker_type": "demo",
"author": "",
"email": ""
"author": "John Doe",
"email": "john.doe@company.com"
}
# -*- coding: utf-8 -*-
from arkindex_worker.worker import ElementsWorker
......
black==23.11.0
black==23.12.0
doc8==1.1.1
mkdocs==1.5.3
mkdocs-material==9.4.8
......
......@@ -24,7 +24,8 @@ At Teklia, we use a simple version of [Git Flow][gitflow]:
- Developments should happen in branches, with merge requests to enable code
review and Gitlab CI pipelines.
- Project maintainers should use Git tags to create official releases, by
updating the `VERSION` file and using the same version string as the tag name.
updating the `project.version` key of the `pyproject.toml` file and using
the same version string as the tag name.
This process is reflected in the template's `.gitlab-ci.yml` file.
......
......@@ -131,9 +131,9 @@ to get a basic structure for your worker.
Cookiecutter will ask you for several options:
`slug`
: A slug for the worker. This should use lowercase alphanumeric characters or
underscores to meet the code formatting requirements that the template
automatically enforces via [black].
: A slug for the worker. This should use lowercase alphanumeric characters,
underscores or hyphens to meet the code formatting requirements that the
template automatically enforces via [black].
`name`
: A name for the worker, purely used for display purposes.
......@@ -159,6 +159,16 @@ Cookiecutter will ask you for several options:
`email`
: Your e-mail address. This will be used to contact you if any administrative need arises
Cookiecutter will also automatically normalize your worker's `slug` in new parameters:
`__package`
: The name of the Python package for your worker, generated by normalizing the `slug`:
lowercasing it and replacing underscores with hyphens.
`__module`
: The name of the Python module for your worker, generated by normalizing the `slug`:
lowercasing it and replacing hyphens with underscores.
### Pushing to GitLab
This section guides you through pushing the newly created worker from your
......@@ -169,7 +179,7 @@ This section assumes you have Maintainer or Owner access to the GitLab project.
#### To push to GitLab
1. Enter the newly created directory, starting in `worker-` and ending with your
worker's slug.
worker's `slug`.
2. Add your GitLab project as a Git remote:
......
......@@ -115,6 +115,6 @@ in the browser's address bar when browsing an element on Arkindex.
1. Activate the Python environment: run `workon X` where `X` is the name of
your Python environment.
2. Run `worker-X`, where `X` is the slug of your worker, followed by
2. Run `worker-X`, where `X` is the `__package` name of your worker, followed by
`--element=Y` where `Y` is the ID of an element. You can repeat `--element`
as many times as you need to process multiple elements.
......@@ -53,8 +53,8 @@ package, a Docker build, with the best development practices:
`setup.py`
: Configures the worker's Python package.
`VERSION`
: Official version number of your worker. Defaults to `0.1.0`.
`pyproject.toml`
: Configures the worker's Python package.
`ci/build.sh`
: Script that gets run by [CI](ci/index.md) pipelines
......@@ -68,10 +68,10 @@ package, a Docker build, with the best development practices:
TODO: For more information, see [Writing tests for your worker](tests).
-->
`worker_[slug]/__init__.py`
`worker_[__module]/__init__.py`
: Declares the folder as a Python package.
`worker_[slug]/worker.py`
`worker_[__module]/worker.py`
: The core part of the worker. This is where you can write code that processes
Arkindex elements.
......
docs/contents/workers/user_configuration/model_config.png

50.4 KiB

......@@ -54,7 +54,7 @@ All attributes are optional unless explicitly specified.
: Mandatory. Name of the worker, for display purposes.
`slug`
: Mandatory. Slug of this worker. The slug must be unique across the repository and must only hold alphanumerical characters, underscores or dashes.
: Mandatory. Slug of this worker. The slug must be unique across the repository and must only hold alphanumerical characters, underscores or hyphens.
`type`
: Mandatory. Type of the worker, for display purposes only. Some common values
......@@ -80,7 +80,16 @@ include:
: This worker does not support GPUs. It may run on a host that has a GPU, but it will ignore it.
`model_usage`
: Boolean. Whether or not this worker requires a model version to run. Defaults to `false`.
: Whether or not this worker requires a model version to run. Defaults to `disabled`. May take one of the following values:
`required`
: This worker requires a model version, and will only be run on processes with a model.
`supported`
: This worker supports a model version, but may run on any process, including those without a model.
`disabled`
: This worker does not support model versions. It may run on a process that has a model, but it will ignore it.
`docker`
: Regroups Docker-related configuration attributes:
......@@ -137,6 +146,7 @@ A parameter is defined using the following settings:
- `enum`
- `list`
- `dict`
- `model`
`default`
: Optional. A default value for the parameter. Must be of the defined parameter `type`.
......@@ -272,7 +282,7 @@ Which will result in the following display for the user:
#### Dictionary parameters
Dictionary-type parameters must be defined using a `title`, the `dict` `type`. You can also set a `default` value for this parameter, which must be one a dictionary, as well as make it a `required` parameter, which prevents users from leaving it blank. You can use dictionary parameters for example to specify a correspondence between the classes that are predicted by a worker and the elements that are created on Arkindex from these predictions.
Dictionary-type parameters must be defined using a `title` and the `dict` `type`. You can also set a `default` value for this parameter, which must be a dictionary, as well as make it a `required` parameter, which prevents users from leaving it blank. You can use dictionary parameters for example to specify a correspondence between the classes that are predicted by a worker and the elements that are created on Arkindex from these predictions.
Dictionary-type parameters only accept strings as values.
......@@ -293,6 +303,26 @@ Which will result in the following display for the user:
![Dictionary-type parameter](user_configuration/dict_config.png "Example dictionary-type parameter.")
#### Model parameters
Model-type parameters must be defined using a `title` and the `model` type. You can also set a `default` value for this parameter, which must be the UUID of an existing Model, and make it a `required` parameter, which prevents users from leaving it blank. You can use a model parameter to specify to which Model the Model Version that is created by a Training process will be attached.
Model-type parameters only accept Model UUIDs as values.
In the configuration form, model parameters are displayed as an input field. Users can select a model from a list of available Models: what they type into the input field filters that list, allowing them to search for a model using its name or UUID.
For example, a model-type parameter can be defined like this:
```yaml
model_param:
title: Training Model
type: model
```
Which will result in the following display for the user:
![Model-type parameter](user_configuration/model_config.png "Example model-type parameter.")
#### Example user_configuration
```yaml
......@@ -318,6 +348,9 @@ user_configuration:
- 23
- 56
title: Another Parameter
a_model_parameter:
type: model
title: Model to train
```
#### Fallback to free JSON input
......
# Git & Gitlab support
::: arkindex_worker.git
......@@ -115,7 +115,7 @@ Released on **8 November 2022** &bull; View on [Gitlab](https://gitlab.teklia.co
- A new version of the cache was released with the updated Django models.
- Improvements to our Machine Learning training API to allow workers to use models published on Arkindex.
- Support workers that have no configuration.
- Allow publishing metadatas with falsy but non-null values.
- Allow publishing metadata with falsy but non-null values.
- Add `.polygon` attribute shortcut on `Element`.
- Add a major test speedup on our worker template.
- Support cache usage on our metadata API endpoint helpers.
......
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#!/usr/bin/env bash
# File generated by pre-commit: https://pre-commit.com
# ID: 138fd403232d2ddd5efb44317e38bf03
import os
import sys
# we try our best, but the shebang of this script is difficult to determine:
# - macos doesn't ship with python3
# - windows executables are almost always `python.exe`
# therefore we continue to support python2 for this small script
if sys.version_info < (3, 3):
from distutils.spawn import find_executable as which
else:
from shutil import which
# work around https://github.com/Homebrew/homebrew-core/issues/30445
os.environ.pop("__PYVENV_LAUNCHER__", None)
# start templated
INSTALL_PYTHON = "/usr/bin/python3"
ARGS = [
"hook-impl",
"--config=.pre-commit-config.yaml",
"--hook-type=pre-commit",
"--skip-on-missing-config",
]
INSTALL_PYTHON=/usr/bin/python3
ARGS=(hook-impl --config=.pre-commit-config.yaml --hook-type=pre-commit --skip-on-missing-config)
# end templated
ARGS.extend(("--hook-dir", os.path.realpath(os.path.dirname(__file__))))
ARGS.append("--")
ARGS.extend(sys.argv[1:])
DONE = "`pre-commit` not found. Did you forget to activate your virtualenv?"
if os.access(INSTALL_PYTHON, os.X_OK):
CMD = [INSTALL_PYTHON, "-mpre_commit"]
elif which("pre-commit"):
CMD = ["pre-commit"]
else:
raise SystemExit(DONE)
CMD.extend(ARGS)
if sys.platform == "win32": # https://bugs.python.org/issue19124
import subprocess
HERE="$(cd "$(dirname "$0")" && pwd)"
ARGS+=(--hook-dir "$HERE" -- "$@")
if sys.version_info < (3, 7): # https://bugs.python.org/issue25942
raise SystemExit(subprocess.Popen(CMD).wait())
else:
raise SystemExit(subprocess.call(CMD))
else:
os.execvp(CMD[0], CMD)
if [ -x "$INSTALL_PYTHON" ]; then
exec "$INSTALL_PYTHON" -mpre_commit "${ARGS[@]}"
elif command -v pre-commit > /dev/null; then
exec pre-commit "${ARGS[@]}"
else
echo '`pre-commit` not found. Did you forget to activate your virtualenv?' 1>&2
exit 1
fi
# Normalize the slug to generate __package and __module private variables
{{cookiecutter.update({"__package": cookiecutter.slug.lower().replace("_", "-")})}} # noqa: F821
{{cookiecutter.update({"__module": cookiecutter.slug.lower().replace("-", "_")})}} # noqa: F821
......@@ -36,7 +36,6 @@ plugins:
- search
- autorefs
- mkdocstrings:
custom_templates: templates
handlers:
python:
import: # enable auto refs to the doc
......@@ -90,7 +89,6 @@ nav:
- Transcription: ref/api/transcription.md
- WorkerVersion: ref/api/worker_version.md
- Models: ref/models.md
- Git & Gitlab support: ref/git.md
- Image utilities: ref/image.md
- Cache: ref/cache.md
- Utils: ref/utils.md
......
[build-system]
requires = ["setuptools >= 61.0"]
build-backend = "setuptools.build_meta"
[project]
name = "arkindex-base-worker"
version = "0.3.6-rc5"
description = "Base Worker to easily build Arkindex ML workflows"
license = { file = "LICENSE" }
dynamic = ["dependencies", "optional-dependencies"]
authors = [
{ name = "Teklia", email = "contact@teklia.com" },
]
maintainers = [
{ name = "Teklia", email = "contact@teklia.com" },
]
requires-python = ">=3.10"
readme = { file = "README.md", content-type = "text/markdown" }
keywords = ["python"]
classifiers = [
"Development Status :: 5 - Production/Stable",
"License :: OSI Approved :: MIT License",
# Specify the Python versions you support here.
"Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
# Topics
"Topic :: Text Processing :: Linguistic",
]
[project.urls]
Homepage = "https://workers.arkindex.org"
Documentation = "https://workers.arkindex.org"
Repository = "https://gitlab.teklia.com/workers/base-worker"
"Bug Tracker" = "https://gitlab.teklia.com/workers/base-worker/issues"
Authors = "https://teklia.com"
[tool.setuptools.dynamic]
dependencies = { file = ["requirements.txt"] }
optional-dependencies = { docs = { file = ["docs-requirements.txt"] } }
[tool.ruff]
exclude = [".git", "__pycache__"]
ignore = ["E501"]
select = ["E", "F", "T1", "W", "I"]
select = [
# pycodestyle
"E",
"W",
# Pyflakes
"F",
# Flake8 Debugger
"T1",
# Isort
"I",
# Implicit Optional
"RUF013",
# Invalid pyproject.toml
"RUF200",
# pyupgrade
"UP",
# flake8-bugbear
"B",
# flake8-simplify
"SIM",
# flake8-pytest-style
"PT",
# flake8-use-pathlib
"PTH",
]
[tool.ruff.per-file-ignores]
# Ignore `pytest-composite-assertion` rules of `flake8-pytest-style` linter for non-test files
"arkindex_worker/**/*.py" = ["PT018"]
[tool.ruff.isort]
known-first-party = ["arkindex", "arkindex_common", "arkindex_worker"]
known-third-party = [
"PIL",
"apistar",
"gitlab",
"gnupg",
"peewee",
"playhouse",
......@@ -16,7 +84,6 @@ known-third-party = [
"requests",
"responses",
"setuptools",
"sh",
"shapely",
"tenacity",
"yaml",
......
arkindex-client==1.0.14
peewee==3.17.0
Pillow==10.1.0
pymdown-extensions==10.3.1
python-gitlab==4.1.1
python-gnupg==0.5.1
sh==2.0.6
pymdown-extensions==10.5
python-gnupg==0.5.2
shapely==2.0.2
tenacity==8.2.3
zstandard==0.22.0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from pathlib import Path
from setuptools import find_packages, setup
def requirements(path: Path):
assert path.exists(), "Missing requirements {}".format(path)
with path.open() as f:
return list(map(str.strip, f.read().splitlines()))
with open("VERSION") as f:
VERSION = f.read()
setup(
name="arkindex-base-worker",
version=VERSION,
description="Base Worker to easily build Arkindex ML workflows",
author="Teklia",
author_email="contact@teklia.com",
url="https://teklia.com",
python_requires=">=3.7",
install_requires=requirements(Path("requirements.txt")),
extras_require={"docs": requirements(Path("docs-requirements.txt"))},
packages=find_packages(),
)
setup(packages=find_packages())
# -*- coding: utf-8 -*-
import hashlib
import json
import os
......@@ -19,10 +18,10 @@ from arkindex_worker.cache import (
CachedImage,
CachedTranscription,
Version,
create_tables,
create_version_table,
init_cache_db,
)
from arkindex_worker.git import GitHelper, GitlabHelper
from arkindex_worker.models import Artifact, Dataset
from arkindex_worker.worker import BaseWorker, DatasetWorker, ElementsWorker
from arkindex_worker.worker.dataset import DatasetState
......@@ -37,7 +36,7 @@ __yaml_cache = {}
@pytest.fixture(autouse=True)
def disable_sleep(monkeypatch):
def _disable_sleep(monkeypatch):
"""
Do not sleep at all in between API executions
when errors occur in unit tests.
......@@ -46,8 +45,8 @@ def disable_sleep(monkeypatch):
monkeypatch.setattr(time, "sleep", lambda x: None)
@pytest.fixture
def cache_yaml(monkeypatch):
@pytest.fixture()
def _cache_yaml(monkeypatch):
"""
Cache all calls to yaml.safe_load in order to speedup
every test cases that load the OpenAPI schema
......@@ -75,7 +74,7 @@ def cache_yaml(monkeypatch):
@pytest.fixture(autouse=True)
def setup_api(responses, monkeypatch, cache_yaml):
def _setup_api(responses, monkeypatch, _cache_yaml):
# Always use the environment variable first
schema_url = os.environ.get("ARKINDEX_API_SCHEMA_URL")
if schema_url is None:
......@@ -106,13 +105,13 @@ def setup_api(responses, monkeypatch, cache_yaml):
@pytest.fixture(autouse=True)
def give_env_variable(request, monkeypatch):
def _give_env_variable(monkeypatch):
"""Defines required environment variables"""
monkeypatch.setenv("ARKINDEX_WORKER_RUN_ID", "56785678-5678-5678-5678-567856785678")
@pytest.fixture
def mock_worker_run_api(responses):
@pytest.fixture()
def _mock_worker_run_api(responses):
"""Provide a mock API response to get worker run information"""
payload = {
"id": "56785678-5678-5678-5678-567856785678",
......@@ -140,7 +139,7 @@ def mock_worker_run_api(responses):
"docker_image_name": None,
"state": "created",
"gpu_usage": "disabled",
"model_usage": False,
"model_usage": "disabled",
"worker": {
"id": "deadbeef-1234-5678-1234-worker",
"name": "Fake worker",
......@@ -180,8 +179,8 @@ def mock_worker_run_api(responses):
)
@pytest.fixture
def mock_worker_run_no_revision_api(responses):
@pytest.fixture()
def _mock_worker_run_no_revision_api(responses):
"""Provide a mock API response to get worker run not linked to a revision information"""
payload = {
"id": "56785678-5678-5678-5678-567856785678",
......@@ -207,7 +206,7 @@ def mock_worker_run_no_revision_api(responses):
"docker_image_name": None,
"state": "created",
"gpu_usage": "disabled",
"model_usage": False,
"model_usage": "disabled",
"worker": {
"id": "deadbeef-1234-5678-1234-worker",
"name": "Fake worker",
......@@ -247,8 +246,8 @@ def mock_worker_run_no_revision_api(responses):
)
@pytest.fixture
def mock_activity_calls(responses):
@pytest.fixture()
def _mock_activity_calls(responses):
"""
Mock responses when updating the activity state for multiple element of the same version
"""
......@@ -259,8 +258,8 @@ def mock_activity_calls(responses):
)
@pytest.fixture
def mock_elements_worker(monkeypatch, mock_worker_run_api):
@pytest.fixture()
def mock_elements_worker(monkeypatch, _mock_worker_run_api):
"""Build and configure an ElementsWorker with fixed CLI parameters to avoid issues with pytest"""
monkeypatch.setattr(sys, "argv", ["worker"])
worker = ElementsWorker()
......@@ -268,7 +267,7 @@ def mock_elements_worker(monkeypatch, mock_worker_run_api):
return worker
@pytest.fixture
@pytest.fixture()
def mock_elements_worker_read_only(monkeypatch):
"""Build and configure an ElementsWorker with fixed CLI parameters to avoid issues with pytest"""
monkeypatch.setattr(sys, "argv", ["worker", "--dev"])
......@@ -277,7 +276,7 @@ def mock_elements_worker_read_only(monkeypatch):
return worker
@pytest.fixture
@pytest.fixture()
def mock_elements_worker_with_list(monkeypatch, responses, mock_elements_worker):
"""
Mock a worker instance to list and retrieve a single element
......@@ -298,8 +297,19 @@ def mock_elements_worker_with_list(monkeypatch, responses, mock_elements_worker)
return mock_elements_worker
@pytest.fixture
def mock_base_worker_with_cache(mocker, monkeypatch, mock_worker_run_api):
@pytest.fixture()
def mock_cache_db(tmp_path):
cache_path = tmp_path / "db.sqlite"
init_cache_db(cache_path)
create_version_table()
create_tables()
return cache_path
@pytest.fixture()
def mock_base_worker_with_cache(monkeypatch, _mock_worker_run_api):
"""Build a BaseWorker using SQLite cache, also mocking a PONOS_TASK"""
monkeypatch.setattr(sys, "argv", ["worker"])
......@@ -309,13 +319,10 @@ def mock_base_worker_with_cache(mocker, monkeypatch, mock_worker_run_api):
return worker
@pytest.fixture
def mock_elements_worker_with_cache(monkeypatch, mock_worker_run_api, tmp_path):
@pytest.fixture()
def mock_elements_worker_with_cache(monkeypatch, mock_cache_db, _mock_worker_run_api):
"""Build and configure an ElementsWorker using SQLite cache with fixed CLI parameters to avoid issues with pytest"""
cache_path = tmp_path / "db.sqlite"
init_cache_db(cache_path)
create_version_table()
monkeypatch.setattr(sys, "argv", ["worker", "-d", str(cache_path)])
monkeypatch.setattr(sys, "argv", ["worker", "-d", str(mock_cache_db)])
worker = ElementsWorker(support_cache=True)
worker.configure()
......@@ -323,35 +330,34 @@ def mock_elements_worker_with_cache(monkeypatch, mock_worker_run_api, tmp_path):
return worker
@pytest.fixture
@pytest.fixture()
def fake_page_element():
with open(FIXTURES_DIR / "page_element.json", "r") as f:
return json.load(f)
return json.loads((FIXTURES_DIR / "page_element.json").read_text())
@pytest.fixture
@pytest.fixture()
def fake_ufcn_worker_version():
with open(FIXTURES_DIR / "ufcn_line_historical_worker_version.json", "r") as f:
return json.load(f)
return json.loads(
(FIXTURES_DIR / "ufcn_line_historical_worker_version.json").read_text()
)
@pytest.fixture
@pytest.fixture()
def fake_transcriptions_small():
with open(FIXTURES_DIR / "line_transcriptions_small.json", "r") as f:
return json.load(f)
return json.loads((FIXTURES_DIR / "line_transcriptions_small.json").read_text())
@pytest.fixture
@pytest.fixture()
def model_file_dir():
return SAMPLES_DIR / "model_files"
@pytest.fixture
@pytest.fixture()
def model_file_dir_with_subfolder():
return SAMPLES_DIR / "root_folder"
@pytest.fixture
@pytest.fixture()
def fake_dummy_worker():
api_client = MockApiClient()
worker = ElementsWorker()
......@@ -359,34 +365,8 @@ def fake_dummy_worker():
return worker
@pytest.fixture
def fake_git_helper(mocker):
gitlab_helper = mocker.MagicMock()
return GitHelper(
"repo_url",
"/tmp/git_test/foo/",
"/tmp/test/path/",
"tmp_workflow_id",
gitlab_helper,
)
@pytest.fixture
def fake_gitlab_helper_factory():
# have to set up the responses, before creating the client
def run():
return GitlabHelper(
"balsac_exporter/balsac-exported-xmls-testing",
"https://gitlab.com",
"<GITLAB_TOKEN>",
"gitlab_branch",
)
return run
@pytest.fixture
def mock_cached_elements():
@pytest.fixture()
def _mock_cached_elements(mock_cache_db):
"""Insert few elements in local cache"""
CachedElement.create(
id=UUID("99999999-9999-9999-9999-999999999999"),
......@@ -430,8 +410,8 @@ def mock_cached_elements():
assert CachedElement.select().count() == 5
@pytest.fixture
def mock_cached_images():
@pytest.fixture()
def _mock_cached_images(mock_cache_db):
"""Insert few elements in local cache"""
CachedImage.create(
id=UUID("99999999-9999-9999-9999-999999999999"),
......@@ -442,8 +422,8 @@ def mock_cached_images():
assert CachedImage.select().count() == 1
@pytest.fixture
def mock_cached_transcriptions():
@pytest.fixture()
def _mock_cached_transcriptions(mock_cache_db):
"""Insert few transcriptions in local cache, on a shared element"""
CachedElement.create(
id=UUID("11111111-1111-1111-1111-111111111111"),
......@@ -529,7 +509,7 @@ def mock_cached_transcriptions():
)
@pytest.fixture(scope="function")
@pytest.fixture()
def mock_databases(tmp_path):
"""
Initialize several temporary databases
......@@ -612,7 +592,7 @@ def mock_databases(tmp_path):
return out
@pytest.fixture
@pytest.fixture()
def default_dataset():
return Dataset(
**{
......@@ -630,8 +610,8 @@ def default_dataset():
)
@pytest.fixture
def mock_dataset_worker(monkeypatch, mocker, mock_worker_run_api):
@pytest.fixture()
def mock_dataset_worker(monkeypatch, mocker, _mock_worker_run_api):
monkeypatch.setenv("PONOS_TASK", "my_task")
mocker.patch.object(sys, "argv", ["worker"])
......@@ -644,7 +624,7 @@ def mock_dataset_worker(monkeypatch, mocker, mock_worker_run_api):
return dataset_worker
@pytest.fixture
@pytest.fixture()
def mock_dev_dataset_worker(mocker):
mocker.patch.object(
sys,
......@@ -668,7 +648,7 @@ def mock_dev_dataset_worker(mocker):
return dataset_worker
@pytest.fixture
@pytest.fixture()
def default_artifact():
return Artifact(
**{
......