Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • workers/base-worker
1 result
Show changes
Commits on Source (5)
Showing
with 538 additions and 131 deletions
......@@ -452,8 +452,9 @@ class DatasetWorker(BaseWorker, DatasetMixin, TaskMixin):
if self.generator:
assert (
dataset.state == DatasetState.Open.value
), "When generating a new dataset, its state should be Open."
dataset.state
in [DatasetState.Open.value, DatasetState.Error.value]
), "When generating a new dataset, its state should be Open or Error."
else:
assert (
dataset.state == DatasetState.Complete.value
......
......@@ -72,7 +72,7 @@ class BaseWorker:
self.parser.add_argument(
"-c",
"--config",
help="Alternative configuration file when running without a Worker Version ID",
help="Alternative configuration file when running without a Worker Run ID",
type=open,
)
self.parser.add_argument(
......@@ -94,7 +94,7 @@ class BaseWorker:
"--dev",
help=(
"Run worker in developer mode. "
"Worker will be in read-only state even if a worker_version is supplied. "
"Worker will be in read-only state even if a worker run is supplied. "
),
action="store_true",
default=False,
......@@ -176,6 +176,14 @@ class BaseWorker:
"""
return self.args.dev or self.worker_run_id is None
@property
def worker_version_id(self):
"""Deprecated property previously used to retrieve the current WorkerVersion ID.
:raises DeprecationWarning: Whenever `worker_version_id` is used.
"""
raise DeprecationWarning("`worker_version_id` usage is deprecated")
def setup_api_client(self):
"""
Create an ArkindexClient to make API requests towards Arkindex instances.
......@@ -243,10 +251,6 @@ class BaseWorker:
# Load worker version information
worker_version = worker_run["worker_version"]
# Store worker version id
self.worker_version_id = worker_version["id"]
self.worker_details = worker_version["worker"]
logger.info(f"Loaded {worker_run['summary']} from API")
......
......@@ -154,13 +154,6 @@ class ClassificationMixin:
# Detect already existing classification
if e.status_code == 400 and "non_field_errors" in e.content:
if (
"The fields element, worker_version, ml_class must make a unique set."
in e.content["non_field_errors"]
):
logger.warning(
f"This worker version has already set {ml_class} on element {element.id}"
)
elif (
"The fields element, worker_run, ml_class must make a unique set."
in e.content["non_field_errors"]
):
......
......@@ -4,6 +4,7 @@ ElementsWorker methods for elements and element types.
from collections.abc import Iterable
from typing import NamedTuple
from uuid import UUID
from warnings import warn
from peewee import IntegrityError
......@@ -422,6 +423,13 @@ class ElementMixin:
"""
List children of an element.
Warns:
----
The following parameters are **deprecated**:
- `transcription_worker_version` in favor of `transcription_worker_run`
- `worker_version` in favor of `worker_run`
:param element: Parent element to find children of.
:param folder: Restrict to or exclude elements with folder types.
This parameter is not supported when caching is enabled.
......@@ -429,9 +437,9 @@ class ElementMixin:
This parameter is not supported when caching is enabled.
:param recursive: Look for elements recursively (grand-children, etc.)
This parameter is not supported when caching is enabled.
:param transcription_worker_version: Restrict to elements that have a transcription created by a worker version with this UUID.
:param transcription_worker_version: **Deprecated** Restrict to elements that have a transcription created by a worker version with this UUID. Set to False to look for elements that have a manual transcription.
This parameter is not supported when caching is enabled.
:param transcription_worker_run: Restrict to elements that have a transcription created by a worker run with this UUID.
:param transcription_worker_run: Restrict to elements that have a transcription created by a worker run with this UUID. Set to False to look for elements that have a manual transcription.
This parameter is not supported when caching is enabled.
:param type: Restrict to elements with a specific type slug
This parameter is not supported when caching is enabled.
......@@ -447,7 +455,7 @@ class ElementMixin:
:param with_zone: Include the ``zone`` attribute in the response,
holding the element's image and polygon.
This parameter is not supported when caching is enabled.
:param worker_version: Restrict to elements created by a worker version with this UUID.
:param worker_version: **Deprecated** Restrict to elements created by a worker version with this UUID.
:param worker_run: Restrict to elements created by a worker run with this UUID.
:return: An iterable of dicts from the ``ListElementChildren`` API endpoint,
or an iterable of [CachedElement][arkindex_worker.cache.CachedElement] when caching is enabled.
......@@ -466,6 +474,11 @@ class ElementMixin:
assert isinstance(recursive, bool), "recursive should be of type bool"
query_params["recursive"] = recursive
if transcription_worker_version is not None:
warn(
"`transcription_worker_version` usage is deprecated. Consider using `transcription_worker_run` instead.",
DeprecationWarning,
stacklevel=1,
)
assert isinstance(
transcription_worker_version, str | bool
), "transcription_worker_version should be of type str or bool"
......@@ -506,6 +519,11 @@ class ElementMixin:
assert isinstance(with_zone, bool), "with_zone should be of type bool"
query_params["with_zone"] = with_zone
if worker_version is not None:
warn(
"`worker_version` usage is deprecated. Consider using `worker_run` instead.",
DeprecationWarning,
stacklevel=1,
)
assert isinstance(
worker_version, str | bool
), "worker_version should be of type str or bool"
......@@ -584,6 +602,13 @@ class ElementMixin:
"""
List parents of an element.
Warns:
----
The following parameters are **deprecated**:
- `transcription_worker_version` in favor of `transcription_worker_run`
- `worker_version` in favor of `worker_run`
:param element: Child element to find parents of.
:param folder: Restrict to or exclude elements with folder types.
This parameter is not supported when caching is enabled.
......@@ -591,7 +616,7 @@ class ElementMixin:
This parameter is not supported when caching is enabled.
:param recursive: Look for elements recursively (grand-children, etc.)
This parameter is not supported when caching is enabled.
:param transcription_worker_version: Restrict to elements that have a transcription created by a worker version with this UUID.
:param transcription_worker_version: **Deprecated** Restrict to elements that have a transcription created by a worker version with this UUID.
This parameter is not supported when caching is enabled.
:param transcription_worker_run: Restrict to elements that have a transcription created by a worker run with this UUID.
This parameter is not supported when caching is enabled.
......@@ -609,7 +634,7 @@ class ElementMixin:
:param with_zone: Include the ``zone`` attribute in the response,
holding the element's image and polygon.
This parameter is not supported when caching is enabled.
:param worker_version: Restrict to elements created by a worker version with this UUID.
:param worker_version: **Deprecated** Restrict to elements created by a worker version with this UUID.
:param worker_run: Restrict to elements created by a worker run with this UUID.
:return: An iterable of dicts from the ``ListElementParents`` API endpoint,
or an iterable of [CachedElement][arkindex_worker.cache.CachedElement] when caching is enabled.
......@@ -628,6 +653,11 @@ class ElementMixin:
assert isinstance(recursive, bool), "recursive should be of type bool"
query_params["recursive"] = recursive
if transcription_worker_version is not None:
warn(
"`transcription_worker_version` usage is deprecated. Consider using `transcription_worker_run` instead.",
DeprecationWarning,
stacklevel=1,
)
assert isinstance(
transcription_worker_version, str | bool
), "transcription_worker_version should be of type str or bool"
......@@ -668,6 +698,11 @@ class ElementMixin:
assert isinstance(with_zone, bool), "with_zone should be of type bool"
query_params["with_zone"] = with_zone
if worker_version is not None:
warn(
"`worker_version` usage is deprecated. Consider using `worker_run` instead.",
DeprecationWarning,
stacklevel=1,
)
assert isinstance(
worker_version, str | bool
), "worker_version should be of type str or bool"
......
......@@ -4,6 +4,7 @@ ElementsWorker methods for entities.
from operator import itemgetter
from typing import TypedDict
from warnings import warn
from peewee import IntegrityError
......@@ -297,13 +298,21 @@ class EntityMixin:
self,
transcription: Transcription,
worker_version: str | bool | None = None,
worker_run: str | bool | None = None,
):
"""
List existing entities on a transcription
This method does not support cache
Warns:
----
The following parameters are **deprecated**:
- `worker_version` in favor of `worker_run`
:param transcription: The transcription to list entities on.
:param worker_version: Restrict to entities created by a worker version with this UUID. Set to False to look for manually created transcriptions.
:param worker_version: **Deprecated** Restrict to entities created by a worker version with this UUID. Set to False to look for manually created entities.
:param worker_run: Restrict to entities created by a worker run with this UUID. Set to False to look for manually created entities.
"""
query_params = {}
assert transcription and isinstance(
......@@ -311,6 +320,11 @@ class EntityMixin:
), "transcription shouldn't be null and should be a Transcription"
if worker_version is not None:
warn(
"`worker_version` usage is deprecated. Consider using `worker_run` instead.",
DeprecationWarning,
stacklevel=1,
)
assert isinstance(
worker_version, str | bool
), "worker_version should be of type str or bool"
......@@ -320,6 +334,15 @@ class EntityMixin:
worker_version is False
), "if of type bool, worker_version can only be set to False"
query_params["worker_version"] = worker_version
if worker_run is not None:
assert isinstance(
worker_run, str | bool
), "worker_run should be of type str or bool"
if isinstance(worker_run, bool):
assert (
worker_run is False
), "if of type bool, worker_run can only be set to False"
query_params["worker_run"] = worker_run
return self.api_client.paginate(
"ListTranscriptionEntities", id=transcription.id, **query_params
......
......@@ -4,6 +4,7 @@ ElementsWorker methods for transcriptions.
from collections.abc import Iterable
from enum import Enum
from warnings import warn
from peewee import IntegrityError
......@@ -366,14 +367,22 @@ class TranscriptionMixin:
element_type: str | None = None,
recursive: bool | None = None,
worker_version: str | bool | None = None,
worker_run: str | bool | None = None,
) -> Iterable[dict] | Iterable[CachedTranscription]:
"""
List transcriptions on an element.
Warns:
----
The following parameters are **deprecated**:
- `worker_version` in favor of `worker_run`
:param element: The element to list transcriptions on.
:param element_type: Restrict to transcriptions whose elements have an element type with this slug.
:param recursive: Include transcriptions of any descendant of this element, recursively.
:param worker_version: Restrict to transcriptions created by a worker version with this UUID. Set to False to look for manually created transcriptions.
:param worker_version: **Deprecated** Restrict to transcriptions created by a worker version with this UUID. Set to False to look for manually created transcriptions.
:param worker_run: Restrict to transcriptions created by a worker run with this UUID. Set to False to look for manually created transcriptions.
:returns: An iterable of dicts representing each transcription,
or an iterable of CachedTranscription when cache support is enabled.
"""
......@@ -388,6 +397,11 @@ class TranscriptionMixin:
assert isinstance(recursive, bool), "recursive should be of type bool"
query_params["recursive"] = recursive
if worker_version is not None:
warn(
"`worker_version` usage is deprecated. Consider using `worker_run` instead.",
DeprecationWarning,
stacklevel=1,
)
assert isinstance(
worker_version, str | bool
), "worker_version should be of type str or bool"
......@@ -396,6 +410,15 @@ class TranscriptionMixin:
worker_version is False
), "if of type bool, worker_version can only be set to False"
query_params["worker_version"] = worker_version
if worker_run is not None:
assert isinstance(
worker_run, str | bool
), "worker_run should be of type str or bool"
if isinstance(worker_run, bool):
assert (
worker_run is False
), "if of type bool, worker_run can only be set to False"
query_params["worker_run"] = worker_run
if self.use_cache:
if not recursive:
......@@ -427,10 +450,27 @@ class TranscriptionMixin:
if worker_version is not None:
# If worker_version=False, filter by manual worker_version e.g. None
worker_version_id = worker_version if worker_version else None
transcriptions = transcriptions.where(
CachedTranscription.worker_version_id == worker_version_id
)
worker_version_id = worker_version or None
if worker_version_id:
transcriptions = transcriptions.where(
CachedTranscription.worker_version_id == worker_version_id
)
else:
transcriptions = transcriptions.where(
CachedTranscription.worker_version_id.is_null()
)
if worker_run is not None:
# If worker_run=False, filter by manual worker_run e.g. None
worker_run_id = worker_run or None
if worker_run_id:
transcriptions = transcriptions.where(
CachedTranscription.worker_run_id == worker_run_id
)
else:
transcriptions = transcriptions.where(
CachedTranscription.worker_run_id.is_null()
)
else:
transcriptions = self.api_client.paginate(
"ListTranscriptions", id=element.id, **query_params
......
"""
ElementsWorker methods for worker versions.
"""
import functools
from warnings import warn
def worker_version_deprecation(func):
@functools.wraps(func)
def wrapper(self, *args, **kwargs):
warn("WorkerVersion usage is deprecated.", DeprecationWarning, stacklevel=2)
return func(self, *args, **kwargs)
return wrapper
class WorkerVersionMixin:
@worker_version_deprecation
def get_worker_version(self, worker_version_id: str) -> dict:
"""
Warns:
----
This method is **deprecated**.
Retrieve a worker version, using the [ElementsWorker][arkindex_worker.worker.ElementsWorker]'s internal cache when possible.
:param worker_version_id: ID of the worker version to retrieve.
......@@ -22,8 +38,13 @@ class WorkerVersionMixin:
return worker_version
@worker_version_deprecation
def get_worker_version_slug(self, worker_version_id: str) -> str:
"""
Warns:
----
This method is **deprecated**.
Retrieve the slug of the worker of a worker version, from a worker version UUID.
Uses a worker version from the internal cache if possible, otherwise makes an API request.
......
......@@ -135,6 +135,3 @@ Many attributes are set on the worker during at the configuration stage. Here is
`worker_run_id`
: The ID of the `WorkerRun` corresponding object on the Arkindex instance. In Arkindex mode, this is used in `RetrieveWorkerRun` API call to retrieve the configuration and other necessary information. In developer mode, this is not set nor used.
`worker_version_id`
: The ID of the `WorkerVersion` object linked to the current `WorkerRun`. Like the `worker_run_id` attribute, this is not set nor used in developer mode.
......@@ -20,7 +20,6 @@ missing from the Arkindex instance.
```yaml
---
version: 2
type: worker
workers:
- slug: my_worker
......
......@@ -14,16 +14,11 @@ The following attributes are required in every `.arkindex.yml` file:
: Version of the configuration file in use. An error will occur if the version
number is not set to `2`.
`type`
: Type of the repository. Has to be set to `worker` for a repository holding Arkindex
workers.
### Example configuration
```yaml
---
version: 2
type: worker
workers:
- workers/config.yml
```
......@@ -33,8 +28,6 @@ the repository.
## Worker repository attributes
When the `type` is set to `worker`, the `workers` attribute is mandatory.
The `workers` attribute is a list of the following:
- Paths to a YAML file holding the configuration for a single worker
......@@ -362,7 +355,6 @@ If you have defined user-configurable parameters using these specifications, Ark
```yaml
---
version: 2
type: worker
workers:
# Path to a single YAML file
......
......@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "arkindex-base-worker"
version = "0.3.6"
version = "0.3.7-rc1"
description = "Base Worker to easily build Arkindex ML workflows"
license = { file = "LICENSE" }
dynamic = ["dependencies", "optional-dependencies"]
......
......@@ -466,6 +466,7 @@ def _mock_cached_transcriptions(mock_cache_db):
confidence=0.42,
orientation=TextOrientation.HorizontalLeftToRight,
worker_version_id=UUID("56785678-5678-5678-5678-567856785678"),
worker_run_id=UUID("56785678-5678-5678-5678-567856785678"),
)
CachedTranscription.create(
id=UUID("22222222-2222-2222-2222-222222222222"),
......@@ -506,6 +507,7 @@ def _mock_cached_transcriptions(mock_cache_db):
confidence=0.42,
orientation=TextOrientation.HorizontalLeftToRight,
worker_version_id=None,
worker_run_id=None,
)
......
......@@ -86,7 +86,6 @@ def test_cli_default(mocker):
assert logger.level == logging.NOTSET
assert worker.api_client
assert worker.config == {"someKey": "someValue"} # from API
assert worker.worker_version_id == "12341234-1234-1234-1234-123412341234"
logger.setLevel(logging.NOTSET)
......@@ -106,7 +105,6 @@ def test_cli_arg_verbose_given(mocker):
assert logger.level == logging.DEBUG
assert worker.api_client
assert worker.config == {"someKey": "someValue"} # from API
assert worker.worker_version_id == "12341234-1234-1234-1234-123412341234"
logger.setLevel(logging.NOTSET)
......@@ -126,7 +124,6 @@ def test_cli_envvar_debug_given(mocker, monkeypatch):
assert logger.level == logging.DEBUG
assert worker.api_client
assert worker.config == {"someKey": "someValue"} # from API
assert worker.worker_version_id == "12341234-1234-1234-1234-123412341234"
logger.setLevel(logging.NOTSET)
......@@ -142,7 +139,6 @@ def test_configure_dev_mode(mocker):
assert worker.args.dev is True
assert worker.process_information is None
assert worker.worker_run_id == "56785678-5678-5678-5678-567856785678"
assert worker.is_read_only is True
assert worker.user_configuration == {}
......@@ -212,7 +208,6 @@ def test_configure_worker_run(mocker, responses, caplog):
("arkindex_worker", logging.INFO, "Loaded user configuration from WorkerRun"),
]
assert worker.worker_version_id == "12341234-1234-1234-1234-123412341234"
assert worker.user_configuration == {"a": "b"}
......@@ -482,7 +477,6 @@ def test_configure_load_model_configuration(mocker, responses):
worker.configure()
assert worker.worker_version_id == "12341234-1234-1234-1234-123412341234"
assert worker.model_configuration == {
"param1": "value1",
"param2": 2,
......
......@@ -430,7 +430,7 @@ def test_run_no_datasets(mocker, caplog, mock_dataset_worker, generator):
@pytest.mark.parametrize(
("generator", "error"),
[
(True, "When generating a new dataset, its state should be Open."),
(True, "When generating a new dataset, its state should be Open or Error."),
(False, "When processing an existing dataset, its state should be Complete."),
],
)
......
......@@ -24,7 +24,7 @@ def test_cli_default(monkeypatch):
)
)
monkeypatch.setenv("TASK_ELEMENTS", path)
monkeypatch.setenv("TASK_ELEMENTS", str(path))
monkeypatch.setattr(sys, "argv", ["worker"])
worker = ElementsWorker()
worker.configure()
......
......@@ -1770,13 +1770,11 @@ def test_list_element_children_wrong_with_metadata(mock_elements_worker):
@pytest.mark.parametrize(
("param", "value"),
[
("worker_version", 1234),
("worker_run", 1234),
("transcription_worker_version", 1234),
("transcription_worker_run", 1234),
],
)
def test_list_element_children_wrong_worker_version(mock_elements_worker, param, value):
def test_list_element_children_wrong_worker_run(mock_elements_worker, param, value):
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
with pytest.raises(AssertionError, match=f"{param} should be of type str or bool"):
......@@ -1786,16 +1784,36 @@ def test_list_element_children_wrong_worker_version(mock_elements_worker, param,
)
@pytest.mark.parametrize(
("param", "alternative", "value"),
[
("worker_version", "worker_run", 1234),
("transcription_worker_version", "transcription_worker_run", 1234),
],
)
def test_list_element_children_wrong_worker_version(
mock_elements_worker, param, alternative, value
):
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
# WARNING: pytest.deprecated_call must be placed BEFORE pytest.raises, otherwise `match` argument won't be checked
with pytest.deprecated_call(
match=f"`{param}` usage is deprecated. Consider using `{alternative}` instead."
), pytest.raises(AssertionError, match=f"{param} should be of type str or bool"):
mock_elements_worker.list_element_children(
element=elt,
**{param: value},
)
@pytest.mark.parametrize(
"param",
[
"worker_version",
"worker_run",
"transcription_worker_version",
"transcription_worker_run",
],
)
def test_list_element_children_wrong_bool_worker_version(mock_elements_worker, param):
def test_list_element_children_wrong_bool_worker_run(mock_elements_worker, param):
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
with pytest.raises(
......@@ -1807,6 +1825,30 @@ def test_list_element_children_wrong_bool_worker_version(mock_elements_worker, p
)
@pytest.mark.parametrize(
("param", "alternative"),
[
("worker_version", "worker_run"),
("transcription_worker_version", "transcription_worker_run"),
],
)
def test_list_element_children_wrong_bool_worker_version(
mock_elements_worker, param, alternative
):
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
# WARNING: pytest.deprecated_call must be placed BEFORE pytest.raises, otherwise `match` argument won't be checked
with pytest.deprecated_call(
match=f"`{param}` usage is deprecated. Consider using `{alternative}` instead."
), pytest.raises(
AssertionError, match=f"if of type bool, {param} can only be set to False"
):
mock_elements_worker.list_element_children(
element=elt,
**{param: True},
)
def test_list_element_children_api_error(responses, mock_elements_worker):
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
responses.add(
......@@ -1942,10 +1984,15 @@ def test_list_element_children_manual_worker_version(responses, mock_elements_wo
},
)
for idx, child in enumerate(
mock_elements_worker.list_element_children(element=elt, worker_version=False)
with pytest.deprecated_call(
match="`worker_version` usage is deprecated. Consider using `worker_run` instead."
):
assert child == expected_children[idx]
for idx, child in enumerate(
mock_elements_worker.list_element_children(
element=elt, worker_version=False
)
):
assert child == expected_children[idx]
assert len(responses.calls) == len(BASE_API_CALLS) + 1
assert [
......@@ -2038,56 +2085,84 @@ def test_list_element_children_with_cache_unhandled_param(
},
("22222222-2222-2222-2222-222222222222",),
),
# Filter on element and worker version should give first two elements
# Filter on element and worker run should give second
(
{
"element": CachedElement(id="12341234-1234-1234-1234-123412341234"),
"worker_version": "56785678-5678-5678-5678-567856785678",
"worker_run": "56785678-5678-5678-5678-567856785678",
},
(
"11111111-1111-1111-1111-111111111111",
"22222222-2222-2222-2222-222222222222",
),
("22222222-2222-2222-2222-222222222222",),
),
# Filter on element, type something and worker version should give first
# Filter on element, manual worker run should give first and third
(
{
"element": CachedElement(id="12341234-1234-1234-1234-123412341234"),
"type": "something",
"worker_version": "56785678-5678-5678-5678-567856785678",
"worker_run": False,
},
("11111111-1111-1111-1111-111111111111",),
(
"11111111-1111-1111-1111-111111111111",
"33333333-3333-3333-3333-333333333333",
),
),
# Filter on element, manual worker version should give third
],
)
def test_list_element_children_with_cache(
responses,
mock_elements_worker_with_cache,
filters,
expected_ids,
):
# Check we have 5 elements already present in database
assert CachedElement.select().count() == 5
# Query database through cache
elements = mock_elements_worker_with_cache.list_element_children(**filters)
assert elements.count() == len(expected_ids)
for child, expected_id in zip(elements.order_by("id"), expected_ids, strict=True):
assert child.id == UUID(expected_id)
# Check the worker never hits the API for elements
assert len(responses.calls) == len(BASE_API_CALLS)
assert [
(call.request.method, call.request.url) for call in responses.calls
] == BASE_API_CALLS
@pytest.mark.usefixtures("_mock_cached_elements")
@pytest.mark.parametrize(
("filters", "expected_ids"),
[
# Filter on element and worker version
(
{
"element": CachedElement(id="12341234-1234-1234-1234-123412341234"),
"worker_version": False,
"worker_version": "56785678-5678-5678-5678-567856785678",
},
("33333333-3333-3333-3333-333333333333",),
(
"11111111-1111-1111-1111-111111111111",
"22222222-2222-2222-2222-222222222222",
),
),
# Filter on element and worker run should give second
# Filter on element, type double_page and worker version
(
{
"element": CachedElement(id="12341234-1234-1234-1234-123412341234"),
"worker_run": "56785678-5678-5678-5678-567856785678",
"type": "page",
"worker_version": "56785678-5678-5678-5678-567856785678",
},
("22222222-2222-2222-2222-222222222222",),
),
# Filter on element, manual worker run should give first and third
# Filter on element, manual worker version
(
{
"element": CachedElement(id="12341234-1234-1234-1234-123412341234"),
"worker_run": False,
"worker_version": False,
},
(
"11111111-1111-1111-1111-111111111111",
"33333333-3333-3333-3333-333333333333",
),
("33333333-3333-3333-3333-333333333333",),
),
],
)
def test_list_element_children_with_cache(
def test_list_element_children_with_cache_deprecation(
responses,
mock_elements_worker_with_cache,
filters,
......@@ -2096,8 +2171,11 @@ def test_list_element_children_with_cache(
# Check we have 5 elements already present in database
assert CachedElement.select().count() == 5
# Query database through cache
elements = mock_elements_worker_with_cache.list_element_children(**filters)
with pytest.deprecated_call(
match="`worker_version` usage is deprecated. Consider using `worker_run` instead."
):
# Query database through cache
elements = mock_elements_worker_with_cache.list_element_children(**filters)
assert elements.count() == len(expected_ids)
for child, expected_id in zip(elements.order_by("id"), expected_ids, strict=True):
assert child.id == UUID(expected_id)
......@@ -2218,13 +2296,11 @@ def test_list_element_parents_wrong_with_metadata(mock_elements_worker):
@pytest.mark.parametrize(
("param", "value"),
[
("worker_version", 1234),
("worker_run", 1234),
("transcription_worker_version", 1234),
("transcription_worker_run", 1234),
],
)
def test_list_element_parents_wrong_worker_version(mock_elements_worker, param, value):
def test_list_element_parents_wrong_worker_run(mock_elements_worker, param, value):
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
with pytest.raises(AssertionError, match=f"{param} should be of type str or bool"):
......@@ -2234,16 +2310,36 @@ def test_list_element_parents_wrong_worker_version(mock_elements_worker, param,
)
@pytest.mark.parametrize(
("param", "alternative", "value"),
[
("worker_version", "worker_run", 1234),
("transcription_worker_version", "transcription_worker_run", 1234),
],
)
def test_list_element_parents_wrong_worker_version(
mock_elements_worker, param, alternative, value
):
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
# WARNING: pytest.deprecated_call must be placed BEFORE pytest.raises, otherwise `match` argument won't be checked
with pytest.deprecated_call(
match=f"`{param}` usage is deprecated. Consider using `{alternative}` instead."
), pytest.raises(AssertionError, match=f"{param} should be of type str or bool"):
mock_elements_worker.list_element_parents(
element=elt,
**{param: value},
)
@pytest.mark.parametrize(
"param",
[
"worker_version",
"worker_run",
"transcription_worker_version",
"transcription_worker_run",
],
)
def test_list_element_parents_wrong_bool_worker_version(mock_elements_worker, param):
def test_list_element_parents_wrong_bool_worker_run(mock_elements_worker, param):
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
with pytest.raises(
......@@ -2255,6 +2351,30 @@ def test_list_element_parents_wrong_bool_worker_version(mock_elements_worker, pa
)
@pytest.mark.parametrize(
("param", "alternative"),
[
("worker_version", "worker_run"),
("transcription_worker_version", "transcription_worker_run"),
],
)
def test_list_element_parents_wrong_bool_worker_version(
mock_elements_worker, param, alternative
):
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
# WARNING: pytest.deprecated_call must be placed BEFORE pytest.raises, otherwise `match` argument won't be checked
with pytest.deprecated_call(
match=f"`{param}` usage is deprecated. Consider using `{alternative}` instead."
), pytest.raises(
AssertionError, match=f"if of type bool, {param} can only be set to False"
):
mock_elements_worker.list_element_parents(
element=elt,
**{param: True},
)
def test_list_element_parents_api_error(responses, mock_elements_worker):
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
responses.add(
......@@ -2390,10 +2510,13 @@ def test_list_element_parents_manual_worker_version(responses, mock_elements_wor
},
)
for idx, parent in enumerate(
mock_elements_worker.list_element_parents(element=elt, worker_version=False)
with pytest.deprecated_call(
match="`worker_version` usage is deprecated. Consider using `worker_run` instead."
):
assert parent == expected_parents[idx]
for idx, parent in enumerate(
mock_elements_worker.list_element_parents(element=elt, worker_version=False)
):
assert parent == expected_parents[idx]
assert len(responses.calls) == len(BASE_API_CALLS) + 1
assert [
......@@ -2482,6 +2605,50 @@ def test_list_element_parents_with_cache_unhandled_param(
},
"12341234-1234-1234-1234-123412341234",
),
# Filter on element and worker run
(
{
"element": CachedElement(id="22222222-2222-2222-2222-222222222222"),
"worker_run": "56785678-5678-5678-5678-567856785678",
},
"12341234-1234-1234-1234-123412341234",
),
# Filter on element, manual worker run
(
{
"element": CachedElement(id="12341234-1234-1234-1234-123412341234"),
"worker_run": False,
},
"99999999-9999-9999-9999-999999999999",
),
],
)
def test_list_element_parents_with_cache(
responses,
mock_elements_worker_with_cache,
filters,
expected_id,
):
# Check we have 5 elements already present in database
assert CachedElement.select().count() == 5
# Query database through cache
elements = mock_elements_worker_with_cache.list_element_parents(**filters)
assert elements.count() == 1
for parent in elements.order_by("id"):
assert parent.id == UUID(expected_id)
# Check the worker never hits the API for elements
assert len(responses.calls) == len(BASE_API_CALLS)
assert [
(call.request.method, call.request.url) for call in responses.calls
] == BASE_API_CALLS
@pytest.mark.usefixtures("_mock_cached_elements")
@pytest.mark.parametrize(
("filters", "expected_id"),
[
# Filter on element and worker version
(
{
......@@ -2507,25 +2674,9 @@ def test_list_element_parents_with_cache_unhandled_param(
},
"99999999-9999-9999-9999-999999999999",
),
# Filter on element and worker run
(
{
"element": CachedElement(id="22222222-2222-2222-2222-222222222222"),
"worker_run": "56785678-5678-5678-5678-567856785678",
},
"12341234-1234-1234-1234-123412341234",
),
# Filter on element, manual worker run
(
{
"element": CachedElement(id="12341234-1234-1234-1234-123412341234"),
"worker_run": False,
},
"99999999-9999-9999-9999-999999999999",
),
],
)
def test_list_element_parents_with_cache(
def test_list_element_parents_with_cache_deprecation(
responses,
mock_elements_worker_with_cache,
filters,
......@@ -2534,8 +2685,11 @@ def test_list_element_parents_with_cache(
# Check we have 5 elements already present in database
assert CachedElement.select().count() == 5
# Query database through cache
elements = mock_elements_worker_with_cache.list_element_parents(**filters)
with pytest.deprecated_call(
match="`worker_version` usage is deprecated. Consider using `worker_run` instead."
):
# Query database through cache
elements = mock_elements_worker_with_cache.list_element_parents(**filters)
assert elements.count() == 1
for parent in elements.order_by("id"):
assert parent.id == UUID(expected_id)
......
......@@ -708,7 +708,7 @@ def test_create_transcription_entity_with_confidence_with_cache(
]
def test_list_transcription_entities(fake_dummy_worker):
def test_list_transcription_entities_deprecation(fake_dummy_worker):
transcription = Transcription({"id": "fake_transcription_id"})
worker_version = "worker_version_id"
fake_dummy_worker.api_client.add_response(
......@@ -717,8 +717,28 @@ def test_list_transcription_entities(fake_dummy_worker):
worker_version=worker_version,
response={"id": "entity_id"},
)
with pytest.deprecated_call(
match="`worker_version` usage is deprecated. Consider using `worker_run` instead."
):
assert fake_dummy_worker.list_transcription_entities(
transcription, worker_version=worker_version
) == {"id": "entity_id"}
assert len(fake_dummy_worker.api_client.history) == 1
assert len(fake_dummy_worker.api_client.responses) == 0
def test_list_transcription_entities(fake_dummy_worker):
transcription = Transcription({"id": "fake_transcription_id"})
worker_run = "worker_run_id"
fake_dummy_worker.api_client.add_response(
"ListTranscriptionEntities",
id=transcription.id,
worker_run=worker_run,
response={"id": "entity_id"},
)
assert fake_dummy_worker.list_transcription_entities(
transcription, worker_version
transcription, worker_run=worker_run
) == {"id": "entity_id"}
assert len(fake_dummy_worker.api_client.history) == 1
......
......@@ -1711,10 +1711,25 @@ def test_list_transcriptions_wrong_recursive(mock_elements_worker):
)
def test_list_transcriptions_wrong_worker_version(mock_elements_worker):
def test_list_transcriptions_wrong_worker_run(mock_elements_worker):
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
with pytest.raises(
AssertionError, match="worker_run should be of type str or bool"
):
mock_elements_worker.list_transcriptions(
element=elt,
worker_run=1234,
)
def test_list_transcriptions_wrong_worker_version(mock_elements_worker):
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
# WARNING: pytest.deprecated_call must be placed BEFORE pytest.raises, otherwise `match` argument won't be checked
with pytest.deprecated_call(
match="`worker_version` usage is deprecated. Consider using `worker_run` instead."
), pytest.raises(
AssertionError, match="worker_version should be of type str or bool"
):
mock_elements_worker.list_transcriptions(
......@@ -1723,10 +1738,25 @@ def test_list_transcriptions_wrong_worker_version(mock_elements_worker):
)
def test_list_transcriptions_wrong_bool_worker_version(mock_elements_worker):
def test_list_transcriptions_wrong_bool_worker_run(mock_elements_worker):
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
with pytest.raises(
AssertionError, match="if of type bool, worker_run can only be set to False"
):
mock_elements_worker.list_transcriptions(
element=elt,
worker_run=True,
)
def test_list_transcriptions_wrong_bool_worker_version(mock_elements_worker):
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
# WARNING: pytest.deprecated_call must be placed BEFORE pytest.raises, otherwise `match` argument won't be checked
with pytest.deprecated_call(
match="`worker_version` usage is deprecated. Consider using `worker_run` instead."
), pytest.raises(
AssertionError, match="if of type bool, worker_version can only be set to False"
):
mock_elements_worker.list_transcriptions(
......@@ -1784,6 +1814,7 @@ def test_list_transcriptions(responses, mock_elements_worker):
"text": "hey",
"confidence": 0.42,
"worker_version_id": "56785678-5678-5678-5678-567856785678",
"worker_run_id": "56785678-5678-5678-5678-567856785678",
"element": None,
},
{
......@@ -1791,6 +1822,7 @@ def test_list_transcriptions(responses, mock_elements_worker):
"text": "it's",
"confidence": 0.42,
"worker_version_id": "56785678-5678-5678-5678-567856785678",
"worker_run_id": "56785678-5678-5678-5678-567856785678",
"element": None,
},
{
......@@ -1798,6 +1830,7 @@ def test_list_transcriptions(responses, mock_elements_worker):
"text": "me",
"confidence": 0.42,
"worker_version_id": "56785678-5678-5678-5678-567856785678",
"worker_run_id": "56785678-5678-5678-5678-567856785678",
"element": None,
},
]
......@@ -1836,6 +1869,7 @@ def test_list_transcriptions_manual_worker_version(responses, mock_elements_work
"text": "hey",
"confidence": 0.42,
"worker_version_id": None,
"worker_run_id": None,
"element": None,
}
]
......@@ -1850,8 +1884,50 @@ def test_list_transcriptions_manual_worker_version(responses, mock_elements_work
},
)
with pytest.deprecated_call(
match="`worker_version` usage is deprecated. Consider using `worker_run` instead."
):
for idx, transcription in enumerate(
mock_elements_worker.list_transcriptions(element=elt, worker_version=False)
):
assert transcription == trans[idx]
assert len(responses.calls) == len(BASE_API_CALLS) + 1
assert [
(call.request.method, call.request.url) for call in responses.calls
] == BASE_API_CALLS + [
(
"GET",
"http://testserver/api/v1/element/12341234-1234-1234-1234-123412341234/transcriptions/?worker_version=False",
),
]
def test_list_transcriptions_manual_worker_run(responses, mock_elements_worker):
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
trans = [
{
"id": "0000",
"text": "hey",
"confidence": 0.42,
"worker_version_id": None,
"worker_run_id": None,
"element": None,
}
]
responses.add(
responses.GET,
"http://testserver/api/v1/element/12341234-1234-1234-1234-123412341234/transcriptions/?worker_run=False",
status=200,
json={
"count": 1,
"next": None,
"results": trans,
},
)
for idx, transcription in enumerate(
mock_elements_worker.list_transcriptions(element=elt, worker_version=False)
mock_elements_worker.list_transcriptions(element=elt, worker_run=False)
):
assert transcription == trans[idx]
......@@ -1861,7 +1937,7 @@ def test_list_transcriptions_manual_worker_version(responses, mock_elements_work
] == BASE_API_CALLS + [
(
"GET",
"http://testserver/api/v1/element/12341234-1234-1234-1234-123412341234/transcriptions/?worker_version=False",
"http://testserver/api/v1/element/12341234-1234-1234-1234-123412341234/transcriptions/?worker_run=False",
),
]
......@@ -1895,16 +1971,26 @@ def test_list_transcriptions_manual_worker_version(responses, mock_elements_work
"66666666-6666-6666-6666-666666666666",
),
),
# Filter on element and worker_version should give first transcription
# Filter on element and worker run should give the first transcription
(
{
"element": CachedElement(
id="11111111-1111-1111-1111-111111111111", type="page"
),
"worker_version": "56785678-5678-5678-5678-567856785678",
"worker_run": "56785678-5678-5678-5678-567856785678",
},
("11111111-1111-1111-1111-111111111111",),
),
# Filter on element, manual worker run should give the sixth transcription
(
{
"element": CachedElement(
id="11111111-1111-1111-1111-111111111111", type="page"
),
"worker_run": False,
},
("66666666-6666-6666-6666-666666666666",),
),
# Filter recursively on element should give all transcriptions inserted
(
{
......@@ -1922,33 +2008,70 @@ def test_list_transcriptions_manual_worker_version(responses, mock_elements_work
"66666666-6666-6666-6666-666666666666",
),
),
# Filter recursively on element and worker_version should give four transcriptions
# Filter recursively on element and element_type should give three transcriptions
(
{
"element": CachedElement(
id="11111111-1111-1111-1111-111111111111", type="page"
),
"worker_version": "90129012-9012-9012-9012-901290129012",
"element_type": "something_else",
"recursive": True,
},
(
"22222222-2222-2222-2222-222222222222",
"33333333-3333-3333-3333-333333333333",
"44444444-4444-4444-4444-444444444444",
"55555555-5555-5555-5555-555555555555",
),
),
# Filter recursively on element and element_type should give three transcriptions
],
)
def test_list_transcriptions_with_cache(
responses, mock_elements_worker_with_cache, filters, expected_ids
):
# Check we have 5 elements already present in database
assert CachedTranscription.select().count() == 6
# Query database through cache
transcriptions = mock_elements_worker_with_cache.list_transcriptions(**filters)
assert transcriptions.count() == len(expected_ids)
for transcription, expected_id in zip(
transcriptions.order_by(CachedTranscription.id), expected_ids, strict=True
):
assert transcription.id == UUID(expected_id)
# Check the worker never hits the API for elements
assert len(responses.calls) == len(BASE_API_CALLS)
assert [
(call.request.method, call.request.url) for call in responses.calls
] == BASE_API_CALLS
@pytest.mark.usefixtures("_mock_cached_transcriptions")
@pytest.mark.parametrize(
("filters", "expected_ids"),
[
# Filter on element and worker_version should give first transcription
(
{
"element": CachedElement(
id="11111111-1111-1111-1111-111111111111", type="page"
),
"element_type": "something_else",
"worker_version": "56785678-5678-5678-5678-567856785678",
},
("11111111-1111-1111-1111-111111111111",),
),
# Filter recursively on element and worker_version should give four transcriptions
(
{
"element": CachedElement(
id="11111111-1111-1111-1111-111111111111", type="page"
),
"worker_version": "90129012-9012-9012-9012-901290129012",
"recursive": True,
},
(
"22222222-2222-2222-2222-222222222222",
"33333333-3333-3333-3333-333333333333",
"44444444-4444-4444-4444-444444444444",
"55555555-5555-5555-5555-555555555555",
),
......@@ -1965,14 +2088,17 @@ def test_list_transcriptions_manual_worker_version(responses, mock_elements_work
),
],
)
def test_list_transcriptions_with_cache(
def test_list_transcriptions_with_cache_deprecation(
responses, mock_elements_worker_with_cache, filters, expected_ids
):
# Check we have 5 elements already present in database
assert CachedTranscription.select().count() == 6
# Query database through cache
transcriptions = mock_elements_worker_with_cache.list_transcriptions(**filters)
with pytest.deprecated_call(
match="`worker_version` usage is deprecated. Consider using `worker_run` instead."
):
# Query database through cache
transcriptions = mock_elements_worker_with_cache.list_transcriptions(**filters)
assert transcriptions.count() == len(expected_ids)
for transcription, expected_id in zip(
transcriptions.order_by(CachedTranscription.id), expected_ids, strict=True
......
......@@ -20,7 +20,8 @@ def test_get_worker_version(fake_dummy_worker):
api_client.add_response("RetrieveWorkerVersion", response, id=TEST_VERSION_ID)
res = fake_dummy_worker.get_worker_version(TEST_VERSION_ID)
with pytest.deprecated_call(match="WorkerVersion usage is deprecated."):
res = fake_dummy_worker.get_worker_version(TEST_VERSION_ID)
assert res == response
assert fake_dummy_worker._worker_version_cache[TEST_VERSION_ID] == response
......@@ -33,8 +34,11 @@ def test_get_worker_version__uses_cache(fake_dummy_worker):
api_client.add_response("RetrieveWorkerVersion", response, id=TEST_VERSION_ID)
response_1 = fake_dummy_worker.get_worker_version(TEST_VERSION_ID)
response_2 = fake_dummy_worker.get_worker_version(TEST_VERSION_ID)
with pytest.deprecated_call(match="WorkerVersion usage is deprecated."):
response_1 = fake_dummy_worker.get_worker_version(TEST_VERSION_ID)
with pytest.deprecated_call(match="WorkerVersion usage is deprecated."):
response_2 = fake_dummy_worker.get_worker_version(TEST_VERSION_ID)
assert response_1 == response
assert response_1 == response_2
......@@ -51,12 +55,16 @@ def test_get_worker_version_slug(mocker, fake_dummy_worker):
"worker": {"slug": "mock_slug"},
}
slug = fake_dummy_worker.get_worker_version_slug(TEST_VERSION_ID)
with pytest.deprecated_call(match="WorkerVersion usage is deprecated."):
slug = fake_dummy_worker.get_worker_version_slug(TEST_VERSION_ID)
assert slug == "mock_slug"
def test_get_worker_version_slug_none(fake_dummy_worker):
with pytest.raises(ValueError, match="No worker version ID"):
# WARNING: pytest.deprecated_call must be placed BEFORE pytest.raises, otherwise `match` argument won't be checked
with pytest.deprecated_call(
match="WorkerVersion usage is deprecated."
), pytest.raises(ValueError, match="No worker version ID"):
fake_dummy_worker.get_worker_version_slug(None)
......
---
version: 2
type: worker
workers:
- slug: {{ cookiecutter.slug }}
name: {{ cookiecutter.name }}
......