From c18d47068ec67417fa02aec8cba5d792e1703d2d Mon Sep 17 00:00:00 2001 From: Eva Bardou <bardou@teklia.com> Date: Tue, 30 Jan 2024 16:27:58 +0000 Subject: [PATCH] Deprecate `worker_version` usage and support `worker_run` where it was missing --- arkindex_worker/worker/base.py | 16 +- arkindex_worker/worker/classification.py | 7 - arkindex_worker/worker/element.py | 45 ++- arkindex_worker/worker/entity.py | 25 +- arkindex_worker/worker/transcription.py | 50 +++- arkindex_worker/worker/version.py | 21 ++ docs/contents/implem/configure.md | 3 - tests/conftest.py | 2 + tests/test_base_worker.py | 6 - tests/test_elements_worker/test_elements.py | 276 ++++++++++++++---- tests/test_elements_worker/test_entities.py | 24 +- .../test_transcriptions.py | 154 +++++++++- tests/test_elements_worker/test_worker.py | 18 +- 13 files changed, 532 insertions(+), 115 deletions(-) diff --git a/arkindex_worker/worker/base.py b/arkindex_worker/worker/base.py index 573f1bd1..d638df80 100644 --- a/arkindex_worker/worker/base.py +++ b/arkindex_worker/worker/base.py @@ -72,7 +72,7 @@ class BaseWorker: self.parser.add_argument( "-c", "--config", - help="Alternative configuration file when running without a Worker Version ID", + help="Alternative configuration file when running without a Worker Run ID", type=open, ) self.parser.add_argument( @@ -94,7 +94,7 @@ class BaseWorker: "--dev", help=( "Run worker in developer mode. " - "Worker will be in read-only state even if a worker_version is supplied. " + "Worker will be in read-only state even if a worker run is supplied. " ), action="store_true", default=False, @@ -176,6 +176,14 @@ class BaseWorker: """ return self.args.dev or self.worker_run_id is None + @property + def worker_version_id(self): + """Deprecated property previously used to retrieve the current WorkerVersion ID. + + :raises DeprecationWarning: Whenever `worker_version_id` is used. + """ + raise DeprecationWarning("`worker_version_id` usage is deprecated") + def setup_api_client(self): """ Create an ArkindexClient to make API requests towards Arkindex instances. @@ -243,10 +251,6 @@ class BaseWorker: # Load worker version information worker_version = worker_run["worker_version"] - - # Store worker version id - self.worker_version_id = worker_version["id"] - self.worker_details = worker_version["worker"] logger.info(f"Loaded {worker_run['summary']} from API") diff --git a/arkindex_worker/worker/classification.py b/arkindex_worker/worker/classification.py index 37f42dd8..bc9257b8 100644 --- a/arkindex_worker/worker/classification.py +++ b/arkindex_worker/worker/classification.py @@ -154,13 +154,6 @@ class ClassificationMixin: # Detect already existing classification if e.status_code == 400 and "non_field_errors" in e.content: if ( - "The fields element, worker_version, ml_class must make a unique set." - in e.content["non_field_errors"] - ): - logger.warning( - f"This worker version has already set {ml_class} on element {element.id}" - ) - elif ( "The fields element, worker_run, ml_class must make a unique set." in e.content["non_field_errors"] ): diff --git a/arkindex_worker/worker/element.py b/arkindex_worker/worker/element.py index a1f09849..0ab37242 100644 --- a/arkindex_worker/worker/element.py +++ b/arkindex_worker/worker/element.py @@ -4,6 +4,7 @@ ElementsWorker methods for elements and element types. from collections.abc import Iterable from typing import NamedTuple from uuid import UUID +from warnings import warn from peewee import IntegrityError @@ -422,6 +423,13 @@ class ElementMixin: """ List children of an element. + Warns: + ---- + The following parameters are **deprecated**: + + - `transcription_worker_version` in favor of `transcription_worker_run` + - `worker_version` in favor of `worker_run` + :param element: Parent element to find children of. :param folder: Restrict to or exclude elements with folder types. This parameter is not supported when caching is enabled. @@ -429,9 +437,9 @@ class ElementMixin: This parameter is not supported when caching is enabled. :param recursive: Look for elements recursively (grand-children, etc.) This parameter is not supported when caching is enabled. - :param transcription_worker_version: Restrict to elements that have a transcription created by a worker version with this UUID. + :param transcription_worker_version: **Deprecated** Restrict to elements that have a transcription created by a worker version with this UUID. Set to False to look for elements that have a manual transcription. This parameter is not supported when caching is enabled. - :param transcription_worker_run: Restrict to elements that have a transcription created by a worker run with this UUID. + :param transcription_worker_run: Restrict to elements that have a transcription created by a worker run with this UUID. Set to False to look for elements that have a manual transcription. This parameter is not supported when caching is enabled. :param type: Restrict to elements with a specific type slug This parameter is not supported when caching is enabled. @@ -447,7 +455,7 @@ class ElementMixin: :param with_zone: Include the ``zone`` attribute in the response, holding the element's image and polygon. This parameter is not supported when caching is enabled. - :param worker_version: Restrict to elements created by a worker version with this UUID. + :param worker_version: **Deprecated** Restrict to elements created by a worker version with this UUID. :param worker_run: Restrict to elements created by a worker run with this UUID. :return: An iterable of dicts from the ``ListElementChildren`` API endpoint, or an iterable of [CachedElement][arkindex_worker.cache.CachedElement] when caching is enabled. @@ -466,6 +474,11 @@ class ElementMixin: assert isinstance(recursive, bool), "recursive should be of type bool" query_params["recursive"] = recursive if transcription_worker_version is not None: + warn( + "`transcription_worker_version` usage is deprecated. Consider using `transcription_worker_run` instead.", + DeprecationWarning, + stacklevel=1, + ) assert isinstance( transcription_worker_version, str | bool ), "transcription_worker_version should be of type str or bool" @@ -506,6 +519,11 @@ class ElementMixin: assert isinstance(with_zone, bool), "with_zone should be of type bool" query_params["with_zone"] = with_zone if worker_version is not None: + warn( + "`worker_version` usage is deprecated. Consider using `worker_run` instead.", + DeprecationWarning, + stacklevel=1, + ) assert isinstance( worker_version, str | bool ), "worker_version should be of type str or bool" @@ -584,6 +602,13 @@ class ElementMixin: """ List parents of an element. + Warns: + ---- + The following parameters are **deprecated**: + + - `transcription_worker_version` in favor of `transcription_worker_run` + - `worker_version` in favor of `worker_run` + :param element: Child element to find parents of. :param folder: Restrict to or exclude elements with folder types. This parameter is not supported when caching is enabled. @@ -591,7 +616,7 @@ class ElementMixin: This parameter is not supported when caching is enabled. :param recursive: Look for elements recursively (grand-children, etc.) This parameter is not supported when caching is enabled. - :param transcription_worker_version: Restrict to elements that have a transcription created by a worker version with this UUID. + :param transcription_worker_version: **Deprecated** Restrict to elements that have a transcription created by a worker version with this UUID. This parameter is not supported when caching is enabled. :param transcription_worker_run: Restrict to elements that have a transcription created by a worker run with this UUID. This parameter is not supported when caching is enabled. @@ -609,7 +634,7 @@ class ElementMixin: :param with_zone: Include the ``zone`` attribute in the response, holding the element's image and polygon. This parameter is not supported when caching is enabled. - :param worker_version: Restrict to elements created by a worker version with this UUID. + :param worker_version: **Deprecated** Restrict to elements created by a worker version with this UUID. :param worker_run: Restrict to elements created by a worker run with this UUID. :return: An iterable of dicts from the ``ListElementParents`` API endpoint, or an iterable of [CachedElement][arkindex_worker.cache.CachedElement] when caching is enabled. @@ -628,6 +653,11 @@ class ElementMixin: assert isinstance(recursive, bool), "recursive should be of type bool" query_params["recursive"] = recursive if transcription_worker_version is not None: + warn( + "`transcription_worker_version` usage is deprecated. Consider using `transcription_worker_run` instead.", + DeprecationWarning, + stacklevel=1, + ) assert isinstance( transcription_worker_version, str | bool ), "transcription_worker_version should be of type str or bool" @@ -668,6 +698,11 @@ class ElementMixin: assert isinstance(with_zone, bool), "with_zone should be of type bool" query_params["with_zone"] = with_zone if worker_version is not None: + warn( + "`worker_version` usage is deprecated. Consider using `worker_run` instead.", + DeprecationWarning, + stacklevel=1, + ) assert isinstance( worker_version, str | bool ), "worker_version should be of type str or bool" diff --git a/arkindex_worker/worker/entity.py b/arkindex_worker/worker/entity.py index bb52291c..07fd8a6d 100644 --- a/arkindex_worker/worker/entity.py +++ b/arkindex_worker/worker/entity.py @@ -4,6 +4,7 @@ ElementsWorker methods for entities. from operator import itemgetter from typing import TypedDict +from warnings import warn from peewee import IntegrityError @@ -297,13 +298,21 @@ class EntityMixin: self, transcription: Transcription, worker_version: str | bool | None = None, + worker_run: str | bool | None = None, ): """ List existing entities on a transcription This method does not support cache + Warns: + ---- + The following parameters are **deprecated**: + + - `worker_version` in favor of `worker_run` + :param transcription: The transcription to list entities on. - :param worker_version: Restrict to entities created by a worker version with this UUID. Set to False to look for manually created transcriptions. + :param worker_version: **Deprecated** Restrict to entities created by a worker version with this UUID. Set to False to look for manually created entities. + :param worker_run: Restrict to entities created by a worker run with this UUID. Set to False to look for manually created entities. """ query_params = {} assert transcription and isinstance( @@ -311,6 +320,11 @@ class EntityMixin: ), "transcription shouldn't be null and should be a Transcription" if worker_version is not None: + warn( + "`worker_version` usage is deprecated. Consider using `worker_run` instead.", + DeprecationWarning, + stacklevel=1, + ) assert isinstance( worker_version, str | bool ), "worker_version should be of type str or bool" @@ -320,6 +334,15 @@ class EntityMixin: worker_version is False ), "if of type bool, worker_version can only be set to False" query_params["worker_version"] = worker_version + if worker_run is not None: + assert isinstance( + worker_run, str | bool + ), "worker_run should be of type str or bool" + if isinstance(worker_run, bool): + assert ( + worker_run is False + ), "if of type bool, worker_run can only be set to False" + query_params["worker_run"] = worker_run return self.api_client.paginate( "ListTranscriptionEntities", id=transcription.id, **query_params diff --git a/arkindex_worker/worker/transcription.py b/arkindex_worker/worker/transcription.py index ec3781a9..80300cfc 100644 --- a/arkindex_worker/worker/transcription.py +++ b/arkindex_worker/worker/transcription.py @@ -4,6 +4,7 @@ ElementsWorker methods for transcriptions. from collections.abc import Iterable from enum import Enum +from warnings import warn from peewee import IntegrityError @@ -366,14 +367,22 @@ class TranscriptionMixin: element_type: str | None = None, recursive: bool | None = None, worker_version: str | bool | None = None, + worker_run: str | bool | None = None, ) -> Iterable[dict] | Iterable[CachedTranscription]: """ List transcriptions on an element. + Warns: + ---- + The following parameters are **deprecated**: + + - `worker_version` in favor of `worker_run` + :param element: The element to list transcriptions on. :param element_type: Restrict to transcriptions whose elements have an element type with this slug. :param recursive: Include transcriptions of any descendant of this element, recursively. - :param worker_version: Restrict to transcriptions created by a worker version with this UUID. Set to False to look for manually created transcriptions. + :param worker_version: **Deprecated** Restrict to transcriptions created by a worker version with this UUID. Set to False to look for manually created transcriptions. + :param worker_run: Restrict to transcriptions created by a worker run with this UUID. Set to False to look for manually created transcriptions. :returns: An iterable of dicts representing each transcription, or an iterable of CachedTranscription when cache support is enabled. """ @@ -388,6 +397,11 @@ class TranscriptionMixin: assert isinstance(recursive, bool), "recursive should be of type bool" query_params["recursive"] = recursive if worker_version is not None: + warn( + "`worker_version` usage is deprecated. Consider using `worker_run` instead.", + DeprecationWarning, + stacklevel=1, + ) assert isinstance( worker_version, str | bool ), "worker_version should be of type str or bool" @@ -396,6 +410,15 @@ class TranscriptionMixin: worker_version is False ), "if of type bool, worker_version can only be set to False" query_params["worker_version"] = worker_version + if worker_run is not None: + assert isinstance( + worker_run, str | bool + ), "worker_run should be of type str or bool" + if isinstance(worker_run, bool): + assert ( + worker_run is False + ), "if of type bool, worker_run can only be set to False" + query_params["worker_run"] = worker_run if self.use_cache: if not recursive: @@ -427,10 +450,27 @@ class TranscriptionMixin: if worker_version is not None: # If worker_version=False, filter by manual worker_version e.g. None - worker_version_id = worker_version if worker_version else None - transcriptions = transcriptions.where( - CachedTranscription.worker_version_id == worker_version_id - ) + worker_version_id = worker_version or None + if worker_version_id: + transcriptions = transcriptions.where( + CachedTranscription.worker_version_id == worker_version_id + ) + else: + transcriptions = transcriptions.where( + CachedTranscription.worker_version_id.is_null() + ) + + if worker_run is not None: + # If worker_run=False, filter by manual worker_run e.g. None + worker_run_id = worker_run or None + if worker_run_id: + transcriptions = transcriptions.where( + CachedTranscription.worker_run_id == worker_run_id + ) + else: + transcriptions = transcriptions.where( + CachedTranscription.worker_run_id.is_null() + ) else: transcriptions = self.api_client.paginate( "ListTranscriptions", id=element.id, **query_params diff --git a/arkindex_worker/worker/version.py b/arkindex_worker/worker/version.py index d71e4509..ba9a65e3 100644 --- a/arkindex_worker/worker/version.py +++ b/arkindex_worker/worker/version.py @@ -1,11 +1,27 @@ """ ElementsWorker methods for worker versions. """ +import functools +from warnings import warn + + +def worker_version_deprecation(func): + @functools.wraps(func) + def wrapper(self, *args, **kwargs): + warn("WorkerVersion usage is deprecated.", DeprecationWarning, stacklevel=2) + return func(self, *args, **kwargs) + + return wrapper class WorkerVersionMixin: + @worker_version_deprecation def get_worker_version(self, worker_version_id: str) -> dict: """ + Warns: + ---- + This method is **deprecated**. + Retrieve a worker version, using the [ElementsWorker][arkindex_worker.worker.ElementsWorker]'s internal cache when possible. :param worker_version_id: ID of the worker version to retrieve. @@ -22,8 +38,13 @@ class WorkerVersionMixin: return worker_version + @worker_version_deprecation def get_worker_version_slug(self, worker_version_id: str) -> str: """ + Warns: + ---- + This method is **deprecated**. + Retrieve the slug of the worker of a worker version, from a worker version UUID. Uses a worker version from the internal cache if possible, otherwise makes an API request. diff --git a/docs/contents/implem/configure.md b/docs/contents/implem/configure.md index cdb425e4..903959f4 100644 --- a/docs/contents/implem/configure.md +++ b/docs/contents/implem/configure.md @@ -135,6 +135,3 @@ Many attributes are set on the worker during at the configuration stage. Here is `worker_run_id` : The ID of the `WorkerRun` corresponding object on the Arkindex instance. In Arkindex mode, this is used in `RetrieveWorkerRun` API call to retrieve the configuration and other necessary information. In developer mode, this is not set nor used. - -`worker_version_id` -: The ID of the `WorkerVersion` object linked to the current `WorkerRun`. Like the `worker_run_id` attribute, this is not set nor used in developer mode. diff --git a/tests/conftest.py b/tests/conftest.py index 4155fbe1..c52e60f1 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -466,6 +466,7 @@ def _mock_cached_transcriptions(mock_cache_db): confidence=0.42, orientation=TextOrientation.HorizontalLeftToRight, worker_version_id=UUID("56785678-5678-5678-5678-567856785678"), + worker_run_id=UUID("56785678-5678-5678-5678-567856785678"), ) CachedTranscription.create( id=UUID("22222222-2222-2222-2222-222222222222"), @@ -506,6 +507,7 @@ def _mock_cached_transcriptions(mock_cache_db): confidence=0.42, orientation=TextOrientation.HorizontalLeftToRight, worker_version_id=None, + worker_run_id=None, ) diff --git a/tests/test_base_worker.py b/tests/test_base_worker.py index 75accdd3..262a3b7b 100644 --- a/tests/test_base_worker.py +++ b/tests/test_base_worker.py @@ -86,7 +86,6 @@ def test_cli_default(mocker): assert logger.level == logging.NOTSET assert worker.api_client assert worker.config == {"someKey": "someValue"} # from API - assert worker.worker_version_id == "12341234-1234-1234-1234-123412341234" logger.setLevel(logging.NOTSET) @@ -106,7 +105,6 @@ def test_cli_arg_verbose_given(mocker): assert logger.level == logging.DEBUG assert worker.api_client assert worker.config == {"someKey": "someValue"} # from API - assert worker.worker_version_id == "12341234-1234-1234-1234-123412341234" logger.setLevel(logging.NOTSET) @@ -126,7 +124,6 @@ def test_cli_envvar_debug_given(mocker, monkeypatch): assert logger.level == logging.DEBUG assert worker.api_client assert worker.config == {"someKey": "someValue"} # from API - assert worker.worker_version_id == "12341234-1234-1234-1234-123412341234" logger.setLevel(logging.NOTSET) @@ -142,7 +139,6 @@ def test_configure_dev_mode(mocker): assert worker.args.dev is True assert worker.process_information is None - assert worker.worker_run_id == "56785678-5678-5678-5678-567856785678" assert worker.is_read_only is True assert worker.user_configuration == {} @@ -212,7 +208,6 @@ def test_configure_worker_run(mocker, responses, caplog): ("arkindex_worker", logging.INFO, "Loaded user configuration from WorkerRun"), ] - assert worker.worker_version_id == "12341234-1234-1234-1234-123412341234" assert worker.user_configuration == {"a": "b"} @@ -482,7 +477,6 @@ def test_configure_load_model_configuration(mocker, responses): worker.configure() - assert worker.worker_version_id == "12341234-1234-1234-1234-123412341234" assert worker.model_configuration == { "param1": "value1", "param2": 2, diff --git a/tests/test_elements_worker/test_elements.py b/tests/test_elements_worker/test_elements.py index 2445f230..32872abf 100644 --- a/tests/test_elements_worker/test_elements.py +++ b/tests/test_elements_worker/test_elements.py @@ -1770,13 +1770,11 @@ def test_list_element_children_wrong_with_metadata(mock_elements_worker): @pytest.mark.parametrize( ("param", "value"), [ - ("worker_version", 1234), ("worker_run", 1234), - ("transcription_worker_version", 1234), ("transcription_worker_run", 1234), ], ) -def test_list_element_children_wrong_worker_version(mock_elements_worker, param, value): +def test_list_element_children_wrong_worker_run(mock_elements_worker, param, value): elt = Element({"id": "12341234-1234-1234-1234-123412341234"}) with pytest.raises(AssertionError, match=f"{param} should be of type str or bool"): @@ -1786,16 +1784,36 @@ def test_list_element_children_wrong_worker_version(mock_elements_worker, param, ) +@pytest.mark.parametrize( + ("param", "alternative", "value"), + [ + ("worker_version", "worker_run", 1234), + ("transcription_worker_version", "transcription_worker_run", 1234), + ], +) +def test_list_element_children_wrong_worker_version( + mock_elements_worker, param, alternative, value +): + elt = Element({"id": "12341234-1234-1234-1234-123412341234"}) + + # WARNING: pytest.deprecated_call must be placed BEFORE pytest.raises, otherwise `match` argument won't be checked + with pytest.deprecated_call( + match=f"`{param}` usage is deprecated. Consider using `{alternative}` instead." + ), pytest.raises(AssertionError, match=f"{param} should be of type str or bool"): + mock_elements_worker.list_element_children( + element=elt, + **{param: value}, + ) + + @pytest.mark.parametrize( "param", [ - "worker_version", "worker_run", - "transcription_worker_version", "transcription_worker_run", ], ) -def test_list_element_children_wrong_bool_worker_version(mock_elements_worker, param): +def test_list_element_children_wrong_bool_worker_run(mock_elements_worker, param): elt = Element({"id": "12341234-1234-1234-1234-123412341234"}) with pytest.raises( @@ -1807,6 +1825,30 @@ def test_list_element_children_wrong_bool_worker_version(mock_elements_worker, p ) +@pytest.mark.parametrize( + ("param", "alternative"), + [ + ("worker_version", "worker_run"), + ("transcription_worker_version", "transcription_worker_run"), + ], +) +def test_list_element_children_wrong_bool_worker_version( + mock_elements_worker, param, alternative +): + elt = Element({"id": "12341234-1234-1234-1234-123412341234"}) + + # WARNING: pytest.deprecated_call must be placed BEFORE pytest.raises, otherwise `match` argument won't be checked + with pytest.deprecated_call( + match=f"`{param}` usage is deprecated. Consider using `{alternative}` instead." + ), pytest.raises( + AssertionError, match=f"if of type bool, {param} can only be set to False" + ): + mock_elements_worker.list_element_children( + element=elt, + **{param: True}, + ) + + def test_list_element_children_api_error(responses, mock_elements_worker): elt = Element({"id": "12341234-1234-1234-1234-123412341234"}) responses.add( @@ -1942,10 +1984,15 @@ def test_list_element_children_manual_worker_version(responses, mock_elements_wo }, ) - for idx, child in enumerate( - mock_elements_worker.list_element_children(element=elt, worker_version=False) + with pytest.deprecated_call( + match="`worker_version` usage is deprecated. Consider using `worker_run` instead." ): - assert child == expected_children[idx] + for idx, child in enumerate( + mock_elements_worker.list_element_children( + element=elt, worker_version=False + ) + ): + assert child == expected_children[idx] assert len(responses.calls) == len(BASE_API_CALLS) + 1 assert [ @@ -2038,56 +2085,84 @@ def test_list_element_children_with_cache_unhandled_param( }, ("22222222-2222-2222-2222-222222222222",), ), - # Filter on element and worker version should give first two elements + # Filter on element and worker run should give second ( { "element": CachedElement(id="12341234-1234-1234-1234-123412341234"), - "worker_version": "56785678-5678-5678-5678-567856785678", + "worker_run": "56785678-5678-5678-5678-567856785678", }, - ( - "11111111-1111-1111-1111-111111111111", - "22222222-2222-2222-2222-222222222222", - ), + ("22222222-2222-2222-2222-222222222222",), ), - # Filter on element, type something and worker version should give first + # Filter on element, manual worker run should give first and third ( { "element": CachedElement(id="12341234-1234-1234-1234-123412341234"), - "type": "something", - "worker_version": "56785678-5678-5678-5678-567856785678", + "worker_run": False, }, - ("11111111-1111-1111-1111-111111111111",), + ( + "11111111-1111-1111-1111-111111111111", + "33333333-3333-3333-3333-333333333333", + ), ), - # Filter on element, manual worker version should give third + ], +) +def test_list_element_children_with_cache( + responses, + mock_elements_worker_with_cache, + filters, + expected_ids, +): + # Check we have 5 elements already present in database + assert CachedElement.select().count() == 5 + + # Query database through cache + elements = mock_elements_worker_with_cache.list_element_children(**filters) + assert elements.count() == len(expected_ids) + for child, expected_id in zip(elements.order_by("id"), expected_ids, strict=True): + assert child.id == UUID(expected_id) + + # Check the worker never hits the API for elements + assert len(responses.calls) == len(BASE_API_CALLS) + assert [ + (call.request.method, call.request.url) for call in responses.calls + ] == BASE_API_CALLS + + +@pytest.mark.usefixtures("_mock_cached_elements") +@pytest.mark.parametrize( + ("filters", "expected_ids"), + [ + # Filter on element and worker version ( { "element": CachedElement(id="12341234-1234-1234-1234-123412341234"), - "worker_version": False, + "worker_version": "56785678-5678-5678-5678-567856785678", }, - ("33333333-3333-3333-3333-333333333333",), + ( + "11111111-1111-1111-1111-111111111111", + "22222222-2222-2222-2222-222222222222", + ), ), - # Filter on element and worker run should give second + # Filter on element, type double_page and worker version ( { "element": CachedElement(id="12341234-1234-1234-1234-123412341234"), - "worker_run": "56785678-5678-5678-5678-567856785678", + "type": "page", + "worker_version": "56785678-5678-5678-5678-567856785678", }, ("22222222-2222-2222-2222-222222222222",), ), - # Filter on element, manual worker run should give first and third + # Filter on element, manual worker version ( { "element": CachedElement(id="12341234-1234-1234-1234-123412341234"), - "worker_run": False, + "worker_version": False, }, - ( - "11111111-1111-1111-1111-111111111111", - "33333333-3333-3333-3333-333333333333", - ), + ("33333333-3333-3333-3333-333333333333",), ), ], ) -def test_list_element_children_with_cache( +def test_list_element_children_with_cache_deprecation( responses, mock_elements_worker_with_cache, filters, @@ -2096,8 +2171,11 @@ def test_list_element_children_with_cache( # Check we have 5 elements already present in database assert CachedElement.select().count() == 5 - # Query database through cache - elements = mock_elements_worker_with_cache.list_element_children(**filters) + with pytest.deprecated_call( + match="`worker_version` usage is deprecated. Consider using `worker_run` instead." + ): + # Query database through cache + elements = mock_elements_worker_with_cache.list_element_children(**filters) assert elements.count() == len(expected_ids) for child, expected_id in zip(elements.order_by("id"), expected_ids, strict=True): assert child.id == UUID(expected_id) @@ -2218,13 +2296,11 @@ def test_list_element_parents_wrong_with_metadata(mock_elements_worker): @pytest.mark.parametrize( ("param", "value"), [ - ("worker_version", 1234), ("worker_run", 1234), - ("transcription_worker_version", 1234), ("transcription_worker_run", 1234), ], ) -def test_list_element_parents_wrong_worker_version(mock_elements_worker, param, value): +def test_list_element_parents_wrong_worker_run(mock_elements_worker, param, value): elt = Element({"id": "12341234-1234-1234-1234-123412341234"}) with pytest.raises(AssertionError, match=f"{param} should be of type str or bool"): @@ -2234,16 +2310,36 @@ def test_list_element_parents_wrong_worker_version(mock_elements_worker, param, ) +@pytest.mark.parametrize( + ("param", "alternative", "value"), + [ + ("worker_version", "worker_run", 1234), + ("transcription_worker_version", "transcription_worker_run", 1234), + ], +) +def test_list_element_parents_wrong_worker_version( + mock_elements_worker, param, alternative, value +): + elt = Element({"id": "12341234-1234-1234-1234-123412341234"}) + + # WARNING: pytest.deprecated_call must be placed BEFORE pytest.raises, otherwise `match` argument won't be checked + with pytest.deprecated_call( + match=f"`{param}` usage is deprecated. Consider using `{alternative}` instead." + ), pytest.raises(AssertionError, match=f"{param} should be of type str or bool"): + mock_elements_worker.list_element_parents( + element=elt, + **{param: value}, + ) + + @pytest.mark.parametrize( "param", [ - "worker_version", "worker_run", - "transcription_worker_version", "transcription_worker_run", ], ) -def test_list_element_parents_wrong_bool_worker_version(mock_elements_worker, param): +def test_list_element_parents_wrong_bool_worker_run(mock_elements_worker, param): elt = Element({"id": "12341234-1234-1234-1234-123412341234"}) with pytest.raises( @@ -2255,6 +2351,30 @@ def test_list_element_parents_wrong_bool_worker_version(mock_elements_worker, pa ) +@pytest.mark.parametrize( + ("param", "alternative"), + [ + ("worker_version", "worker_run"), + ("transcription_worker_version", "transcription_worker_run"), + ], +) +def test_list_element_parents_wrong_bool_worker_version( + mock_elements_worker, param, alternative +): + elt = Element({"id": "12341234-1234-1234-1234-123412341234"}) + + # WARNING: pytest.deprecated_call must be placed BEFORE pytest.raises, otherwise `match` argument won't be checked + with pytest.deprecated_call( + match=f"`{param}` usage is deprecated. Consider using `{alternative}` instead." + ), pytest.raises( + AssertionError, match=f"if of type bool, {param} can only be set to False" + ): + mock_elements_worker.list_element_parents( + element=elt, + **{param: True}, + ) + + def test_list_element_parents_api_error(responses, mock_elements_worker): elt = Element({"id": "12341234-1234-1234-1234-123412341234"}) responses.add( @@ -2390,10 +2510,13 @@ def test_list_element_parents_manual_worker_version(responses, mock_elements_wor }, ) - for idx, parent in enumerate( - mock_elements_worker.list_element_parents(element=elt, worker_version=False) + with pytest.deprecated_call( + match="`worker_version` usage is deprecated. Consider using `worker_run` instead." ): - assert parent == expected_parents[idx] + for idx, parent in enumerate( + mock_elements_worker.list_element_parents(element=elt, worker_version=False) + ): + assert parent == expected_parents[idx] assert len(responses.calls) == len(BASE_API_CALLS) + 1 assert [ @@ -2482,6 +2605,50 @@ def test_list_element_parents_with_cache_unhandled_param( }, "12341234-1234-1234-1234-123412341234", ), + # Filter on element and worker run + ( + { + "element": CachedElement(id="22222222-2222-2222-2222-222222222222"), + "worker_run": "56785678-5678-5678-5678-567856785678", + }, + "12341234-1234-1234-1234-123412341234", + ), + # Filter on element, manual worker run + ( + { + "element": CachedElement(id="12341234-1234-1234-1234-123412341234"), + "worker_run": False, + }, + "99999999-9999-9999-9999-999999999999", + ), + ], +) +def test_list_element_parents_with_cache( + responses, + mock_elements_worker_with_cache, + filters, + expected_id, +): + # Check we have 5 elements already present in database + assert CachedElement.select().count() == 5 + + # Query database through cache + elements = mock_elements_worker_with_cache.list_element_parents(**filters) + assert elements.count() == 1 + for parent in elements.order_by("id"): + assert parent.id == UUID(expected_id) + + # Check the worker never hits the API for elements + assert len(responses.calls) == len(BASE_API_CALLS) + assert [ + (call.request.method, call.request.url) for call in responses.calls + ] == BASE_API_CALLS + + +@pytest.mark.usefixtures("_mock_cached_elements") +@pytest.mark.parametrize( + ("filters", "expected_id"), + [ # Filter on element and worker version ( { @@ -2507,25 +2674,9 @@ def test_list_element_parents_with_cache_unhandled_param( }, "99999999-9999-9999-9999-999999999999", ), - # Filter on element and worker run - ( - { - "element": CachedElement(id="22222222-2222-2222-2222-222222222222"), - "worker_run": "56785678-5678-5678-5678-567856785678", - }, - "12341234-1234-1234-1234-123412341234", - ), - # Filter on element, manual worker run - ( - { - "element": CachedElement(id="12341234-1234-1234-1234-123412341234"), - "worker_run": False, - }, - "99999999-9999-9999-9999-999999999999", - ), ], ) -def test_list_element_parents_with_cache( +def test_list_element_parents_with_cache_deprecation( responses, mock_elements_worker_with_cache, filters, @@ -2534,8 +2685,11 @@ def test_list_element_parents_with_cache( # Check we have 5 elements already present in database assert CachedElement.select().count() == 5 - # Query database through cache - elements = mock_elements_worker_with_cache.list_element_parents(**filters) + with pytest.deprecated_call( + match="`worker_version` usage is deprecated. Consider using `worker_run` instead." + ): + # Query database through cache + elements = mock_elements_worker_with_cache.list_element_parents(**filters) assert elements.count() == 1 for parent in elements.order_by("id"): assert parent.id == UUID(expected_id) diff --git a/tests/test_elements_worker/test_entities.py b/tests/test_elements_worker/test_entities.py index 5734458f..6e805da3 100644 --- a/tests/test_elements_worker/test_entities.py +++ b/tests/test_elements_worker/test_entities.py @@ -708,7 +708,7 @@ def test_create_transcription_entity_with_confidence_with_cache( ] -def test_list_transcription_entities(fake_dummy_worker): +def test_list_transcription_entities_deprecation(fake_dummy_worker): transcription = Transcription({"id": "fake_transcription_id"}) worker_version = "worker_version_id" fake_dummy_worker.api_client.add_response( @@ -717,8 +717,28 @@ def test_list_transcription_entities(fake_dummy_worker): worker_version=worker_version, response={"id": "entity_id"}, ) + with pytest.deprecated_call( + match="`worker_version` usage is deprecated. Consider using `worker_run` instead." + ): + assert fake_dummy_worker.list_transcription_entities( + transcription, worker_version=worker_version + ) == {"id": "entity_id"} + + assert len(fake_dummy_worker.api_client.history) == 1 + assert len(fake_dummy_worker.api_client.responses) == 0 + + +def test_list_transcription_entities(fake_dummy_worker): + transcription = Transcription({"id": "fake_transcription_id"}) + worker_run = "worker_run_id" + fake_dummy_worker.api_client.add_response( + "ListTranscriptionEntities", + id=transcription.id, + worker_run=worker_run, + response={"id": "entity_id"}, + ) assert fake_dummy_worker.list_transcription_entities( - transcription, worker_version + transcription, worker_run=worker_run ) == {"id": "entity_id"} assert len(fake_dummy_worker.api_client.history) == 1 diff --git a/tests/test_elements_worker/test_transcriptions.py b/tests/test_elements_worker/test_transcriptions.py index eee24282..61752a86 100644 --- a/tests/test_elements_worker/test_transcriptions.py +++ b/tests/test_elements_worker/test_transcriptions.py @@ -1711,10 +1711,25 @@ def test_list_transcriptions_wrong_recursive(mock_elements_worker): ) -def test_list_transcriptions_wrong_worker_version(mock_elements_worker): +def test_list_transcriptions_wrong_worker_run(mock_elements_worker): elt = Element({"id": "12341234-1234-1234-1234-123412341234"}) with pytest.raises( + AssertionError, match="worker_run should be of type str or bool" + ): + mock_elements_worker.list_transcriptions( + element=elt, + worker_run=1234, + ) + + +def test_list_transcriptions_wrong_worker_version(mock_elements_worker): + elt = Element({"id": "12341234-1234-1234-1234-123412341234"}) + + # WARNING: pytest.deprecated_call must be placed BEFORE pytest.raises, otherwise `match` argument won't be checked + with pytest.deprecated_call( + match="`worker_version` usage is deprecated. Consider using `worker_run` instead." + ), pytest.raises( AssertionError, match="worker_version should be of type str or bool" ): mock_elements_worker.list_transcriptions( @@ -1723,10 +1738,25 @@ def test_list_transcriptions_wrong_worker_version(mock_elements_worker): ) -def test_list_transcriptions_wrong_bool_worker_version(mock_elements_worker): +def test_list_transcriptions_wrong_bool_worker_run(mock_elements_worker): elt = Element({"id": "12341234-1234-1234-1234-123412341234"}) with pytest.raises( + AssertionError, match="if of type bool, worker_run can only be set to False" + ): + mock_elements_worker.list_transcriptions( + element=elt, + worker_run=True, + ) + + +def test_list_transcriptions_wrong_bool_worker_version(mock_elements_worker): + elt = Element({"id": "12341234-1234-1234-1234-123412341234"}) + + # WARNING: pytest.deprecated_call must be placed BEFORE pytest.raises, otherwise `match` argument won't be checked + with pytest.deprecated_call( + match="`worker_version` usage is deprecated. Consider using `worker_run` instead." + ), pytest.raises( AssertionError, match="if of type bool, worker_version can only be set to False" ): mock_elements_worker.list_transcriptions( @@ -1784,6 +1814,7 @@ def test_list_transcriptions(responses, mock_elements_worker): "text": "hey", "confidence": 0.42, "worker_version_id": "56785678-5678-5678-5678-567856785678", + "worker_run_id": "56785678-5678-5678-5678-567856785678", "element": None, }, { @@ -1791,6 +1822,7 @@ def test_list_transcriptions(responses, mock_elements_worker): "text": "it's", "confidence": 0.42, "worker_version_id": "56785678-5678-5678-5678-567856785678", + "worker_run_id": "56785678-5678-5678-5678-567856785678", "element": None, }, { @@ -1798,6 +1830,7 @@ def test_list_transcriptions(responses, mock_elements_worker): "text": "me", "confidence": 0.42, "worker_version_id": "56785678-5678-5678-5678-567856785678", + "worker_run_id": "56785678-5678-5678-5678-567856785678", "element": None, }, ] @@ -1836,6 +1869,7 @@ def test_list_transcriptions_manual_worker_version(responses, mock_elements_work "text": "hey", "confidence": 0.42, "worker_version_id": None, + "worker_run_id": None, "element": None, } ] @@ -1850,8 +1884,50 @@ def test_list_transcriptions_manual_worker_version(responses, mock_elements_work }, ) + with pytest.deprecated_call( + match="`worker_version` usage is deprecated. Consider using `worker_run` instead." + ): + for idx, transcription in enumerate( + mock_elements_worker.list_transcriptions(element=elt, worker_version=False) + ): + assert transcription == trans[idx] + + assert len(responses.calls) == len(BASE_API_CALLS) + 1 + assert [ + (call.request.method, call.request.url) for call in responses.calls + ] == BASE_API_CALLS + [ + ( + "GET", + "http://testserver/api/v1/element/12341234-1234-1234-1234-123412341234/transcriptions/?worker_version=False", + ), + ] + + +def test_list_transcriptions_manual_worker_run(responses, mock_elements_worker): + elt = Element({"id": "12341234-1234-1234-1234-123412341234"}) + trans = [ + { + "id": "0000", + "text": "hey", + "confidence": 0.42, + "worker_version_id": None, + "worker_run_id": None, + "element": None, + } + ] + responses.add( + responses.GET, + "http://testserver/api/v1/element/12341234-1234-1234-1234-123412341234/transcriptions/?worker_run=False", + status=200, + json={ + "count": 1, + "next": None, + "results": trans, + }, + ) + for idx, transcription in enumerate( - mock_elements_worker.list_transcriptions(element=elt, worker_version=False) + mock_elements_worker.list_transcriptions(element=elt, worker_run=False) ): assert transcription == trans[idx] @@ -1861,7 +1937,7 @@ def test_list_transcriptions_manual_worker_version(responses, mock_elements_work ] == BASE_API_CALLS + [ ( "GET", - "http://testserver/api/v1/element/12341234-1234-1234-1234-123412341234/transcriptions/?worker_version=False", + "http://testserver/api/v1/element/12341234-1234-1234-1234-123412341234/transcriptions/?worker_run=False", ), ] @@ -1895,16 +1971,26 @@ def test_list_transcriptions_manual_worker_version(responses, mock_elements_work "66666666-6666-6666-6666-666666666666", ), ), - # Filter on element and worker_version should give first transcription + # Filter on element and worker run should give the first transcription ( { "element": CachedElement( id="11111111-1111-1111-1111-111111111111", type="page" ), - "worker_version": "56785678-5678-5678-5678-567856785678", + "worker_run": "56785678-5678-5678-5678-567856785678", }, ("11111111-1111-1111-1111-111111111111",), ), + # Filter on element, manual worker run should give the sixth transcription + ( + { + "element": CachedElement( + id="11111111-1111-1111-1111-111111111111", type="page" + ), + "worker_run": False, + }, + ("66666666-6666-6666-6666-666666666666",), + ), # Filter recursively on element should give all transcriptions inserted ( { @@ -1922,33 +2008,70 @@ def test_list_transcriptions_manual_worker_version(responses, mock_elements_work "66666666-6666-6666-6666-666666666666", ), ), - # Filter recursively on element and worker_version should give four transcriptions + # Filter recursively on element and element_type should give three transcriptions ( { "element": CachedElement( id="11111111-1111-1111-1111-111111111111", type="page" ), - "worker_version": "90129012-9012-9012-9012-901290129012", + "element_type": "something_else", "recursive": True, }, ( "22222222-2222-2222-2222-222222222222", - "33333333-3333-3333-3333-333333333333", "44444444-4444-4444-4444-444444444444", "55555555-5555-5555-5555-555555555555", ), ), - # Filter recursively on element and element_type should give three transcriptions + ], +) +def test_list_transcriptions_with_cache( + responses, mock_elements_worker_with_cache, filters, expected_ids +): + # Check we have 5 elements already present in database + assert CachedTranscription.select().count() == 6 + + # Query database through cache + transcriptions = mock_elements_worker_with_cache.list_transcriptions(**filters) + assert transcriptions.count() == len(expected_ids) + for transcription, expected_id in zip( + transcriptions.order_by(CachedTranscription.id), expected_ids, strict=True + ): + assert transcription.id == UUID(expected_id) + + # Check the worker never hits the API for elements + assert len(responses.calls) == len(BASE_API_CALLS) + assert [ + (call.request.method, call.request.url) for call in responses.calls + ] == BASE_API_CALLS + + +@pytest.mark.usefixtures("_mock_cached_transcriptions") +@pytest.mark.parametrize( + ("filters", "expected_ids"), + [ + # Filter on element and worker_version should give first transcription ( { "element": CachedElement( id="11111111-1111-1111-1111-111111111111", type="page" ), - "element_type": "something_else", + "worker_version": "56785678-5678-5678-5678-567856785678", + }, + ("11111111-1111-1111-1111-111111111111",), + ), + # Filter recursively on element and worker_version should give four transcriptions + ( + { + "element": CachedElement( + id="11111111-1111-1111-1111-111111111111", type="page" + ), + "worker_version": "90129012-9012-9012-9012-901290129012", "recursive": True, }, ( "22222222-2222-2222-2222-222222222222", + "33333333-3333-3333-3333-333333333333", "44444444-4444-4444-4444-444444444444", "55555555-5555-5555-5555-555555555555", ), @@ -1965,14 +2088,17 @@ def test_list_transcriptions_manual_worker_version(responses, mock_elements_work ), ], ) -def test_list_transcriptions_with_cache( +def test_list_transcriptions_with_cache_deprecation( responses, mock_elements_worker_with_cache, filters, expected_ids ): # Check we have 5 elements already present in database assert CachedTranscription.select().count() == 6 - # Query database through cache - transcriptions = mock_elements_worker_with_cache.list_transcriptions(**filters) + with pytest.deprecated_call( + match="`worker_version` usage is deprecated. Consider using `worker_run` instead." + ): + # Query database through cache + transcriptions = mock_elements_worker_with_cache.list_transcriptions(**filters) assert transcriptions.count() == len(expected_ids) for transcription, expected_id in zip( transcriptions.order_by(CachedTranscription.id), expected_ids, strict=True diff --git a/tests/test_elements_worker/test_worker.py b/tests/test_elements_worker/test_worker.py index 22a558c7..f84ef247 100644 --- a/tests/test_elements_worker/test_worker.py +++ b/tests/test_elements_worker/test_worker.py @@ -20,7 +20,8 @@ def test_get_worker_version(fake_dummy_worker): api_client.add_response("RetrieveWorkerVersion", response, id=TEST_VERSION_ID) - res = fake_dummy_worker.get_worker_version(TEST_VERSION_ID) + with pytest.deprecated_call(match="WorkerVersion usage is deprecated."): + res = fake_dummy_worker.get_worker_version(TEST_VERSION_ID) assert res == response assert fake_dummy_worker._worker_version_cache[TEST_VERSION_ID] == response @@ -33,8 +34,11 @@ def test_get_worker_version__uses_cache(fake_dummy_worker): api_client.add_response("RetrieveWorkerVersion", response, id=TEST_VERSION_ID) - response_1 = fake_dummy_worker.get_worker_version(TEST_VERSION_ID) - response_2 = fake_dummy_worker.get_worker_version(TEST_VERSION_ID) + with pytest.deprecated_call(match="WorkerVersion usage is deprecated."): + response_1 = fake_dummy_worker.get_worker_version(TEST_VERSION_ID) + + with pytest.deprecated_call(match="WorkerVersion usage is deprecated."): + response_2 = fake_dummy_worker.get_worker_version(TEST_VERSION_ID) assert response_1 == response assert response_1 == response_2 @@ -51,12 +55,16 @@ def test_get_worker_version_slug(mocker, fake_dummy_worker): "worker": {"slug": "mock_slug"}, } - slug = fake_dummy_worker.get_worker_version_slug(TEST_VERSION_ID) + with pytest.deprecated_call(match="WorkerVersion usage is deprecated."): + slug = fake_dummy_worker.get_worker_version_slug(TEST_VERSION_ID) assert slug == "mock_slug" def test_get_worker_version_slug_none(fake_dummy_worker): - with pytest.raises(ValueError, match="No worker version ID"): + # WARNING: pytest.deprecated_call must be placed BEFORE pytest.raises, otherwise `match` argument won't be checked + with pytest.deprecated_call( + match="WorkerVersion usage is deprecated." + ), pytest.raises(ValueError, match="No worker version ID"): fake_dummy_worker.get_worker_version_slug(None) -- GitLab