Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • workers/base-worker
1 result
Show changes
Commits on Source (10)
......@@ -25,7 +25,7 @@ lint:
- pre-commit run -a
test:
image: python:3
image: python:3.11
stage: test
cache:
......@@ -55,7 +55,7 @@ test:
- tox -- --junitxml=test-report.xml --durations=50
test-cookiecutter:
image: python:3
image: python:3.11
stage: test
cache:
......@@ -136,7 +136,7 @@ pypi-publication:
- twine upload dist/* -r pypi
.docs:
image: python:3
image: python:3.11
artifacts:
paths:
- public
......
0.3.3
0.3.4
......@@ -102,14 +102,26 @@ class CachedElement(Model):
database = db
table_name = "elements"
def open_image(self, *args, max_size: Optional[int] = None, **kwargs) -> Image:
def open_image(
self,
*args,
max_width: Optional[int] = None,
max_height: Optional[int] = None,
**kwargs,
) -> Image:
"""
Open this element's image as a Pillow image.
This does not crop the image to the element's polygon.
IIIF servers with maxWidth, maxHeight or maxArea restrictions on image size are not supported.
Warns:
----
If both, ``max_width`` and ``max_height`` are set, the image ratio is not preserved.
:param *args: Positional arguments passed to [arkindex_worker.image.open_image][]
:param max_size: Subresolution of the image.
:param max_width: The maximum width of the image.
:param max_height: The maximum height of the image.
:param **kwargs: Keyword arguments passed to [arkindex_worker.image.open_image][]
:raises ValueError: When this element does not have an image ID or a polygon.
:return: A Pillow image.
......@@ -129,7 +141,7 @@ class CachedElement(Model):
else:
box = "full"
if max_size is None:
if max_width is None and max_height is None:
resize = "full"
else:
# Do not resize for polygons that do not exactly match the images
......@@ -141,14 +153,12 @@ class CachedElement(Model):
resize = "full"
# Do not resize when the image is below the maximum size
elif self.image.width <= max_size and self.image.height <= max_size:
elif (max_width is None or self.image.width <= max_width) and (
max_height is None or self.image.height <= max_height
):
resize = "full"
else:
ratio = max_size / max(self.image.width, self.image.height)
new_width, new_height = int(self.image.width * ratio), int(
self.image.height * ratio
)
resize = f"{new_width},{new_height}"
resize = f"{max_width or ''},{max_height or ''}"
url = self.image.url
if not url.endswith("/"):
......
......@@ -10,8 +10,6 @@ from typing import Generator, List, Optional
from PIL import Image
from requests import HTTPError
from arkindex_worker import logger
class MagicDict(dict):
"""
......@@ -124,9 +122,10 @@ class Element(MagicDict):
def open_image(
self,
*args,
max_size: Optional[int] = None,
max_width: Optional[int] = None,
max_height: Optional[int] = None,
use_full_image: Optional[bool] = False,
**kwargs
**kwargs,
) -> Image:
"""
Open this element's image using Pillow, rotating and mirroring it according
......@@ -149,7 +148,13 @@ class Element(MagicDict):
``rotation_angle=0, mirrored=False`` as keyword arguments.
:param max_size: The maximum size of the requested image.
Warns:
----
If both, ``max_width`` and ``max_height`` are set, the image ratio is not preserved.
:param max_width: The maximum width of the image.
:param max_height: The maximum height of the image.
:param use_full_image: Ignore the ``zone.polygon`` and always
retrieve the image without cropping.
:param *args: Positional arguments passed to [arkindex_worker.image.open_image][].
......@@ -165,41 +170,29 @@ class Element(MagicDict):
from arkindex_worker.image import (
download_tiles,
open_image,
polygon_bounding_box,
)
if not self.get("zone"):
raise ValueError("Element {} has no zone".format(self.id))
if self.requires_tiles:
if max_size is None:
if max_width is None and max_height is None:
return download_tiles(self.zone.image.url)
else:
raise NotImplementedError
if max_size is not None:
bounding_box = polygon_bounding_box(self.zone.polygon)
if max_width is None and max_height is None:
resize = "full"
else:
original_size = {"w": self.zone.image.width, "h": self.zone.image.height}
# No resizing if the element is smaller than the image.
if (
bounding_box.width != original_size["w"]
or bounding_box.height != original_size["h"]
):
resize = "full"
logger.warning(
"Only full image size elements covered, "
+ "downloading full size image."
)
# No resizing if the image is smaller than the wanted size.
elif original_size["w"] <= max_size and original_size["h"] <= max_size:
if (max_width is None or original_size["w"] <= max_width) and (
max_height is None or original_size["h"] <= max_height
):
resize = "full"
# Resizing if the image is bigger than the wanted size.
else:
ratio = max_size / max(original_size.values())
new_width, new_height = [int(x * ratio) for x in original_size.values()]
resize = "{},{}".format(new_width, new_height)
else:
resize = "full"
resize = f"{max_width or ''},{max_height or ''}"
if use_full_image:
url = self.image_url(resize)
......@@ -212,7 +205,7 @@ class Element(MagicDict):
*args,
rotation_angle=self.rotation_angle,
mirrored=self.mirrored,
**kwargs
**kwargs,
)
except HTTPError as e:
if (
......
......@@ -497,3 +497,165 @@ class ElementMixin(object):
)
return children
def list_element_parents(
    self,
    element: Union[Element, CachedElement],
    folder: Optional[bool] = None,
    name: Optional[str] = None,
    recursive: Optional[bool] = None,
    transcription_worker_version: Optional[Union[str, bool]] = None,
    transcription_worker_run: Optional[Union[str, bool]] = None,
    type: Optional[str] = None,
    with_classes: Optional[bool] = None,
    with_corpus: Optional[bool] = None,
    with_metadata: Optional[bool] = None,
    with_has_children: Optional[bool] = None,
    with_zone: Optional[bool] = None,
    worker_version: Optional[Union[str, bool]] = None,
    worker_run: Optional[Union[str, bool]] = None,
) -> Union[Iterable[dict], Iterable[CachedElement]]:
    """
    List the parents of an element.

    When the local cache is enabled, only the ``type``, ``worker_version`` and
    ``worker_run`` filters are accepted; any other filter raises an ``AssertionError``.

    :param element: Child element to find parents of.
    :param folder: Restrict to or exclude elements with folder types.
       This parameter is not supported when caching is enabled.
    :param name: Restrict to elements whose name contains a substring (case-insensitive).
       This parameter is not supported when caching is enabled.
    :param recursive: Look for parents recursively (grand-parents, etc.)
       This parameter is not supported when caching is enabled.
    :param transcription_worker_version: Restrict to elements that have a transcription created by a worker version with this UUID.
       Set to ``False`` to look for manually created ones.
       This parameter is not supported when caching is enabled.
    :param transcription_worker_run: Restrict to elements that have a transcription created by a worker run with this UUID.
       Set to ``False`` to look for manually created ones.
       This parameter is not supported when caching is enabled.
    :param type: Restrict to elements with a specific type slug.
    :param with_classes: Include each element's classifications in the response.
       This parameter is not supported when caching is enabled.
    :param with_corpus: Include each element's corpus in the response.
       This parameter is not supported when caching is enabled.
    :param with_metadata: Include each element's metadata in the response.
       This parameter is not supported when caching is enabled.
    :param with_has_children: Include the ``has_children`` attribute in the response,
       indicating if this element has child elements of its own.
       This parameter is not supported when caching is enabled.
    :param with_zone: Include the ``zone`` attribute in the response,
       holding the element's image and polygon.
       This parameter is not supported when caching is enabled.
    :param worker_version: Restrict to elements created by a worker version with this UUID.
       Set to ``False`` to look for manually created elements.
    :param worker_run: Restrict to elements created by a worker run with this UUID.
       Set to ``False`` to look for manually created elements.
    :return: An iterable of dicts from the ``ListElementParents`` API endpoint,
       or an iterable of [CachedElement][arkindex_worker.cache.CachedElement] when caching is enabled.
    """
    assert element and isinstance(
        element, (Element, CachedElement)
    ), "element shouldn't be null and should be an Element or CachedElement"

    query_params = {}

    def _add_flag(key, value):
        # Boolean filters are forwarded as soon as they are explicitly set
        if value is None:
            return
        assert isinstance(value, bool), f"{key} should be of type bool"
        query_params[key] = value

    def _add_text(key, value):
        # Falsy values (None, empty string) are silently ignored
        if not value:
            return
        assert isinstance(value, str), f"{key} should be of type str"
        query_params[key] = value

    def _add_worker_filter(key, value):
        # Either a UUID as a string, or False to target manual objects
        if value is None:
            return
        assert isinstance(value, (str, bool)), f"{key} should be of type str or bool"
        if isinstance(value, bool):
            assert (
                value is False
            ), f"if of type bool, {key} can only be set to False"
        query_params[key] = value

    # The call order below defines both which assertion fires first
    # and the order of the query parameters sent to the API
    _add_flag("folder", folder)
    _add_text("name", name)
    _add_flag("recursive", recursive)
    _add_worker_filter("transcription_worker_version", transcription_worker_version)
    _add_worker_filter("transcription_worker_run", transcription_worker_run)
    _add_text("type", type)
    _add_flag("with_classes", with_classes)
    _add_flag("with_corpus", with_corpus)
    _add_flag("with_has_children", with_has_children)
    _add_flag("with_metadata", with_metadata)
    _add_flag("with_zone", with_zone)
    _add_worker_filter("worker_version", worker_version)
    _add_worker_filter("worker_run", worker_run)

    if not self.use_cache:
        return self.api_client.paginate(
            "ListElementParents", id=element.id, **query_params
        )

    # Checking that we only received query_params handled by the cache
    assert set(query_params) <= {
        "type",
        "worker_version",
        "worker_run",
    }, "When using the local cache, you can only filter by 'type' and/or 'worker_version' and/or 'worker_run'"

    # Sub-query yielding the parent ID stored on the child element's row
    parent_ids = CachedElement.select(CachedElement.parent_id).where(
        CachedElement.id == element.id
    )
    cached_parents = CachedElement.select().where(CachedElement.id.in_(parent_ids))

    if type:
        cached_parents = cached_parents.where(CachedElement.type == type)

    if worker_version is not None:
        if worker_version:
            cached_parents = cached_parents.where(
                CachedElement.worker_version_id == worker_version
            )
        else:
            # worker_version=False means "manual", stored as a NULL worker version
            cached_parents = cached_parents.where(
                CachedElement.worker_version_id.is_null()
            )

    if worker_run is not None:
        if worker_run:
            cached_parents = cached_parents.where(
                CachedElement.worker_run_id == worker_run
            )
        else:
            # worker_run=False means "manual", stored as a NULL worker run
            cached_parents = cached_parents.where(
                CachedElement.worker_run_id.is_null()
            )

    return cached_parents
# Releases
## 0.3.4
Released on **14 Sept 2023** &bull; View on [Gitlab](https://gitlab.teklia.com/workers/base-worker/-/releases/0.3.4)
- The worker template was updated to correctly install [Git submodules](https://git-scm.com/book/en/v2/Git-Tools-Submodules) if it depends on any.
- Base-worker now uses [ruff](https://github.com/charliermarsh/ruff) for linting. This tool replaces `isort` and `flake8`.
- New Arkindex API helper to update an element, calling [PartialUpdateElement](https://demo.arkindex.org/api-docs/#tag/elements/operation/PartialUpdateElement).
- New Arkindex API helper to list an element's parents, calling [ListElementParents](https://demo.arkindex.org/api-docs/#tag/elements/operation/ListElementParents).
- Worker Activity API is now disabled when the worker runs in `read-only` mode instead of relying on the `--dev` CLI argument. The [update_activity](https://workers.arkindex.org/ref/elements_worker/#arkindex_worker.worker.ElementsWorker.update_activity) API helper was updated following Arkindex 1.5.1 changes.
- Workers can now resize the image of an element when opening it. This uses the [IIIF](https://iiif.io/api/image/2.1/#size) resizing API.
## 0.3.3
......
arkindex-client==1.0.13
arkindex-client==1.0.14
peewee==3.16.3
Pillow==10.0.0
pymdown-extensions==10.2
......
......@@ -321,9 +321,25 @@ def fake_gitlab_helper_factory():
@pytest.fixture
def mock_cached_elements():
"""Insert few elements in local cache"""
CachedElement.create(
id=UUID("99999999-9999-9999-9999-999999999999"),
parent_id=None,
type="something",
polygon="[[1, 1], [2, 2], [2, 1], [1, 2]]",
worker_version_id=None,
worker_run_id=None,
)
CachedElement.create(
id=UUID("12341234-1234-1234-1234-123412341234"),
parent_id=UUID("99999999-9999-9999-9999-999999999999"),
type="double_page",
polygon="[[1, 1], [2, 2], [2, 1], [1, 2]]",
worker_version_id=UUID("56785678-5678-5678-5678-567856785678"),
worker_run_id=UUID("56785678-5678-5678-5678-567856785678"),
)
CachedElement.create(
id=UUID("11111111-1111-1111-1111-111111111111"),
parent_id="12341234-1234-1234-1234-123412341234",
parent_id=UUID("12341234-1234-1234-1234-123412341234"),
type="something",
polygon="[[1, 1], [2, 2], [2, 1], [1, 2]]",
worker_version_id=UUID("56785678-5678-5678-5678-567856785678"),
......@@ -344,7 +360,7 @@ def mock_cached_elements():
worker_version_id=None,
worker_run_id=None,
)
assert CachedElement.select().count() == 3
assert CachedElement.select().count() == 5
@pytest.fixture
......
......@@ -148,10 +148,20 @@ def test_check_version_same_version(tmp_path):
@pytest.mark.parametrize(
"image_width,image_height,polygon_x,polygon_y,polygon_width,polygon_height,max_size,expected_url",
"image_width,image_height,polygon_x,polygon_y,polygon_width,polygon_height,max_width,max_height,expected_url",
[
# No max_size: no resize
(400, 600, 0, 0, 400, 600, None, "http://something/full/full/0/default.jpg"),
(
400,
600,
0,
0,
400,
600,
None,
None,
"http://something/full/full/0/default.jpg",
),
# No max_size: resize on bbox
(
400,
......@@ -161,6 +171,7 @@ def test_check_version_same_version(tmp_path):
200,
100,
None,
None,
"http://something/0,0,200,100/full/0/default.jpg",
),
(
......@@ -171,12 +182,43 @@ def test_check_version_same_version(tmp_path):
200,
100,
None,
None,
"http://something/50,50,200,100/full/0/default.jpg",
),
# max_size equal to the image size, no resize
(400, 600, 0, 0, 400, 600, 600, "http://something/full/full/0/default.jpg"),
(600, 400, 0, 0, 600, 400, 600, "http://something/full/full/0/default.jpg"),
(400, 400, 0, 0, 400, 400, 400, "http://something/full/full/0/default.jpg"),
(
400,
600,
0,
0,
400,
600,
400,
None,
"http://something/full/full/0/default.jpg",
),
(
600,
400,
0,
0,
600,
400,
None,
400,
"http://something/full/full/0/default.jpg",
),
(
400,
400,
0,
0,
400,
400,
400,
400,
"http://something/full/full/0/default.jpg",
),
(
400,
400,
......@@ -185,11 +227,32 @@ def test_check_version_same_version(tmp_path):
200,
100,
200,
100,
"http://something/50,50,200,100/full/0/default.jpg",
),
# max_size is smaller than the image, resize
(400, 600, 0, 0, 400, 600, 400, "http://something/full/266,400/0/default.jpg"),
(600, 400, 0, 0, 600, 400, 400, "http://something/full/400,266/0/default.jpg"),
(
400,
600,
0,
0,
400,
600,
None,
400,
"http://something/full/,400/0/default.jpg",
),
(
600,
400,
0,
0,
600,
400,
400,
None,
"http://something/full/400,/0/default.jpg",
),
(
400,
600,
......@@ -198,6 +261,7 @@ def test_check_version_same_version(tmp_path):
200,
600,
400,
600,
"http://something/0,0,200,600/full/0/default.jpg",
),
(
......@@ -208,13 +272,54 @@ def test_check_version_same_version(tmp_path):
200,
600,
400,
600,
"http://something/50,50,200,600/full/0/default.jpg",
),
(400, 400, 0, 0, 400, 400, 200, "http://something/full/200,200/0/default.jpg"),
(
400,
400,
0,
0,
400,
400,
200,
200,
"http://something/full/200,200/0/default.jpg",
),
# max_size above the image size, no resize
(400, 600, 0, 0, 400, 600, 800, "http://something/full/full/0/default.jpg"),
(600, 400, 0, 0, 600, 400, 800, "http://something/full/full/0/default.jpg"),
(400, 400, 0, 0, 400, 400, 800, "http://something/full/full/0/default.jpg"),
(
400,
600,
0,
0,
400,
600,
800,
None,
"http://something/full/full/0/default.jpg",
),
(
600,
400,
0,
0,
600,
400,
None,
800,
"http://something/full/full/0/default.jpg",
),
(
400,
400,
0,
0,
400,
400,
800,
800,
"http://something/full/full/0/default.jpg",
),
(
400,
400,
......@@ -223,6 +328,7 @@ def test_check_version_same_version(tmp_path):
200,
100,
800,
800,
"http://something/50,50,200,100/full/0/default.jpg",
),
],
......@@ -235,7 +341,8 @@ def test_element_open_image(
polygon_y,
polygon_width,
polygon_height,
max_size,
max_width,
max_height,
expected_url,
):
open_mock = mocker.patch(
......@@ -261,7 +368,7 @@ def test_element_open_image(
],
)
assert elt.open_image(max_size=max_size) == "an image!"
assert elt.open_image(max_width=max_width, max_height=max_height) == "an image!"
assert open_mock.call_count == 1
assert open_mock.call_args == mocker.call(
expected_url, mirrored=False, rotation_angle=0
......
......@@ -94,7 +94,7 @@ def test_open_image_resize_portrait(mocker):
}
)
# Resize = original size
assert elt.open_image(max_size=600, use_full_image=True) == "an image!"
assert elt.open_image(max_height=600, use_full_image=True) == "an image!"
assert open_mock.call_count == 1
assert open_mock.call_args == mocker.call(
"http://something/full/full/0/default.jpg",
......@@ -102,15 +102,15 @@ def test_open_image_resize_portrait(mocker):
mirrored=False,
)
# Resize = smaller height
assert elt.open_image(max_size=400, use_full_image=True) == "an image!"
assert elt.open_image(max_height=400, use_full_image=True) == "an image!"
assert open_mock.call_count == 2
assert open_mock.call_args == mocker.call(
"http://something/full/266,400/0/default.jpg",
"http://something/full/,400/0/default.jpg",
rotation_angle=0,
mirrored=False,
)
# Resize = bigger height
assert elt.open_image(max_size=800, use_full_image=True) == "an image!"
assert elt.open_image(max_height=800, use_full_image=True) == "an image!"
assert open_mock.call_count == 3
assert open_mock.call_args == mocker.call(
"http://something/full/full/0/default.jpg",
......@@ -138,10 +138,10 @@ def test_open_image_resize_partial_element(mocker):
"mirrored": False,
}
)
assert elt.open_image(max_size=400, use_full_image=True) == "an image!"
assert elt.open_image(max_height=400, use_full_image=True) == "an image!"
assert open_mock.call_count == 1
assert open_mock.call_args == mocker.call(
"http://something/full/full/0/default.jpg",
"http://something/full/,400/0/default.jpg",
rotation_angle=0,
mirrored=False,
)
......@@ -167,7 +167,7 @@ def test_open_image_resize_landscape(mocker):
}
)
# Resize = original size
assert elt.open_image(max_size=600, use_full_image=True) == "an image!"
assert elt.open_image(max_width=600, use_full_image=True) == "an image!"
assert open_mock.call_count == 1
assert open_mock.call_args == mocker.call(
"http://something/full/full/0/default.jpg",
......@@ -175,15 +175,15 @@ def test_open_image_resize_landscape(mocker):
mirrored=False,
)
# Resize = smaller width
assert elt.open_image(max_size=400, use_full_image=True) == "an image!"
assert elt.open_image(max_width=400, use_full_image=True) == "an image!"
assert open_mock.call_count == 2
assert open_mock.call_args == mocker.call(
"http://something/full/400,266/0/default.jpg",
"http://something/full/400,/0/default.jpg",
rotation_angle=0,
mirrored=False,
)
# Resize = bigger width
assert elt.open_image(max_size=800, use_full_image=True) == "an image!"
assert elt.open_image(max_width=800, use_full_image=True) == "an image!"
assert open_mock.call_count == 3
assert open_mock.call_args == mocker.call(
"http://something/full/full/0/default.jpg",
......@@ -212,7 +212,14 @@ def test_open_image_resize_square(mocker):
}
)
# Resize = original size
assert elt.open_image(max_size=400, use_full_image=True) == "an image!"
assert (
elt.open_image(
max_width=400,
max_height=400,
use_full_image=True,
)
== "an image!"
)
assert open_mock.call_count == 1
assert open_mock.call_args == mocker.call(
"http://something/full/full/0/default.jpg",
......@@ -220,7 +227,14 @@ def test_open_image_resize_square(mocker):
mirrored=False,
)
# Resize = smaller
assert elt.open_image(max_size=200, use_full_image=True) == "an image!"
assert (
elt.open_image(
max_width=200,
max_height=200,
use_full_image=True,
)
== "an image!"
)
assert open_mock.call_count == 2
assert open_mock.call_args == mocker.call(
"http://something/full/200,200/0/default.jpg",
......@@ -228,7 +242,14 @@ def test_open_image_resize_square(mocker):
mirrored=False,
)
# Resize = bigger
assert elt.open_image(max_size=800, use_full_image=True) == "an image!"
assert (
elt.open_image(
max_width=800,
max_height=800,
use_full_image=True,
)
== "an image!"
)
assert open_mock.call_count == 3
assert open_mock.call_args == mocker.call(
"http://something/full/full/0/default.jpg",
......@@ -251,7 +272,7 @@ def test_open_image_resize_tiles(mocker):
}
)
with pytest.raises(NotImplementedError):
elt.open_image(max_size=400)
elt.open_image(max_width=400)
def test_open_image_requires_zone():
......@@ -364,10 +385,10 @@ def test_open_image_resize_use_full_image_false(mocker):
}
)
# Resize = smaller
assert elt.open_image(max_size=200, use_full_image=False) == "an image!"
assert elt.open_image(max_height=200, use_full_image=False) == "an image!"
assert open_mock.call_count == 1
assert open_mock.call_args == mocker.call(
"http://zoneurl/0,0,400,600/133,200/0/default.jpg",
"http://zoneurl/0,0,400,600/,200/0/default.jpg",
rotation_angle=0,
mirrored=False,
)
......
......@@ -1848,8 +1848,8 @@ def test_list_element_children_with_cache(
filters,
expected_ids,
):
# Check we have 2 elements already present in database
assert CachedElement.select().count() == 3
# Check we have 5 elements already present in database
assert CachedElement.select().count() == 5
# Query database through cache
elements = mock_elements_worker_with_cache.list_element_children(**filters)
......@@ -1862,3 +1862,451 @@ def test_list_element_children_with_cache(
assert [
(call.request.method, call.request.url) for call in responses.calls
] == BASE_API_CALLS
def test_list_element_parents_wrong_element(mock_elements_worker):
    """A missing element, then a value of the wrong type, are both rejected."""
    expected_message = (
        "element shouldn't be null and should be an Element or CachedElement"
    )

    with pytest.raises(AssertionError) as exc_info:
        mock_elements_worker.list_element_parents(element=None)
    assert str(exc_info.value) == expected_message

    with pytest.raises(AssertionError) as exc_info:
        mock_elements_worker.list_element_parents(element="not element type")
    assert str(exc_info.value) == expected_message
def test_list_element_parents_wrong_folder(mock_elements_worker):
    """A non-boolean ``folder`` filter raises an AssertionError."""
    child = Element({"id": "12341234-1234-1234-1234-123412341234"})

    with pytest.raises(AssertionError) as exc_info:
        mock_elements_worker.list_element_parents(element=child, folder="not bool")

    assert str(exc_info.value) == "folder should be of type bool"
def test_list_element_parents_wrong_name(mock_elements_worker):
    """A non-string ``name`` filter raises an AssertionError."""
    child = Element({"id": "12341234-1234-1234-1234-123412341234"})

    with pytest.raises(AssertionError) as exc_info:
        mock_elements_worker.list_element_parents(element=child, name=1234)

    assert str(exc_info.value) == "name should be of type str"
def test_list_element_parents_wrong_recursive(mock_elements_worker):
    """A non-boolean ``recursive`` filter raises an AssertionError."""
    child = Element({"id": "12341234-1234-1234-1234-123412341234"})

    with pytest.raises(AssertionError) as exc_info:
        mock_elements_worker.list_element_parents(
            element=child, recursive="not bool"
        )

    assert str(exc_info.value) == "recursive should be of type bool"
def test_list_element_parents_wrong_type(mock_elements_worker):
    """A non-string ``type`` filter raises an AssertionError."""
    child = Element({"id": "12341234-1234-1234-1234-123412341234"})

    with pytest.raises(AssertionError) as exc_info:
        mock_elements_worker.list_element_parents(element=child, type=1234)

    assert str(exc_info.value) == "type should be of type str"
def test_list_element_parents_wrong_with_classes(mock_elements_worker):
    """A non-boolean ``with_classes`` option raises an AssertionError."""
    child = Element({"id": "12341234-1234-1234-1234-123412341234"})

    with pytest.raises(AssertionError) as exc_info:
        mock_elements_worker.list_element_parents(
            element=child, with_classes="not bool"
        )

    assert str(exc_info.value) == "with_classes should be of type bool"
def test_list_element_parents_wrong_with_corpus(mock_elements_worker):
    """A non-boolean ``with_corpus`` option raises an AssertionError."""
    child = Element({"id": "12341234-1234-1234-1234-123412341234"})

    with pytest.raises(AssertionError) as exc_info:
        mock_elements_worker.list_element_parents(
            element=child, with_corpus="not bool"
        )

    assert str(exc_info.value) == "with_corpus should be of type bool"
def test_list_element_parents_wrong_with_has_children(mock_elements_worker):
    """A non-boolean ``with_has_children`` option raises an AssertionError."""
    child = Element({"id": "12341234-1234-1234-1234-123412341234"})

    with pytest.raises(AssertionError) as exc_info:
        mock_elements_worker.list_element_parents(
            element=child, with_has_children="not bool"
        )

    assert str(exc_info.value) == "with_has_children should be of type bool"
def test_list_element_parents_wrong_with_zone(mock_elements_worker):
    """A non-boolean ``with_zone`` option raises an AssertionError."""
    child = Element({"id": "12341234-1234-1234-1234-123412341234"})

    with pytest.raises(AssertionError) as exc_info:
        mock_elements_worker.list_element_parents(
            element=child, with_zone="not bool"
        )

    assert str(exc_info.value) == "with_zone should be of type bool"
def test_list_element_parents_wrong_with_metadata(mock_elements_worker):
    """A non-boolean ``with_metadata`` option raises an AssertionError."""
    child = Element({"id": "12341234-1234-1234-1234-123412341234"})

    with pytest.raises(AssertionError) as exc_info:
        mock_elements_worker.list_element_parents(
            element=child, with_metadata="not bool"
        )

    assert str(exc_info.value) == "with_metadata should be of type bool"
@pytest.mark.parametrize(
    "param, value",
    (
        ("worker_version", 1234),
        ("worker_run", 1234),
        ("transcription_worker_version", 1234),
        ("transcription_worker_run", 1234),
    ),
)
def test_list_element_parents_wrong_worker_version(mock_elements_worker, param, value):
    """Worker version/run filters that are neither str nor bool are rejected."""
    child = Element({"id": "12341234-1234-1234-1234-123412341234"})
    bad_filter = {param: value}

    with pytest.raises(AssertionError) as exc_info:
        mock_elements_worker.list_element_parents(element=child, **bad_filter)

    assert str(exc_info.value) == f"{param} should be of type str or bool"
@pytest.mark.parametrize(
    "param",
    (
        "worker_version",
        "worker_run",
        "transcription_worker_version",
        "transcription_worker_run",
    ),
)
def test_list_element_parents_wrong_bool_worker_version(mock_elements_worker, param):
    """``True`` is rejected for worker version/run filters: only ``False`` is a valid boolean."""
    child = Element({"id": "12341234-1234-1234-1234-123412341234"})
    bad_filter = {param: True}

    with pytest.raises(AssertionError) as exc_info:
        mock_elements_worker.list_element_parents(element=child, **bad_filter)

    assert str(exc_info.value) == f"if of type bool, {param} can only be set to False"
def test_list_element_parents_api_error(responses, mock_elements_worker):
    """A failing endpoint is requested five times before pagination gives up."""
    parents_url = "http://testserver/api/v1/elements/12341234-1234-1234-1234-123412341234/parents/"
    child = Element({"id": "12341234-1234-1234-1234-123412341234"})
    responses.add(responses.GET, parents_url, status=500)

    with pytest.raises(
        Exception, match="Stopping pagination as data will be incomplete"
    ):
        next(mock_elements_worker.list_element_parents(element=child))

    assert len(responses.calls) == len(BASE_API_CALLS) + 5
    performed_calls = [
        (call.request.method, call.request.url) for call in responses.calls
    ]
    # We do 5 retries
    assert performed_calls == BASE_API_CALLS + [("GET", parents_url)] * 5
def test_list_element_parents(responses, mock_elements_worker):
    """Parents returned by the API are yielded unchanged, with a single request."""
    parents_url = "http://testserver/api/v1/elements/12341234-1234-1234-1234-123412341234/parents/"
    child = Element({"id": "12341234-1234-1234-1234-123412341234"})
    expected_parents = [
        {
            "id": parent_id,
            "type": "page",
            "name": parent_name,
            "corpus": {},
            "thumbnail_url": None,
            "zone": {},
            "best_classes": None,
            "has_children": None,
            "worker_version_id": None,
            "worker_run_id": None,
        }
        for parent_id, parent_name in (
            ("0000", "Test"),
            ("1111", "Test 2"),
            ("2222", "Test 3"),
        )
    ]
    responses.add(
        responses.GET,
        parents_url,
        status=200,
        json={"count": 3, "next": None, "results": expected_parents},
    )

    listed_parents = mock_elements_worker.list_element_parents(element=child)
    for position, parent in enumerate(listed_parents):
        assert parent == expected_parents[position]

    assert len(responses.calls) == len(BASE_API_CALLS) + 1
    performed_calls = [
        (call.request.method, call.request.url) for call in responses.calls
    ]
    assert performed_calls == BASE_API_CALLS + [("GET", parents_url)]
def test_list_element_parents_manual_worker_version(responses, mock_elements_worker):
    """``worker_version=False`` is forwarded to the API as a query parameter."""
    filtered_url = "http://testserver/api/v1/elements/12341234-1234-1234-1234-123412341234/parents/?worker_version=False"
    child = Element({"id": "12341234-1234-1234-1234-123412341234"})
    manual_parent = {
        "id": "0000",
        "type": "page",
        "name": "Test",
        "corpus": {},
        "thumbnail_url": None,
        "zone": {},
        "best_classes": None,
        "has_children": None,
        "worker_version_id": None,
        "worker_run_id": None,
    }
    expected_parents = [manual_parent]
    responses.add(
        responses.GET,
        filtered_url,
        status=200,
        json={"count": 1, "next": None, "results": expected_parents},
    )

    listed_parents = mock_elements_worker.list_element_parents(
        element=child, worker_version=False
    )
    for position, parent in enumerate(listed_parents):
        assert parent == expected_parents[position]

    assert len(responses.calls) == len(BASE_API_CALLS) + 1
    performed_calls = [
        (call.request.method, call.request.url) for call in responses.calls
    ]
    assert performed_calls == BASE_API_CALLS + [("GET", filtered_url)]
def test_list_element_parents_manual_worker_run(responses, mock_elements_worker):
    """``worker_run=False`` is forwarded to the API as a query parameter."""
    filtered_url = "http://testserver/api/v1/elements/12341234-1234-1234-1234-123412341234/parents/?worker_run=False"
    child = Element({"id": "12341234-1234-1234-1234-123412341234"})
    manual_parent = {
        "id": "0000",
        "type": "page",
        "name": "Test",
        "corpus": {},
        "thumbnail_url": None,
        "zone": {},
        "best_classes": None,
        "has_children": None,
        "worker_version_id": None,
        "worker_run_id": None,
    }
    expected_parents = [manual_parent]
    responses.add(
        responses.GET,
        filtered_url,
        status=200,
        json={"count": 1, "next": None, "results": expected_parents},
    )

    listed_parents = mock_elements_worker.list_element_parents(
        element=child, worker_run=False
    )
    for position, parent in enumerate(listed_parents):
        assert parent == expected_parents[position]

    assert len(responses.calls) == len(BASE_API_CALLS) + 1
    performed_calls = [
        (call.request.method, call.request.url) for call in responses.calls
    ]
    assert performed_calls == BASE_API_CALLS + [("GET", filtered_url)]
def test_list_element_parents_with_cache_unhandled_param(
    mock_elements_worker_with_cache,
):
    """With the cache enabled, a filter the cache cannot handle is rejected."""
    child = Element({"id": "12341234-1234-1234-1234-123412341234"})

    with pytest.raises(AssertionError) as exc_info:
        mock_elements_worker_with_cache.list_element_parents(
            element=child, with_corpus=True
        )

    raised_message = str(exc_info.value)
    assert (
        raised_message
        == "When using the local cache, you can only filter by 'type' and/or 'worker_version' and/or 'worker_run'"
    )
@pytest.mark.parametrize(
    "filters, expected_id",
    (
        # Filter on element
        (
            {"element": CachedElement(id="11111111-1111-1111-1111-111111111111")},
            "12341234-1234-1234-1234-123412341234",
        ),
        # Filter on element and double_page
        (
            {
                "element": CachedElement(id="22222222-2222-2222-2222-222222222222"),
                "type": "double_page",
            },
            "12341234-1234-1234-1234-123412341234",
        ),
        # Filter on element and worker version
        (
            {
                "element": CachedElement(id="33333333-3333-3333-3333-333333333333"),
                "worker_version": "56785678-5678-5678-5678-567856785678",
            },
            "12341234-1234-1234-1234-123412341234",
        ),
        # Filter on element, type double_page and worker version
        (
            {
                "element": CachedElement(id="11111111-1111-1111-1111-111111111111"),
                "type": "double_page",
                "worker_version": "56785678-5678-5678-5678-567856785678",
            },
            "12341234-1234-1234-1234-123412341234",
        ),
        # Filter on element, manual worker version
        (
            {
                "element": CachedElement(id="12341234-1234-1234-1234-123412341234"),
                "worker_version": False,
            },
            "99999999-9999-9999-9999-999999999999",
        ),
        # Filter on element and worker run
        (
            {
                "element": CachedElement(id="22222222-2222-2222-2222-222222222222"),
                "worker_run": "56785678-5678-5678-5678-567856785678",
            },
            "12341234-1234-1234-1234-123412341234",
        ),
        # Filter on element, manual worker run
        (
            {
                "element": CachedElement(id="12341234-1234-1234-1234-123412341234"),
                "worker_run": False,
            },
            "99999999-9999-9999-9999-999999999999",
        ),
    ),
)
def test_list_element_parents_with_cache(
    responses,
    mock_elements_worker_with_cache,
    mock_cached_elements,
    filters,
    expected_id,
):
    """With the cache enabled, the single expected parent is found without extra API calls."""
    # Check we have 5 elements already present in database
    assert CachedElement.select().count() == 5

    # Query database through cache
    cached_parents = mock_elements_worker_with_cache.list_element_parents(**filters)
    assert cached_parents.count() == 1
    expected_uuid = UUID(expected_id)
    for parent in cached_parents.order_by("id"):
        assert parent.id == expected_uuid

    # Check the worker never hits the API for elements
    assert len(responses.calls) == len(BASE_API_CALLS)
    performed_calls = [
        (call.request.method, call.request.url) for call in responses.calls
    ]
    assert performed_calls == BASE_API_CALLS
FROM python:3
FROM python:3.11
WORKDIR /src
......
arkindex-base-worker==0.3.3
arkindex-base-worker==0.3.4