Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • workers/base-worker
1 result
Show changes
Commits on Source (16)
Showing
with 182 additions and 404 deletions
......@@ -4,7 +4,7 @@ stages:
- release
lint:
image: python:3.10
image: python:3
cache:
paths:
......@@ -55,7 +55,8 @@ test:
- tox -- --junitxml=test-report.xml --durations=50
test-cookiecutter:
image: python:3
# Needed till next release
image: python:3.11
stage: test
cache:
......@@ -91,7 +92,7 @@ test-cookiecutter:
- worker-demo/
build-cookiecutter:
image: docker:19.03.1
image: docker:24.0.6
stage: build
services:
- docker:dind
......
0.3.3
0.3.4
......@@ -102,14 +102,26 @@ class CachedElement(Model):
database = db
table_name = "elements"
def open_image(self, *args, max_size: Optional[int] = None, **kwargs) -> Image:
def open_image(
self,
*args,
max_width: Optional[int] = None,
max_height: Optional[int] = None,
**kwargs,
) -> Image:
"""
Open this element's image as a Pillow image.
This does not crop the image to the element's polygon.
IIIF servers with maxWidth, maxHeight or maxArea restrictions on image size are not supported.
Warns:
----
If both, ``max_width`` and ``max_height`` are set, the image ratio is not preserved.
:param *args: Positional arguments passed to [arkindex_worker.image.open_image][]
:param max_size: Subresolution of the image.
:param max_width: The maximum width of the image.
:param max_height: The maximum height of the image.
:param **kwargs: Keyword arguments passed to [arkindex_worker.image.open_image][]
:raises ValueError: When this element does not have an image ID or a polygon.
:return: A Pillow image.
......@@ -129,7 +141,7 @@ class CachedElement(Model):
else:
box = "full"
if max_size is None:
if max_width is None and max_height is None:
resize = "full"
else:
# Do not resize for polygons that do not exactly match the images
......@@ -141,14 +153,12 @@ class CachedElement(Model):
resize = "full"
# Do not resize when the image is below the maximum size
elif self.image.width <= max_size and self.image.height <= max_size:
elif (max_width is None or self.image.width <= max_width) and (
max_height is None or self.image.height <= max_height
):
resize = "full"
else:
ratio = max_size / max(self.image.width, self.image.height)
new_width, new_height = int(self.image.width * ratio), int(
self.image.height * ratio
)
resize = f"{new_width},{new_height}"
resize = f"{max_width or ''},{max_height or ''}"
url = self.image.url
if not url.endswith("/"):
......
......@@ -10,8 +10,6 @@ from typing import Generator, List, Optional
from PIL import Image
from requests import HTTPError
from arkindex_worker import logger
class MagicDict(dict):
"""
......@@ -124,9 +122,10 @@ class Element(MagicDict):
def open_image(
self,
*args,
max_size: Optional[int] = None,
max_width: Optional[int] = None,
max_height: Optional[int] = None,
use_full_image: Optional[bool] = False,
**kwargs
**kwargs,
) -> Image:
"""
Open this element's image using Pillow, rotating and mirroring it according
......@@ -149,7 +148,13 @@ class Element(MagicDict):
``rotation_angle=0, mirrored=False`` as keyword arguments.
:param max_size: The maximum size of the requested image.
Warns:
----
If both, ``max_width`` and ``max_height`` are set, the image ratio is not preserved.
:param max_width: The maximum width of the image.
:param max_height: The maximum height of the image.
:param use_full_image: Ignore the ``zone.polygon`` and always
retrieve the image without cropping.
:param *args: Positional arguments passed to [arkindex_worker.image.open_image][].
......@@ -165,41 +170,29 @@ class Element(MagicDict):
from arkindex_worker.image import (
download_tiles,
open_image,
polygon_bounding_box,
)
if not self.get("zone"):
raise ValueError("Element {} has no zone".format(self.id))
if self.requires_tiles:
if max_size is None:
if max_width is None and max_height is None:
return download_tiles(self.zone.image.url)
else:
raise NotImplementedError
if max_size is not None:
bounding_box = polygon_bounding_box(self.zone.polygon)
if max_width is None and max_height is None:
resize = "full"
else:
original_size = {"w": self.zone.image.width, "h": self.zone.image.height}
# No resizing if the element is smaller than the image.
if (
bounding_box.width != original_size["w"]
or bounding_box.height != original_size["h"]
):
resize = "full"
logger.warning(
"Only full image size elements covered, "
+ "downloading full size image."
)
# No resizing if the image is smaller than the wanted size.
elif original_size["w"] <= max_size and original_size["h"] <= max_size:
if (max_width is None or original_size["w"] <= max_width) and (
max_height is None or original_size["h"] <= max_height
):
resize = "full"
# Resizing if the image is bigger than the wanted size.
else:
ratio = max_size / max(original_size.values())
new_width, new_height = [int(x * ratio) for x in original_size.values()]
resize = "{},{}".format(new_width, new_height)
else:
resize = "full"
resize = f"{max_width or ''},{max_height or ''}"
if use_full_image:
url = self.image_url(resize)
......@@ -212,7 +205,7 @@ class Element(MagicDict):
*args,
rotation_angle=self.rotation_angle,
mirrored=self.mirrored,
**kwargs
**kwargs,
)
except HTTPError as e:
if (
......
# -*- coding: utf-8 -*-
"""
Generator for the ``ml_report.json`` file, to report created worker results and exceptions.
"""
import json
import traceback
from collections import Counter
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Union
from uuid import UUID
from apistar.exceptions import ErrorResponse
from arkindex_worker import logger
from arkindex_worker.models import Transcription
class Reporter(object):
"""
Helper to generate an ``ml_report.json`` artifact.
"""
def __init__(
self,
name: Optional[str] = "Unknown worker",
slug: Optional[str] = "unknown-slug",
version: Optional[str] = None,
**kwargs,
):
self.report_data = {
"name": name,
"slug": slug,
"version": version,
"started": datetime.utcnow().isoformat(),
"elements": {},
}
logger.info(f"Starting ML report for {name}")
def __repr__(self):
return "{}({})".format(self.__class__.__name__, self.report_data["slug"])
def _get_element(self, element_id):
return self.report_data["elements"].setdefault(
str(element_id),
{
"started": datetime.utcnow().isoformat(),
# Created element counts, by type slug
"elements": {},
# Created transcriptions count
"transcriptions": 0,
# Created classification counts, by class
"classifications": {},
# Created entities ({"id": "", "type": "", "name": ""}) from this element
"entities": [],
# Created transcription entities ({"transcription_id": "", "entity_id": ""}) from this element
"transcription_entities": [],
# Created metadata ({"id": "", "type": "", "name": ""}) from this element
"metadata": [],
"errors": [],
},
)
def process(self, element_id: Union[str, UUID]):
"""
Report that a specific element ID is being processed.
:param element_id: ID of the element being processed.
"""
# Just call the element initializer
self._get_element(element_id)
def add_element(self, parent_id: Union[str, UUID], type: str, type_count: int = 1):
"""
Report creating an element as a child of another.
:param parent_id: ID of the parent element.
:param type: Slug of the type of the child element.
:param type_count: How many elements of this type were created.
"""
elements = self._get_element(parent_id)["elements"]
elements.setdefault(type, 0)
elements[type] += type_count
def add_classification(self, element_id: Union[str, UUID], class_name: str):
"""
Report creating a classification on an element.
:param element_id: ID of the element.
:param class_name: Name of the ML class of the new classification.
"""
classifications = self._get_element(element_id)["classifications"]
classifications.setdefault(class_name, 0)
classifications[class_name] += 1
def add_classifications(
self, element_id: Union[str, UUID], classifications: List[Dict[str, str]]
):
"""
Report creating one or more classifications at once on an element.
:param element_id: ID of the element.
:param classifications: List of classifications.
Each classification is represented as a ``dict`` with a ``class_name`` key
holding the name of the ML class being used.
"""
assert isinstance(
classifications, list
), "A list is required for classifications"
element = self._get_element(element_id)
# Retrieve the previous existing classification counts, if any
counter = Counter(**element["classifications"])
# Add the new ones
counter.update(
[classification["class_name"] for classification in classifications]
)
element["classifications"] = dict(counter)
def add_transcription(self, element_id: Union[str, UUID], count=1):
"""
Report creating a transcription on an element.
:param element_id: ID of the element.
:param count: Number of transcriptions created at once
"""
self._get_element(element_id)["transcriptions"] += count
def add_entity(
self,
element_id: Union[str, UUID],
entity_id: Union[str, UUID],
type: str,
name: str,
):
"""
Report creating an entity on an element.
:param element_id: ID of the element.
:param entity_id: ID of the new entity.
:param type: Type of the entity.
:param name: Name of the entity.
"""
entities = self._get_element(element_id)["entities"]
entities.append({"id": entity_id, "type": type, "name": name})
def add_transcription_entity(
self,
entity_id: Union[str, UUID],
transcription: Transcription,
transcription_entity_id: Union[str, UUID],
):
"""
Report creating a transcription entity on an element.
:param entity_id: ID of the entity element.
:param transcription: Transcription to add the entity on
:param transcription_entity_id: ID of the transcription entity that is created.
"""
transcription_entities = self._get_element(transcription.element.id)[
"transcription_entities"
]
transcription_entities.append(
{
"transcription_id": transcription.id,
"entity_id": entity_id,
"transcription_entity_id": transcription_entity_id,
}
)
def add_entity_link(self, *args, **kwargs):
"""
Report creating an entity link. Not currently supported.
:raises NotImplementedError:
"""
raise NotImplementedError
def add_entity_role(self, *args, **kwargs):
"""
Report creating an entity role. Not currently supported.
:raises NotImplementedError:
"""
raise NotImplementedError
def add_metadata(
self,
element_id: Union[str, UUID],
metadata_id: Union[str, UUID],
type: str,
name: str,
):
"""
Report creating a metadata from an element.
:param element_id: ID of the element.
:param metadata_id: ID of the new metadata.
:param type: Type of the metadata.
:param name: Name of the metadata.
"""
metadata = self._get_element(element_id)["metadata"]
metadata.append({"id": metadata_id, "type": type, "name": name})
def error(self, element_id: Union[str, UUID], exception: Exception):
"""
Report that a Python exception occurred when processing an element.
:param element_id: ID of the element.
:param exception: A Python exception.
"""
error_data = {
"class": exception.__class__.__name__,
"message": str(exception),
}
if exception.__traceback__ is not None:
error_data["traceback"] = "\n".join(
traceback.format_tb(exception.__traceback__)
)
if isinstance(exception, ErrorResponse):
error_data["message"] = exception.title
error_data["status_code"] = exception.status_code
error_data["content"] = exception.content
self._get_element(element_id)["errors"].append(error_data)
def save(self, path: Union[str, Path]):
"""
Save the ML report to the specified path.
:param path: Path to save the ML report to.
"""
logger.info(f"Saving ML report to {path}")
with open(path, "w") as f:
json.dump(self.report_data, f)
......@@ -15,7 +15,6 @@ from apistar.exceptions import ErrorResponse
from arkindex_worker import logger
from arkindex_worker.cache import CachedElement
from arkindex_worker.models import Element
from arkindex_worker.reporting import Reporter
from arkindex_worker.worker.base import BaseWorker
from arkindex_worker.worker.classification import ClassificationMixin
from arkindex_worker.worker.element import ElementMixin
......@@ -156,17 +155,12 @@ class ElementsWorker(
super().configure()
super().configure_cache()
# Add report concerning elements
self.report = Reporter(
**self.worker_details, version=getattr(self, "worker_version_id", None)
)
def run(self):
"""
Implements an Arkindex worker that goes through each element returned by
[list_elements][arkindex_worker.worker.ElementsWorker.list_elements]. It calls [process_element][arkindex_worker.worker.ElementsWorker.process_element], catching exceptions
and reporting them using the [Reporter][arkindex_worker.reporting.Reporter], and handles saving the report
once the process is complete as well as WorkerActivity updates when enabled.
[list_elements][arkindex_worker.worker.ElementsWorker.list_elements].
It calls [process_element][arkindex_worker.worker.ElementsWorker.process_element],
catching exceptions, and handles saving WorkerActivity updates when enabled.
"""
self.configure()
......@@ -232,10 +226,6 @@ class ElementsWorker(
self.update_activity(element.id, ActivityState.Error)
except Exception:
pass
self.report.error(element_id, e)
# Save report as local artifact
self.report.save(self.work_dir / "ml_report.json")
if failed:
logger.error(
......
......@@ -140,7 +140,7 @@ class BaseWorker(object):
self.process_information = None
# corpus_id will be updated in configure() using the worker_run's corpus
# or in configure_for_developers() from the environment
self.corpus_id = None
self._corpus_id = None
self.user_configuration = {}
self.model_configuration = {}
self.support_cache = support_cache
......@@ -155,6 +155,17 @@ class BaseWorker(object):
# Define API Client
self.setup_api_client()
@property
def corpus_id(self) -> str:
"""
ID of the corpus on which the worker is executed.
Has to be set through the `ARKINDEX_CORPUS_ID` variable in **read-only** mode.
Raises an Exception when trying to access when unset.
"""
if not self._corpus_id:
raise Exception("Missing ARKINDEX_CORPUS_ID environment variable")
return self._corpus_id
@property
def is_read_only(self) -> bool:
"""
......@@ -199,11 +210,7 @@ class BaseWorker(object):
logger.warning("Running without any extra configuration")
# Define corpus_id from environment
self.corpus_id = os.environ.get("ARKINDEX_CORPUS_ID")
if not self.corpus_id:
logger.warning(
"'ARKINDEX_CORPUS_ID' was not set in the environment. Any API request involving a `corpus_id` will fail."
)
self._corpus_id = os.environ.get("ARKINDEX_CORPUS_ID")
# Define model_version_id from environment
self.model_version_id = os.environ.get("ARKINDEX_MODEL_VERSION_ID")
......@@ -229,7 +236,7 @@ class BaseWorker(object):
self.process_information = worker_run["process"]
# Load corpus id
self.corpus_id = worker_run["process"]["corpus"]
self._corpus_id = worker_run["process"]["corpus"]
# Load worker version information
worker_version = worker_run["worker_version"]
......
......@@ -176,8 +176,6 @@ class ClassificationMixin(object):
# Propagate any other API error
raise
self.report.add_classification(element.id, ml_class)
return created
def create_classifications(
......@@ -248,7 +246,6 @@ class ClassificationMixin(object):
for created_cl in created_cls:
created_cl["class_name"] = self.retrieve_ml_class(created_cl["ml_class"])
self.report.add_classification(element.id, created_cl["class_name"])
if self.use_cache:
# Store classifications in local cache
......
......@@ -3,6 +3,7 @@
ElementsWorker methods for elements and element types.
"""
from typing import Dict, Iterable, List, NamedTuple, Optional, Union
from uuid import UUID
from peewee import IntegrityError
......@@ -141,7 +142,6 @@ class ElementMixin(object):
"confidence": confidence,
},
)
self.report.add_element(element.id, type)
return sub_element["id"] if slim_output else sub_element
......@@ -237,9 +237,6 @@ class ElementMixin(object):
},
)
for element in elements:
self.report.add_element(parent.id, element["type"])
if self.use_cache:
# Create the image as needed and handle both an Element and a CachedElement
if isinstance(parent, CachedElement):
......@@ -275,33 +272,39 @@ class ElementMixin(object):
return created_ids
def update_element(
self,
element: Union[Element, CachedElement],
type: Optional[str] = None,
name: Optional[str] = None,
polygon: Optional[List[List[Union[int, float]]]] = None,
confidence: Optional[float] = None,
def partial_update_element(
self, element: Union[Element, CachedElement], **kwargs
) -> dict:
"""
Partially update an element through the API.
Partially updates an element through the API.
:param element: The element to update.
:param type: Optional new slug type of the element.
:param name: Optional new name of the element.
:param polygon: Optional new polygon of the element.
:param confidence: Optional new confidence score, between 0.0 and 1.0.
:param **kwargs:
* *type* (``str``): Optional slug type of the element.
* *name* (``str``): Optional name of the element.
* *polygon* (``list``): Optional polygon for this element
* *confidence* (``float``): Optional confidence score of this element
* *rotation_angle* (``int``): Optional rotation angle of this element
* *mirrored* (``bool``): Optional mirror status of this element
* *image* (``UUID``): Optional ID of the image of this element
:returns: A dict from the ``PartialUpdateElement`` API endpoint,
"""
assert element and isinstance(
element, (Element, CachedElement)
), "element shouldn't be null and should be an Element or CachedElement"
assert type is None or isinstance(type, str), "type should be None or a str"
assert name is None or isinstance(name, str), "name should be None or a str"
assert polygon is None or isinstance(
polygon, list
), "polygon should be None or a list"
if polygon:
if "type" in kwargs:
assert isinstance(kwargs["type"], str), "type should be a str"
if "name" in kwargs:
assert isinstance(kwargs["name"], str), "name should be a str"
if "polygon" in kwargs:
polygon = kwargs["polygon"]
assert isinstance(polygon, list), "polygon should be a list"
assert len(polygon) >= 3, "polygon should have at least three points"
assert all(
isinstance(point, list) and len(point) == 2 for point in polygon
......@@ -309,9 +312,27 @@ class ElementMixin(object):
assert all(
isinstance(coord, (int, float)) for point in polygon for coord in point
), "polygon points should be lists of two numbers"
assert confidence is None or (
isinstance(confidence, float) and 0 <= confidence <= 1
), "confidence should be None or a float in [0..1] range"
if "confidence" in kwargs:
confidence = kwargs["confidence"]
assert confidence is None or (
isinstance(confidence, float) and 0 <= confidence <= 1
), "confidence should be None or a float in [0..1] range"
if "rotation_angle" in kwargs:
rotation_angle = kwargs["rotation_angle"]
assert (
isinstance(rotation_angle, int) and rotation_angle >= 0
), "rotation_angle should be a positive integer"
if "mirrored" in kwargs:
assert isinstance(kwargs["mirrored"], bool), "mirrored should be a boolean"
if "image" in kwargs:
image = kwargs["image"]
assert isinstance(image, UUID), "image should be a UUID"
# Cast to string
kwargs["image"] = str(image)
if self.is_read_only:
logger.warning("Cannot update element as this worker is in read-only mode")
......@@ -320,22 +341,24 @@ class ElementMixin(object):
updated_element = self.request(
"PartialUpdateElement",
id=element.id,
body={
"type": type,
"name": name,
"polygon": polygon,
"confidence": confidence,
},
body=kwargs,
)
if self.use_cache:
CachedElement.update(
{
CachedElement.type: type,
CachedElement.polygon: str(polygon),
CachedElement.confidence: confidence,
}
).where(CachedElement.id == element.id).execute()
# Name is not present in CachedElement model
kwargs.pop("name", None)
# Stringify polygon if present
if "polygon" in kwargs:
kwargs["polygon"] = str(kwargs["polygon"])
# Retrieve the right image
if "image" in kwargs:
kwargs["image"] = CachedImage.get_by_id(kwargs["image"])
CachedElement.update(**kwargs).where(
CachedElement.id == element.id
).execute()
return updated_element
......
......@@ -8,7 +8,7 @@ from typing import Dict, List, Optional, TypedDict, Union
from peewee import IntegrityError
from arkindex_worker import logger
from arkindex_worker.cache import CachedElement, CachedEntity, CachedTranscriptionEntity
from arkindex_worker.cache import CachedEntity, CachedTranscriptionEntity
from arkindex_worker.models import Element, Transcription
Entity = TypedDict(
......@@ -68,7 +68,6 @@ class EntityMixin(object):
def create_entity(
self,
element: Union[Element, CachedElement],
name: str,
type: str,
metas=dict(),
......@@ -78,14 +77,9 @@ class EntityMixin(object):
Create an entity on the given corpus.
If cache support is enabled, a [CachedEntity][arkindex_worker.cache.CachedEntity] will also be created.
:param element: An element on which the entity will be reported with the [Reporter][arkindex_worker.reporting.Reporter].
This does not have any effect on the entity itself.
:param name: Name of the entity.
:param type: Type of the entity.
"""
assert element and isinstance(
element, (Element, CachedElement)
), "element shouldn't be null and should be an Element or CachedElement"
assert name and isinstance(
name, str
), "name shouldn't be null and should be of type str"
......@@ -119,7 +113,6 @@ class EntityMixin(object):
"worker_run_id": self.worker_run_id,
},
)
self.report.add_entity(element.id, entity["id"], entity_type_id, name)
if self.use_cache:
# Store entity in local cache
......@@ -196,7 +189,6 @@ class EntityMixin(object):
id=transcription.id,
body=body,
)
self.report.add_transcription_entity(entity, transcription, transcription_ent)
if self.use_cache:
# Store transcription entity in local cache
......@@ -247,11 +239,6 @@ class EntityMixin(object):
transcription, Transcription
), "transcription shouldn't be null and should be of type Transcription"
# Needed for MLreport
assert (
hasattr(transcription, "element") and transcription.element
), f"No element linked to {transcription}"
assert entities and isinstance(
entities, list
), "entities shouldn't be null and should be of type list"
......@@ -301,22 +288,6 @@ class EntityMixin(object):
},
)
for entity, created_objects in zip(entities, created_ids["entities"]):
# Report entity creation
self.report.add_entity(
transcription.element.id,
created_objects["entity_id"],
entity.get("type_id"),
entity.get("name"),
)
# Report transcription entity creation
self.report.add_transcription_entity(
created_objects["entity_id"],
transcription,
created_objects["transcription_entity_id"],
)
return created_ids["entities"]
def list_transcription_entities(
......
......@@ -105,7 +105,6 @@ class MetaDataMixin(object):
"worker_run_id": self.worker_run_id,
},
)
self.report.add_metadata(element.id, metadata["id"], type.value, name)
return metadata["id"]
......@@ -182,9 +181,6 @@ class MetaDataMixin(object):
},
)["metadata_list"]
for meta in created_metadatas:
self.report.add_metadata(element.id, meta["id"], meta["type"], meta["name"])
return created_metadatas
def list_element_metadata(
......
......@@ -88,8 +88,6 @@ class TranscriptionMixin(object):
},
)
self.report.add_transcription(element.id)
if self.use_cache:
# Store transcription in local cache
try:
......@@ -181,9 +179,6 @@ class TranscriptionMixin(object):
},
)["transcriptions"]
for created_tr in created_trs:
self.report.add_transcription(created_tr["element_id"])
if self.use_cache:
# Store transcriptions in local cache
try:
......@@ -308,8 +303,6 @@ class TranscriptionMixin(object):
logger.debug(
f"A sub_element of {element.id} with type {sub_element_type} was created during transcriptions bulk creation"
)
self.report.add_element(element.id, sub_element_type)
self.report.add_transcription(annotation["element_id"])
if self.use_cache:
# Store transcriptions and their associated element (if created) in local cache
......
......@@ -83,20 +83,6 @@ The multiple configuration sources from the Arkindex-mode are merged into a uniq
One information cannot be retrieved directly from the configuration file and is required in some cases: the ID of the Arkindex corpus which the elements processed belong to. This is retrieved via the `ARKINDEX_CORPUS_ID` environment variable.
## Worker reporter
At the end of a worker execution, a report about the publication done by the worker is generated in JSON-format. This lists
- the starting time,
- the number of elements created, grouped by type,
- the number of transcription created,
- the number of classifications created, grouped by class,
- the number of entities created,
- the number of entities created on transcriptions,
- the number of metadatas created,
- the encountered errors' logs.
This is done by the many helper described in the [reporting module](../../ref/reporting.md). They use the `report` attribute initialized at the configuration stage.
## Setting Debug logging level
There are three ways to activate the debug mode:
......@@ -132,9 +118,6 @@ Many attributes are set on the worker during at the configuration stage. Here is
`process_information`
: The details about the process parent to this worker execution. Only set in Arkindex mode.
`reporter`
: The `Reporter` instance that will generate the `ml_report.json` artifacts which sums up the publication done during this execution and the errors encountered.
`secrets`
: A dictionary mapping the secret name to their parsed content.
......
......@@ -28,7 +28,6 @@ flowchart LR
subgraph id3[Loop over each element]
element_processing --> element_processing
end
element_processing -- Save ML report to disk --> reporting
end
init --> run
end
......
# Reporting
::: arkindex_worker.reporting
# Releases
## 0.3.4
Released on **14 Sept 2023** &bull; View on [Gitlab](https://gitlab.teklia.com/workers/base-worker/-/releases/0.3.4)
- The worker template was updated to correctly install [Git submodules](https://git-scm.com/book/en/v2/Git-Tools-Submodules) if it depends on any.
- Base-worker now uses [ruff](https://github.com/charliermarsh/ruff) for linting. This tool replaces `isort` and `flake8`.
- New Arkindex API helper to update an element, calling [PartialUpdateElement](https://demo.arkindex.org/api-docs/#tag/elements/operation/PartialUpdateElement).
- New Arkindex API helper to list an element's parents, calling [ListElementParents](https://demo.arkindex.org/api-docs/#tag/elements/operation/ListElementParents).
- Worker Activity API is now disabled when the worker runs in `read-only` mode instead of relying on the `--dev` CLI argument. The [update_activity](https://workers.arkindex.org/ref/elements_worker/#arkindex_worker.worker.ElementsWorker.update_activity) API helper was updated following Arkindex 1.5.1 changes.
- Worker can now resize the image of an element when opening them. This uses the [IIIF](https://iiif.io/api/image/2.1/#size) resizing API.
## 0.3.3
......
......@@ -88,7 +88,6 @@ nav:
- Models: ref/models.md
- Git & Gitlab support: ref/git.md
- Image utilities: ref/image.md
- Reporting: ref/reporting.md
- Cache: ref/cache.md
- Utils: ref/utils.md
- Releases: releases.md
......
arkindex-client==1.0.13
arkindex-client==1.0.14
peewee==3.16.3
Pillow==10.0.0
pymdown-extensions==10.2
Pillow==10.1.0
pymdown-extensions==10.3
python-gitlab==3.15.0
python-gnupg==0.5.1
sh==2.0.6
shapely==2.0.1
shapely==2.0.2
tenacity==8.2.3
zstandard==0.21.0
......@@ -16,6 +16,7 @@ from arkindex_worker.cache import (
MODELS,
SQL_VERSION,
CachedElement,
CachedImage,
CachedTranscription,
Version,
create_version_table,
......@@ -210,6 +211,15 @@ def mock_elements_worker(monkeypatch, mock_worker_run_api):
return worker
@pytest.fixture
def mock_elements_worker_read_only(monkeypatch):
"""Build and configure an ElementsWorker with fixed CLI parameters to avoid issues with pytest"""
monkeypatch.setattr(sys, "argv", ["worker", "--dev"])
worker = ElementsWorker()
worker.configure()
return worker
@pytest.fixture
def mock_elements_worker_with_list(monkeypatch, responses, mock_elements_worker):
"""
......@@ -363,6 +373,18 @@ def mock_cached_elements():
assert CachedElement.select().count() == 5
@pytest.fixture
def mock_cached_images():
"""Insert few elements in local cache"""
CachedImage.create(
id=UUID("99999999-9999-9999-9999-999999999999"),
width=1250,
height=2500,
url="http://testserver/iiif/3/image",
)
assert CachedImage.select().count() == 1
@pytest.fixture
def mock_cached_transcriptions():
"""Insert few transcriptions in local cache, on a shared element"""
......
......@@ -10,7 +10,7 @@ import pytest
from arkindex.mock import MockApiClient
from arkindex_worker import logger
from arkindex_worker.worker import BaseWorker
from arkindex_worker.worker import BaseWorker, ElementsWorker
from arkindex_worker.worker.base import ModelNotFoundError
from tests.conftest import FIXTURES_DIR
......@@ -739,3 +739,25 @@ def test_extract_parent_archives(tmp_path):
)
mode = "rb" if extracted_file.suffix == ".png" else "r"
assert extracted_file.open(mode).read() == expected_file.open(mode).read()
def test_corpus_id_not_set_read_only_mode(
mock_elements_worker_read_only: ElementsWorker,
):
mock_elements_worker_read_only.configure()
with pytest.raises(
Exception, match="Missing ARKINDEX_CORPUS_ID environment variable"
):
mock_elements_worker_read_only.corpus_id
def test_corpus_id_set_read_only_mode(
monkeypatch, mock_elements_worker_read_only: ElementsWorker
):
corpus_id = str(uuid.uuid4())
monkeypatch.setenv("ARKINDEX_CORPUS_ID", corpus_id)
mock_elements_worker_read_only.configure()
assert mock_elements_worker_read_only.corpus_id == corpus_id