Compare revisions

Yoann Schneider · Manon Blanco · Yoann Schneider · Yoann Schneider · Bastien Abadie · Yoann Schneider
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -4,7 +4,7 @@ stages:
  - release

 lint:
-  image: python:3.10
+  image: python:3

  cache:
    paths:
@@ -55,7 +55,8 @@ test:
    - tox -- --junitxml=test-report.xml --durations=50

 test-cookiecutter:
-  image: python:3
+  # Needed till next release
+  image: python:3.11

  stage: test
  cache:
@@ -91,7 +92,7 @@ test-cookiecutter:
      - worker-demo/

 build-cookiecutter:
-  image: docker:19.03.1
+  image: docker:24.0.6
  stage: build
  services:
    - docker:dind

--- a/VERSION
+++ b/VERSION
-0.3.3
+0.3.4
--- a/arkindex_worker/cache.py
+++ b/arkindex_worker/cache.py
@@ -102,14 +102,26 @@ class CachedElement(Model):
        database = db
        table_name = "elements"

-    def open_image(self, *args, max_size: Optional[int] = None, **kwargs) -> Image:
+    def open_image(
+        self,
+        *args,
+        max_width: Optional[int] = None,
+        max_height: Optional[int] = None,
+        **kwargs,
+    ) -> Image:
        """
        Open this element's image as a Pillow image.
        This does not crop the image to the element's polygon.
        IIIF servers with maxWidth, maxHeight or maxArea restrictions on image size are not supported.

+        Warns:
+        ----
+           If both ``max_width`` and ``max_height`` are set, the image ratio is not preserved.
+
+
        :param *args: Positional arguments passed to [arkindex_worker.image.open_image][]
-        :param max_size: Subresolution of the image.
+        :param max_width: The maximum width of the image.
+        :param max_height: The maximum height of the image.
        :param **kwargs: Keyword arguments passed to [arkindex_worker.image.open_image][]
        :raises ValueError: When this element does not have an image ID or a polygon.
        :return: A Pillow image.
@@ -129,7 +141,7 @@ class CachedElement(Model):
        else:
            box = "full"

-        if max_size is None:
+        if max_width is None and max_height is None:
            resize = "full"
        else:
            # Do not resize for polygons that do not exactly match the images
@@ -141,14 +153,12 @@ class CachedElement(Model):
                resize = "full"

            # Do not resize when the image is below the maximum size
-            elif self.image.width <= max_size and self.image.height <= max_size:
+            elif (max_width is None or self.image.width <= max_width) and (
+                max_height is None or self.image.height <= max_height
+            ):
                resize = "full"
            else:
-                ratio = max_size / max(self.image.width, self.image.height)
-                new_width, new_height = int(self.image.width * ratio), int(
-                    self.image.height * ratio
-                )
-                resize = f"{new_width},{new_height}"
+                resize = f"{max_width or ''},{max_height or ''}"

        url = self.image.url
        if not url.endswith("/"):

--- a/arkindex_worker/models.py
+++ b/arkindex_worker/models.py
@@ -10,8 +10,6 @@ from typing import Generator, List, Optional
 from PIL import Image
 from requests import HTTPError

-from arkindex_worker import logger
-

 class MagicDict(dict):
    """
@@ -124,9 +122,10 @@ class Element(MagicDict):
    def open_image(
        self,
        *args,
-        max_size: Optional[int] = None,
+        max_width: Optional[int] = None,
+        max_height: Optional[int] = None,
        use_full_image: Optional[bool] = False,
-        **kwargs
+        **kwargs,
    ) -> Image:
        """
        Open this element's image using Pillow, rotating and mirroring it according
@@ -149,7 +148,13 @@ class Element(MagicDict):
           ``rotation_angle=0, mirrored=False`` as keyword arguments.


-        :param max_size: The maximum size of the requested image.
+        Warns:
+        ----
+           If both ``max_width`` and ``max_height`` are set, the image ratio is not preserved.
+
+
+        :param max_width: The maximum width of the image.
+        :param max_height: The maximum height of the image.
        :param use_full_image: Ignore the ``zone.polygon`` and always
           retrieve the image without cropping.
        :param *args: Positional arguments passed to [arkindex_worker.image.open_image][].
@@ -165,41 +170,29 @@ class Element(MagicDict):
        from arkindex_worker.image import (
            download_tiles,
            open_image,
-            polygon_bounding_box,
        )

        if not self.get("zone"):
            raise ValueError("Element {} has no zone".format(self.id))

        if self.requires_tiles:
-            if max_size is None:
+            if max_width is None and max_height is None:
                return download_tiles(self.zone.image.url)
            else:
                raise NotImplementedError

-        if max_size is not None:
-            bounding_box = polygon_bounding_box(self.zone.polygon)
+        if max_width is None and max_height is None:
+            resize = "full"
+        else:
            original_size = {"w": self.zone.image.width, "h": self.zone.image.height}
-            # No resizing if the element is smaller than the image.
-            if (
-                bounding_box.width != original_size["w"]
-                or bounding_box.height != original_size["h"]
-            ):
-                resize = "full"
-                logger.warning(
-                    "Only full image size elements covered, "
-                    + "downloading full size image."
-                )
            # No resizing if the image is smaller than the wanted size.
-            elif original_size["w"] <= max_size and original_size["h"] <= max_size:
+            if (max_width is None or original_size["w"] <= max_width) and (
+                max_height is None or original_size["h"] <= max_height
+            ):
                resize = "full"
            # Resizing if the image is bigger than the wanted size.
            else:
-                ratio = max_size / max(original_size.values())
-                new_width, new_height = [int(x * ratio) for x in original_size.values()]
-                resize = "{},{}".format(new_width, new_height)
-        else:
-            resize = "full"
+                resize = f"{max_width or ''},{max_height or ''}"

        if use_full_image:
            url = self.image_url(resize)
@@ -212,7 +205,7 @@ class Element(MagicDict):
                *args,
                rotation_angle=self.rotation_angle,
                mirrored=self.mirrored,
-                **kwargs
+                **kwargs,
            )
        except HTTPError as e:
            if (

--- a/arkindex_worker/reporting.py
+++ b/arkindex_worker/reporting.py
-# -*- coding: utf-8 -*-
-"""
-Generator for the ``ml_report.json`` file, to report created worker results and exceptions.
-"""
-
-import json
-import traceback
-from collections import Counter
-from datetime import datetime
-from pathlib import Path
-from typing import Dict, List, Optional, Union
-from uuid import UUID
-
-from apistar.exceptions import ErrorResponse
-
-from arkindex_worker import logger
-from arkindex_worker.models import Transcription
-
-
-class Reporter(object):
-    """
-    Helper to generate an ``ml_report.json`` artifact.
-    """
-
-    def __init__(
-        self,
-        name: Optional[str] = "Unknown worker",
-        slug: Optional[str] = "unknown-slug",
-        version: Optional[str] = None,
-        **kwargs,
-    ):
-        self.report_data = {
-            "name": name,
-            "slug": slug,
-            "version": version,
-            "started": datetime.utcnow().isoformat(),
-            "elements": {},
-        }
-        logger.info(f"Starting ML report for {name}")
-
-    def __repr__(self):
-        return "{}({})".format(self.__class__.__name__, self.report_data["slug"])
-
-    def _get_element(self, element_id):
-        return self.report_data["elements"].setdefault(
-            str(element_id),
-            {
-                "started": datetime.utcnow().isoformat(),
-                # Created element counts, by type slug
-                "elements": {},
-                # Created transcriptions count
-                "transcriptions": 0,
-                # Created classification counts, by class
-                "classifications": {},
-                # Created entities ({"id": "", "type": "", "name": ""}) from this element
-                "entities": [],
-                # Created transcription entities ({"transcription_id": "", "entity_id": ""}) from this element
-                "transcription_entities": [],
-                # Created metadata ({"id": "", "type": "", "name": ""}) from this element
-                "metadata": [],
-                "errors": [],
-            },
-        )
-
-    def process(self, element_id: Union[str, UUID]):
-        """
-        Report that a specific element ID is being processed.
-
-        :param element_id: ID of the element being processed.
-        """
-        # Just call the element initializer
-        self._get_element(element_id)
-
-    def add_element(self, parent_id: Union[str, UUID], type: str, type_count: int = 1):
-        """
-        Report creating an element as a child of another.
-
-        :param parent_id: ID of the parent element.
-        :param type: Slug of the type of the child element.
-        :param type_count: How many elements of this type were created.
-        """
-        elements = self._get_element(parent_id)["elements"]
-        elements.setdefault(type, 0)
-        elements[type] += type_count
-
-    def add_classification(self, element_id: Union[str, UUID], class_name: str):
-        """
-        Report creating a classification on an element.
-
-        :param element_id: ID of the element.
-        :param class_name: Name of the ML class of the new classification.
-        """
-        classifications = self._get_element(element_id)["classifications"]
-        classifications.setdefault(class_name, 0)
-        classifications[class_name] += 1
-
-    def add_classifications(
-        self, element_id: Union[str, UUID], classifications: List[Dict[str, str]]
-    ):
-        """
-        Report creating one or more classifications at once on an element.
-
-        :param element_id: ID of the element.
-        :param classifications: List of classifications.
-           Each classification is represented as a ``dict`` with a ``class_name`` key
-           holding the name of the ML class being used.
-        """
-        assert isinstance(
-            classifications, list
-        ), "A list is required for classifications"
-        element = self._get_element(element_id)
-        # Retrieve the previous existing classification counts, if any
-        counter = Counter(**element["classifications"])
-        # Add the new ones
-        counter.update(
-            [classification["class_name"] for classification in classifications]
-        )
-        element["classifications"] = dict(counter)
-
-    def add_transcription(self, element_id: Union[str, UUID], count=1):
-        """
-        Report creating a transcription on an element.
-
-        :param element_id: ID of the element.
-        :param count: Number of transcriptions created at once
-        """
-        self._get_element(element_id)["transcriptions"] += count
-
-    def add_entity(
-        self,
-        element_id: Union[str, UUID],
-        entity_id: Union[str, UUID],
-        type: str,
-        name: str,
-    ):
-        """
-        Report creating an entity on an element.
-
-        :param element_id: ID of the element.
-        :param entity_id: ID of the new entity.
-        :param type: Type of the entity.
-        :param name: Name of the entity.
-        """
-        entities = self._get_element(element_id)["entities"]
-        entities.append({"id": entity_id, "type": type, "name": name})
-
-    def add_transcription_entity(
-        self,
-        entity_id: Union[str, UUID],
-        transcription: Transcription,
-        transcription_entity_id: Union[str, UUID],
-    ):
-        """
-        Report creating a transcription entity on an element.
-
-        :param entity_id: ID of the entity element.
-        :param transcription: Transcription to add the entity on
-        :param transcription_entity_id: ID of the transcription entity that is created.
-        """
-        transcription_entities = self._get_element(transcription.element.id)[
-            "transcription_entities"
-        ]
-        transcription_entities.append(
-            {
-                "transcription_id": transcription.id,
-                "entity_id": entity_id,
-                "transcription_entity_id": transcription_entity_id,
-            }
-        )
-
-    def add_entity_link(self, *args, **kwargs):
-        """
-        Report creating an entity link. Not currently supported.
-
-        :raises NotImplementedError:
-        """
-        raise NotImplementedError
-
-    def add_entity_role(self, *args, **kwargs):
-        """
-        Report creating an entity role. Not currently supported.
-
-        :raises NotImplementedError:
-        """
-        raise NotImplementedError
-
-    def add_metadata(
-        self,
-        element_id: Union[str, UUID],
-        metadata_id: Union[str, UUID],
-        type: str,
-        name: str,
-    ):
-        """
-        Report creating a metadata from an element.
-
-        :param element_id: ID of the element.
-        :param metadata_id: ID of the new metadata.
-        :param type: Type of the metadata.
-        :param name: Name of the metadata.
-        """
-        metadata = self._get_element(element_id)["metadata"]
-        metadata.append({"id": metadata_id, "type": type, "name": name})
-
-    def error(self, element_id: Union[str, UUID], exception: Exception):
-        """
-        Report that a Python exception occurred when processing an element.
-
-        :param element_id: ID of the element.
-        :param exception: A Python exception.
-        """
-        error_data = {
-            "class": exception.__class__.__name__,
-            "message": str(exception),
-        }
-        if exception.__traceback__ is not None:
-            error_data["traceback"] = "\n".join(
-                traceback.format_tb(exception.__traceback__)
-            )
-
-        if isinstance(exception, ErrorResponse):
-            error_data["message"] = exception.title
-            error_data["status_code"] = exception.status_code
-            error_data["content"] = exception.content
-
-        self._get_element(element_id)["errors"].append(error_data)
-
-    def save(self, path: Union[str, Path]):
-        """
-        Save the ML report to the specified path.
-
-        :param path: Path to save the ML report to.
-        """
-        logger.info(f"Saving ML report to {path}")
-        with open(path, "w") as f:
-            json.dump(self.report_data, f)
--- a/arkindex_worker/worker/__init__.py
+++ b/arkindex_worker/worker/__init__.py
@@ -15,7 +15,6 @@ from apistar.exceptions import ErrorResponse
 from arkindex_worker import logger
 from arkindex_worker.cache import CachedElement
 from arkindex_worker.models import Element
-from arkindex_worker.reporting import Reporter
 from arkindex_worker.worker.base import BaseWorker
 from arkindex_worker.worker.classification import ClassificationMixin
 from arkindex_worker.worker.element import ElementMixin
@@ -156,17 +155,12 @@ class ElementsWorker(
            super().configure()
            super().configure_cache()

-        # Add report concerning elements
-        self.report = Reporter(
-            **self.worker_details, version=getattr(self, "worker_version_id", None)
-        )
-
    def run(self):
        """
        Implements an Arkindex worker that goes through each element returned by
-        [list_elements][arkindex_worker.worker.ElementsWorker.list_elements]. It calls [process_element][arkindex_worker.worker.ElementsWorker.process_element], catching exceptions
-        and reporting them using the [Reporter][arkindex_worker.reporting.Reporter], and handles saving the report
-        once the process is complete as well as WorkerActivity updates when enabled.
+        [list_elements][arkindex_worker.worker.ElementsWorker.list_elements].
+        It calls [process_element][arkindex_worker.worker.ElementsWorker.process_element],
+        catching exceptions, and handles saving WorkerActivity updates when enabled.
        """
        self.configure()

@@ -232,10 +226,6 @@ class ElementsWorker(
                        self.update_activity(element.id, ActivityState.Error)
                    except Exception:
                        pass
-                self.report.error(element_id, e)
-
-        # Save report as local artifact
-        self.report.save(self.work_dir / "ml_report.json")

        if failed:
            logger.error(

--- a/arkindex_worker/worker/base.py
+++ b/arkindex_worker/worker/base.py
@@ -140,7 +140,7 @@ class BaseWorker(object):
        self.process_information = None
        # corpus_id will be updated in configure() using the worker_run's corpus
        # or in configure_for_developers() from the environment
-        self.corpus_id = None
+        self._corpus_id = None
        self.user_configuration = {}
        self.model_configuration = {}
        self.support_cache = support_cache
@@ -155,6 +155,17 @@ class BaseWorker(object):
        # Define API Client
        self.setup_api_client()

+    @property
+    def corpus_id(self) -> str:
+        """
+        ID of the corpus on which the worker is executed.
+        In **read-only** mode, it has to be set through the `ARKINDEX_CORPUS_ID` environment variable.
+        Raises an Exception when accessed while unset.
+        """
+        if not self._corpus_id:
+            raise Exception("Missing ARKINDEX_CORPUS_ID environment variable")
+        return self._corpus_id
+
    @property
    def is_read_only(self) -> bool:
        """
@@ -199,11 +210,7 @@ class BaseWorker(object):
            logger.warning("Running without any extra configuration")

        # Define corpus_id from environment
-        self.corpus_id = os.environ.get("ARKINDEX_CORPUS_ID")
-        if not self.corpus_id:
-            logger.warning(
-                "'ARKINDEX_CORPUS_ID' was not set in the environment. Any API request involving a `corpus_id` will fail."
-            )
+        self._corpus_id = os.environ.get("ARKINDEX_CORPUS_ID")

        # Define model_version_id from environment
        self.model_version_id = os.environ.get("ARKINDEX_MODEL_VERSION_ID")
@@ -229,7 +236,7 @@ class BaseWorker(object):
        self.process_information = worker_run["process"]

        # Load corpus id
-        self.corpus_id = worker_run["process"]["corpus"]
+        self._corpus_id = worker_run["process"]["corpus"]

        # Load worker version information
        worker_version = worker_run["worker_version"]

--- a/arkindex_worker/worker/classification.py
+++ b/arkindex_worker/worker/classification.py
@@ -176,8 +176,6 @@ class ClassificationMixin(object):
            # Propagate any other API error
            raise

-        self.report.add_classification(element.id, ml_class)
-
        return created

    def create_classifications(
@@ -248,7 +246,6 @@ class ClassificationMixin(object):

        for created_cl in created_cls:
            created_cl["class_name"] = self.retrieve_ml_class(created_cl["ml_class"])
-            self.report.add_classification(element.id, created_cl["class_name"])

        if self.use_cache:
            # Store classifications in local cache

--- a/arkindex_worker/worker/element.py
+++ b/arkindex_worker/worker/element.py
@@ -3,6 +3,7 @@
 ElementsWorker methods for elements and element types.
 """
 from typing import Dict, Iterable, List, NamedTuple, Optional, Union
+from uuid import UUID

 from peewee import IntegrityError

@@ -141,7 +142,6 @@ class ElementMixin(object):
                "confidence": confidence,
            },
        )
-        self.report.add_element(element.id, type)

        return sub_element["id"] if slim_output else sub_element

@@ -237,9 +237,6 @@ class ElementMixin(object):
            },
        )

-        for element in elements:
-            self.report.add_element(parent.id, element["type"])
-
        if self.use_cache:
            # Create the image as needed and handle both an Element and a CachedElement
            if isinstance(parent, CachedElement):
@@ -275,33 +272,39 @@ class ElementMixin(object):

        return created_ids

-    def update_element(
-        self,
-        element: Union[Element, CachedElement],
-        type: Optional[str] = None,
-        name: Optional[str] = None,
-        polygon: Optional[List[List[Union[int, float]]]] = None,
-        confidence: Optional[float] = None,
+    def partial_update_element(
+        self, element: Union[Element, CachedElement], **kwargs
    ) -> dict:
        """
-        Partially update an element through the API.
+        Partially updates an element through the API.

        :param element: The element to update.
-        :param type: Optional new slug type of the element.
-        :param name: Optional new name of the element.
-        :param polygon: Optional new polygon of the element.
-        :param confidence: Optional new confidence score, between 0.0 and 1.0.
+        :param **kwargs:
+
+            * *type* (``str``): Optional slug type of the element.
+            * *name* (``str``): Optional name of the element.
+            * *polygon* (``list``): Optional polygon for this element
+            * *confidence* (``float``): Optional confidence score of this element
+            * *rotation_angle* (``int``): Optional rotation angle of this element
+            * *mirrored* (``bool``): Optional mirror status of this element
+            * *image* (``UUID``): Optional ID of the image of this element
+
+
        :returns: A dict from the ``PartialUpdateElement`` API endpoint.
        """
        assert element and isinstance(
            element, (Element, CachedElement)
        ), "element shouldn't be null and should be an Element or CachedElement"
-        assert type is None or isinstance(type, str), "type should be None or a str"
-        assert name is None or isinstance(name, str), "name should be None or a str"
-        assert polygon is None or isinstance(
-            polygon, list
-        ), "polygon should be None or a list"
-        if polygon:
+
+        if "type" in kwargs:
+            assert isinstance(kwargs["type"], str), "type should be a str"
+
+        if "name" in kwargs:
+            assert isinstance(kwargs["name"], str), "name should be a str"
+
+        if "polygon" in kwargs:
+            polygon = kwargs["polygon"]
+            assert isinstance(polygon, list), "polygon should be a list"
            assert len(polygon) >= 3, "polygon should have at least three points"
            assert all(
                isinstance(point, list) and len(point) == 2 for point in polygon
@@ -309,9 +312,27 @@ class ElementMixin(object):
            assert all(
                isinstance(coord, (int, float)) for point in polygon for coord in point
            ), "polygon points should be lists of two numbers"
-        assert confidence is None or (
-            isinstance(confidence, float) and 0 <= confidence <= 1
-        ), "confidence should be None or a float in [0..1] range"
+
+        if "confidence" in kwargs:
+            confidence = kwargs["confidence"]
+            assert confidence is None or (
+                isinstance(confidence, float) and 0 <= confidence <= 1
+            ), "confidence should be None or a float in [0..1] range"
+
+        if "rotation_angle" in kwargs:
+            rotation_angle = kwargs["rotation_angle"]
+            assert (
+                isinstance(rotation_angle, int) and rotation_angle >= 0
+            ), "rotation_angle should be a positive integer"
+
+        if "mirrored" in kwargs:
+            assert isinstance(kwargs["mirrored"], bool), "mirrored should be a boolean"
+
+        if "image" in kwargs:
+            image = kwargs["image"]
+            assert isinstance(image, UUID), "image should be a UUID"
+            # Cast to string
+            kwargs["image"] = str(image)

        if self.is_read_only:
            logger.warning("Cannot update element as this worker is in read-only mode")
@@ -320,22 +341,24 @@ class ElementMixin(object):
        updated_element = self.request(
            "PartialUpdateElement",
            id=element.id,
-            body={
-                "type": type,
-                "name": name,
-                "polygon": polygon,
-                "confidence": confidence,
-            },
+            body=kwargs,
        )

        if self.use_cache:
-            CachedElement.update(
-                {
-                    CachedElement.type: type,
-                    CachedElement.polygon: str(polygon),
-                    CachedElement.confidence: confidence,
-                }
-            ).where(CachedElement.id == element.id).execute()
+            # Name is not present in CachedElement model
+            kwargs.pop("name", None)
+
+            # Stringify polygon if present
+            if "polygon" in kwargs:
+                kwargs["polygon"] = str(kwargs["polygon"])
+
+            # Retrieve the right image
+            if "image" in kwargs:
+                kwargs["image"] = CachedImage.get_by_id(kwargs["image"])
+
+            CachedElement.update(**kwargs).where(
+                CachedElement.id == element.id
+            ).execute()

        return updated_element


--- a/arkindex_worker/worker/entity.py
+++ b/arkindex_worker/worker/entity.py
@@ -8,7 +8,7 @@ from typing import Dict, List, Optional, TypedDict, Union
 from peewee import IntegrityError

 from arkindex_worker import logger
-from arkindex_worker.cache import CachedElement, CachedEntity, CachedTranscriptionEntity
+from arkindex_worker.cache import CachedEntity, CachedTranscriptionEntity
 from arkindex_worker.models import Element, Transcription

 Entity = TypedDict(
@@ -68,7 +68,6 @@ class EntityMixin(object):

    def create_entity(
        self,
-        element: Union[Element, CachedElement],
        name: str,
        type: str,
        metas=dict(),
@@ -78,14 +77,9 @@ class EntityMixin(object):
        Create an entity on the given corpus.
        If cache support is enabled, a [CachedEntity][arkindex_worker.cache.CachedEntity] will also be created.

-        :param element: An element on which the entity will be reported with the [Reporter][arkindex_worker.reporting.Reporter].
-           This does not have any effect on the entity itself.
        :param name: Name of the entity.
        :param type: Type of the entity.
        """
-        assert element and isinstance(
-            element, (Element, CachedElement)
-        ), "element shouldn't be null and should be an Element or CachedElement"
        assert name and isinstance(
            name, str
        ), "name shouldn't be null and should be of type str"
@@ -119,7 +113,6 @@ class EntityMixin(object):
                "worker_run_id": self.worker_run_id,
            },
        )
-        self.report.add_entity(element.id, entity["id"], entity_type_id, name)

        if self.use_cache:
            # Store entity in local cache
@@ -196,7 +189,6 @@ class EntityMixin(object):
            id=transcription.id,
            body=body,
        )
-        self.report.add_transcription_entity(entity, transcription, transcription_ent)

        if self.use_cache:
            # Store transcription entity in local cache
@@ -247,11 +239,6 @@ class EntityMixin(object):
            transcription, Transcription
        ), "transcription shouldn't be null and should be of type Transcription"

-        # Needed for MLreport
-        assert (
-            hasattr(transcription, "element") and transcription.element
-        ), f"No element linked to {transcription}"
-
        assert entities and isinstance(
            entities, list
        ), "entities shouldn't be null and should be of type list"
@@ -301,22 +288,6 @@ class EntityMixin(object):
            },
        )

-        for entity, created_objects in zip(entities, created_ids["entities"]):
-            # Report entity creation
-            self.report.add_entity(
-                transcription.element.id,
-                created_objects["entity_id"],
-                entity.get("type_id"),
-                entity.get("name"),
-            )
-
-            # Report transcription entity creation
-            self.report.add_transcription_entity(
-                created_objects["entity_id"],
-                transcription,
-                created_objects["transcription_entity_id"],
-            )
-
        return created_ids["entities"]

    def list_transcription_entities(

--- a/arkindex_worker/worker/metadata.py
+++ b/arkindex_worker/worker/metadata.py
@@ -105,7 +105,6 @@ class MetaDataMixin(object):
                "worker_run_id": self.worker_run_id,
            },
        )
-        self.report.add_metadata(element.id, metadata["id"], type.value, name)

        return metadata["id"]

@@ -182,9 +181,6 @@ class MetaDataMixin(object):
            },
        )["metadata_list"]

-        for meta in created_metadatas:
-            self.report.add_metadata(element.id, meta["id"], meta["type"], meta["name"])
-
        return created_metadatas

    def list_element_metadata(

--- a/arkindex_worker/worker/transcription.py
+++ b/arkindex_worker/worker/transcription.py
@@ -88,8 +88,6 @@ class TranscriptionMixin(object):
            },
        )

-        self.report.add_transcription(element.id)
-
        if self.use_cache:
            # Store transcription in local cache
            try:
@@ -181,9 +179,6 @@ class TranscriptionMixin(object):
            },
        )["transcriptions"]

-        for created_tr in created_trs:
-            self.report.add_transcription(created_tr["element_id"])
-
        if self.use_cache:
            # Store transcriptions in local cache
            try:
@@ -308,8 +303,6 @@ class TranscriptionMixin(object):
                logger.debug(
                    f"A sub_element of {element.id} with type {sub_element_type} was created during transcriptions bulk creation"
                )
-                self.report.add_element(element.id, sub_element_type)
-            self.report.add_transcription(annotation["element_id"])

        if self.use_cache:
            # Store transcriptions and their associated element (if created) in local cache

--- a/docs/contents/implem/configure.md
+++ b/docs/contents/implem/configure.md
@@ -83,20 +83,6 @@ The multiple configuration sources from the Arkindex-mode are merged into a uniq

 One piece of information cannot be retrieved directly from the configuration file and is required in some cases: the ID of the Arkindex corpus to which the processed elements belong. This is retrieved via the `ARKINDEX_CORPUS_ID` environment variable.

-## Worker reporter
-At the end of a worker execution, a report about the publication done by the worker is generated in JSON-format. This lists
-
- the starting time,
- the number of elements created, grouped by type,
- the number of transcription created,
- the number of classifications created, grouped by class,
- the number of entities created,
- the number of entities created on transcriptions,
- the number of metadatas created,
- the encountered errors' logs.
-
-This is done by the many helper described in the [reporting module](../../ref/reporting.md). They use the `report` attribute initialized at the configuration stage.
-
 ## Setting Debug logging level
 There are three ways to activate the debug mode:

@@ -132,9 +118,6 @@ Many attributes are set on the worker during at the configuration stage. Here is
 `process_information`
 : The details about the process parent to this worker execution. Only set in Arkindex mode.

-`reporter`
-: The `Reporter` instance that will generate the `ml_report.json` artifacts which sums up the publication done during this execution and the errors encountered.
-
 `secrets`
 : A dictionary mapping the secret name to their parsed content.


--- a/docs/contents/implem/index.md
+++ b/docs/contents/implem/index.md
@@ -28,7 +28,6 @@ flowchart LR
            subgraph id3[Loop over each element]
                element_processing --> element_processing
            end
-            element_processing -- Save ML report to disk --> reporting
        end
        init --> run
    end

--- a/docs/ref/reporting.md
+++ b/docs/ref/reporting.md
-# Reporting
-
-::: arkindex_worker.reporting
--- a/docs/releases.md
+++ b/docs/releases.md
 # Releases


+## 0.3.4
+
+Released on **14 Sept 2023** &bull; View on [Gitlab](https://gitlab.teklia.com/workers/base-worker/-/releases/0.3.4)
+
+- The worker template was updated to correctly install [Git submodules](https://git-scm.com/book/en/v2/Git-Tools-Submodules) if it depends on any.
+- Base-worker now uses [ruff](https://github.com/charliermarsh/ruff) for linting. This tool replaces `isort` and `flake8`.
+- New Arkindex API helper to update an element, calling [PartialUpdateElement](https://demo.arkindex.org/api-docs/#tag/elements/operation/PartialUpdateElement).
+- New Arkindex API helper to list an element's parents, calling [ListElementParents](https://demo.arkindex.org/api-docs/#tag/elements/operation/ListElementParents).
+- Worker Activity API is now disabled when the worker runs in `read-only` mode instead of relying on the `--dev` CLI argument. The [update_activity](https://workers.arkindex.org/ref/elements_worker/#arkindex_worker.worker.ElementsWorker.update_activity) API helper was updated following Arkindex 1.5.1 changes.
+- Worker can now resize the image of an element when opening it. This uses the [IIIF](https://iiif.io/api/image/2.1/#size) resizing API.
+

 ## 0.3.3


--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -88,7 +88,6 @@ nav:
      - Models: ref/models.md
      - Git & Gitlab support: ref/git.md
      - Image utilities: ref/image.md
-      - Reporting: ref/reporting.md
      - Cache: ref/cache.md
      - Utils: ref/utils.md
  - Releases: releases.md

--- a/requirements.txt
+++ b/requirements.txt
-arkindex-client==1.0.13
+arkindex-client==1.0.14
 peewee==3.16.3
-Pillow==10.0.0
-pymdown-extensions==10.2
+Pillow==10.1.0
+pymdown-extensions==10.3
 python-gitlab==3.15.0
 python-gnupg==0.5.1
 sh==2.0.6
-shapely==2.0.1
+shapely==2.0.2
 tenacity==8.2.3
 zstandard==0.21.0
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -16,6 +16,7 @@ from arkindex_worker.cache import (
    MODELS,
    SQL_VERSION,
    CachedElement,
+    CachedImage,
    CachedTranscription,
    Version,
    create_version_table,
@@ -210,6 +211,15 @@ def mock_elements_worker(monkeypatch, mock_worker_run_api):
    return worker


+@pytest.fixture
+def mock_elements_worker_read_only(monkeypatch):
+    """Build and configure an ElementsWorker with fixed CLI parameters to avoid issues with pytest"""
+    monkeypatch.setattr(sys, "argv", ["worker", "--dev"])
+    worker = ElementsWorker()
+    worker.configure()
+    return worker
+
+
 @pytest.fixture
 def mock_elements_worker_with_list(monkeypatch, responses, mock_elements_worker):
    """
@@ -363,6 +373,18 @@ def mock_cached_elements():
    assert CachedElement.select().count() == 5


+@pytest.fixture
+def mock_cached_images():
+    """Insert one image in local cache"""
+    CachedImage.create(
+        id=UUID("99999999-9999-9999-9999-999999999999"),
+        width=1250,
+        height=2500,
+        url="http://testserver/iiif/3/image",
+    )
+    assert CachedImage.select().count() == 1
+
+
 @pytest.fixture
 def mock_cached_transcriptions():
    """Insert few transcriptions in local cache, on a shared element"""

--- a/tests/test_base_worker.py
+++ b/tests/test_base_worker.py
@@ -10,7 +10,7 @@ import pytest

 from arkindex.mock import MockApiClient
 from arkindex_worker import logger
-from arkindex_worker.worker import BaseWorker
+from arkindex_worker.worker import BaseWorker, ElementsWorker
 from arkindex_worker.worker.base import ModelNotFoundError
 from tests.conftest import FIXTURES_DIR

@@ -739,3 +739,25 @@ def test_extract_parent_archives(tmp_path):
        )
        mode = "rb" if extracted_file.suffix == ".png" else "r"
        assert extracted_file.open(mode).read() == expected_file.open(mode).read()
+
+
+def test_corpus_id_not_set_read_only_mode(
+    mock_elements_worker_read_only: ElementsWorker,
+):
+    mock_elements_worker_read_only.configure()
+
+    with pytest.raises(
+        Exception, match="Missing ARKINDEX_CORPUS_ID environment variable"
+    ):
+        mock_elements_worker_read_only.corpus_id
+
+
+def test_corpus_id_set_read_only_mode(
+    monkeypatch, mock_elements_worker_read_only: ElementsWorker
+):
+    corpus_id = str(uuid.uuid4())
+    monkeypatch.setenv("ARKINDEX_CORPUS_ID", corpus_id)
+
+    mock_elements_worker_read_only.configure()
+
+    assert mock_elements_worker_read_only.corpus_id == corpus_id
No results found