Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • workers/base-worker
1 result
Show changes
Showing
with 245 additions and 211 deletions
# -*- coding: utf-8 -*-
"""
ElementsWorker methods for transcriptions.
"""
from collections.abc import Iterable
from enum import Enum
from typing import Dict, Iterable, List, Optional, Union
from peewee import IntegrityError
......@@ -40,14 +39,14 @@ class TextOrientation(Enum):
"""
class TranscriptionMixin(object):
class TranscriptionMixin:
def create_transcription(
self,
element: Union[Element, CachedElement],
element: Element | CachedElement,
text: str,
confidence: float,
orientation: TextOrientation = TextOrientation.HorizontalLeftToRight,
) -> Optional[Dict[str, Union[str, float]]]:
) -> dict[str, str | float] | None:
"""
Create a transcription on the given element through the API.
......@@ -59,7 +58,7 @@ class TranscriptionMixin(object):
or None if the worker is in read-only mode.
"""
assert element and isinstance(
element, (Element, CachedElement)
element, Element | CachedElement
), "element shouldn't be null and should be an Element or CachedElement"
assert text and isinstance(
text, str
......@@ -111,8 +110,8 @@ class TranscriptionMixin(object):
def create_transcriptions(
self,
transcriptions: List[Dict[str, Union[str, float, Optional[TextOrientation]]]],
) -> List[Dict[str, Union[str, float]]]:
transcriptions: list[dict[str, str | float | TextOrientation | None]],
) -> list[dict[str, str | float]]:
"""
Create multiple transcriptions at once on existing elements through the API,
and creates [CachedTranscription][arkindex_worker.cache.CachedTranscription] instances if cache support is enabled.
......@@ -140,13 +139,13 @@ class TranscriptionMixin(object):
for index, transcription in enumerate(transcriptions_payload):
element_id = transcription.get("element_id")
assert element_id and isinstance(
element_id, str
assert (
element_id and isinstance(element_id, str)
), f"Transcription at index {index} in transcriptions: element_id shouldn't be null and should be of type str"
text = transcription.get("text")
assert text and isinstance(
text, str
assert (
text and isinstance(text, str)
), f"Transcription at index {index} in transcriptions: text shouldn't be null and should be of type str"
confidence = transcription.get("confidence")
......@@ -159,8 +158,8 @@ class TranscriptionMixin(object):
orientation = transcription.get(
"orientation", TextOrientation.HorizontalLeftToRight
)
assert orientation and isinstance(
orientation, TextOrientation
assert (
orientation and isinstance(orientation, TextOrientation)
), f"Transcription at index {index} in transcriptions: orientation shouldn't be null and should be of type TextOrientation"
if orientation:
transcription["orientation"] = orientation.value
......@@ -203,10 +202,10 @@ class TranscriptionMixin(object):
def create_element_transcriptions(
self,
element: Union[Element, CachedElement],
element: Element | CachedElement,
sub_element_type: str,
transcriptions: List[Dict[str, Union[str, float]]],
) -> Dict[str, Union[str, bool]]:
transcriptions: list[dict[str, str | float]],
) -> dict[str, str | bool]:
"""
Create multiple elements and transcriptions at once on a single parent element through the API.
......@@ -228,7 +227,7 @@ class TranscriptionMixin(object):
:returns: A list of dicts as returned by the ``CreateElementTranscriptions`` API endpoint.
"""
assert element and isinstance(
element, (Element, CachedElement)
element, Element | CachedElement
), "element shouldn't be null and should be an Element or CachedElement"
assert sub_element_type and isinstance(
sub_element_type, str
......@@ -242,8 +241,8 @@ class TranscriptionMixin(object):
for index, transcription in enumerate(transcriptions_payload):
text = transcription.get("text")
assert text and isinstance(
text, str
assert (
text and isinstance(text, str)
), f"Transcription at index {index} in transcriptions: text shouldn't be null and should be of type str"
confidence = transcription.get("confidence")
......@@ -256,15 +255,15 @@ class TranscriptionMixin(object):
orientation = transcription.get(
"orientation", TextOrientation.HorizontalLeftToRight
)
assert orientation and isinstance(
orientation, TextOrientation
assert (
orientation and isinstance(orientation, TextOrientation)
), f"Transcription at index {index} in transcriptions: orientation shouldn't be null and should be of type TextOrientation"
if orientation:
transcription["orientation"] = orientation.value
polygon = transcription.get("polygon")
assert polygon and isinstance(
polygon, list
assert (
polygon and isinstance(polygon, list)
), f"Transcription at index {index} in transcriptions: polygon shouldn't be null and should be of type list"
assert (
len(polygon) >= 3
......@@ -273,12 +272,16 @@ class TranscriptionMixin(object):
isinstance(point, list) and len(point) == 2 for point in polygon
), f"Transcription at index {index} in transcriptions: polygon points should be lists of two items"
assert all(
isinstance(coord, (int, float)) for point in polygon for coord in point
isinstance(coord, int | float) for point in polygon for coord in point
), f"Transcription at index {index} in transcriptions: polygon points should be lists of two numbers"
element_confidence = transcription.get("element_confidence")
assert element_confidence is None or (
isinstance(element_confidence, float) and 0 <= element_confidence <= 1
assert (
element_confidence is None
or (
isinstance(element_confidence, float)
and 0 <= element_confidence <= 1
)
), f"Transcription at index {index} in transcriptions: element_confidence should be either null or a float in [0..1] range"
if self.is_read_only:
......@@ -359,11 +362,11 @@ class TranscriptionMixin(object):
def list_transcriptions(
self,
element: Union[Element, CachedElement],
element_type: Optional[str] = None,
recursive: Optional[bool] = None,
worker_version: Optional[Union[str, bool]] = None,
) -> Union[Iterable[dict], Iterable[CachedTranscription]]:
element: Element | CachedElement,
element_type: str | None = None,
recursive: bool | None = None,
worker_version: str | bool | None = None,
) -> Iterable[dict] | Iterable[CachedTranscription]:
"""
List transcriptions on an element.
......@@ -375,7 +378,7 @@ class TranscriptionMixin(object):
or an iterable of CachedTranscription when cache support is enabled.
"""
assert element and isinstance(
element, (Element, CachedElement)
element, Element | CachedElement
), "element shouldn't be null and should be an Element or CachedElement"
query_params = {}
if element_type:
......@@ -386,7 +389,7 @@ class TranscriptionMixin(object):
query_params["recursive"] = recursive
if worker_version is not None:
assert isinstance(
worker_version, (str, bool)
worker_version, str | bool
), "worker_version should be of type str or bool"
if isinstance(worker_version, bool):
assert (
......
# -*- coding: utf-8 -*-
"""
ElementsWorker methods for worker versions.
"""
class WorkerVersionMixin(object):
class WorkerVersionMixin:
def get_worker_version(self, worker_version_id: str) -> dict:
"""
Retrieve a worker version, using the [ElementsWorker][arkindex_worker.worker.ElementsWorker]'s internal cache when possible.
......
......@@ -3,6 +3,6 @@
"name": "Demo",
"description": "Demo ML worker for Arkindex",
"worker_type": "demo",
"author": "",
"email": ""
"author": "John Doe",
"email": "john.doe@company.com"
}
# -*- coding: utf-8 -*-
from arkindex_worker.worker import ElementsWorker
......
black==23.11.0
black==23.12.0
doc8==1.1.1
mkdocs==1.5.3
mkdocs-material==9.4.8
......
......@@ -24,7 +24,8 @@ At Teklia, we use a simple version of [Git Flow][gitflow]:
- Developments should happen in branches, with merge requests to enable code
review and Gitlab CI pipelines.
- Project maintainers should use Git tags to create official releases, by
updating the `VERSION` file and using the same version string as the tag name.
updating the `project.version` key of the `pyproject.toml` file and using
the same version string as the tag name.
This process is reflected in the template's `.gitlab-ci.yml` file.
......
......@@ -131,9 +131,9 @@ to get a basic structure for your worker.
Cookiecutter will ask you for several options:
`slug`
: A slug for the worker. This should use lowercase alphanumeric characters or
underscores to meet the code formatting requirements that the template
automatically enforces via [black].
: A slug for the worker. This should use lowercase alphanumeric characters,
underscores or hyphens to meet the code formatting requirements that the
template automatically enforces via [black].
`name`
: A name for the worker, purely used for display purposes.
......@@ -159,6 +159,16 @@ Cookiecutter will ask you for several options:
`email`
: Your e-mail address. This will be used to contact you if any administrative need arises
Cookiecutter will also automatically normalize your worker's `slug` in new parameters:
`__package`
: The name of the Python package for your worker, generated by normalizing the `slug`:
lowercasing it and replacing underscores with hyphens.
`__module`
: The name of the Python module for your worker, generated by normalizing the `slug`:
lowercasing it and replacing hyphens with underscores.
### Pushing to GitLab
This section guides you through pushing the newly created worker from your
......@@ -169,7 +179,7 @@ This section assumes you have Maintainer or Owner access to the GitLab project.
#### To push to GitLab
1. Enter the newly created directory, starting in `worker-` and ending with your
worker's slug.
worker's `slug`.
2. Add your GitLab project as a Git remote:
......
......@@ -115,6 +115,6 @@ in the browser's address bar when browsing an element on Arkindex.
1. Activate the Python environment: run `workon X` where `X` is the name of
your Python environment.
2. Run `worker-X`, where `X` is the slug of your worker, followed by
2. Run `worker-X`, where `X` is the `__package` name of your worker, followed by
`--element=Y` where `Y` is the ID of an element. You can repeat `--element`
as many times as you need to process multiple elements.
......@@ -53,8 +53,8 @@ package, a Docker build, with the best development practices:
`setup.py`
: Configures the worker's Python package.
`VERSION`
: Official version number of your worker. Defaults to `0.1.0`.
`pyproject.toml`
: Configures the worker's Python package.
`ci/build.sh`
: Script that gets run by [CI](ci/index.md) pipelines
......@@ -68,10 +68,10 @@ package, a Docker build, with the best development practices:
TODO: For more information, see [Writing tests for your worker](tests).
-->
`worker_[slug]/__init__.py`
`worker_[__module]/__init__.py`
: Declares the folder as a Python package.
`worker_[slug]/worker.py`
`worker_[__module]/worker.py`
: The core part of the worker. This is where you can write code that processes
Arkindex elements.
......
docs/contents/workers/user_configuration/model_config.png

50.4 KiB

......@@ -54,7 +54,7 @@ All attributes are optional unless explicitly specified.
: Mandatory. Name of the worker, for display purposes.
`slug`
: Mandatory. Slug of this worker. The slug must be unique across the repository and must only hold alphanumerical characters, underscores or dashes.
: Mandatory. Slug of this worker. The slug must be unique across the repository and must only hold alphanumerical characters, underscores or hyphens.
`type`
: Mandatory. Type of the worker, for display purposes only. Some common values
......@@ -80,7 +80,16 @@ include:
: This worker does not support GPUs. It may run on a host that has a GPU, but it will ignore it.
`model_usage`
: Boolean. Whether or not this worker requires a model version to run. Defaults to `false`.
: Whether or not this worker requires a model version to run. Defaults to `disabled`. May take one of the following values:
`required`
: This worker requires a model version, and will only be run on processes with a model.
`supported`
: This worker supports a model version, but may run on any process, including those without a model.
`disabled`
: This worker does not support model versions. It may run on a process that has a model, but it will ignore it.
`docker`
: Regroups Docker-related configuration attributes:
......@@ -137,6 +146,7 @@ A parameter is defined using the following settings:
- `enum`
- `list`
- `dict`
- `model`
`default`
: Optional. A default value for the parameter. Must be of the defined parameter `type`.
......@@ -272,7 +282,7 @@ Which will result in the following display for the user:
#### Dictionary parameters
Dictionary-type parameters must be defined using a `title`, the `dict` `type`. You can also set a `default` value for this parameter, which must be one a dictionary, as well as make it a `required` parameter, which prevents users from leaving it blank. You can use dictionary parameters for example to specify a correspondence between the classes that are predicted by a worker and the elements that are created on Arkindex from these predictions.
Dictionary-type parameters must be defined using a `title` and the `dict` `type`. You can also set a `default` value for this parameter, which must be a dictionary, as well as make it a `required` parameter, which prevents users from leaving it blank. You can use dictionary parameters for example to specify a correspondence between the classes that are predicted by a worker and the elements that are created on Arkindex from these predictions.
Dictionary-type parameters only accept strings as values.
......@@ -293,6 +303,26 @@ Which will result in the following display for the user:
![Dictionary-type parameter](user_configuration/dict_config.png "Example dictionary-type parameter.")
#### Model parameters
Model-type parameters must be defined using a `title` and the `model` type. You can also set a `default` value for this parameter, which must be the UUID of an existing Model, and make it a `required` parameter, which prevents users from leaving it blank. You can use a model parameter to specify to which Model the Model Version that is created by a Training process will be attached.
Model-type parameters only accept Model UUIDs as values.
In the configuration form, model parameters are displayed as an input field. Users can select a model from a list of available Models: what they type into the input field filters that list, allowing them to search for a model using its name or UUID.
For example, a model-type parameter can be defined like this:
```yaml
model_param:
title: Training Model
type: model
```
Which will result in the following display for the user:
![Model-type parameter](user_configuration/model_config.png "Example model-type parameter.")
#### Example user_configuration
```yaml
......@@ -318,6 +348,9 @@ user_configuration:
- 23
- 56
title: Another Parameter
a_model_parameter:
type: model
title: Model to train
```
#### Fallback to free JSON input
......
# Git & Gitlab support
::: arkindex_worker.git
......@@ -115,7 +115,7 @@ Released on **8 November 2022** &bull; View on [Gitlab](https://gitlab.teklia.co
- A new version of the cache was released with the updated Django models.
- Improvements to our Machine Learning training API to allow workers to use models published on Arkindex.
- Support workers that have no configuration.
- Allow publishing metadatas with falsy but non-null values.
- Allow publishing metadata with falsy but non-null values.
- Add `.polygon` attribute shortcut on `Element`.
- Add a major test speedup on our worker template.
- Support cache usage on our metadata API endpoint helpers.
......
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#!/usr/bin/env bash
# File generated by pre-commit: https://pre-commit.com
# ID: 138fd403232d2ddd5efb44317e38bf03
import os
import sys
# we try our best, but the shebang of this script is difficult to determine:
# - macos doesn't ship with python3
# - windows executables are almost always `python.exe`
# therefore we continue to support python2 for this small script
if sys.version_info < (3, 3):
from distutils.spawn import find_executable as which
else:
from shutil import which
# work around https://github.com/Homebrew/homebrew-core/issues/30445
os.environ.pop("__PYVENV_LAUNCHER__", None)
# start templated
INSTALL_PYTHON = "/usr/bin/python3"
ARGS = [
"hook-impl",
"--config=.pre-commit-config.yaml",
"--hook-type=pre-commit",
"--skip-on-missing-config",
]
INSTALL_PYTHON=/usr/bin/python3
ARGS=(hook-impl --config=.pre-commit-config.yaml --hook-type=pre-commit --skip-on-missing-config)
# end templated
ARGS.extend(("--hook-dir", os.path.realpath(os.path.dirname(__file__))))
ARGS.append("--")
ARGS.extend(sys.argv[1:])
DONE = "`pre-commit` not found. Did you forget to activate your virtualenv?"
if os.access(INSTALL_PYTHON, os.X_OK):
CMD = [INSTALL_PYTHON, "-mpre_commit"]
elif which("pre-commit"):
CMD = ["pre-commit"]
else:
raise SystemExit(DONE)
CMD.extend(ARGS)
if sys.platform == "win32": # https://bugs.python.org/issue19124
import subprocess
HERE="$(cd "$(dirname "$0")" && pwd)"
ARGS+=(--hook-dir "$HERE" -- "$@")
if sys.version_info < (3, 7): # https://bugs.python.org/issue25942
raise SystemExit(subprocess.Popen(CMD).wait())
else:
raise SystemExit(subprocess.call(CMD))
else:
os.execvp(CMD[0], CMD)
if [ -x "$INSTALL_PYTHON" ]; then
exec "$INSTALL_PYTHON" -mpre_commit "${ARGS[@]}"
elif command -v pre-commit > /dev/null; then
exec pre-commit "${ARGS[@]}"
else
echo '`pre-commit` not found. Did you forget to activate your virtualenv?' 1>&2
exit 1
fi
# Normalize the slug to generate __package and __module private variables
{{cookiecutter.update({"__package": cookiecutter.slug.lower().replace("_", "-")})}} # noqa: F821
{{cookiecutter.update({"__module": cookiecutter.slug.lower().replace("-", "_")})}} # noqa: F821
......@@ -36,7 +36,6 @@ plugins:
- search
- autorefs
- mkdocstrings:
custom_templates: templates
handlers:
python:
import: # enable auto refs to the doc
......@@ -90,7 +89,6 @@ nav:
- Transcription: ref/api/transcription.md
- WorkerVersion: ref/api/worker_version.md
- Models: ref/models.md
- Git & Gitlab support: ref/git.md
- Image utilities: ref/image.md
- Cache: ref/cache.md
- Utils: ref/utils.md
......
[build-system]
requires = ["setuptools >= 61.0"]
build-backend = "setuptools.build_meta"
[project]
name = "arkindex-base-worker"
version = "0.3.6-rc5"
description = "Base Worker to easily build Arkindex ML workflows"
license = { file = "LICENSE" }
dynamic = ["dependencies", "optional-dependencies"]
authors = [
{ name = "Teklia", email = "contact@teklia.com" },
]
maintainers = [
{ name = "Teklia", email = "contact@teklia.com" },
]
requires-python = ">=3.10"
readme = { file = "README.md", content-type = "text/markdown" }
keywords = ["python"]
classifiers = [
"Development Status :: 5 - Production/Stable",
"License :: OSI Approved :: MIT License",
# Specify the Python versions you support here.
"Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
# Topics
"Topic :: Text Processing :: Linguistic",
]
[project.urls]
Homepage = "https://workers.arkindex.org"
Documentation = "https://workers.arkindex.org"
Repository = "https://gitlab.teklia.com/workers/base-worker"
"Bug Tracker" = "https://gitlab.teklia.com/workers/base-worker/issues"
Authors = "https://teklia.com"
[tool.setuptools.dynamic]
dependencies = { file = ["requirements.txt"] }
optional-dependencies = { docs = { file = ["docs-requirements.txt"] } }
[tool.ruff]
exclude = [".git", "__pycache__"]
ignore = ["E501"]
select = ["E", "F", "T1", "W", "I"]
select = [
# pycodestyle
"E",
"W",
# Pyflakes
"F",
# Flake8 Debugger
"T1",
# Isort
"I",
# Implicit Optional
"RUF013",
# Invalid pyproject.toml
"RUF200",
# pyupgrade
"UP",
# flake8-bugbear
"B",
# flake8-simplify
"SIM",
# flake8-pytest-style
"PT",
# flake8-use-pathlib
"PTH",
]
[tool.ruff.per-file-ignores]
# Ignore `pytest-composite-assertion` rules of `flake8-pytest-style` linter for non-test files
"arkindex_worker/**/*.py" = ["PT018"]
[tool.ruff.isort]
known-first-party = ["arkindex", "arkindex_common", "arkindex_worker"]
known-third-party = [
"PIL",
"apistar",
"gitlab",
"gnupg",
"peewee",
"playhouse",
......@@ -16,7 +84,6 @@ known-third-party = [
"requests",
"responses",
"setuptools",
"sh",
"shapely",
"tenacity",
"yaml",
......
arkindex-client==1.0.14
peewee==3.17.0
Pillow==10.1.0
pymdown-extensions==10.3.1
python-gitlab==4.1.1
python-gnupg==0.5.1
sh==2.0.6
pymdown-extensions==10.5
python-gnupg==0.5.2
shapely==2.0.2
tenacity==8.2.3
zstandard==0.22.0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from pathlib import Path
from setuptools import find_packages, setup
def requirements(path: Path):
assert path.exists(), "Missing requirements {}".format(path)
with path.open() as f:
return list(map(str.strip, f.read().splitlines()))
with open("VERSION") as f:
VERSION = f.read()
setup(
name="arkindex-base-worker",
version=VERSION,
description="Base Worker to easily build Arkindex ML workflows",
author="Teklia",
author_email="contact@teklia.com",
url="https://teklia.com",
python_requires=">=3.7",
install_requires=requirements(Path("requirements.txt")),
extras_require={"docs": requirements(Path("docs-requirements.txt"))},
packages=find_packages(),
)
setup(packages=find_packages())
# -*- coding: utf-8 -*-
import hashlib
import json
import os
......@@ -19,10 +18,10 @@ from arkindex_worker.cache import (
CachedImage,
CachedTranscription,
Version,
create_tables,
create_version_table,
init_cache_db,
)
from arkindex_worker.git import GitHelper, GitlabHelper
from arkindex_worker.models import Artifact, Dataset
from arkindex_worker.worker import BaseWorker, DatasetWorker, ElementsWorker
from arkindex_worker.worker.dataset import DatasetState
......@@ -37,7 +36,7 @@ __yaml_cache = {}
@pytest.fixture(autouse=True)
def disable_sleep(monkeypatch):
def _disable_sleep(monkeypatch):
"""
Do not sleep at all in between API executions
when errors occur in unit tests.
......@@ -46,8 +45,8 @@ def disable_sleep(monkeypatch):
monkeypatch.setattr(time, "sleep", lambda x: None)
@pytest.fixture
def cache_yaml(monkeypatch):
@pytest.fixture()
def _cache_yaml(monkeypatch):
"""
Cache all calls to yaml.safe_load in order to speedup
every test cases that load the OpenAPI schema
......@@ -75,7 +74,7 @@ def cache_yaml(monkeypatch):
@pytest.fixture(autouse=True)
def setup_api(responses, monkeypatch, cache_yaml):
def _setup_api(responses, monkeypatch, _cache_yaml):
# Always use the environment variable first
schema_url = os.environ.get("ARKINDEX_API_SCHEMA_URL")
if schema_url is None:
......@@ -106,13 +105,13 @@ def setup_api(responses, monkeypatch, cache_yaml):
@pytest.fixture(autouse=True)
def give_env_variable(request, monkeypatch):
def _give_env_variable(monkeypatch):
"""Defines required environment variables"""
monkeypatch.setenv("ARKINDEX_WORKER_RUN_ID", "56785678-5678-5678-5678-567856785678")
@pytest.fixture
def mock_worker_run_api(responses):
@pytest.fixture()
def _mock_worker_run_api(responses):
"""Provide a mock API response to get worker run information"""
payload = {
"id": "56785678-5678-5678-5678-567856785678",
......@@ -140,7 +139,7 @@ def mock_worker_run_api(responses):
"docker_image_name": None,
"state": "created",
"gpu_usage": "disabled",
"model_usage": False,
"model_usage": "disabled",
"worker": {
"id": "deadbeef-1234-5678-1234-worker",
"name": "Fake worker",
......@@ -180,8 +179,8 @@ def mock_worker_run_api(responses):
)
@pytest.fixture
def mock_worker_run_no_revision_api(responses):
@pytest.fixture()
def _mock_worker_run_no_revision_api(responses):
"""Provide a mock API response to get worker run not linked to a revision information"""
payload = {
"id": "56785678-5678-5678-5678-567856785678",
......@@ -207,7 +206,7 @@ def mock_worker_run_no_revision_api(responses):
"docker_image_name": None,
"state": "created",
"gpu_usage": "disabled",
"model_usage": False,
"model_usage": "disabled",
"worker": {
"id": "deadbeef-1234-5678-1234-worker",
"name": "Fake worker",
......@@ -247,8 +246,8 @@ def mock_worker_run_no_revision_api(responses):
)
@pytest.fixture
def mock_activity_calls(responses):
@pytest.fixture()
def _mock_activity_calls(responses):
"""
Mock responses when updating the activity state for multiple element of the same version
"""
......@@ -259,8 +258,8 @@ def mock_activity_calls(responses):
)
@pytest.fixture
def mock_elements_worker(monkeypatch, mock_worker_run_api):
@pytest.fixture()
def mock_elements_worker(monkeypatch, _mock_worker_run_api):
"""Build and configure an ElementsWorker with fixed CLI parameters to avoid issues with pytest"""
monkeypatch.setattr(sys, "argv", ["worker"])
worker = ElementsWorker()
......@@ -268,7 +267,7 @@ def mock_elements_worker(monkeypatch, mock_worker_run_api):
return worker
@pytest.fixture
@pytest.fixture()
def mock_elements_worker_read_only(monkeypatch):
"""Build and configure an ElementsWorker with fixed CLI parameters to avoid issues with pytest"""
monkeypatch.setattr(sys, "argv", ["worker", "--dev"])
......@@ -277,7 +276,7 @@ def mock_elements_worker_read_only(monkeypatch):
return worker
@pytest.fixture
@pytest.fixture()
def mock_elements_worker_with_list(monkeypatch, responses, mock_elements_worker):
"""
Mock a worker instance to list and retrieve a single element
......@@ -298,8 +297,19 @@ def mock_elements_worker_with_list(monkeypatch, responses, mock_elements_worker)
return mock_elements_worker
@pytest.fixture
def mock_base_worker_with_cache(mocker, monkeypatch, mock_worker_run_api):
@pytest.fixture()
def mock_cache_db(tmp_path):
cache_path = tmp_path / "db.sqlite"
init_cache_db(cache_path)
create_version_table()
create_tables()
return cache_path
@pytest.fixture()
def mock_base_worker_with_cache(monkeypatch, _mock_worker_run_api):
"""Build a BaseWorker using SQLite cache, also mocking a PONOS_TASK"""
monkeypatch.setattr(sys, "argv", ["worker"])
......@@ -309,13 +319,10 @@ def mock_base_worker_with_cache(mocker, monkeypatch, mock_worker_run_api):
return worker
@pytest.fixture
def mock_elements_worker_with_cache(monkeypatch, mock_worker_run_api, tmp_path):
@pytest.fixture()
def mock_elements_worker_with_cache(monkeypatch, mock_cache_db, _mock_worker_run_api):
"""Build and configure an ElementsWorker using SQLite cache with fixed CLI parameters to avoid issues with pytest"""
cache_path = tmp_path / "db.sqlite"
init_cache_db(cache_path)
create_version_table()
monkeypatch.setattr(sys, "argv", ["worker", "-d", str(cache_path)])
monkeypatch.setattr(sys, "argv", ["worker", "-d", str(mock_cache_db)])
worker = ElementsWorker(support_cache=True)
worker.configure()
......@@ -323,35 +330,34 @@ def mock_elements_worker_with_cache(monkeypatch, mock_worker_run_api, tmp_path):
return worker
@pytest.fixture
@pytest.fixture()
def fake_page_element():
with open(FIXTURES_DIR / "page_element.json", "r") as f:
return json.load(f)
return json.loads((FIXTURES_DIR / "page_element.json").read_text())
@pytest.fixture
@pytest.fixture()
def fake_ufcn_worker_version():
with open(FIXTURES_DIR / "ufcn_line_historical_worker_version.json", "r") as f:
return json.load(f)
return json.loads(
(FIXTURES_DIR / "ufcn_line_historical_worker_version.json").read_text()
)
@pytest.fixture
@pytest.fixture()
def fake_transcriptions_small():
with open(FIXTURES_DIR / "line_transcriptions_small.json", "r") as f:
return json.load(f)
return json.loads((FIXTURES_DIR / "line_transcriptions_small.json").read_text())
@pytest.fixture
@pytest.fixture()
def model_file_dir():
return SAMPLES_DIR / "model_files"
@pytest.fixture
@pytest.fixture()
def model_file_dir_with_subfolder():
return SAMPLES_DIR / "root_folder"
@pytest.fixture
@pytest.fixture()
def fake_dummy_worker():
api_client = MockApiClient()
worker = ElementsWorker()
......@@ -359,34 +365,8 @@ def fake_dummy_worker():
return worker
@pytest.fixture
def fake_git_helper(mocker):
gitlab_helper = mocker.MagicMock()
return GitHelper(
"repo_url",
"/tmp/git_test/foo/",
"/tmp/test/path/",
"tmp_workflow_id",
gitlab_helper,
)
@pytest.fixture
def fake_gitlab_helper_factory():
# have to set up the responses, before creating the client
def run():
return GitlabHelper(
"balsac_exporter/balsac-exported-xmls-testing",
"https://gitlab.com",
"<GITLAB_TOKEN>",
"gitlab_branch",
)
return run
@pytest.fixture
def mock_cached_elements():
@pytest.fixture()
def _mock_cached_elements(mock_cache_db):
"""Insert few elements in local cache"""
CachedElement.create(
id=UUID("99999999-9999-9999-9999-999999999999"),
......@@ -430,8 +410,8 @@ def mock_cached_elements():
assert CachedElement.select().count() == 5
@pytest.fixture
def mock_cached_images():
@pytest.fixture()
def _mock_cached_images(mock_cache_db):
"""Insert few elements in local cache"""
CachedImage.create(
id=UUID("99999999-9999-9999-9999-999999999999"),
......@@ -442,8 +422,8 @@ def mock_cached_images():
assert CachedImage.select().count() == 1
@pytest.fixture
def mock_cached_transcriptions():
@pytest.fixture()
def _mock_cached_transcriptions(mock_cache_db):
"""Insert few transcriptions in local cache, on a shared element"""
CachedElement.create(
id=UUID("11111111-1111-1111-1111-111111111111"),
......@@ -529,7 +509,7 @@ def mock_cached_transcriptions():
)
@pytest.fixture(scope="function")
@pytest.fixture()
def mock_databases(tmp_path):
"""
Initialize several temporary databases
......@@ -612,7 +592,7 @@ def mock_databases(tmp_path):
return out
@pytest.fixture
@pytest.fixture()
def default_dataset():
return Dataset(
**{
......@@ -630,8 +610,8 @@ def default_dataset():
)
@pytest.fixture
def mock_dataset_worker(monkeypatch, mocker, mock_worker_run_api):
@pytest.fixture()
def mock_dataset_worker(monkeypatch, mocker, _mock_worker_run_api):
monkeypatch.setenv("PONOS_TASK", "my_task")
mocker.patch.object(sys, "argv", ["worker"])
......@@ -644,7 +624,7 @@ def mock_dataset_worker(monkeypatch, mocker, mock_worker_run_api):
return dataset_worker
@pytest.fixture
@pytest.fixture()
def mock_dev_dataset_worker(mocker):
mocker.patch.object(
sys,
......@@ -668,7 +648,7 @@ def mock_dev_dataset_worker(mocker):
return dataset_worker
@pytest.fixture
@pytest.fixture()
def default_artifact():
return Artifact(
**{
......