Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • workers/base-worker
1 result
Show changes
Commits on Source (11)
Showing
with 291 additions and 58 deletions
MIT License
Copyright (c) 2023 Teklia
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
......@@ -2,6 +2,12 @@
An easy to use Python 3 high level API client, to build ML tasks.
This is an open-source project, licensed using [the MIT license](https://opensource.org/license/mit/).
## Documentation
The [documentation](https://workers.arkindex.org/) is made with [Material for MkDocs](https://github.com/squidfunk/mkdocs-material) and is hosted by [GitLab Pages](https://docs.gitlab.com/ee/user/project/pages/).
## Create a new worker using our template
```
......
......@@ -86,8 +86,9 @@ class ElementMixin:
element: Element,
type: str,
name: str,
polygon: list[list[int | float]],
polygon: list[list[int | float]] | None = None,
confidence: float | None = None,
image: str | None = None,
slim_output: bool = True,
) -> str:
"""
......@@ -96,8 +97,10 @@ class ElementMixin:
:param Element element: The parent element.
:param type: Slug of the element type for this child element.
:param name: Name of the child element.
:param polygon: Polygon of the child element.
:param polygon: Optional polygon of the child element.
:param confidence: Optional confidence score, between 0.0 and 1.0.
:param image: Optional image ID of the child element.
:param slim_output: Whether to return the child ID or the full child.
:returns: UUID of the created element.
"""
assert element and isinstance(
......@@ -109,19 +112,29 @@ class ElementMixin:
assert name and isinstance(
name, str
), "name shouldn't be null and should be of type str"
assert polygon and isinstance(
assert polygon is None or isinstance(
polygon, list
), "polygon shouldn't be null and should be of type list"
assert len(polygon) >= 3, "polygon should have at least three points"
assert all(
isinstance(point, list) and len(point) == 2 for point in polygon
), "polygon points should be lists of two items"
assert all(
isinstance(coord, int | float) for point in polygon for coord in point
), "polygon points should be lists of two numbers"
), "polygon should be None or a list"
if polygon is not None:
assert len(polygon) >= 3, "polygon should have at least three points"
assert all(
isinstance(point, list) and len(point) == 2 for point in polygon
), "polygon points should be lists of two items"
assert all(
isinstance(coord, int | float) for point in polygon for coord in point
), "polygon points should be lists of two numbers"
assert confidence is None or (
isinstance(confidence, float) and 0 <= confidence <= 1
), "confidence should be None or a float in [0..1] range"
assert image is None or isinstance(image, str), "image should be None or string"
if image is not None:
# Make sure it's a valid UUID
try:
UUID(image)
except ValueError as e:
raise ValueError("image is not a valid uuid.") from e
if polygon and image is None:
assert element.zone, "An image or a parent with an image is required to create an element with a polygon."
assert isinstance(slim_output, bool), "slim_output should be of type bool"
if self.is_read_only:
......@@ -133,7 +146,7 @@ class ElementMixin:
body={
"type": type,
"name": name,
"image": element.zone.image.id,
"image": image,
"corpus": element.corpus.id,
"polygon": polygon,
"parent": element.id,
......@@ -270,6 +283,35 @@ class ElementMixin:
return created_ids
def create_element_parent(
    self,
    parent: Element,
    child: Element,
) -> dict[str, str] | None:
    """
    Link an element to a parent through the API.

    :param parent: Parent element.
    :param child: Child element.
    :returns: A dict from the ``CreateElementParent`` API endpoint,
       or ``None`` when the worker is in read-only mode (no request is sent).
    """
    assert parent and isinstance(
        parent, Element
    ), "parent shouldn't be null and should be of type Element"
    assert child and isinstance(
        child, Element
    ), "child shouldn't be null and should be of type Element"

    if self.is_read_only:
        # Nothing is sent to the API in read-only mode, so there is no payload to return
        logger.warning("Cannot link elements as this worker is in read-only mode")
        return None

    return self.request(
        "CreateElementParent",
        parent=parent.id,
        child=child.id,
    )

def partial_update_element(
self, element: Element | CachedElement, **kwargs
) -> dict:
......@@ -288,7 +330,7 @@ class ElementMixin:
* *image* (``UUID``): Optional ID of the image of this element
:returns: A dict from the ``PartialUpdateElement`` API endpoint,
:returns: A dict from the ``PartialUpdateElement`` API endpoint.
"""
assert element and isinstance(
element, Element | CachedElement
......
......@@ -331,8 +331,7 @@ class EntityMixin:
parent: Element | None = None,
):
"""
List all entities in the worker's corpus
This method does not support cache
List all entities in the worker's corpus and store them in the ``self.entities`` cache.
:param name: Filter entities by part of their name (case-insensitive)
:param parent: Restrict entities to those linked to all transcriptions of an element and all its descendants. Note that links to metadata are ignored.
"""
......@@ -346,8 +345,14 @@ class EntityMixin:
assert isinstance(parent, Element), "parent should be of type Element"
query_params["parent"] = parent.id
return self.api_client.paginate(
"ListCorpusEntities", id=self.corpus_id, **query_params
self.entities = {
entity["id"]: entity
for entity in self.api_client.paginate(
"ListCorpusEntities", id=self.corpus_id, **query_params
)
}
logger.info(
f"Loaded {len(self.entities)} entities in corpus ({self.corpus_id})"
)
def list_corpus_entity_types(
......
......@@ -131,9 +131,9 @@ to get a basic structure for your worker.
Cookiecutter will ask you for several options:
`slug`
: A slug for the worker. This should use lowercase alphanumeric characters or
underscores to meet the code formatting requirements that the template
automatically enforces via [black].
: A slug for the worker. This should use lowercase alphanumeric characters,
underscores or hyphens to meet the code formatting requirements that the
template automatically enforces via [black].
`name`
: A name for the worker, purely used for display purposes.
......@@ -159,6 +159,16 @@ Cookiecutter will ask you for several options:
`email`
: Your e-mail address. This will be used to contact you if any administrative need arises.
Cookiecutter will also automatically normalize your worker's `slug` in new parameters:
`__package`
: The name of the Python package for your worker, generated by normalizing the `slug`:
it is lowercased and its underscores are replaced with hyphens.
`__module`
: The name of the Python module for your worker, generated by normalizing the `slug`:
it is lowercased and its hyphens are replaced with underscores.
### Pushing to GitLab
This section guides you through pushing the newly created worker from your
......@@ -169,7 +179,7 @@ This section assumes you have Maintainer or Owner access to the GitLab project.
#### To push to GitLab
1. Enter the newly created directory, starting with `worker-` and ending with your
worker's slug.
worker's `slug`.
2. Add your GitLab project as a Git remote:
......
......@@ -115,6 +115,6 @@ in the browser's address bar when browsing an element on Arkindex.
1. Activate the Python environment: run `workon X` where `X` is the name of
your Python environment.
2. Run `worker-X`, where `X` is the slug of your worker, followed by
2. Run `worker-X`, where `X` is the `__package` name of your worker, followed by
`--element=Y` where `Y` is the ID of an element. You can repeat `--element`
as many times as you need to process multiple elements.
......@@ -68,10 +68,10 @@ package, a Docker build, with the best development practices:
TODO: For more information, see [Writing tests for your worker](tests).
-->
`worker_[slug]/__init__.py`
`worker_[__module]/__init__.py`
: Declares the folder as a Python package.
`worker_[slug]/worker.py`
`worker_[__module]/worker.py`
: The core part of the worker. This is where you can write code that processes
Arkindex elements.
......
docs/contents/workers/user_configuration/model_config.png

50.4 KiB

......@@ -54,7 +54,7 @@ All attributes are optional unless explicitly specified.
: Mandatory. Name of the worker, for display purposes.
`slug`
: Mandatory. Slug of this worker. The slug must be unique across the repository and must only hold alphanumerical characters, underscores or dashes.
: Mandatory. Slug of this worker. The slug must be unique across the repository and must only hold alphanumerical characters, underscores or hyphens.
`type`
: Mandatory. Type of the worker, for display purposes only. Some common values
......@@ -80,7 +80,16 @@ include:
: This worker does not support GPUs. It may run on a host that has a GPU, but it will ignore it.
`model_usage`
: Boolean. Whether or not this worker requires a model version to run. Defaults to `false`.
: Whether or not this worker requires a model version to run. Defaults to `disabled`. May take one of the following values:
`required`
: This worker requires a model version, and will only be run on processes with a model.
`supported`
: This worker supports a model version, but may run on any process, including those without a model.
`disabled`
: This worker does not support model versions. It may run on a process that has a model, but it will ignore it.
`docker`
: Regroups Docker-related configuration attributes:
......@@ -137,6 +146,7 @@ A parameter is defined using the following settings:
- `enum`
- `list`
- `dict`
- `model`
`default`
: Optional. A default value for the parameter. Must be of the defined parameter `type`.
......@@ -272,7 +282,7 @@ Which will result in the following display for the user:
#### Dictionary parameters
Dictionary-type parameters must be defined using a `title`, the `dict` `type`. You can also set a `default` value for this parameter, which must be one a dictionary, as well as make it a `required` parameter, which prevents users from leaving it blank. You can use dictionary parameters for example to specify a correspondence between the classes that are predicted by a worker and the elements that are created on Arkindex from these predictions.
Dictionary-type parameters must be defined using a `title` and the `dict` `type`. You can also set a `default` value for this parameter, which must be a dictionary, as well as make it a `required` parameter, which prevents users from leaving it blank. You can use dictionary parameters for example to specify a correspondence between the classes that are predicted by a worker and the elements that are created on Arkindex from these predictions.
Dictionary-type parameters only accept strings as values.
......@@ -293,6 +303,26 @@ Which will result in the following display for the user:
![Dictionary-type parameter](user_configuration/dict_config.png "Example dictionary-type parameter.")
#### Model parameters
Model-type parameters must be defined using a `title` and the `model` type. You can also set a `default` value for this parameter, which must be the UUID of an existing Model, and make it a `required` parameter, which prevents users from leaving it blank. You can use a model parameter to specify to which Model the Model Version that is created by a Training process will be attached.
Model-type parameters only accept Model UUIDs as values.
In the configuration form, model parameters are displayed as an input field. Users can select a model from a list of available Models: what they type into the input field filters that list, allowing them to search for a model using its name or UUID.
For example, a model-type parameter can be defined like this:
```yaml
model_param:
title: Training Model
type: model
```
Which will result in the following display for the user:
![Model-type parameter](user_configuration/model_config.png "Example model-type parameter.")
#### Example user_configuration
```yaml
......@@ -318,6 +348,9 @@ user_configuration:
- 23
- 56
title: Another Parameter
a_model_parameter:
type: model
title: Model to train
```
#### Fallback to free JSON input
......
# Normalize the slug to generate __package and __module private variables
{{cookiecutter.update({"__package": cookiecutter.slug.lower().replace("_", "-")})}} # noqa: F821
{{cookiecutter.update({"__module": cookiecutter.slug.lower().replace("-", "_")})}} # noqa: F821
......@@ -36,7 +36,6 @@ plugins:
- search
- autorefs
- mkdocstrings:
custom_templates: templates
handlers:
python:
import: # enable auto refs to the doc
......
......@@ -4,8 +4,9 @@ build-backend = "setuptools.build_meta"
[project]
name = "arkindex-base-worker"
version = "0.3.6-rc3"
version = "0.3.6-rc5"
description = "Base Worker to easily build Arkindex ML workflows"
license = { file = "LICENSE" }
dynamic = ["dependencies", "optional-dependencies"]
authors = [
{ name = "Teklia", email = "contact@teklia.com" },
......
......@@ -139,7 +139,7 @@ def _mock_worker_run_api(responses):
"docker_image_name": None,
"state": "created",
"gpu_usage": "disabled",
"model_usage": False,
"model_usage": "disabled",
"worker": {
"id": "deadbeef-1234-5678-1234-worker",
"name": "Fake worker",
......@@ -206,7 +206,7 @@ def _mock_worker_run_no_revision_api(responses):
"docker_image_name": None,
"state": "created",
"gpu_usage": "disabled",
"model_usage": False,
"model_usage": "disabled",
"worker": {
"id": "deadbeef-1234-5678-1234-worker",
"name": "Fake worker",
......
......@@ -428,19 +428,7 @@ def test_create_sub_element_wrong_name(mock_elements_worker):
def test_create_sub_element_wrong_polygon(mock_elements_worker):
elt = Element({"zone": None})
with pytest.raises(
AssertionError, match="polygon shouldn't be null and should be of type list"
):
mock_elements_worker.create_sub_element(
element=elt,
type="something",
name="0",
polygon=None,
)
with pytest.raises(
AssertionError, match="polygon shouldn't be null and should be of type list"
):
with pytest.raises(AssertionError, match="polygon should be None or a list"):
mock_elements_worker.create_sub_element(
element=elt,
type="something",
......@@ -504,6 +492,42 @@ def test_create_sub_element_wrong_confidence(mock_elements_worker, confidence):
)
@pytest.mark.parametrize(
    ("image", "error_type", "error_message"),
    [
        (1, AssertionError, "image should be None or string"),
        ("not a uuid", ValueError, "image is not a valid uuid."),
    ],
)
def test_create_sub_element_wrong_image(
    mock_elements_worker, image, error_type, error_message
):
    """Check that an invalid ``image`` value raises the expected error.

    A non-string value is expected to raise an ``AssertionError``, a string
    that is not a UUID a ``ValueError``.
    """
    expected_pattern = re.escape(error_message)

    with pytest.raises(error_type, match=expected_pattern):
        mock_elements_worker.create_sub_element(
            element=Element({"zone": None}),
            type="something",
            name="blah",
            polygon=[[0, 0], [0, 10], [10, 10], [10, 0], [0, 0]],
            image=image,
        )


def test_create_sub_element_wrong_image_and_polygon(mock_elements_worker):
    """A polygon with neither an image nor a parent zone must be refused."""
    expected_pattern = re.escape(
        "An image or a parent with an image is required to create an element with a polygon."
    )

    with pytest.raises(AssertionError, match=expected_pattern):
        mock_elements_worker.create_sub_element(
            element=Element({"zone": None}),
            type="something",
            name="blah",
            polygon=[[0, 0], [0, 10], [10, 10], [10, 0], [0, 0]],
            image=None,
        )


def test_create_sub_element_api_error(responses, mock_elements_worker):
elt = Element(
{
......@@ -580,7 +604,7 @@ def test_create_sub_element(responses, mock_elements_worker, slim_output):
assert json.loads(responses.calls[-1].request.body) == {
"type": "something",
"name": "0",
"image": "22222222-2222-2222-2222-222222222222",
"image": None,
"corpus": "11111111-1111-1111-1111-111111111111",
"polygon": [[1, 1], [2, 2], [2, 1], [1, 2]],
"parent": "12341234-1234-1234-1234-123412341234",
......@@ -625,7 +649,7 @@ def test_create_sub_element_confidence(responses, mock_elements_worker):
assert json.loads(responses.calls[-1].request.body) == {
"type": "something",
"name": "0",
"image": "22222222-2222-2222-2222-222222222222",
"image": None,
"corpus": "11111111-1111-1111-1111-111111111111",
"polygon": [[1, 1], [2, 2], [2, 1], [1, 2]],
"parent": "12341234-1234-1234-1234-123412341234",
......@@ -1217,6 +1241,94 @@ def test_create_elements_integrity_error(
assert list(CachedElement.select()) == []
@pytest.mark.parametrize(
    ("params", "error_message"),
    [
        (
            {"parent": None, "child": None},
            "parent shouldn't be null and should be of type Element",
        ),
        (
            {"parent": "not an element", "child": None},
            "parent shouldn't be null and should be of type Element",
        ),
        (
            {"parent": Element(zone=None), "child": None},
            "child shouldn't be null and should be of type Element",
        ),
        (
            {"parent": Element(zone=None), "child": "not an element"},
            "child shouldn't be null and should be of type Element",
        ),
    ],
)
def test_create_element_parent_invalid_params(
    mock_elements_worker, params, error_message
):
    """Each invalid ``parent``/``child`` combination must trip its assertion."""
    expected_pattern = re.escape(error_message)

    with pytest.raises(AssertionError, match=expected_pattern):
        mock_elements_worker.create_element_parent(**params)


def test_create_element_parent_api_error(responses, mock_elements_worker):
    """A 500 answer from the endpoint must surface as an ``ErrorResponse``."""
    endpoint = "http://testserver/api/v1/element/497f6eca-6276-4993-bfeb-53cbbbba6f08/parent/12341234-1234-1234-1234-123412341234/"
    parent = Element({"id": "12341234-1234-1234-1234-123412341234"})
    child = Element({"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08"})
    responses.add(responses.POST, endpoint, status=500)

    with pytest.raises(ErrorResponse):
        mock_elements_worker.create_element_parent(parent=parent, child=child)

    # The API call is retried, so the same request is expected 5 times
    retried_calls = [("POST", endpoint)] * 5
    observed_calls = [
        (call.request.method, call.request.url) for call in responses.calls
    ]
    assert len(responses.calls) == len(BASE_API_CALLS) + 5
    assert observed_calls == BASE_API_CALLS + retried_calls


def test_create_element_parent(responses, mock_elements_worker):
    """A successful call returns the payload sent back by the endpoint."""
    endpoint = "http://testserver/api/v1/element/497f6eca-6276-4993-bfeb-53cbbbba6f08/parent/12341234-1234-1234-1234-123412341234/"
    parent = Element({"id": "12341234-1234-1234-1234-123412341234"})
    child = Element({"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08"})
    responses.add(
        responses.POST,
        endpoint,
        status=200,
        json={
            "parent": "12341234-1234-1234-1234-123412341234",
            "child": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
        },
    )

    created_element_parent = mock_elements_worker.create_element_parent(
        parent=parent,
        child=child,
    )

    # Exactly one request is expected on top of the base API calls
    observed_calls = [
        (call.request.method, call.request.url) for call in responses.calls
    ]
    assert len(responses.calls) == len(BASE_API_CALLS) + 1
    assert observed_calls == BASE_API_CALLS + [("POST", endpoint)]
    assert created_element_parent == {
        "parent": "12341234-1234-1234-1234-123412341234",
        "child": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
    }


@pytest.mark.parametrize(
("payload", "error"),
[
......
......@@ -741,12 +741,13 @@ def test_list_corpus_entities(responses, mock_elements_worker):
},
)
# list is required to actually do the request
assert list(mock_elements_worker.list_corpus_entities()) == [
{
mock_elements_worker.list_corpus_entities()
assert mock_elements_worker.entities == {
"fake_entity_id": {
"id": "fake_entity_id",
}
]
}
assert len(responses.calls) == len(BASE_API_CALLS) + 1
assert [
......
......@@ -7,7 +7,7 @@ ENV DEBIAN_FRONTEND=non-interactive
RUN apt-get update -q -y && apt-get install -q -y --no-install-recommends curl
# Install worker as a package
COPY worker_{{cookiecutter.slug}} worker_{{cookiecutter.slug}}
COPY worker_{{cookiecutter.__module}} worker_{{cookiecutter.__module}}
COPY requirements.txt setup.py pyproject.toml ./
RUN pip install . --no-cache-dir
......@@ -15,4 +15,4 @@ RUN pip install . --no-cache-dir
RUN curl https://assets.teklia.com/teklia_dev_ca.pem > /usr/local/share/ca-certificates/arkindex-dev.crt && update-ca-certificates
ENV REQUESTS_CA_BUNDLE /etc/ssl/certs/ca-certificates.crt
CMD ["worker-{{ cookiecutter.slug }}"]
CMD ["worker-{{ cookiecutter.__package }}"]
# {{ cookiecutter.slug }}
# {{ cookiecutter.name }}
{{ cookiecutter.description }}
......
......@@ -3,7 +3,7 @@ requires = ["setuptools >= 61.0"]
build-backend = "setuptools.build_meta"
[project]
name = "worker_{{ cookiecutter.slug }}"
name = "worker_{{ cookiecutter.__module }}"
version = "0.1.0"
description = "{{ cookiecutter.description }}"
dynamic = ["dependencies"]
......@@ -24,7 +24,7 @@ classifiers = [
]
[project.scripts]
worker-{{ cookiecutter.slug }} = "worker_{{ cookiecutter.slug }}.worker:main"
worker-{{ cookiecutter.__package }} = "worker_{{ cookiecutter.__module }}.worker:main"
[tool.setuptools.dynamic]
dependencies = { file = ["requirements.txt"] }
......@@ -60,7 +60,7 @@ select = [
[tool.ruff.per-file-ignores]
# Ignore `pytest-composite-assertion` rules of `flake8-pytest-style` linter for non-test files
"worker_{{ cookiecutter.slug }}/**/*.py" = ["PT018"]
"worker_{{ cookiecutter.__module }}/**/*.py" = ["PT018"]
[tool.ruff.isort]
known-first-party = ["arkindex", "arkindex_worker"]
......
......@@ -7,6 +7,6 @@ def test_dummy():
def test_import():
"""Import our newly created module, through importlib to avoid parsing issues"""
worker = importlib.import_module("worker_{{ cookiecutter.slug }}.worker")
worker = importlib.import_module("worker_{{ cookiecutter.__module }}.worker")
assert hasattr(worker, "Demo")
assert hasattr(worker.Demo, "process_element")
[tox]
envlist = worker-{{ cookiecutter.slug }}
envlist = worker-{{ cookiecutter.__package }}
[testenv]
passenv = ARKINDEX_API_SCHEMA_URL
......