Compare revisions

Teklia Bot · Yoann Schneider · Bastien Abadie · Yoann Schneider · Bastien Abadie · Yoann Schneider
--- a/VERSION
+++ b/VERSION
-0.3.2-rc7
+0.3.2
--- a/arkindex_worker/worker/__init__.py
+++ b/arkindex_worker/worker/__init__.py
@@ -235,7 +235,7 @@ class ElementsWorker(
                self.report.error(element_id, e)

        # Save report as local artifact
-        self.report.save(os.path.join(self.work_dir, "ml_report.json"))
+        self.report.save(self.work_dir / "ml_report.json")

        if failed:
            logger.error(

--- a/arkindex_worker/worker/base.py
+++ b/arkindex_worker/worker/base.py
@@ -112,15 +112,15 @@ class BaseWorker(object):

        # Setup workdir either in Ponos environment or on host's home
        if os.environ.get("PONOS_DATA"):
-            self.work_dir = os.path.join(os.environ["PONOS_DATA"], "current")
+            self.work_dir = Path(os.environ["PONOS_DATA"], "current")
        else:
            # We use the official XDG convention to store file for developers
            # https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html
            xdg_data_home = os.environ.get(
                "XDG_DATA_HOME", os.path.expanduser("~/.local/share")
            )
-            self.work_dir = os.path.join(xdg_data_home, "arkindex")
-            os.makedirs(self.work_dir, exist_ok=True)
+            self.work_dir = Path(xdg_data_home, "arkindex")
+            self.work_dir.mkdir(parents=True, exist_ok=True)

        # Store task ID. This is only available when running in production
        # through a ponos agent
@@ -377,7 +377,7 @@ class BaseWorker(object):
        if self.task_id:
            # When running in production with ponos, the agent
            # downloads the model and set it in the current task work dir
-            return Path(self.work_dir)
+            return self.work_dir
        else:
            model_dir = self.config.get("model_dir", self.args.model_dir)
            if model_dir is None:

--- a/arkindex_worker/worker/training.py
+++ b/arkindex_worker/worker/training.py
@@ -102,12 +102,12 @@ def build_clean_payload(**kwargs):

 def skip_if_read_only(func):
    """
-    Return shortly in case the read_only attribute is evaluated to True
+    Return shortly in case the is_read_only property is evaluated to True
    """

    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
-        if getattr(self, "read_only", False):
+        if getattr(self, "is_read_only", False):
            logger.warning(
                "Cannot perform this operation as the worker is in read-only mode"
            )

--- a/docs-requirements.txt
+++ b/docs-requirements.txt
 black==23.1.0
 doc8==1.1.1
 mkdocs==1.4.2
-mkdocs-material==9.0.15
+mkdocs-material==9.1.0
 mkdocstrings==0.20.0
 mkdocstrings-python==0.8.3
 recommonmark==0.7.1
--- a/docs/releases.md
+++ b/docs/releases.md
 # Releases

+## 0.3.2
+
+Released on **8 March 2023** &bull; View on [Gitlab](https://gitlab.com/teklia/workers/base-worker/-/releases/0.3.2)
+
+
+- A helper to use the new [API endpoint](https://demo.arkindex.org/api-docs/#tag/entities/operation/CreateTranscriptionEntities) to create transcription entities more efficiently was implemented.
+- Training workers may now publish a model configuration when creating a new model version on Arkindex. This will make the execution of a *generic* worker much smoother.
+- The model version API endpoints were updated in the [latest Arkindex release](https://teklia.com/solutions/arkindex/releases/1-4-0/) and a new helper was introduced subsequently. However, there are no breaking changes and the main helper, `publish_model_version`, still has the same signature and behaviour.
+- The latest Arkindex release changed the way NER entities are stored and published.
+    - The `EntityType` enum was removed as type slugs are no longer restricted to a small set of options,
+    - [create_entity](https://workers.arkindex.org/ref/api/entity/#arkindex_worker.worker.entity.EntityMixin.create_entity) now expects a type slug as a String,
+    - a new helper [list_corpus_entity_types](https://workers.arkindex.org/ref/api/entity/#arkindex_worker.worker.entity.EntityMixin.list_corpus_entity_types) was added to load the Entity types in the corpus,
+    - a new helper [check_required_entity_types](https://workers.arkindex.org/ref/api/entity/#arkindex_worker.worker.entity.EntityMixin.check_required_entity_types) was added to make sure that needed entity types are available in the corpus. Missing ones are created by default (this can be disabled).
+- The [create_classifications](https://workers.arkindex.org/ref/api/classification/#arkindex_worker.worker.classification.ClassificationMixin.create_classifications) helper now expects the UUID of each MLClass instead of their name.
+- In developer mode, the only way to set the `corpus_id` attribute is to use the `ARKINDEX_CORPUS_ID` environment variable. When it's not set, all API requests using the `corpus_id` as path parameter will fail with `500` status code. A warning log was added to help developers troubleshoot this error by advising them to set this variable.
+- The [create_transcriptions](https://workers.arkindex.org/ref/api/transcription/#arkindex_worker.worker.transcription.TranscriptionMixin.create_transcriptions) helper no longer makes the API call in developer mode. This behaviour aligns with all other publication helpers.
+- Fixes hash computation when publishing a model using [publish_model_version](https://workers.arkindex.org/ref/api/training/#arkindex_worker.worker.training.TrainingMixin.publish_model_version).
+- If a process is linked to a model version, its id will be available to the worker through its `model_version_id` attribute.
+- The URLs of the API endpoints related to Ponos were changed in the latest Arkindex release. Some changes were needed in the test suite.
+- The `classes` attribute now directly contains the classes of the corpus of the processed element.
+```python
+# Old usage
+self.classes = {
+    "corpus_id": {
+        "ml_class_1": "class_uuid",
+        ...
+    }
+}
+
+# New usage
+self.classes = {
+    "ml_class_1": "class_uuid",
+    ...
+}
+```

 ## 0.3.1


--- a/tests-requirements.txt
+++ b/tests-requirements.txt
-pytest==7.2.1
+pytest==7.2.2
 pytest-mock==3.10.0
 pytest-responses==0.5.1
--- a/tests/test_base_worker.py
+++ b/tests/test_base_worker.py
@@ -17,7 +17,7 @@ from arkindex_worker.worker.base import ModelNotFoundError
 def test_init_default_local_share(monkeypatch):
    worker = BaseWorker()

-    assert worker.work_dir == os.path.expanduser("~/.local/share/arkindex")
+    assert str(worker.work_dir) == os.path.expanduser("~/.local/share/arkindex")


 def test_init_default_xdg_data_home(monkeypatch):
@@ -25,13 +25,13 @@ def test_init_default_xdg_data_home(monkeypatch):
    monkeypatch.setenv("XDG_DATA_HOME", path)
    worker = BaseWorker()

-    assert worker.work_dir == f"{path}/arkindex"
+    assert str(worker.work_dir) == f"{path}/arkindex"


 def test_init_with_local_cache(monkeypatch):
    worker = BaseWorker(support_cache=True)

-    assert worker.work_dir == os.path.expanduser("~/.local/share/arkindex")
+    assert str(worker.work_dir) == os.path.expanduser("~/.local/share/arkindex")
    assert worker.support_cache is True


@@ -40,7 +40,7 @@ def test_init_var_ponos_data_given(monkeypatch):
    monkeypatch.setenv("PONOS_DATA", path)
    worker = BaseWorker()

-    assert worker.work_dir == f"{path}/current"
+    assert str(worker.work_dir) == f"{path}/current"


 def test_init_var_worker_run_id_missing(monkeypatch):

--- a/tests/test_elements_worker/test_training.py
+++ b/tests/test_elements_worker/test_training.py
@@ -101,7 +101,10 @@ def test_handle_s3_uploading_errors(mock_training_worker, model_file_dir):
 )
 def test_training_mixin_read_only(mock_training_worker, method, caplog):
    """All operations related to models versions returns early if the worker is configured as read only"""
-    mock_training_worker.read_only = True
+    # Set worker in read_only mode
+    mock_training_worker.worker_run_id = None
+    assert mock_training_worker.is_read_only
+
    assert mock_training_worker.model_version is None
    getattr(mock_training_worker, method)()
    assert mock_training_worker.model_version is None

--- a/worker-{{cookiecutter.slug}}/requirements.txt
+++ b/worker-{{cookiecutter.slug}}/requirements.txt
-arkindex-base-worker==0.3.1
+arkindex-base-worker==0.3.2
No results found