Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • workers/base-worker
1 result
Show changes
Commits on Source (7)
0.3.2-rc7
0.3.2
......@@ -235,7 +235,7 @@ class ElementsWorker(
self.report.error(element_id, e)
# Save report as local artifact
self.report.save(os.path.join(self.work_dir, "ml_report.json"))
self.report.save(self.work_dir / "ml_report.json")
if failed:
logger.error(
......
......@@ -112,15 +112,15 @@ class BaseWorker(object):
# Setup workdir either in Ponos environment or on host's home
if os.environ.get("PONOS_DATA"):
self.work_dir = os.path.join(os.environ["PONOS_DATA"], "current")
self.work_dir = Path(os.environ["PONOS_DATA"], "current")
else:
# We use the official XDG convention to store file for developers
# https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html
xdg_data_home = os.environ.get(
"XDG_DATA_HOME", os.path.expanduser("~/.local/share")
)
self.work_dir = os.path.join(xdg_data_home, "arkindex")
os.makedirs(self.work_dir, exist_ok=True)
self.work_dir = Path(xdg_data_home, "arkindex")
self.work_dir.mkdir(parents=True, exist_ok=True)
# Store task ID. This is only available when running in production
# through a ponos agent
......@@ -377,7 +377,7 @@ class BaseWorker(object):
if self.task_id:
# When running in production with ponos, the agent
# downloads the model and set it in the current task work dir
return Path(self.work_dir)
return self.work_dir
else:
model_dir = self.config.get("model_dir", self.args.model_dir)
if model_dir is None:
......
......@@ -102,12 +102,12 @@ def build_clean_payload(**kwargs):
def skip_if_read_only(func):
"""
Return shortly in case the read_only attribute is evaluated to True
Return shortly in case the is_read_only property is evaluated to True
"""
@functools.wraps(func)
def wrapper(self, *args, **kwargs):
if getattr(self, "read_only", False):
if getattr(self, "is_read_only", False):
logger.warning(
"Cannot perform this operation as the worker is in read-only mode"
)
......
black==23.1.0
doc8==1.1.1
mkdocs==1.4.2
mkdocs-material==9.0.15
mkdocs-material==9.1.0
mkdocstrings==0.20.0
mkdocstrings-python==0.8.3
recommonmark==0.7.1
# Releases
## 0.3.2
Released on **8 March 2023** • View on [Gitlab](https://gitlab.com/teklia/workers/base-worker/-/releases/0.3.2)
- A helper to use the new [API endpoint](https://demo.arkindex.org/api-docs/#tag/entities/operation/CreateTranscriptionEntities) to create transcription entities more efficiently was implemented.
- Training workers may now publish a model configuration when creating a new model version on Arkindex. This will make the execution of a *generic* worker much smoother.
- The model version API endpoints were updated in the [latest Arkindex release](https://teklia.com/solutions/arkindex/releases/1-4-0/) and a new helper was introduced subsequently. However, there are no breaking changes and the main helper, `publish_model_version`, still has the same signature and behaviour.
- The latest Arkindex release changed the way NER entities are stored and published.
- The `EntityType` enum was removed as type slugs are no longer restricted to a small set of options,
- [create_entity](https://workers.arkindex.org/ref/api/entity/#arkindex_worker.worker.entity.EntityMixin.create_entity) now expects a type slug as a String,
- a new helper [list_corpus_entity_types](https://workers.arkindex.org/ref/api/entity/#arkindex_worker.worker.entity.EntityMixin.list_corpus_entity_types) was added to load the Entity types in the corpus,
- a new helper [check_required_entity_types](https://workers.arkindex.org/ref/api/entity/#arkindex_worker.worker.entity.EntityMixin.check_required_entity_types) was added to make sure that the needed entity types are available in the corpus. Missing ones are created by default (this can be disabled).
- The [create_classifications](https://workers.arkindex.org/ref/api/classification/#arkindex_worker.worker.classification.ClassificationMixin.create_classifications) helper now expects the UUID of each MLClass instead of their name.
- In developer mode, the only way to set the `corpus_id` attribute is to use the `ARKINDEX_CORPUS_ID` environment variable. When it's not set, all API requests using the `corpus_id` as path parameter will fail with `500` status code. A warning log was added to help developers troubleshoot this error by advising them to set this variable.
- The [create_transcriptions](https://workers.arkindex.org/ref/api/transcription/#arkindex_worker.worker.transcription.TranscriptionMixin.create_transcriptions) helper no longer makes the API call in developer mode. This behaviour aligns with all other publication helpers.
- Fixes hash computation when publishing a model using [publish_model_version](https://workers.arkindex.org/ref/api/training/#arkindex_worker.worker.training.TrainingMixin.publish_model_version).
- If a process is linked to a model version, its id will be available to the worker through its `model_version_id` attribute.
- The URLs of the API endpoints related to Ponos were changed in the latest Arkindex release. Some changes were needed in the test suite.
- The `classes` attribute now directly contains the classes of the corpus of the processed element.
```python
# Old usage
self.classes = {
"corpus_id": {
"ml_class_1": "class_uuid",
...
}
}
# New usage
self.classes = {
"ml_class_1": "class_uuid",
...
}
```
## 0.3.1
......
pytest==7.2.1
pytest==7.2.2
pytest-mock==3.10.0
pytest-responses==0.5.1
......@@ -17,7 +17,7 @@ from arkindex_worker.worker.base import ModelNotFoundError
def test_init_default_local_share(monkeypatch):
worker = BaseWorker()
assert worker.work_dir == os.path.expanduser("~/.local/share/arkindex")
assert str(worker.work_dir) == os.path.expanduser("~/.local/share/arkindex")
def test_init_default_xdg_data_home(monkeypatch):
......@@ -25,13 +25,13 @@ def test_init_default_xdg_data_home(monkeypatch):
monkeypatch.setenv("XDG_DATA_HOME", path)
worker = BaseWorker()
assert worker.work_dir == f"{path}/arkindex"
assert str(worker.work_dir) == f"{path}/arkindex"
def test_init_with_local_cache(monkeypatch):
worker = BaseWorker(support_cache=True)
assert worker.work_dir == os.path.expanduser("~/.local/share/arkindex")
assert str(worker.work_dir) == os.path.expanduser("~/.local/share/arkindex")
assert worker.support_cache is True
......@@ -40,7 +40,7 @@ def test_init_var_ponos_data_given(monkeypatch):
monkeypatch.setenv("PONOS_DATA", path)
worker = BaseWorker()
assert worker.work_dir == f"{path}/current"
assert str(worker.work_dir) == f"{path}/current"
def test_init_var_worker_run_id_missing(monkeypatch):
......
......@@ -101,7 +101,10 @@ def test_handle_s3_uploading_errors(mock_training_worker, model_file_dir):
)
def test_training_mixin_read_only(mock_training_worker, method, caplog):
"""All operations related to models versions returns early if the worker is configured as read only"""
mock_training_worker.read_only = True
# Set worker in read_only mode
mock_training_worker.worker_run_id = None
assert mock_training_worker.is_read_only
assert mock_training_worker.model_version is None
getattr(mock_training_worker, method)()
assert mock_training_worker.model_version is None
......
arkindex-base-worker==0.3.1
arkindex-base-worker==0.3.2