diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index d3cd9ec2a1f99a58608c3480e5a2258a04a575ef..6b26a4c3d87404c92429d305ffe3b47925b2f0ad 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -4,7 +4,7 @@ stages: - release test: - image: python:3 + image: python:3.11 stage: test cache: @@ -28,7 +28,7 @@ test: - tox -- --junitxml=test-report.xml --durations=50 lint: - image: python:3 + image: python:3.11 cache: paths: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 93d07e6993e07300b856e6c8457e9074805baa49..e5d093234d490ce32e53c903478a3340dfacb444 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,18 +4,18 @@ repos: hooks: - id: isort - repo: https://github.com/ambv/black - rev: 23.1.0 + rev: 23.9.1 hooks: - id: black - repo: https://github.com/pycqa/flake8 - rev: 6.0.0 + rev: 6.1.0 hooks: - id: flake8 additional_dependencies: - 'flake8-coding==1.3.2' - 'flake8-debugger==4.1.2' - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v4.5.0 hooks: - id: check-ast - id: check-docstring-first @@ -32,7 +32,7 @@ repos: - id: check-json - id: requirements-txt-fixer - repo: https://github.com/codespell-project/codespell - rev: v2.2.2 + rev: v2.2.6 hooks: - id: codespell args: ['--write-changes'] diff --git a/Dockerfile b/Dockerfile index 90a7be25544148b18184577eea0397eb6ffa8262..cb7e56782eb37373f4974882a41a4e8ab72ffc40 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3 +FROM python:3.11 WORKDIR /src diff --git a/requirements.txt b/requirements.txt index 918e5dcd3ef8e9b1acc437a056e2a7ae70eabf64..76c4b51fd4edf0656848f6d7f6036bd3d460f0ab 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ -arkindex-base-worker==0.3.3-rc3 -arkindex-export==0.1.2 +arkindex-base-worker==0.3.4 +arkindex-export==0.1.7 diff --git a/setup.py b/setup.py index 130a32ccefa323ccd88c9d3cdf8afaeb27ff5ba1..81f4440affe641e4266ae08efa2af63c9b40c62c 100755 --- a/setup.py +++ b/setup.py @@ -1,28 +1,38 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +import re from pathlib import Path +from typing import List from setuptools import find_packages, setup MODULE = "worker_generic_training_dataset" COMMAND = "worker-generic-training-dataset" +SUBMODULE_PATTERN = re.compile("-e ((?:(?!#egg=).)*)(?:#egg=)?(.*)") -def parse_requirements_line(line): - """Special case for git requirements""" + +def parse_requirements_line(line: str) -> str: + # Special case for git requirements if line.startswith("git+http"): assert "@" in line, "Branch should be specified with suffix (ex: @master)" assert ( "#egg=" in line ), "Package name should be specified with suffix (ex: #egg=kraken)" - package_name = line.split("#egg=")[-1] + package_name: str = line.split("#egg=")[-1] return f"{package_name} @ {line}" + # Special case for submodule requirements + elif line.startswith("-e"): + package_path, package_name = SUBMODULE_PATTERN.match(line).groups() + package_path: Path = Path(package_path).resolve() + # Package name is optional: use folder name by default + return f"{package_name or package_path.name} @ file://{package_path}" else: return line -def parse_requirements(): +def parse_requirements() -> List[str]: path = Path(__file__).parent.resolve() / "requirements.txt" assert path.exists(), f"Missing requirements: {path}" return list( diff --git a/tests/conftest.py b/tests/conftest.py index 98a69b3ce3c72cf6e3adc07319ae599225ea06d7..f40f4e7374b8993464dbb6e4023d96011bb0f2e5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -29,7 +29,10 @@ def setup_environment(responses, monkeypatch): os.environ["ARKINDEX_WORKER_RUN_ID"] = "1234-generic-training-dataset" # Setup a mock api client instead of using a real one - monkeypatch.setattr(BaseWorker, "setup_api_client", lambda _: MockApiClient()) + def mock_setup_api_client(self): + self.api_client = MockApiClient() + + monkeypatch.setattr(BaseWorker, "setup_api_client", mock_setup_api_client) @pytest.fixture(scope="session", autouse=True) diff --git a/tests/data/arkindex_export.sqlite b/tests/data/arkindex_export.sqlite index bfefcefb2c4db70f61787d85ecea51e62c4ad8cf..6540376a5890c3e81db6f596f77b4e210427e8b0 100644 Binary files a/tests/data/arkindex_export.sqlite and b/tests/data/arkindex_export.sqlite differ diff --git a/worker_generic_training_dataset/worker.py b/worker_generic_training_dataset/worker.py index f547e42b86574943bd3c422951b2ab3f88f91374..b20a04dae21d43471ced457e49172555b9d8013e 100644 --- a/worker_generic_training_dataset/worker.py +++ b/worker_generic_training_dataset/worker.py @@ -158,6 +158,7 @@ class DatasetExtractor(BaseWorker): class_name=classification.class_name, confidence=classification.confidence, state=classification.state, + worker_run_id=classification.worker_run, ) for classification in list_classifications(element.id) ] @@ -178,12 +179,10 @@ class DatasetExtractor(BaseWorker): id=transcription.id, element=element, text=transcription.text, - # Dodge not-null constraint for now - confidence=transcription.confidence or 1.0, + confidence=transcription.confidence, orientation=DEFAULT_TRANSCRIPTION_ORIENTATION, - worker_version_id=transcription.worker_version.id - if transcription.worker_version - else None, + worker_version_id=transcription.worker_version, + worker_run_id=transcription.worker_run, ) for transcription in list_transcriptions(element.id) ] @@ -208,6 +207,7 @@ class DatasetExtractor(BaseWorker): name=transcription_entity.entity.name, validated=transcription_entity.entity.validated, metas=transcription_entity.entity.metas, + worker_run_id=transcription_entity.entity.worker_run, ) entities.append(entity) transcription_entities.append( @@ -218,6 +218,7 @@ class DatasetExtractor(BaseWorker): offset=transcription_entity.offset, length=transcription_entity.length, confidence=transcription_entity.confidence, + worker_run_id=transcription_entity.worker_run, ) ) if entities: @@ -284,9 +285,8 @@ class DatasetExtractor(BaseWorker): polygon=element.polygon, rotation_angle=element.rotation_angle, mirrored=element.mirrored, - worker_version_id=element.worker_version.id - if element.worker_version - else None, + worker_version_id=element.worker_version, + worker_run_id=element.worker_run, confidence=element.confidence, )