diff --git a/.arkindex.yml b/.arkindex.yml
new file mode 100644
index 0000000000000000000000000000000000000000..1f435a4bbdf7f6476795a1af19d462a5ac1b4331
--- /dev/null
+++ b/.arkindex.yml
@@ -0,0 +1,11 @@
+---
+version: 2
+
+type: worker
+
+workers:
+  - slug: generic-training-dataset
+    name: Generic Training Dataset Extractor
+    type: data-extract
+    docker:
+      build: Dockerfile
diff --git a/.cookiecutter.json b/.cookiecutter.json
new file mode 100644
index 0000000000000000000000000000000000000000..9cbe71b55868b43f8b6d930af081deb77531d38e
--- /dev/null
+++ b/.cookiecutter.json
@@ -0,0 +1,8 @@
+{
+    "slug": "generic-training-dataset",
+    "name": "Generic Training Dataset Extractor",
+    "description": "Fill base-worker cache with information about dataset and extract images",
+    "worker_type": "data-extract",
+    "author": "Teklia",
+    "email": "contact@teklia.com"
+}
diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000000000000000000000000000000000000..e64c35dd6b8480f97c8b387bb681f762b705895b
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,2 @@
+.tox
+.git
diff --git a/.flake8 b/.flake8
new file mode 100644
index 0000000000000000000000000000000000000000..7a3797fc6b71677df500d8386cf70e3173a7f79f
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,4 @@
+[flake8]
+max-line-length = 150
+exclude = .git,__pycache__
+ignore = E203,E501,W503
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..1287c575804425c85bfb8ea466ca806fd0810199
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+*.pyc
+*.egg-info/
+.tox/
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
new file mode 100644
index 0000000000000000000000000000000000000000..23f062fa642bf0a4068aa3e274a8861b8625c0ba
--- /dev/null
+++ b/.gitlab-ci.yml
@@ -0,0 +1,84 @@
+stages:
+  - test
+  - build
+  - release
+
+test:
+  image: python:3
+
+  stage: test
+  cache:
+    paths:
+      - .cache/pip
+
+  variables:
+    PIP_CACHE_DIR: "$CI_PROJECT_DIR/.cache/pip"
+    ARKINDEX_API_SCHEMA_URL: schema.yml
+
+  before_script:
+    - pip install tox
+
+    # Download OpenAPI schema from last backend build
+    - curl https://assets.teklia.com/arkindex/openapi.yml > schema.yml
+
+  except:
+    - schedules
+
+  script:
+    - tox -- --junitxml=test-report.xml --durations=50
+
+lint:
+  image: python:3
+
+  cache:
+    paths:
+      - .cache/pip
+      - .cache/pre-commit
+
+  variables:
+    PIP_CACHE_DIR: "$CI_PROJECT_DIR/.cache/pip"
+    PRE_COMMIT_HOME: "$CI_PROJECT_DIR/.cache/pre-commit"
+
+  before_script:
+    - pip install pre-commit
+
+  except:
+    - schedules
+
+  script:
+    - pre-commit run -a
+
+docker-build:
+  stage: build
+  image: docker:19.03.1
+  services:
+    - docker:dind
+  variables:
+    DOCKER_DRIVER: overlay2
+    DOCKER_HOST: tcp://docker:2375/
+
+  except:
+    - schedules
+
+  script:
+    - ci/build.sh
+
+release-notes:
+  stage: release
+  image: registry.gitlab.com/teklia/devops:latest
+
+  only:
+    - tags
+
+  script:
+    - devops release-notes
+
+bump-python-deps:
+  stage: release
+  image: registry.gitlab.com/teklia/devops:latest
+
+  only:
+    - schedules
+
+  script:
+    - devops python-deps requirements.txt
diff --git a/.isort.cfg b/.isort.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..f5257078ed83767ea2142f9099320f0f4319ee56
--- /dev/null
+++ b/.isort.cfg
@@ -0,0 +1,7 @@
+[settings]
+# Compatible with black
+profile = black
+
+default_section=FIRSTPARTY
+known_first_party = arkindex,arkindex_worker
+known_third_party = pytest,setuptools
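For reference, the `test` job defined above can be reproduced locally with the same commands the CI runs; the schema URL and the tox invocation are taken verbatim from the job definition:

```shell
pip install tox
curl https://assets.teklia.com/arkindex/openapi.yml > schema.yml
ARKINDEX_API_SCHEMA_URL=schema.yml tox -- --junitxml=test-report.xml --durations=50
```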
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..93d07e6993e07300b856e6c8457e9074805baa49
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,41 @@
+repos:
+  - repo: https://github.com/PyCQA/isort
+    rev: 5.12.0
+    hooks:
+      - id: isort
+  - repo: https://github.com/ambv/black
+    rev: 23.1.0
+    hooks:
+      - id: black
+  - repo: https://github.com/pycqa/flake8
+    rev: 6.0.0
+    hooks:
+      - id: flake8
+        additional_dependencies:
+          - 'flake8-coding==1.3.2'
+          - 'flake8-debugger==4.1.2'
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.4.0
+    hooks:
+      - id: check-ast
+      - id: check-docstring-first
+      - id: check-executables-have-shebangs
+      - id: check-merge-conflict
+      - id: check-symlinks
+      - id: debug-statements
+      - id: trailing-whitespace
+      - id: check-yaml
+        args: [--allow-multiple-documents]
+      - id: mixed-line-ending
+      - id: name-tests-test
+        args: ['--django']
+      - id: check-json
+      - id: requirements-txt-fixer
+  - repo: https://github.com/codespell-project/codespell
+    rev: v2.2.2
+    hooks:
+      - id: codespell
+        args: ['--write-changes']
+  - repo: meta
+    hooks:
+      - id: check-useless-excludes
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..90a7be25544148b18184577eea0397eb6ffa8262
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,14 @@
+FROM python:3
+
+WORKDIR /src
+
+# Install the worker as a package
+COPY worker_generic_training_dataset worker_generic_training_dataset
+COPY requirements.txt setup.py VERSION ./
+RUN pip install .
+
+# Add the Teklia dev local CA
+RUN curl https://assets.teklia.com/teklia_dev_ca.pem > /usr/local/share/ca-certificates/arkindex-dev.crt && update-ca-certificates
+ENV REQUESTS_CA_BUNDLE /etc/ssl/certs/ca-certificates.crt
+
+CMD ["worker-generic-training-dataset"]
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000000000000000000000000000000000000..fd959fa8501e56bc4f1869e363b4a2118a86edce
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,2 @@
+include requirements.txt
+include VERSION
diff --git a/README.md b/README.md
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..af907a18f989a703b0039b869f4abd64c2a7f243 100644
--- a/README.md
+++ b/README.md
@@ -0,0 +1,32 @@
+# generic-training-dataset
+
+Fill base-worker cache with information about dataset and extract images
+
+### Development
+
+For development and testing purposes, it may be useful to install the worker as an editable package with pip.
+
+```shell
+pip3 install -e .
+```
+
+### Linter
+
+Code syntax is analyzed before submitting the code.\
+To run the linter tool suite, you may use pre-commit.
+
+```shell
+pip install pre-commit
+pre-commit run -a
+```
+
+### Run tests
+
+Tests are executed with tox using [pytest](https://pytest.org).
+
+```shell
+pip install tox
+tox
+```
+
+To recreate the tox virtual environment (e.g. after a dependency update), you may run `tox -r`.
diff --git a/VERSION b/VERSION
new file mode 100644
index 0000000000000000000000000000000000000000..6e8bf73aa550d4c57f6f35830f1bcdc7a4a62f38
--- /dev/null
+++ b/VERSION
@@ -0,0 +1 @@
+0.1.0
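To try the image built from the Dockerfile above, a minimal local sketch; the `ARKINDEX_*` variable names come from arkindex-base-worker's usual configuration and should be treated as assumptions here, along with the example URL and token:

```shell
# Build the worker image and run it against a (hypothetical) Arkindex instance
docker build -t generic-training-dataset .
docker run --rm \
  -e ARKINDEX_API_URL=https://arkindex.example.com \
  -e ARKINDEX_API_TOKEN=xxx \
  -e ARKINDEX_WORKER_RUN_ID=xxx \
  generic-training-dataset
```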
+ +if [ -z "$VERSION" ]; then + VERSION=${CI_COMMIT_TAG:-latest} +fi + +if [ -z "$VERSION" -o -z "$CI_PROJECT_DIR" -o -z "$CI_REGISTRY_IMAGE" ]; then + echo Missing environment variables + exit 1 +fi + +IMAGE_TAG="$CI_REGISTRY_IMAGE:$VERSION" + +cd $CI_PROJECT_DIR +docker build -f Dockerfile . -t "$IMAGE_TAG" + +# Publish the image on the main branch or on a tag +if [ "$CI_COMMIT_REF_NAME" = "main" -o -n "$CI_COMMIT_TAG" ]; then + if [ -n "$CI_REGISTRY" -a -n "$CI_REGISTRY_USER" -a -n "$CI_REGISTRY_PASSWORD" ]; then + echo $CI_REGISTRY_PASSWORD | docker login -u $CI_REGISTRY_USER --password-stdin $CI_REGISTRY + docker push $IMAGE_TAG + else + echo "Missing environment variables to log in to the container registry…" + fi +else + echo "The build was not published to the repository registry (only for main branch or tags)…" +fi diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..5ff1be6e274e323aa1ab7d829d7653efed63ff8c --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +arkindex-base-worker==0.3.2 diff --git a/setup.py b/setup.py new file mode 100755 index 0000000000000000000000000000000000000000..130a32ccefa323ccd88c9d3cdf8afaeb27ff5ba1 --- /dev/null +++ b/setup.py @@ -0,0 +1,42 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from pathlib import Path + +from setuptools import find_packages, setup + +MODULE = "worker_generic_training_dataset" +COMMAND = "worker-generic-training-dataset" + + +def parse_requirements_line(line): + """Special case for git requirements""" + if line.startswith("git+http"): + assert "@" in line, "Branch should be specified with suffix (ex: @master)" + assert ( + "#egg=" in line + ), "Package name should be specified with suffix (ex: #egg=kraken)" + package_name = line.split("#egg=")[-1] + return f"{package_name} @ {line}" + else: + return line + + +def parse_requirements(): + path = Path(__file__).parent.resolve() / "requirements.txt" + assert path.exists(), f"Missing requirements: {path}" + return list( + map(parse_requirements_line, map(str.strip, path.read_text().splitlines())) + ) + + +setup( + name=MODULE, + version=open("VERSION").read(), + description="Fill base-worker cache with information about dataset and extract images", + author="Teklia", + author_email="contact@teklia.com", + install_requires=parse_requirements(), + entry_points={"console_scripts": [f"{COMMAND}={MODULE}.worker:main"]}, + packages=find_packages(), +) diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000000000000000000000000000000000000..14dfec177864ca1da9e6470d5bcb84f1a51e530d --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,28 @@ +# -*- coding: utf-8 -*- +import os + +import pytest + +from arkindex.mock import MockApiClient +from arkindex_worker.worker.base import BaseWorker + + +@pytest.fixture(autouse=True) +def setup_environment(responses, monkeypatch): + """Setup needed environment variables""" + + # Allow accessing remote API schemas + # defaulting to the prod environment + schema_url = os.environ.get( + "ARKINDEX_API_SCHEMA_URL", + "https://arkindex.teklia.com/api/v1/openapi/?format=openapi-json", + ) + responses.add_passthru(schema_url) + + # Set schema url in environment + os.environ["ARKINDEX_API_SCHEMA_URL"] = schema_url + # Setup a fake worker run ID + os.environ["ARKINDEX_WORKER_RUN_ID"] = "1234-generic-training-dataset" + + # Setup a mock api client instead of using a real one + monkeypatch.setattr(BaseWorker, "setup_api_client", lambda _: MockApiClient()) diff --git 
diff --git a/tests/test_worker.py b/tests/test_worker.py
new file mode 100644
index 0000000000000000000000000000000000000000..bb38787b81cbba08837e9c16035209e7c37fdcdd
--- /dev/null
+++ b/tests/test_worker.py
@@ -0,0 +1,13 @@
+# -*- coding: utf-8 -*-
+import importlib
+
+
+def test_dummy():
+    assert True
+
+
+def test_import():
+    """Import our newly created module, through importlib to avoid parsing issues"""
+    worker = importlib.import_module("worker_generic_training_dataset.worker")
+    assert hasattr(worker, "Demo")
+    assert hasattr(worker.Demo, "process_element")
diff --git a/tox.ini b/tox.ini
new file mode 100644
index 0000000000000000000000000000000000000000..dcc4e6c9ff0ae34f0df3801c145121eb61438eb4
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,12 @@
+[tox]
+envlist = worker-generic-training-dataset
+
+[testenv]
+passenv = ARKINDEX_API_SCHEMA_URL
+commands =
+    pytest {posargs}
+
+deps =
+    pytest
+    pytest-responses
+    -rrequirements.txt
diff --git a/worker_generic_training_dataset/__init__.py b/worker_generic_training_dataset/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/worker_generic_training_dataset/worker.py b/worker_generic_training_dataset/worker.py
new file mode 100644
index 0000000000000000000000000000000000000000..8489d561aeba8354d60155803cdcb9c405d68112
--- /dev/null
+++ b/worker_generic_training_dataset/worker.py
@@ -0,0 +1,17 @@
+# -*- coding: utf-8 -*-
+from arkindex_worker.worker import ElementsWorker
+
+
+class Demo(ElementsWorker):
+    def process_element(self, element):
+        print("Demo processing element", element)
+
+
+def main():
+    Demo(
+        description="Fill base-worker cache with information about dataset and extract images"
+    ).run()
+
+
+if __name__ == "__main__":
+    main()
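Once this scaffolding is merged, the Demo stub is where the actual extraction logic would land. A hedged sketch of that next step; the `DatasetExtractor` name is hypothetical, and the availability of `element.open_image()` on arkindex_worker's Element model in this base-worker version is an assumption:

```python
# Hypothetical follow-up, not part of this diff: replace the Demo stub with
# real extraction logic that downloads each element's image.
from arkindex_worker.worker import ElementsWorker


class DatasetExtractor(ElementsWorker):
    def process_element(self, element):
        # Download the element's image so it can be stored alongside the
        # cached dataset information (open_image() availability is assumed)
        image = element.open_image()
        print("Extracted image for element", element.id, image.size)
```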