Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Target project: workers/base-worker
Showing with 108 additions and 56 deletions
@@ -673,7 +673,7 @@ def default_artifact():
return Artifact(
**{
"id": "artifact_id",
"path": "dataset_id.zstd",
"path": "dataset_id.tar.zst",
"size": 42,
"content_type": "application/zstd",
"s3_put_url": None,
@@ -454,7 +454,10 @@ def test_configure_load_model_configuration(mocker, monkeypatch, responses):
"configuration": None,
"model_version": {
"id": "12341234-1234-1234-1234-123412341234",
"name": "Model version 1337",
"model": {
"id": "43214321-4321-4321-4321-432143214321",
"name": "Model 1337",
},
"configuration": {
"param1": "value1",
"param2": 2,
@@ -489,6 +492,10 @@ def test_configure_load_model_configuration(mocker, monkeypatch, responses):
"param3": None,
}
assert worker.model_version_id == "12341234-1234-1234-1234-123412341234"
+ assert worker.model_details == {
+ "id": "43214321-4321-4321-4321-432143214321",
+ "name": "Model 1337",
+ }
def test_load_missing_secret():
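
Note on the hunk above: the worker now exposes the parent model's identity as `model_details`, next to the existing `model_version_id`. A minimal sketch of pulling those details out of the model version payload — the helper name is illustrative, not the base worker's actual API:

    def extract_model_details(model_version: dict) -> dict:
        # The payload nests the parent model under "model" (see the fixture above).
        model = model_version.get("model") or {}
        return {"id": model.get("id"), "name": model.get("name")}
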
@@ -697,7 +704,7 @@ def test_extract_parent_archives(tmp_path):
]
worker.task_data_dir = FIXTURES_DIR / "extract_parent_archives"
worker.extract_parent_archives("arkindex_data.zstd", tmp_path)
worker.extract_parent_archives("arkindex_data.tar.zst", tmp_path)
extracted_files = [
# Test
@@ -44,7 +44,7 @@ def test_download_dataset_artifact_download_api_error(
expected_results = [
{
"id": "artifact_1",
"path": "dataset_id.zstd",
"path": "dataset_id.tar.zst",
"size": 42,
"content_type": "application/zstd",
"s3_put_url": None,
@@ -69,7 +69,7 @@ def test_download_dataset_artifact_download_api_error(
)
responses.add(
responses.GET,
f"http://testserver/api/v1/task/{task_id}/artifact/dataset_id.zstd",
f"http://testserver/api/v1/task/{task_id}/artifact/dataset_id.tar.zst",
status=500,
)
@@ -82,11 +82,11 @@ def test_download_dataset_artifact_download_api_error(
] == BASE_API_CALLS + [
("GET", f"http://testserver/api/v1/task/{task_id}/artifacts/"),
# The API call is retried 5 times
("GET", f"http://testserver/api/v1/task/{task_id}/artifact/dataset_id.zstd"),
("GET", f"http://testserver/api/v1/task/{task_id}/artifact/dataset_id.zstd"),
("GET", f"http://testserver/api/v1/task/{task_id}/artifact/dataset_id.zstd"),
("GET", f"http://testserver/api/v1/task/{task_id}/artifact/dataset_id.zstd"),
("GET", f"http://testserver/api/v1/task/{task_id}/artifact/dataset_id.zstd"),
("GET", f"http://testserver/api/v1/task/{task_id}/artifact/dataset_id.tar.zst"),
("GET", f"http://testserver/api/v1/task/{task_id}/artifact/dataset_id.tar.zst"),
("GET", f"http://testserver/api/v1/task/{task_id}/artifact/dataset_id.tar.zst"),
("GET", f"http://testserver/api/v1/task/{task_id}/artifact/dataset_id.tar.zst"),
("GET", f"http://testserver/api/v1/task/{task_id}/artifact/dataset_id.tar.zst"),
]
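
As the comment in the hunk notes, a failed artifact download is retried 5 times before the worker gives up. A minimal sketch of such a retry policy with the tenacity library — an assumption for illustration, since the diff does not show which retry mechanism the base worker actually uses:

    import requests
    from tenacity import retry, stop_after_attempt, wait_exponential

    @retry(stop=stop_after_attempt(5), wait=wait_exponential(min=1, max=10), reraise=True)
    def fetch_artifact(url: str) -> bytes:
        # A 5xx response raises here, which triggers another attempt, up to 5 in total.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        return response.content
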
@@ -132,7 +132,10 @@ def test_download_dataset_artifact(
):
task_id = default_dataset.task_id
archive_path = (
FIXTURES_DIR / "extract_parent_archives" / "first_parent" / "arkindex_data.zstd"
FIXTURES_DIR
/ "extract_parent_archives"
/ "first_parent"
/ "arkindex_data.tar.zst"
)
mocker.patch(
"arkindex_worker.worker.base.BaseWorker.find_extras_directory",
@@ -142,7 +145,7 @@ def test_download_dataset_artifact(
expected_results = [
{
"id": "artifact_1",
"path": "dataset_id.zstd",
"path": "dataset_id.tar.zst",
"size": 42,
"content_type": "application/zstd",
"s3_put_url": None,
@@ -167,14 +170,14 @@ def test_download_dataset_artifact(
)
responses.add(
responses.GET,
f"http://testserver/api/v1/task/{task_id}/artifact/dataset_id.zstd",
f"http://testserver/api/v1/task/{task_id}/artifact/dataset_id.tar.zst",
status=200,
body=archive_path.read_bytes(),
content_type="application/zstd",
)
archive = mock_dataset_worker.download_dataset_artifact(default_dataset)
- assert archive == tmp_path / "dataset_id.zstd"
+ assert archive == tmp_path / "dataset_id.tar.zst"
assert archive.read_bytes() == archive_path.read_bytes()
archive.unlink()
@@ -183,7 +186,7 @@ def test_download_dataset_artifact(
(call.request.method, call.request.url) for call in responses.calls
] == BASE_API_CALLS + [
("GET", f"http://testserver/api/v1/task/{task_id}/artifacts/"),
("GET", f"http://testserver/api/v1/task/{task_id}/artifact/dataset_id.zstd"),
("GET", f"http://testserver/api/v1/task/{task_id}/artifact/dataset_id.tar.zst"),
]
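
The `.zstd` → `.tar.zst` rename applied throughout this revision reflects what the artifact actually is: a tar archive compressed with zstandard, for which the double extension is the conventional name. A minimal extraction sketch using the zstandard package — illustrative only; the diff does not show how `extract_parent_archives` is implemented:

    import tarfile

    import zstandard  # third-party: pip install zstandard

    def extract_tar_zst(archive_path: str, destination: str) -> None:
        # Stream-decompress the zstd layer, then unpack the tar stream inside it.
        dctx = zstandard.ZstdDecompressor()
        with open(archive_path, "rb") as source, dctx.stream_reader(source) as reader:
            with tarfile.open(fileobj=reader, mode="r|") as tar:
                tar.extractall(destination)
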
@@ -701,7 +704,7 @@ def test_run(
FIXTURES_DIR
/ "extract_parent_archives"
/ "first_parent"
/ "arkindex_data.zstd"
/ "arkindex_data.tar.zst"
)
responses.add(
responses.GET,
@@ -711,7 +714,7 @@ def test_run(
)
responses.add(
responses.GET,
f"http://testserver/api/v1/task/{default_dataset.task_id}/artifact/dataset_id.zstd",
f"http://testserver/api/v1/task/{default_dataset.task_id}/artifact/dataset_id.tar.zst",
status=200,
body=archive_path.read_bytes(),
content_type="application/zstd",
@@ -723,7 +726,7 @@ def test_run(
),
(
"GET",
f"http://testserver/api/v1/task/{default_dataset.task_id}/artifact/dataset_id.zstd",
f"http://testserver/api/v1/task/{default_dataset.task_id}/artifact/dataset_id.tar.zst",
),
]
extra_logs += [
@@ -799,7 +802,7 @@ def test_run_read_only(
FIXTURES_DIR
/ "extract_parent_archives"
/ "first_parent"
/ "arkindex_data.zstd"
/ "arkindex_data.tar.zst"
)
responses.add(
responses.GET,
@@ -809,7 +812,7 @@ def test_run_read_only(
)
responses.add(
responses.GET,
f"http://testserver/api/v1/task/{default_dataset.task_id}/artifact/dataset_id.zstd",
f"http://testserver/api/v1/task/{default_dataset.task_id}/artifact/dataset_id.tar.zst",
status=200,
body=archive_path.read_bytes(),
content_type="application/zstd",
@@ -821,7 +824,7 @@ def test_run_read_only(
),
(
"GET",
f"http://testserver/api/v1/task/{default_dataset.task_id}/artifact/dataset_id.zstd",
f"http://testserver/api/v1/task/{default_dataset.task_id}/artifact/dataset_id.tar.zst",
),
]
extra_logs += [
@@ -60,7 +60,7 @@ def test_list_artifacts(
expected_results = [
{
"id": "artifact_1",
"path": "dataset_id.zstd",
"path": "dataset_id.tar.zst",
"size": 42,
"content_type": "application/zstd",
"s3_put_url": None,
@@ -153,7 +153,7 @@ def test_download_artifact_wrong_param_artifact(
def test_download_artifact_api_error(responses, mock_dataset_worker, default_artifact):
responses.add(
responses.GET,
f"http://testserver/api/v1/task/{TASK_ID}/artifact/dataset_id.zstd",
f"http://testserver/api/v1/task/{TASK_ID}/artifact/dataset_id.tar.zst",
status=500,
)
@@ -167,11 +167,11 @@ def test_download_artifact_api_error(responses, mock_dataset_worker, default_art
(call.request.method, call.request.url) for call in responses.calls
] == BASE_API_CALLS + [
# The API call is retried 5 times
("GET", f"http://testserver/api/v1/task/{TASK_ID}/artifact/dataset_id.zstd"),
("GET", f"http://testserver/api/v1/task/{TASK_ID}/artifact/dataset_id.zstd"),
("GET", f"http://testserver/api/v1/task/{TASK_ID}/artifact/dataset_id.zstd"),
("GET", f"http://testserver/api/v1/task/{TASK_ID}/artifact/dataset_id.zstd"),
("GET", f"http://testserver/api/v1/task/{TASK_ID}/artifact/dataset_id.zstd"),
("GET", f"http://testserver/api/v1/task/{TASK_ID}/artifact/dataset_id.tar.zst"),
("GET", f"http://testserver/api/v1/task/{TASK_ID}/artifact/dataset_id.tar.zst"),
("GET", f"http://testserver/api/v1/task/{TASK_ID}/artifact/dataset_id.tar.zst"),
("GET", f"http://testserver/api/v1/task/{TASK_ID}/artifact/dataset_id.tar.zst"),
("GET", f"http://testserver/api/v1/task/{TASK_ID}/artifact/dataset_id.tar.zst"),
]
@@ -181,11 +181,14 @@ def test_download_artifact(
default_artifact,
):
archive_path = (
FIXTURES_DIR / "extract_parent_archives" / "first_parent" / "arkindex_data.zstd"
FIXTURES_DIR
/ "extract_parent_archives"
/ "first_parent"
/ "arkindex_data.tar.zst"
)
responses.add(
responses.GET,
f"http://testserver/api/v1/task/{TASK_ID}/artifact/dataset_id.zstd",
f"http://testserver/api/v1/task/{TASK_ID}/artifact/dataset_id.tar.zst",
status=200,
body=archive_path.read_bytes(),
content_type="application/zstd",
@@ -202,5 +205,5 @@ def test_download_artifact(
assert [
(call.request.method, call.request.url) for call in responses.calls
] == BASE_API_CALLS + [
("GET", f"http://testserver/api/v1/task/{TASK_ID}/artifact/dataset_id.zstd"),
("GET", f"http://testserver/api/v1/task/{TASK_ID}/artifact/dataset_id.tar.zst"),
]
@@ -3,9 +3,18 @@ stages:
- build
- release
+ # GitLab provides a template to ensure pipelines run only for branches and tags, not for merge requests
+ # This prevents duplicate pipelines in merge requests.
+ # See https://docs.gitlab.com/ee/ci/troubleshooting.html#job-may-allow-multiple-pipelines-to-run-for-a-single-action
+ include:
+ - template: 'Workflows/Branch-Pipelines.gitlab-ci.yml'
variables:
VERSION: $CI_COMMIT_SHA
+ DEBIAN_FRONTEND: non-interactive
test:
- # Pinned to <3.12 till next arkindex-base-worker release
- image: python:3.11
+ image: python:slim
stage: test
cache:
@@ -19,6 +28,9 @@ test:
before_script:
- pip install tox
+ # Install curl
+ - apt-get update -q -y && apt-get install -q -y --no-install-recommends curl
# Download OpenAPI schema from last backend build
- curl https://assets.teklia.com/arkindex/openapi.yml > schema.yml
@@ -29,7 +41,7 @@ test:
- tox -- --junitxml=test-report.xml --durations=50
lint:
- image: python:3
+ image: python:slim
cache:
paths:
@@ -43,6 +55,9 @@ lint:
before_script:
- pip install pre-commit
+ # Install git
+ - apt-get update -q -y && apt-get install -q -y --no-install-recommends git
except:
- schedules
@@ -58,8 +73,15 @@ docker-build:
DOCKER_DRIVER: overlay2
DOCKER_HOST: tcp://docker:2375/
- except:
- - schedules
+ rules:
+ # Never run on scheduled pipelines
+ - if: '$CI_PIPELINE_SOURCE == "schedule"'
+ when: never
+ # Use commit tag when running on tagged commit
+ - if: $CI_COMMIT_TAG
+ variables:
+ VERSION: $CI_COMMIT_TAG
+ - when: on_success
script:
- ci/build.sh
@@ -68,6 +90,7 @@ release-notes:
stage: release
image: registry.gitlab.teklia.com/infra/devops:latest
+ # Only run on tags
only:
- tags
@@ -83,3 +106,26 @@ bump-python-deps:
script:
- devops python-deps requirements.txt
+ publish-worker:
+ stage: release
+ image: registry.gitlab.teklia.com/arkindex/cli:latest
+ script:
+ - arkindex -p "$ARKINDEX_INSTANCE" --gitlab-secure-file arkindex-cli.yaml worker publish "$CI_REGISTRY_IMAGE:$VERSION"
+ rules:
+ # Never run on scheduled pipelines
+ - if: '$CI_PIPELINE_SOURCE == "schedule"'
+ when: never
+ # Use commit tag when running on tagged commit
+ - if: $CI_COMMIT_TAG
+ variables:
+ VERSION: $CI_COMMIT_TAG
+ - when: on_success
+ parallel:
+ matrix:
+ - ARKINDEX_INSTANCE:
+ # Publish worker on https://demo.arkindex.org
+ - demo
@@ -5,8 +5,8 @@ repos:
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix]
- - repo: https://github.com/ambv/black
- rev: 23.1.0
+ - repo: https://github.com/psf/black-pre-commit-mirror
+ rev: 23.11.0
hooks:
- id: black
- repo: https://github.com/pre-commit/pre-commit-hooks
#!/bin/sh -e
# Build the tasks Docker image.
# Requires CI_PROJECT_DIR and CI_REGISTRY_IMAGE to be set.
- # VERSION defaults to latest.
# Will automatically login to a registry if CI_REGISTRY, CI_REGISTRY_USER and CI_REGISTRY_PASSWORD are set.
# Will only push an image if $CI_REGISTRY is set.
if [ -z "$VERSION" ]; then
VERSION=${CI_COMMIT_TAG:-latest}
fi
if [ -z "$VERSION" -o -z "$CI_PROJECT_DIR" -o -z "$CI_REGISTRY_IMAGE" ]; then
- echo Missing environment variables
- exit 1
+ echo Missing environment variables
+ exit 1
fi
IMAGE_TAG="$CI_REGISTRY_IMAGE:$VERSION"
@@ -19,14 +14,9 @@ IMAGE_TAG="$CI_REGISTRY_IMAGE:$VERSION"
cd $CI_PROJECT_DIR
docker build -f Dockerfile . -t "$IMAGE_TAG"
- # Publish the image on the main branch or on a tag
- if [ "$CI_COMMIT_REF_NAME" = "$CI_DEFAULT_BRANCH" -o -n "$CI_COMMIT_TAG" ]; then
- if [ -n "$CI_REGISTRY" -a -n "$CI_REGISTRY_USER" -a -n "$CI_REGISTRY_PASSWORD" ]; then
- echo $CI_REGISTRY_PASSWORD | docker login -u $CI_REGISTRY_USER --password-stdin $CI_REGISTRY
- docker push $IMAGE_TAG
- else
- echo "Missing environment variables to log in to the container registry…"
- fi
if [ -n "$CI_REGISTRY" -a -n "$CI_REGISTRY_USER" -a -n "$CI_REGISTRY_PASSWORD" ]; then
echo $CI_REGISTRY_PASSWORD | docker login -u $CI_REGISTRY_USER --password-stdin $CI_REGISTRY
docker push $IMAGE_TAG
else
echo "The build was not published to the repository registry (only for main branch or tags)…"
echo "Missing environment variables to log in to the container registry…"
fi
- arkindex-base-worker==0.3.4
+ arkindex-base-worker==0.3.5
@@ -49,4 +49,5 @@ setup(
install_requires=parse_requirements(),
entry_points={"console_scripts": [f"{COMMAND}={MODULE}.worker:main"]},
packages=find_packages(),
+ python_requires=">=3.10",
)
@@ -15,7 +15,7 @@ def setup_environment(responses, monkeypatch) -> None:
# defaulting to the prod environment
schema_url = os.environ.get(
"ARKINDEX_API_SCHEMA_URL",
"https://arkindex.teklia.com/api/v1/openapi/?format=openapi-json",
"https://demo.arkindex.org/api/v1/openapi/?format=openapi-json",
)
responses.add_passthru(schema_url)
@@ -23,6 +23,8 @@ def setup_environment(responses, monkeypatch) -> None:
os.environ["ARKINDEX_API_SCHEMA_URL"] = schema_url
# Setup a fake worker run ID
os.environ["ARKINDEX_WORKER_RUN_ID"] = "1234-{{ cookiecutter.slug }}"
+ # Setup a fake corpus ID
+ os.environ["ARKINDEX_CORPUS_ID"] = "1234-corpus-id"
# Setup a mock api client instead of using a real one
def mock_setup_api_client(self):
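
The conftest hunk above seeds fake worker run and corpus IDs by mutating os.environ directly. The same setup can be written with pytest's monkeypatch fixture, which restores the environment after each test — a sketch with illustrative values (the worker run ID below is a placeholder, not the template's cookiecutter value):

    import pytest

    @pytest.fixture(autouse=True)
    def arkindex_env(monkeypatch):
        # monkeypatch.setenv is undone automatically at test teardown.
        monkeypatch.setenv("ARKINDEX_WORKER_RUN_ID", "1234-worker-run-id")
        monkeypatch.setenv("ARKINDEX_CORPUS_ID", "1234-corpus-id")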