Compare revisions (target project: arkindex/backend)
Commits on Source (82)
Showing with 1741 additions and 1216 deletions
@@ -2,6 +2,9 @@
.git
.eggs
*.egg
logs
**/__pycache__/
**/*.pyc
docker/
Makefile
test-report.xml
arkindex/config.yml
@@ -5,3 +5,4 @@ exclude=build,.cache,.eggs,.git,src,arkindex/*/migrations/0001_initial.py
# the only interesting ignore is W503, which goes against PEP8.
# See https://lintlyci.github.io/Flake8Rules/rules/W503.html
ignore = E203,E501,W503
inline-quotes = "
@@ -16,3 +16,4 @@ htmlcov
*.key
arkindex/config.yml
test-report.xml
docker/ssl/*.pem
@@ -18,10 +18,8 @@ include:
- .cache/pip
before_script:
# Custom line to install our own deps from Git using GitLab CI credentials
- "pip install -e git+https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.teklia.com/arkindex/license#egg=teklia-license"
- pip install -r tests-requirements.txt
- "echo 'database: {host: postgres, port: 5432}\npublic_hostname: http://ci.arkindex.localhost' > $CONFIG_PATH"
- "echo database: {host: postgres, port: 5432} > $CONFIG_PATH"
- pip install -e .[test]
# Those jobs require the base image; they might fail if the image is not up to date.
# Allow them to fail when building a new base image, to prevent them from blocking a new base image build
@@ -60,7 +58,7 @@ backend-tests:
- test-report.xml
script:
- python3 setup.py test
- arkindex test
backend-lint:
image: python:3.10
@@ -93,8 +91,7 @@ backend-migrations:
alias: postgres
script:
- pip install -e .
- arkindex/manage.py makemigrations --check --noinput --dry-run -v 3
- arkindex makemigrations --check --noinput --dry-run -v 3
backend-openapi:
extends: .backend-setup
@@ -156,29 +153,7 @@ backend-build:
- when: never
script:
- ci/build.sh Dockerfile
backend-build-binary-docker:
stage: build
image: docker:19.03.1
services:
- docker:dind
variables:
DOCKER_DRIVER: overlay2
DOCKER_HOST: tcp://docker:2375/
# Run this on master and tags except base tags and schedules
rules:
- if: '$CI_PIPELINE_SOURCE == "schedule"'
when: never
- if: '$CI_COMMIT_BRANCH == "master"'
when: on_success
- if: '$CI_COMMIT_TAG && $CI_COMMIT_TAG !~ /^base-.*/'
when: on_success
- when: never
script:
- ci/build.sh Dockerfile.binary "-binary"
- ci/build.sh
# Make sure arkindex is always compatible with Nuitka
backend-build-binary:
@@ -10,6 +10,12 @@ repos:
additional_dependencies:
- 'flake8-copyright==0.2.2'
- 'flake8-debugger==3.1.0'
- 'flake8-quotes==3.3.2'
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.1.11
hooks:
- id: ruff
args: [--fix]
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.1.0
hooks:
@@ -6,19 +6,6 @@ ADD . build
RUN cd build && python3 setup.py sdist
FROM registry.gitlab.teklia.com/arkindex/backend/base:gitlab-teklia
ARG LICENSE_BRANCH=master
ARG LICENSE_ID=37
# Auth token expires on 01/07/2024
ARG GITLAB_TOKEN="glpat-3sBZPFgkZbqJxfSqjcAa"
# Install teklia-license from private repo
RUN \
mkdir /tmp/teklia-license && \
wget --header "PRIVATE-TOKEN: $GITLAB_TOKEN" https://gitlab.teklia.com/api/v4/projects/$LICENSE_ID/repository/archive.tar.gz?sha=$LICENSE_BRANCH -O /tmp/teklia-license.tar.gz && \
tar --strip-components=1 -xvf /tmp/teklia-license.tar.gz -C /tmp/teklia-license && \
cd /tmp/teklia-license && pip install --disable-pip-version-check --no-cache-dir --quiet . && \
rm -rf /tmp/teklia-license
# Install arkindex and its deps
# Uses a source archive instead of full local copy to speedup docker build
@@ -32,10 +19,13 @@ RUN chown -R ark:teklia /backend_static
# Copy Version file
COPY VERSION /etc/arkindex.version
HEALTHCHECK --start-period=1m --interval=1m --timeout=5s \
CMD wget --spider --quiet http://localhost/api/v1/public-key/ || exit 1
ENV PORT 8000
HEALTHCHECK --start-period=10s --interval=30s --timeout=5s \
CMD wget --spider --quiet http://localhost:$PORT/api/v1/corpus/ || exit 1
# Allow usage of django-admin by exposing our settings
ENV DJANGO_SETTINGS_MODULE "arkindex.project.settings"
# Run with Gunicorn
ENV PORT 8000
EXPOSE $PORT
CMD manage.py gunicorn --host=0.0.0.0 --port $PORT
CMD arkindex gunicorn --host=0.0.0.0 --port $PORT
# syntax=docker/dockerfile:1
FROM python:3.10-slim-bookworm AS compilation
RUN apt-get update && apt-get install --no-install-recommends -y build-essential wget
RUN pip install nuitka
ARG LICENSE_BRANCH=master
ARG LICENSE_ID=37
# Auth token expires on 01/07/2024
ARG GITLAB_TOKEN="glpat-3sBZPFgkZbqJxfSqjcAa"
# We build in /usr/share because Django will try to load some files relative to that path
# once executed in the binary (management commands, ...)
WORKDIR /usr/share
# Add our own source code
ADD arkindex /usr/share/arkindex
ADD base/requirements.txt /tmp/requirements-base-arkindex.txt
ADD requirements.txt /tmp/requirements-arkindex.txt
# Install teklia-license from private repo
RUN \
mkdir /tmp/teklia-license && \
wget --header "PRIVATE-TOKEN: $GITLAB_TOKEN" https://gitlab.teklia.com/api/v4/projects/$LICENSE_ID/repository/archive.tar.gz?sha=$LICENSE_BRANCH -O /tmp/teklia-license.tar.gz && \
tar --strip-components=1 -xvf /tmp/teklia-license.tar.gz -C /tmp/teklia-license && \
mv /tmp/teklia-license/teklia_license /usr/share && \
cp /tmp/teklia-license/requirements.txt /tmp/requirements-license-arkindex.txt
# Build full requirements, removing relative or remote references to arkindex projects
RUN cat /tmp/requirements-*arkindex.txt | sort | uniq | grep -v -E '^arkindex|^#|teklia-license' > /requirements.txt
# List all management commands
RUN find /usr/share/arkindex/*/management -name '*.py' -not -name '__init__.py' > /commands.txt
# Remove arkindex unit tests
RUN find /usr/share/arkindex -type d -name tests | xargs rm -rf
# This configuration is needed to avoid a compilation crash at linking stage
# It only seems to happen on recent gcc
# See https://github.com/Nuitka/Nuitka/issues/959
ENV NUITKA_RESOURCE_MODE=linker
# Compile all our python source code
# Do not use the -O or -OO python flags here as they remove assert statements (see backend#432)
RUN python -m nuitka \
--nofollow-imports \
--include-package=arkindex \
--include-package=teklia_license \
--show-progress \
--lto=yes \
--output-dir=/build \
arkindex/manage.py
# Start over from a clean setup
FROM registry.gitlab.teklia.com/arkindex/backend/base:gitlab-teklia as build
# Import files from compilation
RUN mkdir /usr/share/arkindex
COPY --from=compilation /build/manage.bin /usr/bin/arkindex
COPY --from=compilation /requirements.txt /usr/share/arkindex
COPY --from=compilation /commands.txt /usr/share/arkindex
# Install open source Python dependencies
# We also add gunicorn, to be able to run `arkindex gunicorn`
RUN pip install -r /usr/share/arkindex/requirements.txt gunicorn
# Setup Arkindex VERSION
COPY VERSION /etc/arkindex.version
# Copy templates in base dir for binary
ENV BASE_DIR=/usr/share/arkindex
COPY arkindex/templates /usr/share/arkindex/templates
COPY arkindex/documents/export/*.sql /usr/share/arkindex/documents/export/
# Touch python files for needed management commands
# Otherwise Django will not load the compiled module
RUN for cmd in $(cat /usr/share/arkindex/commands.txt); do mkdir -p $(dirname $cmd); touch $cmd; done
HEALTHCHECK --start-period=1m --start-interval=1s --interval=1m --timeout=5s \
CMD wget --spider --quiet http://localhost/api/v1/public-key/ || exit 1
# Run gunicorn server
ENV PORT=80
EXPOSE $PORT
CMD arkindex gunicorn --host=0.0.0.0 --port $PORT
include VERSION
include LICENSE
include requirements.txt
include base/requirements.txt
include tests-requirements.txt
ROOT_DIR:=$(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
IMAGE_TAG=registry.gitlab.teklia.com/arkindex/backend
.PHONY: all release
.PHONY: all release services
all: clean build
@@ -13,39 +13,36 @@ clean:
find . -name '*.pyc' -exec rm {} \;
build:
CI_PROJECT_DIR=$(ROOT_DIR) CI_REGISTRY_IMAGE=$(IMAGE_TAG) $(ROOT_DIR)/ci/build.sh Dockerfile
binary:
CI_PROJECT_DIR=$(ROOT_DIR) CI_REGISTRY_IMAGE=$(IMAGE_TAG) $(ROOT_DIR)/ci/build.sh Dockerfile.binary -binary
CI_PROJECT_DIR=$(ROOT_DIR) CI_REGISTRY_IMAGE=$(IMAGE_TAG) $(ROOT_DIR)/ci/build.sh
worker:
arkindex/manage.py rqworker -v 2 default high
arkindex rqworker -v 2 default high tasks
test-fixtures:
$(eval export PGPASSWORD=devdata)
psql -h 127.0.0.1 -p 9100 -U devuser -c 'ALTER DATABASE arkindex_dev RENAME TO arkindex_tmp_fixtures' template1
psql -h 127.0.0.1 -p 9100 -U devuser -c 'CREATE DATABASE arkindex_dev' template1
psql -h 127.0.0.1 -p 5432 -U devuser -c 'ALTER DATABASE arkindex_dev RENAME TO arkindex_tmp_fixtures' template1
psql -h 127.0.0.1 -p 5432 -U devuser -c 'CREATE DATABASE arkindex_dev' template1
# A "try...finally" block in a Makefile: ensure we bring back the dev database even when test-fixtures fails
-$(MAKE) test-fixtures-run
$(MAKE) test-fixtures-restore
test-fixtures-run:
arkindex/manage.py migrate
arkindex/manage.py build_fixtures
arkindex/manage.py dumpdata --indent 4 process documents images users auth ponos training > arkindex/documents/fixtures/data.json
arkindex migrate
arkindex build_fixtures
arkindex dumpdata --indent 4 process documents images users auth ponos training > arkindex/documents/fixtures/data.json
test-fixtures-restore:
# This first renaming ensures that arkindex_tmp_fixtures exists; we don't want to drop arkindex_dev without a backup
psql -h 127.0.0.1 -p 9100 -U devuser -c 'ALTER DATABASE arkindex_tmp_fixtures RENAME TO arkindex_dev_replace' template1
psql -h 127.0.0.1 -p 9100 -U devuser -c 'DROP DATABASE arkindex_dev' template1
psql -h 127.0.0.1 -p 9100 -U devuser -c 'ALTER DATABASE arkindex_dev_replace RENAME TO arkindex_dev' template1
psql -h 127.0.0.1 -p 5432 -U devuser -c 'ALTER DATABASE arkindex_tmp_fixtures RENAME TO arkindex_dev_replace' template1
psql -h 127.0.0.1 -p 5432 -U devuser -c 'DROP DATABASE arkindex_dev' template1
psql -h 127.0.0.1 -p 5432 -U devuser -c 'ALTER DATABASE arkindex_dev_replace RENAME TO arkindex_dev' template1
require-version:
@if [ ! "$(version)" ]; then echo "Missing version to publish"; exit 1; fi
@git rev-parse $(version) >/dev/null 2>&1 && (echo "Version $(version) already exists on local git repo !" && exit 1) || true
schema:
./arkindex/manage.py spectacular --fail-on-warn --validate --file schema.yml
arkindex spectacular --fail-on-warn --validate --file schema.yml
release:
$(eval version:=$(shell cat VERSION))
@@ -53,3 +50,21 @@ release:
git commit VERSION -m "Version $(version)"
git tag $(version)
git push origin master $(version)
clean-docker:
$(eval containers:=$(shell docker ps -a -q))
@if [ -n "$(containers)" ]; then \
echo "Cleaning up past containers\n" \
docker rm -f $(containers) ; \
fi
stack: docker/ssl/ark-cert.pem
docker compose -p arkindex up --build
services: docker/ssl/ark-cert.pem
docker compose -p arkindex -f docker/docker-compose.services.yml up
docker/ssl/ark-cert.pem:
$(eval export CAROOT=$(ROOT_DIR)/docker/ssl)
mkcert -install
mkcert -cert-file=$(ROOT_DIR)/docker/ssl/ark-cert.pem -key-file=$(ROOT_DIR)/docker/ssl/ark-key.pem ark.localhost *.ark.localhost *.iiif.ark.localhost
Backend for Historical Manuscripts Indexing
===========================================
# Arkindex Backend
[![pipeline status](https://gitlab.teklia.com/arkindex/backend/badges/master/pipeline.svg)](https://gitlab.teklia.com/arkindex/backend/commits/master)
[![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
This project is the open-source backend of Arkindex, used to manage and process image documents with Machine Learning tools.
It is licensed under the [AGPL-v3 license](./LICENSE).
## Requirements
* Clone of the [architecture](https://gitlab.teklia.com/arkindex/architecture)
* Git
* Make
* Python 3.6+
* Python 3.10+
* pip
* [virtualenvwrapper](https://virtualenvwrapper.readthedocs.io/en/latest/)
* [Docker 24+](https://docs.docker.com/engine/install/#supported-platforms)
* [mkcert](https://github.com/FiloSottile/mkcert?tab=readme-ov-file#installation)
* [GeoDjango system dependencies](https://docs.djangoproject.com/en/3.1/ref/contrib/gis/install/geolibs/): `sudo apt install binutils libproj-dev gdal-bin`
## Dev Setup
## Setup for developers
```
You'll also need the [Arkindex frontend](https://gitlab.teklia.com/arkindex/frontend) to be able to develop on the whole platform.
```console
git clone git@gitlab.teklia.com:arkindex/backend.git
git clone git@gitlab.teklia.com:arkindex/frontend.git
cd backend
mkvirtualenv ark -a .
mkvirtualenv ark -a . -p /usr/bin/python3.10
pip install -e .[test]
```
When the [architecture](https://gitlab.teklia.com/arkindex/architecture) is running locally to provide required services:
The Arkindex backend relies on some open-source services to store data and communicate with asynchronous workers.
To run all the required services, please run in a dedicated shell:
```
arkindex/manage.py migrate
arkindex/manage.py createsuperuser
```console
make services
```
### Local configuration
On a first run, you'll need to:
For development purposes, you can customize the Arkindex settings by adding a YAML file as `arkindex/config.yml`. This file is not tracked by Git; if it exists, any configuration directive set in this file will be used for exposed settings from `settings.py`. You can view the full list of settings [on the wiki](https://wiki.vpn/en/arkindex/deploy/configuration).
1. Configure the instance by enabling the sample configuration.
2. Populate the database structure.
3. Initialize some fields in the database.
4. Create an administration account.
All of these steps are done through:
Another way to customize your Arkindex instance is to add a Python file in `arkindex/project/local_settings.py`. Here you are not limited to exposed settings, and can customize any setting, or even load Python dependencies at boot time. This is not recommended, as your customization may not be available to real-world Arkindex instances.
```console
cp config.yml.sample arkindex/config.yml
arkindex migrate
arkindex bootstrap
arkindex createsuperuser
```
### ImageMagick setup
Finally, you can run the backend:
PDF and image imports in Arkindex will require ImageMagick. Due to its ability to take any computer down if you give it the right parameters (for example, converting a 1000-page PDF file into JPEG files at 30 000 DPI), it has a security policy file. By default, on Ubuntu, PDF conversion is forbidden.
```console
arkindex runserver
```
You will need to edit the ImageMagick policy file to get PDF and image imports to work in Arkindex. The file is located at `/etc/ImageMagick-6/policy.xml`.
At this stage, you can use `http://localhost:8000/admin` to access the administration interface.
The line that sets the PDF policy is `<policy domain="coder" rights="none" pattern="PDF" />`. Replace `none` with `read|write` for it to work. See [this StackOverflow question](https://stackoverflow.com/questions/52998331) for more info.
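As an illustration, here is a minimal Python sketch of that edit, assuming the default Ubuntu policy path quoted above and enough privileges to write to it:

```python
# Hedged sketch: swap the PDF coder policy from "none" to "read|write".
# The path and both policy lines come from the paragraphs above; run this
# with privileges that allow writing under /etc (e.g. via sudo).
from pathlib import Path

policy = Path("/etc/ImageMagick-6/policy.xml")
policy.write_text(policy.read_text().replace(
    '<policy domain="coder" rights="none" pattern="PDF" />',
    '<policy domain="coder" rights="read|write" pattern="PDF" />',
))
```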
### Asynchronous tasks
### GitLab OAuth setup
To run asynchronous tasks, run in another shell:
Arkindex uses OAuth to let a user connect their GitLab account(s) and register Git repositories. In local development, you will need to register Arkindex as a GitLab OAuth application for it to work.
```console
make worker
```
Go to GitLab's [Applications settings](https://gitlab.teklia.com/profile/applications) and create a new application with the `api` scope and add the following callback URIs:
### Dockerized stack
```
http://127.0.0.1:8000/api/v1/oauth/providers/gitlab/callback/
http://ark.localhost:8000/api/v1/oauth/providers/gitlab/callback/
https://ark.localhost/api/v1/oauth/providers/gitlab/callback/
```
It is also possible to run the whole Arkindex stack through Docker containers. This is useful to quickly test the platform.
Once the application is created, GitLab will provide you with an application ID and a secret. Use the `arkindex/config.yml` file to set them:
This command will build all the required Docker images (backend & frontend) and run them as Docker containers:
```yaml
gitlab:
app_id: 24cacf5004bf68ae9daad19a5bba391d85ad1cb0b31366e89aec86fad0ab16cb
app_secret: 9d96d9d5b1addd7e7e6119a23b1e5b5f68545312bfecb21d1cdc6af22b8628b8
```console
make stack
```
You'll be able to access the platform at the URL `https://ark.localhost`.
### Local configuration
For development purposes, you can customize the Arkindex settings by adding a YAML file as `arkindex/config.yml`. This file is not tracked by Git; if it exists, any configuration directive set in this file will be used for exposed settings from `settings.py`. You can view the full list of settings [on the wiki](https://redmine.teklia.com/projects/arkindex/wiki/Backend_configuration).
Another way to customize your Arkindex instance is to add a Python file in `arkindex/project/local_settings.py`. Here you are not limited to exposed settings, and can customize any setting, or even load Python dependencies at boot time. This is not recommended, as your customization may not be available to real-world Arkindex instances.
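As a minimal sketch of what such a file could look like (the two settings below are standard Django settings used as placeholders, not values Arkindex requires):

```python
# arkindex/project/local_settings.py — hypothetical example.
# Any Django setting can be overridden here, and arbitrary Python runs at boot.
DEBUG = True
INTERNAL_IPS = ["127.0.0.1"]  # e.g. to enable django-debug-toolbar locally
```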
### Local image server
Arkindex splits image URLs into their image server and image path. For example, an IIIF server at `http://iiif.irht.cnrs.fr/iiif/` and an image at `/Paris/JJ042/1.jpg` would be represented as an ImageServer instance holding one Image. Since Arkindex has a local IIIF server for image uploads and thumbnails, a special instance of ImageServer is required to point to this local server. In local development, this server should be available at `https://ark.localhost/iiif`. You will therefore need to create an ImageServer with this URL via the Django admin or the Django shell. To set the local server ID, you can add a custom setting in `arkindex/config.yml`:
@@ -73,19 +97,14 @@ local_imageserver_id: 999
Here is how to quickly create the ImageServer using the shell:
```
backend/arkindex$ ./manage.py shell
```python
$ arkindex shell
>>> from arkindex.images.models import ImageServer
>>> ImageServer.objects.create(id=1, display_name='local', url='https://ark.localhost/iiif')
```
Note that this local server will only work inside Docker.
### User groups
We use a custom group model in `arkindex.users.models` (not the `django.contrib.auth` one).
In this early version, groups do not define any rights yet.
## Usage
### Makefile
@@ -95,31 +114,28 @@ At the root of the repository is a Makefile that provides commands for common op
* `make` or `make all`: Clean and build;
* `make base`: Create and push the `arkindex-base` Docker image that is used to build the `arkindex-app` image;
* `make clean`: Cleanup the Python package build and cache files;
* `make clean-docker`: Delete all running containers to avoid naming and network port conflicts;
* `make build`: Build the arkindex Python package and recreate the `arkindex-app:latest` without pushing to the GitLab container registry;
* `make test-fixtures`: Create the unit tests fixtures on a temporary PostgreSQL database and save them to the `data.json` file used by most Django unit tests.
### Django commands
Aside from the usual Django commands, some custom commands are available via `manage.py`:
Aside from the usual Django commands, some custom commands are available via `arkindex`:
* `build_fixtures`: Create a set of database elements designed for use by unit tests in a fixture (see `make test-fixtures`);
* `from_csv`: Import manifests and index files from a CSV list;
* `import_annotations`: Import index files from a folder into a specific volume;
* `import_acts`: Import XML surface files and CSV act files;
* `delete_corpus`: Delete a big corpus using an RQ task;
* `reindex`: Reindex elements into Solr;
* `telegraf`: A special command with InfluxDB-compatible output for Grafana statistics.
* `move_lines_to_parents`: Moves element children to their geographical parents;
* `build_fixtures`: Create a set of database elements designed for use by unit tests in a fixture (see `make test-fixtures`).
* `delete_corpus`: Delete a big corpus using an RQ task.
* `reindex`: Reindex elements into Solr.
* `move_lines_to_parents`: Moves element children to their geographical parents.
See `manage.py <command> --help` to view more details about a specific command.
See `arkindex <command> --help` to view more details about a specific command.
## Code validation
Once your code appears to be working on a local server, a few checks have to be performed:
* **Migrations:** Ensure that all migrations have been created by typing `./manage.py makemigrations`.
* **Unit tests:** Run `./manage.py test` to perform unit tests.
- Use `./manage.py test module_name` to perform tests on a single module, if you wish to spend less time waiting for all tests to complete.
* **Migrations:** Ensure that all migrations have been created by typing `arkindex makemigrations`.
* **Unit tests:** Run `arkindex test` to perform unit tests.
- Use `arkindex test module_name` to perform tests on a single module, if you wish to spend less time waiting for all tests to complete.
### Linting
@@ -127,9 +143,9 @@ We use [pre-commit](https://pre-commit.com/) to check the Python source code syn
To be efficient, you should run pre-commit before committing (hence the name...).
To do that, run once :
To do that, run once:
```
```console
pip install pre-commit
pre-commit install
```
@@ -142,13 +158,13 @@ If you want to run the full workflow on all the files: `pre-commit run -a`.
Run `pip install ipython django-debug-toolbar django_extensions` to install all the available optional dev tools for the backend.
IPython will give you a nicer shell with syntax highlighting, auto reloading and much more via `./manage.py shell`.
IPython will give you a nicer shell with syntax highlighting, auto reloading and much more via `arkindex shell`.
[Django Debug Toolbar](https://django-debug-toolbar.readthedocs.io/en/latest/) provides you with a neat debug sidebar that will help diagnose slow API endpoints or weird template bugs. Since the Arkindex frontend is completely decoupled from the backend, you will need to browse to an API endpoint to see the debug toolbar.
[Django Extensions](https://django-extensions.readthedocs.io/en/latest/) adds a *lot* of `manage.py` commands; the most important one is `./manage.py shell_plus`, which runs the usual shell but with all the available models pre-imported. You can add your own imports with the `local_settings.py` file. Here is an example that imports most of the backend's enums and some special QuerySet features:
[Django Extensions](https://django-extensions.readthedocs.io/en/latest/) adds a *lot* of `arkindex` commands; the most important one is `arkindex shell_plus`, which runs the usual shell but with all the available models pre-imported. You can add your own imports with the `local_settings.py` file. Here is an example that imports some of the backend's enums and some special QuerySet features:
``` python
```python
SHELL_PLUS_POST_IMPORTS = [
('django.db.models', ('Value', )),
('django.db.models.functions', '*'),
@@ -157,31 +173,43 @@ SHELL_PLUS_POST_IMPORTS = [
'Right',
)),
('arkindex.process.models', (
'DataImportMode',
'ProcessMode',
)),
('arkindex.project.aws', (
'S3FileStatus',
)),
('arkindex.users.models', (
'OAuthStatus',
))
]
```
## Asynchronous tasks
We use [rq](https://python-rq.org/), integrated via [django-rq](https://pypi.org/project/django-rq/), to run tasks without blocking an API request or causing timeouts. To call them in Python code, you should use the trigger methods in `arkindex.project.triggers`; those will do some safety checks to make catching some errors easier in dev. The actual tasks are in `arkindex.documents.tasks`. The following tasks exist:
We use [rq](https://python-rq.org/), integrated via [django-rq](https://pypi.org/project/django-rq/), to run tasks without blocking an API request or causing timeouts. To call them in Python code, you should use the trigger methods in `arkindex.project.triggers`; those will do some safety checks to make catching some errors easier in dev. The actual tasks are in `arkindex.documents.tasks`, or in other `tasks` modules within each Django app. The following tasks exist:
* Delete a corpus: `corpus_delete`
* Delete a list of elements: `element_trash`
* Delete worker results (transcriptions, classifications, etc. of a worker version): `worker_results_delete`
* Move an element to another parent: `move_element`
* Create `WorkerActivity` instances for all elements of a process: `intitialize_activity`
* Create `WorkerActivity` instances for all elements of a process: `initialize_activity`
* Delete a process and its worker activities: `process_delete`
* Export a corpus to an SQLite database: `export_corpus`
To run them, use `make worker` to start a RQ worker. You will need to have Redis running; `make slim` or `make` in the architecture will provide it. `make` in the architecture also provides a RQ worker running in Docker from a binary build.
To run them, use `make worker` to start an RQ worker. You will need to have Redis running; `make services` will provide it. `make stack` also provides an RQ worker running in Docker from a binary build.
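As an illustrative sketch, calling one of these triggers from Python code could look like the following; the module path and task name come from the text above, but the exact signature is an assumption:

```python
# Hypothetical sketch — corpus_delete is one of the triggers listed above;
# its exact signature is an assumption.
from arkindex.documents.models import Corpus
from arkindex.project.triggers import corpus_delete

corpus = Corpus.objects.get(name="My test corpus")  # hypothetical corpus name
# Runs the safety checks, enqueues the actual task on RQ and returns
# immediately; a worker started with `make worker` picks it up.
corpus_delete(corpus)
```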
## Metrics
The application serves metrics for Prometheus under the `/metrics` prefix.
A specific port can be used by setting the `PROMETHEUS_METRICS_PORT` environment variable, thus separating the application from the metrics API.
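For instance, a quick way to check the endpoint from a Python shell (a sketch, assuming a locally running server and `PROMETHEUS_METRICS_PORT=8080`, which is an arbitrary example value):

```python
# Sketch: scrape the metrics endpoint; port 8080 is an assumed example value
# of PROMETHEUS_METRICS_PORT, not a documented default.
import requests

response = requests.get("http://localhost:8080/metrics")
print(response.text[:500])  # Prometheus text exposition format
```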
## Migration from `architecture` setup
If you were using the `architecture` repository previously to run Arkindex, you'll need to migrate MinIO data from a static path on your computer to a new Docker volume.
```console
docker volume create arkindex_miniodata
mv /usr/share/arkindex/s3/data/iiif /var/lib/docker/volumes/arkindex_miniodata/_data/uploads
mv /usr/share/arkindex/s3/data/{export,iiif-cache,ponos-logs,ponos-artifacts,staging,thumbnails,training} /var/lib/docker/volumes/arkindex_miniodata/_data/
```
You will also need to set up [mkcert](https://github.com/FiloSottile/mkcert?tab=readme-ov-file#installation), as we no longer use the Teklia development Certificate Authority. `mkcert` will take care of SSL certificates automatically, updating your browsers and system certificate store!
Finally, you can remove the `architecture` project from your work folder, as it's now archived and could be confusing.
1.5.3-rc2
1.6.0
@@ -11,8 +11,6 @@ from arkindex.documents.models import (
Element,
ElementType,
Entity,
EntityLink,
EntityRole,
EntityType,
MetaData,
MLClass,
@@ -22,8 +20,8 @@ from arkindex.documents.models import (
class ElementTypeInline(admin.TabularInline):
model = ElementType
fields = ('slug', 'display_name', 'folder', 'indexable')
readonly_fields = ('slug', 'display_name', 'folder')
fields = ("slug", "display_name", "folder", "indexable")
readonly_fields = ("slug", "display_name", "folder")
def has_add_permission(self, request, obj=None):
return False
@@ -33,66 +31,66 @@ class ElementTypeInline(admin.TabularInline):
class CorpusAdmin(admin.ModelAdmin):
list_display = ('id', 'name', 'public', 'top_level_type', 'created')
search_fields = ('name', )
list_display = ("id", "name", "public", "top_level_type", "created")
search_fields = ("name", )
inlines = (ElementTypeInline, )
ordering = ('-created', )
ordering = ("-created", )
def has_delete_permission(self, request, obj=None):
# Require everyone to use the asynchronous corpus deletion
return False
def get_queryset(self, request):
return super().get_queryset(request).select_related('top_level_type')
return super().get_queryset(request).select_related("top_level_type")
def get_form(self, request, obj=None, **kwargs):
form = super().get_form(request, obj, **kwargs)
if obj:
# Limit top level type queryset to the types of this corpus
form.base_fields['top_level_type'].queryset = ElementType.objects.filter(corpus=obj)
form.base_fields["top_level_type"].queryset = ElementType.objects.filter(corpus=obj)
return form
class CorpusExportAdmin(admin.ModelAdmin):
list_display = ('id', 'corpus', 'user', 'state')
ordering = ('-created', )
list_display = ("id", "corpus", "user", "state")
ordering = ("-created", )
class ClassificationInline(admin.TabularInline):
model = Classification
readonly_fields = ('confidence', 'high_confidence', )
raw_id_fields = ('worker_version', 'worker_run', 'moderator', 'ml_class')
readonly_fields = ("confidence", "high_confidence", )
raw_id_fields = ("worker_version", "worker_run", "moderator", "ml_class")
class AllowedMetaDataAdmin(admin.ModelAdmin):
list_display = ('id', 'corpus', 'type', 'name')
readonly_fields = ('id', )
list_display = ("id", "corpus", "type", "name")
readonly_fields = ("id", )
def get_form(self, *args, **kwargs):
form = super().get_form(*args, **kwargs)
form.base_fields['corpus'].queryset = Corpus.objects.order_by('name', 'id')
form.base_fields["corpus"].queryset = Corpus.objects.order_by("name", "id")
return form
class MetaDataAdmin(admin.ModelAdmin):
list_display = ('id', 'name', 'type', )
list_filter = [('type', EnumFieldListFilter), ]
readonly_fields = ('id', )
raw_id_fields = ('element', 'entity', 'worker_version', 'worker_run')
list_display = ("id", "name", "type", )
list_filter = [("type", EnumFieldListFilter), ]
readonly_fields = ("id", )
raw_id_fields = ("element", "entity", "worker_version", "worker_run")
class MetaDataInline(admin.TabularInline):
model = MetaData
raw_id_fields = ('entity', 'worker_version', 'worker_run')
raw_id_fields = ("entity", "worker_version", "worker_run")
class ElementAdmin(admin.ModelAdmin):
list_display = ('id', 'name', 'type', 'corpus', )
list_filter = ['type__slug', 'corpus']
fields = ('id', 'type', 'name', 'image', 'corpus', 'confidence', 'worker_run', )
raw_id_fields = ('worker_run',)
readonly_fields = ('id', 'corpus', 'image',)
search_fields = ('name', )
list_display = ("id", "name", "type", "corpus", )
list_filter = ["type__slug", "corpus"]
fields = ("id", "type", "name", "image", "corpus", "confidence", "worker_run", )
raw_id_fields = ("worker_run",)
readonly_fields = ("id", "corpus", "image",)
search_fields = ("name", )
inlines = (MetaDataInline, ClassificationInline)
# Disable element creation through the admin to prevent element_type conflicts
@@ -100,34 +98,34 @@ class ElementAdmin(admin.ModelAdmin):
return False
def formfield_for_foreignkey(self, db_field, request, **kwargs):
if db_field.name == 'type':
if db_field.name == "type":
# Only display types available on the element's corpus
# It would make no sense to pick a type from another corpus
# Display all elements on creation, as we do not know yet the corpus
element_id = request.resolver_match.kwargs.get('object_id')
element_id = request.resolver_match.kwargs.get("object_id")
if element_id:
element = self.get_object(request, element_id)
kwargs['queryset'] = ElementType.objects.filter(corpus=element.corpus).order_by('display_name')
kwargs["queryset"] = ElementType.objects.filter(corpus=element.corpus).order_by("display_name")
return super().formfield_for_foreignkey(db_field, request, **kwargs)
class TranscriptionAdmin(admin.ModelAdmin):
list_display = ('id', 'text', 'confidence', 'orientation', 'element', )
fields = ('id', 'text', 'confidence', 'orientation', 'element', )
readonly_fields = ('id', )
raw_id_fields = ('element', )
list_display = ("id", "text", "confidence", "orientation", "element", )
fields = ("id", "text", "confidence", "orientation", "element", )
readonly_fields = ("id", )
raw_id_fields = ("element", )
class MLClassAdmin(admin.ModelAdmin):
list_display = ('id', 'name', 'corpus')
list_filter = ('corpus',)
search_fields = ('name',)
fields = ('name', 'corpus')
list_display = ("id", "name", "corpus")
list_filter = ("corpus",)
search_fields = ("name",)
fields = ("name", "corpus")
def get_form(self, *args, **kwargs):
form = super().get_form(*args, **kwargs)
form.base_fields['corpus'].queryset = Corpus.objects.order_by('name', 'id')
form.base_fields["corpus"].queryset = Corpus.objects.order_by("name", "id")
return form
@@ -135,39 +133,25 @@ class EntityMetaForm(forms.ModelForm):
metas = HStoreFormField()
class EntityLinkInLine(admin.TabularInline):
model = EntityLink
fk_name = 'parent'
raw_id_fields = ('child', )
class EntityAdmin(admin.ModelAdmin):
list_display = ('id', 'name', 'type')
list_filter = ['corpus', 'type']
readonly_fields = ('id', )
raw_id_fields = ('worker_version', 'worker_run', )
search_fields = ('name', )
inlines = (EntityLinkInLine, )
list_display = ("id", "name", "type")
list_filter = ["corpus", "type"]
readonly_fields = ("id", )
raw_id_fields = ("worker_version", "worker_run", )
search_fields = ("name", )
form = EntityMetaForm
class EntityRoleAdmin(admin.ModelAdmin):
list_display = ('id', 'corpus', 'parent_name', 'child_name')
list_filter = ('corpus', )
readonly_fields = ('id', )
ordering = ('corpus', 'parent_name', 'child_name')
class EntityTypeAdmin(admin.ModelAdmin):
list_display = ('id', 'corpus', 'name', 'color')
list_filter = ('corpus', )
list_display = ("id", "corpus", "name", "color")
list_filter = ("corpus", )
def get_readonly_fields(self, request, obj=None):
# Make the corpus field read-only only for existing entity types.
# Otherwise, new EntityTypes would be created with corpus=None
if obj:
return ('id', 'corpus')
return ('id', )
return ("id", "corpus")
return ("id", )
def has_delete_permission(self, request, obj=None):
# Require everyone to use the frontend or DestroyEntityType
@@ -180,7 +164,6 @@ admin.site.register(Transcription, TranscriptionAdmin)
admin.site.register(MLClass, MLClassAdmin)
admin.site.register(MetaData, MetaDataAdmin)
admin.site.register(Entity, EntityAdmin)
admin.site.register(EntityRole, EntityRoleAdmin)
admin.site.register(EntityType, EntityTypeAdmin)
admin.site.register(AllowedMetaData, AllowedMetaDataAdmin)
admin.site.register(CorpusExport, CorpusExportAdmin)
@@ -2,93 +2,119 @@ from datetime import timedelta
from textwrap import dedent
from django.conf import settings
from django.shortcuts import get_object_or_404
from django.utils import timezone
from django.utils.functional import cached_property
from drf_spectacular.utils import extend_schema, extend_schema_view
from rest_framework import serializers, status
from rest_framework.exceptions import ValidationError
from rest_framework.generics import ListCreateAPIView, RetrieveAPIView
from rest_framework import permissions, serializers, status
from rest_framework.exceptions import PermissionDenied, ValidationError
from rest_framework.generics import ListCreateAPIView, RetrieveDestroyAPIView
from rest_framework.response import Response
from arkindex.documents.models import Corpus, CorpusExport, CorpusExportState
from arkindex.documents.serializers.export import CorpusExportSerializer
from arkindex.project.mixins import CorpusACLMixin
from arkindex.project.permissions import IsVerified
from arkindex.users.models import Role
@extend_schema(tags=['exports'])
@extend_schema(tags=["exports"])
@extend_schema_view(
get=extend_schema(
operation_id='ListExports',
operation_id="ListExports",
description=(
'List all exports on a corpus.\n\n'
'Guest access is required on private corpora.'
"List all exports on a corpus.\n\n"
"Guest access is required on private corpora."
),
),
post=extend_schema(
operation_id='StartExport',
request=None,
operation_id="StartExport",
description=dedent(
f"""
Start a corpus export job.
A user must wait for {settings.EXPORT_TTL_SECONDS} seconds after the last successful export
before being able to generate a new export of the same corpus.
before being able to generate a new export of the same corpus from the same source.
Contributor access is required.
"""
),
)
)
class CorpusExportAPIView(CorpusACLMixin, ListCreateAPIView):
class CorpusExportAPIView(ListCreateAPIView):
permission_classes = (IsVerified, )
serializer_class = CorpusExportSerializer
queryset = CorpusExport.objects.none()
@cached_property
def corpus(self):
qs = Corpus.objects.readable(self.request.user)
corpus = get_object_or_404(qs, pk=self.kwargs["pk"])
if self.request.method not in permissions.SAFE_METHODS and not corpus.is_writable(self.request.user):
raise PermissionDenied(detail="You do not have write access to this corpus.")
return corpus
def get_queryset(self):
return CorpusExport \
.objects \
.filter(corpus=self.get_corpus(self.kwargs['pk'])) \
.select_related('user') \
.order_by('-created')
def post(self, *args, **kwargs):
corpus = self.get_corpus(self.kwargs['pk'], role=Role.Contributor)
if corpus.exports.filter(state__in=(CorpusExportState.Created, CorpusExportState.Running)).exists():
raise ValidationError('An export is already running for this corpus.')
.filter(corpus=self.corpus) \
.select_related("user") \
.order_by("-created")
available_exports = corpus.exports.filter(
state=CorpusExportState.Done,
created__gte=timezone.now() - timedelta(seconds=settings.EXPORT_TTL_SECONDS)
)
if available_exports.exists():
raise ValidationError(f'An export has already been made for this corpus in the last {settings.EXPORT_TTL_SECONDS} seconds.')
def get_serializer_context(self):
context = super().get_serializer_context()
context["corpus"] = self.corpus
return context
export = corpus.exports.create(user=self.request.user)
export.start()
return Response(CorpusExportSerializer(export).data, status=status.HTTP_201_CREATED)
@extend_schema(
tags=["exports"],
)
@extend_schema_view(
get=extend_schema(
operation_id="DownloadExport",
description=dedent(
"""
Download a corpus export.
class DownloadExport(RetrieveAPIView):
"""
Download a corpus export.
Guest access is required on private corpora.
"""
),
responses={302: serializers.Serializer},
),
delete=extend_schema(
operation_id="DestroyExport",
description=dedent(
"""
Delete a corpus export.
Guest access is required on private corpora.
"""
Requires either an admin access to the corpus, or for the user to be
the export's creator and have contributor access to the corpus.
"""
)
)
)
class ManageExport(RetrieveDestroyAPIView):
queryset = CorpusExport.objects.none()
permission_classes = (IsVerified, )
serializer_class = CorpusExportSerializer
def get_queryset(self):
return CorpusExport.objects.filter(
states = {CorpusExportState.Done}
if self.request.method not in permissions.SAFE_METHODS:
states.add(CorpusExportState.Running)
return CorpusExport.objects.select_related("corpus").filter(
corpus__in=Corpus.objects.readable(self.request.user),
state=CorpusExportState.Done
).only('id')
state__in=states
)
@extend_schema(
operation_id='DownloadExport',
tags=['exports'],
responses={302: serializers.Serializer},
)
def get(self, *args, **kwargs):
return Response(status=status.HTTP_302_FOUND, headers={'Location': self.get_object().s3_url})
return Response(status=status.HTTP_302_FOUND, headers={"Location": self.get_object().s3_url})
def check_object_permissions(self, request, obj):
super().check_object_permissions(request, obj)
if request.method in permissions.SAFE_METHODS:
return
if not obj.is_deletable(request.user):
raise PermissionDenied(detail="You do not have sufficient rights to delete this export.")
# Allow deleting running exports if they have not been updated in longer than EXPORT_TTL_SECONDS (not actually still running)
if obj.state == CorpusExportState.Running and obj.updated + timedelta(seconds=settings.EXPORT_TTL_SECONDS) > timezone.now():
raise ValidationError("You cannot delete an export that is still running.")
@@ -21,9 +21,9 @@ class FolderManifest(RetrieveAPIView):
@method_decorator(cache_page(3600))
@extend_schema(
operation_id='RetrieveFolderManifest',
responses={200: {'type': 'object'}},
tags=['iiif'],
operation_id="RetrieveFolderManifest",
responses={200: {"type": "object"}},
tags=["iiif"],
)
def get(self, *args, **kwargs):
return super().get(*args, **kwargs)
@@ -45,9 +45,9 @@ class ElementAnnotationList(RetrieveAPIView):
@method_decorator(cache_page(3600))
@extend_schema(
operation_id='RetrieveElementAnnotationList',
responses={200: {'type': 'object'}},
tags=['iiif'],
operation_id="RetrieveElementAnnotationList",
responses={200: {"type": "object"}},
tags=["iiif"],
)
def get(self, *args, **kwargs):
return super().get(*args, **kwargs)
@@ -2,7 +2,7 @@ from django.apps import AppConfig
class DocumentsConfig(AppConfig):
name = 'arkindex.documents'
name = "arkindex.documents"
def ready(self):
from arkindex.documents import signals # noqa: F401