From f453ddcde5e4c4c6f5dd4a6b2300fea8f4fedbac Mon Sep 17 00:00:00 2001
From: Bastien Abadie <abadie@teklia.com>
Date: Tue, 12 Mar 2024 13:48:36 +0000
Subject: [PATCH] Boot quickly Arkindex

---
 .dockerignore                                 |   5 +-
 .gitignore                                    |   1 +
 Makefile                                      |  23 ++-
 README.md                                     | 116 +++++++++-----
 .../management/commands/bootstrap.py          | 120 ++++++++------
 arkindex/images/models.py                     |   4 +-
 arkindex/project/aws.py                       |  14 ++
 arkindex/project/config.py                    |   2 +-
 arkindex/project/settings.py                  |   2 +-
 .../tests/config_samples/defaults.yaml        |   2 +-
 arkindex/project/tests/test_aws.py            |  12 ++
 base/Dockerfile                               |   5 -
 base/dev-ca.pem                               |  23 ---
 config.yml.sample                             |  36 +++++
 docker-compose.yml                            |  66 ++++++++
 docker/cantaloupe.properties                  |  43 ++++++
 docker/config.yml                             |  54 +++++++
 docker/docker-compose.services.yml            | 146 ++++++++++++++++++
 docker/ssl/traefik.toml                       |   3 +
 docker/traefik.toml                           |  29 ++++
 20 files changed, 576 insertions(+), 130 deletions(-)
 create mode 100644 arkindex/project/tests/test_aws.py
 delete mode 100644 base/dev-ca.pem
 create mode 100644 config.yml.sample
 create mode 100644 docker-compose.yml
 create mode 100644 docker/cantaloupe.properties
 create mode 100644 docker/config.yml
 create mode 100644 docker/docker-compose.services.yml
 create mode 100644 docker/ssl/traefik.toml
 create mode 100644 docker/traefik.toml

diff --git a/.dockerignore b/.dockerignore
index 83782ddb8a..47fc5cf3ae 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -2,6 +2,9 @@
 .git
 .eggs
 *.egg
-logs
 **/__pycache__/
 **/*.pyc
+docker/
+Makefile
+test-report.xml
+arkindex/config.yml
diff --git a/.gitignore b/.gitignore
index 7cde2291ad..eb7fbe52d5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,3 +16,4 @@ htmlcov
 *.key
 arkindex/config.yml
 test-report.xml
+docker/ssl/*.pem
diff --git a/Makefile b/Makefile
index ce25922e98..75c24b64ea 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 ROOT_DIR:=$(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
 IMAGE_TAG=registry.gitlab.teklia.com/arkindex/backend
 
-.PHONY: all release
+.PHONY: all release services
 
 all: clean build
 
@@ -20,8 +20,8 @@ worker:
 
 test-fixtures:
 	$(eval export PGPASSWORD=devdata)
-	psql -h 127.0.0.1 -p 9100 -U devuser -c 'ALTER DATABASE arkindex_dev RENAME TO arkindex_tmp_fixtures' template1
-	psql -h 127.0.0.1 -p 9100 -U devuser -c 'CREATE DATABASE arkindex_dev' template1
+	psql -h 127.0.0.1 -p 5432 -U devuser -c 'ALTER DATABASE arkindex_dev RENAME TO arkindex_tmp_fixtures' template1
+	psql -h 127.0.0.1 -p 5432 -U devuser -c 'CREATE DATABASE arkindex_dev' template1
 	# A "try...finally" block in a Makefile: ensure we bring back the dev database even when test-fixtures fails
 	-$(MAKE) test-fixtures-run
 	$(MAKE) test-fixtures-restore
@@ -33,9 +33,9 @@ test-fixtures-run:
 
 test-fixtures-restore:
 	# This first renaming ensures that arkindex_tmp_fixtures exists; we don't want to drop arkindex_dev without a backup
-	psql -h 127.0.0.1 -p 9100 -U devuser -c 'ALTER DATABASE arkindex_tmp_fixtures RENAME TO arkindex_dev_replace' template1
-	psql -h 127.0.0.1 -p 9100 -U devuser -c 'DROP DATABASE arkindex_dev' template1
-	psql -h 127.0.0.1 -p 9100 -U devuser -c 'ALTER DATABASE arkindex_dev_replace RENAME TO arkindex_dev' template1
+	psql -h 127.0.0.1 -p 5432 -U devuser -c 'ALTER DATABASE arkindex_tmp_fixtures RENAME TO arkindex_dev_replace' template1
+	psql -h 127.0.0.1 -p 5432 -U devuser -c 'DROP DATABASE arkindex_dev' template1
+	psql -h 127.0.0.1 -p 5432 -U devuser -c 'ALTER DATABASE arkindex_dev_replace RENAME TO arkindex_dev' template1
 
 require-version:
 	@if [ ! "$(version)" ]; then echo "Missing version to publish"; exit 1; fi
@@ -50,3 +50,14 @@ release:
 	git commit VERSION -m "Version $(version)"
 	git tag $(version)
 	git push origin master $(version)
+
+stack: docker/ssl/ark-cert.pem
+	docker compose -p arkindex up --build
+
+services: docker/ssl/ark-cert.pem
+	docker compose -p arkindex -f docker/docker-compose.services.yml up
+
+docker/ssl/ark-cert.pem:
+	$(eval export CAROOT=$(ROOT_DIR)/docker/ssl)
+	mkcert -install
+	mkcert -cert-file=$(ROOT_DIR)/docker/ssl/ark-cert.pem -key-file=$(ROOT_DIR)/docker/ssl/ark-key.pem ark.localhost *.ark.localhost *.iiif.ark.localhost
diff --git a/README.md b/README.md
index 7c9bfdcbfd..0e7b72a7d0 100644
--- a/README.md
+++ b/README.md
@@ -6,43 +6,82 @@ Backend for Historical Manuscripts Indexing
 
 ## Requirements
 
-* Clone of the [architecture](https://gitlab.teklia.com/arkindex/architecture)
 * Git
 * Make
-* Python 3.6+
+* Python 3.10+
 * pip
 * [virtualenvwrapper](https://virtualenvwrapper.readthedocs.io/en/latest/)
+* Docker 24+
+* [mkcert](https://github.com/FiloSottile/mkcert?tab=readme-ov-file#installation)
 
-## Dev Setup
+## Setup for developers
 
-```
+You'll also need the [Arkindex frontend](https://gitlab.teklia.com/arkindex/frontend) to be able to develop on the whole platform.
+
+```console
 git clone git@gitlab.teklia.com:arkindex/backend.git
+git clone git@gitlab.teklia.com:arkindex/frontend.git
 cd backend
-mkvirtualenv ark -a .
+mkvirtualenv ark -a . -p /usr/bin/python3.10
 pip install -e .[test]
 ```
 
-When the [architecture](https://gitlab.teklia.com/arkindex/architecture) is running locally to provide required services:
+The Arkindex backend relies on some open-source services to store data and communicate to asynchronous workers.
+To run all the required services, please run in a dedicated shell:
 
+```console
+make services
 ```
+
+On a first run, you'll need to:
+
+1. Configure the instance by enabling the sample configuration.
+2. Populate the database structure.
+3. Initialize some fields in the database.
+4. Create an administration account.
+
+All of these steps are done through:
+
+```console
+cp config.yml.sample arkindex/config.yml
 arkindex migrate
+arkindex bootstrap
 arkindex createsuperuser
 ```
 
-### Local configuration
+Finally, you can run the backend:
+
+```console
+arkindex runserver
+```
+
+At this stage, you can use `http://localhost:8000/admin` to access the administration interface.
+
+### Asynchronous tasks
+
+To run asynchronous tasks, run in another shell:
+
+```console
+make worker
+```
 
-For development purposes, you can customize the Arkindex settings by adding a YAML file as `arkindex/config.yml`. This file is not tracked by Git; if it exists, any configuration directive set in this file will be used for exposed settings from `settings.py`. You can view the full list of settings [on the wiki](https://wiki.vpn/en/arkindex/deploy/configuration).
+### Dockerized stack
 
+It is also possible to run the whole Arkindex stack through Docker containers. This is useful to quickly test the platform.
 
-Another mean to customize your Arkindex instance is to add a Python file in `arkindex/project/local_settings.py`. Here you are not limited to exposed settings, and can customize any setting, or even load Python dependencies at boot time. This is not recommended, as your customization may not be available to real-world Arkindex instances.
+This command will build all the required Docker images (backend & frontend) and run them as Docker containers:
 
-### ImageMagick setup
+```console
+make stack
+```
+
+You'll be able to access the platform at the url `https://ark.localhost`.
 
-PDF and image imports in Arkindex will require ImageMagick. Due to its ability to take any computer down if you give it the right parameters (for example, converting a 1000-page PDF file into JPEG files at 30 000 DPI), it has a security policy file. By default, on Ubuntu, PDF conversion is forbidden.
+### Local configuration
 
-You will need to edit the ImageMagick policy file to get PDF and Image imports to work in Arkindex. The file is located at `/etc/ImageMagick-6/policy.xml`.
+For development purposes, you can customize the Arkindex settings by adding a YAML file as `arkindex/config.yml`. This file is not tracked by Git; if it exists, any configuration directive set in this file will be used for exposed settings from `settings.py`. You can view the full list of settings [on the wiki](https://redmine.teklia.com/projects/arkindex/wiki/Backend_configuration).
 
-The line that sets the PDF policy is `<policy domain="coder" rights="none" pattern="PDF" />`. Replace `none` with `read|write` for it to work. See [this StackOverflow question](https://stackoverflow.com/questions/52998331) for more info.
+Another way to customize your Arkindex instance is to add a Python file in `arkindex/project/local_settings.py`. Here you are not limited to exposed settings, and can customize any setting, or even load Python dependencies at boot time. This is not recommended, as your customization may not be available to real-world Arkindex instances.
 
 ### Local image server
 
@@ -54,7 +93,7 @@ local_imageserver_id: 999
 
 Here is how to quickly create the ImageServer using the shell:
 
-```
+```python
 $ arkindex shell
 >>> from arkindex.images.models import ImageServer
 >>> ImageServer.objects.create(id=1, display_name='local', url='https://ark.localhost/iiif')
@@ -62,11 +101,6 @@ $ arkindex shell
 
 Note that this local server will only work inside Docker.
 
-### User groups
-
-We use a custom group model in `arkindex.users.models` (not the `django.contrib.auth` one).
-In this early version groups do not define any right yet.
-
 ## Usage
 
 ### Makefile
@@ -83,14 +117,10 @@ At the root of the repository is a Makefile that provides commands for common op
 
 Aside from the usual Django commands, some custom commands are available via `arkindex`:
 
-* `build_fixtures`: Create a set of database elements designed for use by unit tests in a fixture (see `make test-fixtures`);
-* `from_csv`: Import manifests and index files from a CSV list;
-* `import_annotations`: Import index files from a folder into a specific volume;
-* `import_acts`: Import XML surface files and CSV act files;
-* `delete_corpus`: Delete a big corpus using an RQ task;
-* `reindex`: Reindex elements into Solr;
-* `telegraf`: A special command with InfluxDB-compatible output for Grafana statistics.
-* `move_lines_to_parents`: Moves element children to their geographical parents;
+* `build_fixtures`: Create a set of database elements designed for use by unit tests in a fixture (see `make test-fixtures`).
+* `delete_corpus`: Delete a big corpus using an RQ task.
+* `reindex`: Reindex elements into Solr.
+* `move_lines_to_parents`: Moves element children to their geographical parents.
 
 See `arkindex <command> --help` to view more details about a specific command.
 
@@ -108,9 +138,9 @@ We use [pre-commit](https://pre-commit.com/) to check the Python source code syn
 
 To be efficient, you should run pre-commit before committing (hence the name...).
 
-To do that, run once :
+To do that, run once:
 
-```
+```console
 pip install pre-commit
 pre-commit install
 ```
@@ -127,9 +157,9 @@ IPython will give you a nicer shell with syntax highlighting, auto reloading and
 
 [Django Debug Toolbar](https://django-debug-toolbar.readthedocs.io/en/latest/) provides you with a neat debug sidebar that will help diagnosing slow API endpoints or weird template bugs. Since the Arkindex frontend is completely decoupled from the backend, you will need to browse to an API endpoint to see the debug toolbar.
 
-[Django Extensions](https://django-extensions.readthedocs.io/en/latest/) adds a *lot* of `arkindex` commands ; the most important one is `arkindex shell_plus` which runs the usual shell but with all the available models pre-imported. You can add your own imports with the `local_settings.py` file. Here is an example that imports most of the backend's enums and some special QuerySet features:
+[Django Extensions](https://django-extensions.readthedocs.io/en/latest/) adds a *lot* of `arkindex` commands; the most important one is `arkindex shell_plus` which runs the usual shell but with all the available models pre-imported. You can add your own imports with the `local_settings.py` file. Here is an example that imports some of the backend's enums and some special QuerySet features:
 
-``` python
+```python
 SHELL_PLUS_POST_IMPORTS = [
     ('django.db.models', ('Value', )),
     ('django.db.models.functions', '*'),
@@ -138,7 +168,7 @@ SHELL_PLUS_POST_IMPORTS = [
         'Right',
     )),
     ('arkindex.process.models', (
-        'DataImportMode',
+        'ProcessMode',
     )),
     ('arkindex.project.aws', (
         'S3FileStatus',
@@ -148,23 +178,29 @@ SHELL_PLUS_POST_IMPORTS = [
 
 ## Asynchronous tasks
 
-We use [rq](https://python-rq.org/), integrated via [django-rq](https://pypi.org/project/django-rq/), to run tasks without blocking an API request or causing timeouts. To call them in Python code, you should use the trigger methods in `arkindex.project.triggers`; those will do some safety checks to make catching some errors easier in dev. The actual tasks are in `arkindex.documents.tasks`. The following tasks exist:
+We use [rq](https://python-rq.org/), integrated via [django-rq](https://pypi.org/project/django-rq/), to run tasks without blocking an API request or causing timeouts. To call them in Python code, you should use the trigger methods in `arkindex.project.triggers`; those will do some safety checks to make catching some errors easier in dev. The actual tasks are in `arkindex.documents.tasks`, or in other `tasks` modules within each Django app. The following tasks exist:
 
 * Delete a corpus: `corpus_delete`
 * Delete a list of elements: `element_trash`
 * Delete worker results (transcriptions, classifications, etc. of a worker version): `worker_results_delete`
 * Move an element to another parent: `move_element`
-* Create `WorkerActivity` instances for all elements of a process: `intitialize_activity`
+* Create `WorkerActivity` instances for all elements of a process: `initialize_activity`
 * Delete a process and its worker activities: `process_delete`
 * Export a corpus to an SQLite database: `export_corpus`
 
-To run them, use `make worker` to start a RQ worker. You will need to have Redis running; `make slim` or `make` in the architecture will provide it. `make` in the architecture also provides a RQ worker running in Docker from a binary build.
-
-Process tasks are run in RQ by default (Community Edition). Two RQ workers must be running at the same time to actually run a process with worker activities, so the initialisation task can wait for the worker activity task to finish:
-```sh
-$ arkindex rqworker -v 3 default high & arkindex rqworker -v 3 tasks
-```
+To run them, use `make worker` to start an RQ worker. You will need to have Redis running; `make services` will provide it. `make stack` also provides an RQ worker running in Docker from a binary build.
 
 ## Metrics
+
 The application serves metrics for Prometheus under the `/metrics` prefix.
 A specific port can be used by setting the `PROMETHEUS_METRICS_PORT` environment variable, thus separating the application from the metrics API.
+
+## Migration from `architecture` setup
+
+If you were using the `architecture` repository previously to run Arkindex, you'll need to migrate MinIO data from a static path on your computer towards a new docker volume.
+
+```console
+docker volume create arkindex_miniodata
+mv /usr/share/arkindex/s3/data/iiif /var/lib/docker/volumes/arkindex_miniodata/_data/uploads
+mv /usr/share/arkindex/s3/data/{export,iiif-cache,ponos-logs,ponos-artifacts,staging,thumbnails,training} /var/lib/docker/volumes/arkindex_miniodata/_data/
+```
diff --git a/arkindex/documents/management/commands/bootstrap.py b/arkindex/documents/management/commands/bootstrap.py
index 580ea91591..3ca2dacd4a 100644
--- a/arkindex/documents/management/commands/bootstrap.py
+++ b/arkindex/documents/management/commands/bootstrap.py
@@ -14,9 +14,14 @@ from arkindex.process.models import FeatureUsage, Repository, Worker, WorkerType
 from arkindex.users.models import User
 
 # Constants used in architecture project
-IMAGE_SERVER_ID = 12345
-IMAGE_SERVER_BUCKET = "iiif"
-IMAGE_SERVER_REGION = "local"
+UPLOADS_IMAGE_SERVER_ID = 12345
+UPLOADS_IMAGE_SERVER_URL = "https://uploads.iiif.ark.localhost/iiif/2"
+UPLOADS_IMAGE_SERVER_BUCKET = "uploads"
+UPLOADS_IMAGE_SERVER_REGION = "local"
+INGEST_IMAGE_SERVER_ID = 67890
+INGEST_IMAGE_SERVER_URL = "https://ingest.iiif.ark.localhost/iiif/2"
+INGEST_IMAGE_SERVER_BUCKET = "ingest"
+INGEST_IMAGE_SERVER_REGION = "local"
 PONOS_FARM_ID = "001e411a-1111-2222-3333-444455556666"
 PONOS_FARM_NAME = "Bootstrap farm"
 PONOS_FARM_SEED = "b12868101dab84984481741663d809d2393784894d6e807ceee0bd95051bf971"
@@ -52,6 +57,46 @@ class Command(BaseCommand):
             user.save()
             self.warn(f"Updated user {user} to admin")
 
+    def create_image_server(self, id, url, bucket, region, display_name):
+        try:
+            server = ImageServer.objects.get(Q(id=id) | Q(url=url))
+            if server.id != id:
+                # Migrate existing images & server id in a single transaction
+                with transaction.atomic():
+                    server.images.update(server_id=id)
+                    ImageServer.objects.filter(id=server.id).update(id=id)
+                self.warn(f"Image server {server.id} updated to {id}")
+
+                # Update internal reference for updates below
+                server.id = id
+
+            if server.url != url:
+                server.url = url
+                server.save()
+
+            # Update base settings
+            if server.s3_bucket != bucket or server.s3_region != region:
+                server.s3_bucket = bucket
+                server.s3_region = region
+                server.save()
+                self.warn(f"Updated image server {server.id} S3 settings")
+            else:
+                self.success(f"Image server {server.id} valid")
+        except ImageServer.DoesNotExist:
+            try:
+                server = ImageServer.objects.create(
+                    id=id,
+                    url=url,
+                    s3_bucket=bucket,
+                    s3_region=region,
+                    display_name=display_name,
+                )
+                self.success(f"Image server {server.id} created")
+            except IntegrityError as e:
+                self.fail(f"Failed to create image server: {e}")
+                return
+        return server
+
     def handle(self, **options):
         # Never allow running this script in production
         if not settings.DEBUG:
@@ -108,47 +153,18 @@ class Command(BaseCommand):
             self.success(f"Created token {ADMIN_API_TOKEN}")
 
         # an image server for local cantaloupe https://ark.localhost/iiif/2
-        try:
-            server = ImageServer.objects.get(url="https://ark.localhost/iiif/2")
-            if server.id != IMAGE_SERVER_ID:
-                # Migrate existing images & server id in a single transaction
-                with transaction.atomic():
-                    server.images.update(server_id=IMAGE_SERVER_ID)
-                    ImageServer.objects.filter(id=server.id).update(id=IMAGE_SERVER_ID)
-                self.warn(f"Image server {server.id} updated to {IMAGE_SERVER_ID}")
-
-                # Update internal reference for updates below
-                server.id = IMAGE_SERVER_ID
-
-            # Update base settings
-            if server.s3_bucket != IMAGE_SERVER_BUCKET or server.s3_region != IMAGE_SERVER_REGION:
-                server.s3_bucket = IMAGE_SERVER_BUCKET
-                server.s3_region = IMAGE_SERVER_REGION
-                server.save()
-                self.warn("Updated image server S3 settings")
-            else:
-                self.success(f"Image server {server.id} valid")
-        except ImageServer.DoesNotExist:
-            try:
-                server = ImageServer.objects.create(
-                    id=IMAGE_SERVER_ID,
-                    url="https://ark.localhost/iiif/2",
-                    s3_bucket=IMAGE_SERVER_BUCKET,
-                    s3_region=IMAGE_SERVER_REGION,
-                    display_name="Development local server",
-                )
-                self.success("Image server created")
-            except IntegrityError as e:
-                self.fail(f"Failed to create image server: {e}")
-                return
+        uploads_server = self.create_image_server(UPLOADS_IMAGE_SERVER_ID, UPLOADS_IMAGE_SERVER_URL, UPLOADS_IMAGE_SERVER_BUCKET, UPLOADS_IMAGE_SERVER_REGION, "Local IIIF server for user uploaded files through frontend")
+        if uploads_server is None:
+            return
+        self.create_image_server(INGEST_IMAGE_SERVER_ID, INGEST_IMAGE_SERVER_URL, INGEST_IMAGE_SERVER_BUCKET, INGEST_IMAGE_SERVER_REGION, "Local IIIF server for ingested files from minio")
 
         # Check there is not already a local server with invalid path
         # We'll merge its image into the new one
         # This bad server may have been created by automatic IIIF server detection
         try:
-            bad_server = ImageServer.objects.get(url="https://ark.localhost/iiif")
-            bad_server.merge_into(server)
-            self.warn(f"Merged images from {bad_server.id} into {server.id}")
+            bad_server = ImageServer.objects.get(url="https://uploads.iiif.ark.localhost/iiif")
+            bad_server.merge_into(uploads_server)
+            self.warn(f"Merged images from {bad_server.id} into {uploads_server.id}")
 
             bad_server.delete()
             self.warn("Deleted old server")
@@ -194,17 +210,21 @@ class Command(BaseCommand):
             )
             self.success(f"Created revision {revision.hash}")
 
-        version, created = worker.versions.get_or_create(
-            revision=revision,
-            defaults={
-                "id": IMPORT_WORKER_VERSION_ID,
-                "configuration": {},
-                "state": WorkerVersionState.Created,
-                "gpu_usage": FeatureUsage.Disabled,
-                "docker_image": None,
-                "docker_image_iid": None,
-            }
-        )
+        try:
+            version = WorkerVersion.objects.get(id=IMPORT_WORKER_VERSION_ID)
+            created = False
+        except WorkerVersion.DoesNotExist:
+            version, created = worker.versions.get_or_create(
+                revision=revision,
+                defaults={
+                    "id": IMPORT_WORKER_VERSION_ID,
+                    "configuration": {},
+                    "state": WorkerVersionState.Created,
+                    "gpu_usage": FeatureUsage.Disabled,
+                    "docker_image": None,
+                    "docker_image_iid": None,
+                }
+            )
         if created:
             self.success(f"Created worker version {version.slug}")
         else:
diff --git a/arkindex/images/models.py b/arkindex/images/models.py
index 17b3f44b0e..a682541439 100644
--- a/arkindex/images/models.py
+++ b/arkindex/images/models.py
@@ -15,7 +15,7 @@ from django.utils.text import slugify
 from enumfields import EnumField
 
 from arkindex.images.managers import ImageServerManager
-from arkindex.project.aws import S3FileMixin, S3FileStatus
+from arkindex.project.aws import S3FileMixin, S3FileStatus, should_verify_cert
 from arkindex.project.fields import LStripTextField, MD5HashField, StripSlashURLField
 from arkindex.project.models import IndexableModel
 
@@ -238,7 +238,7 @@ class Image(S3FileMixin, IndexableModel):
             requests_exception = None
             try:
                 # Load info
-                resp = requests.get(info_url, timeout=15, allow_redirects=True)
+                resp = requests.get(info_url, timeout=15, allow_redirects=True, verify=should_verify_cert(info_url))
                 resp.raise_for_status()
                 try:
                     payload = resp.json()
diff --git a/arkindex/project/aws.py b/arkindex/project/aws.py
index 0fb1483945..ef0edfb725 100644
--- a/arkindex/project/aws.py
+++ b/arkindex/project/aws.py
@@ -1,6 +1,7 @@
 import logging
 from functools import wraps
 from io import BytesIO
+from urllib.parse import urlparse
 
 import boto3.session
 from botocore.config import Config
@@ -13,6 +14,18 @@ from tenacity import retry, retry_if_exception, stop_after_delay
 logger = logging.getLogger(__name__)
 
 
+def should_verify_cert(url):
+    """
+    Skip SSL certification validation when hitting a development instance
+    """
+    # Special case when no url is provided
+    if url is None:
+        return True
+
+    host = urlparse(url).netloc
+    return not host.endswith("ark.localhost")
+
+
 def get_s3_resource(
         access_key_id=settings.AWS_ACCESS_KEY,
         secret_access_key=settings.AWS_SECRET_KEY,
@@ -35,6 +48,7 @@ def get_s3_resource(
         "s3",
         endpoint_url=endpoint,
         config=config,
+        verify=should_verify_cert(endpoint),
     )
 
 
diff --git a/arkindex/project/config.py b/arkindex/project/config.py
index 8e373583ac..147e8528cc 100644
--- a/arkindex/project/config.py
+++ b/arkindex/project/config.py
@@ -97,7 +97,7 @@ def get_settings_parser(base_dir):
     database_parser = parser.add_subparser("database", default={})
     database_parser.add_option("name", type=str, default="arkindex_dev")
     database_parser.add_option("host", type=str, default="localhost")
-    database_parser.add_option("port", type=int, default=9100)
+    database_parser.add_option("port", type=int, default=5432)
     database_parser.add_option("user", type=str, default="devuser")
     database_parser.add_option("password", type=str, default="devdata")
 
diff --git a/arkindex/project/settings.py b/arkindex/project/settings.py
index 4399e4c525..e9db6bcd61 100644
--- a/arkindex/project/settings.py
+++ b/arkindex/project/settings.py
@@ -27,7 +27,7 @@ BASE_DIR = Path(_base_dir) if _base_dir else Path(__file__).resolve().parent.par
 # Used for special cases during configuration parsing and settings loading
 TEST_ENV = "test" in sys.argv
 
-# Matches ./manage.py shell[_plus] and arkindex shell[_plus] to disable Sentry reporting
+# Matches arkindex shell[_plus] to disable Sentry reporting
 DJANGO_SHELL = len(sys.argv) > 1 and sys.argv[1] in ("shell", "shell_plus")
 
 CONFIG_PATH = Path(os.environ.get("CONFIG_PATH", BASE_DIR / "config.yml"))
diff --git a/arkindex/project/tests/config_samples/defaults.yaml b/arkindex/project/tests/config_samples/defaults.yaml
index bd19a0a3ef..0fd43ab993 100644
--- a/arkindex/project/tests/config_samples/defaults.yaml
+++ b/arkindex/project/tests/config_samples/defaults.yaml
@@ -24,7 +24,7 @@ database:
   host: localhost
   name: arkindex_dev
   password: devdata
-  port: 9100
+  port: 5432
   replica: null
   user: devuser
 docker:
diff --git a/arkindex/project/tests/test_aws.py b/arkindex/project/tests/test_aws.py
new file mode 100644
index 0000000000..7aae4cb45d
--- /dev/null
+++ b/arkindex/project/tests/test_aws.py
@@ -0,0 +1,12 @@
+from django.test import TestCase
+
+from arkindex.project.aws import should_verify_cert  # noqa
+
+
+class AWSTestCase(TestCase):
+
+    def test_should_verify_cert(self):
+        self.assertTrue(should_verify_cert("https://google.fr/whatever"))
+        self.assertFalse(should_verify_cert("https://minio.ark.localhost/bucket/object"))
+        self.assertFalse(should_verify_cert("https://ark.localhost/corpus"))
+        self.assertTrue(should_verify_cert(None))
diff --git a/base/Dockerfile b/base/Dockerfile
index 4f7b450ba7..5bf809476c 100644
--- a/base/Dockerfile
+++ b/base/Dockerfile
@@ -6,8 +6,3 @@ RUN /bootstrap.sh
 
 # Add unprivilegied user
 RUN addgroup --gid 1000 teklia && adduser --disabled-login --uid 1000 --ingroup teklia ark
-
-# Add archi local CA
-COPY ./dev-ca.pem /usr/local/share/ca-certificates/arkindex-dev.crt
-RUN update-ca-certificates
-ENV REQUESTS_CA_BUNDLE /etc/ssl/certs/ca-certificates.crt
diff --git a/base/dev-ca.pem b/base/dev-ca.pem
deleted file mode 100644
index 0184b64712..0000000000
--- a/base/dev-ca.pem
+++ /dev/null
@@ -1,23 +0,0 @@
------BEGIN CERTIFICATE-----
-MIIDzTCCArWgAwIBAgIJAMIk32qc3uH5MA0GCSqGSIb3DQEBCwUAMH0xCzAJBgNV
-BAYTAkZSMREwDwYDVQQIDAhJc8ODwqhyZTERMA8GA1UEBwwIR3Jlbm9ibGUxDzAN
-BgNVBAoMBlRla2xpYTERMA8GA1UEAwwIQXJraW5kZXgxJDAiBgkqhkiG9w0BCQEW
-FWtlcm1vcnZhbnRAdGVrbGlhLmNvbTAeFw0xODA0MDkwODI1MzBaFw00MjExMzAw
-ODI1MzBaMH0xCzAJBgNVBAYTAkZSMREwDwYDVQQIDAhJc8ODwqhyZTERMA8GA1UE
-BwwIR3Jlbm9ibGUxDzANBgNVBAoMBlRla2xpYTERMA8GA1UEAwwIQXJraW5kZXgx
-JDAiBgkqhkiG9w0BCQEWFWtlcm1vcnZhbnRAdGVrbGlhLmNvbTCCASIwDQYJKoZI
-hvcNAQEBBQADggEPADCCAQoCggEBALDSzuXMJotLPqA8rK8c1GCK9G54VQKgieG8
-agLOd0a3ALh+Qz9uLSPEPz40zxjXLLMVIYqHW9CynP5su62gdcpZ0CVImF1e0bgF
-U+x0RpNFtceh/RixNL5b9XA9Y3By67jpZfjLC9d0WRQOaIOSW/wUTGWUbW0y/OWg
-dc5Qihn32icVit8ogfUCBoH8v0OypiF+AmJHFUq2rjCB0fmvLLZscSdMe4XsYLa0
-7eFRdnKesfE3ooQODnoL2zDkDqhY31PRsCrHquHLO0U7v5NhsfUJs5K9COQeCHW3
-q03kOIecoi1otPYGf07MWKn3AR399HifYHjm5+YYBZ9t7MhOkScCAwEAAaNQME4w
-HQYDVR0OBBYEFLj6DCMMKOYYQE8KvRRjFJEwxbXUMB8GA1UdIwQYMBaAFLj6DCMM
-KOYYQE8KvRRjFJEwxbXUMAwGA1UdEwQFMAMBAf8wDQYJKoZIhvcNAQELBQADggEB
-AHPpwfg/N4QNgzmK0BV//H6n96C+Vu5E3A71zKsAZj231K+pcwvOWEZHPV/h9fcV
-jHQg5crQsZy7CoV2VdTKbprL/F7W+JsUEPrk3xnqnqqIexDm1m8pua1XCLurU+Sy
-588XbzNlOGDzfI8kWhWS9rEJWpVvadQ4PhOlORlU7oRgAjCOXZHNLHs6IdS4yUH5
-TqG9Tv3n7503Jyc5gnLzjJdUg7a3r/7awGr6nWZrdSE29ErLVY+NAUQmjkD7A0kD
-ds4tZOSq44zthZiwI33Jj56eGcN+MjRcqFuziZnJt/NWPX7F0+4XenGmqmcjvlKO
-zFjKKiOsA01MRJmxSUw6CF8=
------END CERTIFICATE-----
diff --git a/config.yml.sample b/config.yml.sample
new file mode 100644
index 0000000000..539c78314d
--- /dev/null
+++ b/config.yml.sample
@@ -0,0 +1,36 @@
+---
+
+public_hostname: http://localhost:8000
+
+local_imageserver_id: 12345
+
+s3:
+  access_key_id: minio1234
+  secret_access_key: minio1234
+  endpoint: https://minio.ark.localhost
+  region: localdev
+
+ponos:
+  default_env:
+    ARKINDEX_API_TOKEN: deadbeefTestToken
+
+features:
+  signup: yes
+  search: yes
+
+docker:
+  tasks_image: registry.gitlab.teklia.com/arkindex/tasks:latest
+
+imports_worker_version: f2bb8dd7-55e9-49ae-9bd9-b1d2e5d491b9
+
+solr:
+  api_url: http://localhost:8983/solr/
+
+ingest:
+  access_key_id: minio1234
+  secret_access_key: minio1234
+  endpoint: https://minio.ark.localhost
+  region: localdev
+  imageserver_id: 3
+  extra_buckets:
+    - ingest
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000000..40e5a91e08
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,66 @@
+---
+include:
+  - docker/docker-compose.services.yml
+
+services:
+
+  backend:
+    container_name: ark-backend
+    build: .
+
+    depends_on:
+      - db
+      - redis
+      - lb
+
+    labels:
+      traefik.enable: true
+      traefik.http.routers.backend.rule: Host(`ark.localhost`) && (PathPrefix(`/api/`) || PathPrefix(`/api-docs/`) || PathPrefix(`/admin/`) || PathPrefix(`/rq/`) || PathPrefix(`/static/`))
+      traefik.http.routers.backend.tls: true
+
+    environment:
+      CONFIG_PATH: /arkindex.yml
+
+    volumes:
+      - ./docker/config.yml:/arkindex.yml:ro
+
+    healthcheck:
+      # start_interval is not fully implemented in Docker, until then we will use a short interval all the time
+      # https://github.com/moby/moby/issues/45897
+      interval: 5s
+
+  worker:
+    container_name: ark-worker
+    build: .
+    command: arkindex rqworker-pool --num-workers 2 -v 1 default high tasks
+
+    depends_on:
+      - db
+      - redis
+      - backend
+    environment:
+      CONFIG_PATH: /arkindex.yml
+
+    volumes:
+      - ./docker/config.yml:/arkindex.yml:ro
+
+      # Required to host temporary ponos data
+      # and share common paths between host and containers
+      - /tmp:/tmp
+
+      # Required to run process tasks
+      - /var/run/docker.sock:/var/run/docker.sock
+
+  front:
+    container_name: ark-front
+
+    build: ../frontend
+
+    depends_on:
+      - lb
+      - backend
+
+    labels:
+      traefik.enable: true
+      traefik.http.routers.frontend.rule: Host(`ark.localhost`)
+      traefik.http.routers.frontend.tls: true
diff --git a/docker/cantaloupe.properties b/docker/cantaloupe.properties
new file mode 100644
index 0000000000..3b18a34972
--- /dev/null
+++ b/docker/cantaloupe.properties
@@ -0,0 +1,43 @@
+# Base http setup behind traefik
+http.enabled = true
+http.host = 0.0.0.0
+http.port = 80
+http.http2.enabled = false
+https.enabled = false
+
+# Explicitly run only IIIF 2
+endpoint.iiif.1.enabled = false
+endpoint.iiif.2.enabled = true
+
+# Use minio with multiple buckets (for uploads and ingest)
+source.static = S3Source
+S3Source.endpoint = http://minio:9000
+S3Source.region = local
+S3Source.top_domain = iiif.ark.localhost
+S3Source.lookup_strategy = ScriptLookupStrategy
+S3Source.BasicLookupStrategy.bucket.name =
+
+# Use minio also for cache, in a dedicated bucket, for a full week
+cache.server.derivative.enabled = true
+cache.server.derivative = S3Cache
+cache.server.derivative.ttl_seconds = 604800
+S3Cache.endpoint = http://minio:9000
+S3Cache.region = local
+S3Cache.bucket.name = iiif-cache
+
+# Log at info level to the console
+log.application.level = info
+log.application.ConsoleAppender.enabled = true
+
+# Log all 4xx/5xx errors. This can cause duplicate logs to show up, since some
+# errors will already be logged by some exception handlers, but some errors
+# might not be logged otherwise.
+log_error_responses = true
+
+# Configure light heap cache
+HeapCache.target_size = 2G
+HeapCache.persist = false
+
+# Setup JPEG processors compatible with libjpegturbo
+processor.downscale_filter = lanczos3
+processor.upscale_filter = lanczos3
diff --git a/docker/config.yml b/docker/config.yml
new file mode 100644
index 0000000000..f9a36f9de3
--- /dev/null
+++ b/docker/config.yml
@@ -0,0 +1,54 @@
+cache:
+  type: memory
+
+static:
+  static_root: /backend_static
+
+database:
+  host: ark-database
+  port: 5432
+  name: arkindex_dev
+  user: devuser
+  password: devdata
+
+redis:
+  host: ark-redis
+
+s3:
+  access_key_id: minio1234
+  secret_access_key: minio1234
+  endpoint: https://minio.ark.localhost
+  region: local
+
+allowed_hosts:
+  - .ark.localhost
+
+public_hostname: https://ark.localhost
+
+session:
+  cookie_domain: ark.localhost
+
+csrf:
+  cookie_domain: ark.localhost
+  trusted_origins:
+    - 'https://*.ark.localhost'
+
+cors:
+  origin_whitelist:
+    - http://localhost:8080
+    - http://127.0.0.1:8080
+    - https://ark.localhost
+
+local_imageserver_id: 12345
+imports_worker_version: f2bb8dd7-55e9-49ae-9bd9-b1d2e5d491b9
+
+features:
+  signup: yes
+  search: yes
+
+solr:
+  api_url: http://ark-solr:8983/solr/
+
+ponos:
+  default_env:
+    ARKINDEX_API_URL: https://ark.localhost/api/v1/
diff --git a/docker/docker-compose.services.yml b/docker/docker-compose.services.yml
new file mode 100644
index 0000000000..1dc148c7b6
--- /dev/null
+++ b/docker/docker-compose.services.yml
@@ -0,0 +1,146 @@
+---
+services:
+
+  lb:
+    container_name: ark-lb
+    image: traefik:2.11
+    ports:
+      # No need to expose on 0.0.0.0 as other users would not resolve
+      # the .localhost domain on an external IP
+      - 127.0.0.1:80:80
+      - 127.0.0.1:443:443
+    volumes:
+      - /var/run/docker.sock:/var/run/docker.sock
+      - ./traefik.toml:/traefik.toml
+      - ./ssl:/certs
+
+    networks:
+      default:
+        aliases:
+          - ark.localhost
+          - traefik.ark.localhost
+
+          # Required for backend to resolve services using the public DNS
+          - minio.ark.localhost
+          - uploads.iiif.ark.localhost
+          - ingest.iiif.ark.localhost
+
+    labels:
+      # Expose traefik dashboard on traefik.ark.localhost
+      traefik.enable: true
+      traefik.http.routers.traefik.rule: Host(`traefik.ark.localhost`)
+      traefik.http.routers.traefik.service: api@internal
+      traefik.http.routers.traefik.tls: true
+
+  redis:
+    container_name: ark-redis
+    image: redis:alpine
+    volumes:
+      - redisdata:/data
+    ports:
+      - 127.0.0.1:6379:6379
+
+  # Solr image
+  solr:
+    image: solr:9
+    container_name: ark-solr
+    command:
+      - solr
+      - -f
+      - -cloud
+      - -noprompt
+    volumes:
+      - solrdata:/var/solr
+    ports:
+      - 127.0.0.1:8983:8983
+
+  # The dev setup exposes PostgreSQL on the host
+  # and stores its data in a local volume
+  db:
+    container_name: ark-database
+    image: postgis/postgis:14-3.2
+    shm_size: '512mb'
+
+    ports:
+      - 127.0.0.1:5432:5432
+    volumes:
+      - pgdata:/var/lib/postgresql/data
+    environment:
+      POSTGRES_USER: devuser
+      POSTGRES_PASSWORD: devdata
+      POSTGRES_DB: arkindex_dev
+
+  # IIIF server
+  cantaloupe:
+    container_name: ark-cantaloupe
+    image: registry.gitlab.teklia.com/iiif/cantaloupe:5.0.5-p2
+    expose:
+      - 80
+    environment:
+      AWS_ACCESS_KEY_ID: minio1234
+      AWS_SECRET_ACCESS_KEY: minio1234
+    labels:
+      traefik.enable: true
+      traefik.http.routers.cantaloupe.rule: HostRegexp(`iiif.ark.localhost`, `{subdomain:[a-z]+}.iiif.ark.localhost`)
+      traefik.http.routers.cantaloupe.tls: true
+    volumes:
+      - ./cantaloupe.properties:/etc/cantaloupe.properties:ro
+    depends_on:
+      - minio
+      - lb
+
+  minio:
+    container_name: ark-minio
+    image: minio/minio:RELEASE.2021-10-02T16-31-05Z.fips
+    command: server /data --compat --console-address :9001
+    environment:
+      MINIO_BROWSER_REDIRECT_URL: https://minio-console.ark.localhost
+      MINIO_SERVER_URL: https://minio.ark.localhost
+      MINIO_ROOT_USER: minio1234
+      MINIO_ROOT_PASSWORD: minio1234
+    expose:
+      - 9000
+      - 9001
+    volumes:
+      - miniodata:/data
+
+      # Embed our internal CA cert so the console
+      # can verify the backend through traefik using the real URLs
+      - type: bind
+        target: /root/.minio/certs/CAs/arkindex-dev.pem
+        source: ./ssl/rootCA.pem
+    labels:
+      traefik.enable: true
+      traefik.http.routers.minio.rule: Host(`minio.ark.localhost`)
+      traefik.http.routers.minio.tls: true
+      traefik.http.routers.minio.service: minio-service
+      traefik.http.services.minio-service.loadbalancer.server.port: 9000
+      traefik.http.routers.minio-console.rule: Host(`minio-console.ark.localhost`)
+      traefik.http.routers.minio-console.tls: true
+      traefik.http.routers.minio-console.service: minio-console-service
+      traefik.http.services.minio-console-service.loadbalancer.server.port: 9001
+    depends_on:
+      - lb
+
+  minio-buckets:
+    container_name: ark-minio-buckets
+    image: minio/mc:latest
+
+    environment:
+      MC_HOST_ark: http://minio1234:minio1234@minio:9000
+
+    # Create all the required buckets on the minio instance defined above
+    command: mb -p ark/export ark/uploads ark/iiif-cache ark/ponos-artifacts ark/ponos-logs ark/staging ark/thumbnails ark/ingest
+
+    depends_on:
+      - minio
+
+volumes:
+  solrdata:
+    driver: local
+  redisdata:
+    driver: local
+  miniodata:
+    driver: local
+  pgdata:
+    driver: local
diff --git a/docker/ssl/traefik.toml b/docker/ssl/traefik.toml
new file mode 100644
index 0000000000..261e7b3225
--- /dev/null
+++ b/docker/ssl/traefik.toml
@@ -0,0 +1,3 @@
+[[tls.certificates]]
+  certFile = "/certs/ark-cert.pem"
+  keyFile = "/certs/ark-key.pem"
diff --git a/docker/traefik.toml b/docker/traefik.toml
new file mode 100644
index 0000000000..0951d2e10d
--- /dev/null
+++ b/docker/traefik.toml
@@ -0,0 +1,29 @@
+defaultEntryPoints = ["websecure"]
+logLevel = "DEBUG"
+
+[providers]
+  [providers.docker]
+    endpoint = "unix:///var/run/docker.sock"
+    defaultRule = "Host(`localhost`)"
+    watch = true
+    exposedByDefault = false
+
+  [providers.file]
+    directory = "/certs"
+    watch = true
+
+[accessLog]
+
+[api]
+  dashboard = true
+
+[entryPoints]
+  [entryPoints.web]
+    address = ":80"
+
+    [entryPoints.web.http.redirections.entryPoint]
+      to = "websecure"
+      scheme = "https"
+
+  [entryPoints.websecure]
+    address = ":443"
-- 
GitLab