Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Target project: arkindex/backend

Commits on Source (41)
Showing 972 additions and 1194 deletions
repos:
  - repo: https://github.com/pre-commit/mirrors-isort
    rev: v5.10.1
    hooks:
      - id: isort
  - repo: https://github.com/pycqa/flake8
    rev: 3.9.2
    hooks:
      - id: flake8
        additional_dependencies:
          - 'flake8-copyright==0.2.2'
          - 'flake8-debugger==3.1.0'
          - 'flake8-quotes==3.3.2'
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.1.11
    # Ruff version.
    rev: v0.3.7
    hooks:
      # Run the linter.
      - id: ruff
        args: [--fix]
  - repo: https://github.com/pre-commit/pre-commit-hooks
......
@@ -16,7 +16,7 @@ build:
	CI_PROJECT_DIR=$(ROOT_DIR) CI_REGISTRY_IMAGE=$(IMAGE_TAG) $(ROOT_DIR)/ci/build.sh

worker:
	arkindex rqworker -v 2 default high tasks
	arkindex rqworker -v 2 default high tasks export

test-fixtures:
	$(eval export PGPASSWORD=devdata)
@@ -54,7 +54,7 @@ release:

clean-docker:
	$(eval containers:=$(shell docker ps -a -q))
	@if [ -n "$(containers)" ]; then \
		echo "Cleaning up past containers\n" \
		echo "Cleaning up past containers\n" ; \
		docker rm -f $(containers) ; \
	fi
......
1.6.0-beta3
1.6.1-beta2
@@ -11,8 +11,6 @@ from arkindex.documents.models import (
    Element,
    ElementType,
    Entity,
    EntityLink,
    EntityRole,
    EntityType,
    MetaData,
    MLClass,
@@ -135,29 +133,15 @@ class EntityMetaForm(forms.ModelForm):
    metas = HStoreFormField()


class EntityLinkInLine(admin.TabularInline):
    model = EntityLink
    fk_name = "parent"
    raw_id_fields = ("child", )


class EntityAdmin(admin.ModelAdmin):
    list_display = ("id", "name", "type")
    list_filter = ["corpus", "type"]
    readonly_fields = ("id", )
    raw_id_fields = ("worker_version", "worker_run", )
    search_fields = ("name", )
    inlines = (EntityLinkInLine, )
    form = EntityMetaForm


class EntityRoleAdmin(admin.ModelAdmin):
    list_display = ("id", "corpus", "parent_name", "child_name")
    list_filter = ("corpus", )
    readonly_fields = ("id", )
    ordering = ("corpus", "parent_name", "child_name")


class EntityTypeAdmin(admin.ModelAdmin):
    list_display = ("id", "corpus", "name", "color")
    list_filter = ("corpus", )
@@ -180,7 +164,6 @@ admin.site.register(Transcription, TranscriptionAdmin)
admin.site.register(MLClass, MLClassAdmin)
admin.site.register(MetaData, MetaDataAdmin)
admin.site.register(Entity, EntityAdmin)
admin.site.register(EntityRole, EntityRoleAdmin)
admin.site.register(EntityType, EntityTypeAdmin)
admin.site.register(AllowedMetaData, AllowedMetaDataAdmin)
admin.site.register(CorpusExport, CorpusExportAdmin)
@@ -3,32 +3,18 @@ from textwrap import dedent
from uuid import UUID

from django.core.exceptions import ValidationError as DjangoValidationError
from django.db.models import Q
from django.shortcuts import get_object_or_404
from drf_spectacular.utils import OpenApiExample, OpenApiParameter, OpenApiResponse, extend_schema, extend_schema_view
from drf_spectacular.utils import OpenApiParameter, OpenApiResponse, extend_schema, extend_schema_view
from rest_framework import permissions, serializers, status
from rest_framework.exceptions import NotFound, PermissionDenied, ValidationError
from rest_framework.generics import CreateAPIView, ListAPIView, ListCreateAPIView, RetrieveUpdateDestroyAPIView
from rest_framework.generics import CreateAPIView, ListAPIView, RetrieveUpdateDestroyAPIView
from rest_framework.response import Response

from arkindex.documents.models import (
    Corpus,
    Element,
    Entity,
    EntityLink,
    EntityRole,
    EntityType,
    Transcription,
    TranscriptionEntity,
)
from arkindex.documents.models import Corpus, Element, Entity, EntityType, Transcription, TranscriptionEntity
from arkindex.documents.serializers.elements import ElementTinySerializer
from arkindex.documents.serializers.entities import (
    BaseEntitySerializer,
    CreateEntityRoleErrorResponseSerializer,
    EntityCreateSerializer,
    EntityLinkCreateSerializer,
    EntityLinkSerializer,
    EntityRoleSerializer,
    EntitySerializer,
    EntityTypeCreateSerializer,
    EntityTypeSerializer,
@@ -44,53 +30,6 @@ from arkindex.project.permissions import IsVerified, IsVerifiedOrReadOnly
from arkindex.users.models import Role


@extend_schema(tags=["entities"])
@extend_schema_view(
    get=extend_schema(operation_id="ListCorpusRoles", description="List all roles of a corpus"),
    post=extend_schema(
        description="Create a new entity role",
        responses={
            200: EntityRoleSerializer,
            400: CreateEntityRoleErrorResponseSerializer
        },
        examples=[OpenApiExample(
            status_codes=["400"],
            response_only=True,
            name="role-exists",
            value={"id": "55cd009d-cd4b-4ec2-a475-b060f98f9138", "corpus": ["Role already exists in this corpus"]},
            description="Role already exists."
        )]
    )
)
class CorpusRoles(CorpusACLMixin, ListCreateAPIView):
    """
    List all roles in a corpus
    """
    permission_classes = (IsVerifiedOrReadOnly, )
    serializer_class = EntityRoleSerializer
    queryset = EntityRole.objects.none()

    def get_queryset(self):
        return EntityRole.objects \
            .filter(corpus=self.get_corpus(self.kwargs["pk"])) \
            .order_by("parent_name", "child_name")

    def perform_create(self, serializer):
        data = self.request.data
        if EntityRole.objects.filter(
            parent_name=data["parent_name"],
            child_name=data["child_name"],
            parent_type=data["parent_type_id"],
            child_type=data["child_type_id"],
            corpus_id=self.request.parser_context["kwargs"]["pk"]
        ).exists():
            raise serializers.ValidationError({
                "corpus": ["Role already exists in this corpus"],
                "id": self.request.parser_context["kwargs"]["pk"]
            })
        super().perform_create(serializer)


@extend_schema(tags=["entities"])
@extend_schema_view(
    get=extend_schema(operation_id="ListCorpusEntityTypes", description="List all entity types in a corpus"),
@@ -173,8 +112,6 @@ class EntityTypeUpdate(ACLMixin, RetrieveUpdateDestroyAPIView):
    def perform_destroy(self, instance):
        if instance.entities.exists():
            raise ValidationError({"detail": ["Some entities are using this entity type."]})
        if EntityRole.objects.filter(Q(parent_type_id=instance.id) | Q(child_type_id=instance.id)).exists():
            raise ValidationError({"detail": ["Some entity roles are using this entity type."]})
        super().perform_destroy(instance)
@@ -196,14 +133,6 @@ class EntityDetails(ACLMixin, RetrieveUpdateDestroyAPIView):
            .select_related("corpus", "type") \
            .filter(corpus__in=Corpus.objects.readable(self.request.user)) \
            .prefetch_related(
                "parents__role__parent_type",
                "parents__role__child_type",
                "children__role__parent_type",
                "children__role__child_type",
                "parents__child__type",
                "parents__parent__type",
                "children__parent__type",
                "children__child__type",
                "corpus",
            )
@@ -307,15 +236,6 @@ class EntityCreate(CreateAPIView):
        return Response(entity.data, status=status_code, headers=headers)


@extend_schema_view(post=extend_schema(operation_id="CreateEntityLink", tags=["entities"]))
class EntityLinkCreate(CreateAPIView):
    """
    Create a new link between two entities with a role
    """
    permission_classes = (IsVerified, )
    serializer_class = EntityLinkCreateSerializer


@extend_schema_view(post=extend_schema(
    operation_id="CreateTranscriptionEntity",
    tags=["entities"],
@@ -519,41 +439,6 @@ class CorpusEntities(CorpusACLMixin, ListAPIView):
        return queryset


@extend_schema_view(get=extend_schema(operation_id="ListElementLinks", tags=["entities"]))
class ElementLinks(CorpusACLMixin, ListAPIView):
    """
    List all links where parent and child are linked to the element.\n\n
    Requires a **guest** access to the element corpus
    """
    serializer_class = EntityLinkSerializer

    def get_queryset(self):
        try:
            element = Element.objects.select_related("corpus").only("id", "corpus").get(id=self.kwargs["pk"])
        except Element.DoesNotExist:
            raise NotFound
        if not self.has_read_access(element.corpus):
            raise PermissionDenied(detail="You do not have access to this element.")
        # Load entities linked by transcriptions
        entities_tr = Entity.objects.filter(transcriptions__element_id=element.id).prefetch_related("transcriptions")
        # Load entities linked by metadatas
        entities_meta = Entity.objects.filter(metadatas__element_id=element.id).prefetch_related("metadatas")
        # Now load all links belonging to those entities
        # It's several times faster to combine the queries in the final one
        # than combining them at the lower level (entities is slower than entities_tr + entities_meta)
        # We need to support cross references between transcriptions & metadata entities
        return EntityLink.objects.filter(
            Q(parent__in=entities_tr, child__in=entities_tr)
            | Q(parent__in=entities_tr, child__in=entities_meta)
            | Q(parent__in=entities_meta, child__in=entities_tr)
            | Q(parent__in=entities_meta, child__in=entities_meta)
        ).select_related("role", "child__type", "parent__type").order_by("parent__name")


@extend_schema_view(
    post=extend_schema(
        operation_id="CreateTranscriptionEntities",
......
import uuid
from textwrap import dedent
from django.db import transaction
from django.utils.functional import cached_property
@@ -300,16 +301,22 @@ class CorpusMLClassPagination(PageNumberPagination):
@extend_schema_view(
    get=extend_schema(
        operation_id="ListCorpusMLClasses",
        description=dedent("""
            List available classes in a corpus.
            Requires a **guest** access to the corpus.
        """),
    ),
    post=extend_schema(
        operation_id="CreateMLClass",
        description="Create an ML class in a corpus",
        description=dedent("""
            Create an ML class in a corpus.
            Requires an **admin** access to the corpus.
        """),
    )
)
class CorpusMLClassList(CorpusACLMixin, ListCreateAPIView):
    """
    List available classes in a corpus
    """
    serializer_class = MLClassSerializer
    pagination_class = CorpusMLClassPagination

    # For OpenAPI type discovery: a corpus ID is in the path
@@ -322,7 +329,7 @@ class CorpusMLClassList(CorpusACLMixin, ListCreateAPIView):
    def corpus(self):
        role = Role.Guest
        if self.request.method == "POST":
            role = Role.Contributor
            role = Role.Admin
        return self.get_corpus(self.kwargs["pk"], role=role)

    def check_permissions(self, *args, **kwargs):
@@ -357,10 +364,26 @@ class CorpusMLClassList(CorpusACLMixin, ListCreateAPIView):
@extend_schema(tags=["classifications"])
@extend_schema_view(
    get=extend_schema(description="Retrieve a ML class."),
    patch=extend_schema(description="Rename a ML class."),
    put=extend_schema(description="Rename a ML class."),
    delete=extend_schema(description="Delete a ML class if it is not used by any classification."),
    get=extend_schema(description=dedent("""
        Retrieve an ML class.
        Requires a **guest** access to the corpus.
    """)),
    patch=extend_schema(description=dedent("""
        Rename an ML class.
        Requires an **admin** access to the corpus.
    """)),
    put=extend_schema(description=dedent("""
        Rename an ML class.
        Requires an **admin** access to the corpus.
    """)),
    delete=extend_schema(description=dedent("""
        Delete an ML class if it is not used by any classification.
        Requires an **admin** access to the corpus.
    """)),
)
class MLClassRetrieve(CorpusACLMixin, RetrieveUpdateDestroyAPIView):
    serializer_class = MLClassSerializer
@@ -372,7 +395,7 @@ class MLClassRetrieve(CorpusACLMixin, RetrieveUpdateDestroyAPIView):
    def corpus(self):
        role = Role.Guest
        if self.request and self.request.method != "GET":
            role = Role.Contributor
            role = Role.Admin
        return self.get_corpus(self.kwargs["corpus"], role=role)
@@ -440,7 +463,7 @@ class ManageClassificationsSelection(SelectionMixin, CorpusACLMixin, CreateAPIVi
        mode = serializer.validated_data["mode"]
        if mode == ClassificationMode.Create:
            return self.create(corpus, request, *args, **kwargs)
        elif mode == ClassificationMode.Validate:
        if mode == ClassificationMode.Validate:
            elements = self.get_selection(corpus.id)
            Classification.objects.filter(
                element__in=elements,
......
@@ -6,7 +6,7 @@ from arkindex.documents.dates import DateType, InterpretedDate

logger = logging.getLogger(__name__)

# Months (unaccented, lowercase)
# Months, unaccented, lowercase
MONTHS = {
    "en": (
        "january",
@@ -143,7 +143,7 @@ def instanciate_date(date_elt):
    try:
        date.validate()
    except ValueError as e:
        logger.warning("Date fields are incorrect: {}".format(e))
        logger.warning(f"Date fields are incorrect: {e}")
        raise
    return date
@@ -161,6 +161,6 @@ def parse_date(raw_date, functions_table=DATE_FUNCTIONS_TABLE):
            if date_elts:
                return tuple(map(instanciate_date, date_elts))
        except Exception:
            logger.warning("Failed parsing {} with function {}".format(raw_date, f.__name__))
    logger.warning("Date not supported: {}".format(raw_date))
            logger.warning(f"Failed parsing {raw_date} with function {f.__name__}")
    logger.warning(f"Date not supported: {raw_date}")
    return ()
@@ -16,7 +16,7 @@ class DatePrecision(Enum):
    Day = "d"


class InterpretedDate(object):
class InterpretedDate:

    def __init__(self, year, month=None, day=None, type=DateType.Exact):
        self.year = int(year)
@@ -26,17 +26,17 @@ class InterpretedDate(object):
    def validate(self):
        if self.year < 0:
            raise ValueError("Year {} is negative".format(self.year))
            raise ValueError(f"Year {self.year} is negative")
        if self.month and (self.month < 1 or self.month > 12):
            raise ValueError("Month {} is not between 1 and 12".format(self.month))
            raise ValueError(f"Month {self.month} is not between 1 and 12")
        if self.day and (self.day < 1 or self.day > 31):
            raise ValueError("Day {} is not between 1 and 31".format(self.day))
            raise ValueError(f"Day {self.day} is not between 1 and 31")
        # Check if day is correct depending on year and month
        if self.precision == DatePrecision.Day:
            try:
                datetime(*tuple(self))
            except ValueError:
                raise ValueError("Date format is incorrect {}".format(self))
                raise ValueError(f"Date format is incorrect {self}")

    @property
    def precision(self):
@@ -45,7 +45,7 @@ class InterpretedDate(object):
        """
        if self.month and self.day:
            return DatePrecision.Day
        elif self.month:
        if self.month:
            return DatePrecision.Month
        return DatePrecision.Year
@@ -70,10 +70,10 @@ class InterpretedDate(object):
        return s > o

    def __str__(self):
        return "-".join("{:02d}".format(e) for e in tuple(self) if e)
        return "-".join(f"{e:02d}" for e in tuple(self) if e)


class InterpretedDateMixin(object):
class InterpretedDateMixin:
    """
    Adds on-demand date parsing from a text field to InterpretedDates.
    Requires a `raw_dates` property that returns the date string.
......
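Note: to make the InterpretedDate semantics above concrete, a short usage sketch based only on the behaviour visible in this diff (validate(), precision and the zero-padded __str__; the import path follows the one used elsewhere in this changeset):

    from arkindex.documents.dates import DatePrecision, InterpretedDate

    date = InterpretedDate(1845, month=1, day=3)
    date.validate()                           # passes: 1845-01-03 is a valid calendar date
    assert date.precision == DatePrecision.Day
    assert str(date) == "1845-01-03"          # __str__ zero-pads and joins with "-"

    InterpretedDate(1845, month=13).validate()
    # raises ValueError: Month 13 is not between 1 and 12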
@@ -38,8 +38,6 @@ EXPORT_QUERIES = [
    "entity_type",
    "entity",
    "transcription_entity",
    "entity_role",
    "entity_link",
    "metadata",
    "dataset",
    "dataset_element",
@@ -120,7 +118,7 @@ def send_email(subject, template_name, corpus_export, **context):
        logger.error(f"Failed to send email to {corpus_export.user.email}")


@job("high", timeout=settings.RQ_TIMEOUTS["export_corpus"])
@job("export", timeout=settings.RQ_TIMEOUTS["export_corpus"])
def export_corpus(corpus_export: CorpusExport) -> None:
    _, db_path = tempfile.mkstemp(suffix=".db")
    try:
......
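Note: this queue change pairs with the Makefile update above, which adds export to the queues consumed by the rqworker. Assuming @job here is the standard RQ/django-rq decorator, callers enqueue exactly as before; a minimal sketch:

    # Callers are unchanged; the task now lands on the dedicated "export"
    # queue instead of "high", so only workers listening on "export" pick it up.
    export_corpus.delay(corpus_export)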
SELECT link.id, link.parent_id, link.child_id, link.role_id
FROM documents_entitylink link
INNER JOIN documents_entityrole role ON (link.role_id = role.id)
WHERE role.corpus_id = '{corpus_id}'::uuid
SELECT id, parent_name, child_name, parent_type_id, child_type_id
FROM documents_entityrole
WHERE corpus_id = '{corpus_id}'::uuid
@@ -26,13 +26,6 @@ CREATE INDEX transcription_entity_entity_id ON transcription_entity (entity_id);
CREATE INDEX transcription_entity_worker_version_id ON transcription_entity (worker_version_id);
CREATE INDEX transcription_entity_worker_run_id ON transcription_entity (worker_run_id);
CREATE INDEX entity_link_parent_id ON entity_link (parent_id);
CREATE INDEX entity_link_child_id ON entity_link (child_id);
CREATE INDEX entity_link_role_id ON entity_link (role_id);
CREATE INDEX entity_role_parent_type_id ON entity_role (parent_type_id);
CREATE INDEX entity_role_child_type_id ON entity_role (child_type_id);
CREATE INDEX metadata_element_id ON metadata (element_id);
CREATE INDEX metadata_entity_id ON metadata (entity_id);
CREATE INDEX metadata_worker_version_id ON metadata (worker_version_id);
......
PRAGMA foreign_keys = ON;

CREATE TABLE export_version AS SELECT 8 AS version;
CREATE TABLE export_version AS SELECT 9 AS version;

CREATE TABLE image_server (
    id INTEGER NOT NULL,
@@ -168,29 +168,6 @@ CREATE TABLE transcription_entity (
    CHECK (worker_run_id IS NULL OR worker_version_id IS NOT NULL)
);

CREATE TABLE entity_role (
    id VARCHAR(37) NOT NULL,
    parent_name VARCHAR(250) NOT NULL,
    child_name VARCHAR(250) NOT NULL,
    parent_type_id VARCHAR(37) NOT NULL,
    child_type_id VARCHAR(37) NOT NULL,
    PRIMARY KEY (id),
    FOREIGN KEY (parent_type_id) REFERENCES entity_type (id) ON DELETE CASCADE,
    FOREIGN KEY (child_type_id) REFERENCES entity_type (id) ON DELETE CASCADE,
    UNIQUE (parent_name, child_name, parent_type_id, child_type_id)
);

CREATE TABLE entity_link (
    id VARCHAR(37) NOT NULL,
    parent_id VARCHAR(37) NOT NULL,
    child_id VARCHAR(37) NOT NULL,
    role_id VARCHAR(37) NOT NULL,
    PRIMARY KEY (id),
    FOREIGN KEY (parent_id) REFERENCES entity (id),
    FOREIGN KEY (child_id) REFERENCES entity (id),
    FOREIGN KEY (role_id) REFERENCES entity_role (id)
);

CREATE TABLE metadata (
    id VARCHAR(37) NOT NULL,
    element_id VARCHAR(37) NOT NULL,
......
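Note: for consumers of these exports, a minimal compatibility-check sketch against the bumped schema version, using only the export_version table defined above and mirroring the EXPORT_VERSION_MIN/EXPORT_VERSION_MAX range check added to the import command further down:

    import sqlite3

    EXPORT_VERSION_MIN, EXPORT_VERSION_MAX = 8, 9

    def check_export_version(db_path):
        # An Arkindex export stores a single row in the export_version table
        rows = sqlite3.connect(db_path).execute("SELECT version FROM export_version").fetchall()
        if len(rows) != 1 or not (EXPORT_VERSION_MIN <= rows[0][0] <= EXPORT_VERSION_MAX):
            raise ValueError(f"{db_path} does not have a supported export version")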
This diff is collapsed.
@@ -59,7 +59,7 @@ INNER JOIN documents_elementtype elementtype ON (element.type_id = elementtype.i
"""


class Indexer(object):
class Indexer:

    # The query yielding all the elements to run on will look for all the child elements of all indexable elements
    # The joins can take a very long time, so the query gets split into one to fetch all the indexable elements,
@@ -6,12 +6,10 @@ from django.core.management.base import BaseCommand
from django.db import transaction
from django.db.models import Q
from django.db.utils import IntegrityError
from rest_framework.authtoken.models import Token

from arkindex.images.models import ImageServer
from arkindex.ponos.models import Farm
from arkindex.process.models import FeatureUsage, Repository, Worker, WorkerType, WorkerVersion, WorkerVersionState
from arkindex.users.models import User

# Constants used in architecture project
UPLOADS_IMAGE_SERVER_ID = 12345
@@ -30,7 +28,6 @@ IMPORT_WORKER_SLUG = "file_import"
IMPORT_WORKER_REPO = "https://gitlab.teklia.com/arkindex/tasks"
IMPORT_WORKER_REVISION_MESSAGE = "File import worker bootstrap"
IMPORT_WORKER_REVISION_AUTHOR = "Dev Bootstrap"
ADMIN_API_TOKEN = "deadbeefTestToken"


class Command(BaseCommand):
@@ -48,15 +45,6 @@ class Command(BaseCommand):
        """Helper to display error messages"""
        self.stdout.write(self.style.ERROR(f"{msg}"))

    def check_user(self, user):
        """Ensure a user is admin"""
        if user.is_admin:
            self.success(f"Admin user for legacy worker API tokens {user} is valid")
        else:
            user.is_admin = True
            user.save()
            self.warn(f"Updated user {user} to admin")

    def create_image_server(self, id, url, bucket, region, display_name):
        try:
            server = ImageServer.objects.get(Q(id=id) | Q(url=url))
@@ -129,29 +117,6 @@ class Command(BaseCommand):
        )
        self.success("Ponos farm created")

        # An admin API user with a specific token
        try:
            token = Token.objects.get(key=ADMIN_API_TOKEN)
            self.check_user(token.user)
        except Token.DoesNotExist:
            # Create a new internal user
            user, _ = User.objects.get_or_create(
                email="internal+bootstrap@teklia.com",
                defaults={
                    "display_name": "Bootstrap Admin user",
                    "is_admin": True,
                }
            )
            self.success("Created internal user")
            self.check_user(user)

        # Finally create a specific token for that user
        if hasattr(user, "auth_token"):
            # Support One-To-One relation
            user.auth_token.delete()
        Token.objects.create(key=ADMIN_API_TOKEN, user=user)
        self.success(f"Created token {ADMIN_API_TOKEN}")

        # an image server for local cantaloupe https://ark.localhost/iiif/2
        uploads_server = self.create_image_server(UPLOADS_IMAGE_SERVER_ID, UPLOADS_IMAGE_SERVER_URL, UPLOADS_IMAGE_SERVER_BUCKET, UPLOADS_IMAGE_SERVER_REGION, "Local IIIF server for user uploaded files through frontend")
        if uploads_server is None:
......
@@ -49,7 +49,7 @@ class Command(BaseCommand):
        img5 = Image.objects.create(path="img5", width=1000, height=1000, server=imgsrv)
        img6 = Image.objects.create(path="img6", width=1000, height=1000, server=imgsrv)

        # Create an admin, an internal and a normal user
        # Create an admin and a normal user
        superuser = User.objects.create_superuser("root@root.fr", "Pa$$w0rd", display_name="Admin")
        superuser.verified_email = True
        superuser.save()
......
@@ -14,10 +14,21 @@ from rq.utils import as_text
from arkindex.documents.models import CorpusExport, CorpusExportState, Element
from arkindex.images.models import Image, ImageServer
from arkindex.ponos.models import Artifact, Task
from arkindex.process.models import DataFile, GitRef, GitRefType, Process, WorkerVersion, WorkerVersionState
from arkindex.process.models import (
    CorpusWorkerVersion,
    DataFile,
    GitRef,
    GitRefType,
    Process,
    Worker,
    WorkerActivity,
    WorkerRun,
    WorkerVersion,
    WorkerVersionState,
)
from arkindex.project.aws import s3
from arkindex.project.rq_overrides import Job
from arkindex.training.models import ModelVersion
from arkindex.training.models import Model, ModelVersion
from redis.exceptions import ConnectionError

# Ponos artifacts use the path: <task id>/<path>
@@ -32,6 +43,9 @@ class Command(BaseCommand):
    help = "Clean up old corpus exports, trashed DataFiles, expired processes and S3 buckets"

    def handle(self, *args, **options):
        # Cleaning up workers could free some artifacts, so clean them before artifacts
        self.cleanup_archived_workers()

        self.cleanup_artifacts()

        self.cleanup_expired_processes()
@@ -48,6 +62,8 @@ class Command(BaseCommand):
        self.cleanup_ponos_logs()

        self.cleanup_archived_models()

        self.cleanup_unlinked_model_versions()

        self.cleanup_rq_user_registries()
@@ -294,6 +310,71 @@ class Command(BaseCommand):
        self.stdout.write(self.style.SUCCESS("Successfully cleaned up orphaned Ponos logs."))

    def cleanup_archived_workers(self):
        """
        Remove Worker instances that have been archived for longer than the configured worker cleanup delay
        and that are not being used in any worker result.
        """
        self.stdout.write("Removing archived workers…")
        workers = Worker.objects.filter(archived__lte=timezone.now() - timedelta(days=settings.WORKER_CLEANUP_DELAY))

        skipped, deleted = 0, 0
        for worker in workers.iterator():
            # There are both foreign keys for worker versions and worker runs on worker results.
            # Some old results might only have a worker version ID, but when a worker run ID is set,
            # the worker version ID is deduced from it, so we only have to check on the version.
            if worker.versions.all().in_use():
                skipped += 1
                continue

            # Skip any workers whose WorkerConfigurations are in use.
            # This should never happen since we already filter on the WorkerVersions,
            # but that could lead to deleting worker results when we didn't want to.
            if WorkerRun.objects.filter(configuration__worker=worker).in_use():
                self.stdout.write(self.style.WARNING(
                    f"Worker {worker.name} ({worker.id}) does not have any worker versions used by worker results, "
                    "but some of its worker configurations are in use."
                ))
                continue

            self.stdout.write(f"Removing worker {worker.name} ({worker.id})")
            worker.delete()
            deleted += 1

        if skipped:
            self.stdout.write(f"Skipping {skipped} archived workers that have worker versions or configurations used in worker results.")
        self.stdout.write(self.style.SUCCESS(f"Successfully cleaned up {deleted} archived workers."))

    def cleanup_archived_models(self):
        """
        Remove Model instances that have been archived for longer than the configured model cleanup delay
        and that are not being used in any worker result.
        """
        self.stdout.write("Removing archived models…")
        models = Model.objects.filter(archived__lte=timezone.now() - timedelta(days=settings.MODEL_CLEANUP_DELAY))

        skipped, deleted = 0, 0
        for model in models.iterator():
            if WorkerRun.objects.filter(model_version__model=model).in_use():
                skipped += 1
                continue

            self.stdout.write(f"Removing model {model.name} ({model.id})")
            # Remove CorpusWorkerVersions and WorkerActivities first
            # Those normally use SET_NULL, but this can cause the unique constraints to complain
            # if there already are rows with a model version set to None.
            WorkerActivity.objects.filter(model_version__model=model).delete()
            CorpusWorkerVersion.objects.filter(model_version__model=model).delete()
            model.delete()
            deleted += 1

        if skipped:
            self.stdout.write(f"Skipping {skipped} archived models that have model versions used in worker results.")
        self.stdout.write(self.style.SUCCESS(f"Successfully cleaned up {deleted} archived models."))

    def cleanup_unlinked_model_versions(self):
        self.stdout.write("Removing orphaned model versions archives…")
        bucket = s3.Bucket(settings.AWS_TRAINING_BUCKET)
......
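Note: both cleanup loops rely on an in_use() queryset helper that is not part of this diff. A rough, hypothetical sketch of the idea (the WorkerVersionQuerySet name and the Element reverse relation below are illustrative only, not taken from this changeset):

    from django.db import models

    from arkindex.documents.models import Element


    class WorkerVersionQuerySet(models.QuerySet):
        def in_use(self):
            # "In use" here means at least one worker result still references
            # one of these versions; Element is one example of such a result.
            return Element.objects.filter(worker_version__in=self).exists()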
@@ -18,8 +18,6 @@ from arkindex.documents.models import (
    ElementPath,
    ElementType,
    Entity,
    EntityLink,
    EntityRole,
    EntityType,
    MetaData,
    MLClass,
@@ -40,7 +38,8 @@ from arkindex.process.models import (
from arkindex.training.models import Dataset, DatasetElement, DatasetSet, Model
from arkindex.users.models import Role, User

EXPORT_VERSION = 8
EXPORT_VERSION_MIN = 8
EXPORT_VERSION_MAX = 9

TABLE_NAMES = {
    "export_version",
@@ -52,8 +51,6 @@ TABLE_NAMES = {
    "element_path",
    "entity",
    "entity_type",
    "entity_role",
    "entity_link",
    "transcription",
    "transcription_entity",
    "metadata",
@@ -132,8 +129,6 @@ SQL_TOP_LEVEL_PATH_QUERY = """

SQL_ENTITY_QUERY = "SELECT * FROM entity"
SQL_ENTITY_TYPE_QUERY = "SELECT * FROM entity_type"
SQL_ENTITY_ROLE_QUERY = "SELECT * FROM entity_role"
SQL_ENTITY_LINK_QUERY = "SELECT * FROM entity_link"

SQL_TRANSCRIPTION_QUERY = "SELECT * FROM transcription"
SQL_TRANSCRIPTION_ENTITY_QUERY = "SELECT * FROM transcription_entity"
@@ -249,24 +244,6 @@ class Command(BaseCommand):
            corpus=self.corpus
        )]

    def convert_entity_roles(self, row):
        return [EntityRole(
            id=row["id"],
            parent_name=row["parent_name"],
            child_name=row["child_name"],
            parent_type_id=row["parent_type_id"],
            child_type_id=row["child_type_id"],
            corpus=self.corpus
        )]

    def convert_entity_links(self, row):
        return [EntityLink(
            id=row["id"],
            parent_id=row["parent_id"],
            child_id=row["child_id"],
            role_id=row["role_id"],
        )]

    def convert_transcriptions(self, row):
        return [Transcription(
            id=row["id"],
@@ -543,13 +520,16 @@ class Command(BaseCommand):
        # Check database tables
        db_results = self.db.execute(SQL_TABLES_QUERY).fetchall()
        if not set([table["name"] for table in db_results]) == TABLE_NAMES:
            raise CommandError(f"The SQLite database {db_path} is not a correct Arkindex export")
        # Database's tables must be a superset of TABLE_NAMES, so we keep compatibility when removing things
        if (missing := TABLE_NAMES - set([table["name"] for table in db_results])):
            raise CommandError(f"The SQLite database {db_path} is missing some expected tables: {sorted(missing)}")

        # Check export version
        db_results = self.db.execute(SQL_VERSION_QUERY).fetchall()
        if len(db_results) != 1 or db_results[0]["version"] != EXPORT_VERSION:
            raise CommandError(f"The SQLite database {db_path} does not have the correct export version")
        if len(db_results) != 1 or not (
            EXPORT_VERSION_MIN <= db_results[0]["version"] <= EXPORT_VERSION_MAX
        ):
            raise CommandError(f"The SQLite database {db_path} does not have a supported export version")

        # Retrieve corpus name
        date = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M")
@@ -597,11 +577,9 @@ class Command(BaseCommand):
        self.bulk_create_objects(ElementPath, self.convert_element_paths, SQL_ELEMENT_PATH_QUERY, ignore_conflicts=False)
        self.bulk_create_objects(ElementPath, self.convert_top_level_paths, SQL_TOP_LEVEL_PATH_QUERY, ignore_conflicts=False)

        # Create entities, entity types, roles and links
        # Create entities and entity types
        self.bulk_create_objects(EntityType, self.convert_entity_types, SQL_ENTITY_TYPE_QUERY)
        self.bulk_create_objects(Entity, self.convert_entities, SQL_ENTITY_QUERY)
        self.bulk_create_objects(EntityRole, self.convert_entity_roles, SQL_ENTITY_ROLE_QUERY)
        self.bulk_create_objects(EntityLink, self.convert_entity_links, SQL_ENTITY_LINK_QUERY)

        # Create transcriptions and transcription entities
        self.bulk_create_objects(Transcription, self.convert_transcriptions, SQL_TRANSCRIPTION_QUERY)
......
@@ -135,24 +135,6 @@ class Command(BaseCommand):
            """)
            self.stdout.write(f"Updated {cursor.rowcount} TranscriptionEntities.")

            self.stdout.write("Updating child entity IDs on entity links…")
            cursor.execute("""
                UPDATE documents_entitylink
                SET child_id = keep_id
                FROM duplicated_entities
                WHERE child_id = remove_id;
            """)
            self.stdout.write(f"Updated {cursor.rowcount} entity links.")

            self.stdout.write("Updating parent entity IDs on entity links…")
            cursor.execute("""
                UPDATE documents_entitylink
                SET parent_id = keep_id
                FROM duplicated_entities
                WHERE parent_id = remove_id;
            """)
            self.stdout.write(f"Updated {cursor.rowcount} entity links.")

            self.stdout.write("Removing duplicate entities…")
            cursor.execute("""
                DELETE FROM documents_entity