From 85384931cd631ace5904b0a5ade84fdd6f3e1c69 Mon Sep 17 00:00:00 2001
From: Erwan Rouchet <rouchet@teklia.com>
Date: Mon, 16 Oct 2023 13:06:16 +0000
Subject: [PATCH] Merge the Transkribus and file imports

---
 .gitlab-ci.yml                                |   1 -
 .isort.cfg                                    |   1 -
 Dockerfile                                    |  10 -
 Dockerfile.binary                             |  12 +-
 arkindex/documents/fixtures/data.json         |   8 -
 arkindex/documents/models.py                  |   8 +-
 arkindex/process/api.py                       |  56 +-----
 arkindex/process/builder.py                   |  31 ---
 arkindex/process/managers.py                  |  13 --
 .../migrations/0018_remove_transkribus.py     |  22 +++
 arkindex/process/models.py                    |   4 -
 arkindex/process/serializers/imports.py       |  64 ++----
 arkindex/process/tests/test_processes.py      |  54 +++--
 .../process/tests/test_transkribus_import.py  | 185 ------------------
 arkindex/project/api_v1.py                    |   4 -
 arkindex/project/checks.py                    |  33 ----
 arkindex/project/config.py                    |   6 -
 arkindex/project/default_corpus.py            |  24 ---
 arkindex/project/mixins.py                    |   2 +-
 arkindex/project/settings.py                  |   5 -
 arkindex/project/tests/__init__.py            |   4 -
 .../tests/config_samples/defaults.yaml        |   5 -
 .../tests/config_samples/override.yaml        |   5 -
 arkindex/project/tests/test_checks.py         |  77 --------
 .../process_elements_filter_ml_class.sql      |   2 -
 .../process_elements_filter_type.sql          |   2 -
 .../process_elements_top_level.sql            |   2 -
 .../process_elements_with_image.sql           |   2 -
 arkindex/users/admin.py                       |   2 +-
 arkindex/users/api.py                         |  15 --
 .../0002_remove_user_transkribus_email.py     |  17 ++
 arkindex/users/models.py                      |   5 -
 arkindex/users/serializers.py                 |  39 ----
 .../tests/test_update_transkribus_email.py    |  43 ----
 requirements.txt                              |   2 -
 35 files changed, 97 insertions(+), 668 deletions(-)
 create mode 100644 arkindex/process/migrations/0018_remove_transkribus.py
 delete mode 100644 arkindex/process/tests/test_transkribus_import.py
 create mode 100644 arkindex/users/migrations/0002_remove_user_transkribus_email.py
 delete mode 100644 arkindex/users/tests/test_update_transkribus_email.py

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 04d07ce46d..447ebc9b06 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -19,7 +19,6 @@ include:
 
   before_script:
     # Custom line to install our own deps from Git using GitLab CI credentials
-    - "pip install -e git+https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.teklia.com/arkindex/transkribus#egg=transkribus-client"
     - "pip install -e git+https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.teklia.com/arkindex/license#egg=teklia-license"
     - pip install -r tests-requirements.txt
     - "echo 'database: {host: postgres, port: 5432}\npublic_hostname: http://ci.arkindex.localhost' > $CONFIG_PATH"
diff --git a/.isort.cfg b/.isort.cfg
index 3fd5b5e994..0b8bd7b946 100644
--- a/.isort.cfg
+++ b/.isort.cfg
@@ -7,5 +7,4 @@ use_parentheses = True
 line_length = 120
 
 default_section=FIRSTPARTY
-known_first_party = transkribus
 known_third_party = SolrClient,bleach,boto3,botocore,cryptography,corsheaders,django,django_admin_hstore_widget,django_rq,drf_spectacular,enumfields,gitlab,psycopg2,requests,responses,rest_framework,rq,setuptools,sqlparse,teklia_toolbox,tenacity,tripoli,yaml
diff --git a/Dockerfile b/Dockerfile
index 5601f26760..49917b1f4c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -6,22 +6,12 @@ ADD . build
 RUN cd build && python3 setup.py sdist
 
 FROM registry.gitlab.teklia.com/arkindex/backend/base:gitlab-teklia
-ARG TRANSKRIBUS_BRANCH=master
-ARG TRANSKRIBUS_ID=63
 ARG LICENSE_BRANCH=master
 ARG LICENSE_ID=37
 
 # Auth token expires on 01/07/2024
 ARG GITLAB_TOKEN="glpat-3sBZPFgkZbqJxfSqjcAa"
 
-# Install transkribus-client from private repo
-RUN \
-  mkdir /tmp/transkribus && \
-  wget --header "PRIVATE-TOKEN: $GITLAB_TOKEN" https://gitlab.teklia.com/api/v4/projects/$TRANSKRIBUS_ID/repository/archive.tar.gz?sha=$TRANSKRIBUS_BRANCH -O /tmp/transkribus/archive.tar.gz && \
-  tar --strip-components=1 -xvf /tmp/transkribus/archive.tar.gz -C /tmp/transkribus && \
-  cd /tmp/transkribus && pip install --disable-pip-version-check --no-cache-dir --quiet . && \
-  rm -rf /tmp/transkribus
-
 # Install teklia-license from private repo
 RUN \
   mkdir /tmp/teklia-license && \
diff --git a/Dockerfile.binary b/Dockerfile.binary
index 5d13f2b757..b53fa6d59b 100644
--- a/Dockerfile.binary
+++ b/Dockerfile.binary
@@ -5,8 +5,6 @@ RUN apt-get update && apt-get install --no-install-recommends -y build-essential
 
 RUN pip install nuitka
 
-ARG TRANSKRIBUS_BRANCH=master
-ARG TRANSKRIBUS_ID=63
 ARG LICENSE_BRANCH=master
 ARG LICENSE_ID=37
 
@@ -22,13 +20,6 @@ ADD arkindex /usr/share/arkindex
 ADD base/requirements.txt /tmp/requirements-base-arkindex.txt
 ADD requirements.txt /tmp/requirements-arkindex.txt
 
-# Install transkribus-client from private repo
-RUN \
-  mkdir /tmp/transkribus && \
-  wget --header "PRIVATE-TOKEN: $GITLAB_TOKEN" https://gitlab.teklia.com/api/v4/projects/$TRANSKRIBUS_ID/repository/archive.tar.gz?sha=$TRANSKRIBUS_BRANCH -O /tmp/transkribus.tar.gz && \
-  tar --strip-components=1 -xvf /tmp/transkribus.tar.gz -C /tmp/transkribus && \
-  mv /tmp/transkribus/transkribus /usr/share
-
 # Install teklia-license from private repo
 RUN \
   mkdir /tmp/teklia-license && \
@@ -38,7 +29,7 @@ RUN \
   cp /tmp/teklia-license/requirements.txt /tmp/requirements-license-arkindex.txt
 
 # Build full requirements, removing relative or remote references to arkindex projects
-RUN cat /tmp/requirements-*arkindex.txt | sort | uniq | grep -v -E '^arkindex|^#|transkribus-client|teklia-license' > /requirements.txt
+RUN cat /tmp/requirements-*arkindex.txt | sort | uniq | grep -v -E '^arkindex|^#|teklia-license' > /requirements.txt
 
 # List all management commands
 RUN find /usr/share/arkindex/*/management -name '*.py' -not -name '__init__.py' > /commands.txt
@@ -56,7 +47,6 @@ ENV NUITKA_RESOURCE_MODE=linker
 RUN python -m nuitka \
       --nofollow-imports \
       --include-package=arkindex \
-      --include-package=transkribus \
       --include-package=teklia_license \
       --show-progress \
       --lto=yes \
diff --git a/arkindex/documents/fixtures/data.json b/arkindex/documents/fixtures/data.json
index 59fe545ccb..9025e50c09 100644
--- a/arkindex/documents/fixtures/data.json
+++ b/arkindex/documents/fixtures/data.json
@@ -19,7 +19,6 @@
         "element_type": null,
         "name_contains": null,
         "load_children": false,
-        "collection_id": null,
         "use_cache": false,
         "use_gpu": false,
         "template": null,
@@ -52,7 +51,6 @@
         "element_type": null,
         "name_contains": null,
         "load_children": false,
-        "collection_id": null,
         "use_cache": false,
         "use_gpu": false,
         "template": null,
@@ -85,7 +83,6 @@
         "element_type": null,
         "name_contains": null,
         "load_children": false,
-        "collection_id": null,
         "use_cache": false,
         "use_gpu": false,
         "template": null,
@@ -118,7 +115,6 @@
         "element_type": null,
         "name_contains": null,
         "load_children": false,
-        "collection_id": null,
         "use_cache": false,
         "use_gpu": false,
         "template": null,
@@ -1771,7 +1767,6 @@
         "last_login": null,
         "email": "root@root.fr",
         "display_name": "Admin",
-        "transkribus_email": null,
         "is_active": true,
         "is_admin": true,
         "verified_email": true,
@@ -1787,7 +1782,6 @@
         "last_login": null,
         "email": "user@user.fr",
         "display_name": "Test user",
-        "transkribus_email": null,
         "is_active": true,
         "is_admin": false,
         "verified_email": true,
@@ -1803,7 +1797,6 @@
         "last_login": null,
         "email": "user2@user.fr",
         "display_name": "Test user write",
-        "transkribus_email": null,
         "is_active": true,
         "is_admin": false,
         "verified_email": true,
@@ -1819,7 +1812,6 @@
         "last_login": null,
         "email": "user3@user.fr",
         "display_name": "Test user read",
-        "transkribus_email": null,
         "is_active": true,
         "is_admin": false,
         "verified_email": true,
diff --git a/arkindex/documents/models.py b/arkindex/documents/models.py
index 4f39c1764f..6a0265d3a3 100644
--- a/arkindex/documents/models.py
+++ b/arkindex/documents/models.py
@@ -22,7 +22,7 @@ from arkindex.documents.dates import InterpretedDateMixin
 from arkindex.documents.deletion import delete_element
 from arkindex.documents.managers import CorpusManager, ElementManager
 from arkindex.project.aws import S3FileMixin
-from arkindex.project.default_corpus import DEFAULT_CORPUS_TYPES, DEFAULT_TRANSKRIBUS_TYPES
+from arkindex.project.default_corpus import DEFAULT_CORPUS_TYPES
 from arkindex.project.fields import ArrayConcat, ArrayField, LinearRingField
 from arkindex.project.models import IndexableModel
 
@@ -72,12 +72,6 @@ class Corpus(IndexableModel):
             for values in DEFAULT_CORPUS_TYPES
         )
 
-    def create_default_transkribus_types(self):
-        self.types.bulk_create(
-            ElementType(corpus=self, **values)
-            for values in DEFAULT_TRANSKRIBUS_TYPES
-        )
-
 
 class ElementType(models.Model):
     id = models.UUIDField(default=uuid.uuid4, primary_key=True, editable=False)
diff --git a/arkindex/process/api.py b/arkindex/process/api.py
index b7d19b26ff..3e8acd4f73 100644
--- a/arkindex/process/api.py
+++ b/arkindex/process/api.py
@@ -79,10 +79,8 @@ from arkindex.process.serializers.git import ExternalRepositorySerializer, Revis
 from arkindex.process.serializers.imports import (
     ApplyProcessTemplateSerializer,
     CorpusProcessSerializer,
-    CreateImportTranskribusErrorResponseSerializer,
     CreateProcessTemplateSerializer,
     FilesProcessSerializer,
-    ImportTranskribusSerializer,
     ProcessDetailsSerializer,
     ProcessElementLightSerializer,
     ProcessElementSerializer,
@@ -478,7 +476,7 @@ class FilesProcess(CreateAPIView):
         folder_type = serializer.validated_data.get('folder_type')
         element_type = serializer.validated_data['element_type']
 
-        if folder and folder.corpus != corpus:
+        if folder and folder.corpus_id != corpus.id:
             # The files' corpus is already validated as writable
             raise ValidationError({'__all__': ['Element and files are in different corpora']})
 
@@ -1538,58 +1536,6 @@ class WorkerRunDetails(ProcessACLMixin, RetrieveUpdateDestroyAPIView):
         return super().perform_destroy(instance)
 
 
-@extend_schema_view(post=extend_schema(
-    operation_id='CreateImportTranskribus',
-    tags=['process'],
-    responses={
-        201: ProcessSerializer,
-        400: CreateImportTranskribusErrorResponseSerializer
-    },
-    examples=[OpenApiExample(
-        status_codes=['400'],
-        response_only=True,
-        name="user-permission",
-        value={'collection_id': "User user@example.com is not a member of the collection 1"},
-        description="An error where the user is not a member of the collection."
-    )]
-))
-class ImportTranskribus(CreateAPIView):
-    """
-    Start a data import from Transkribus email and collection ID.
-    """
-
-    permission_classes = (IsVerified, )
-    serializer_class = ImportTranskribusSerializer
-
-    def create(self, *args, **kwargs):
-        if not settings.ARKINDEX_FEATURES['transkribus']:
-            raise ValidationError(['Transkribus import is unavailable due to the transkribus feature being disabled.'])
-        super().create(*args, **kwargs)
-        return Response(
-            status=status.HTTP_201_CREATED,
-            data=ProcessSerializer(self.process, context={'request': self.request}).data,
-        )
-
-    def perform_create(self, serializer):
-        collection_id = serializer.validated_data['collection_id']
-
-        # Create corpus
-        corpus = Corpus.objects.create(
-            name=f"Transkribus collection n°{collection_id}",
-        )
-        corpus.memberships.create(user=self.request.user, level=Role.Admin.value)
-        corpus.create_default_transkribus_types()
-
-        # Start a process with thumbnails generation
-        self.process = corpus.processes.create(
-            creator=self.request.user,
-            mode=ProcessMode.Transkribus,
-            collection_id=collection_id,
-            generate_thumbnails=True,
-        )
-        self.process.run()
-
-
 @extend_schema_view(get=extend_schema(
     operation_id='ListProcessElements',
     parameters=[
diff --git a/arkindex/process/builder.py b/arkindex/process/builder.py
index 1827286c31..a083c91621 100644
--- a/arkindex/process/builder.py
+++ b/arkindex/process/builder.py
@@ -297,37 +297,6 @@ class ProcessBuilder(object):
             ) for run in worker_runs
         ])
 
-    def build_transkribus(self):
-        from arkindex.process.models import WorkerVersion
-
-        worker_run = self._create_fake_worker_run(worker_version=WorkerVersion.objects.transkribus_version)
-        env = {
-            **self.base_env,
-            'TRANSKRIBUS_EMAIL': settings.TRANSKRIBUS_EMAIL,
-            'TRANSKRIBUS_PASSWORD': settings.TRANSKRIBUS_PASSWORD,
-            'ARKINDEX_WORKER_RUN_ID': str(worker_run.id),
-        }
-        transkribus_task_slug = 'export_transkribus'
-        self._build_task(
-            command=f'python -m arkindex_tasks.export_transkribus {self.process.collection_id}',
-            slug=transkribus_task_slug,
-            env=env,
-        )
-
-        import_task_slug = 'import_arkindex'
-        self._build_task(
-            command='python -m arkindex_tasks.import_transkribus '
-                    f'--job-path /data/export_transkribus/transkribus_export_job.json '
-                    f'--corpus {self.process.corpus.id}',
-            slug=import_task_slug,
-            env=env,
-        )
-        self.tasks_parents[import_task_slug].append(transkribus_task_slug)
-
-        self._add_thumbnails(import_task_slug=import_task_slug)
-
-        self._create_worker_versions_cache([(settings.TRANSKRIBUS_WORKER_VERSION, None, None)])
-
     def build_s3(self):
         from arkindex.process.models import WorkerVersion
 
diff --git a/arkindex/process/managers.py b/arkindex/process/managers.py
index 3b40101e45..0696f66f52 100644
--- a/arkindex/process/managers.py
+++ b/arkindex/process/managers.py
@@ -141,19 +141,6 @@ class WorkerVersionManager(Manager):
             .get(id=settings.IMPORTS_WORKER_VERSION)
         )
 
-    @cached_property
-    def transkribus_version(self):
-        """
-        WorkerVersion used for all Transkribus imports.
-        """
-        return (
-            self
-            # Required by WorkerRun.build_summary
-            .select_related('worker', 'revision')
-            .prefetch_related('revision__refs')
-            .get(id=settings.TRANSKRIBUS_WORKER_VERSION)
-        )
-
 
 class WorkerManager(BaseACLManager):
 
diff --git a/arkindex/process/migrations/0018_remove_transkribus.py b/arkindex/process/migrations/0018_remove_transkribus.py
new file mode 100644
index 0000000000..b854e5dfce
--- /dev/null
+++ b/arkindex/process/migrations/0018_remove_transkribus.py
@@ -0,0 +1,22 @@
+# Generated by Django 4.1.7 on 2023-10-09 14:12
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):  # Drops Process.collection_id; remaps 'transkribus' modes to 'files'.
+
+    dependencies = [
+        ('process', '0017_remove_process_model_version_and_more'),
+    ]
+
+    operations = [
+        migrations.RemoveField(
+            model_name='process',
+            name='collection_id',
+        ),
+        migrations.RunSQL(
+            "UPDATE process_process SET mode = 'files' WHERE mode = 'transkribus'",
+            reverse_sql=migrations.RunSQL.noop,  # Unapplying leaves the remapped rows as 'files'.
+            elidable=True,  # Django may drop this operation when squashing migrations.
+        )
+    ]
diff --git a/arkindex/process/models.py b/arkindex/process/models.py
index fd6a1fc68c..c2c9dd47d9 100644
--- a/arkindex/process/models.py
+++ b/arkindex/process/models.py
@@ -50,7 +50,6 @@ class ProcessMode(Enum):
     Repository = 'repository'
     IIIF = 'iiif'
     Workers = 'workers'
-    Transkribus = 'transkribus'
     Template = 'template'
     S3 = 's3'
     Training = 'training'
@@ -137,9 +136,6 @@ class Process(IndexableModel):
         ],
     )
 
-    # Used to define the collection ID and entities import for Transkribus import
-    collection_id = models.PositiveIntegerField(null=True, blank=True)
-
     # Use elements cache in Workers processes
     use_cache = models.BooleanField(default=False)
 
diff --git a/arkindex/process/serializers/imports.py b/arkindex/process/serializers/imports.py
index 3f449fe200..122a25d6a2 100644
--- a/arkindex/process/serializers/imports.py
+++ b/arkindex/process/serializers/imports.py
@@ -24,7 +24,6 @@ from arkindex.project.validators import MaxValueValidator
 from arkindex.training.models import ModelVersionState
 from arkindex.users.models import Role
 from arkindex.users.utils import get_max_level
-from transkribus import TranskribusAPI
 
 
 class ProcessLightSerializer(serializers.ModelSerializer):
@@ -242,7 +241,10 @@ class ProcessListSerializer(ProcessLightSerializer):
 class FilesProcessSerializer(serializers.Serializer):
 
     mode = EnumField(ProcessMode, default=ProcessMode.Files)
-    files = serializers.PrimaryKeyRelatedField(queryset=DataFile.objects.all(), many=True)
+    files = serializers.PrimaryKeyRelatedField(
+        queryset=DataFile.objects.select_related('corpus'),
+        many=True,
+    )
     folder_id = serializers.UUIDField(required=False, allow_null=True)
     folder_type = serializers.SlugField(required=False, allow_null=True)
     element_type = serializers.SlugField()
@@ -253,7 +255,7 @@ class FilesProcessSerializer(serializers.Serializer):
         'unique_corpus': 'Imports can only run on files from a single corpus',
         'corpus_read_only': 'Cannot write in corpus',
         'folder_not_found': 'Folder does not exist',
-        'image_or_pdf': 'File imports can only import images or PDF documents',
+        'unsupported_content_type': 'File imports can only import images, PDF documents or ZIP archives',
         'iiif_only': 'IIIF imports can only import IIIF documents',
         'folder_required': 'Either folder_type, folder_id or both are required',
         'iiif_folder_required': 'IIIF imports require both folder_type and element_type',
@@ -274,7 +276,7 @@ class FilesProcessSerializer(serializers.Serializer):
         if len(corpora) > 1:
             self.fail('unique_corpus')
         corpus = corpora.pop()
-        if corpus not in Corpus.objects.writable(self.context['request'].user):
+        if not Corpus.objects.writable(self.context['request'].user).filter(id=corpus.id).exists():
             self.fail('corpus_read_only')
         return files
 
@@ -291,8 +293,12 @@ class FilesProcessSerializer(serializers.Serializer):
 
     def validate(self, data):
         if data['mode'] == ProcessMode.Files:
-            if not all(f.content_type == 'application/pdf' or f.content_type.startswith('image/') for f in data['files']):
-                self.fail('image_or_pdf')
+            if not all(
+                f.content_type in ('application/pdf', 'application/zip', 'application/x-zip-compressed')
+                or f.content_type.startswith('image/')
+                for f in data['files']
+            ):
+                self.fail('unsupported_content_type')
 
         elif data['mode'] == ProcessMode.IIIF:
             if not set(f.content_type.split(';')[0] for f in data['files']) <= {'application/json', 'application/ld+json'}:
@@ -621,52 +627,6 @@ class CorpusProcessSerializer(serializers.Serializer):
         return data
 
 
-class ImportTranskribusSerializer(serializers.Serializer):
-    """
-    Serialize a Transkribus import
-    """
-    collection_id = serializers.IntegerField(min_value=1)
-
-    def validate(self, data):
-        collection_id = data.get('collection_id')
-
-        # Check Transkribus email
-        transkribus_email = self.context['request'].user.transkribus_email
-        if not transkribus_email:
-            raise serializers.ValidationError(
-                {"__all__": ["You have not registered your transkribus email"]}
-            )
-
-        # Login as Arkindex user on transkribus
-        try:
-            transkribus_client = TranskribusAPI(email=settings.TRANSKRIBUS_EMAIL, password=settings.TRANSKRIBUS_PASSWORD)
-        except Exception:
-            raise serializers.ValidationError(
-                {"__all__" : [f"Failed to login on Transkribus as {settings.TRANSKRIBUS_EMAIL}"]}
-            )
-
-        # Check Arkindex's right
-        try:
-            users = transkribus_client.list_user_collection(collection_id)
-        except Exception:
-            raise serializers.ValidationError(
-                {"collection_id" : [f"User {settings.TRANSKRIBUS_EMAIL} is not a member of the collection {collection_id}"]}
-            )
-
-        # Check user's right
-        user = next(filter(lambda user: user["email"] == transkribus_email, users), None)
-        if not user:
-            raise serializers.ValidationError(
-                {"collection_id": [f"User {transkribus_email} is not a member of the collection {collection_id}"]}
-            )
-
-        return data
-
-
-class CreateImportTranskribusErrorResponseSerializer(serializers.Serializer):
-    collection_id = serializers.CharField(required=False, help_text="Errors that occurred during collection ID field validation.")
-
-
 class ProcessElementLightSerializer(serializers.ModelSerializer):
     """
     Serialises an Element, using optimized query for ListProcessElement
diff --git a/arkindex/process/tests/test_processes.py b/arkindex/process/tests/test_processes.py
index 1fb032ce5d..f7e4bad4f8 100644
--- a/arkindex/process/tests/test_processes.py
+++ b/arkindex/process/tests/test_processes.py
@@ -58,6 +58,16 @@ class TestProcesses(FixtureAPITestCase):
             size=42,
             content_type='application/json',
         )
+        cls.zip_df = cls.corpus.files.create(
+            name='test.zip',
+            size=1337,
+            content_type='application/zip',
+        )
+        cls.windows_zip_df = cls.corpus.files.create(
+            name='windows.zip',
+            size=1337,
+            content_type='application/x-zip-compressed',
+        )
         cls.page_type = ElementType.objects.get(corpus=cls.corpus, slug='page')
         cls.volume_type = ElementType.objects.get(corpus=cls.corpus, slug='volume')
         cls.ml_class = cls.corpus.ml_classes.create(name='clafoutis')
@@ -1902,20 +1912,35 @@ class TestProcesses(FixtureAPITestCase):
         self.assertIsNone(process.element)
 
     @override_settings(IMPORTS_WORKER_VERSION=None)
-    def test_from_files_image_and_pdf(self):
+    def test_from_files_multiple_types(self):
         self.client.force_login(self.user)
-        with self.settings(IMPORTS_WORKER_VERSION=str(self.import_worker_version.id)):
+
+        with self.settings(IMPORTS_WORKER_VERSION=str(self.import_worker_version.id)), self.assertNumQueries(30):
             response = self.client.post(reverse('api:files-process'), {
-                'files': [str(self.pdf_df.id), str(self.img_df.id)],
+                'files': [
+                    str(self.pdf_df.id),
+                    str(self.img_df.id),
+                    str(self.zip_df.id),
+                    str(self.windows_zip_df.id),
+                ],
                 'mode': 'files',
                 'folder_type': 'volume',
                 'element_type': 'page',
             }, format='json')
             self.assertEqual(response.status_code, status.HTTP_201_CREATED)
+
         data = response.json()
         process = Process.objects.get(id=data['id'])
         self.assertEqual(process.mode, ProcessMode.Files)
-        self.assertListEqual(list(process.files.all()), [self.img_df, self.pdf_df])
+        self.assertQuerysetEqual(
+            process.files.all(), [
+                self.img_df,
+                self.pdf_df,
+                self.zip_df,
+                self.windows_zip_df,
+            ],
+            ordered=False,
+        )
         self.assertIsNone(process.element)
 
     @override_settings(
@@ -1925,7 +1950,7 @@ class TestProcesses(FixtureAPITestCase):
     def test_from_files_iiif(self):
         self.client.force_login(self.user)
 
-        with self.assertNumQueries(28), self.settings(IMPORTS_WORKER_VERSION=str(self.import_worker_version.id)):
+        with self.assertNumQueries(27), self.settings(IMPORTS_WORKER_VERSION=str(self.import_worker_version.id)):
             response = self.client.post(reverse('api:files-process'), {
                 'files': [str(self.iiif_df.id)],
                 'mode': 'iiif',
@@ -2033,14 +2058,17 @@ class TestProcesses(FixtureAPITestCase):
 
     def test_from_files_files_wrong_type(self):
         self.client.force_login(self.user)
-        response = self.client.post(reverse('api:files-process'), {
-            'files': [str(self.iiif_df.id)],
-            'folder_type': 'volume',
-            'element_type': 'page',
-            'mode': 'files',
-        }, format='json')
-        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
-        self.assertEqual(response.json(), {'non_field_errors': ['File imports can only import images or PDF documents']})
+
+        with self.assertNumQueries(6):
+            response = self.client.post(reverse('api:files-process'), {
+                'files': [str(self.iiif_df.id)],
+                'folder_type': 'volume',
+                'element_type': 'page',
+                'mode': 'files',
+            }, format='json')
+            self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+
+        self.assertEqual(response.json(), {'non_field_errors': ['File imports can only import images, PDF documents or ZIP archives']})
 
     @override_settings(IMPORTS_WORKER_VERSION=None)
     def test_from_files_folder_id(self):
diff --git a/arkindex/process/tests/test_transkribus_import.py b/arkindex/process/tests/test_transkribus_import.py
deleted file mode 100644
index 282bf27c2e..0000000000
--- a/arkindex/process/tests/test_transkribus_import.py
+++ /dev/null
@@ -1,185 +0,0 @@
-from unittest.mock import patch
-
-from django.test import override_settings
-from django.urls import reverse
-from rest_framework import status
-
-from arkindex.ponos.models import State
-from arkindex.process.models import Process, ProcessMode, Repository, Revision, Worker, WorkerType, WorkerVersion
-from arkindex.project.default_corpus import DEFAULT_TRANSKRIBUS_TYPES
-from arkindex.project.tests import FixtureAPITestCase
-from arkindex.users.models import Role, User
-
-
-@override_settings(TRANSKRIBUS_EMAIL="arkindex@teklia.com", TRANSKRIBUS_PASSWORD=None)
-class TestTranskribusImport(FixtureAPITestCase):
-    """
-    Test transkribus import
-    """
-
-    @classmethod
-    def setUpTestData(cls):
-        super().setUpTestData()
-        User.objects.update(transkribus_email="nope@nope.fr")
-        cls.repo = Repository.objects.create(url='http://fakery')
-        cls.tr_type = WorkerType.objects.get(slug='import')
-        cls.transkribus_worker = Worker.objects.create(
-            repository=cls.repo,
-            name='Transkribus Import',
-            slug='transkribus_import',
-            type=cls.tr_type
-        )
-        cls.transkribus_rev = Revision.objects.create(
-            hash='1234',
-            message='commit commit',
-            author='Wolpertinger',
-            repo=cls.repo,
-        )
-        cls.transkribus_worker_version = WorkerVersion.objects.create(
-            worker=cls.transkribus_worker,
-            revision=cls.transkribus_rev,
-            configuration={}
-        )
-
-    @override_settings(ARKINDEX_FEATURES={"transkribus": True})
-    def test_requires_login(self):
-        response = self.client.post(reverse("api:import-transkribus"))
-        self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
-
-    @override_settings(ARKINDEX_FEATURES={"transkribus": False})
-    def test_requires_flag(self):
-        self.client.force_login(self.user)
-        response = self.client.post(reverse("api:import-transkribus"), {
-            "collection_id": "12345",
-        }, format="json")
-        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
-        self.assertEqual(response.json(), ["Transkribus import is unavailable due to the transkribus feature being disabled."])
-
-    @override_settings(ARKINDEX_FEATURES={"transkribus": True})
-    def test_requires_transkribus_email(self):
-        User.objects.update(transkribus_email=None)
-        self.client.force_login(self.user)
-        response = self.client.post(reverse("api:import-transkribus"), {
-            "collection_id": "12345",
-        }, format="json")
-        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
-        self.assertEqual(response.json(), {"__all__": ["You have not registered your transkribus email"]})
-
-    @override_settings(ARKINDEX_FEATURES={"transkribus": True})
-    @patch("transkribus.TranskribusAPI.list_user_collection")
-    def test_arkindex_has_not_access(self, mock_transkribus):
-        # Not a mistake: Transkribus client raises `Exception` directly when it runs out of retries
-        mock_transkribus.side_effect = Exception("401 Unauthorized")
-        self.client.force_login(self.user)
-        response = self.client.post(reverse("api:import-transkribus"), {
-            "collection_id": "12345",
-        }, format="json")
-        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
-        self.assertEqual(response.json(), {"collection_id" : ["User arkindex@teklia.com is not a member of the collection 12345"]})
-
-    @override_settings(ARKINDEX_FEATURES={"transkribus": True})
-    @patch("transkribus.TranskribusAPI.list_user_collection")
-    def test_user_has_not_access(self, mock_transkribus):
-        mock_transkribus.return_value = [{"email": "arkindex@teklia.com"}]
-
-        self.client.force_login(self.user)
-        response = self.client.post(reverse("api:import-transkribus"), {
-            "collection_id": "12345",
-        }, format="json")
-        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
-        self.assertEqual(response.json(), {"collection_id": ["User nope@nope.fr is not a member of the collection 12345"]})
-
-    @override_settings(
-        PONOS_DEFAULT_ENV={},
-        ARKINDEX_FEATURES={"transkribus": True},
-        TRANSKRIBUS_EMAIL="arkindex@teklia.com",
-        TRANSKRIBUS_PASSWORD="averysecretpassword",
-        ARKINDEX_TASKS_IMAGE='registry.teklia.com/tasks',
-    )
-    @patch("arkindex.process.serializers.imports.TranskribusAPI")
-    def test_create_import(self, mock_transkribus):
-        mock_transkribus.return_value.list_user_collection.return_value = [{"email": "nope@nope.fr"}, {"email": "arkindex@teklia.com"}]
-        self.client.force_login(self.user)
-
-        with self.settings(TRANSKRIBUS_WORKER_VERSION=self.transkribus_worker_version.id):
-            response = self.client.post(reverse("api:import-transkribus"), {
-                "collection_id": "12345",
-            }, format="json")
-        self.assertEqual(response.status_code, status.HTTP_201_CREATED)
-
-        data = response.json()
-        process = Process.objects.get(id=data["id"])
-
-        self.assertEqual(process.mode, ProcessMode.Transkribus)
-        self.assertEqual(process.collection_id, 12345)
-        self.assertEqual(process.state, State.Unscheduled)
-
-        corpus = process.corpus
-        self.assertEqual(corpus.name, "Transkribus collection n°12345")
-        self.assertEqual(corpus.description, "")
-        self.assertEqual(corpus.public, False)
-        # Assert defaults types are set on the new corpus
-        self.assertCountEqual(
-            list(corpus.types.values(
-                "slug",
-                "display_name",
-                "folder",
-                "color",
-            )),
-            [{
-                "folder": False,
-                **values
-            } for values in DEFAULT_TRANSKRIBUS_TYPES]
-        )
-
-        right = corpus.memberships.get(user=self.user)
-        self.assertTrue(right.level >= Role.Admin.value)
-
-        worker_run = process.worker_runs.get()
-        self.assertEqual(worker_run.version, self.transkribus_worker_version)
-        self.assertListEqual(worker_run.parents, [])
-        self.assertIsNone(worker_run.configuration_id)
-        self.assertIsNone(worker_run.model_version_id)
-
-        tasks_slugs = list(process.tasks.order_by('slug').values_list('slug', flat=True))
-        self.assertEqual(len(tasks_slugs), 3)
-        self.assertEqual(tasks_slugs, ['export_transkribus', 'import_arkindex', 'thumbnails'])
-
-        export_task = process.tasks.get(slug='export_transkribus')
-        self.assertEqual(export_task.command, 'python -m arkindex_tasks.export_transkribus 12345')
-        self.assertEqual(export_task.image, 'registry.teklia.com/tasks')
-        self.assertEqual(list(export_task.parents.all()), [])
-
-        import_task = process.tasks.get(slug='import_arkindex')
-        self.assertEqual(import_task.command, f'python -m arkindex_tasks.import_transkribus --job-path /data/export_transkribus/transkribus_export_job.json --corpus {corpus.id}')
-        self.assertEqual(import_task.image, 'registry.teklia.com/tasks')
-        self.assertEqual(list(import_task.parents.all()), [export_task])
-
-        thumbnails_task = process.tasks.get(slug='thumbnails')
-        self.assertEqual(thumbnails_task.command, 'python3 -m arkindex_tasks.generate_thumbnails /data/import_arkindex/elements.json')
-        self.assertEqual(thumbnails_task.image, 'registry.teklia.com/tasks')
-        self.assertEqual(list(thumbnails_task.parents.all()), [import_task])
-
-    @override_settings(
-        PONOS_DEFAULT_ENV={},
-        ARKINDEX_FEATURES={'transkribus': True},
-    )
-    @patch("transkribus.TranskribusAPI.list_user_collection")
-    def test_corpus_worker_version_list_transkribus(self, mock_transkribus):
-        mock_transkribus.return_value = [{"email": "nope@nope.fr"}, {"email": "arkindex@teklia.com"}]
-        self.client.force_login(self.user)
-        with self.settings(TRANSKRIBUS_WORKER_VERSION=self.transkribus_worker_version.id):
-            response = self.client.post(reverse("api:import-transkribus"), {
-                "collection_id": "12345",
-            }, format="json")
-        self.assertEqual(response.status_code, status.HTTP_201_CREATED)
-        data = response.json()
-        process = Process.objects.get(id=data["id"])
-        self.assertEqual(process.mode, ProcessMode.Transkribus)
-        corpus = process.corpus
-        with self.assertNumQueries(7):
-            response = self.client.get(reverse('api:corpus-versions', kwargs={'pk': corpus.id}))
-            self.assertEqual(response.status_code, status.HTTP_200_OK)
-
-        self.assertEqual(len(response.json()['results']), 1)
-        self.assertEqual(response.json()['results'][0]['worker_version']['id'], str(self.transkribus_worker_version.id))
diff --git a/arkindex/project/api_v1.py b/arkindex/project/api_v1.py
index 46852deefe..be0895c0cd 100644
--- a/arkindex/project/api_v1.py
+++ b/arkindex/project/api_v1.py
@@ -90,7 +90,6 @@ from arkindex.process.api import (
     DataFileRetrieve,
     FilesProcess,
     GitRepositoryImportHook,
-    ImportTranskribus,
     ListProcessElements,
     ProcessDatasetManage,
     ProcessDatasets,
@@ -157,7 +156,6 @@ from arkindex.users.api import (
     UserEmailVerification,
     UserMemberships,
     UserRetrieve,
-    UserTranskribus,
 )
 
 api = [
@@ -285,7 +283,6 @@ api = [
     # Import workflows
     path('process/', ProcessList.as_view(), name='process-list'),
     path('process/fromfiles/', FilesProcess.as_view(), name='files-process'),
-    path('process/transkribus/', ImportTranskribus.as_view(), name='import-transkribus'),
     path('process/<uuid:pk>/', ProcessDetails.as_view(), name='process-details'),
     path('process/<uuid:pk>/retry/', ProcessRetry.as_view(), name='process-retry'),
     path('process/<uuid:pk>/start/', StartProcess.as_view(), name='process-start'),
@@ -342,7 +339,6 @@ api = [
     path('user/token/', UserEmailVerification.as_view(), name='user-token'),
     path('user/password-reset/', PasswordReset.as_view(), name='password-reset'),
     path('user/password-reset/confirm/', PasswordResetConfirm.as_view(), name='password-reset-confirm'),
-    path('user/transkribus/', UserTranskribus.as_view(), name='user-transkribus'),
 
     # Rights management
     path('groups/', GroupsCreate.as_view(), name='groups-create'),
diff --git a/arkindex/project/checks.py b/arkindex/project/checks.py
index 22fdfa8c2c..9db3ec795c 100644
--- a/arkindex/project/checks.py
+++ b/arkindex/project/checks.py
@@ -11,8 +11,6 @@ import sys
 
 from django.core.checks import Critical, Error, Warning, register
 
-from arkindex.process.models import WorkerVersion
-
 
 def only_runserver(func):
     "Decorator to run a system check only when running a dev server or deploying"
@@ -296,34 +294,3 @@ def botocore_config_check(*args, **kwargs):
             ])
 
     return warnings
-
-
-@register()
-@only_runserver
-def transkribus_check(*args, **kwargs):
-    from django.conf import settings
-    errors = []
-    transkribus_feature = settings.ARKINDEX_FEATURES['transkribus']
-    transkribus_config = {
-        'TRANSKRIBUS_EMAIL': 'Transkribus account email address',
-        'TRANSKRIBUS_PASSWORD': 'Transkribus account password',
-        'TRANSKRIBUS_WORKER_VERSION': 'Transkribus worker version'
-    }
-    if not transkribus_feature:
-        return []
-    for setting, descr in transkribus_config.items():
-        value = getattr(settings, setting)
-        if not value:
-            errors.append(Error(
-                f'No {descr} is set; all Transkribus-related features will fail.',
-                hint=f'settings.{setting} = {value!r}',
-                id='arkindex.E012',
-            ))
-    worker_version = settings.TRANSKRIBUS_WORKER_VERSION
-    if worker_version and not WorkerVersion.objects.filter(pk=worker_version).exists():
-        errors.append(Error(
-            'Transkribus worker version does not exist.',
-            hint=f'settings.TRANSKRIBUS_WORKER_VERSION = {worker_version!r}',
-            id='arkindex.E012',
-        ))
-    return errors
diff --git a/arkindex/project/config.py b/arkindex/project/config.py
index 8cae2633c5..baab5f0e90 100644
--- a/arkindex/project/config.py
+++ b/arkindex/project/config.py
@@ -132,11 +132,6 @@ def get_settings_parser(base_dir):
     doorbell_parser.add_option('id', type=str, default=None)
     doorbell_parser.add_option('appkey', type=str, default=None)
 
-    transkribus_parser = parser.add_subparser('transkribus', default={})
-    transkribus_parser.add_option('email', type=str, default=None)
-    transkribus_parser.add_option('password', type=str, default=None)
-    transkribus_parser.add_option('worker_version', type=uuid.UUID, default=None)
-
     gitlab_parser = parser.add_subparser('gitlab', default={})
     gitlab_parser.add_option('app_id', type=str, default=None)
     gitlab_parser.add_option('app_secret', type=str, default=None)
@@ -196,7 +191,6 @@ def get_settings_parser(base_dir):
     features_parser.add_option('signup', type=bool, default=True)
     features_parser.add_option('selection', type=bool, default=True)
     features_parser.add_option('search', type=bool, default=False)
-    features_parser.add_option('transkribus', type=bool, default=True)
 
     banner_parser = parser.add_subparser('banner', default={})
     banner_parser.add_option('message', type=str, default=None)
diff --git a/arkindex/project/default_corpus.py b/arkindex/project/default_corpus.py
index 691fe36012..827e480c8f 100644
--- a/arkindex/project/default_corpus.py
+++ b/arkindex/project/default_corpus.py
@@ -37,27 +37,3 @@ DEFAULT_CORPUS_TYPES = [
         'color': '642aeb'
     }
 ]
-
-DEFAULT_TRANSKRIBUS_TYPES = [
-    {
-        'slug': 'volume',
-        'display_name': 'Volume',
-        'folder': True,
-        'color': '28b62c',
-    },
-    {
-        'slug': 'page',
-        'display_name': 'Page',
-        'color': '28b62c',
-    },
-    {
-        'slug': 'text_line',
-        'display_name': 'Text line',
-        'color': '115eed',
-    },
-    {
-        'slug': 'paragraph',
-        'display_name': 'Paragraph',
-        'color': '642aeb'
-    }
-]
diff --git a/arkindex/project/mixins.py b/arkindex/project/mixins.py
index ac622d6895..51b0c1cb52 100644
--- a/arkindex/project/mixins.py
+++ b/arkindex/project/mixins.py
@@ -205,7 +205,7 @@ class ProcessACLMixin(ACLMixin):
         # Return the access level on a single process
         access_levels = []
         if process.corpus_id:
-            # Use project right. Covers Images, IIIF, PDF, Repository (IIIF), Elements (Workers) and Transkribus process modes
+            # On all processes with a corpus, include corpus ACLs
             access_levels.append(get_max_level(self.user, process.corpus))
         elif process.mode == ProcessMode.Repository and process.revision_id:
             # Use repository right in case of a workers docker build
diff --git a/arkindex/project/settings.py b/arkindex/project/settings.py
index 002fe5cc36..1ae3f31a15 100644
--- a/arkindex/project/settings.py
+++ b/arkindex/project/settings.py
@@ -309,11 +309,6 @@ IIIF_DOWNLOAD_TIMEOUT = (30, 60)
 # check_images sample size when checking all servers
 CHECK_IMAGES_SAMPLE_SIZE = 20
 
-# Transkribus credentials
-TRANSKRIBUS_EMAIL = conf['transkribus']['email']
-TRANSKRIBUS_PASSWORD = conf['transkribus']['password']
-TRANSKRIBUS_WORKER_VERSION = conf['transkribus']['worker_version']
-
 # GitLab OAuth
 GITLAB_APP_ID = conf['gitlab']['app_id']
 GITLAB_APP_SECRET = conf['gitlab']['app_secret']
diff --git a/arkindex/project/tests/__init__.py b/arkindex/project/tests/__init__.py
index b26dcfc06d..5e6fa6ab9d 100644
--- a/arkindex/project/tests/__init__.py
+++ b/arkindex/project/tests/__init__.py
@@ -147,10 +147,6 @@ class FixtureMixin(object):
             pass
 
         # Do the same for WorkerVersion cached properties
-        try:
-            del WorkerVersion.objects.transkribus_version
-        except AttributeError:
-            pass
         try:
             del WorkerVersion.objects.imports_version
         except AttributeError:
diff --git a/arkindex/project/tests/config_samples/defaults.yaml b/arkindex/project/tests/config_samples/defaults.yaml
index 6ece4c85c6..ce3e095ae9 100644
--- a/arkindex/project/tests/config_samples/defaults.yaml
+++ b/arkindex/project/tests/config_samples/defaults.yaml
@@ -39,7 +39,6 @@ features:
   search: false
   selection: true
   signup: true
-  transkribus: true
 gitlab:
   app_id: null
   app_secret: null
@@ -109,9 +108,5 @@ static:
   mirador_url: null
   root_path: null
   universal_viewer_url: null
-transkribus:
-  email: null
-  password: null
-  worker_version: null
 worker_activity_timeout: 3600
 workers_max_chunks: 10
diff --git a/arkindex/project/tests/config_samples/override.yaml b/arkindex/project/tests/config_samples/override.yaml
index 0fbb32b3b5..8fd0071792 100644
--- a/arkindex/project/tests/config_samples/override.yaml
+++ b/arkindex/project/tests/config_samples/override.yaml
@@ -51,7 +51,6 @@ features:
   search: true
   selection: false
   signup: false
-  transkribus: false
 gitlab:
   app_id: a
   app_secret: b
@@ -126,9 +125,5 @@ static:
   mirador_url: gopher://mirador/
   root_path: /
   universal_viewer_url: gopher://uv/
-transkribus:
-  email: nope@nope
-  password: superSecret
-  worker_version: aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa
 worker_activity_timeout: 3600
 workers_max_chunks: 42
diff --git a/arkindex/project/tests/test_checks.py b/arkindex/project/tests/test_checks.py
index 0557764316..1d07c80e03 100644
--- a/arkindex/project/tests/test_checks.py
+++ b/arkindex/project/tests/test_checks.py
@@ -369,80 +369,3 @@ class ChecksTestCase(TestCase):
         settings.INGEST_S3_ENDPOINT = None
         settings.INGEST_S3_REGION = 'something'
         self.assertCountEqual(botocore_config_check(), [])
-
-    @override_settings()
-    def test_transkribus_checks(self):
-        from arkindex.process.models import Repository, Revision, Worker, WorkerType, WorkerVersion
-        from arkindex.project.checks import transkribus_check
-
-        repo = Repository.objects.create(url='http://fakery')
-        tr_type = WorkerType.objects.create(slug='import', display_name='Import')
-        transkribus_worker = Worker.objects.create(
-            repository=repo,
-            name='Transkribus Import',
-            slug='transkribus_import',
-            type=tr_type
-        )
-        transkribus_rev = Revision.objects.create(
-            hash='1234',
-            message='commit commit',
-            author='Wolpertinger',
-            repo=repo,
-        )
-        transkribus_worker_version = WorkerVersion.objects.create(
-            worker=transkribus_worker,
-            revision=transkribus_rev,
-            configuration={}
-        )
-
-        with self.settings(
-            ARKINDEX_FEATURES={
-                'transkribus': True
-            },
-            TRANSKRIBUS_EMAIL='mail@mail.com',
-            TRANSKRIBUS_PASSWORD='passpass',
-            TRANSKRIBUS_WORKER_VERSION=None
-        ):
-            self.assertCountEqual(transkribus_check(), [
-                Error(
-                    'No Transkribus worker version is set; all Transkribus-related features will fail.',
-                    hint='settings.TRANSKRIBUS_WORKER_VERSION = None',
-                    id='arkindex.E012',
-                )
-            ])
-
-        with self.settings(
-            ARKINDEX_FEATURES={
-                'transkribus': True
-            },
-            TRANSKRIBUS_EMAIL='mail@mail.com',
-            TRANSKRIBUS_PASSWORD='passpass',
-            TRANSKRIBUS_WORKER_VERSION='bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb'
-        ):
-            self.assertCountEqual(transkribus_check(), [
-                Error(
-                    'Transkribus worker version does not exist.',
-                    hint="settings.TRANSKRIBUS_WORKER_VERSION = 'bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb'",
-                    id='arkindex.E012',
-                )
-            ])
-
-        with self.settings(
-            ARKINDEX_FEATURES={
-                'transkribus': True
-            },
-            TRANSKRIBUS_EMAIL='mail@mail.com',
-            TRANSKRIBUS_PASSWORD='passpass',
-            TRANSKRIBUS_WORKER_VERSION=transkribus_worker_version.id
-        ):
-            self.assertListEqual(transkribus_check(), [])
-
-        with self.settings(
-            ARKINDEX_FEATURES={
-                'transkribus': False
-            },
-            TRANSKRIBUS_EMAIL='mail@mail.com',
-            TRANSKRIBUS_PASSWORD='passpass',
-            TRANSKRIBUS_WORKER_VERSION=None
-        ):
-            self.assertListEqual(transkribus_check(), [])
diff --git a/arkindex/sql_validation/process_elements_filter_ml_class.sql b/arkindex/sql_validation/process_elements_filter_ml_class.sql
index c32976537c..0dd85b16c9 100644
--- a/arkindex/sql_validation/process_elements_filter_ml_class.sql
+++ b/arkindex/sql_validation/process_elements_filter_ml_class.sql
@@ -3,7 +3,6 @@ SELECT "users_user"."id",
        "users_user"."last_login",
        "users_user"."email",
        "users_user"."display_name",
-       "users_user"."transkribus_email",
        "users_user"."is_active",
        "users_user"."is_admin",
        "users_user"."verified_email",
@@ -33,7 +32,6 @@ SELECT "process_process"."id",
        "process_process"."load_children",
        "process_process"."generate_thumbnails",
        "process_process"."chunks",
-       "process_process"."collection_id",
        "process_process"."use_cache",
        "process_process"."use_gpu",
        "process_process"."template_id",
diff --git a/arkindex/sql_validation/process_elements_filter_type.sql b/arkindex/sql_validation/process_elements_filter_type.sql
index 5587a67095..4b2502d959 100644
--- a/arkindex/sql_validation/process_elements_filter_type.sql
+++ b/arkindex/sql_validation/process_elements_filter_type.sql
@@ -3,7 +3,6 @@ SELECT "users_user"."id",
        "users_user"."last_login",
        "users_user"."email",
        "users_user"."display_name",
-       "users_user"."transkribus_email",
        "users_user"."is_active",
        "users_user"."is_admin",
        "users_user"."verified_email",
@@ -33,7 +32,6 @@ SELECT "process_process"."id",
        "process_process"."load_children",
        "process_process"."generate_thumbnails",
        "process_process"."chunks",
-       "process_process"."collection_id",
        "process_process"."use_cache",
        "process_process"."use_gpu",
        "process_process"."template_id",
diff --git a/arkindex/sql_validation/process_elements_top_level.sql b/arkindex/sql_validation/process_elements_top_level.sql
index d51c384522..7d2bfe7d00 100644
--- a/arkindex/sql_validation/process_elements_top_level.sql
+++ b/arkindex/sql_validation/process_elements_top_level.sql
@@ -3,7 +3,6 @@ SELECT "users_user"."id",
        "users_user"."last_login",
        "users_user"."email",
        "users_user"."display_name",
-       "users_user"."transkribus_email",
        "users_user"."is_active",
        "users_user"."is_admin",
        "users_user"."verified_email",
@@ -33,7 +32,6 @@ SELECT "process_process"."id",
        "process_process"."load_children",
        "process_process"."generate_thumbnails",
        "process_process"."chunks",
-       "process_process"."collection_id",
        "process_process"."use_cache",
        "process_process"."use_gpu",
        "process_process"."template_id",
diff --git a/arkindex/sql_validation/process_elements_with_image.sql b/arkindex/sql_validation/process_elements_with_image.sql
index 7677dbb14a..d7a841886b 100644
--- a/arkindex/sql_validation/process_elements_with_image.sql
+++ b/arkindex/sql_validation/process_elements_with_image.sql
@@ -3,7 +3,6 @@ SELECT "users_user"."id",
        "users_user"."last_login",
        "users_user"."email",
        "users_user"."display_name",
-       "users_user"."transkribus_email",
        "users_user"."is_active",
        "users_user"."is_admin",
        "users_user"."verified_email",
@@ -33,7 +32,6 @@ SELECT "process_process"."id",
        "process_process"."load_children",
        "process_process"."generate_thumbnails",
        "process_process"."chunks",
-       "process_process"."collection_id",
        "process_process"."use_cache",
        "process_process"."use_gpu",
        "process_process"."template_id",
diff --git a/arkindex/users/admin.py b/arkindex/users/admin.py
index 5a2e118db6..aac4399da5 100644
--- a/arkindex/users/admin.py
+++ b/arkindex/users/admin.py
@@ -66,7 +66,7 @@ class UserAdmin(BaseUserAdmin):
     list_display = ('email', 'display_name', 'is_admin', 'created')
     list_filter = ('is_admin', )
     fieldsets = (
-        (None, {'fields': ('email', 'display_name', 'verified_email', 'password', 'transkribus_email')}),
+        (None, {'fields': ('email', 'display_name', 'verified_email', 'password')}),
         ('Permissions', {'fields': ('is_admin', 'is_active')}),
     )
     # add_fieldsets is not a standard ModelAdmin attribute. UserAdmin
diff --git a/arkindex/users/api.py b/arkindex/users/api.py
index 719112da39..141f218611 100644
--- a/arkindex/users/api.py
+++ b/arkindex/users/api.py
@@ -28,7 +28,6 @@ from rest_framework.generics import (
     RetrieveAPIView,
     RetrieveDestroyAPIView,
     RetrieveUpdateDestroyAPIView,
-    UpdateAPIView,
 )
 from rest_framework.permissions import SAFE_METHODS
 from rest_framework.response import Response
@@ -56,7 +55,6 @@ from arkindex.users.serializers import (
     PasswordResetConfirmSerializer,
     PasswordResetSerializer,
     UserSerializer,
-    UserTranskribusSerializer,
 )
 from arkindex.users.utils import RightContent, get_max_level
 
@@ -352,19 +350,6 @@ class PasswordResetConfirm(CreateAPIView):
     serializer_class = PasswordResetConfirmSerializer
 
 
-@extend_schema(tags=['users'])
-class UserTranskribus(UpdateAPIView):
-    """
-    Update and validate Transkribus account
-    Only the user's email is stored in our database
-    """
-    permission_classes = (IsVerified, )
-    serializer_class = UserTranskribusSerializer
-
-    def get_object(self):
-        return self.request.user
-
-
 class OAuthSignIn(APIView):
     """
     Start the OAuth authentication code flow for a given provider
diff --git a/arkindex/users/migrations/0002_remove_user_transkribus_email.py b/arkindex/users/migrations/0002_remove_user_transkribus_email.py
new file mode 100644
index 0000000000..9a79a5e21b
--- /dev/null
+++ b/arkindex/users/migrations/0002_remove_user_transkribus_email.py
@@ -0,0 +1,17 @@
+# Generated by Django 4.1.7 on 2023-10-09 14:12
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('users', '0001_initial'),
+    ]
+
+    operations = [
+        migrations.RemoveField(
+            model_name='user',
+            name='transkribus_email',
+        ),
+    ]
diff --git a/arkindex/users/models.py b/arkindex/users/models.py
index f4f4fbd98b..3a92035320 100644
--- a/arkindex/users/models.py
+++ b/arkindex/users/models.py
@@ -77,11 +77,6 @@ class User(AbstractBaseUser):
         db_collation='case_insensitive',
     )
     display_name = models.CharField(max_length=120)
-    transkribus_email = models.EmailField(
-        max_length=255,
-        null=True,
-        blank=True,
-    )
     is_active = models.BooleanField(default=True)
     is_admin = models.BooleanField(default=False)
     verified_email = models.BooleanField(default=False)
diff --git a/arkindex/users/serializers.py b/arkindex/users/serializers.py
index b2112274be..839c8696f7 100644
--- a/arkindex/users/serializers.py
+++ b/arkindex/users/serializers.py
@@ -14,9 +14,6 @@ from arkindex.project.mixins import WorkerACLMixin
 from arkindex.project.serializer_fields import EnumField
 from arkindex.users.models import Group, OAuthCredentials, OAuthStatus, Right, Role, User
 from arkindex.users.utils import RightContent, get_max_level
-from transkribus import TranskribusAPI
-
-transkribus_client = TranskribusAPI()
 
 
 def validate_user_password(user, data):
@@ -73,7 +70,6 @@ class SimpleUserSerializer(serializers.ModelSerializer):
 class UserSerializer(SimpleUserSerializer):
 
     features = serializers.SerializerMethodField(read_only=True)
-    transkribus_import_email = serializers.SerializerMethodField(read_only=True)
 
     class Meta(SimpleUserSerializer.Meta):
         fields = SimpleUserSerializer.Meta.fields + (
@@ -82,8 +78,6 @@ class UserSerializer(SimpleUserSerializer):
             'is_admin',
             'auth_token',
             'features',
-            'transkribus_email',
-            'transkribus_import_email',
         )
         extra_kwargs = {
             'id': {'read_only': True},
@@ -98,7 +92,6 @@ class UserSerializer(SimpleUserSerializer):
             'verified_email': {'read_only': True},
             'is_admin': {'read_only': True},
             'auth_token': {'read_only': True},
-            'transkribus_email': {'read_only': True},
         }
 
     @extend_schema_field(inline_serializer(
@@ -111,10 +104,6 @@ class UserSerializer(SimpleUserSerializer):
     def get_features(self, *args, **kwargs):
         return settings.ARKINDEX_FEATURES
 
-    @extend_schema_field(serializers.EmailField(allow_null=True))
-    def get_transkribus_import_email(self, *args, **kwargs):
-        return settings.TRANSKRIBUS_EMAIL
-
     def update(self, instance, validated_data):
         if 'password' in validated_data:
             instance.set_password(validated_data.pop('password'))
@@ -198,34 +187,6 @@ class PasswordResetConfirmSerializer(serializers.Serializer):
         user.save()
 
 
-class UserTranskribusSerializer(serializers.Serializer):
-    """
-    A serializer that allows to verify Transkribus credentials
-    and to save the Transkribus email
-    """
-    transkribus_email = serializers.EmailField()
-    transkribus_password = serializers.CharField(write_only=True, style={'input_type': 'password'})
-
-    def validate(self, data):
-        transkribus_email = data.get('transkribus_email')
-        transkribus_password = data.pop('transkribus_password')
-
-        # Check that the credentials are correct
-        try:
-            transkribus_client.login(email=transkribus_email, password=transkribus_password)
-        except Exception:
-            raise serializers.ValidationError(
-                {"__all__" : ["The email or password is incorrect"]}
-            )
-
-        return data
-
-    def update(self, instance, validated_data):
-        instance.transkribus_email = validated_data.get('transkribus_email', None)
-        instance.save()
-        return instance
-
-
 class JobSerializer(serializers.Serializer):
     """
     Serializers a RQ job.
diff --git a/arkindex/users/tests/test_update_transkribus_email.py b/arkindex/users/tests/test_update_transkribus_email.py
deleted file mode 100644
index 8bb9d2c618..0000000000
--- a/arkindex/users/tests/test_update_transkribus_email.py
+++ /dev/null
@@ -1,43 +0,0 @@
-from unittest.mock import patch
-
-from django.test import override_settings
-from django.urls import reverse
-from rest_framework import status
-
-from arkindex.project.tests import FixtureAPITestCase
-
-
-@override_settings(TRANSKRIBUS_EMAIL=None, TRANSKRIBUS_PASSWORD=None)
-class TestUpdateTranskribusEmail(FixtureAPITestCase):
-
-    def test_requires_login(self):
-        response = self.client.patch(reverse('api:user-transkribus'), {
-            'transkribus_email': 'nope@nope.com',
-            'transkribus_password': '42'
-        }, format='json')
-        self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
-
-    @patch('transkribus.TranskribusAPI.login')
-    def test_wrong_credentials(self, mock_transkribus):
-        mock_transkribus.side_effect = Exception()
-        self.client.force_login(self.user)
-        response = self.client.patch(reverse('api:user-transkribus'), {
-            'transkribus_email': 'nope@nope.com',
-            'transkribus_password': '42'
-        }, format='json')
-        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
-        self.assertEqual(response.json(), {"__all__": ["The email or password is incorrect"]})
-
-    @patch('transkribus.TranskribusAPI.login')
-    def test_update_transkribus_email(self, mock_transkribus):
-        mock_transkribus.return_value = {"email": "nope@nope.com"}
-        self.client.force_login(self.user)
-
-        self.assertIsNone(self.user.transkribus_email)
-        response = self.client.patch(reverse('api:user-transkribus'), {
-            'transkribus_email': 'nope@nope.com',
-            'transkribus_password': '42'
-        }, format='json')
-        self.assertEqual(response.status_code, status.HTTP_200_OK)
-        self.user.refresh_from_db()
-        self.assertEqual(self.user.transkribus_email, "nope@nope.com")
diff --git a/requirements.txt b/requirements.txt
index fc18c65f64..5c6c517da4 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -22,6 +22,4 @@ teklia-license==0.1.1
 git+https://gitlab.teklia.com/arkindex/license.git#egg=teklia-license
 teklia-toolbox==0.1.3
 tenacity==8.2.2
-transkribus-client>=0.1.1
-git+https://gitlab.teklia.com/arkindex/transkribus.git#egg=transkribus-client
 uritemplate==4.1.1
-- 
GitLab