From 28013427e2c050f0b88f80e939f864e8d1186219 Mon Sep 17 00:00:00 2001
From: manon blanco <blanco@teklia.com>
Date: Wed, 26 Aug 2020 11:58:48 +0000
Subject: [PATCH] New endpoint to import a Transkribus collection

---
 .gitlab-ci.yml                                |   1 +
 arkindex/dataimport/api.py                    |  57 +++++++-
 .../0017_dataimport_collection_id.py          |  18 +++
 arkindex/dataimport/models.py                 |  21 +++
 arkindex/dataimport/serializers/imports.py    |  37 ++++++
 .../tests/test_transkribus_import.py          | 123 ++++++++++++++++++
 arkindex/documents/models.py                  |   8 +-
 arkindex/project/api_v1.py                    |   5 +-
 arkindex/project/config.py                    |   5 +
 arkindex/project/default_corpus.py            |  22 ++++
 arkindex/project/openapi/patch.yml            |  18 +++
 arkindex/project/settings.py                  |   4 +
 .../tests/config_samples/defaults.yaml        |   4 +
 .../tests/config_samples/override.yaml        |   4 +
 arkindex/users/admin.py                       |   2 +-
 arkindex/users/api.py                         |  18 ++-
 .../migrations/0005_user_transkribus_email.py |  18 +++
 arkindex/users/models.py                      |   5 +
 arkindex/users/serializers.py                 |  45 +++++++
 arkindex/users/tests/test_providers.py        |   3 +-
 .../tests/test_update_transkribus_email.py    |  39 ++++++
 requirements.txt                              |   2 +
 22 files changed, 453 insertions(+), 6 deletions(-)
 create mode 100644 arkindex/dataimport/migrations/0017_dataimport_collection_id.py
 create mode 100644 arkindex/dataimport/tests/test_transkribus_import.py
 create mode 100644 arkindex/users/migrations/0005_user_transkribus_email.py
 create mode 100644 arkindex/users/tests/test_update_transkribus_email.py

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index ce4e002372..0e313ad29d 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -15,6 +15,7 @@ stages:
     # Custom line to install our own deps from Git using GitLab CI credentials
     - "pip install -e git+https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.com/arkindex/common#egg=arkindex-common"
     - "pip install -e git+https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.com/arkindex/ponos#egg=ponos-server"
+    - "pip install -e git+https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.com/arkindex/transkribus#egg=transkribus-client"
     - pip install -r tests-requirements.txt codecov
     - "echo 'database: {host: postgres, port: 5432}' > $CONFIG_PATH"
 
diff --git a/arkindex/dataimport/api.py b/arkindex/dataimport/api.py
index 43e4eb0e11..a82f342e0e 100644
--- a/arkindex/dataimport/api.py
+++ b/arkindex/dataimport/api.py
@@ -25,7 +25,8 @@ from arkindex.dataimport.serializers.git import \
     RepositorySerializer, RepositoryStartImportSerializer, ExternalRepositorySerializer, RevisionSerializer
 from arkindex.dataimport.serializers.imports import (
     DataImportLightSerializer, DataImportSerializer,
-    DataImportFromFilesSerializer, ElementsWorkflowSerializer, WorkerRunLightSerializer, WorkerRunSerializer
+    DataImportFromFilesSerializer, ElementsWorkflowSerializer, WorkerRunLightSerializer, WorkerRunSerializer,
+    ImportTranskribusSerializer
 )
 from arkindex.dataimport.serializers.workers import WorkerSerializer, WorkerVersionSerializer
 from arkindex.users.models import OAuthCredentials
@@ -716,3 +717,57 @@ class WorkerRunDetails(CorpusACLMixin, RetrieveUpdateDestroyAPIView):
         instance.dataimport.worker_runs.filter(parents__contains=[instance.id]).update(parents=ArrayRemove('parents', instance.id))
 
         return super().perform_destroy(instance)
+
+
+class ImportTranskribus(CreateAPIView):
+    """
+    Create a new corpus for a Transkribus collection and start a Transkribus data import on it.
+    """
+
+    permission_classes = (IsVerified, )
+    serializer_class = ImportTranskribusSerializer
+    openapi_overrides = {
+        'operationId': 'CreateImportTranskribus',
+        'description': 'Create a data import from Transkribus collection ID.',
+        'tags': ['imports'],
+        'responses': {
+            '201': {
+                'content': {
+                    'application/json': {
+                        'schema': AutoSchema()._map_serializer(DataImportSerializer())
+                    }
+                }
+            }
+        }
+    }
+
+    def create(self, *args, **kwargs):
+        if not settings.ARKINDEX_FEATURES['transkribus']:  # feature flag is checked before any input validation
+            raise ValidationError(['Transkribus import is unavailable due to the transkribus feature being disabled.'])
+        super().create(*args, **kwargs)  # validates and calls perform_create (sets self.dataimport); its response is discarded
+        return Response(
+            status=status.HTTP_201_CREATED,
+            data=DataImportSerializer(self.dataimport, context={'request': self.request}).data,
+        )
+
+    def perform_create(self, serializer):
+        collection_id = serializer.validated_data['collection_id']
+
+        # Create a dedicated corpus on which the requesting user gets write and admin rights
+        corpus = Corpus.objects.create(
+            name=f"Transkribus collection n°{collection_id}",
+        )
+        corpus.corpus_right.create(
+            user=self.request.user,
+            can_write=True,
+            can_admin=True,
+        )
+        corpus.create_default_transkribus_types()
+
+        # Create the Transkribus data import on the new corpus and start it immediately
+        self.dataimport = corpus.imports.create(
+            creator=self.request.user,
+            mode=DataImportMode.Transkribus,
+            collection_id=collection_id
+        )
+        self.dataimport.start()
diff --git a/arkindex/dataimport/migrations/0017_dataimport_collection_id.py b/arkindex/dataimport/migrations/0017_dataimport_collection_id.py
new file mode 100644
index 0000000000..9767eae85e
--- /dev/null
+++ b/arkindex/dataimport/migrations/0017_dataimport_collection_id.py
@@ -0,0 +1,18 @@
+# Generated by Django 2.2.13 on 2020-08-03 13:52
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('dataimport', '0016_new_jsonfield'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='dataimport',
+            name='collection_id',
+            field=models.PositiveIntegerField(blank=True, null=True),
+        ),
+    ]
diff --git a/arkindex/dataimport/models.py b/arkindex/dataimport/models.py
index 7111c0a6cb..1b32ce999e 100644
--- a/arkindex/dataimport/models.py
+++ b/arkindex/dataimport/models.py
@@ -69,6 +69,9 @@ class DataImport(IndexableModel):
         related_name='imports',
     )
 
+    # Used to define the collection ID for Transkribus import
+    collection_id = models.PositiveIntegerField(null=True, blank=True)
+
     class Meta:
         ordering = ['corpus', '-created']
 
@@ -153,6 +156,19 @@ class DataImport(IndexableModel):
                 },
             }
 
+        elif self.mode == DataImportMode.Transkribus:
+            args = [
+                'python', '-m', 'arkindex_tasks.import_transkribus',
+                str(self.collection_id),
+                '--corpus', str(self.corpus.id),
+            ]
+            tasks = {
+                import_task_name: {
+                    'image': settings.ARKINDEX_TASKS_IMAGE,
+                    'command': ' '.join(args),
+                }
+            }
+
         else:
             tasks = {
                 import_task_name: {
@@ -203,6 +219,11 @@ class DataImport(IndexableModel):
         # Build the workflow in db
         recipe = settings.PONOS_RECIPE.copy()
         recipe['tasks'] = tasks
+        if self.mode == DataImportMode.Transkribus:
+            # PONOS_RECIPE.copy() is shallow: copy env so the credentials never leak into the shared settings
+            recipe['env'] = dict(recipe.get('env') or {})
+            recipe['env']['TRANSKRIBUS_EMAIL'] = settings.TRANSKRIBUS_EMAIL
+            recipe['env']['TRANSKRIBUS_PASSWORD'] = settings.TRANSKRIBUS_PASSWORD
         return Workflow.objects.create(recipe=yaml.dump(recipe))
 
     def start(self, ml_tools=[], chunks=None, thumbnails=False):
diff --git a/arkindex/dataimport/serializers/imports.py b/arkindex/dataimport/serializers/imports.py
index a324aaee74..824d4b983d 100644
--- a/arkindex/dataimport/serializers/imports.py
+++ b/arkindex/dataimport/serializers/imports.py
@@ -10,6 +10,9 @@ from arkindex.documents.serializers.elements import ElementSlimSerializer
 from arkindex.dataimport.models import DataImport, DataFile, WorkerRun, WorkerVersion
 from arkindex.dataimport.serializers.git import RevisionSerializer
 from arkindex.dataimport.serializers.ml_tool import MLToolTaskSerializer
+from transkribus import TranskribusAPI
+
+transkribus_client = TranskribusAPI(email=settings.TRANSKRIBUS_EMAIL, password=settings.TRANSKRIBUS_PASSWORD)
 
 
 class DataImportLightSerializer(serializers.ModelSerializer):
@@ -299,3 +302,37 @@ class WorkerRunSerializer(WorkerRunLightSerializer):
             raise serializers.ValidationError({'__all__': ["Can't update a WorkerRun related to a DataImport that has already started"]})
 
         return data
+
+
+class ImportTranskribusSerializer(serializers.Serializer):
+    """
+    Validate a request to import a Transkribus collection by its ID
+    """
+    collection_id = serializers.IntegerField(min_value=1)
+
+    def validate(self, data):
+        collection_id = data.get('collection_id')
+
+        # The requesting user must have a Transkribus email registered on their account
+        transkribus_email = self.context['request'].user.transkribus_email
+        if not transkribus_email:
+            raise serializers.ValidationError(
+                {"__all__": ["You have not register your transkribus email"]}
+            )
+
+        # List the collection's users with the shared client; any failure is reported as Arkindex lacking access
+        try:
+            users = transkribus_client.list_user_collection(collection_id)
+        except Exception:  # NOTE(review): also hides network/API errors behind a membership message; confirm intended
+            raise serializers.ValidationError(
+                {"collection_id" : [f"User {settings.TRANSKRIBUS_EMAIL} is not a member of the collection {collection_id}"]}
+            )
+
+        # The user's registered Transkribus email must appear among the collection's users
+        user = next(filter(lambda user: user["email"] == transkribus_email, users), None)
+        if not user:
+            raise serializers.ValidationError(
+                {"collection_id": [f"User {transkribus_email} is not a member of the collection {collection_id}"]}
+            )
+
+        return data
diff --git a/arkindex/dataimport/tests/test_transkribus_import.py b/arkindex/dataimport/tests/test_transkribus_import.py
new file mode 100644
index 0000000000..4f8c119f8c
--- /dev/null
+++ b/arkindex/dataimport/tests/test_transkribus_import.py
@@ -0,0 +1,123 @@
+from unittest.mock import patch
+from django.urls import reverse
+from django.test import override_settings
+from django.conf import settings
+from rest_framework import status
+from arkindex_common.enums import DataImportMode
+from arkindex.dataimport.models import DataImport
+from arkindex.project.tests import FixtureAPITestCase
+from arkindex.project.default_corpus import DEFAULT_TRANSKRIBUS_TYPES
+from ponos.models import State
+from arkindex.users.models import User
+import yaml
+
+
+@override_settings(TRANSKRIBUS_EMAIL="arkindex@teklia.com", TRANSKRIBUS_PASSWORD=None)
+class TestTranskribusImport(FixtureAPITestCase):
+    """
+    Test transkribus import
+    """
+
+    @classmethod
+    def setUpTestData(cls):
+        super().setUpTestData()
+        User.objects.update(transkribus_email="nope@nope.fr")
+
+    def test_requires_login(self):
+        response = self.client.post(reverse("api:import-transkribus"))
+        self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
+
+    @override_settings(ARKINDEX_FEATURES={"transkribus": False})
+    def test_requires_flag(self):
+        self.client.force_login(self.user)
+        response = self.client.post(reverse("api:import-transkribus"), {
+            "collection_id": "12345",
+        }, format="json")
+        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+        self.assertEqual(response.json(), ["Transkribus import is unavailable due to the transkribus feature being disabled."])
+
+    def test_requires_transkribus_email(self):
+        User.objects.update(transkribus_email=None)
+        self.client.force_login(self.user)
+        response = self.client.post(reverse("api:import-transkribus"), {
+            "collection_id": "12345",
+        }, format="json")
+        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+        self.assertEqual(response.json(), {"__all__": ["You have not register your transkribus email"]})
+
+    def test_arkindex_has_not_access(self):
+        self.client.force_login(self.user)
+        response = self.client.post(reverse("api:import-transkribus"), {
+            "collection_id": "12345",
+        }, format="json")
+        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+        self.assertEqual(response.json(), {"collection_id" : ["User arkindex@teklia.com is not a member of the collection 12345"]})
+
+    @patch("transkribus.TranskribusAPI.list_user_collection")
+    def test_user_has_not_access(self, mock_transkribus):
+        mock_transkribus.return_value = [{"email": "arkindex@teklia.com"}]
+
+        self.client.force_login(self.user)
+        response = self.client.post(reverse("api:import-transkribus"), {
+            "collection_id": "12345",
+        }, format="json")
+        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+        self.assertEqual(response.json(), {"collection_id": ["User nope@nope.fr is not a member of the collection 12345"]})
+
+    @override_settings(
+        PONOS_RECIPE={}
+    )
+    @patch("transkribus.TranskribusAPI.list_user_collection")
+    def test_create_import(self, mock_transkribus):
+        mock_transkribus.return_value = [{"email": "nope@nope.fr"}, {"email": "arkindex@teklia.com"}]
+
+        self.client.force_login(self.user)
+        response = self.client.post(reverse("api:import-transkribus"), {
+            "collection_id": "12345",
+        }, format="json")
+        self.assertEqual(response.status_code, status.HTTP_201_CREATED)
+        data = response.json()
+        dataimport = DataImport.objects.get(id=data["id"])
+        self.assertEqual(dataimport.mode, DataImportMode.Transkribus)
+        self.assertEqual(dataimport.collection_id, 12345)
+        corpus = dataimport.corpus
+        right = corpus.corpus_right.get(user=self.user)
+        self.assertTrue(right.can_write)
+        self.assertTrue(right.can_admin)
+        self.assertEqual(corpus.name, "Transkribus collection n°12345")
+        self.assertEqual(corpus.description, "")
+        self.assertEqual(corpus.public, False)
+        # Assert defaults types are set on the new corpus
+        self.assertCountEqual(
+            list(corpus.types.values(
+                "slug",
+                "display_name",
+                "folder",
+                "allowed_transcription"
+            )),
+            [{
+                "folder": False,
+                "allowed_transcription": None,
+                **values
+            } for values in DEFAULT_TRANSKRIBUS_TYPES]
+        )
+        self.assertEqual(dataimport.state, State.Unscheduled)
+        self.assertIsNotNone(dataimport.workflow)
+        recipe = yaml.safe_load(dataimport.workflow.recipe)
+        self.assertDictEqual(recipe, {
+            'env': {
+                'TRANSKRIBUS_EMAIL': settings.TRANSKRIBUS_EMAIL,
+                'TRANSKRIBUS_PASSWORD': settings.TRANSKRIBUS_PASSWORD
+            },
+            'tasks': {
+                'import': {
+                    'command': 'python -m arkindex_tasks.import_transkribus 12345 --corpus {}'.format(corpus.id),
+                    'image': 'registry.gitlab.com/arkindex/tasks'
+                },
+                'thumbnails': {
+                    'command': 'python3 -m arkindex_tasks.generate_thumbnails /data/import/elements.json',
+                    'image': 'registry.gitlab.com/arkindex/tasks',
+                    'parents': ['import']
+                }
+            }
+        })
diff --git a/arkindex/documents/models.py b/arkindex/documents/models.py
index 14d259dd1c..438ca87624 100644
--- a/arkindex/documents/models.py
+++ b/arkindex/documents/models.py
@@ -8,7 +8,7 @@ from django.core.exceptions import ValidationError
 from enumfields import EnumField, Enum
 from arkindex_common.enums import TranscriptionType, MetaType, EntityType
 from arkindex_common.ml_tool import MLToolType
-from arkindex.project.default_corpus import DEFAULT_CORPUS_TYPES
+from arkindex.project.default_corpus import DEFAULT_CORPUS_TYPES, DEFAULT_TRANSKRIBUS_TYPES
 from arkindex.project.models import IndexableModel
 from arkindex.project.fields import ArrayField
 from arkindex.project.elastic import ESTranscription, ESElement, ESEntity
@@ -89,6 +89,12 @@ class Corpus(IndexableModel):
             for values in DEFAULT_CORPUS_TYPES
         )
 
+    def create_default_transkribus_types(self):
+        self.types.bulk_create(
+            ElementType(corpus=self, **values)
+            for values in DEFAULT_TRANSKRIBUS_TYPES
+        )
+
 
 class ElementType(models.Model):
     id = models.UUIDField(default=uuid.uuid4, primary_key=True, editable=False)
diff --git a/arkindex/project/api_v1.py b/arkindex/project/api_v1.py
index 892635a602..6597310f77 100644
--- a/arkindex/project/api_v1.py
+++ b/arkindex/project/api_v1.py
@@ -28,12 +28,13 @@ from arkindex.dataimport.api import (
     RepositoryList, RepositoryRetrieve, RepositoryStartImport, DataFileCreate,
     GitRepositoryImportHook, AvailableRepositoriesList, RevisionRetrieve,
     MLToolList, CorpusWorkflow, WorkerList, WorkerVersionList, WorkerVersionRetrieve,
-    WorkerRunList, WorkerRunDetails
+    WorkerRunList, WorkerRunDetails, ImportTranskribus
 )
 from arkindex.images.api import ImageCreate, IIIFURLCreate, IIIFInformationCreate, ImageRetrieve, ImageElements
 from arkindex.users.api import (
     ProvidersList, CredentialsList, CredentialsRetrieve, OAuthSignIn, OAuthRetry, OAuthCallback,
     UserRetrieve, UserCreate, UserEmailLogin, UserEmailVerification, PasswordReset, PasswordResetConfirm,
+    UpdateUserTranskribus
 )
 
 # Cache the OpenAPI schema view for a day
@@ -136,6 +137,7 @@ api = [
     path('imports/', DataImportsList.as_view(), name='import-list'),
     path('imports/fromfiles/', DataImportFromFiles.as_view(), name='import-from-files'),
     path('imports/mltools/', MLToolList.as_view(), name='ml-tool-list'),
+    path('imports/transkribus/', ImportTranskribus.as_view(), name='import-transkribus'),
     path('imports/<uuid:pk>/', DataImportDetails.as_view(), name='import-details'),
     path('imports/<uuid:pk>/elements/', DataImportElements.as_view(), name='import-elements'),
     path('imports/<uuid:pk>/retry/', DataImportRetry.as_view(), name='import-retry'),
@@ -170,6 +172,7 @@ api = [
     path('user/token/', UserEmailVerification.as_view(), name='user-token'),
     path('user/password-reset/', PasswordReset.as_view(), name='password-reset'),
     path('user/password-reset/confirm/', PasswordResetConfirm.as_view(), name='password-reset-confirm'),
+    path('user/transkribus/', UpdateUserTranskribus.as_view(), name='user-transkribus'),
 
     # Management tools
     path('reindex/', ReindexStart.as_view(), name='reindex-start'),
diff --git a/arkindex/project/config.py b/arkindex/project/config.py
index d09d1fe2e2..8607f8a508 100644
--- a/arkindex/project/config.py
+++ b/arkindex/project/config.py
@@ -73,6 +73,10 @@ def get_settings_parser(base_dir):
     influxdb_parser = parser.add_subparser('influxdb', default={})
     influxdb_parser.add_option('api_url', type=str, default='http://localhost:8086/')
 
+    transkribus_parser = parser.add_subparser('transkribus', default={})
+    transkribus_parser.add_option('email', type=str, default=None)
+    transkribus_parser.add_option('password', type=str, default=None)
+
     gitlab_parser = parser.add_subparser('gitlab', default={})
     gitlab_parser.add_option('app_id', type=str, default=None)
     gitlab_parser.add_option('app_secret', type=str, default=None)
@@ -115,6 +119,7 @@ def get_settings_parser(base_dir):
     features_parser.add_option('signup', type=bool, default=True)
     features_parser.add_option('selection', type=bool, default=True)
     features_parser.add_option('search', type=bool, default=True)
+    features_parser.add_option('transkribus', type=bool, default=True)
     features_parser.add_option('workers', type=bool, default=False)
 
     cache_parser = ConfigParser()
diff --git a/arkindex/project/default_corpus.py b/arkindex/project/default_corpus.py
index b58b92337f..765e8206a1 100644
--- a/arkindex/project/default_corpus.py
+++ b/arkindex/project/default_corpus.py
@@ -38,3 +38,25 @@ DEFAULT_CORPUS_TYPES = [
         'display_name': 'Signature',
     }
 ]
+
+DEFAULT_TRANSKRIBUS_TYPES = [
+    {
+        'slug': 'volume',
+        'display_name': 'Volume',
+        'folder': True,
+    },
+    {
+        'slug': 'page',
+        'display_name': 'Page',
+    },
+    {
+        'slug': 'text_line',
+        'display_name': 'Text line',
+        'allowed_transcription': TranscriptionType.Line,
+    },
+    {
+        'slug': 'paragraph',
+        'display_name': 'Paragraph',
+        'allowed_transcription': TranscriptionType.Paragraph,
+    }
+]
diff --git a/arkindex/project/openapi/patch.yml b/arkindex/project/openapi/patch.yml
index 7425f728eb..696fe21baf 100644
--- a/arkindex/project/openapi/patch.yml
+++ b/arkindex/project/openapi/patch.yml
@@ -234,6 +234,24 @@ paths:
       description: Partially update a worker run
     delete:
       description: Delete a worker run
+  /api/v1/imports/transkribus/:
+    post:
+      responses:
+        '400':
+          description: An error occurred while validating the collection ID.
+          content:
+            application/json:
+              schema:
+                properties:
+                  collection_id:
+                    type: string
+                    description: Errors that occurred during collection ID field validation.
+                    readOnly: true
+              examples:
+                user-permission:
+                  summary: An error where the user is not a member of the collection.
+                  value:
+                    collection_id: User user@example.com is not a member of the collection 1
   /api/v1/oauth/credentials/{id}/:
     delete:
       description: Delete OAuth credentials. This may disable access to some Git repositories.
diff --git a/arkindex/project/settings.py b/arkindex/project/settings.py
index a6459ad37d..31fedc7fb2 100644
--- a/arkindex/project/settings.py
+++ b/arkindex/project/settings.py
@@ -238,6 +238,10 @@ IIIF_DOWNLOAD_TIMEOUT = (30, 60)
 # check_images sample size when checking all servers
 CHECK_IMAGES_SAMPLE_SIZE = 20
 
+# Transkribus credentials
+TRANSKRIBUS_EMAIL = conf['transkribus']['email']
+TRANSKRIBUS_PASSWORD = conf['transkribus']['password']
+
 # GitLab OAuth
 GITLAB_APP_ID = conf['gitlab']['app_id']
 GITLAB_APP_SECRET = conf['gitlab']['app_secret']
diff --git a/arkindex/project/tests/config_samples/defaults.yaml b/arkindex/project/tests/config_samples/defaults.yaml
index 31bb76bfe6..f2850a47c8 100644
--- a/arkindex/project/tests/config_samples/defaults.yaml
+++ b/arkindex/project/tests/config_samples/defaults.yaml
@@ -34,6 +34,7 @@ features:
   search: true
   selection: true
   signup: true
+  transkribus: true
   workers: false
 gitlab:
   app_id: null
@@ -73,3 +74,6 @@ static:
   mirador_url: null
   root_path: null
   universal_viewer_url: null
+transkribus:
+  email: null
+  password: null
diff --git a/arkindex/project/tests/config_samples/override.yaml b/arkindex/project/tests/config_samples/override.yaml
index 96d3482c2b..d9695aa224 100644
--- a/arkindex/project/tests/config_samples/override.yaml
+++ b/arkindex/project/tests/config_samples/override.yaml
@@ -48,6 +48,7 @@ features:
   search: false
   selection: false
   signup: false
+  transkribus: false
   workers: true
 gitlab:
   app_id: a
@@ -88,3 +89,6 @@ static:
   mirador_url: gopher://mirador/
   root_path: /
   universal_viewer_url: gopher://uv/
+transkribus:
+  email: nope@nope
+  password: superSecret
diff --git a/arkindex/users/admin.py b/arkindex/users/admin.py
index 4d8a4d499b..e06367de8d 100644
--- a/arkindex/users/admin.py
+++ b/arkindex/users/admin.py
@@ -68,7 +68,7 @@ class UserAdmin(BaseUserAdmin):
     list_display = ('email', 'is_admin')
     list_filter = ('is_admin', 'groups')
     fieldsets = (
-        (None, {'fields': ('email', 'verified_email', 'password')}),
+        (None, {'fields': ('email', 'verified_email', 'password', 'transkribus_email')}),
         ('Permissions', {'fields': ('is_admin', 'groups')}),
     )
     # add_fieldsets is not a standard ModelAdmin attribute. UserAdmin
diff --git a/arkindex/users/api.py b/arkindex/users/api.py
index 77cbbce5e8..818e1a33d2 100644
--- a/arkindex/users/api.py
+++ b/arkindex/users/api.py
@@ -11,7 +11,7 @@ from django.utils.http import urlsafe_base64_encode
 from rest_framework import status
 from rest_framework.views import APIView
 from rest_framework.generics import \
-    ListAPIView, RetrieveAPIView, RetrieveDestroyAPIView, RetrieveUpdateDestroyAPIView, CreateAPIView
+    ListAPIView, RetrieveAPIView, RetrieveDestroyAPIView, RetrieveUpdateDestroyAPIView, CreateAPIView, UpdateAPIView
 from rest_framework.response import Response
 from rest_framework.exceptions import AuthenticationFailed, ValidationError, PermissionDenied
 from arkindex.project.permissions import IsAuthenticatedOrReadOnly, IsVerified
@@ -22,6 +22,7 @@ from arkindex.users.serializers import (
     OAuthCredentialsSerializer, OAuthProviderClassSerializer, OAuthRetrySerializer,
     UserSerializer, NewUserSerializer, EmailLoginSerializer,
     PasswordResetSerializer, PasswordResetConfirmSerializer,
+    UpdateUserTranskribusSerializer
 )
 import urllib.parse
 import logging
@@ -311,6 +312,21 @@ class PasswordResetConfirm(CreateAPIView):
     }
 
 
+class UpdateUserTranskribus(UpdateAPIView):
+    """
+    Update and validate Transkribus account
+    Only the user's email is stored in our database
+    """
+    permission_classes = (IsVerified, )
+    serializer_class = UpdateUserTranskribusSerializer
+    openapi_overrides = {
+        'tags': ['users'],
+    }
+
+    def get_object(self):
+        return self.request.user
+
+
 class OAuthSignIn(APIView):
     """
     Start the OAuth authentication code flow for a given provider
diff --git a/arkindex/users/migrations/0005_user_transkribus_email.py b/arkindex/users/migrations/0005_user_transkribus_email.py
new file mode 100644
index 0000000000..ae38739bc3
--- /dev/null
+++ b/arkindex/users/migrations/0005_user_transkribus_email.py
@@ -0,0 +1,18 @@
+# Generated by Django 2.2.13 on 2020-08-05 07:24
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('users', '0004_internal_bool'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='user',
+            name='transkribus_email',
+            field=models.EmailField(blank=True, max_length=255, null=True),
+        ),
+    ]
diff --git a/arkindex/users/models.py b/arkindex/users/models.py
index 352e0e41a7..f4b1f7409c 100644
--- a/arkindex/users/models.py
+++ b/arkindex/users/models.py
@@ -12,6 +12,11 @@ class User(AbstractBaseUser):
         max_length=255,
         unique=True,
     )
+    transkribus_email = models.EmailField(
+        max_length=255,
+        null=True,
+        blank=True,
+    )
     is_active = models.BooleanField(default=True)
     is_internal = models.BooleanField(default=False)
     is_admin = models.BooleanField(default=False)
diff --git a/arkindex/users/serializers.py b/arkindex/users/serializers.py
index 08e368c8ed..860616e07e 100644
--- a/arkindex/users/serializers.py
+++ b/arkindex/users/serializers.py
@@ -6,6 +6,15 @@ from django.contrib.auth.tokens import default_token_generator
 from django.contrib.auth.password_validation import validate_password
 from arkindex.users.models import OAuthCredentials, OAuthStatus, User
 from arkindex.project.serializer_fields import EnumField
+from transkribus import TranskribusAPI
+import logging
+
+logging.basicConfig(
+    level=logging.INFO,
+    format='[%(levelname)s] %(message)s',
+)
+logger = logging.getLogger(__name__)
+transkribus_client = TranskribusAPI()
 
 
 def validate_user_password(user, data):
@@ -54,6 +63,7 @@ class OAuthRetrySerializer(serializers.Serializer):
 class UserSerializer(serializers.ModelSerializer):
 
     features = serializers.SerializerMethodField(read_only=True)
+    transkribus_import_email = serializers.SerializerMethodField(read_only=True)
 
     class Meta:
         model = User
@@ -65,6 +75,8 @@ class UserSerializer(serializers.ModelSerializer):
             'is_admin',
             'auth_token',
             'features',
+            'transkribus_email',
+            'transkribus_import_email',
         )
         extra_kwargs = {
             'id': {'read_only': True},
@@ -79,11 +91,15 @@ class UserSerializer(serializers.ModelSerializer):
             'verified_email': {'read_only': True},
             'is_admin': {'read_only': True},
             'auth_token': {'read_only': True},
+            'transkribus_email': {'read_only': True},
         }
 
     def get_features(self, *args, **kwargs):
         return settings.ARKINDEX_FEATURES
 
+    def get_transkribus_import_email(self, *args, **kwargs):
+        return settings.TRANSKRIBUS_EMAIL
+
     def update(self, instance, validated_data):
         if 'password' in validated_data:
             instance.set_password(validated_data.pop('password'))
@@ -164,3 +180,32 @@ class PasswordResetConfirmSerializer(serializers.Serializer):
             return
         user.set_password(self.validated_data['password'])
         user.save()
+
+
+class UpdateUserTranskribusSerializer(serializers.Serializer):
+    """
+    Verify Transkribus credentials by logging in to Transkribus,
+    then save only the Transkribus email on the user
+    """
+    transkribus_email = serializers.EmailField()
+    transkribus_password = serializers.CharField(write_only=True, style={'input_type': 'password'})
+
+    def validate(self, data):
+        transkribus_email = data.get('transkribus_email')
+        transkribus_password = data.pop('transkribus_password')  # popped so the password is not in the returned data
+
+        # Check the credentials by attempting a login; any failure is reported as invalid credentials
+        try:
+            transkribus_client.login(email=transkribus_email, password=transkribus_password)
+        except Exception:  # NOTE(review): also catches network/API errors, not only bad credentials; confirm intended
+            logger.info("Invalid Transkribus email or password")
+            raise serializers.ValidationError(
+                {"__all__" : ["The email or password is incorrect"]}
+            )
+
+        return data
+
+    def update(self, instance, validated_data):
+        instance.transkribus_email = validated_data.get('transkribus_email', None)
+        instance.save()
+        return instance
diff --git a/arkindex/users/tests/test_providers.py b/arkindex/users/tests/test_providers.py
index f922a3cee9..82d47d48a7 100644
--- a/arkindex/users/tests/test_providers.py
+++ b/arkindex/users/tests/test_providers.py
@@ -19,11 +19,12 @@ class TestProviders(FixtureTestCase):
         cls.provider_mock = MagicMock()
         cls.provider_mock.slug = 'provider-slug'
         cls.provider_mock().credentials = cls.creds
-        cls.old_providers = providers.oauth_providers
+        cls.old_providers = providers.oauth_providers.copy()
         providers.oauth_providers = [cls.provider_mock]
 
     @classmethod
     def tearDownClass(cls):
+        super().tearDownClass()
         providers.oauth_providers = cls.old_providers
 
     def test_get_provider(self):
diff --git a/arkindex/users/tests/test_update_transkribus_email.py b/arkindex/users/tests/test_update_transkribus_email.py
new file mode 100644
index 0000000000..40be88cfdf
--- /dev/null
+++ b/arkindex/users/tests/test_update_transkribus_email.py
@@ -0,0 +1,39 @@
+from unittest.mock import patch
+from rest_framework import status
+from django.urls import reverse
+from django.test import override_settings
+from arkindex.project.tests import FixtureAPITestCase
+
+
+@override_settings(TRANSKRIBUS_EMAIL=None, TRANSKRIBUS_PASSWORD=None)
+class TestUpdateTranskribusEmail(FixtureAPITestCase):
+
+    def test_requires_login(self):
+        response = self.client.patch(reverse('api:user-transkribus'), {
+            'transkribus_email': 'nope@nope.com',
+            'transkribus_password': '42'
+        }, format='json')
+        self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
+
+    def test_wrong_credentials(self):
+        self.client.force_login(self.user)
+        response = self.client.patch(reverse('api:user-transkribus'), {
+            'transkribus_email': 'nope@nope.com',
+            'transkribus_password': '42'
+        }, format='json')
+        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+        self.assertEqual(response.json(), {"__all__": ["The email or password is incorrect"]})
+
+    @patch('transkribus.TranskribusAPI.login')
+    def test_update_transkribus_email(self, mock_transkribus):
+        mock_transkribus.return_value = {"email": "nope@nope.com"}
+        self.client.force_login(self.user)
+
+        self.assertIsNone(self.user.transkribus_email)
+        response = self.client.patch(reverse('api:user-transkribus'), {
+            'transkribus_email': 'nope@nope.com',
+            'transkribus_password': '42'
+        }, format='json')
+        self.assertEqual(response.status_code, status.HTTP_200_OK)
+        self.user.refresh_from_db()
+        self.assertEqual(self.user.transkribus_email, "nope@nope.com")
diff --git a/requirements.txt b/requirements.txt
index 0dd9cae66e..106d8b632e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -27,5 +27,7 @@ requests==2.22
 responses==0.10.7
 sentry-sdk==0.14.3
 tenacity==6.2
+transkribus-client>=0.1.1
+git+https://gitlab.com/arkindex/transkribus.git#egg=transkribus-client
 uritemplate==3
 urllib3==1.22
-- 
GitLab