From 28013427e2c050f0b88f80e939f864e8d1186219 Mon Sep 17 00:00:00 2001 From: manon blanco <blanco@teklia.com> Date: Wed, 26 Aug 2020 11:58:48 +0000 Subject: [PATCH] New endpoint to import a Transkribus collection --- .gitlab-ci.yml | 1 + arkindex/dataimport/api.py | 57 +++++++- .../0017_dataimport_collection_id.py | 18 +++ arkindex/dataimport/models.py | 21 +++ arkindex/dataimport/serializers/imports.py | 37 ++++++ .../tests/test_transkribus_import.py | 123 ++++++++++++++++++ arkindex/documents/models.py | 8 +- arkindex/project/api_v1.py | 5 +- arkindex/project/config.py | 5 + arkindex/project/default_corpus.py | 22 ++++ arkindex/project/openapi/patch.yml | 18 +++ arkindex/project/settings.py | 4 + .../tests/config_samples/defaults.yaml | 4 + .../tests/config_samples/override.yaml | 4 + arkindex/users/admin.py | 2 +- arkindex/users/api.py | 18 ++- .../migrations/0005_user_transkribus_email.py | 18 +++ arkindex/users/models.py | 5 + arkindex/users/serializers.py | 45 +++++++ arkindex/users/tests/test_providers.py | 3 +- .../tests/test_update_transkribus_email.py | 39 ++++++ requirements.txt | 2 + 22 files changed, 453 insertions(+), 6 deletions(-) create mode 100644 arkindex/dataimport/migrations/0017_dataimport_collection_id.py create mode 100644 arkindex/dataimport/tests/test_transkribus_import.py create mode 100644 arkindex/users/migrations/0005_user_transkribus_email.py create mode 100644 arkindex/users/tests/test_update_transkribus_email.py diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index ce4e002372..0e313ad29d 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -15,6 +15,7 @@ stages: # Custom line to install our own deps from Git using GitLab CI credentials - "pip install -e git+https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.com/arkindex/common#egg=arkindex-common" - "pip install -e git+https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.com/arkindex/ponos#egg=ponos-server" + - "pip install -e 
git+https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.com/arkindex/transkribus#egg=transkribus-client" - pip install -r tests-requirements.txt codecov - "echo 'database: {host: postgres, port: 5432}' > $CONFIG_PATH" diff --git a/arkindex/dataimport/api.py b/arkindex/dataimport/api.py index 43e4eb0e11..a82f342e0e 100644 --- a/arkindex/dataimport/api.py +++ b/arkindex/dataimport/api.py @@ -25,7 +25,8 @@ from arkindex.dataimport.serializers.git import \ RepositorySerializer, RepositoryStartImportSerializer, ExternalRepositorySerializer, RevisionSerializer from arkindex.dataimport.serializers.imports import ( DataImportLightSerializer, DataImportSerializer, - DataImportFromFilesSerializer, ElementsWorkflowSerializer, WorkerRunLightSerializer, WorkerRunSerializer + DataImportFromFilesSerializer, ElementsWorkflowSerializer, WorkerRunLightSerializer, WorkerRunSerializer, + ImportTranskribusSerializer ) from arkindex.dataimport.serializers.workers import WorkerSerializer, WorkerVersionSerializer from arkindex.users.models import OAuthCredentials @@ -716,3 +717,57 @@ class WorkerRunDetails(CorpusACLMixin, RetrieveUpdateDestroyAPIView): instance.dataimport.worker_runs.filter(parents__contains=[instance.id]).update(parents=ArrayRemove('parents', instance.id)) return super().perform_destroy(instance) + + +class ImportTranskribus(CreateAPIView): + """ + Start a data import from Transkribus email and collection ID. 
+ """ + + permission_classes = (IsVerified, ) + serializer_class = ImportTranskribusSerializer + openapi_overrides = { + 'operationId': 'CreateImportTranskribus', + 'description': 'Create a data import from Transkribus collection ID.', + 'tags': ['imports'], + 'responses': { + '201': { + 'content': { + 'application/json': { + 'schema': AutoSchema()._map_serializer(DataImportSerializer()) + } + } + } + } + } + + def create(self, *args, **kwargs): + if not settings.ARKINDEX_FEATURES['transkribus']: + raise ValidationError(['Transkribus import is unavailable due to the transkribus feature being disabled.']) + super().create(*args, **kwargs) + return Response( + status=status.HTTP_201_CREATED, + data=DataImportSerializer(self.dataimport, context={'request': self.request}).data, + ) + + def perform_create(self, serializer): + collection_id = serializer.validated_data['collection_id'] + + # Create corpus + corpus = Corpus.objects.create( + name=f"Transkribus collection n°{collection_id}", + ) + corpus.corpus_right.create( + user=self.request.user, + can_write=True, + can_admin=True, + ) + corpus.create_default_transkribus_types() + + # Create dataimport + self.dataimport = corpus.imports.create( + creator=self.request.user, + mode=DataImportMode.Transkribus, + collection_id=collection_id + ) + self.dataimport.start() diff --git a/arkindex/dataimport/migrations/0017_dataimport_collection_id.py b/arkindex/dataimport/migrations/0017_dataimport_collection_id.py new file mode 100644 index 0000000000..9767eae85e --- /dev/null +++ b/arkindex/dataimport/migrations/0017_dataimport_collection_id.py @@ -0,0 +1,18 @@ +# Generated by Django 2.2.13 on 2020-08-03 13:52 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('dataimport', '0016_new_jsonfield'), + ] + + operations = [ + migrations.AddField( + model_name='dataimport', + name='collection_id', + field=models.PositiveIntegerField(blank=True, null=True), + ), + ] diff 
--git a/arkindex/dataimport/models.py b/arkindex/dataimport/models.py index 7111c0a6cb..1b32ce999e 100644 --- a/arkindex/dataimport/models.py +++ b/arkindex/dataimport/models.py @@ -69,6 +69,9 @@ class DataImport(IndexableModel): related_name='imports', ) + # Used to define the collection ID for Transkribus import + collection_id = models.PositiveIntegerField(null=True, blank=True) + class Meta: ordering = ['corpus', '-created'] @@ -153,6 +156,19 @@ class DataImport(IndexableModel): }, } + elif self.mode == DataImportMode.Transkribus: + args = [ + 'python', '-m', 'arkindex_tasks.import_transkribus', + str(self.collection_id), + '--corpus', str(self.corpus.id), + ] + tasks = { + import_task_name: { + 'image': settings.ARKINDEX_TASKS_IMAGE, + 'command': ' '.join(args), + } + } + else: tasks = { import_task_name: { @@ -203,6 +219,11 @@ class DataImport(IndexableModel): # Build the workflow in db recipe = settings.PONOS_RECIPE.copy() recipe['tasks'] = tasks + if self.mode == DataImportMode.Transkribus: + if 'env' not in recipe: + recipe['env'] = {} + recipe['env']['TRANSKRIBUS_EMAIL'] = settings.TRANSKRIBUS_EMAIL + recipe['env']['TRANSKRIBUS_PASSWORD'] = settings.TRANSKRIBUS_PASSWORD return Workflow.objects.create(recipe=yaml.dump(recipe)) def start(self, ml_tools=[], chunks=None, thumbnails=False): diff --git a/arkindex/dataimport/serializers/imports.py b/arkindex/dataimport/serializers/imports.py index a324aaee74..824d4b983d 100644 --- a/arkindex/dataimport/serializers/imports.py +++ b/arkindex/dataimport/serializers/imports.py @@ -10,6 +10,9 @@ from arkindex.documents.serializers.elements import ElementSlimSerializer from arkindex.dataimport.models import DataImport, DataFile, WorkerRun, WorkerVersion from arkindex.dataimport.serializers.git import RevisionSerializer from arkindex.dataimport.serializers.ml_tool import MLToolTaskSerializer +from transkribus import TranskribusAPI + +transkribus_client = TranskribusAPI(email=settings.TRANSKRIBUS_EMAIL, 
password=settings.TRANSKRIBUS_PASSWORD) class DataImportLightSerializer(serializers.ModelSerializer): @@ -299,3 +302,37 @@ class WorkerRunSerializer(WorkerRunLightSerializer): raise serializers.ValidationError({'__all__': ["Can't update a WorkerRun related to a DataImport that has already started"]}) return data + + +class ImportTranskribusSerializer(serializers.Serializer): + """ + Serialize a Transkribus import + """ + collection_id = serializers.IntegerField(min_value=1) + + def validate(self, data): + collection_id = data.get('collection_id') + + # Check Transkribus email + transkribus_email = self.context['request'].user.transkribus_email + if not transkribus_email: + raise serializers.ValidationError( + {"__all__": ["You have not register your transkribus email"]} + ) + + # Check Arkindex's right + try: + users = transkribus_client.list_user_collection(collection_id) + except Exception: + raise serializers.ValidationError( + {"collection_id" : [f"User {settings.TRANSKRIBUS_EMAIL} is not a member of the collection {collection_id}"]} + ) + + # Check user's right + user = next(filter(lambda user: user["email"] == transkribus_email, users), None) + if not user: + raise serializers.ValidationError( + {"collection_id": [f"User {transkribus_email} is not a member of the collection {collection_id}"]} + ) + + return data diff --git a/arkindex/dataimport/tests/test_transkribus_import.py b/arkindex/dataimport/tests/test_transkribus_import.py new file mode 100644 index 0000000000..4f8c119f8c --- /dev/null +++ b/arkindex/dataimport/tests/test_transkribus_import.py @@ -0,0 +1,123 @@ +from unittest.mock import patch +from django.urls import reverse +from django.test import override_settings +from django.conf import settings +from rest_framework import status +from arkindex_common.enums import DataImportMode +from arkindex.dataimport.models import DataImport +from arkindex.project.tests import FixtureAPITestCase +from arkindex.project.default_corpus import 
DEFAULT_TRANSKRIBUS_TYPES +from ponos.models import State +from arkindex.users.models import User +import yaml + + +@override_settings(TRANSKRIBUS_EMAIL="arkindex@teklia.com", TRANSKRIBUS_PASSWORD=None) +class TestTranskribusImport(FixtureAPITestCase): + """ + Test transkribus import + """ + + @classmethod + def setUpTestData(cls): + super().setUpTestData() + User.objects.update(transkribus_email="nope@nope.fr") + + def test_requires_login(self): + response = self.client.post(reverse("api:import-transkribus")) + self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) + + @override_settings(ARKINDEX_FEATURES={"transkribus": False}) + def test_requires_flag(self): + self.client.force_login(self.user) + response = self.client.post(reverse("api:import-transkribus"), { + "collection_id": "12345", + }, format="json") + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertEqual(response.json(), ["Transkribus import is unavailable due to the transkribus feature being disabled."]) + + def test_requires_transkribus_email(self): + User.objects.update(transkribus_email=None) + self.client.force_login(self.user) + response = self.client.post(reverse("api:import-transkribus"), { + "collection_id": "12345", + }, format="json") + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertEqual(response.json(), {"__all__": ["You have not register your transkribus email"]}) + + def test_arkindex_has_not_access(self): + self.client.force_login(self.user) + response = self.client.post(reverse("api:import-transkribus"), { + "collection_id": "12345", + }, format="json") + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertEqual(response.json(), {"collection_id" : ["User arkindex@teklia.com is not a member of the collection 12345"]}) + + @patch("transkribus.TranskribusAPI.list_user_collection") + def test_user_has_not_access(self, mock_transkribus): + mock_transkribus.return_value = [{"email": 
"arkindex@teklia.com"}] + + self.client.force_login(self.user) + response = self.client.post(reverse("api:import-transkribus"), { + "collection_id": "12345", + }, format="json") + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertEqual(response.json(), {"collection_id": ["User nope@nope.fr is not a member of the collection 12345"]}) + + @override_settings( + PONOS_RECIPE={} + ) + @patch("transkribus.TranskribusAPI.list_user_collection") + def test_create_import(self, mock_transkribus): + mock_transkribus.return_value = [{"email": "nope@nope.fr"}, {"email": "arkindex@teklia.com"}] + + self.client.force_login(self.user) + response = self.client.post(reverse("api:import-transkribus"), { + "collection_id": "12345", + }, format="json") + self.assertEqual(response.status_code, status.HTTP_201_CREATED) + data = response.json() + dataimport = DataImport.objects.get(id=data["id"]) + self.assertEqual(dataimport.mode, DataImportMode.Transkribus) + self.assertEqual(dataimport.collection_id, 12345) + corpus = dataimport.corpus + right = corpus.corpus_right.get(user=self.user) + self.assertTrue(right.can_write) + self.assertTrue(right.can_admin) + self.assertEqual(corpus.name, "Transkribus collection n°12345") + self.assertEqual(corpus.description, "") + self.assertEqual(corpus.public, False) + # Assert defaults types are set on the new corpus + self.assertCountEqual( + list(corpus.types.values( + "slug", + "display_name", + "folder", + "allowed_transcription" + )), + [{ + "folder": False, + "allowed_transcription": None, + **values + } for values in DEFAULT_TRANSKRIBUS_TYPES] + ) + self.assertEqual(dataimport.state, State.Unscheduled) + self.assertIsNotNone(dataimport.workflow) + recipe = yaml.safe_load(dataimport.workflow.recipe) + self.assertDictEqual(recipe, { + 'env': { + 'TRANSKRIBUS_EMAIL': settings.TRANSKRIBUS_EMAIL, + 'TRANSKRIBUS_PASSWORD': settings.TRANSKRIBUS_PASSWORD + }, + 'tasks': { + 'import': { + 'command': 'python -m 
arkindex_tasks.import_transkribus 12345 --corpus {}'.format(corpus.id), + 'image': 'registry.gitlab.com/arkindex/tasks' + }, + 'thumbnails': { + 'command': 'python3 -m arkindex_tasks.generate_thumbnails /data/import/elements.json', + 'image': 'registry.gitlab.com/arkindex/tasks', + 'parents': ['import'] + } + } + }) diff --git a/arkindex/documents/models.py b/arkindex/documents/models.py index 14d259dd1c..438ca87624 100644 --- a/arkindex/documents/models.py +++ b/arkindex/documents/models.py @@ -8,7 +8,7 @@ from django.core.exceptions import ValidationError from enumfields import EnumField, Enum from arkindex_common.enums import TranscriptionType, MetaType, EntityType from arkindex_common.ml_tool import MLToolType -from arkindex.project.default_corpus import DEFAULT_CORPUS_TYPES +from arkindex.project.default_corpus import DEFAULT_CORPUS_TYPES, DEFAULT_TRANSKRIBUS_TYPES from arkindex.project.models import IndexableModel from arkindex.project.fields import ArrayField from arkindex.project.elastic import ESTranscription, ESElement, ESEntity @@ -89,6 +89,12 @@ class Corpus(IndexableModel): for values in DEFAULT_CORPUS_TYPES ) + def create_default_transkribus_types(self): + self.types.bulk_create( + ElementType(corpus=self, **values) + for values in DEFAULT_TRANSKRIBUS_TYPES + ) + class ElementType(models.Model): id = models.UUIDField(default=uuid.uuid4, primary_key=True, editable=False) diff --git a/arkindex/project/api_v1.py b/arkindex/project/api_v1.py index 892635a602..6597310f77 100644 --- a/arkindex/project/api_v1.py +++ b/arkindex/project/api_v1.py @@ -28,12 +28,13 @@ from arkindex.dataimport.api import ( RepositoryList, RepositoryRetrieve, RepositoryStartImport, DataFileCreate, GitRepositoryImportHook, AvailableRepositoriesList, RevisionRetrieve, MLToolList, CorpusWorkflow, WorkerList, WorkerVersionList, WorkerVersionRetrieve, - WorkerRunList, WorkerRunDetails + WorkerRunList, WorkerRunDetails, ImportTranskribus ) from arkindex.images.api import ImageCreate, 
IIIFURLCreate, IIIFInformationCreate, ImageRetrieve, ImageElements from arkindex.users.api import ( ProvidersList, CredentialsList, CredentialsRetrieve, OAuthSignIn, OAuthRetry, OAuthCallback, UserRetrieve, UserCreate, UserEmailLogin, UserEmailVerification, PasswordReset, PasswordResetConfirm, + UpdateUserTranskribus ) # Cache the OpenAPI schema view for a day @@ -136,6 +137,7 @@ api = [ path('imports/', DataImportsList.as_view(), name='import-list'), path('imports/fromfiles/', DataImportFromFiles.as_view(), name='import-from-files'), path('imports/mltools/', MLToolList.as_view(), name='ml-tool-list'), + path('imports/transkribus/', ImportTranskribus.as_view(), name='import-transkribus'), path('imports/<uuid:pk>/', DataImportDetails.as_view(), name='import-details'), path('imports/<uuid:pk>/elements/', DataImportElements.as_view(), name='import-elements'), path('imports/<uuid:pk>/retry/', DataImportRetry.as_view(), name='import-retry'), @@ -170,6 +172,7 @@ api = [ path('user/token/', UserEmailVerification.as_view(), name='user-token'), path('user/password-reset/', PasswordReset.as_view(), name='password-reset'), path('user/password-reset/confirm/', PasswordResetConfirm.as_view(), name='password-reset-confirm'), + path('user/transkribus/', UpdateUserTranskribus.as_view(), name='user-transkribus'), # Management tools path('reindex/', ReindexStart.as_view(), name='reindex-start'), diff --git a/arkindex/project/config.py b/arkindex/project/config.py index d09d1fe2e2..8607f8a508 100644 --- a/arkindex/project/config.py +++ b/arkindex/project/config.py @@ -73,6 +73,10 @@ def get_settings_parser(base_dir): influxdb_parser = parser.add_subparser('influxdb', default={}) influxdb_parser.add_option('api_url', type=str, default='http://localhost:8086/') + transkribus_parser = parser.add_subparser('transkribus', default={}) + transkribus_parser.add_option('email', type=str, default=None) + transkribus_parser.add_option('password', type=str, default=None) + gitlab_parser = 
parser.add_subparser('gitlab', default={}) gitlab_parser.add_option('app_id', type=str, default=None) gitlab_parser.add_option('app_secret', type=str, default=None) @@ -115,6 +119,7 @@ def get_settings_parser(base_dir): features_parser.add_option('signup', type=bool, default=True) features_parser.add_option('selection', type=bool, default=True) features_parser.add_option('search', type=bool, default=True) + features_parser.add_option('transkribus', type=bool, default=True) features_parser.add_option('workers', type=bool, default=False) cache_parser = ConfigParser() diff --git a/arkindex/project/default_corpus.py b/arkindex/project/default_corpus.py index b58b92337f..765e8206a1 100644 --- a/arkindex/project/default_corpus.py +++ b/arkindex/project/default_corpus.py @@ -38,3 +38,25 @@ DEFAULT_CORPUS_TYPES = [ 'display_name': 'Signature', } ] + +DEFAULT_TRANSKRIBUS_TYPES = [ + { + 'slug': 'volume', + 'display_name': 'Volume', + 'folder': True, + }, + { + 'slug': 'page', + 'display_name': 'Page', + }, + { + 'slug': 'text_line', + 'display_name': 'Text line', + 'allowed_transcription': TranscriptionType.Line, + }, + { + 'slug': 'paragraph', + 'display_name': 'Paragraph', + 'allowed_transcription': TranscriptionType.Paragraph, + } +] diff --git a/arkindex/project/openapi/patch.yml b/arkindex/project/openapi/patch.yml index 7425f728eb..696fe21baf 100644 --- a/arkindex/project/openapi/patch.yml +++ b/arkindex/project/openapi/patch.yml @@ -234,6 +234,24 @@ paths: description: Partially update a worker run delete: description: Delete a worker run + /api/v1/imports/transkribus/: + post: + responses: + '400': + description: An error occurred while validating the collection ID. + content: + application/json: + schema: + properties: + collection_id: + type: string + description: Errors that occurred during collection ID field validation. + readOnly: true + examples: + user-permission: + summary: An error where the user is not a member of the collection. 
+ value: + collection_id: User user@example.com is not a member of the collection 1 /api/v1/oauth/credentials/{id}/: delete: description: Delete OAuth credentials. This may disable access to some Git repositories. diff --git a/arkindex/project/settings.py b/arkindex/project/settings.py index a6459ad37d..31fedc7fb2 100644 --- a/arkindex/project/settings.py +++ b/arkindex/project/settings.py @@ -238,6 +238,10 @@ IIIF_DOWNLOAD_TIMEOUT = (30, 60) # check_images sample size when checking all servers CHECK_IMAGES_SAMPLE_SIZE = 20 +# Transkribus credentials +TRANSKRIBUS_EMAIL = conf['transkribus']['email'] +TRANSKRIBUS_PASSWORD = conf['transkribus']['password'] + # GitLab OAuth GITLAB_APP_ID = conf['gitlab']['app_id'] GITLAB_APP_SECRET = conf['gitlab']['app_secret'] diff --git a/arkindex/project/tests/config_samples/defaults.yaml b/arkindex/project/tests/config_samples/defaults.yaml index 31bb76bfe6..f2850a47c8 100644 --- a/arkindex/project/tests/config_samples/defaults.yaml +++ b/arkindex/project/tests/config_samples/defaults.yaml @@ -34,6 +34,7 @@ features: search: true selection: true signup: true + transkribus: true workers: false gitlab: app_id: null @@ -73,3 +74,6 @@ static: mirador_url: null root_path: null universal_viewer_url: null +transkribus: + email: null + password: null diff --git a/arkindex/project/tests/config_samples/override.yaml b/arkindex/project/tests/config_samples/override.yaml index 96d3482c2b..d9695aa224 100644 --- a/arkindex/project/tests/config_samples/override.yaml +++ b/arkindex/project/tests/config_samples/override.yaml @@ -48,6 +48,7 @@ features: search: false selection: false signup: false + transkribus: false workers: true gitlab: app_id: a @@ -88,3 +89,6 @@ static: mirador_url: gopher://mirador/ root_path: / universal_viewer_url: gopher://uv/ +transkribus: + email: nope@nope + password: superSecret diff --git a/arkindex/users/admin.py b/arkindex/users/admin.py index 4d8a4d499b..e06367de8d 100644 --- a/arkindex/users/admin.py +++ 
b/arkindex/users/admin.py @@ -68,7 +68,7 @@ class UserAdmin(BaseUserAdmin): list_display = ('email', 'is_admin') list_filter = ('is_admin', 'groups') fieldsets = ( - (None, {'fields': ('email', 'verified_email', 'password')}), + (None, {'fields': ('email', 'verified_email', 'password', 'transkribus_email')}), ('Permissions', {'fields': ('is_admin', 'groups')}), ) # add_fieldsets is not a standard ModelAdmin attribute. UserAdmin diff --git a/arkindex/users/api.py b/arkindex/users/api.py index 77cbbce5e8..818e1a33d2 100644 --- a/arkindex/users/api.py +++ b/arkindex/users/api.py @@ -11,7 +11,7 @@ from django.utils.http import urlsafe_base64_encode from rest_framework import status from rest_framework.views import APIView from rest_framework.generics import \ - ListAPIView, RetrieveAPIView, RetrieveDestroyAPIView, RetrieveUpdateDestroyAPIView, CreateAPIView + ListAPIView, RetrieveAPIView, RetrieveDestroyAPIView, RetrieveUpdateDestroyAPIView, CreateAPIView, UpdateAPIView from rest_framework.response import Response from rest_framework.exceptions import AuthenticationFailed, ValidationError, PermissionDenied from arkindex.project.permissions import IsAuthenticatedOrReadOnly, IsVerified @@ -22,6 +22,7 @@ from arkindex.users.serializers import ( OAuthCredentialsSerializer, OAuthProviderClassSerializer, OAuthRetrySerializer, UserSerializer, NewUserSerializer, EmailLoginSerializer, PasswordResetSerializer, PasswordResetConfirmSerializer, + UpdateUserTranskribusSerializer ) import urllib.parse import logging @@ -311,6 +312,21 @@ class PasswordResetConfirm(CreateAPIView): } +class UpdateUserTranskribus(UpdateAPIView): + """ + Update and validate Transkribus account + Only the user's email is stored in our database + """ + permission_classes = (IsVerified, ) + serializer_class = UpdateUserTranskribusSerializer + openapi_overrides = { + 'tags': ['users'], + } + + def get_object(self): + return self.request.user + + class OAuthSignIn(APIView): """ Start the OAuth authentication 
code flow for a given provider diff --git a/arkindex/users/migrations/0005_user_transkribus_email.py b/arkindex/users/migrations/0005_user_transkribus_email.py new file mode 100644 index 0000000000..ae38739bc3 --- /dev/null +++ b/arkindex/users/migrations/0005_user_transkribus_email.py @@ -0,0 +1,18 @@ +# Generated by Django 2.2.13 on 2020-08-05 07:24 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('users', '0004_internal_bool'), + ] + + operations = [ + migrations.AddField( + model_name='user', + name='transkribus_email', + field=models.EmailField(blank=True, max_length=255, null=True), + ), + ] diff --git a/arkindex/users/models.py b/arkindex/users/models.py index 352e0e41a7..f4b1f7409c 100644 --- a/arkindex/users/models.py +++ b/arkindex/users/models.py @@ -12,6 +12,11 @@ class User(AbstractBaseUser): max_length=255, unique=True, ) + transkribus_email = models.EmailField( + max_length=255, + null=True, + blank=True, + ) is_active = models.BooleanField(default=True) is_internal = models.BooleanField(default=False) is_admin = models.BooleanField(default=False) diff --git a/arkindex/users/serializers.py b/arkindex/users/serializers.py index 08e368c8ed..860616e07e 100644 --- a/arkindex/users/serializers.py +++ b/arkindex/users/serializers.py @@ -6,6 +6,15 @@ from django.contrib.auth.tokens import default_token_generator from django.contrib.auth.password_validation import validate_password from arkindex.users.models import OAuthCredentials, OAuthStatus, User from arkindex.project.serializer_fields import EnumField +from transkribus import TranskribusAPI +import logging + +logging.basicConfig( + level=logging.INFO, + format='[%(levelname)s] %(message)s', +) +logger = logging.getLogger(__name__) +transkribus_client = TranskribusAPI() def validate_user_password(user, data): @@ -54,6 +63,7 @@ class OAuthRetrySerializer(serializers.Serializer): class UserSerializer(serializers.ModelSerializer): features = 
serializers.SerializerMethodField(read_only=True) + transkribus_import_email = serializers.SerializerMethodField(read_only=True) class Meta: model = User @@ -65,6 +75,8 @@ class UserSerializer(serializers.ModelSerializer): 'is_admin', 'auth_token', 'features', + 'transkribus_email', + 'transkribus_import_email', ) extra_kwargs = { 'id': {'read_only': True}, @@ -79,11 +91,15 @@ class UserSerializer(serializers.ModelSerializer): 'verified_email': {'read_only': True}, 'is_admin': {'read_only': True}, 'auth_token': {'read_only': True}, + 'transkribus_email': {'read_only': True}, } def get_features(self, *args, **kwargs): return settings.ARKINDEX_FEATURES + def get_transkribus_import_email(self, *args, **kwargs): + return settings.TRANSKRIBUS_EMAIL + def update(self, instance, validated_data): if 'password' in validated_data: instance.set_password(validated_data.pop('password')) @@ -164,3 +180,32 @@ class PasswordResetConfirmSerializer(serializers.Serializer): return user.set_password(self.validated_data['password']) user.save() + + +class UpdateUserTranskribusSerializer(serializers.Serializer): + """ + A serializer that allows to verify Transkribus credentials + and to save the Transkribus email + """ + transkribus_email = serializers.EmailField() + transkribus_password = serializers.CharField(write_only=True, style={'input_type': 'password'}) + + def validate(self, data): + transkribus_email = data.get('transkribus_email') + transkribus_password = data.pop('transkribus_password') + + # Check that the credentials are correct + try: + transkribus_client.login(email=transkribus_email, password=transkribus_password) + except Exception: + logger.info("Invalid Transkribus email or password") + raise serializers.ValidationError( + {"__all__" : ["The email or password is incorrect"]} + ) + + return data + + def update(self, instance, validated_data): + instance.transkribus_email = validated_data.get('transkribus_email', None) + instance.save() + return instance diff --git 
a/arkindex/users/tests/test_providers.py b/arkindex/users/tests/test_providers.py index f922a3cee9..82d47d48a7 100644 --- a/arkindex/users/tests/test_providers.py +++ b/arkindex/users/tests/test_providers.py @@ -19,11 +19,12 @@ class TestProviders(FixtureTestCase): cls.provider_mock = MagicMock() cls.provider_mock.slug = 'provider-slug' cls.provider_mock().credentials = cls.creds - cls.old_providers = providers.oauth_providers + cls.old_providers = providers.oauth_providers.copy() providers.oauth_providers = [cls.provider_mock] @classmethod def tearDownClass(cls): + super().tearDownClass() providers.oauth_providers = cls.old_providers def test_get_provider(self): diff --git a/arkindex/users/tests/test_update_transkribus_email.py b/arkindex/users/tests/test_update_transkribus_email.py new file mode 100644 index 0000000000..40be88cfdf --- /dev/null +++ b/arkindex/users/tests/test_update_transkribus_email.py @@ -0,0 +1,39 @@ +from unittest.mock import patch +from rest_framework import status +from django.urls import reverse +from django.test import override_settings +from arkindex.project.tests import FixtureAPITestCase + + +@override_settings(TRANSKRIBUS_EMAIL=None, TRANSKRIBUS_PASSWORD=None) +class TestUpdateTranskribusEmail(FixtureAPITestCase): + + def test_requires_login(self): + response = self.client.patch(reverse('api:user-transkribus'), { + 'transkribus_email': 'nope@nope.com', + 'transkribus_password': '42' + }, format='json') + self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) + + def test_wrong_credentials(self): + self.client.force_login(self.user) + response = self.client.patch(reverse('api:user-transkribus'), { + 'transkribus_email': 'nope@nope.com', + 'transkribus_password': '42' + }, format='json') + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertEqual(response.json(), {"__all__": ["The email or password is incorrect"]}) + + @patch('transkribus.TranskribusAPI.login') + def 
test_update_transkribus_email(self, mock_transkribus): + mock_transkribus.return_value = {"email": "nope@nope.com"} + self.client.force_login(self.user) + + self.assertIsNone(self.user.transkribus_email) + response = self.client.patch(reverse('api:user-transkribus'), { + 'transkribus_email': 'nope@nope.com', + 'transkribus_password': '42' + }, format='json') + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.user.refresh_from_db() + self.assertEqual(self.user.transkribus_email, "nope@nope.com") diff --git a/requirements.txt b/requirements.txt index 0dd9cae66e..106d8b632e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -27,5 +27,7 @@ requests==2.22 responses==0.10.7 sentry-sdk==0.14.3 tenacity==6.2 +transkribus-client>=0.1.1 +git+https://gitlab.com/arkindex/transkribus.git#egg=transkribus-client uritemplate==3 urllib3==1.22 -- GitLab