From 85384931cd631ace5904b0a5ade84fdd6f3e1c69 Mon Sep 17 00:00:00 2001 From: Erwan Rouchet <rouchet@teklia.com> Date: Mon, 16 Oct 2023 13:06:16 +0000 Subject: [PATCH] Merge the Transkribus and file imports --- .gitlab-ci.yml | 1 - .isort.cfg | 1 - Dockerfile | 10 - Dockerfile.binary | 12 +- arkindex/documents/fixtures/data.json | 8 - arkindex/documents/models.py | 8 +- arkindex/process/api.py | 56 +----- arkindex/process/builder.py | 31 --- arkindex/process/managers.py | 13 -- .../migrations/0018_remove_transkribus.py | 22 +++ arkindex/process/models.py | 4 - arkindex/process/serializers/imports.py | 64 ++---- arkindex/process/tests/test_processes.py | 54 +++-- .../process/tests/test_transkribus_import.py | 185 ------------------ arkindex/project/api_v1.py | 4 - arkindex/project/checks.py | 33 ---- arkindex/project/config.py | 6 - arkindex/project/default_corpus.py | 24 --- arkindex/project/mixins.py | 2 +- arkindex/project/settings.py | 5 - arkindex/project/tests/__init__.py | 4 - .../tests/config_samples/defaults.yaml | 5 - .../tests/config_samples/override.yaml | 5 - arkindex/project/tests/test_checks.py | 77 -------- .../process_elements_filter_ml_class.sql | 2 - .../process_elements_filter_type.sql | 2 - .../process_elements_top_level.sql | 2 - .../process_elements_with_image.sql | 2 - arkindex/users/admin.py | 2 +- arkindex/users/api.py | 15 -- .../0002_remove_user_transkribus_email.py | 17 ++ arkindex/users/models.py | 5 - arkindex/users/serializers.py | 39 ---- .../tests/test_update_transkribus_email.py | 43 ---- requirements.txt | 2 - 35 files changed, 97 insertions(+), 668 deletions(-) create mode 100644 arkindex/process/migrations/0018_remove_transkribus.py delete mode 100644 arkindex/process/tests/test_transkribus_import.py create mode 100644 arkindex/users/migrations/0002_remove_user_transkribus_email.py delete mode 100644 arkindex/users/tests/test_update_transkribus_email.py diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 04d07ce46d..447ebc9b06 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -19,7 +19,6 @@ include: before_script: # Custom line to install our own deps from Git using GitLab CI credentials - - "pip install -e git+https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.teklia.com/arkindex/transkribus#egg=transkribus-client" - "pip install -e git+https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.teklia.com/arkindex/license#egg=teklia-license" - pip install -r tests-requirements.txt - "echo 'database: {host: postgres, port: 5432}\npublic_hostname: http://ci.arkindex.localhost' > $CONFIG_PATH" diff --git a/.isort.cfg b/.isort.cfg index 3fd5b5e994..0b8bd7b946 100644 --- a/.isort.cfg +++ b/.isort.cfg @@ -7,5 +7,4 @@ use_parentheses = True line_length = 120 default_section=FIRSTPARTY -known_first_party = transkribus known_third_party = SolrClient,bleach,boto3,botocore,cryptography,corsheaders,django,django_admin_hstore_widget,django_rq,drf_spectacular,enumfields,gitlab,psycopg2,requests,responses,rest_framework,rq,setuptools,sqlparse,teklia_toolbox,tenacity,tripoli,yaml diff --git a/Dockerfile b/Dockerfile index 5601f26760..49917b1f4c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,22 +6,12 @@ ADD . build RUN cd build && python3 setup.py sdist FROM registry.gitlab.teklia.com/arkindex/backend/base:gitlab-teklia -ARG TRANSKRIBUS_BRANCH=master -ARG TRANSKRIBUS_ID=63 ARG LICENSE_BRANCH=master ARG LICENSE_ID=37 # Auth token expires on 01/07/2024 ARG GITLAB_TOKEN="glpat-3sBZPFgkZbqJxfSqjcAa" -# Install transkribus-client from private repo -RUN \ - mkdir /tmp/transkribus && \ - wget --header "PRIVATE-TOKEN: $GITLAB_TOKEN" https://gitlab.teklia.com/api/v4/projects/$TRANSKRIBUS_ID/repository/archive.tar.gz?sha=$TRANSKRIBUS_BRANCH -O /tmp/transkribus/archive.tar.gz && \ - tar --strip-components=1 -xvf /tmp/transkribus/archive.tar.gz -C /tmp/transkribus && \ - cd /tmp/transkribus && pip install --disable-pip-version-check --no-cache-dir --quiet . && \ - rm -rf /tmp/transkribus - # Install teklia-license from private repo RUN \ mkdir /tmp/teklia-license && \ diff --git a/Dockerfile.binary b/Dockerfile.binary index 5d13f2b757..b53fa6d59b 100644 --- a/Dockerfile.binary +++ b/Dockerfile.binary @@ -5,8 +5,6 @@ RUN apt-get update && apt-get install --no-install-recommends -y build-essential RUN pip install nuitka -ARG TRANSKRIBUS_BRANCH=master -ARG TRANSKRIBUS_ID=63 ARG LICENSE_BRANCH=master ARG LICENSE_ID=37 @@ -22,13 +20,6 @@ ADD arkindex /usr/share/arkindex ADD base/requirements.txt /tmp/requirements-base-arkindex.txt ADD requirements.txt /tmp/requirements-arkindex.txt -# Install transkribus-client from private repo -RUN \ - mkdir /tmp/transkribus && \ - wget --header "PRIVATE-TOKEN: $GITLAB_TOKEN" https://gitlab.teklia.com/api/v4/projects/$TRANSKRIBUS_ID/repository/archive.tar.gz?sha=$TRANSKRIBUS_BRANCH -O /tmp/transkribus.tar.gz && \ - tar --strip-components=1 -xvf /tmp/transkribus.tar.gz -C /tmp/transkribus && \ - mv /tmp/transkribus/transkribus /usr/share - # Install teklia-license from private repo RUN \ mkdir /tmp/teklia-license && \ @@ -38,7 +29,7 @@ RUN \ cp /tmp/teklia-license/requirements.txt /tmp/requirements-license-arkindex.txt # Build full requirements, removing relative or remote references to arkindex projects -RUN cat /tmp/requirements-*arkindex.txt | sort | uniq | grep -v -E '^arkindex|^#|transkribus-client|teklia-license' > /requirements.txt +RUN cat /tmp/requirements-*arkindex.txt | sort | uniq | grep -v -E '^arkindex|^#|teklia-license' > /requirements.txt # List all management commands RUN find /usr/share/arkindex/*/management -name '*.py' -not -name '__init__.py' > /commands.txt @@ -56,7 +47,6 @@ ENV NUITKA_RESOURCE_MODE=linker RUN python -m nuitka \ --nofollow-imports \ --include-package=arkindex \ - --include-package=transkribus \ --include-package=teklia_license \ --show-progress \ --lto=yes \ diff --git a/arkindex/documents/fixtures/data.json b/arkindex/documents/fixtures/data.json index 59fe545ccb..9025e50c09 100644 --- a/arkindex/documents/fixtures/data.json +++ b/arkindex/documents/fixtures/data.json @@ -19,7 +19,6 @@ "element_type": null, "name_contains": null, "load_children": false, - "collection_id": null, "use_cache": false, "use_gpu": false, "template": null, @@ -52,7 +51,6 @@ "element_type": null, "name_contains": null, "load_children": false, - "collection_id": null, "use_cache": false, "use_gpu": false, "template": null, @@ -85,7 +83,6 @@ "element_type": null, "name_contains": null, "load_children": false, - "collection_id": null, "use_cache": false, "use_gpu": false, "template": null, @@ -118,7 +115,6 @@ "element_type": null, "name_contains": null, "load_children": false, - "collection_id": null, "use_cache": false, "use_gpu": false, "template": null, @@ -1771,7 +1767,6 @@ "last_login": null, "email": "root@root.fr", "display_name": "Admin", - "transkribus_email": null, "is_active": true, "is_admin": true, "verified_email": true, @@ -1787,7 +1782,6 @@ "last_login": null, "email": "user@user.fr", "display_name": "Test user", - "transkribus_email": null, "is_active": true, "is_admin": false, "verified_email": true, @@ -1803,7 +1797,6 @@ "last_login": null, "email": "user2@user.fr", "display_name": "Test user write", - "transkribus_email": null, "is_active": true, "is_admin": false, "verified_email": true, @@ -1819,7 +1812,6 @@ "last_login": null, "email": "user3@user.fr", "display_name": "Test user read", - "transkribus_email": null, "is_active": true, "is_admin": false, "verified_email": true, diff --git a/arkindex/documents/models.py b/arkindex/documents/models.py index 4f39c1764f..6a0265d3a3 100644 --- a/arkindex/documents/models.py +++ b/arkindex/documents/models.py @@ -22,7 +22,7 @@ from arkindex.documents.dates import InterpretedDateMixin from arkindex.documents.deletion import delete_element from arkindex.documents.managers import CorpusManager, ElementManager from arkindex.project.aws import S3FileMixin -from arkindex.project.default_corpus import DEFAULT_CORPUS_TYPES, DEFAULT_TRANSKRIBUS_TYPES +from arkindex.project.default_corpus import DEFAULT_CORPUS_TYPES from arkindex.project.fields import ArrayConcat, ArrayField, LinearRingField from arkindex.project.models import IndexableModel @@ -72,12 +72,6 @@ class Corpus(IndexableModel): for values in DEFAULT_CORPUS_TYPES ) - def create_default_transkribus_types(self): - self.types.bulk_create( - ElementType(corpus=self, **values) - for values in DEFAULT_TRANSKRIBUS_TYPES - ) - class ElementType(models.Model): id = models.UUIDField(default=uuid.uuid4, primary_key=True, editable=False) diff --git a/arkindex/process/api.py b/arkindex/process/api.py index b7d19b26ff..3e8acd4f73 100644 --- a/arkindex/process/api.py +++ b/arkindex/process/api.py @@ -79,10 +79,8 @@ from arkindex.process.serializers.git import ExternalRepositorySerializer, Revis from arkindex.process.serializers.imports import ( ApplyProcessTemplateSerializer, CorpusProcessSerializer, - CreateImportTranskribusErrorResponseSerializer, CreateProcessTemplateSerializer, FilesProcessSerializer, - ImportTranskribusSerializer, ProcessDetailsSerializer, ProcessElementLightSerializer, ProcessElementSerializer, @@ -478,7 +476,7 @@ class FilesProcess(CreateAPIView): folder_type = serializer.validated_data.get('folder_type') element_type = serializer.validated_data['element_type'] - if folder and folder.corpus != corpus: + if folder and folder.corpus_id != corpus.id: # The files' corpus is already validated as writable raise ValidationError({'__all__': ['Element and files are in different corpora']}) @@ -1538,58 +1536,6 @@ class WorkerRunDetails(ProcessACLMixin, RetrieveUpdateDestroyAPIView): return super().perform_destroy(instance) -@extend_schema_view(post=extend_schema( - operation_id='CreateImportTranskribus', - tags=['process'], - responses={ - 201: ProcessSerializer, - 400: CreateImportTranskribusErrorResponseSerializer - }, - examples=[OpenApiExample( - status_codes=['400'], - response_only=True, - name="user-permission", - value={'collection_id': "User user@example.com is not a member of the collection 1"}, - description="An error where the user is not a member of the collection." - )] -)) -class ImportTranskribus(CreateAPIView): - """ - Start a data import from Transkribus email and collection ID. - """ - - permission_classes = (IsVerified, ) - serializer_class = ImportTranskribusSerializer - - def create(self, *args, **kwargs): - if not settings.ARKINDEX_FEATURES['transkribus']: - raise ValidationError(['Transkribus import is unavailable due to the transkribus feature being disabled.']) - super().create(*args, **kwargs) - return Response( - status=status.HTTP_201_CREATED, - data=ProcessSerializer(self.process, context={'request': self.request}).data, - ) - - def perform_create(self, serializer): - collection_id = serializer.validated_data['collection_id'] - - # Create corpus - corpus = Corpus.objects.create( - name=f"Transkribus collection n°{collection_id}", - ) - corpus.memberships.create(user=self.request.user, level=Role.Admin.value) - corpus.create_default_transkribus_types() - - # Start a process with thumbnails generation - self.process = corpus.processes.create( - creator=self.request.user, - mode=ProcessMode.Transkribus, - collection_id=collection_id, - generate_thumbnails=True, - ) - self.process.run() - - @extend_schema_view(get=extend_schema( operation_id='ListProcessElements', parameters=[ diff --git a/arkindex/process/builder.py b/arkindex/process/builder.py index 1827286c31..a083c91621 100644 --- a/arkindex/process/builder.py +++ b/arkindex/process/builder.py @@ -297,37 +297,6 @@ class ProcessBuilder(object): ) for run in worker_runs ]) - def build_transkribus(self): - from arkindex.process.models import WorkerVersion - - worker_run = self._create_fake_worker_run(worker_version=WorkerVersion.objects.transkribus_version) - env = { - **self.base_env, - 'TRANSKRIBUS_EMAIL': settings.TRANSKRIBUS_EMAIL, - 'TRANSKRIBUS_PASSWORD': settings.TRANSKRIBUS_PASSWORD, - 'ARKINDEX_WORKER_RUN_ID': str(worker_run.id), - } - transkribus_task_slug = 'export_transkribus' - self._build_task( - command=f'python -m arkindex_tasks.export_transkribus {self.process.collection_id}', - slug=transkribus_task_slug, - env=env, - ) - - import_task_slug = 'import_arkindex' - self._build_task( - command='python -m arkindex_tasks.import_transkribus ' - f'--job-path /data/export_transkribus/transkribus_export_job.json ' - f'--corpus {self.process.corpus.id}', - slug=import_task_slug, - env=env, - ) - self.tasks_parents[import_task_slug].append(transkribus_task_slug) - - self._add_thumbnails(import_task_slug=import_task_slug) - - self._create_worker_versions_cache([(settings.TRANSKRIBUS_WORKER_VERSION, None, None)]) - def build_s3(self): from arkindex.process.models import WorkerVersion diff --git a/arkindex/process/managers.py b/arkindex/process/managers.py index 3b40101e45..0696f66f52 100644 --- a/arkindex/process/managers.py +++ b/arkindex/process/managers.py @@ -141,19 +141,6 @@ class WorkerVersionManager(Manager): .get(id=settings.IMPORTS_WORKER_VERSION) ) - @cached_property - def transkribus_version(self): - """ - WorkerVersion used for all Transkribus imports. - """ - return ( - self - # Required by WorkerRun.build_summary - .select_related('worker', 'revision') - .prefetch_related('revision__refs') - .get(id=settings.TRANSKRIBUS_WORKER_VERSION) - ) - class WorkerManager(BaseACLManager): diff --git a/arkindex/process/migrations/0018_remove_transkribus.py b/arkindex/process/migrations/0018_remove_transkribus.py new file mode 100644 index 0000000000..b854e5dfce --- /dev/null +++ b/arkindex/process/migrations/0018_remove_transkribus.py @@ -0,0 +1,22 @@ +# Generated by Django 4.1.7 on 2023-10-09 14:12 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('process', '0017_remove_process_model_version_and_more'), + ] + + operations = [ + migrations.RemoveField( + model_name='process', + name='collection_id', + ), + migrations.RunSQL( + "UPDATE process_process SET mode = 'files' WHERE mode = 'transkribus'", + reverse_sql=migrations.RunSQL.noop, + elidable=True, + ) + ] diff --git a/arkindex/process/models.py b/arkindex/process/models.py index fd6a1fc68c..c2c9dd47d9 100644 --- a/arkindex/process/models.py +++ b/arkindex/process/models.py @@ -50,7 +50,6 @@ class ProcessMode(Enum): Repository = 'repository' IIIF = 'iiif' Workers = 'workers' - Transkribus = 'transkribus' Template = 'template' S3 = 's3' Training = 'training' @@ -137,9 +136,6 @@ class Process(IndexableModel): ], ) - # Used to define the collection ID and entities import for Transkribus import - collection_id = models.PositiveIntegerField(null=True, blank=True) - # Use elements cache in Workers processes use_cache = models.BooleanField(default=False) diff --git a/arkindex/process/serializers/imports.py b/arkindex/process/serializers/imports.py index 3f449fe200..122a25d6a2 100644 --- a/arkindex/process/serializers/imports.py +++ b/arkindex/process/serializers/imports.py @@ -24,7 +24,6 @@ from arkindex.project.validators import MaxValueValidator from arkindex.training.models import ModelVersionState from arkindex.users.models import Role from arkindex.users.utils import get_max_level -from transkribus import TranskribusAPI class ProcessLightSerializer(serializers.ModelSerializer): @@ -242,7 +241,10 @@ class ProcessListSerializer(ProcessLightSerializer): class FilesProcessSerializer(serializers.Serializer): mode = EnumField(ProcessMode, default=ProcessMode.Files) - files = serializers.PrimaryKeyRelatedField(queryset=DataFile.objects.all(), many=True) + files = serializers.PrimaryKeyRelatedField( + queryset=DataFile.objects.select_related('corpus'), + many=True, + ) folder_id = serializers.UUIDField(required=False, allow_null=True) folder_type = serializers.SlugField(required=False, allow_null=True) element_type = serializers.SlugField() @@ -253,7 +255,7 @@ class FilesProcessSerializer(serializers.Serializer): 'unique_corpus': 'Imports can only run on files from a single corpus', 'corpus_read_only': 'Cannot write in corpus', 'folder_not_found': 'Folder does not exist', - 'image_or_pdf': 'File imports can only import images or PDF documents', + 'unsupported_content_type': 'File imports can only import images, PDF documents or ZIP archives', 'iiif_only': 'IIIF imports can only import IIIF documents', 'folder_required': 'Either folder_type, folder_id or both are required', 'iiif_folder_required': 'IIIF imports require both folder_type and element_type', @@ -274,7 +276,7 @@ class FilesProcessSerializer(serializers.Serializer): if len(corpora) > 1: self.fail('unique_corpus') corpus = corpora.pop() - if corpus not in Corpus.objects.writable(self.context['request'].user): + if not Corpus.objects.writable(self.context['request'].user).filter(id=corpus.id).exists(): self.fail('corpus_read_only') return files @@ -291,8 +293,12 @@ class FilesProcessSerializer(serializers.Serializer): def validate(self, data): if data['mode'] == ProcessMode.Files: - if not all(f.content_type == 'application/pdf' or f.content_type.startswith('image/') for f in data['files']): - self.fail('image_or_pdf') + if not all( + f.content_type in ('application/pdf', 'application/zip', 'application/x-zip-compressed') + or f.content_type.startswith('image/') + for f in data['files'] + ): + self.fail('unsupported_content_type') elif data['mode'] == ProcessMode.IIIF: if not set(f.content_type.split(';')[0] for f in data['files']) <= {'application/json', 'application/ld+json'}: @@ -621,52 +627,6 @@ class CorpusProcessSerializer(serializers.Serializer): return data -class ImportTranskribusSerializer(serializers.Serializer): - """ - Serialize a Transkribus import - """ - collection_id = serializers.IntegerField(min_value=1) - - def validate(self, data): - collection_id = data.get('collection_id') - - # Check Transkribus email - transkribus_email = self.context['request'].user.transkribus_email - if not transkribus_email: - raise serializers.ValidationError( - {"__all__": ["You have not registered your transkribus email"]} - ) - - # Login as Arkindex user on transkribus - try: - transkribus_client = TranskribusAPI(email=settings.TRANSKRIBUS_EMAIL, password=settings.TRANSKRIBUS_PASSWORD) - except Exception: - raise serializers.ValidationError( - {"__all__" : [f"Failed to login on Transkribus as {settings.TRANSKRIBUS_EMAIL}"]} - ) - - # Check Arkindex's right - try: - users = transkribus_client.list_user_collection(collection_id) - except Exception: - raise serializers.ValidationError( - {"collection_id" : [f"User {settings.TRANSKRIBUS_EMAIL} is not a member of the collection {collection_id}"]} - ) - - # Check user's right - user = next(filter(lambda user: user["email"] == transkribus_email, users), None) - if not user: - raise serializers.ValidationError( - {"collection_id": [f"User {transkribus_email} is not a member of the collection {collection_id}"]} - ) - - return data - - -class CreateImportTranskribusErrorResponseSerializer(serializers.Serializer): - collection_id = serializers.CharField(required=False, help_text="Errors that occurred during collection ID field validation.") - - class ProcessElementLightSerializer(serializers.ModelSerializer): """ Serialises an Element, using optimized query for ListProcessElement diff --git a/arkindex/process/tests/test_processes.py b/arkindex/process/tests/test_processes.py index 1fb032ce5d..f7e4bad4f8 100644 --- a/arkindex/process/tests/test_processes.py +++ b/arkindex/process/tests/test_processes.py @@ -58,6 +58,16 @@ class TestProcesses(FixtureAPITestCase): size=42, content_type='application/json', ) + cls.zip_df = cls.corpus.files.create( + name='test.zip', + size=1337, + content_type='application/zip', + ) + cls.windows_zip_df = cls.corpus.files.create( + name='windows.zip', + size=1337, + content_type='application/x-zip-compressed', + ) cls.page_type = ElementType.objects.get(corpus=cls.corpus, slug='page') cls.volume_type = ElementType.objects.get(corpus=cls.corpus, slug='volume') cls.ml_class = cls.corpus.ml_classes.create(name='clafoutis') @@ -1902,20 +1912,35 @@ class TestProcesses(FixtureAPITestCase): self.assertIsNone(process.element) @override_settings(IMPORTS_WORKER_VERSION=None) - def test_from_files_image_and_pdf(self): + def test_from_files_multiple_types(self): self.client.force_login(self.user) - with self.settings(IMPORTS_WORKER_VERSION=str(self.import_worker_version.id)): + + with self.settings(IMPORTS_WORKER_VERSION=str(self.import_worker_version.id)), self.assertNumQueries(30): response = self.client.post(reverse('api:files-process'), { - 'files': [str(self.pdf_df.id), str(self.img_df.id)], + 'files': [ + str(self.pdf_df.id), + str(self.img_df.id), + str(self.zip_df.id), + str(self.windows_zip_df.id), + ], 'mode': 'files', 'folder_type': 'volume', 'element_type': 'page', }, format='json') self.assertEqual(response.status_code, status.HTTP_201_CREATED) + data = response.json() process = Process.objects.get(id=data['id']) self.assertEqual(process.mode, ProcessMode.Files) - self.assertListEqual(list(process.files.all()), [self.img_df, self.pdf_df]) + self.assertQuerysetEqual( + process.files.all(), [ + self.img_df, + self.pdf_df, + self.zip_df, + self.windows_zip_df, + ], + ordered=False, + ) self.assertIsNone(process.element) @override_settings( @@ -1925,7 +1950,7 @@ class TestProcesses(FixtureAPITestCase): def test_from_files_iiif(self): self.client.force_login(self.user) - with self.assertNumQueries(28), self.settings(IMPORTS_WORKER_VERSION=str(self.import_worker_version.id)): + with self.assertNumQueries(27), self.settings(IMPORTS_WORKER_VERSION=str(self.import_worker_version.id)): response = self.client.post(reverse('api:files-process'), { 'files': [str(self.iiif_df.id)], 'mode': 'iiif', @@ -2033,14 +2058,17 @@ class TestProcesses(FixtureAPITestCase): def test_from_files_files_wrong_type(self): self.client.force_login(self.user) - response = self.client.post(reverse('api:files-process'), { - 'files': [str(self.iiif_df.id)], - 'folder_type': 'volume', - 'element_type': 'page', - 'mode': 'files', - }, format='json') - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual(response.json(), {'non_field_errors': ['File imports can only import images or PDF documents']}) + + with self.assertNumQueries(6): + response = self.client.post(reverse('api:files-process'), { + 'files': [str(self.iiif_df.id)], + 'folder_type': 'volume', + 'element_type': 'page', + 'mode': 'files', + }, format='json') + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + + self.assertEqual(response.json(), {'non_field_errors': ['File imports can only import images, PDF documents or ZIP archives']}) @override_settings(IMPORTS_WORKER_VERSION=None) def test_from_files_folder_id(self): diff --git a/arkindex/process/tests/test_transkribus_import.py b/arkindex/process/tests/test_transkribus_import.py deleted file mode 100644 index 282bf27c2e..0000000000 --- a/arkindex/process/tests/test_transkribus_import.py +++ /dev/null @@ -1,185 +0,0 @@ -from unittest.mock import patch - -from django.test import override_settings -from django.urls import reverse -from rest_framework import status - -from arkindex.ponos.models import State -from arkindex.process.models import Process, ProcessMode, Repository, Revision, Worker, WorkerType, WorkerVersion -from arkindex.project.default_corpus import DEFAULT_TRANSKRIBUS_TYPES -from arkindex.project.tests import FixtureAPITestCase -from arkindex.users.models import Role, User - - -@override_settings(TRANSKRIBUS_EMAIL="arkindex@teklia.com", TRANSKRIBUS_PASSWORD=None) -class TestTranskribusImport(FixtureAPITestCase): - """ - Test transkribus import - """ - - @classmethod - def setUpTestData(cls): - super().setUpTestData() - User.objects.update(transkribus_email="nope@nope.fr") - cls.repo = Repository.objects.create(url='http://fakery') - cls.tr_type = WorkerType.objects.get(slug='import') - cls.transkribus_worker = Worker.objects.create( - repository=cls.repo, - name='Transkribus Import', - slug='transkribus_import', - type=cls.tr_type - ) - cls.transkribus_rev = Revision.objects.create( - hash='1234', - message='commit commit', - author='Wolpertinger', - repo=cls.repo, - ) - cls.transkribus_worker_version = WorkerVersion.objects.create( - worker=cls.transkribus_worker, - revision=cls.transkribus_rev, - configuration={} - ) - - @override_settings(ARKINDEX_FEATURES={"transkribus": True}) - def test_requires_login(self): - response = self.client.post(reverse("api:import-transkribus")) - self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) - - @override_settings(ARKINDEX_FEATURES={"transkribus": False}) - def test_requires_flag(self): - self.client.force_login(self.user) - response = self.client.post(reverse("api:import-transkribus"), { - "collection_id": "12345", - }, format="json") - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual(response.json(), ["Transkribus import is unavailable due to the transkribus feature being disabled."]) - - @override_settings(ARKINDEX_FEATURES={"transkribus": True}) - def test_requires_transkribus_email(self): - User.objects.update(transkribus_email=None) - self.client.force_login(self.user) - response = self.client.post(reverse("api:import-transkribus"), { - "collection_id": "12345", - }, format="json") - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual(response.json(), {"__all__": ["You have not registered your transkribus email"]}) - - @override_settings(ARKINDEX_FEATURES={"transkribus": True}) - @patch("transkribus.TranskribusAPI.list_user_collection") - def test_arkindex_has_not_access(self, mock_transkribus): - # Not a mistake: Transkribus client raises `Exception` directly when it runs out of retries - mock_transkribus.side_effect = Exception("401 Unauthorized") - self.client.force_login(self.user) - response = self.client.post(reverse("api:import-transkribus"), { - "collection_id": "12345", - }, format="json") - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual(response.json(), {"collection_id" : ["User arkindex@teklia.com is not a member of the collection 12345"]}) - - @override_settings(ARKINDEX_FEATURES={"transkribus": True}) - @patch("transkribus.TranskribusAPI.list_user_collection") - def test_user_has_not_access(self, mock_transkribus): - mock_transkribus.return_value = [{"email": "arkindex@teklia.com"}] - - self.client.force_login(self.user) - response = self.client.post(reverse("api:import-transkribus"), { - "collection_id": "12345", - }, format="json") - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual(response.json(), {"collection_id": ["User nope@nope.fr is not a member of the collection 12345"]}) - - @override_settings( - PONOS_DEFAULT_ENV={}, - ARKINDEX_FEATURES={"transkribus": True}, - TRANSKRIBUS_EMAIL="arkindex@teklia.com", - TRANSKRIBUS_PASSWORD="averysecretpassword", - ARKINDEX_TASKS_IMAGE='registry.teklia.com/tasks', - ) - @patch("arkindex.process.serializers.imports.TranskribusAPI") - def test_create_import(self, mock_transkribus): - mock_transkribus.return_value.list_user_collection.return_value = [{"email": "nope@nope.fr"}, {"email": "arkindex@teklia.com"}] - self.client.force_login(self.user) - - with self.settings(TRANSKRIBUS_WORKER_VERSION=self.transkribus_worker_version.id): - response = self.client.post(reverse("api:import-transkribus"), { - "collection_id": "12345", - }, format="json") - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - - data = response.json() - process = Process.objects.get(id=data["id"]) - - self.assertEqual(process.mode, ProcessMode.Transkribus) - self.assertEqual(process.collection_id, 12345) - self.assertEqual(process.state, State.Unscheduled) - - corpus = process.corpus - self.assertEqual(corpus.name, "Transkribus collection n°12345") - self.assertEqual(corpus.description, "") - self.assertEqual(corpus.public, False) - # Assert defaults types are set on the new corpus - self.assertCountEqual( - list(corpus.types.values( - "slug", - "display_name", - "folder", - "color", - )), - [{ - "folder": False, - **values - } for values in DEFAULT_TRANSKRIBUS_TYPES] - ) - - right = corpus.memberships.get(user=self.user) - self.assertTrue(right.level >= Role.Admin.value) - - worker_run = process.worker_runs.get() - self.assertEqual(worker_run.version, self.transkribus_worker_version) - self.assertListEqual(worker_run.parents, []) - self.assertIsNone(worker_run.configuration_id) - self.assertIsNone(worker_run.model_version_id) - - tasks_slugs = list(process.tasks.order_by('slug').values_list('slug', flat=True)) - self.assertEqual(len(tasks_slugs), 3) - self.assertEqual(tasks_slugs, ['export_transkribus', 'import_arkindex', 'thumbnails']) - - export_task = process.tasks.get(slug='export_transkribus') - self.assertEqual(export_task.command, 'python -m arkindex_tasks.export_transkribus 12345') - self.assertEqual(export_task.image, 'registry.teklia.com/tasks') - self.assertEqual(list(export_task.parents.all()), []) - - import_task = process.tasks.get(slug='import_arkindex') - self.assertEqual(import_task.command, f'python -m arkindex_tasks.import_transkribus --job-path /data/export_transkribus/transkribus_export_job.json --corpus {corpus.id}') - self.assertEqual(import_task.image, 'registry.teklia.com/tasks') - self.assertEqual(list(import_task.parents.all()), [export_task]) - - thumbnails_task = process.tasks.get(slug='thumbnails') - self.assertEqual(thumbnails_task.command, 'python3 -m arkindex_tasks.generate_thumbnails /data/import_arkindex/elements.json') - self.assertEqual(thumbnails_task.image, 'registry.teklia.com/tasks') - self.assertEqual(list(thumbnails_task.parents.all()), [import_task]) - - @override_settings( - PONOS_DEFAULT_ENV={}, - ARKINDEX_FEATURES={'transkribus': True}, - ) - @patch("transkribus.TranskribusAPI.list_user_collection") - def test_corpus_worker_version_list_transkribus(self, mock_transkribus): - mock_transkribus.return_value = [{"email": "nope@nope.fr"}, {"email": "arkindex@teklia.com"}] - self.client.force_login(self.user) - with self.settings(TRANSKRIBUS_WORKER_VERSION=self.transkribus_worker_version.id): - response = self.client.post(reverse("api:import-transkribus"), { - "collection_id": "12345", - }, format="json") - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - data = response.json() - process = Process.objects.get(id=data["id"]) - self.assertEqual(process.mode, ProcessMode.Transkribus) - corpus = process.corpus - with self.assertNumQueries(7): - response = self.client.get(reverse('api:corpus-versions', kwargs={'pk': corpus.id})) - self.assertEqual(response.status_code, status.HTTP_200_OK) - - self.assertEqual(len(response.json()['results']), 1) - self.assertEqual(response.json()['results'][0]['worker_version']['id'], str(self.transkribus_worker_version.id)) diff --git a/arkindex/project/api_v1.py b/arkindex/project/api_v1.py index 46852deefe..be0895c0cd 100644 --- a/arkindex/project/api_v1.py +++ b/arkindex/project/api_v1.py @@ -90,7 +90,6 @@ from arkindex.process.api import ( DataFileRetrieve, FilesProcess, GitRepositoryImportHook, - ImportTranskribus, ListProcessElements, ProcessDatasetManage, ProcessDatasets, @@ -157,7 +156,6 @@ from arkindex.users.api import ( UserEmailVerification, UserMemberships, UserRetrieve, - UserTranskribus, ) api = [ @@ -285,7 +283,6 @@ api = [ # Import workflows path('process/', ProcessList.as_view(), name='process-list'), path('process/fromfiles/', FilesProcess.as_view(), name='files-process'), - path('process/transkribus/', ImportTranskribus.as_view(), name='import-transkribus'), path('process/<uuid:pk>/', ProcessDetails.as_view(), name='process-details'), path('process/<uuid:pk>/retry/', ProcessRetry.as_view(), name='process-retry'), path('process/<uuid:pk>/start/', StartProcess.as_view(), name='process-start'), @@ -342,7 +339,6 @@ api = [ path('user/token/', UserEmailVerification.as_view(), name='user-token'), path('user/password-reset/', PasswordReset.as_view(), name='password-reset'), path('user/password-reset/confirm/', PasswordResetConfirm.as_view(), name='password-reset-confirm'), - path('user/transkribus/', UserTranskribus.as_view(), name='user-transkribus'), # Rights management path('groups/', GroupsCreate.as_view(), name='groups-create'), diff --git a/arkindex/project/checks.py b/arkindex/project/checks.py index 22fdfa8c2c..9db3ec795c 100644 --- a/arkindex/project/checks.py +++ b/arkindex/project/checks.py @@ -11,8 +11,6 @@ import sys from django.core.checks import Critical, Error, Warning, register -from arkindex.process.models import WorkerVersion - def only_runserver(func): "Decorator to run a system check only when running a dev server or deploying" @@ -296,34 +294,3 @@ def botocore_config_check(*args, **kwargs): ]) return warnings - - -@register() -@only_runserver -def transkribus_check(*args, **kwargs): - from django.conf import settings - errors = [] - transkribus_feature = settings.ARKINDEX_FEATURES['transkribus'] - transkribus_config = { - 'TRANSKRIBUS_EMAIL': 'Transkribus account email address', - 'TRANSKRIBUS_PASSWORD': 'Transkribus account password', - 'TRANSKRIBUS_WORKER_VERSION': 'Transkribus worker version' - } - if not transkribus_feature: - return [] - for setting, descr in transkribus_config.items(): - value = getattr(settings, setting) - if not value: - errors.append(Error( - f'No {descr} is set; all Transkribus-related features will fail.', - hint=f'settings.{setting} = {value!r}', - id='arkindex.E012', - )) - worker_version = settings.TRANSKRIBUS_WORKER_VERSION - if worker_version and not WorkerVersion.objects.filter(pk=worker_version).exists(): - errors.append(Error( - 'Transkribus worker version does not exist.', - hint=f'settings.TRANSKRIBUS_WORKER_VERSION = {worker_version!r}', - id='arkindex.E012', - )) - return errors diff --git a/arkindex/project/config.py b/arkindex/project/config.py index 8cae2633c5..baab5f0e90 100644 --- a/arkindex/project/config.py +++ b/arkindex/project/config.py @@ -132,11 +132,6 @@ def get_settings_parser(base_dir): doorbell_parser.add_option('id', type=str, default=None) doorbell_parser.add_option('appkey', type=str, default=None) - transkribus_parser = parser.add_subparser('transkribus', default={}) - transkribus_parser.add_option('email', type=str, default=None) - transkribus_parser.add_option('password', type=str, default=None) - transkribus_parser.add_option('worker_version', type=uuid.UUID, default=None) - gitlab_parser = parser.add_subparser('gitlab', default={}) gitlab_parser.add_option('app_id', type=str, default=None) gitlab_parser.add_option('app_secret', type=str, default=None) @@ -196,7 +191,6 @@ def get_settings_parser(base_dir): features_parser.add_option('signup', type=bool, default=True) features_parser.add_option('selection', type=bool, default=True) features_parser.add_option('search', type=bool, default=False) - features_parser.add_option('transkribus', type=bool, default=True) banner_parser = parser.add_subparser('banner', default={}) banner_parser.add_option('message', type=str, default=None) diff --git a/arkindex/project/default_corpus.py b/arkindex/project/default_corpus.py index 691fe36012..827e480c8f 100644 --- a/arkindex/project/default_corpus.py +++ b/arkindex/project/default_corpus.py @@ -37,27 +37,3 @@ DEFAULT_CORPUS_TYPES = [ 'color': '642aeb' } ] - -DEFAULT_TRANSKRIBUS_TYPES = [ - { - 'slug': 'volume', - 'display_name': 'Volume', - 'folder': True, - 'color': '28b62c', - }, - { - 'slug': 'page', - 'display_name': 'Page', - 'color': '28b62c', - }, - { - 'slug': 'text_line', - 'display_name': 'Text line', - 'color': '115eed', - }, - { - 'slug': 'paragraph', - 'display_name': 'Paragraph', - 'color': '642aeb' - } -] diff --git a/arkindex/project/mixins.py b/arkindex/project/mixins.py index ac622d6895..51b0c1cb52 100644 --- a/arkindex/project/mixins.py +++ b/arkindex/project/mixins.py @@ -205,7 +205,7 @@ class ProcessACLMixin(ACLMixin): # Return the access level on a single process access_levels = [] if process.corpus_id: - # Use project right. Covers Images, IIIF, PDF, Repository (IIIF), Elements (Workers) and Transkribus process modes + # On all processes with a corpus, include corpus ACLs access_levels.append(get_max_level(self.user, process.corpus)) elif process.mode == ProcessMode.Repository and process.revision_id: # Use repository right in case of a workers docker build diff --git a/arkindex/project/settings.py b/arkindex/project/settings.py index 002fe5cc36..1ae3f31a15 100644 --- a/arkindex/project/settings.py +++ b/arkindex/project/settings.py @@ -309,11 +309,6 @@ IIIF_DOWNLOAD_TIMEOUT = (30, 60) # check_images sample size when checking all servers CHECK_IMAGES_SAMPLE_SIZE = 20 -# Transkribus credentials -TRANSKRIBUS_EMAIL = conf['transkribus']['email'] -TRANSKRIBUS_PASSWORD = conf['transkribus']['password'] -TRANSKRIBUS_WORKER_VERSION = conf['transkribus']['worker_version'] - # GitLab OAuth GITLAB_APP_ID = conf['gitlab']['app_id'] GITLAB_APP_SECRET = conf['gitlab']['app_secret'] diff --git a/arkindex/project/tests/__init__.py b/arkindex/project/tests/__init__.py index b26dcfc06d..5e6fa6ab9d 100644 --- a/arkindex/project/tests/__init__.py +++ b/arkindex/project/tests/__init__.py @@ -147,10 +147,6 @@ class FixtureMixin(object): pass # Do the same for WorkerVersion cached properties - try: - del WorkerVersion.objects.transkribus_version - except AttributeError: - pass try: del WorkerVersion.objects.imports_version except AttributeError: diff --git a/arkindex/project/tests/config_samples/defaults.yaml b/arkindex/project/tests/config_samples/defaults.yaml index 6ece4c85c6..ce3e095ae9 100644 --- a/arkindex/project/tests/config_samples/defaults.yaml +++ b/arkindex/project/tests/config_samples/defaults.yaml @@ -39,7 +39,6 @@ features: search: false selection: true signup: true - transkribus: true gitlab: app_id: null app_secret: null @@ -109,9 +108,5 @@ static: mirador_url: null root_path: null universal_viewer_url: null -transkribus: - email: null - password: null - worker_version: null worker_activity_timeout: 3600 workers_max_chunks: 10 diff --git a/arkindex/project/tests/config_samples/override.yaml b/arkindex/project/tests/config_samples/override.yaml index 0fbb32b3b5..8fd0071792 100644 --- a/arkindex/project/tests/config_samples/override.yaml +++ b/arkindex/project/tests/config_samples/override.yaml @@ -51,7 +51,6 @@ features: search: true selection: false signup: false - transkribus: false gitlab: app_id: a app_secret: b @@ -126,9 +125,5 @@ static: mirador_url: gopher://mirador/ root_path: / universal_viewer_url: gopher://uv/ -transkribus: - email: nope@nope - password: superSecret - worker_version: aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa worker_activity_timeout: 3600 workers_max_chunks: 42 diff --git a/arkindex/project/tests/test_checks.py b/arkindex/project/tests/test_checks.py index 0557764316..1d07c80e03 100644 --- a/arkindex/project/tests/test_checks.py +++ b/arkindex/project/tests/test_checks.py @@ -369,80 +369,3 @@ class ChecksTestCase(TestCase): settings.INGEST_S3_ENDPOINT = None settings.INGEST_S3_REGION = 'something' self.assertCountEqual(botocore_config_check(), []) - - @override_settings() - def test_transkribus_checks(self): - from arkindex.process.models import Repository, Revision, Worker, WorkerType, WorkerVersion - from arkindex.project.checks import transkribus_check - - repo = Repository.objects.create(url='http://fakery') - tr_type = WorkerType.objects.create(slug='import', display_name='Import') - transkribus_worker = Worker.objects.create( - repository=repo, - name='Transkribus Import', - slug='transkribus_import', - type=tr_type - ) - transkribus_rev = Revision.objects.create( - hash='1234', - message='commit commit', - author='Wolpertinger', - repo=repo, - ) - transkribus_worker_version = WorkerVersion.objects.create( - worker=transkribus_worker, - revision=transkribus_rev, - configuration={} - ) - - with self.settings( - ARKINDEX_FEATURES={ - 'transkribus': True - }, - TRANSKRIBUS_EMAIL='mail@mail.com', - TRANSKRIBUS_PASSWORD='passpass', - TRANSKRIBUS_WORKER_VERSION=None - ): - self.assertCountEqual(transkribus_check(), [ - Error( - 'No Transkribus worker version is set; all Transkribus-related features will fail.', - hint='settings.TRANSKRIBUS_WORKER_VERSION = None', - id='arkindex.E012', - ) - ]) - - with self.settings( - ARKINDEX_FEATURES={ - 'transkribus': True - }, - TRANSKRIBUS_EMAIL='mail@mail.com', - TRANSKRIBUS_PASSWORD='passpass', - TRANSKRIBUS_WORKER_VERSION='bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb' - ): - self.assertCountEqual(transkribus_check(), [ - Error( - 'Transkribus worker version does not exist.', - hint="settings.TRANSKRIBUS_WORKER_VERSION = 'bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb'", - id='arkindex.E012', - ) - ]) - - with self.settings( - ARKINDEX_FEATURES={ - 'transkribus': True - }, - TRANSKRIBUS_EMAIL='mail@mail.com', - TRANSKRIBUS_PASSWORD='passpass', - TRANSKRIBUS_WORKER_VERSION=transkribus_worker_version.id - ): - self.assertListEqual(transkribus_check(), []) - - with self.settings( - ARKINDEX_FEATURES={ - 'transkribus': False - }, - TRANSKRIBUS_EMAIL='mail@mail.com', - TRANSKRIBUS_PASSWORD='passpass', - TRANSKRIBUS_WORKER_VERSION=None - ): - self.assertListEqual(transkribus_check(), []) diff --git a/arkindex/sql_validation/process_elements_filter_ml_class.sql b/arkindex/sql_validation/process_elements_filter_ml_class.sql index c32976537c..0dd85b16c9 100644 --- a/arkindex/sql_validation/process_elements_filter_ml_class.sql +++ b/arkindex/sql_validation/process_elements_filter_ml_class.sql @@ -3,7 +3,6 @@ SELECT "users_user"."id", "users_user"."last_login", "users_user"."email", "users_user"."display_name", - "users_user"."transkribus_email", "users_user"."is_active", "users_user"."is_admin", "users_user"."verified_email", @@ -33,7 +32,6 @@ SELECT "process_process"."id", "process_process"."load_children", "process_process"."generate_thumbnails", "process_process"."chunks", - "process_process"."collection_id", "process_process"."use_cache", "process_process"."use_gpu", "process_process"."template_id", diff --git a/arkindex/sql_validation/process_elements_filter_type.sql b/arkindex/sql_validation/process_elements_filter_type.sql index 5587a67095..4b2502d959 100644 --- a/arkindex/sql_validation/process_elements_filter_type.sql +++ b/arkindex/sql_validation/process_elements_filter_type.sql @@ -3,7 +3,6 @@ SELECT "users_user"."id", "users_user"."last_login", "users_user"."email", "users_user"."display_name", - "users_user"."transkribus_email", "users_user"."is_active", "users_user"."is_admin", "users_user"."verified_email", @@ -33,7 +32,6 @@ SELECT "process_process"."id", "process_process"."load_children", "process_process"."generate_thumbnails", "process_process"."chunks", - "process_process"."collection_id", "process_process"."use_cache", "process_process"."use_gpu", "process_process"."template_id", diff --git a/arkindex/sql_validation/process_elements_top_level.sql b/arkindex/sql_validation/process_elements_top_level.sql index d51c384522..7d2bfe7d00 100644 --- a/arkindex/sql_validation/process_elements_top_level.sql +++ b/arkindex/sql_validation/process_elements_top_level.sql @@ -3,7 +3,6 @@ SELECT "users_user"."id", "users_user"."last_login", "users_user"."email", "users_user"."display_name", - "users_user"."transkribus_email", "users_user"."is_active", "users_user"."is_admin", "users_user"."verified_email", @@ -33,7 +32,6 @@ SELECT "process_process"."id", "process_process"."load_children", "process_process"."generate_thumbnails", "process_process"."chunks", - "process_process"."collection_id", "process_process"."use_cache", "process_process"."use_gpu", "process_process"."template_id", diff --git a/arkindex/sql_validation/process_elements_with_image.sql b/arkindex/sql_validation/process_elements_with_image.sql index 7677dbb14a..d7a841886b 100644 --- a/arkindex/sql_validation/process_elements_with_image.sql +++ b/arkindex/sql_validation/process_elements_with_image.sql @@ -3,7 +3,6 @@ SELECT "users_user"."id", "users_user"."last_login", "users_user"."email", "users_user"."display_name", - "users_user"."transkribus_email", "users_user"."is_active", "users_user"."is_admin", "users_user"."verified_email", @@ -33,7 +32,6 @@ SELECT "process_process"."id", "process_process"."load_children", "process_process"."generate_thumbnails", "process_process"."chunks", - "process_process"."collection_id", "process_process"."use_cache", "process_process"."use_gpu", "process_process"."template_id", diff --git a/arkindex/users/admin.py b/arkindex/users/admin.py index 5a2e118db6..aac4399da5 100644 --- a/arkindex/users/admin.py +++ b/arkindex/users/admin.py @@ -66,7 +66,7 @@ class UserAdmin(BaseUserAdmin): list_display = ('email', 'display_name', 'is_admin', 'created') list_filter = ('is_admin', ) fieldsets = ( - (None, {'fields': ('email', 'display_name', 'verified_email', 'password', 'transkribus_email')}), + (None, {'fields': ('email', 'display_name', 'verified_email', 'password')}), ('Permissions', {'fields': ('is_admin', 'is_active')}), ) # add_fieldsets is not a standard ModelAdmin attribute. UserAdmin diff --git a/arkindex/users/api.py b/arkindex/users/api.py index 719112da39..141f218611 100644 --- a/arkindex/users/api.py +++ b/arkindex/users/api.py @@ -28,7 +28,6 @@ from rest_framework.generics import ( RetrieveAPIView, RetrieveDestroyAPIView, RetrieveUpdateDestroyAPIView, - UpdateAPIView, ) from rest_framework.permissions import SAFE_METHODS from rest_framework.response import Response @@ -56,7 +55,6 @@ from arkindex.users.serializers import ( PasswordResetConfirmSerializer, PasswordResetSerializer, UserSerializer, - UserTranskribusSerializer, ) from arkindex.users.utils import RightContent, get_max_level @@ -352,19 +350,6 @@ class PasswordResetConfirm(CreateAPIView): serializer_class = PasswordResetConfirmSerializer -@extend_schema(tags=['users']) -class UserTranskribus(UpdateAPIView): - """ - Update and validate Transkribus account - Only the user's email is stored in our database - """ - permission_classes = (IsVerified, ) - serializer_class = UserTranskribusSerializer - - def get_object(self): - return self.request.user - - class OAuthSignIn(APIView): """ Start the OAuth authentication code flow for a given provider diff --git a/arkindex/users/migrations/0002_remove_user_transkribus_email.py b/arkindex/users/migrations/0002_remove_user_transkribus_email.py new file mode 100644 index 0000000000..9a79a5e21b --- /dev/null +++ b/arkindex/users/migrations/0002_remove_user_transkribus_email.py @@ -0,0 +1,17 @@ +# Generated by Django 4.1.7 on 2023-10-09 14:12 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('users', '0001_initial'), + ] + + operations = [ + migrations.RemoveField( + model_name='user', + name='transkribus_email', + ), + ] diff --git a/arkindex/users/models.py b/arkindex/users/models.py index f4f4fbd98b..3a92035320 100644 --- a/arkindex/users/models.py +++ b/arkindex/users/models.py @@ -77,11 +77,6 @@ class User(AbstractBaseUser): db_collation='case_insensitive', ) display_name = models.CharField(max_length=120) - transkribus_email = models.EmailField( - max_length=255, - null=True, - blank=True, - ) is_active = models.BooleanField(default=True) is_admin = models.BooleanField(default=False) verified_email = models.BooleanField(default=False) diff --git a/arkindex/users/serializers.py b/arkindex/users/serializers.py index b2112274be..839c8696f7 100644 --- a/arkindex/users/serializers.py +++ b/arkindex/users/serializers.py @@ -14,9 +14,6 @@ from arkindex.project.mixins import WorkerACLMixin from arkindex.project.serializer_fields import EnumField from arkindex.users.models import Group, OAuthCredentials, OAuthStatus, Right, Role, User from arkindex.users.utils import RightContent, get_max_level -from transkribus import TranskribusAPI - -transkribus_client = TranskribusAPI() def validate_user_password(user, data): @@ -73,7 +70,6 @@ class SimpleUserSerializer(serializers.ModelSerializer): class UserSerializer(SimpleUserSerializer): features = serializers.SerializerMethodField(read_only=True) - transkribus_import_email = serializers.SerializerMethodField(read_only=True) class Meta(SimpleUserSerializer.Meta): fields = SimpleUserSerializer.Meta.fields + ( @@ -82,8 +78,6 @@ class UserSerializer(SimpleUserSerializer): 'is_admin', 'auth_token', 'features', - 'transkribus_email', - 'transkribus_import_email', ) extra_kwargs = { 'id': {'read_only': True}, @@ -98,7 +92,6 @@ class UserSerializer(SimpleUserSerializer): 'verified_email': {'read_only': True}, 'is_admin': {'read_only': True}, 'auth_token': {'read_only': True}, - 'transkribus_email': {'read_only': True}, } @extend_schema_field(inline_serializer( @@ -111,10 +104,6 @@ class UserSerializer(SimpleUserSerializer): def get_features(self, *args, **kwargs): return settings.ARKINDEX_FEATURES - @extend_schema_field(serializers.EmailField(allow_null=True)) - def get_transkribus_import_email(self, *args, **kwargs): - return settings.TRANSKRIBUS_EMAIL - def update(self, instance, validated_data): if 'password' in validated_data: instance.set_password(validated_data.pop('password')) @@ -198,34 +187,6 @@ class PasswordResetConfirmSerializer(serializers.Serializer): user.save() -class UserTranskribusSerializer(serializers.Serializer): - """ - A serializer that allows to verify Transkribus credentials - and to save the Transkribus email - """ - transkribus_email = serializers.EmailField() - transkribus_password = serializers.CharField(write_only=True, style={'input_type': 'password'}) - - def validate(self, data): - transkribus_email = data.get('transkribus_email') - transkribus_password = data.pop('transkribus_password') - - # Check that the credentials are correct - try: - transkribus_client.login(email=transkribus_email, password=transkribus_password) - except Exception: - raise serializers.ValidationError( - {"__all__" : ["The email or password is incorrect"]} - ) - - return data - - def update(self, instance, validated_data): - instance.transkribus_email = validated_data.get('transkribus_email', None) - instance.save() - return instance - - class JobSerializer(serializers.Serializer): """ Serializers a RQ job. diff --git a/arkindex/users/tests/test_update_transkribus_email.py b/arkindex/users/tests/test_update_transkribus_email.py deleted file mode 100644 index 8bb9d2c618..0000000000 --- a/arkindex/users/tests/test_update_transkribus_email.py +++ /dev/null @@ -1,43 +0,0 @@ -from unittest.mock import patch - -from django.test import override_settings -from django.urls import reverse -from rest_framework import status - -from arkindex.project.tests import FixtureAPITestCase - - -@override_settings(TRANSKRIBUS_EMAIL=None, TRANSKRIBUS_PASSWORD=None) -class TestUpdateTranskribusEmail(FixtureAPITestCase): - - def test_requires_login(self): - response = self.client.patch(reverse('api:user-transkribus'), { - 'transkribus_email': 'nope@nope.com', - 'transkribus_password': '42' - }, format='json') - self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) - - @patch('transkribus.TranskribusAPI.login') - def test_wrong_credentials(self, mock_transkribus): - mock_transkribus.side_effect = Exception() - self.client.force_login(self.user) - response = self.client.patch(reverse('api:user-transkribus'), { - 'transkribus_email': 'nope@nope.com', - 'transkribus_password': '42' - }, format='json') - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual(response.json(), {"__all__": ["The email or password is incorrect"]}) - - @patch('transkribus.TranskribusAPI.login') - def test_update_transkribus_email(self, mock_transkribus): - mock_transkribus.return_value = {"email": "nope@nope.com"} - self.client.force_login(self.user) - - self.assertIsNone(self.user.transkribus_email) - response = self.client.patch(reverse('api:user-transkribus'), { - 'transkribus_email': 'nope@nope.com', - 'transkribus_password': '42' - }, format='json') - self.assertEqual(response.status_code, status.HTTP_200_OK) - self.user.refresh_from_db() - self.assertEqual(self.user.transkribus_email, "nope@nope.com") diff --git a/requirements.txt b/requirements.txt index fc18c65f64..5c6c517da4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,6 +22,4 @@ teklia-license==0.1.1 git+https://gitlab.teklia.com/arkindex/license.git#egg=teklia-license teklia-toolbox==0.1.3 tenacity==8.2.2 -transkribus-client>=0.1.1 -git+https://gitlab.teklia.com/arkindex/transkribus.git#egg=transkribus-client uritemplate==4.1.1 -- GitLab