diff --git a/arkindex/process/migrations/0034_remove_iiif_import_mode.py b/arkindex/process/migrations/0034_remove_iiif_import_mode.py new file mode 100644 index 0000000000000000000000000000000000000000..8c9f981118883e4a84f5d6bcbe473a8f9bf53aba --- /dev/null +++ b/arkindex/process/migrations/0034_remove_iiif_import_mode.py @@ -0,0 +1,22 @@ +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ("process", "0033_remove_process_generate_thumbnails"), + ] + + operations = [ + migrations.RunSQL( + [ + """ + UPDATE process_process + SET mode = 'files' + WHERE mode = 'iiif' + """ + ], + reverse_sql=migrations.RunSQL.noop, + elidable=True, + ) + ] diff --git a/arkindex/process/models.py b/arkindex/process/models.py index 823b25e67c86da509cd7efa0bc3d023f63ba29af..86723642f6270571f5fed80690dd2b890d2b3068 100644 --- a/arkindex/process/models.py +++ b/arkindex/process/models.py @@ -61,7 +61,6 @@ class ActivityState(Enum): class ProcessMode(Enum): Files = "files" - IIIF = "iiif" Workers = "workers" Template = "template" S3 = "s3" diff --git a/arkindex/process/serializers/files.py b/arkindex/process/serializers/files.py index 497aa2e5f5a57a7f25cead7153b48b1c0ad91ce0..20ba91cbbbb65164157803d50551fb64d2bd36b6 100644 --- a/arkindex/process/serializers/files.py +++ b/arkindex/process/serializers/files.py @@ -1,3 +1,5 @@ +from textwrap import dedent + from drf_spectacular.utils import extend_schema_field from rest_framework import serializers @@ -14,7 +16,13 @@ class DataFileSerializer(serializers.ModelSerializer): """ status = EnumField(S3FileStatus) - s3_url = serializers.SerializerMethodField() + s3_url = serializers.SerializerMethodField( + help_text=dedent(""" + URL that can be used to download this DataFile from S3. + + Only available to Ponos tasks of `Files` processes or instance administrators. + """) + ) class Meta: model = DataFile @@ -41,11 +49,11 @@ class DataFileSerializer(serializers.ModelSerializer): def get_s3_url(self, obj): if "request" not in self.context: return - # Only allow the S3 URL for ponos tasks of Files or IIIF processes or admins + # Only allow the S3 URL for ponos tasks of Files processes or admins request = self.context["request"] if is_admin_or_ponos_task(request): request_process = get_process_from_task_auth(request) - if not request_process or request_process.mode in (ProcessMode.Files, ProcessMode.IIIF): + if not request_process or request_process.mode == ProcessMode.Files: return obj.s3_url diff --git a/arkindex/process/serializers/imports.py b/arkindex/process/serializers/imports.py index d5005782d1df5abe8f50351eca4971348ca099a5..539d78c41b85a7ba3b9aed280a4ee8e7fba49eac 100644 --- a/arkindex/process/serializers/imports.py +++ b/arkindex/process/serializers/imports.py @@ -184,10 +184,10 @@ class ProcessDetailsSerializer(ProcessSerializer): # Fields that can always be edited on any process of any state editable_fields = {"name", "state"} - # Allow editing the element ID and name on Files and IIIF processes at any time + # Allow editing the element ID and name on Files processes at any time # TODO: Only allow editing the element ID on a running file import to Ponos tasks, # since this edition is only permitted to show the "View element" button once the import completes. - if self.instance.mode in (ProcessMode.Files, ProcessMode.IIIF): + if self.instance.mode == ProcessMode.Files: editable_fields.add("element") # If some fields are being edited that are not the fields that are always editable @@ -239,7 +239,6 @@ class ProcessListSerializer(ProcessLightSerializer): class FilesProcessSerializer(serializers.ModelSerializer): - mode = EnumField(ProcessMode, default=ProcessMode.Files) files = serializers.PrimaryKeyRelatedField( queryset=DataFile.objects.select_related("corpus"), many=True, @@ -249,7 +248,7 @@ class FilesProcessSerializer(serializers.ModelSerializer): allow_null=True, source="element", ) - folder_type = serializers.SlugField(required=False, allow_null=True) + folder_type = serializers.SlugField() element_type = serializers.SlugField() farm_id = serializers.PrimaryKeyRelatedField( queryset=Farm.objects.all(), @@ -262,15 +261,11 @@ class FilesProcessSerializer(serializers.ModelSerializer): creator = serializers.HiddenField(default=serializers.CurrentUserDefault()) default_error_messages = { - "mode_not_allowed": "This mode is not allowed when importing from files", "files_required": "At least one file is required to start an import process", "unique_corpus": "Imports can only run on files from a single corpus", "corpus_read_only": "Cannot write in corpus", "folder_not_found": "Folder does not exist", - "unsupported_content_type": "File imports can only import images, PDF documents or ZIP archives", - "iiif_only": "IIIF imports can only import IIIF documents", - "folder_required": "Either folder_type, folder_id or both are required", - "iiif_folder_required": "IIIF imports require both folder_type and element_type", + "unsupported_content_type": "File imports can only import images, PDF documents, ZIP archives or IIIF manifests", "type_not_found": "Element type {slug!r} does not exist", "type_not_folder": "Element type {slug!r} is not a folder", "type_folder": "Element type {slug!r} is a folder", @@ -280,7 +275,6 @@ class FilesProcessSerializer(serializers.ModelSerializer): class Meta: model = Process fields = ( - "mode", "files", "folder_id", "folder_type", @@ -289,11 +283,6 @@ class FilesProcessSerializer(serializers.ModelSerializer): "creator", ) - def validate_mode(self, mode): - if mode not in (ProcessMode.Files, ProcessMode.IIIF): - self.fail("mode_not_allowed") - return mode - def validate_files(self, files): if not files: self.fail("files_required") @@ -326,21 +315,14 @@ class FilesProcessSerializer(serializers.ModelSerializer): return farm def validate(self, data): - if data["mode"] == ProcessMode.Files: - if not all( - f.content_type == "application/pdf" - or f.content_type in settings.ARCHIVE_MIME_TYPES - or f.content_type.startswith("image/") - for f in data["files"] - ): - self.fail("unsupported_content_type") - - elif data["mode"] == ProcessMode.IIIF: - if not set(f.content_type.split(";")[0] for f in data["files"]) <= {"application/json", "application/ld+json"}: - self.fail("iiif_only") - - else: - raise NotImplementedError + if not all( + f.content_type == "application/pdf" + or f.content_type in settings.ARCHIVE_MIME_TYPES + or f.content_type.startswith("image/") + or f.content_type.split(";")[0] in ["application/json", "application/ld+json"] + for f in data["files"] + ): + self.fail("unsupported_content_type") data["corpus"] = corpus = data["files"][0].corpus @@ -354,29 +336,21 @@ class FilesProcessSerializer(serializers.ModelSerializer): data["element_type"] = element_type folder_type_slug, folder = data.get("folder_type"), data.get("element") - if folder_type_slug: - folder_type = corpus.types.filter(slug=folder_type_slug).first() - if not folder_type: - self.fail("type_not_found", slug=folder_type_slug) - if not folder_type.folder: - self.fail("type_not_folder", slug=folder_type_slug) - data["folder_type"] = folder_type - else: - if data["mode"] == ProcessMode.IIIF: - # folder_type is required in IIIF - self.fail("iiif_folder_required") - if not folder: - # Either folder_type or folder_id are required for other imports - self.fail("folder_required") - # When a folder is set, it must be in the same corpus as all files - if folder.corpus_id != corpus.id: - self.fail("wrong_folder_corpus") + folder_type = corpus.types.filter(slug=folder_type_slug).first() + if not folder_type: + self.fail("type_not_found", slug=folder_type_slug) + if not folder_type.folder: + self.fail("type_not_folder", slug=folder_type_slug) + data["folder_type"] = folder_type + # When a folder is set, it must be in the same corpus as all files + if folder and folder.corpus_id != corpus.id: + self.fail("wrong_folder_corpus") return data def create(self, validated_data): files = validated_data.pop("files") - process = super().create(validated_data) + process = super().create({"mode": ProcessMode.Files.value, **validated_data}) process.files.set(files) process.run() return process diff --git a/arkindex/process/tests/test_datafile_api.py b/arkindex/process/tests/test_datafile_api.py index 1b81c3485616aca6a3a795b5d5db997f97cd4457..93eea48ab6b505ae29a9d42851201a2c90dfc9fc 100644 --- a/arkindex/process/tests/test_datafile_api.py +++ b/arkindex/process/tests/test_datafile_api.py @@ -171,7 +171,7 @@ class TestDataFileApi(FixtureAPITestCase): def test_retrieve_datafile_s3_url_task_process_mode(self, gen_url_mock): """ Ponos task authentication allows access to the S3 URL, only if the task's - parent process has a Files or IIIF mode. + parent process has a Files mode. """ user = User.objects.create(email="user2@test.test", display_name="User 2", verified_email=True) gen_url_mock.return_value = "http://somewhere" @@ -185,7 +185,7 @@ class TestDataFileApi(FixtureAPITestCase): with self.settings(IMPORTS_WORKER_VERSION=str(self.import_worker_version.id)): process.run() cases = [ - (process_mode, "http://somewhere" if process_mode in (ProcessMode.Files, ProcessMode.IIIF) else None) + (process_mode, "http://somewhere" if process_mode == ProcessMode.Files else None) for process_mode in ProcessMode ] for process_mode, s3_url in cases: diff --git a/arkindex/process/tests/test_processes.py b/arkindex/process/tests/test_processes.py index 055bfb7c5ccfe9168231430480cc107326a5de7a..19d399e75ed3708e46828d5a76e0f5bb0c62c5dd 100644 --- a/arkindex/process/tests/test_processes.py +++ b/arkindex/process/tests/test_processes.py @@ -58,6 +58,11 @@ class TestProcesses(FixtureAPITestCase): size=42, content_type="application/json", ) + cls.bad_df = cls.corpus.files.create( + name="test.mp4", + size=42, + content_type="video/mp4", + ) cls.page_type = ElementType.objects.get(corpus=cls.corpus, slug="page") cls.volume_type = ElementType.objects.get(corpus=cls.corpus, slug="volume") cls.ml_class = cls.corpus.ml_classes.create(name="clafoutis") @@ -1790,32 +1795,6 @@ class TestProcesses(FixtureAPITestCase): ["import_s3"], ) - def test_retry_iiif(self): - self.client.force_login(self.user) - process = self.corpus.processes.create( - mode=ProcessMode.IIIF, - creator=self.user, - ) - process.worker_runs.create(version=self.version_with_model) - process.tasks.create(state=State.Error, run=0, depth=0) - self.assertEqual(process.state, State.Error) - process.finished = timezone.now() - - with ( - self.settings(IMPORTS_WORKER_VERSION=str(self.version_with_model.id)), - self.assertNumQueries(14), - ): - response = self.client.post(reverse("api:process-retry", kwargs={"pk": process.id})) - self.assertEqual(response.status_code, status.HTTP_200_OK) - - process.refresh_from_db() - self.assertEqual(process.state, State.Unscheduled) - self.assertIsNone(process.finished) - self.assertQuerysetEqual( - process.tasks.filter(run=1).values_list("slug", flat=True), - ["import_iiif"], - ) - def test_from_files_requires_login(self): response = self.client.post(reverse("api:files-process"), { "files": [str(self.img_df.id)], @@ -1859,7 +1838,6 @@ class TestProcesses(FixtureAPITestCase): with self.settings(IMPORTS_WORKER_VERSION=str(self.import_worker_version.id)): response = self.client.post(reverse("api:files-process"), { "files": [str(self.pdf_df.id)], - "mode": "files", "folder_type": "volume", "element_type": "page", }, format="json") @@ -1870,6 +1848,24 @@ class TestProcesses(FixtureAPITestCase): self.assertListEqual(list(process.files.all()), [self.pdf_df]) self.assertIsNone(process.element) + @override_settings(IMPORTS_WORKER_VERSION=None) + def test_from_files_iiif(self): + self.client.force_login(self.user) + + with self.settings(IMPORTS_WORKER_VERSION=str(self.import_worker_version.id)): + response = self.client.post(reverse("api:files-process"), { + "files": [str(self.iiif_df.id)], + "folder_type": "volume", + "element_type": "page", + }, format="json") + self.assertEqual(response.status_code, status.HTTP_201_CREATED) + + data = response.json() + process = Process.objects.get(id=data["id"]) + self.assertEqual(process.mode, ProcessMode.Files) + self.assertListEqual(list(process.files.all()), [self.iiif_df]) + self.assertIsNone(process.element) + @override_settings(IMPORTS_WORKER_VERSION=None) def test_from_files_multiple_types(self): self.client.force_login(self.user) @@ -1884,11 +1880,12 @@ class TestProcesses(FixtureAPITestCase): for content_type in settings.ARCHIVE_MIME_TYPES ) - with self.settings(IMPORTS_WORKER_VERSION=str(self.import_worker_version.id)), self.assertNumQueries(37): + with self.settings(IMPORTS_WORKER_VERSION=str(self.import_worker_version.id)), self.assertNumQueries(38): response = self.client.post(reverse("api:files-process"), { "files": [ str(self.pdf_df.id), str(self.img_df.id), + str(self.iiif_df.id), *(datafile.id for datafile in archive_datafiles), ], "mode": "files", @@ -1904,137 +1901,41 @@ class TestProcesses(FixtureAPITestCase): process.files.all(), [ self.img_df, self.pdf_df, + self.iiif_df, *archive_datafiles, ], ordered=False, ) self.assertIsNone(process.element) - @override_settings( - PONOS_DEFAULT_ENV={"ARKINDEX_API_TOKEN": "testToken"}, - ARKINDEX_TASKS_IMAGE="registry.teklia.com/tasks", - ) - def test_from_files_iiif(self): + def test_from_files_files_wrong_type(self): self.client.force_login(self.user) - with self.assertNumQueries(25), self.settings(IMPORTS_WORKER_VERSION=str(self.import_worker_version.id)): + with self.assertNumQueries(4): response = self.client.post(reverse("api:files-process"), { - "files": [str(self.iiif_df.id)], - "mode": "iiif", + "files": [str(self.bad_df.id)], "folder_type": "volume", "element_type": "page", }, format="json") - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - - data = response.json() - process = Process.objects.get(id=data["id"]) - - self.assertEqual(process.mode, ProcessMode.IIIF) - self.assertListEqual(list(process.files.all()), [self.iiif_df]) - self.assertEqual(process.folder_type.slug, "volume") - self.assertEqual(process.element_type.slug, "page") - self.assertIsNone(process.element) - self.assertEqual(process.creator, self.user) - self.assertEqual(process.corpus, self.corpus) - - worker_run = process.worker_runs.get() - self.assertEqual(worker_run.version, self.import_worker_version) - self.assertListEqual(worker_run.parents, []) - self.assertIsNone(worker_run.configuration_id) - self.assertIsNone(worker_run.model_version_id) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertTrue(process.tasks.exists()) + self.assertEqual(response.json(), {"non_field_errors": ["File imports can only import images, PDF documents, ZIP archives or IIIF manifests"]}) @override_settings(IMPORTS_WORKER_VERSION=None) - def test_from_files_iiif_with_json_charset(self): - self.iiif_df.content_type = "application/json;charset=utf-8" - self.assertEqual(self.iiif_df.content_type, "application/json;charset=utf-8") - + def test_from_files_mode_ignored(self): self.client.force_login(self.user) - with(self.settings(IMPORTS_WORKER_VERSION=str(self.import_worker_version.id))): - response = self.client.post(reverse("api:files-process"), { - "files": [str(self.iiif_df.id)], - "mode": "iiif", - "folder_type": "volume", - "element_type": "page", - }, format="json") - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - data = response.json() - process = Process.objects.get(id=data["id"]) - self.assertEqual(process.mode, ProcessMode.IIIF) - self.assertListEqual(list(process.files.all()), [self.iiif_df]) - self.assertEqual(process.folder_type.slug, "volume") - self.assertEqual(process.element_type.slug, "page") - self.assertIsNone(process.element) - @override_settings(IMPORTS_WORKER_VERSION=None) - def test_from_files_iiif_with_json_profile_and_charset(self): - self.iiif_df.content_type = 'application/ld+json;profile="http://iiif.io/api/presentation/2/context.json";charset=utf-8' - self.assertEqual(self.iiif_df.content_type, 'application/ld+json;profile="http://iiif.io/api/presentation/2/context.json";charset=utf-8') - - self.client.force_login(self.user) - with(self.settings(IMPORTS_WORKER_VERSION=str(self.import_worker_version.id))): + with self.settings(IMPORTS_WORKER_VERSION=str(self.import_worker_version.id)): response = self.client.post(reverse("api:files-process"), { "files": [str(self.iiif_df.id)], - "mode": "iiif", "folder_type": "volume", "element_type": "page", + "mode": "easy" }, format="json") self.assertEqual(response.status_code, status.HTTP_201_CREATED) data = response.json() process = Process.objects.get(id=data["id"]) - self.assertEqual(process.mode, ProcessMode.IIIF) - self.assertListEqual(list(process.files.all()), [self.iiif_df]) - self.assertEqual(process.folder_type.slug, "volume") - self.assertEqual(process.element_type.slug, "page") - self.assertIsNone(process.element) - - def test_from_files_iiif_requires_folder_type(self): - self.client.force_login(self.user) - response = self.client.post(reverse("api:files-process"), { - "files": [str(self.iiif_df.id)], - "mode": "iiif", - "element_type": "page", - }, format="json") - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertDictEqual(response.json(), { - "non_field_errors": ["IIIF imports require both folder_type and element_type"] - }) - - def test_from_files_invalid_mode(self): - self.client.force_login(self.user) - response = self.client.post(reverse("api:files-process"), { - "files": [str(self.img_df.id)], - "folder_type": "volume", - "element_type": "page", - "mode": "repository", - }, format="json") - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - - def test_from_files_iiif_wrong_type(self): - self.client.force_login(self.user) - response = self.client.post(reverse("api:files-process"), { - "files": [str(self.pdf_df.id)], - "folder_type": "volume", - "element_type": "page", - "mode": "iiif", - }, format="json") - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual(response.json(), {"non_field_errors": ["IIIF imports can only import IIIF documents"]}) - - def test_from_files_files_wrong_type(self): - self.client.force_login(self.user) - - with self.assertNumQueries(4): - response = self.client.post(reverse("api:files-process"), { - "files": [str(self.iiif_df.id)], - "folder_type": "volume", - "element_type": "page", - "mode": "files", - }, format="json") - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - - self.assertEqual(response.json(), {"non_field_errors": ["File imports can only import images, PDF documents or ZIP archives"]}) + self.assertEqual(process.mode, ProcessMode.Files) @override_settings(IMPORTS_WORKER_VERSION=None) def test_from_files_folder_id(self): @@ -2047,6 +1948,7 @@ class TestProcesses(FixtureAPITestCase): "files": [str(self.pdf_df.id)], "mode": "files", "folder_id": str(volume.id), + "folder_type": "volume", "element_type": "page", }, format="json", @@ -2056,6 +1958,23 @@ class TestProcesses(FixtureAPITestCase): process = Process.objects.get(id=data["id"]) self.assertEqual(process.element, volume) + def test_from_files_folder_type_required(self): + self.client.force_login(self.user) + volume = self.corpus.elements.get(name="Volume 1") + with self.settings(IMPORTS_WORKER_VERSION=str(self.import_worker_version.id)): + response = self.client.post( + reverse("api:files-process"), + { + "files": [str(self.pdf_df.id)], + "mode": "files", + "folder_id": str(volume.id), + "element_type": "page", + }, + format="json", + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertDictEqual(response.json(), {"folder_type": ["This field is required."]}) + def test_from_files_no_files(self): self.client.force_login(self.user) response = self.client.post(reverse("api:files-process"), {