diff --git a/arkindex/dataimport/api.py b/arkindex/dataimport/api.py index aa7218e88df7fe643fbf99b7c420bf4dfef4f9cd..38cbe780d827881cfc808e66a1551a4d7a7870b5 100644 --- a/arkindex/dataimport/api.py +++ b/arkindex/dataimport/api.py @@ -183,7 +183,6 @@ class DataImportFromFiles(CreateAPIView): folder_name = serializer.validated_data.get('folder_name') folder_type_slug = serializer.validated_data.get('folder_type') element_type_slug = serializer.validated_data.get('element_type') - pdf_engine = serializer.validated_data.get('pdf_engine') ml_tools = serializer.validated_data.get('ml_tools') if folder: @@ -212,10 +211,6 @@ class DataImportFromFiles(CreateAPIView): if folder: payload['folder_id'] = str(folder.id) - if mode == DataImportMode.PDF: - assert pdf_engine is not None, 'PDF engine does not exist' - payload['pdf_engine'] = pdf_engine.value - self.dataimport = corpus.imports.create( creator=self.request.user, mode=mode, diff --git a/arkindex/dataimport/models.py b/arkindex/dataimport/models.py index 57b47f1c50ea6b75c0ddea8bf20eb65566ef3e2e..001c4446940f54c29f2abeffcfa88a583d2cceaf 100644 --- a/arkindex/dataimport/models.py +++ b/arkindex/dataimport/models.py @@ -4,7 +4,7 @@ from django.conf import settings from django.utils.functional import cached_property from rest_framework.exceptions import ValidationError from enumfields import EnumField, Enum -from arkindex_common.enums import DataImportMode, DataImportPDFEngine +from arkindex_common.enums import DataImportMode from arkindex_common.ml_tool import MLToolType from arkindex.project.aws import S3FileModelMixin from arkindex.project.models import IndexableModel @@ -47,14 +47,6 @@ class DataImport(IndexableModel): else: return self.workflow.state - @property - def pdf_engine(self): - if not self.payload: - return - if not self.payload.get('pdf_engine'): - return DataImportPDFEngine.Convert - return DataImportPDFEngine(self.payload.get('pdf_engine')) - def build_workflow(self): ''' Create a ponos workflow with a recipe according to configuration diff --git a/arkindex/dataimport/serializers/imports.py b/arkindex/dataimport/serializers/imports.py index 6d06845f208779922ca58d39ed4b9c5f09a1f970..49b56cee5ea5dfcbb3fbd55cf64494fb5dac023e 100644 --- a/arkindex/dataimport/serializers/imports.py +++ b/arkindex/dataimport/serializers/imports.py @@ -4,7 +4,7 @@ from rest_framework.utils import model_meta from django.conf import settings from django.db.models import Q from ponos.models import State -from arkindex_common.enums import DataImportMode, DataImportPDFEngine +from arkindex_common.enums import DataImportMode from arkindex.project.serializer_fields import EnumField, BestClassField from arkindex.documents.models import Corpus, Element, Classification, ClassificationState from arkindex.dataimport.models import DataImport, DataFile @@ -43,14 +43,6 @@ class ImagesPayloadSerializer(serializers.Serializer): ml_tools = MLToolTaskSerializer(many=True, required=False, default=list) -class PDFPayloadSerializer(ImagesPayloadSerializer): - """ - Serialize a pdf importing payload - """ - - pdf_engine = EnumField(DataImportPDFEngine, default=DataImportPDFEngine.Convert) - - class DataImportSerializer(DataImportLightSerializer): """ Serialize a data importing workflow with its payload @@ -68,7 +60,6 @@ class DataImportSerializer(DataImportLightSerializer): return payload_serializers = { DataImportMode.Images: ImagesPayloadSerializer, - DataImportMode.PDF: PDFPayloadSerializer, } self.fields['payload'] = payload_serializers.get(self.instance.mode, serializers.JSONField)() @@ -105,7 +96,6 @@ class DataImportFromFilesSerializer(serializers.Serializer): folder_name = serializers.CharField(max_length=250, required=False, allow_null=True) folder_type = serializers.SlugField(required=False, allow_null=True) element_type = serializers.SlugField() - pdf_engine = EnumField(DataImportPDFEngine, default=DataImportPDFEngine.Convert) ml_tools = MLToolTaskSerializer(many=True, required=False, allow_null=True) default_error_messages = { diff --git a/arkindex/dataimport/tests/test_imports.py b/arkindex/dataimport/tests/test_imports.py index fa93f12447ea28086affd69cc46b71cd28eafcca..2473317e73f164d0271ca784489a89af2a5b3683 100644 --- a/arkindex/dataimport/tests/test_imports.py +++ b/arkindex/dataimport/tests/test_imports.py @@ -2,7 +2,7 @@ from unittest.mock import patch, call, MagicMock from django.urls import reverse from rest_framework import status from arkindex_common.ml_tool import MLToolType -from arkindex_common.enums import DataImportMode, DataImportPDFEngine +from arkindex_common.enums import DataImportMode from arkindex.dataimport.models import DataImport, DataFile from arkindex.documents.models import Element, Corpus from arkindex.project.tests import FixtureAPITestCase @@ -276,40 +276,6 @@ class TestImports(FixtureAPITestCase): self.assertListEqual(list(dataimport.files.all()), [self.pdf_df]) self.assertEqual(Element.objects.get(id=dataimport.payload['folder_id']).name, 'Import pdf test') - def test_from_files_pdf_convert(self): - self.client.force_login(self.user) - response = self.client.post(reverse('api:import-from-files'), { - 'files': [str(self.pdf_df.id)], - 'mode': 'pdf', - 'pdf_engine': 'convert', - 'folder_type': 'volume', - 'element_type': 'page', - }, format='json') - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - data = response.json() - dataimport = DataImport.objects.get(id=data['id']) - self.assertEqual(dataimport.mode, DataImportMode.PDF) - self.assertListEqual(list(dataimport.files.all()), [self.pdf_df]) - self.assertEqual(dataimport.pdf_engine, DataImportPDFEngine.Convert) - self.assertEqual(Element.objects.get(id=dataimport.payload['folder_id']).name, 'Import pdf test') - - def test_from_files_pdf_poppler(self): - self.client.force_login(self.user) - response = self.client.post(reverse('api:import-from-files'), { - 'files': [str(self.pdf_df.id)], - 'mode': 'pdf', - 'pdf_engine': 'poppler', - 'folder_type': 'volume', - 'element_type': 'page', - }, format='json') - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - data = response.json() - dataimport = DataImport.objects.get(id=data['id']) - self.assertEqual(dataimport.mode, DataImportMode.PDF) - self.assertListEqual(list(dataimport.files.all()), [self.pdf_df]) - self.assertEqual(dataimport.pdf_engine, DataImportPDFEngine.Poppler) - self.assertEqual(Element.objects.get(id=dataimport.payload['folder_id']).name, 'Import pdf test') - def test_from_files_iiif(self): self.client.force_login(self.user) response = self.client.post(reverse('api:import-from-files'), {