Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • arkindex/backend
1 result
Show changes
Commits on Source (2)
......@@ -478,7 +478,7 @@ class FilesProcess(CreateAPIView):
folder_type = serializer.validated_data.get('folder_type')
element_type = serializer.validated_data['element_type']
if folder and folder.corpus != corpus:
if folder and folder.corpus_id != corpus.id:
# The files' corpus is already validated as writable
raise ValidationError({'__all__': ['Element and files are in different corpora']})
......
......@@ -24,7 +24,6 @@ from arkindex.project.validators import MaxValueValidator
from arkindex.training.models import ModelVersionState
from arkindex.users.models import Role
from arkindex.users.utils import get_max_level
from transkribus import TranskribusAPI
class ProcessLightSerializer(serializers.ModelSerializer):
......@@ -242,7 +241,10 @@ class ProcessListSerializer(ProcessLightSerializer):
class FilesProcessSerializer(serializers.Serializer):
mode = EnumField(ProcessMode, default=ProcessMode.Files)
files = serializers.PrimaryKeyRelatedField(queryset=DataFile.objects.all(), many=True)
files = serializers.PrimaryKeyRelatedField(
queryset=DataFile.objects.select_related('corpus'),
many=True,
)
folder_id = serializers.UUIDField(required=False, allow_null=True)
folder_type = serializers.SlugField(required=False, allow_null=True)
element_type = serializers.SlugField()
......@@ -253,7 +255,7 @@ class FilesProcessSerializer(serializers.Serializer):
'unique_corpus': 'Imports can only run on files from a single corpus',
'corpus_read_only': 'Cannot write in corpus',
'folder_not_found': 'Folder does not exist',
'image_or_pdf': 'File imports can only import images or PDF documents',
'unsupported_content_type': 'File imports can only import images, PDF documents or ZIP archives',
'iiif_only': 'IIIF imports can only import IIIF documents',
'folder_required': 'Either folder_type, folder_id or both are required',
'iiif_folder_required': 'IIIF imports require both folder_type and element_type',
......@@ -274,7 +276,7 @@ class FilesProcessSerializer(serializers.Serializer):
if len(corpora) > 1:
self.fail('unique_corpus')
corpus = corpora.pop()
if corpus not in Corpus.objects.writable(self.context['request'].user):
if not Corpus.objects.writable(self.context['request'].user).filter(id=corpus.id).exists():
self.fail('corpus_read_only')
return files
......@@ -291,8 +293,12 @@ class FilesProcessSerializer(serializers.Serializer):
def validate(self, data):
if data['mode'] == ProcessMode.Files:
if not all(f.content_type == 'application/pdf' or f.content_type.startswith('image/') for f in data['files']):
self.fail('image_or_pdf')
if not all(
f.content_type in ('application/pdf', 'application/zip', 'application/x-zip-compressed')
or f.content_type.startswith('image/')
for f in data['files']
):
self.fail('unsupported_content_type')
elif data['mode'] == ProcessMode.IIIF:
if not set(f.content_type.split(';')[0] for f in data['files']) <= {'application/json', 'application/ld+json'}:
......@@ -621,52 +627,6 @@ class CorpusProcessSerializer(serializers.Serializer):
return data
class ImportTranskribusSerializer(serializers.Serializer):
    """
    Serialize a Transkribus import
    """
    collection_id = serializers.IntegerField(min_value=1)

    def validate(self, data):
        """
        Check that the import can access the requested Transkribus collection.

        Three checks run in order, each raising ``serializers.ValidationError``
        on failure:

        1. the requesting user (``self.context['request'].user``) has a
           non-empty ``transkribus_email``;
        2. a ``TranskribusAPI`` client can be built with the credentials from
           ``settings.TRANSKRIBUS_EMAIL`` / ``settings.TRANSKRIBUS_PASSWORD``
           and can list the users of the collection;
        3. one of the listed users has an ``"email"`` equal to the requesting
           user's Transkribus email.

        Returns ``data`` unchanged when every check passes.
        """
        collection_id = data.get('collection_id')

        # Check Transkribus email
        transkribus_email = self.context['request'].user.transkribus_email
        if not transkribus_email:
            raise serializers.ValidationError(
                {"__all__": ["You have not registered your transkribus email"]}
            )

        # Log in as the Arkindex user on Transkribus.
        # NOTE(review): the exception types raised by TranskribusAPI are not
        # visible here, so the broad catch is kept; the original error is
        # chained so that it is not lost when debugging.
        try:
            transkribus_client = TranskribusAPI(
                email=settings.TRANSKRIBUS_EMAIL,
                password=settings.TRANSKRIBUS_PASSWORD,
            )
        except Exception as e:
            raise serializers.ValidationError(
                {"__all__": [f"Failed to login on Transkribus as {settings.TRANSKRIBUS_EMAIL}"]}
            ) from e

        # Check Arkindex's rights: any failure to list the collection users
        # is reported as the Arkindex account not being a member.
        try:
            users = transkribus_client.list_user_collection(collection_id)
        except Exception as e:
            raise serializers.ValidationError(
                {"collection_id": [f"User {settings.TRANSKRIBUS_EMAIL} is not a member of the collection {collection_id}"]}
            ) from e

        # Check the requesting user's rights
        if not any(member["email"] == transkribus_email for member in users):
            raise serializers.ValidationError(
                {"collection_id": [f"User {transkribus_email} is not a member of the collection {collection_id}"]}
            )

        return data
class CreateImportTranskribusErrorResponseSerializer(serializers.Serializer):
    # Declares an optional `collection_id` string field carrying validation
    # errors for the collection ID.
    # NOTE(review): presumably only used to document the error response of
    # the Transkribus import endpoint; confirm against its callers.
    collection_id = serializers.CharField(
        required=False,
        help_text="Errors that occurred during collection ID field validation.",
    )
class ProcessElementLightSerializer(serializers.ModelSerializer):
"""
Serialises an Element, using optimized query for ListProcessElement
......
......@@ -58,6 +58,16 @@ class TestProcesses(FixtureAPITestCase):
size=42,
content_type='application/json',
)
cls.zip_df = cls.corpus.files.create(
name='test.zip',
size=1337,
content_type='application/zip',
)
cls.windows_zip_df = cls.corpus.files.create(
name='windows.zip',
size=1337,
content_type='application/x-zip-compressed',
)
cls.page_type = ElementType.objects.get(corpus=cls.corpus, slug='page')
cls.volume_type = ElementType.objects.get(corpus=cls.corpus, slug='volume')
cls.ml_class = cls.corpus.ml_classes.create(name='clafoutis')
......@@ -1902,20 +1912,35 @@ class TestProcesses(FixtureAPITestCase):
self.assertIsNone(process.element)
@override_settings(IMPORTS_WORKER_VERSION=None)
def test_from_files_image_and_pdf(self):
def test_from_files_multiple_types(self):
self.client.force_login(self.user)
with self.settings(IMPORTS_WORKER_VERSION=str(self.import_worker_version.id)):
with self.settings(IMPORTS_WORKER_VERSION=str(self.import_worker_version.id)), self.assertNumQueries(30):
response = self.client.post(reverse('api:files-process'), {
'files': [str(self.pdf_df.id), str(self.img_df.id)],
'files': [
str(self.pdf_df.id),
str(self.img_df.id),
str(self.zip_df.id),
str(self.windows_zip_df.id),
],
'mode': 'files',
'folder_type': 'volume',
'element_type': 'page',
}, format='json')
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
data = response.json()
process = Process.objects.get(id=data['id'])
self.assertEqual(process.mode, ProcessMode.Files)
self.assertListEqual(list(process.files.all()), [self.img_df, self.pdf_df])
self.assertQuerysetEqual(
process.files.all(), [
self.img_df,
self.pdf_df,
self.zip_df,
self.windows_zip_df,
],
ordered=False,
)
self.assertIsNone(process.element)
@override_settings(
......@@ -1925,7 +1950,7 @@ class TestProcesses(FixtureAPITestCase):
def test_from_files_iiif(self):
self.client.force_login(self.user)
with self.assertNumQueries(28), self.settings(IMPORTS_WORKER_VERSION=str(self.import_worker_version.id)):
with self.assertNumQueries(27), self.settings(IMPORTS_WORKER_VERSION=str(self.import_worker_version.id)):
response = self.client.post(reverse('api:files-process'), {
'files': [str(self.iiif_df.id)],
'mode': 'iiif',
......@@ -2033,14 +2058,17 @@ class TestProcesses(FixtureAPITestCase):
def test_from_files_files_wrong_type(self):
self.client.force_login(self.user)
response = self.client.post(reverse('api:files-process'), {
'files': [str(self.iiif_df.id)],
'folder_type': 'volume',
'element_type': 'page',
'mode': 'files',
}, format='json')
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertEqual(response.json(), {'non_field_errors': ['File imports can only import images or PDF documents']})
with self.assertNumQueries(6):
response = self.client.post(reverse('api:files-process'), {
'files': [str(self.iiif_df.id)],
'folder_type': 'volume',
'element_type': 'page',
'mode': 'files',
}, format='json')
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertEqual(response.json(), {'non_field_errors': ['File imports can only import images, PDF documents or ZIP archives']})
@override_settings(IMPORTS_WORKER_VERSION=None)
def test_from_files_folder_id(self):
......