diff --git a/arkindex/dataimport/api.py b/arkindex/dataimport/api.py index d0971a6b8d18187536dcad7165ce158876e8be58..2c42faa1a694f38bd1bfd1ff1a74eda7f4dbff95 100644 --- a/arkindex/dataimport/api.py +++ b/arkindex/dataimport/api.py @@ -371,26 +371,19 @@ class DataFileUpload(CorpusACLMixin, APIView): file_type = magic.from_buffer(file_obj.read(1024), mime=True) # libmagic 5.35 recognizes JSON, but older versions detect it as text/plain. - # To allow for IIIF imports, if the file is small enough, try to read as JSON. - # JSON-LD files with an expected IIIF context will use application/ld+json. - # JSON and JSON-LD files without the IIIF context will use application/json. - if file_type in ('text/plain', 'application/json') and file_obj.size < 5e6: + # To allow for IIIF imports, if the file is small enough, try to read as JSON, + # and use application/json instead. + if file_type == 'text/plain' and file_obj.size < 5e6: # Reopen file to reread from beginning file_obj.open() try: - jsonld_context = next(ijson.items(file_obj, '@context')) + for _ in ijson.parse(file_obj): + # Do nothing, just parse through the whole file to check for its syntax without using memory + pass except ijson.JSONError: pass - except StopIteration: - file_type = 'application/json' else: - # The JSON-LD @context attribute can be a string or an array of strings - if isinstance(jsonld_context, str): - jsonld_context = [jsonld_context] - if isinstance(jsonld_context, list) and settings.IIIF_PRESENTATION_CONTEXT in jsonld_context: - file_type = 'application/ld+json' - else: - file_type = 'application/json' + file_type = 'application/json' df = DataFile( corpus=corpus, diff --git a/arkindex/dataimport/serializers/imports.py b/arkindex/dataimport/serializers/imports.py index 49b56cee5ea5dfcbb3fbd55cf64494fb5dac023e..9444f53df5aa7c6b05e42bc206e71f91d89e2151 100644 --- a/arkindex/dataimport/serializers/imports.py +++ b/arkindex/dataimport/serializers/imports.py @@ -153,7 +153,7 @@ class DataImportFromFilesSerializer(serializers.Serializer): self.fail('images_only') elif data['mode'] == DataImportMode.IIIF: - if not set(f.content_type for f in data['files']) == {'application/ld+json'}: + if not set(f.content_type for f in data['files']) <= {'application/json', 'application/ld+json'}: self.fail('iiif_only') else: diff --git a/arkindex/dataimport/tests/test_files.py b/arkindex/dataimport/tests/test_files.py index 8deb01dce54c12fef9d8688cbf890ce745faad5c..4bed87a259b865b84afb73036f1f6d8220ea8932 100644 --- a/arkindex/dataimport/tests/test_files.py +++ b/arkindex/dataimport/tests/test_files.py @@ -186,7 +186,7 @@ class TestFiles(FixtureAPITestCase): @patch('arkindex.project.aws.s3') def test_file_upload_json(self, s3_mock): """ - Assert uploading a JSON document (not JSON-LD) uses application/json + Assert uploading a JSON document uses application/json """ f = SimpleUploadedFile('manifest', json.dumps({ 'a': 'b', @@ -202,43 +202,3 @@ class TestFiles(FixtureAPITestCase): df = DataFile.objects.get(id=data['id']) self.assertEqual(df.name, 'manifest') self.assertEqual(df.content_type, 'application/json') - - @patch('arkindex.project.aws.s3') - def test_file_upload_iiif(self, s3_mock): - """ - Assert uploading a JSON-LD document with an IIIF context uses application/ld+json - """ - f = SimpleUploadedFile('manifest', json.dumps({ - '@context': 'http://iiif.io/api/presentation/2/context.json', - }).encode('utf-8')) - s3_mock.Object.return_value.content_length = 62 - s3_mock.Object.return_value.content_type = 'application/ld+json' - - response = self.client.post(reverse('api:file-upload', kwargs={'pk': self.corpus.id}), data={'file': f}) - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - data = response.json() - self.assertIn('id', data) - - df = DataFile.objects.get(id=data['id']) - self.assertEqual(df.name, 'manifest') - self.assertEqual(df.content_type, 'application/ld+json') - - @patch('arkindex.project.aws.s3') - def test_file_upload_not_iiif(self, s3_mock): - """ - Assert uploading a JSON-LD document without an IIIF context uses application/json - """ - f = SimpleUploadedFile('manifest', json.dumps({ - '@context': 'http://iiif.io/api/presentation/42/context.json', - }).encode('utf-8')) - s3_mock.Object.return_value.content_length = 63 - s3_mock.Object.return_value.content_type = 'application/json' - - response = self.client.post(reverse('api:file-upload', kwargs={'pk': self.corpus.id}), data={'file': f}) - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - data = response.json() - self.assertIn('id', data) - - df = DataFile.objects.get(id=data['id']) - self.assertEqual(df.name, 'manifest') - self.assertEqual(df.content_type, 'application/json') diff --git a/arkindex/dataimport/tests/test_imports.py b/arkindex/dataimport/tests/test_imports.py index b9bebeac792a2d5e41f6704b338f761c996ab463..a616e4f1bc6d22a5f9bbf6fa5caf50fb09529ba9 100644 --- a/arkindex/dataimport/tests/test_imports.py +++ b/arkindex/dataimport/tests/test_imports.py @@ -34,7 +34,7 @@ class TestImports(FixtureAPITestCase): cls.iiif_df = cls.corpus.files.create( name='test.json', size=42, - content_type='application/ld+json', + content_type='application/json', ) def setUp(self):