def _rename_metadata_type(apps, schema_editor, old, new):
    """
    Rewrite the stored metadata type ``old`` as ``new`` on both metadata tables.

    The update is issued as parameterized SQL through the schema editor rather
    than through the ORM, so that it does not depend on which members the
    ``MetaType`` enum currently declares: one of the two values is, by
    construction, absent from the enum on either side of this migration.

    :param apps: Historical app registry provided by ``RunPython``.
    :param schema_editor: Schema editor bound to the database being migrated.
    :param old: Raw type value to look for.
    :param new: Raw type value to store instead.
    """
    for model_name in ('MetaData', 'AllowedMetaData'):
        # Table and column names come from the historical model state,
        # so nothing about the schema is hard-coded here.
        opts = apps.get_model('documents', model_name)._meta
        table = schema_editor.quote_name(opts.db_table)
        column = schema_editor.quote_name(opts.get_field('type').column)
        schema_editor.execute(
            'UPDATE {table} SET {column} = %s WHERE {column} = %s'.format(table=table, column=column),
            [new, old],
        )


def convert_html_metadata_to_markdown(apps, schema_editor):
    """
    Forward step: turn every ``html`` metadata and allowed metadata into ``markdown``.
    """
    _rename_metadata_type(apps, schema_editor, old='html', new='markdown')


def convert_markdown_metadata_to_html(apps, schema_editor):
    """
    Reverse step: turn every ``markdown`` metadata and allowed metadata back into ``html``.

    This cannot go through ``MetaType``: the enum no longer has an ``HTML``
    member once this migration's code is deployed, so referencing it would
    raise ``AttributeError`` when the migration is unapplied.
    """
    _rename_metadata_type(apps, schema_editor, old='markdown', new='html')
def to_representation(self, instance):
    """
    Serialize a metadata with its value sanitized by ``bleach``.

    Markdown metadata keep a fixed allow-list of HTML tags; every other
    metadata type is cleaned with an empty allow-list. The sanitized text is
    only used for the serialized output: the instance gets its raw value back
    before returning, so serializing never alters the object handed in.

    :param instance: Metadata object to serialize; its ``type`` and ``value`` are read.
    :returns: Whatever the parent serializer returns for the sanitized instance.
    """
    # Clean metadata value
    tags = []
    if instance.type == MetaType.Markdown:
        # NOTE(review): 'iframe' and 'marquee' are part of this allow-list;
        # confirm that is intentional before relying on it for untrusted input.
        tags = [
            'a', 'b', 'blockquote', 'body', 'br', 'div',
            'em', 'h1', 'h2', 'h3', 'html', 'i', 'iframe',
            'img', 'li', 'marquee', 'ol', 'p', 'sup',
            'table', 'tbody', 'td', 'th', 'thead', 'tr',
            'ul'
        ]
    raw_value = instance.value
    instance.value = bleach.clean(raw_value, tags=tags)
    try:
        return super().to_representation(instance)
    finally:
        # Put the stored value back so a later save() or read on this same
        # object does not see (or persist) the sanitized text.
        instance.value = raw_value
def test_render_markdown_metadata(self):
    """
    Plain Markdown source is returned unchanged when retrieving the metadata.
    """
    metadata = self.vol.metadatas.create(type=MetaType.Markdown, name='Some text', value='# Title\n## Subtitle\nbla')
    self.client.force_login(self.user)
    response = self.client.get(reverse('api:metadata-edit', kwargs={'pk': str(metadata.id)}))
    self.assertEqual(response.status_code, status.HTTP_200_OK)
    self.assertDictEqual(
        response.json(),
        {
            'id': str(metadata.id),
            'type': 'markdown',
            'name': 'Some text',
            'value': '# Title\n## Subtitle\nbla',
            'entity': None,
            'dates': [],
            'worker_version': None
        }
    )

def test_render_html_metadata(self):
    """
    HTML tags from the serializer's allow-list (here ``h1``) are kept as-is in Markdown metadata.
    """
    metadata = self.vol.metadatas.create(type=MetaType.Markdown, name='Some text', value='<h1>Title</h1>')
    self.client.force_login(self.user)
    response = self.client.get(reverse('api:metadata-edit', kwargs={'pk': str(metadata.id)}))
    self.assertEqual(response.status_code, status.HTTP_200_OK)
    self.assertDictEqual(
        response.json(),
        {
            'id': str(metadata.id),
            'type': 'markdown',
            'name': 'Some text',
            'value': '<h1>Title</h1>',
            'entity': None,
            'dates': [],
            'worker_version': None
        }
    )

def test_unrender_html_metadata(self):
    """
    HTML tags outside the serializer's allow-list (here ``style``) are returned escaped, not raw.
    """
    metadata = self.vol.metadatas.create(type=MetaType.Markdown, name='Some text', value='<style type="text/css">* { display: none !important; }</style>')
    self.client.force_login(self.user)
    response = self.client.get(reverse('api:metadata-edit', kwargs={'pk': str(metadata.id)}))
    self.assertEqual(response.status_code, status.HTTP_200_OK)
    self.assertDictEqual(
        response.json(),
        {
            'id': str(metadata.id),
            'type': 'markdown',
            'name': 'Some text',
            # The serializer escapes the disallowed tag instead of passing it through
            'value': '&lt;style type="text/css"&gt;* { display: none !important; }&lt;/style&gt;',
            'entity': None,
            'dates': [],
            'worker_version': None
        }
    )

def test_create_markdown_metadata(self):
    """
    A Markdown metadata can be created and is stored and returned unchanged.
    """
    AllowedMetaData.objects.create(corpus=self.corpus, type=MetaType.Markdown, name='text')
    self.client.force_login(self.user)
    response = self.client.post(
        reverse('api:element-metadata', kwargs={'pk': str(self.vol.id)}),
        data={'type': 'markdown', 'name': 'text', 'value': '# Title\n## Subtitle\nbla'}
    )
    self.assertEqual(response.status_code, status.HTTP_201_CREATED)
    md = self.vol.metadatas.get(type=MetaType.Markdown, name='text')
    self.assertEqual(md.value, '# Title\n## Subtitle\nbla')
    response = self.client.get(reverse('api:metadata-edit', kwargs={'pk': str(md.id)}))
    self.assertEqual(response.status_code, status.HTTP_200_OK)
    self.assertEqual(response.json()['value'], '# Title\n## Subtitle\nbla')

def test_create_html_metadata(self):
    """
    Disallowed HTML is stored verbatim in the database but escaped when served by the API.
    """
    AllowedMetaData.objects.create(corpus=self.corpus, type=MetaType.Markdown, name='text')
    self.client.force_login(self.user)
    response = self.client.post(
        reverse('api:element-metadata', kwargs={'pk': str(self.vol.id)}),
        data={'type': 'markdown', 'name': 'text', 'value': '<style type="text/css">* { display: none !important; }</style>'}
    )
    self.assertEqual(response.status_code, status.HTTP_201_CREATED)
    md = self.vol.metadatas.get(type=MetaType.Markdown, name='text')
    # Sanitizing happens at serialization time only: the stored value stays raw
    self.assertEqual(md.value, '<style type="text/css">* { display: none !important; }</style>')
    response = self.client.get(reverse('api:metadata-edit', kwargs={'pk': str(md.id)}))
    self.assertEqual(response.status_code, status.HTTP_200_OK)
    self.assertEqual(
        response.json()['value'],
        '&lt;style type="text/css"&gt;* { display: none !important; }&lt;/style&gt;'
    )

def test_create_html_metadata_with_other_type(self):
    """
    For a non-Markdown type no HTML tag is allowed, so even ``a`` is escaped in the API output.
    """
    self.client.force_login(self.user)
    response = self.client.post(
        reverse('api:element-metadata', kwargs={'pk': str(self.vol.id)}),
        data={'type': 'date', 'name': 'edition', 'value': '<a href="lalaland">oops</a>'}
    )
    self.assertEqual(response.status_code, status.HTTP_201_CREATED)
    md = self.vol.metadatas.get(type=MetaType.Date, name='edition')
    # Sanitizing happens at serialization time only: the stored value stays raw
    self.assertEqual(md.value, '<a href="lalaland">oops</a>')
    response = self.client.get(reverse('api:metadata-edit', kwargs={'pk': str(md.id)}))
    self.assertEqual(response.status_code, status.HTTP_200_OK)
    self.assertEqual(response.json()['value'], '&lt;a href="lalaland"&gt;oops&lt;/a&gt;')