diff --git a/arkindex/documents/api/elements.py b/arkindex/documents/api/elements.py index 05ac69d9f5872bdb4864a52928877ef2892eb02d..753dc38542e91b195573de6ce3b98cbc089d55a2 100644 --- a/arkindex/documents/api/elements.py +++ b/arkindex/documents/api/elements.py @@ -1626,6 +1626,10 @@ class ElementMetadata(ListCreateAPIView): The `reference` type is also indexed in search APIs and allows searching by known identifiers or references, or any arbitrary string. + + Values on metadata with a `markdown` type will allow Markdown syntax with a limited set of HTML tags. + The value will be sanitized and HTML entities will be escaped, but Markdown rendering has to be performed + on the client side. """ permission_classes = (IsVerifiedOrReadOnly, ) pagination_class = None diff --git a/arkindex/documents/tests/test_metadata.py b/arkindex/documents/tests/test_metadata.py index 98c63467914f44f0159e156d6824b77952de3617..cbe8e1d7d0c2f7b360ba7469ef147f41d8eb566d 100644 --- a/arkindex/documents/tests/test_metadata.py +++ b/arkindex/documents/tests/test_metadata.py @@ -1032,17 +1032,24 @@ class TestMetaData(FixtureAPITestCase): self.assertEqual(response.json()['value'], '<style type="text/css">* { display: none !important; }</style>') def test_create_html_metadata_with_other_type(self): - self.client.force_login(self.user) - response = self.client.post( - reverse('api:element-metadata', kwargs={'pk': str(self.vol.id)}), - data={'type': 'date', 'name': 'edition', 'value': '<a href="lalaland">oops</a>'} - ) - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - md = self.vol.metadatas.get(type=MetaType.Date, name='edition') - self.assertEqual(md.value, '<a href="lalaland">oops</a>') - response = self.client.get(reverse('api:metadata-edit', kwargs={'pk': str(md.id)})) - self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(response.json()['value'], '<a href="lalaland">oops</a>') + """ + Creating a metadata of a type other than Markdown should not cause any HTML sanitization + """ + self.client.force_login(self.superuser) + for type in {MetaType.Text, MetaType.Date, MetaType.Location, MetaType.Reference}: + with self.subTest(type=type): + response = self.client.post( + reverse('api:element-metadata', kwargs={'pk': str(self.vol.id)}), + data={'type': type.value, 'name': 'edition', 'value': '<a href="lalaland">oops</a>'} + ) + self.assertEqual(response.status_code, status.HTTP_201_CREATED, response.json()) + + md = self.vol.metadatas.get(type=type, name='edition') + self.assertEqual(md.value, '<a href="lalaland">oops</a>') + + response = self.client.get(reverse('api:metadata-edit', kwargs={'pk': str(md.id)})) + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(response.json()['value'], '<a href="lalaland">oops</a>') def test_create_numeric_metadata(self): self.client.force_login(self.superuser) diff --git a/arkindex/project/serializer_fields.py b/arkindex/project/serializer_fields.py index 234b5d8177b018add9968842f5e75be608dd56d8..882cb1e0836594c594433a62d5ec3983dc503239 100644 --- a/arkindex/project/serializer_fields.py +++ b/arkindex/project/serializer_fields.py @@ -82,6 +82,15 @@ class IIIFPathField(serializers.CharField): return value +METADATA_ALLOWED_HTML_TAGS = [ + 'a', 'b', 'blockquote', 'body', 'br', 'div', + 'em', 'h1', 'h2', 'h3', 'html', 'i', 'iframe', + 'img', 'li', 'marquee', 'ol', 'p', 'sup', + 'table', 'tbody', 'td', 'th', 'thead', 'tr', + 'ul' +] + + @extend_schema_field({ 'oneOf': [ {'type': 'string'}, @@ -101,15 +110,8 @@ class MetaDataValueField(serializers.CharField): if instance.type == MetaType.Numeric: return float(instance.value) - # Clean the metadata value of HTML tags - tags = [] + # Clean the metadata value of HTML tags, only for markdown metadata if instance.type == MetaType.Markdown: - tags = [ - 'a', 'b', 'blockquote', 'body', 'br', 'div', - 'em', 'h1', 'h2', 'h3', 'html', 'i', 'iframe', - 'img', 'li', 'marquee', 'ol', 'p', 'sup', - 'table', 'tbody', 'td', 'th', 'thead', 'tr', - 'ul' - ] - - return bleach.clean(instance.value, tags=tags) + return bleach.clean(instance.value, tags=METADATA_ALLOWED_HTML_TAGS) + + return instance.value