Skip to content
Snippets Groups Projects
Commit 20e01152 authored by Bastien Abadie's avatar Bastien Abadie
Browse files

Merge branch 'convert-html-metadata-as-markdown' into 'master'

Convert HTML metadatas as markdown

Closes #78

See merge request !1275
parents c326c378 447a22bb
No related branches found
No related tags found
1 merge request!1275Convert HTML metadatas as markdown
......@@ -8,4 +8,4 @@ line_length = 120
default_section=FIRSTPARTY
known_first_party = ponos,transkribus
known_third_party = boto3,botocore,corsheaders,django,django_admin_hstore_widget,django_rq,drf_spectacular,elasticsearch,elasticsearch_dsl,enumfields,gitlab,psycopg2,requests,responses,rest_framework,rq,setuptools,sqlparse,teklia_toolbox,tenacity,tripoli,yaml
known_third_party = bleach,boto3,botocore,corsheaders,django,django_admin_hstore_widget,django_rq,drf_spectacular,elasticsearch,elasticsearch_dsl,enumfields,gitlab,psycopg2,requests,responses,rest_framework,rq,setuptools,sqlparse,teklia_toolbox,tenacity,tripoli,yaml
# Generated by Django 3.1.7 on 2021-03-17 08:26
from django.db import migrations
from arkindex.documents.models import MetaType
def convert_html_metadata_to_markdown(apps, schema_editor):
    """
    Forward step: retype metadata whose type is not a current MetaType value.

    For both the MetaData and AllowedMetaData models of the ``documents`` app,
    every row whose ``type`` is not one of the values declared on MetaType
    is updated to MetaType.Markdown.
    """
    # Resolve both models from the migration registry before updating any row
    models = [
        apps.get_model('documents', model_name)
        for model_name in ('MetaData', 'AllowedMetaData')
    ]
    for model in models:
        known_types = [meta_type.value for meta_type in MetaType]
        model.objects.exclude(type__in=known_types).update(type=MetaType.Markdown)
def convert_markdown_metadata_to_html(apps, schema_editor):
    """
    Reverse step: retype metadata whose type is not a current MetaType value.

    For both the MetaData and AllowedMetaData models of the ``documents`` app,
    every row whose ``type`` is not one of the values declared on MetaType
    is updated to MetaType.HTML.

    NOTE(review): the same change set seems to replace the ``HTML`` member of
    MetaType with ``Markdown``. If ``MetaType.HTML`` no longer exists, running
    this reverse step raises AttributeError; and rows typed ``markdown`` would
    not be selected by the ``exclude()`` below anyway. To be confirmed.
    """
    # Resolve both models from the migration registry before updating any row
    models = [
        apps.get_model('documents', model_name)
        for model_name in ('MetaData', 'AllowedMetaData')
    ]
    for model in models:
        known_types = [meta_type.value for meta_type in MetaType]
        model.objects.exclude(type__in=known_types).update(type=MetaType.HTML)
class Migration(migrations.Migration):
    """
    Data migration that runs convert_html_metadata_to_markdown when applied
    and convert_markdown_metadata_to_html when reverted.
    """

    dependencies = [('documents', '0029_corpus_top_level_type')]

    operations = [
        migrations.RunPython(
            code=convert_html_metadata_to_markdown,
            reverse_code=convert_markdown_metadata_to_html,
            # Data-only operation, flagged as removable when squashing
            elidable=True,
        ),
    ]
......@@ -515,7 +515,7 @@ class Classification(models.Model):
class MetaType(Enum):
Text = 'text'
HTML = 'html'
Markdown = 'markdown'
Date = 'date'
Location = 'location'
# Element's original structure reference (intended to be indexed)
......
......@@ -164,7 +164,7 @@ class FolderManifestSerializer(serializers.Serializer):
"viewingDirection": "left-to-right",
"service": services,
"metadata": ManifestMetadataSerializer(
element.metadatas.exclude(type=MetaType.HTML),
element.metadatas.exclude(type=MetaType.Markdown),
context=self.context,
many=True,
).data,
......
import bleach
from django.db.models import Max
from rest_framework import serializers
from rest_framework.exceptions import APIException, ValidationError
......@@ -101,6 +102,20 @@ class MetaDataLightSerializer(serializers.ModelSerializer):
revision = RevisionSerializer(read_only=True)
dates = InterpretedDateSerializer(many=True, source='get_dates', read_only=True)
def to_representation(self, instance):
    """
    Serialize a metadata with its value sanitized through ``bleach.clean``.

    Markdown metadata may keep a restricted list of HTML tags; for any other
    type no tag is allowed at all. The tests for this serializer expect
    disallowed markup to come back HTML-escaped.

    The sanitized text is only swapped in while the parent serializer builds
    the output: the instance gets its original ``value`` back afterwards, so
    code holding the same instance never observes (or saves) the escaped text.
    """
    allowed_tags = [
        'a', 'b', 'blockquote', 'body', 'br', 'div',
        'em', 'h1', 'h2', 'h3', 'html', 'i', 'iframe',
        'img', 'li', 'marquee', 'ol', 'p', 'sup',
        'table', 'tbody', 'td', 'th', 'thead', 'tr',
        'ul'
    ] if instance.type == MetaType.Markdown else []

    raw_value = instance.value
    # Every serializer field reading instance.value must see the clean text,
    # hence the temporary assignment rather than patching the output dict
    instance.value = bleach.clean(raw_value, tags=allowed_tags)
    try:
        return super().to_representation(instance)
    finally:
        # Do not leak the sanitized value to the caller's model instance
        instance.value = raw_value
def reindex_element(self, elt):
reindex_start(element=elt, elements=True)
......
......@@ -19,7 +19,7 @@ class TestFolderManifestSerializer(FixtureAPITestCase):
cls.vol.metadatas.create(name='test 1', type=MetaType.Text, value='Blah')
cls.vol.metadatas.create(name='test 2', type=MetaType.Date, value='1337-01-01')
cls.vol.metadatas.create(name='test 3', type=MetaType.Location, value='Somewhere')
cls.vol.metadatas.create(name='test 4', type=MetaType.HTML, value='<p>oh no</p>')
cls.vol.metadatas.create(name='test 4', type=MetaType.Markdown, value='<p>oh no</p>')
cls.page = Element.objects.get(name='Volume 1, page 1r')
def setUp(self):
......
......@@ -35,7 +35,7 @@ class TestMetaData(FixtureAPITestCase):
def setUp(self):
super().setUp()
self.metadata = self.vol.metadatas.create(type=MetaType.Text, name='folio', value='123')
self.private_metadata = self.private_vol.metadatas.create(type=MetaType.HTML, name='leet', value='1337')
self.private_metadata = self.private_vol.metadatas.create(type=MetaType.Markdown, name='leet', value='1337')
def test_metadata_forbidden_methods(self):
"""
......@@ -563,3 +563,98 @@ class TestMetaData(FixtureAPITestCase):
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
metadata.refresh_from_db()
self.assertIsNone(metadata.entity)
def test_render_markdown_metadata(self):
    """
    Markdown syntax stored in a Markdown metadata is returned unchanged
    """
    metadata = self.vol.metadatas.create(
        type=MetaType.Markdown,
        name='Some text',
        value='# Title\n## Subtitle\nbla',
    )
    self.client.force_login(self.user)

    url = reverse('api:metadata-edit', kwargs={'pk': str(metadata.id)})
    response = self.client.get(url)
    self.assertEqual(response.status_code, status.HTTP_200_OK)

    payload = response.json()
    expected = {
        'id': str(metadata.id),
        'type': 'markdown',
        'name': 'Some text',
        'value': '# Title\n## Subtitle\nbla',
        'entity': None,
        'dates': [],
        'worker_version': None
    }
    self.assertDictEqual(payload, expected)
def test_render_html_metadata(self):
    """
    An allowed HTML tag in a Markdown metadata is returned as is
    """
    metadata = self.vol.metadatas.create(
        type=MetaType.Markdown,
        name='Some text',
        value='<h1>Title</h1>',
    )
    self.client.force_login(self.user)

    url = reverse('api:metadata-edit', kwargs={'pk': str(metadata.id)})
    response = self.client.get(url)
    self.assertEqual(response.status_code, status.HTTP_200_OK)

    payload = response.json()
    expected = {
        'id': str(metadata.id),
        'type': 'markdown',
        'name': 'Some text',
        'value': '<h1>Title</h1>',
        'entity': None,
        'dates': [],
        'worker_version': None
    }
    self.assertDictEqual(payload, expected)
def test_unrender_html_metadata(self):
    """
    A <style> tag in a Markdown metadata is returned HTML-escaped
    """
    metadata = self.vol.metadatas.create(
        type=MetaType.Markdown,
        name='Some text',
        value='<style type="text/css">* { display: none !important; }</style>',
    )
    self.client.force_login(self.user)

    url = reverse('api:metadata-edit', kwargs={'pk': str(metadata.id)})
    response = self.client.get(url)
    self.assertEqual(response.status_code, status.HTTP_200_OK)

    payload = response.json()
    expected = {
        'id': str(metadata.id),
        'type': 'markdown',
        'name': 'Some text',
        'value': '&lt;style type="text/css"&gt;* { display: none !important; }&lt;/style&gt;',
        'entity': None,
        'dates': [],
        'worker_version': None
    }
    self.assertDictEqual(payload, expected)
def test_create_markdown_metadata(self):
    """
    A Markdown metadata created through the API is stored and returned unchanged
    """
    AllowedMetaData.objects.create(corpus=self.corpus, type=MetaType.Markdown, name='text')
    self.client.force_login(self.user)

    # Create the metadata on the volume
    create_url = reverse('api:element-metadata', kwargs={'pk': str(self.vol.id)})
    response = self.client.post(
        create_url,
        data={'type': 'markdown', 'name': 'text', 'value': '# Title\n## Subtitle\nbla'},
    )
    self.assertEqual(response.status_code, status.HTTP_201_CREATED)

    # The database holds the value exactly as sent
    md = self.vol.metadatas.get(type=MetaType.Markdown, name='text')
    self.assertEqual(md.value, '# Title\n## Subtitle\nbla')

    # The value is also unchanged when retrieved
    retrieve_url = reverse('api:metadata-edit', kwargs={'pk': str(md.id)})
    response = self.client.get(retrieve_url)
    self.assertEqual(response.status_code, status.HTTP_200_OK)
    self.assertEqual(response.json()['value'], '# Title\n## Subtitle\nbla')
def test_create_html_metadata(self):
    """
    A <style> tag sent in a Markdown metadata is stored raw but returned escaped
    """
    AllowedMetaData.objects.create(corpus=self.corpus, type=MetaType.Markdown, name='text')
    self.client.force_login(self.user)

    # Create the metadata on the volume
    create_url = reverse('api:element-metadata', kwargs={'pk': str(self.vol.id)})
    response = self.client.post(
        create_url,
        data={
            'type': 'markdown',
            'name': 'text',
            'value': '<style type="text/css">* { display: none !important; }</style>',
        },
    )
    self.assertEqual(response.status_code, status.HTTP_201_CREATED)

    # The database holds the value exactly as sent
    md = self.vol.metadatas.get(type=MetaType.Markdown, name='text')
    self.assertEqual(md.value, '<style type="text/css">* { display: none !important; }</style>')

    # The forbidden tag is escaped when retrieved
    retrieve_url = reverse('api:metadata-edit', kwargs={'pk': str(md.id)})
    response = self.client.get(retrieve_url)
    self.assertEqual(response.status_code, status.HTTP_200_OK)
    self.assertEqual(
        response.json()['value'],
        '&lt;style type="text/css"&gt;* { display: none !important; }&lt;/style&gt;',
    )
def test_create_html_metadata_with_other_type(self):
    """
    HTML sent in a non-Markdown metadata is stored raw but returned fully escaped
    """
    self.client.force_login(self.user)

    # Create a date metadata holding a link
    create_url = reverse('api:element-metadata', kwargs={'pk': str(self.vol.id)})
    response = self.client.post(
        create_url,
        data={'type': 'date', 'name': 'edition', 'value': '<a href="lalaland">oops</a>'},
    )
    self.assertEqual(response.status_code, status.HTTP_201_CREATED)

    # The database holds the value exactly as sent
    md = self.vol.metadatas.get(type=MetaType.Date, name='edition')
    self.assertEqual(md.value, '<a href="lalaland">oops</a>')

    # Even the <a> tag is escaped when retrieved
    retrieve_url = reverse('api:metadata-edit', kwargs={'pk': str(md.id)})
    response = self.client.get(retrieve_url)
    self.assertEqual(response.status_code, status.HTTP_200_OK)
    self.assertEqual(response.json()['value'], '&lt;a href="lalaland"&gt;oops&lt;/a&gt;')
......@@ -2,6 +2,7 @@
apistar==0.7.2
git+https://gitlab.com/teklia/apistar.git#egg=apistar
bleach==3.3.0
django-admin-hstore-widget==1.1.0
django-cors-headers==3.7.0
django-enumfields==2.1.1
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment