From 32177f7f2151259ab569ad88bfeaaf9ec59cb7ec Mon Sep 17 00:00:00 2001
From: Manon Blanco <blanco@teklia.com>
Date: Wed, 17 Mar 2021 13:54:37 +0100
Subject: [PATCH] Use a global sanitizer config

---
 arkindex/documents/serializers/light.py |  4 ++--
 arkindex/project/settings.py            | 13 +++++++++++++
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/arkindex/documents/serializers/light.py b/arkindex/documents/serializers/light.py
index f151e8ae45..e686640b43 100644
--- a/arkindex/documents/serializers/light.py
+++ b/arkindex/documents/serializers/light.py
@@ -1,6 +1,6 @@
 import markdown
 from django.db.models import Max
-from html_sanitizer import Sanitizer
+from html_sanitizer.django import get_sanitizer
 from rest_framework import serializers
 from rest_framework.exceptions import APIException, ValidationError
 
@@ -106,7 +106,7 @@ class MetaDataLightSerializer(serializers.ModelSerializer):
     def to_representation(self, instance):
         # The value must be HTML
         if instance.type == MetaType.Markdown:
-            sanitizer = Sanitizer()
+            sanitizer = get_sanitizer()  # configured by settings.HTML_SANITIZERS
             html = markdown.markdown(instance.value)
             instance.value = sanitizer.sanitize(html)
         return super().to_representation(instance)
diff --git a/arkindex/project/settings.py b/arkindex/project/settings.py
index d8e9835a81..4569c2ab8e 100644
--- a/arkindex/project/settings.py
+++ b/arkindex/project/settings.py
@@ -274,6 +274,19 @@ SPECTACULAR_SETTINGS = {
     ]
 }
 
+# Global html-sanitizer config, looked up by html_sanitizer.django.get_sanitizer()
+HTML_SANITIZERS = {
+    'default': {
+        'tags': {  # HTML tags the sanitizer is allowed to keep
+            'a', 'h1', 'h2', 'h3', 'strong', 'em', 'p',
+            'ul', 'ol', 'li', 'br', 'sub', 'sup', 'hr',
+            'table', 'thead', 'tbody', 'tr', 'th', 'td'
+        },
+        'empty': {'hr', 'a', 'br', 'th'},  # tags allowed to be empty; NOTE(review): 'td' absent - confirm
+        'is_mergeable': lambda e1, e2: False,  # always False: presumably disables merging of adjacent elements
+    },
+}
+
 SEARCH_FILTER_MAX_TERMS = 10
 
 # Elastic search config
-- 
GitLab