Skip to content
Snippets Groups Projects
Commit 58eddb09 authored by Bastien Abadie
Browse files

Merge branch 'escape' into 'master'

Handle ElasticSearch character escaping

See merge request !60
parents 170b74a0 2ced11e4
No related branches found
No related tags found
1 merge request: !60 Handle ElasticSearch character escaping
......@@ -25,6 +25,7 @@ from arkindex.images.models import Zone
from arkindex.images.importer import bulk_transcriptions
from arkindex.project.elastic import ESQuerySet
from arkindex.project.polygon import Polygon
from arkindex.project.tools import elasticsearch_escape
class ElementsList(ListAPIView):
......@@ -255,7 +256,7 @@ class TranscriptionSearch(ListAPIView):
return ESQuerySet(
query=ESQuerySet.make_query(
'elastic/search_transcriptions.json',
ctx={'query': query, 'corpus': corpus, 'type': type},
ctx={'query': elasticsearch_escape(query), 'corpus': corpus, 'type': type},
),
sort={"score": {"order": "desc", "mode": "max"}},
es_index=settings.ES_INDEX_TRANSCRIPTIONS,
......@@ -278,7 +279,7 @@ class ActSearch(ListAPIView):
_source=False,
query=ESQuerySet.make_query(
'elastic/search_acts.json',
ctx={'query': query, 'corpus': corpus, 'type': type},
ctx={'query': elasticsearch_escape(query), 'corpus': corpus, 'type': type},
),
es_index=settings.ES_INDEX_ACTS,
es_type=Act.INDEX_TYPE,
......
from django.conf import settings
from django.template.loader import render_to_string
from elasticsearch import Elasticsearch
from rest_framework.exceptions import APIException
import json
......@@ -63,4 +64,7 @@ class ESQuerySet(object):
"""
Use a Django template to generate a query.
"""
return json.loads(render_to_string(template_name, context=ctx))
try:
return json.loads(render_to_string(template_name, context=ctx))
except json.JSONDecodeError as e:
raise APIException('Failed to parse query string')
from django.test import TestCase
from arkindex.project.polygon import Polygon
from arkindex.images.models import ImageServer, Image, Zone
from arkindex.project.tools import elasticsearch_escape
class TestPolygonField(TestCase):
......@@ -80,3 +81,16 @@ class TestPolygon(TestCase):
self.assertEqual(a.y, 0)
self.assertEqual(a.width, 20)
self.assertEqual(a.height, 10)
class TestTools(TestCase):
    """Tests for the helpers in arkindex.project.tools."""

    def test_elasticsearch_escape(self):
        """Check elasticsearch_escape against each kind of reserved token."""
        # Text without reserved characters is returned unchanged.
        self.assertEqual(elasticsearch_escape('abcdef'), 'abcdef')
        # A reserved character is prefixed with two backslashes.
        self.assertEqual(elasticsearch_escape('aaaaa+b'), 'aaaaa\\\\+b')
        # A lone backslash becomes two backslashes.
        self.assertEqual(elasticsearch_escape('\\'), '\\\\')
        self.assertEqual(elasticsearch_escape('[]'), '\\\\[\\\\]')
        # A double quote becomes three backslashes followed by the quote,
        # matching the dedicated branch of elasticsearch_escape; with only
        # two backslashes the quote would terminate a JSON string literal.
        self.assertEqual(elasticsearch_escape(' " '), ' \\\\\\" ')
        # Single & and | are not operators; only && and || are escaped.
        self.assertEqual(elasticsearch_escape('&|'), '&|')
        self.assertEqual(elasticsearch_escape('&&||'), '\\\\&&\\\\||')
        # < and > are removed instead of being escaped.
        self.assertEqual(elasticsearch_escape('a<a>a'), 'aaa')
......@@ -2,6 +2,7 @@ from urllib.parse import urlsplit, SplitResult
from django.conf import settings
import random
import string
import re
def sslify_url(url):
......@@ -30,3 +31,23 @@ def random_string(n):
random.choice(string.ascii_letters + string.digits)
for _ in range(n)
])
# Matches every reserved token of the ElasticSearch query-string syntax:
# the single-character operators, plus the two-character operators || and &&.
ES_ESCAPE_REGEX = re.compile(r'([+\-=\\><!(){}\[\]^"~*?:/]|\|\||&&)')

# Tokens whose replacement differs from the default "two backslashes + token".
_ES_ESCAPE_SPECIAL = {
    '<': '',            # dropped entirely
    '>': '',            # dropped entirely
    '\\': '\\\\',       # one backslash -> two backslashes
    '"': '\\\\\\"',     # quote -> three backslashes + quote
}


def elasticsearch_escape(s):
    """
    Escape a string for use by ElasticSearch
    https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#_reserved_characters # noqa

    Every token matched by ES_ESCAPE_REGEX is rewritten as follows:

    - ``<`` and ``>`` are removed;
    - a backslash becomes two backslashes;
    - a double quote becomes three backslashes followed by the quote;
    - any other reserved token (including ``&&`` and ``||``) is prefixed
      with two backslashes.

    All other text, including single ``&`` and ``|``, is left untouched.

    NOTE(review): the doubled backslashes suggest the result is meant to be
    embedded in a JSON string literal, where each pair decodes to a single
    backslash; confirm against the callers before reusing it elsewhere.

    :param s: The raw search text.
    :returns: The escaped text.
    """
    def replace(matchobj):
        token = matchobj.group(0)
        # Default rule: two backslashes in front of the reserved token.
        return _ES_ESCAPE_SPECIAL.get(token, '\\\\' + token)

    return ES_ESCAPE_REGEX.sub(replace, s)
......@@ -31,7 +31,7 @@
{% endif %}
{
"match": {
"transcriptions.text": "{{ query }}"
"transcriptions.text": "{{ query|safe }}"
}
}
]
......
......@@ -3,7 +3,7 @@
"must": [
{
"match": {
"text": "{{ query }}"
"text": "{{ query|safe }}"
}
},
{% if corpus %}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment