Skip to content
Snippets Groups Projects
Commit 756d6fdc authored by Bastien Abadie
Browse files

Merge branch 'fix-corpus-filter' into 'master'

Fix corpus filtering in all searches

See merge request !102
parents 9ef733aa 9785480d
No related branches found
No related tags found
1 merge request: !102 Fix corpus filtering in all searches
from arkindex.project.tests import FixtureAPITestCase
from arkindex.documents.models import Transcription, Act, Element
from arkindex.documents.models import Transcription, Act, Element, Corpus
from django.urls import reverse
from django.contrib.auth.models import AnonymousUser
from rest_framework import status
from unittest.mock import patch
......@@ -93,11 +94,37 @@ class TestSearchAPI(FixtureAPITestCase):
map(str, expected.values_list('id', flat=True)),
)
args, kwargs = self.es_mock().search.call_args
self.assertTupleEqual(args, ())
self.assertCountEqual(kwargs.keys(), ['body', 'index', 'doc_type'])
self.assertEqual(kwargs['index'], 'transcriptions')
self.assertEqual(kwargs['doc_type'], Transcription.INDEX_TYPE)
self.assertCountEqual(kwargs['body'].keys(), ['_source', 'from', 'size', 'query', 'sort', 'aggs'])
self.assertEqual(kwargs['body']['_source'], True)
self.assertEqual(kwargs['body']['from'], 0)
self.assertEqual(kwargs['body']['size'], len(expected))
self.assertIsInstance(kwargs['body']['query']['bool']['must'], list)
self.assertTrue(all(len(cond.keys()) == 1 for cond in kwargs['body']['query']['bool']['must']))
conditions = {
list(cond.keys())[0]: list(cond.values())[0]
for cond in kwargs['body']['query']['bool']['must']
}
self.assertCountEqual(conditions.keys(), ['match', 'terms', 'range'])
self.assertEqual(conditions['match']['text'], 'paris')
self.assertEqual(conditions['range']['score']['gte'], 0.5)
self.assertCountEqual(
conditions['terms']['corpus.keyword'],
map(str, Corpus.objects.readable(AnonymousUser()).values_list('id', flat=True)),
)
def test_act_search(self):
act = Act.objects.get(number="1")
ts = Transcription.objects.filter(text__in=["PARIS", "ROY"], zone__image__path='img1')
self.es_mock().count.return_value = {'count': len(ts)}
self.es_mock().count.return_value = {'count': 1}
self.es_mock().search.return_value = self.build_es_response(
[self.make_act_hit(act, ts), ],
)
......@@ -114,23 +141,58 @@ class TestSearchAPI(FixtureAPITestCase):
[t['id'] for t in result['transcriptions']],
map(str, ts.values_list('id', flat=True)),
)
self.assertEqual(result['total_transcriptions'], len(ts))
args, kwargs = self.es_mock().search.call_args
self.assertTupleEqual(args, ())
self.assertCountEqual(kwargs.keys(), ['body', 'index', 'doc_type'])
self.assertEqual(kwargs['index'], 'acts')
self.assertEqual(kwargs['doc_type'], Act.INDEX_TYPE)
self.assertCountEqual(kwargs['body'].keys(), ['_source', 'from', 'size', 'query', 'sort', 'aggs'])
self.assertEqual(kwargs['body']['_source'], False)
self.assertEqual(kwargs['body']['from'], 0)
self.assertEqual(kwargs['body']['size'], 1)
self.assertEqual(kwargs['body']['query']['nested']['score_mode'], 'sum')
self.assertEqual(kwargs['body']['query']['nested']['path'], 'transcriptions')
self.assertIn('inner_hits', kwargs['body']['query']['nested'])
function_score = kwargs['body']['query']['nested']['query']['function_score']
self.assertListEqual(function_score['functions'], [
{
"field_value_factor": {
"field": "transcriptions.score",
}
}
])
self.assertIsInstance(function_score['query']['bool']['must'], list)
self.assertTrue(all(len(cond.keys()) == 1 for cond in function_score['query']['bool']['must']))
conditions = {
list(cond.keys())[0]: list(cond.values())[0]
for cond in function_score['query']['bool']['must']
}
self.assertCountEqual(conditions.keys(), ['match', 'terms', 'range'])
self.assertEqual(conditions['match']['transcriptions.text'], 'paris roy')
self.assertEqual(conditions['range']['transcriptions.score']['gte'], 0.5)
self.assertCountEqual(
conditions['terms']['transcriptions.corpus'],
map(str, Corpus.objects.readable(AnonymousUser()).values_list('id', flat=True)),
)
def test_iiif_transcription_search(self):
# Filter to only get transcriptions from volume 1
unfiltered = Transcription.objects.filter(text="PARIS")
expected = Transcription.objects.filter(text="PARIS", zone__image__path__in=['img1', 'img2', 'img3'])
vol = Element.objects.get(name='Volume 1')
self.es_mock().count.return_value = {'count': len(unfiltered)}
self.es_mock().search.return_value = self.build_es_response(
list(map(self.make_transcription_hit, unfiltered))
)
response = self.client.get(reverse(
'api:ts-search-manifest',
kwargs={'pk': str(Element.objects.get(name='Volume 1').id)}
), {'q': 'paris'})
response = self.client.get(reverse('api:ts-search-manifest', kwargs={'pk': str(vol.id)}), {'q': 'paris'})
self.assertEqual(response.status_code, status.HTTP_200_OK)
data = response.json()
......@@ -150,3 +212,24 @@ class TestSearchAPI(FixtureAPITestCase):
self.assertTrue(all(res['resource']['@type'] == 'cnt:ContentAsText' for res in resources))
self.assertTrue(all(res['resource']['format'] == 'text/plain' for res in resources))
self.assertTrue(all(res['resource']['chars'] == 'PARIS' for res in resources))
args, kwargs = self.es_mock().search.call_args
self.assertTupleEqual(args, ())
self.assertCountEqual(kwargs.keys(), ['body', 'index', 'doc_type'])
self.assertEqual(kwargs['index'], 'transcriptions')
self.assertEqual(kwargs['doc_type'], Transcription.INDEX_TYPE)
self.assertCountEqual(kwargs['body'].keys(), ['_source', 'from', 'size', 'query', 'sort', 'aggs'])
self.assertEqual(kwargs['body']['_source'], True)
self.assertEqual(kwargs['body']['from'], 0)
self.assertEqual(kwargs['body']['size'], len(expected))
self.assertIsInstance(kwargs['body']['query']['bool']['must'], list)
self.assertTrue(all(len(cond.keys()) == 1 for cond in kwargs['body']['query']['bool']['must']))
conditions = kwargs['body']['query']['bool']['must']
text_match = next(c['match']['text'] for c in conditions if 'match' in c and 'text' in c['match'])
corpus_match = next(c['match']['corpus'] for c in conditions if 'match' in c and 'corpus' in c['match'])
self.assertEqual(text_match, 'paris')
self.assertEqual(corpus_match, str(vol.corpus_id))
self.assertEqual(next(c['range']['score']['gte'] for c in conditions if 'range' in c), 0.5)
......@@ -45,7 +45,7 @@ class SearchAPIMixin(CorpusACLMixin):
}
if 'corpus' in self.request.query_params:
try:
context['corpus_id'] = self.get_corpus(self.request.query_params['corpus'])
context['corpus_id'] = str(self.get_corpus(self.request.query_params['corpus']).id)
except Corpus.DoesNotExist:
raise PermissionDenied
else:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment