Skip to content
Snippets Groups Projects
Commit 9785480d authored by Erwan Rouchet, committed by Bastien Abadie
Browse files

Fix corpus filtering in all searches

parent 9ef733aa
No related branches found
No related tags found
No related merge requests found
from arkindex.project.tests import FixtureAPITestCase
from arkindex.documents.models import Transcription, Act, Element
from arkindex.documents.models import Transcription, Act, Element, Corpus
from django.urls import reverse
from django.contrib.auth.models import AnonymousUser
from rest_framework import status
from unittest.mock import patch
......@@ -93,11 +94,37 @@ class TestSearchAPI(FixtureAPITestCase):
map(str, expected.values_list('id', flat=True)),
)
args, kwargs = self.es_mock().search.call_args
self.assertTupleEqual(args, ())
self.assertCountEqual(kwargs.keys(), ['body', 'index', 'doc_type'])
self.assertEqual(kwargs['index'], 'transcriptions')
self.assertEqual(kwargs['doc_type'], Transcription.INDEX_TYPE)
self.assertCountEqual(kwargs['body'].keys(), ['_source', 'from', 'size', 'query', 'sort', 'aggs'])
self.assertEqual(kwargs['body']['_source'], True)
self.assertEqual(kwargs['body']['from'], 0)
self.assertEqual(kwargs['body']['size'], len(expected))
self.assertIsInstance(kwargs['body']['query']['bool']['must'], list)
self.assertTrue(all(len(cond.keys()) == 1 for cond in kwargs['body']['query']['bool']['must']))
conditions = {
list(cond.keys())[0]: list(cond.values())[0]
for cond in kwargs['body']['query']['bool']['must']
}
self.assertCountEqual(conditions.keys(), ['match', 'terms', 'range'])
self.assertEqual(conditions['match']['text'], 'paris')
self.assertEqual(conditions['range']['score']['gte'], 0.5)
self.assertCountEqual(
conditions['terms']['corpus.keyword'],
map(str, Corpus.objects.readable(AnonymousUser()).values_list('id', flat=True)),
)
def test_act_search(self):
act = Act.objects.get(number="1")
ts = Transcription.objects.filter(text__in=["PARIS", "ROY"], zone__image__path='img1')
self.es_mock().count.return_value = {'count': len(ts)}
self.es_mock().count.return_value = {'count': 1}
self.es_mock().search.return_value = self.build_es_response(
[self.make_act_hit(act, ts), ],
)
......@@ -114,23 +141,58 @@ class TestSearchAPI(FixtureAPITestCase):
[t['id'] for t in result['transcriptions']],
map(str, ts.values_list('id', flat=True)),
)
self.assertEqual(result['total_transcriptions'], len(ts))
args, kwargs = self.es_mock().search.call_args
self.assertTupleEqual(args, ())
self.assertCountEqual(kwargs.keys(), ['body', 'index', 'doc_type'])
self.assertEqual(kwargs['index'], 'acts')
self.assertEqual(kwargs['doc_type'], Act.INDEX_TYPE)
self.assertCountEqual(kwargs['body'].keys(), ['_source', 'from', 'size', 'query', 'sort', 'aggs'])
self.assertEqual(kwargs['body']['_source'], False)
self.assertEqual(kwargs['body']['from'], 0)
self.assertEqual(kwargs['body']['size'], 1)
self.assertEqual(kwargs['body']['query']['nested']['score_mode'], 'sum')
self.assertEqual(kwargs['body']['query']['nested']['path'], 'transcriptions')
self.assertIn('inner_hits', kwargs['body']['query']['nested'])
function_score = kwargs['body']['query']['nested']['query']['function_score']
self.assertListEqual(function_score['functions'], [
{
"field_value_factor": {
"field": "transcriptions.score",
}
}
])
self.assertIsInstance(function_score['query']['bool']['must'], list)
self.assertTrue(all(len(cond.keys()) == 1 for cond in function_score['query']['bool']['must']))
conditions = {
list(cond.keys())[0]: list(cond.values())[0]
for cond in function_score['query']['bool']['must']
}
self.assertCountEqual(conditions.keys(), ['match', 'terms', 'range'])
self.assertEqual(conditions['match']['transcriptions.text'], 'paris roy')
self.assertEqual(conditions['range']['transcriptions.score']['gte'], 0.5)
self.assertCountEqual(
conditions['terms']['transcriptions.corpus'],
map(str, Corpus.objects.readable(AnonymousUser()).values_list('id', flat=True)),
)
def test_iiif_transcription_search(self):
# Filter to only get transcriptions from volume 1
unfiltered = Transcription.objects.filter(text="PARIS")
expected = Transcription.objects.filter(text="PARIS", zone__image__path__in=['img1', 'img2', 'img3'])
vol = Element.objects.get(name='Volume 1')
self.es_mock().count.return_value = {'count': len(unfiltered)}
self.es_mock().search.return_value = self.build_es_response(
list(map(self.make_transcription_hit, unfiltered))
)
response = self.client.get(reverse(
'api:ts-search-manifest',
kwargs={'pk': str(Element.objects.get(name='Volume 1').id)}
), {'q': 'paris'})
response = self.client.get(reverse('api:ts-search-manifest', kwargs={'pk': str(vol.id)}), {'q': 'paris'})
self.assertEqual(response.status_code, status.HTTP_200_OK)
data = response.json()
......@@ -150,3 +212,24 @@ class TestSearchAPI(FixtureAPITestCase):
self.assertTrue(all(res['resource']['@type'] == 'cnt:ContentAsText' for res in resources))
self.assertTrue(all(res['resource']['format'] == 'text/plain' for res in resources))
self.assertTrue(all(res['resource']['chars'] == 'PARIS' for res in resources))
args, kwargs = self.es_mock().search.call_args
self.assertTupleEqual(args, ())
self.assertCountEqual(kwargs.keys(), ['body', 'index', 'doc_type'])
self.assertEqual(kwargs['index'], 'transcriptions')
self.assertEqual(kwargs['doc_type'], Transcription.INDEX_TYPE)
self.assertCountEqual(kwargs['body'].keys(), ['_source', 'from', 'size', 'query', 'sort', 'aggs'])
self.assertEqual(kwargs['body']['_source'], True)
self.assertEqual(kwargs['body']['from'], 0)
self.assertEqual(kwargs['body']['size'], len(expected))
self.assertIsInstance(kwargs['body']['query']['bool']['must'], list)
self.assertTrue(all(len(cond.keys()) == 1 for cond in kwargs['body']['query']['bool']['must']))
conditions = kwargs['body']['query']['bool']['must']
text_match = next(c['match']['text'] for c in conditions if 'match' in c and 'text' in c['match'])
corpus_match = next(c['match']['corpus'] for c in conditions if 'match' in c and 'corpus' in c['match'])
self.assertEqual(text_match, 'paris')
self.assertEqual(corpus_match, str(vol.corpus_id))
self.assertEqual(next(c['range']['score']['gte'] for c in conditions if 'range' in c), 0.5)
......@@ -45,7 +45,7 @@ class SearchAPIMixin(CorpusACLMixin):
}
if 'corpus' in self.request.query_params:
try:
context['corpus_id'] = self.get_corpus(self.request.query_params['corpus'])
context['corpus_id'] = str(self.get_corpus(self.request.query_params['corpus']).id)
except Corpus.DoesNotExist:
raise PermissionDenied
else:
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment