From 9785480da6c2737c384b2e0fb61a50e33081ad7f Mon Sep 17 00:00:00 2001 From: Erwan Rouchet <rouchet@teklia.com> Date: Fri, 14 Sep 2018 12:51:04 +0000 Subject: [PATCH] Fix corpus filtering in all searches --- arkindex/documents/tests/test_search.py | 97 +++++++++++++++++++++++-- arkindex/project/mixins.py | 2 +- 2 files changed, 91 insertions(+), 8 deletions(-) diff --git a/arkindex/documents/tests/test_search.py b/arkindex/documents/tests/test_search.py index 61c4efffc6..77c7944f0d 100644 --- a/arkindex/documents/tests/test_search.py +++ b/arkindex/documents/tests/test_search.py @@ -1,6 +1,7 @@ from arkindex.project.tests import FixtureAPITestCase -from arkindex.documents.models import Transcription, Act, Element +from arkindex.documents.models import Transcription, Act, Element, Corpus from django.urls import reverse +from django.contrib.auth.models import AnonymousUser from rest_framework import status from unittest.mock import patch @@ -93,11 +94,37 @@ class TestSearchAPI(FixtureAPITestCase): map(str, expected.values_list('id', flat=True)), ) + args, kwargs = self.es_mock().search.call_args + self.assertTupleEqual(args, ()) + self.assertCountEqual(kwargs.keys(), ['body', 'index', 'doc_type']) + self.assertEqual(kwargs['index'], 'transcriptions') + self.assertEqual(kwargs['doc_type'], Transcription.INDEX_TYPE) + + self.assertCountEqual(kwargs['body'].keys(), ['_source', 'from', 'size', 'query', 'sort', 'aggs']) + self.assertEqual(kwargs['body']['_source'], True) + self.assertEqual(kwargs['body']['from'], 0) + self.assertEqual(kwargs['body']['size'], len(expected)) + + self.assertIsInstance(kwargs['body']['query']['bool']['must'], list) + self.assertTrue(all(len(cond.keys()) == 1 for cond in kwargs['body']['query']['bool']['must'])) + conditions = { + list(cond.keys())[0]: list(cond.values())[0] + for cond in kwargs['body']['query']['bool']['must'] + } + + self.assertCountEqual(conditions.keys(), ['match', 'terms', 'range']) + self.assertEqual(conditions['match']['text'], 'paris') + self.assertEqual(conditions['range']['score']['gte'], 0.5) + self.assertCountEqual( + conditions['terms']['corpus.keyword'], + map(str, Corpus.objects.readable(AnonymousUser()).values_list('id', flat=True)), + ) + def test_act_search(self): act = Act.objects.get(number="1") ts = Transcription.objects.filter(text__in=["PARIS", "ROY"], zone__image__path='img1') - self.es_mock().count.return_value = {'count': len(ts)} + self.es_mock().count.return_value = {'count': 1} self.es_mock().search.return_value = self.build_es_response( [self.make_act_hit(act, ts), ], ) @@ -114,23 +141,58 @@ class TestSearchAPI(FixtureAPITestCase): [t['id'] for t in result['transcriptions']], map(str, ts.values_list('id', flat=True)), ) - self.assertEqual(result['total_transcriptions'], len(ts)) + args, kwargs = self.es_mock().search.call_args + self.assertTupleEqual(args, ()) + self.assertCountEqual(kwargs.keys(), ['body', 'index', 'doc_type']) + self.assertEqual(kwargs['index'], 'acts') + self.assertEqual(kwargs['doc_type'], Act.INDEX_TYPE) + + self.assertCountEqual(kwargs['body'].keys(), ['_source', 'from', 'size', 'query', 'sort', 'aggs']) + self.assertEqual(kwargs['body']['_source'], False) + self.assertEqual(kwargs['body']['from'], 0) + self.assertEqual(kwargs['body']['size'], 1) + + self.assertEqual(kwargs['body']['query']['nested']['score_mode'], 'sum') + self.assertEqual(kwargs['body']['query']['nested']['path'], 'transcriptions') + self.assertIn('inner_hits', kwargs['body']['query']['nested']) + + function_score = kwargs['body']['query']['nested']['query']['function_score'] + self.assertListEqual(function_score['functions'], [ + { + "field_value_factor": { + "field": "transcriptions.score", + } + } + ]) + self.assertIsInstance(function_score['query']['bool']['must'], list) + + self.assertTrue(all(len(cond.keys()) == 1 for cond in function_score['query']['bool']['must'])) + conditions = { + list(cond.keys())[0]: list(cond.values())[0] + for cond in function_score['query']['bool']['must'] + } + self.assertCountEqual(conditions.keys(), ['match', 'terms', 'range']) + self.assertEqual(conditions['match']['transcriptions.text'], 'paris roy') + self.assertEqual(conditions['range']['transcriptions.score']['gte'], 0.5) + self.assertCountEqual( + conditions['terms']['transcriptions.corpus'], + map(str, Corpus.objects.readable(AnonymousUser()).values_list('id', flat=True)), + ) + def test_iiif_transcription_search(self): # Filter to only get transcriptions from volume 1 unfiltered = Transcription.objects.filter(text="PARIS") expected = Transcription.objects.filter(text="PARIS", zone__image__path__in=['img1', 'img2', 'img3']) + vol = Element.objects.get(name='Volume 1') self.es_mock().count.return_value = {'count': len(unfiltered)} self.es_mock().search.return_value = self.build_es_response( list(map(self.make_transcription_hit, unfiltered)) ) - response = self.client.get(reverse( - 'api:ts-search-manifest', - kwargs={'pk': str(Element.objects.get(name='Volume 1').id)} - ), {'q': 'paris'}) + response = self.client.get(reverse('api:ts-search-manifest', kwargs={'pk': str(vol.id)}), {'q': 'paris'}) self.assertEqual(response.status_code, status.HTTP_200_OK) data = response.json() @@ -150,3 +212,24 @@ class TestSearchAPI(FixtureAPITestCase): self.assertTrue(all(res['resource']['@type'] == 'cnt:ContentAsText' for res in resources)) self.assertTrue(all(res['resource']['format'] == 'text/plain' for res in resources)) self.assertTrue(all(res['resource']['chars'] == 'PARIS' for res in resources)) + + args, kwargs = self.es_mock().search.call_args + self.assertTupleEqual(args, ()) + self.assertCountEqual(kwargs.keys(), ['body', 'index', 'doc_type']) + self.assertEqual(kwargs['index'], 'transcriptions') + self.assertEqual(kwargs['doc_type'], Transcription.INDEX_TYPE) + + self.assertCountEqual(kwargs['body'].keys(), ['_source', 'from', 'size', 'query', 'sort', 'aggs']) + self.assertEqual(kwargs['body']['_source'], True) + self.assertEqual(kwargs['body']['from'], 0) + self.assertEqual(kwargs['body']['size'], len(expected)) + + self.assertIsInstance(kwargs['body']['query']['bool']['must'], list) + self.assertTrue(all(len(cond.keys()) == 1 for cond in kwargs['body']['query']['bool']['must'])) + conditions = kwargs['body']['query']['bool']['must'] + + text_match = next(c['match']['text'] for c in conditions if 'match' in c and 'text' in c['match']) + corpus_match = next(c['match']['corpus'] for c in conditions if 'match' in c and 'corpus' in c['match']) + self.assertEqual(text_match, 'paris') + self.assertEqual(corpus_match, str(vol.corpus_id)) + self.assertEqual(next(c['range']['score']['gte'] for c in conditions if 'range' in c), 0.5) diff --git a/arkindex/project/mixins.py b/arkindex/project/mixins.py index 50c46cb191..b1fb9ff660 100644 --- a/arkindex/project/mixins.py +++ b/arkindex/project/mixins.py @@ -45,7 +45,7 @@ class SearchAPIMixin(CorpusACLMixin): } if 'corpus' in self.request.query_params: try: - context['corpus_id'] = self.get_corpus(self.request.query_params['corpus']) + context['corpus_id'] = str(self.get_corpus(self.request.query_params['corpus']).id) except Corpus.DoesNotExist: raise PermissionDenied else: -- GitLab