From 9785480da6c2737c384b2e0fb61a50e33081ad7f Mon Sep 17 00:00:00 2001
From: Erwan Rouchet <rouchet@teklia.com>
Date: Fri, 14 Sep 2018 12:51:04 +0000
Subject: [PATCH] Fix corpus filtering in all searches

---
 arkindex/documents/tests/test_search.py | 97 +++++++++++++++++++++++--
 arkindex/project/mixins.py              |  2 +-
 2 files changed, 91 insertions(+), 8 deletions(-)

diff --git a/arkindex/documents/tests/test_search.py b/arkindex/documents/tests/test_search.py
index 61c4efffc6..77c7944f0d 100644
--- a/arkindex/documents/tests/test_search.py
+++ b/arkindex/documents/tests/test_search.py
@@ -1,6 +1,7 @@
 from arkindex.project.tests import FixtureAPITestCase
-from arkindex.documents.models import Transcription, Act, Element
+from arkindex.documents.models import Transcription, Act, Element, Corpus
 from django.urls import reverse
+from django.contrib.auth.models import AnonymousUser
 from rest_framework import status
 from unittest.mock import patch
 
@@ -93,11 +94,37 @@ class TestSearchAPI(FixtureAPITestCase):
             map(str, expected.values_list('id', flat=True)),
         )
 
+        args, kwargs = self.es_mock().search.call_args
+        self.assertTupleEqual(args, ())
+        self.assertCountEqual(kwargs.keys(), ['body', 'index', 'doc_type'])
+        self.assertEqual(kwargs['index'], 'transcriptions')
+        self.assertEqual(kwargs['doc_type'], Transcription.INDEX_TYPE)
+
+        self.assertCountEqual(kwargs['body'].keys(), ['_source', 'from', 'size', 'query', 'sort', 'aggs'])
+        self.assertEqual(kwargs['body']['_source'], True)
+        self.assertEqual(kwargs['body']['from'], 0)
+        self.assertEqual(kwargs['body']['size'], len(expected))
+
+        self.assertIsInstance(kwargs['body']['query']['bool']['must'], list)
+        self.assertTrue(all(len(cond.keys()) == 1 for cond in kwargs['body']['query']['bool']['must']))
+        conditions = {
+            list(cond.keys())[0]: list(cond.values())[0]
+            for cond in kwargs['body']['query']['bool']['must']
+        }
+
+        self.assertCountEqual(conditions.keys(), ['match', 'terms', 'range'])
+        self.assertEqual(conditions['match']['text'], 'paris')
+        self.assertEqual(conditions['range']['score']['gte'], 0.5)
+        self.assertCountEqual(
+            conditions['terms']['corpus.keyword'],
+            map(str, Corpus.objects.readable(AnonymousUser()).values_list('id', flat=True)),
+        )
+
     def test_act_search(self):
         act = Act.objects.get(number="1")
         ts = Transcription.objects.filter(text__in=["PARIS", "ROY"], zone__image__path='img1')
 
-        self.es_mock().count.return_value = {'count': len(ts)}
+        self.es_mock().count.return_value = {'count': 1}
         self.es_mock().search.return_value = self.build_es_response(
             [self.make_act_hit(act, ts), ],
         )
@@ -114,23 +141,58 @@ class TestSearchAPI(FixtureAPITestCase):
             [t['id'] for t in result['transcriptions']],
             map(str, ts.values_list('id', flat=True)),
         )
-
         self.assertEqual(result['total_transcriptions'], len(ts))
 
+        args, kwargs = self.es_mock().search.call_args
+        self.assertTupleEqual(args, ())
+        self.assertCountEqual(kwargs.keys(), ['body', 'index', 'doc_type'])
+        self.assertEqual(kwargs['index'], 'acts')
+        self.assertEqual(kwargs['doc_type'], Act.INDEX_TYPE)
+
+        self.assertCountEqual(kwargs['body'].keys(), ['_source', 'from', 'size', 'query', 'sort', 'aggs'])
+        self.assertEqual(kwargs['body']['_source'], False)
+        self.assertEqual(kwargs['body']['from'], 0)
+        self.assertEqual(kwargs['body']['size'], 1)
+
+        self.assertEqual(kwargs['body']['query']['nested']['score_mode'], 'sum')
+        self.assertEqual(kwargs['body']['query']['nested']['path'], 'transcriptions')
+        self.assertIn('inner_hits', kwargs['body']['query']['nested'])
+
+        function_score = kwargs['body']['query']['nested']['query']['function_score']
+        self.assertListEqual(function_score['functions'], [
+            {
+                "field_value_factor": {
+                    "field": "transcriptions.score",
+                }
+            }
+        ])
+        self.assertIsInstance(function_score['query']['bool']['must'], list)
+
+        self.assertTrue(all(len(cond.keys()) == 1 for cond in function_score['query']['bool']['must']))
+        conditions = {
+            list(cond.keys())[0]: list(cond.values())[0]
+            for cond in function_score['query']['bool']['must']
+        }
+        self.assertCountEqual(conditions.keys(), ['match', 'terms', 'range'])
+        self.assertEqual(conditions['match']['transcriptions.text'], 'paris roy')
+        self.assertEqual(conditions['range']['transcriptions.score']['gte'], 0.5)
+        self.assertCountEqual(
+            conditions['terms']['transcriptions.corpus'],
+            map(str, Corpus.objects.readable(AnonymousUser()).values_list('id', flat=True)),
+        )
+
     def test_iiif_transcription_search(self):
         # Filter to only get transcriptions from volume 1
         unfiltered = Transcription.objects.filter(text="PARIS")
         expected = Transcription.objects.filter(text="PARIS", zone__image__path__in=['img1', 'img2', 'img3'])
+        vol = Element.objects.get(name='Volume 1')
 
         self.es_mock().count.return_value = {'count': len(unfiltered)}
         self.es_mock().search.return_value = self.build_es_response(
             list(map(self.make_transcription_hit, unfiltered))
         )
 
-        response = self.client.get(reverse(
-            'api:ts-search-manifest',
-            kwargs={'pk': str(Element.objects.get(name='Volume 1').id)}
-        ), {'q': 'paris'})
+        response = self.client.get(reverse('api:ts-search-manifest', kwargs={'pk': str(vol.id)}), {'q': 'paris'})
         self.assertEqual(response.status_code, status.HTTP_200_OK)
         data = response.json()
 
@@ -150,3 +212,24 @@ class TestSearchAPI(FixtureAPITestCase):
         self.assertTrue(all(res['resource']['@type'] == 'cnt:ContentAsText' for res in resources))
         self.assertTrue(all(res['resource']['format'] == 'text/plain' for res in resources))
         self.assertTrue(all(res['resource']['chars'] == 'PARIS' for res in resources))
+
+        args, kwargs = self.es_mock().search.call_args
+        self.assertTupleEqual(args, ())
+        self.assertCountEqual(kwargs.keys(), ['body', 'index', 'doc_type'])
+        self.assertEqual(kwargs['index'], 'transcriptions')
+        self.assertEqual(kwargs['doc_type'], Transcription.INDEX_TYPE)
+
+        self.assertCountEqual(kwargs['body'].keys(), ['_source', 'from', 'size', 'query', 'sort', 'aggs'])
+        self.assertEqual(kwargs['body']['_source'], True)
+        self.assertEqual(kwargs['body']['from'], 0)
+        self.assertEqual(kwargs['body']['size'], len(expected))
+
+        self.assertIsInstance(kwargs['body']['query']['bool']['must'], list)
+        self.assertTrue(all(len(cond.keys()) == 1 for cond in kwargs['body']['query']['bool']['must']))
+        conditions = kwargs['body']['query']['bool']['must']
+
+        text_match = next(c['match']['text'] for c in conditions if 'match' in c and 'text' in c['match'])
+        corpus_match = next(c['match']['corpus'] for c in conditions if 'match' in c and 'corpus' in c['match'])
+        self.assertEqual(text_match, 'paris')
+        self.assertEqual(corpus_match, str(vol.corpus_id))
+        self.assertEqual(next(c['range']['score']['gte'] for c in conditions if 'range' in c), 0.5)
diff --git a/arkindex/project/mixins.py b/arkindex/project/mixins.py
index 50c46cb191..b1fb9ff660 100644
--- a/arkindex/project/mixins.py
+++ b/arkindex/project/mixins.py
@@ -45,7 +45,7 @@ class SearchAPIMixin(CorpusACLMixin):
         }
         if 'corpus' in self.request.query_params:
             try:
-                context['corpus_id'] = self.get_corpus(self.request.query_params['corpus'])
+                context['corpus_id'] = str(self.get_corpus(self.request.query_params['corpus']).id)
             except Corpus.DoesNotExist:
                 raise PermissionDenied
         else:
-- 
GitLab