From abdb4a527f1c8c1de2c711d54bccaf51286de15f Mon Sep 17 00:00:00 2001
From: Valentin Rigal <rigal@teklia.com>
Date: Fri, 29 May 2020 12:48:34 +0000
Subject: [PATCH] Prevent transcriptions without a zone from being added to
 the transcriptions ElasticSearch index

---
 arkindex/documents/consumers.py               |  7 +++--
 .../tests/consumers/test_reindex_consumer.py  | 31 +++++++++++++++++--
 2 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/arkindex/documents/consumers.py b/arkindex/documents/consumers.py
index fbbc38e55e..f5e9c9c122 100644
--- a/arkindex/documents/consumers.py
+++ b/arkindex/documents/consumers.py
@@ -81,13 +81,16 @@ class ReindexConsumer(SyncConsumer):
                 # Pick all elements in the corpus
                 elements_queryset = Element.objects.filter(corpus_id=corpus_id)
 
-            transcriptions_queryset = Transcription.objects.filter(element__in=elements_queryset)
+            transcriptions_queryset = Transcription.objects.filter(
+                element__in=elements_queryset,
+                zone__isnull=False
+            )
             entities_queryset = Entity.objects.filter(
                 Q(metadatas__element__in=elements_queryset)
                 | Q(transcriptions__element__in=elements_queryset)
             )
         else:
-            transcriptions_queryset = Transcription.objects.all()
+            transcriptions_queryset = Transcription.objects.filter(zone__isnull=False)
             elements_queryset = Element.objects.all()
             entities_queryset = Entity.objects.all()
 
diff --git a/arkindex/documents/tests/consumers/test_reindex_consumer.py b/arkindex/documents/tests/consumers/test_reindex_consumer.py
index b0b16331b3..9ea454c49c 100644
--- a/arkindex/documents/tests/consumers/test_reindex_consumer.py
+++ b/arkindex/documents/tests/consumers/test_reindex_consumer.py
@@ -71,7 +71,11 @@ class TestReindexConsumer(FixtureTestCase):
 
     def _assert_all_transcriptions(self, call_args):
         (queryset, ), kwargs = call_args
-        self.assertQuerysetEqual(queryset, Transcription.objects.all())
+        self.assertQuerysetEqual(
+            queryset,
+            # Only transcriptions with a zone may be added to transcriptions index
+            Transcription.objects.filter(zone__isnull=False)
+        )
         self.assertDictEqual(kwargs, {'bulk_size': 400})
 
     def _assert_all(self, mock):
@@ -143,7 +147,10 @@ class TestReindexConsumer(FixtureTestCase):
         self.assertDictEqual(kwargs, {'bulk_size': 400})
 
         (queryset, ), kwargs = ts_call
-        self.assertQuerysetEqual(queryset, Transcription.objects.filter(element__corpus_id=self.corpus.id))
+        self.assertQuerysetEqual(
+            queryset,
+            Transcription.objects.filter(element__corpus_id=self.corpus.id, zone__isnull=False)
+        )
         self.assertDictEqual(kwargs, {'bulk_size': 400})
 
     def test_reindex_element(self, mock):
@@ -172,5 +179,23 @@ class TestReindexConsumer(FixtureTestCase):
         self.assertDictEqual(kwargs, {'bulk_size': 400})
 
         (queryset, ), kwargs = ts_call
-        self.assertQuerysetEqual(queryset, Transcription.objects.filter(element__in=elements_list))
+        self.assertQuerysetEqual(
+            queryset,
+            Transcription.objects.filter(element__in=elements_list, zone__isnull=False)
+        )
         self.assertDictEqual(kwargs, {'bulk_size': 400})
+
+    def test_reindex_transcriptions_without_zone(self, mock):
+        """
+        Transcriptions with no zone may not be indexed in transcriptions index
+        """
+        ReindexConsumer({}).reindex_start({
+            'transcriptions': True,
+            'entities': False,
+            'elements': False,
+        })
+        transcription = Transcription.objects.filter(zone__isnull=True).first()
+        self.assertNotEqual(transcription, None)
+        self.assertEqual(mock().run_index.call_count, 1)
+        (queryset, ), kwargs = mock().run_index.call_args
+        self.assertFalse(queryset.filter(id=transcription.id).exists())
-- 
GitLab