From 5b5d8b8033eacf01c073b811c9a65ed959ba22f6 Mon Sep 17 00:00:00 2001
From: Erwan Rouchet <rouchet@teklia.com>
Date: Wed, 19 Jan 2022 08:36:08 +0000
Subject: [PATCH] Filter on the corpus ID of ElementTypes

---
 arkindex/documents/api/elements.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/arkindex/documents/api/elements.py b/arkindex/documents/api/elements.py
index 2018fa0946..1c9c5cea43 100644
--- a/arkindex/documents/api/elements.py
+++ b/arkindex/documents/api/elements.py
@@ -571,6 +571,17 @@ class ElementsListBase(CorpusACLMixin, DestroyModelMixin, ListAPIView):
         if self.type_filter:
             filters['type'] = self.type_filter
         elif self.folder_filter is not None:
+            # When filtering for folder or non-folder elements, using only the type__folder filter
+            # can cause Postgres to retrieve all the {non-}folder types across every corpus.
+            # This can reach hundreds of types as the database grows, so Postgres can end up using a Hash Join
+            # to handle joining this large number of elements and types.
+            # Since Postgres estimates this to represent a large number of rows, it might also use multi-processing,
+            # which has a very high overhead.
+            # This can be avoided by also filtering on the type's corpus: Postgres will then access the index
+            # on the type's corpus ID. The query planner's statistics will give it a very low estimate since there
+            # are rarely many types in a corpus, so Postgres will also use the type_id index on elements, which
+            # will reduce the number of rows much more quickly, making it stop using multi-processing.
+            filters['type__corpus'] = self.selected_corpus
             filters['type__folder'] = self.folder_filter
 
         if 'worker_version' in self.clean_params:
-- 
GitLab