diff --git a/arkindex/documents/api/elements.py b/arkindex/documents/api/elements.py index 2018fa094655d6a397fe40f818ed38cd7ee29e4c..1c9c5cea43099c95aa28b370519fc8addb190f3f 100644 --- a/arkindex/documents/api/elements.py +++ b/arkindex/documents/api/elements.py @@ -571,6 +571,17 @@ class ElementsListBase(CorpusACLMixin, DestroyModelMixin, ListAPIView): if self.type_filter: filters['type'] = self.type_filter elif self.folder_filter is not None: + # When filtering for folder or non-folder elements, using only the type__folder filter + # can cause Postgres to retrieve all the {non-}folder types in every corpus. + # This can reach hundreds of types as the database grows, so Postgres can end up using a Hash Join + # to handle joining this large number of elements and types. + # Since Postgres estimates this to represent a large number of rows, it might also use multi-processing + # (parallel query), which has a very high overhead. + # This can be avoided by also filtering on the type's corpus: Postgres will then access the index + # on the type's corpus ID. The query planner's statistics will give it a very low estimate since there + # are rarely many types in a corpus, so Postgres will also use the type_id index on elements, which + # will reduce the number of rows much more quickly, making it stop using multi-processing. + filters['type__corpus'] = self.selected_corpus filters['type__folder'] = self.folder_filter if 'worker_version' in self.clean_params: