diff --git a/arkindex/dataimport/api.py b/arkindex/dataimport/api.py index af2d6fd5c297899fb9f54e475b2122c24ace0d77..cf0dc554d03c862eb898cc74ff64f20d2586f110 100644 --- a/arkindex/dataimport/api.py +++ b/arkindex/dataimport/api.py @@ -20,14 +20,13 @@ from arkindex.documents.models import Corpus, ElementType, Element, Classificati from arkindex.documents.api.elements import ElementsListMixin from arkindex.dataimport.models import \ DataImport, DataFile, Repository, RepositoryType, Revision, Worker, WorkerVersion, WorkerRun -from arkindex.documents.serializers.light import ElementLightSerializer from arkindex.dataimport.serializers.files import DataFileSerializer, DataFileCreateSerializer from arkindex.dataimport.serializers.git import \ ExternalRepositorySerializer, RevisionSerializer from arkindex.dataimport.serializers.imports import ( DataImportLightSerializer, DataImportSerializer, DataImportFromFilesSerializer, ElementsWorkflowSerializer, WorkerRunLightSerializer, WorkerRunSerializer, - ImportTranskribusSerializer, StartProcessSerializer + ImportTranskribusSerializer, StartProcessSerializer, ElementLightSerializer ) from arkindex.dataimport.serializers.workers import WorkerSerializer, WorkerVersionSerializer, RepositorySerializer from arkindex.users.models import OAuthCredentials, User @@ -910,7 +909,10 @@ class ListProcessElements(CustomPaginationViewMixin, ListAPIView): if elements is not None and dataimport.load_children: # Load all the children elements whose path contains the pre-selected elements # Those children are appended to the pre-selection - elements |= Element.objects.filter(paths__path__overlap=map(str, elements.values_list('id', flat=True))).filter(**base_filters) + elements |= Element.objects.filter( + paths__path__overlap=map(str, elements.values_list('id', flat=True)), + **base_filters + ) # Load the full corpus, only when elements has not been populated before if elements is None: @@ -919,19 +921,25 @@ class 
ListProcessElements(CustomPaginationViewMixin, ListAPIView): # Filter elements depending on process properties elements = elements.filter(**base_filters) - elements = elements.prefetch_related('type') class_filters = self.get_classifications_filters(dataimport) if class_filters is not None: elements = elements.filter(class_filters).distinct() - return elements + # Only retrieve necessary values for the serializer + return elements.values('id', 'type__slug', 'name') def get_filters(self, dataimport): - filters = {} + filters = { + "corpus_id": dataimport.corpus_id, + } if dataimport.name_contains: filters['name__contains'] = dataimport.name_contains if dataimport.element_type: filters['type_id'] = dataimport.element_type_id + else: + # Limit the scope of types available to merge + # This prevents memory from exploding when no type is selected + filters['type__corpus_id'] = dataimport.corpus_id return filters diff --git a/arkindex/dataimport/serializers/imports.py b/arkindex/dataimport/serializers/imports.py index 5ea60102df973b51f610278ee850bc7eb9e95949..2b9d070e97b850d199042229c579701bf02a73dd 100644 --- a/arkindex/dataimport/serializers/imports.py +++ b/arkindex/dataimport/serializers/imports.py @@ -355,3 +355,18 @@ class ImportTranskribusSerializer(serializers.Serializer): ) return data + + +class ElementLightSerializer(serializers.ModelSerializer): + """ + Serialises an Element, using an optimized query for ListProcessElements + """ + type = serializers.CharField(source='type__slug') + + class Meta: + model = Element + fields = ( + 'id', + 'type', + 'name', + ) diff --git a/arkindex/dataimport/tests/test_process_elements.py b/arkindex/dataimport/tests/test_process_elements.py index b4c4aba3b1bfebcc97cacf92bdb2e4054a5373e0..f0ad9808c2c85b169dd4317fe3d9e70b009a57d8 100644 --- a/arkindex/dataimport/tests/test_process_elements.py +++ b/arkindex/dataimport/tests/test_process_elements.py @@ -239,7 +239,7 @@ class TestProcessElements(FixtureAPITestCase): elements = 
[self.folder_1, self.page_1, self.page_5] self.client.force_login(self.superuser) - with self.assertNumQueries(6): + with self.assertNumQueries(5): response = self.client.get(reverse('api:process-elements-list', kwargs={'pk': self.dataimport.id})) self.assertEqual(response.status_code, status.HTTP_200_OK) data = response.json() @@ -262,7 +262,7 @@ class TestProcessElements(FixtureAPITestCase): elements = [self.folder_1, self.page_1] self.client.force_login(self.superuser) - with self.assertNumQueries(7): + with self.assertNumQueries(6): response = self.client.get(reverse('api:process-elements-list', kwargs={'pk': self.dataimport.id})) self.assertEqual(response.status_code, status.HTTP_200_OK) data = response.json() @@ -283,7 +283,7 @@ class TestProcessElements(FixtureAPITestCase): elements = [self.folder_1, self.folder_2] self.client.force_login(self.superuser) - with self.assertNumQueries(7): + with self.assertNumQueries(6): response = self.client.get(reverse('api:process-elements-list', kwargs={'pk': self.dataimport.id})) self.assertEqual(response.status_code, status.HTTP_200_OK) data = response.json() @@ -306,7 +306,7 @@ class TestProcessElements(FixtureAPITestCase): elements = [self.line_1, self.line_2, self.line_3] self.client.force_login(self.superuser) - with self.assertNumQueries(8): + with self.assertNumQueries(7): response = self.client.get(reverse('api:process-elements-list', kwargs={'pk': self.dataimport.id})) self.assertEqual(response.status_code, status.HTTP_200_OK) data = response.json() @@ -327,7 +327,7 @@ class TestProcessElements(FixtureAPITestCase): elements = [self.page_5, self.page_3, self.folder_2, self.page_2] self.client.force_login(self.superuser) - with self.assertNumQueries(6): + with self.assertNumQueries(5): response = self.client.get(reverse('api:process-elements-list', kwargs={'pk': self.dataimport.id})) self.assertEqual(response.status_code, status.HTTP_200_OK) data = response.json() @@ -348,7 +348,7 @@ class 
TestProcessElements(FixtureAPITestCase): elements = [self.page_1, self.page_5, self.page_3, self.folder_2, self.page_2] self.client.force_login(self.superuser) - with self.assertNumQueries(6): + with self.assertNumQueries(5): response = self.client.get(reverse('api:process-elements-list', kwargs={'pk': self.dataimport.id})) self.assertEqual(response.status_code, status.HTTP_200_OK) data = response.json() @@ -369,7 +369,7 @@ class TestProcessElements(FixtureAPITestCase): elements = [self.folder_1, self.line_1, self.line_2, self.line_3, self.line_4, self.line_5, self.page_4] self.client.force_login(self.superuser) - with self.assertNumQueries(6): + with self.assertNumQueries(5): response = self.client.get(reverse('api:process-elements-list', kwargs={'pk': self.dataimport.id})) self.assertEqual(response.status_code, status.HTTP_200_OK) data = response.json() @@ -389,7 +389,7 @@ class TestProcessElements(FixtureAPITestCase): self.dataimport.save() self.client.force_login(self.superuser) - with self.assertNumQueries(6): + with self.assertNumQueries(5): response = self.client.get(reverse('api:process-elements-list', kwargs={'pk': self.dataimport.id})) self.assertEqual(response.status_code, status.HTTP_200_OK) data = response.json() @@ -407,7 +407,7 @@ class TestProcessElements(FixtureAPITestCase): self.dataimport.elements.add(self.page_1.id, self.folder_2.id) self.client.force_login(self.superuser) - with self.assertNumQueries(7): + with self.assertNumQueries(6): response = self.client.get(reverse('api:process-elements-list', kwargs={'pk': self.dataimport.id})) self.assertEqual(response.status_code, status.HTTP_200_OK) data = response.json() @@ -433,7 +433,7 @@ class TestProcessElements(FixtureAPITestCase): elements = [self.folder_1, self.page_1, self.page_5] self.client.force_login(self.superuser) - with self.assertNumQueries(6): + with self.assertNumQueries(5): response = self.client.get(reverse('api:process-elements-list', kwargs={'pk': self.dataimport.id})) 
self.assertEqual(response.status_code, status.HTTP_200_OK) data = response.json() @@ -455,7 +455,7 @@ class TestProcessElements(FixtureAPITestCase): elements = [self.folder_1, self.folder_2] self.client.force_login(self.superuser) - with self.assertNumQueries(7): + with self.assertNumQueries(6): response = self.client.get(reverse('api:process-elements-list', kwargs={'pk': self.dataimport.id})) self.assertEqual(response.status_code, status.HTTP_200_OK) data = response.json() @@ -477,7 +477,7 @@ class TestProcessElements(FixtureAPITestCase): elements = [self.folder_2, self.page_2, self.page_3, self.page_5] self.client.force_login(self.superuser) - with self.assertNumQueries(6): + with self.assertNumQueries(5): response = self.client.get(reverse('api:process-elements-list', kwargs={'pk': self.dataimport.id})) self.assertEqual(response.status_code, status.HTTP_200_OK) data = response.json() @@ -499,7 +499,7 @@ class TestProcessElements(FixtureAPITestCase): elements = [self.page_1, self.page_5, self.page_3, self.folder_2, self.page_2] self.client.force_login(self.superuser) - with self.assertNumQueries(6): + with self.assertNumQueries(5): response = self.client.get( reverse('api:process-elements-list', kwargs={'pk': self.dataimport.id})) self.assertEqual(response.status_code, status.HTTP_200_OK) @@ -522,7 +522,7 @@ class TestProcessElements(FixtureAPITestCase): elements = [self.folder_1, self.line_1, self.line_2, self.line_3, self.line_4, self.line_5, self.page_4] self.client.force_login(self.superuser) - with self.assertNumQueries(6): + with self.assertNumQueries(5): response = self.client.get(reverse('api:process-elements-list', kwargs={'pk': self.dataimport.id})) self.assertEqual(response.status_code, status.HTTP_200_OK) data = response.json() @@ -544,7 +544,7 @@ class TestProcessElements(FixtureAPITestCase): elements = [self.folder_1, self.page_1, self.page_3, self.line_1, self.line_2, self.line_3, self.page_2] self.client.force_login(self.superuser) - with 
self.assertNumQueries(7): + with self.assertNumQueries(6): response = self.client.get(reverse('api:process-elements-list', kwargs={'pk': self.dataimport.id})) self.assertEqual(response.status_code, status.HTTP_200_OK) data = response.json() @@ -566,7 +566,7 @@ class TestProcessElements(FixtureAPITestCase): elements = [self.page_1, self.page_5, self.page_3, self.line_1, self.line_3, self.line_4, self.line_5, self.folder_2, self.page_4] self.client.force_login(self.superuser) - with self.assertNumQueries(8): + with self.assertNumQueries(7): response = self.client.get(reverse('api:process-elements-list', kwargs={'pk': self.dataimport.id})) self.assertEqual(response.status_code, status.HTTP_200_OK) data = response.json() @@ -587,7 +587,7 @@ class TestProcessElements(FixtureAPITestCase): elements = Element.objects.filter(corpus=self.private_corpus).order_by('name', 'type__slug') self.client.force_login(self.superuser) - with self.assertNumQueries(6): + with self.assertNumQueries(5): response = self.client.get(reverse('api:process-elements-list', kwargs={'pk': self.dataimport.id})) self.assertEqual(response.status_code, status.HTTP_200_OK) data = response.json() @@ -620,7 +620,7 @@ class TestProcessElements(FixtureAPITestCase): for mode in (DataImportMode.Elements, DataImportMode.Workers): self.dataimport.mode = mode self.dataimport.save() - with self.assertNumQueries(6): + with self.assertNumQueries(5): response = self.client.get(reverse('api:process-elements-list', kwargs={'pk': self.dataimport.id})) self.assertEqual(response.status_code, status.HTTP_200_OK) data = response.json() @@ -654,12 +654,12 @@ class TestProcessElements(FixtureAPITestCase): self.dataimport.save() self.client.force_login(self.superuser) - with self.assertNumQueries(7): + with self.assertNumQueries(6): page_1 = self.client.get(reverse('api:process-elements-list', kwargs={'pk': self.dataimport.id})) self.assertEqual(len(page_1.json()['results']), 20) next_page = page_1.json().get('next') 
self.assertIsNotNone(next_page) - with self.assertNumQueries(7): + with self.assertNumQueries(6): page_2 = self.client.get(next_page) self.assertIsNone(page_2.json()['next']) qs_1 = Element.objects.filter(id__in=[elt['id'] for elt in page_1.json()['results']]) @@ -682,7 +682,7 @@ class TestProcessElements(FixtureAPITestCase): Elements count can be retrieved with with_count parameter """ self.client.force_login(self.superuser) - with self.assertNumQueries(7): + with self.assertNumQueries(6): response = self.client.get( reverse('api:process-elements-list', kwargs={'pk': self.dataimport.id}), {'page_size': 6, 'with_count': True} @@ -717,7 +717,7 @@ class TestProcessElements(FixtureAPITestCase): self.dataimport.save() self.client.force_login(self.superuser) - with self.assertNumQueries(6): + with self.assertNumQueries(5): response = self.client.get( reverse('api:process-elements-list', kwargs={'pk': self.dataimport.id}), {'page_size': 50}