diff --git a/arkindex/dataimport/api.py b/arkindex/dataimport/api.py index 4ec2383a5796eb310a1ee3988f7d93084d7cbdc2..3b3a6b179edab400612d0d5ea7fd5f06874ae4d3 100644 --- a/arkindex/dataimport/api.py +++ b/arkindex/dataimport/api.py @@ -1320,7 +1320,7 @@ class ListProcessElements(CustomPaginationViewMixin, CorpusACLMixin, ListAPIView raise ValidationError({'__all__': [str(e)]}) if not self.with_image: - return queryset.values('id', 'type__slug', 'name') + return queryset.values('id', 'type_id', 'name') return queryset.annotate( # Build the image URL by concatenating the server's URL to the image's path @@ -1337,7 +1337,7 @@ class ListProcessElements(CustomPaginationViewMixin, CorpusACLMixin, ListAPIView ) ).values( 'id', - 'type__slug', + 'type_id', 'name', 'image_id', 'image__width', diff --git a/arkindex/dataimport/models.py b/arkindex/dataimport/models.py index c44257a7c3ca1a06356eac3a79397506407874fd..457bc9aa2159e6b06afb61b28cedf4e6ea6a8acd 100644 --- a/arkindex/dataimport/models.py +++ b/arkindex/dataimport/models.py @@ -138,10 +138,6 @@ class DataImport(IndexableModel): filters['name__contains'] = self.name_contains if self.element_type: filters['type_id'] = self.element_type_id - else: - # Limit the scope of types available to merge - # This prevent memory from exploding when no type is selected - filters['type__corpus_id'] = self.corpus_id return filters diff --git a/arkindex/dataimport/serializers/imports.py b/arkindex/dataimport/serializers/imports.py index 7e289c664620d5bdc9bb53ecd64baff5d6f4667b..1b7b9b279935957a887f97eea62da730cd30abed 100644 --- a/arkindex/dataimport/serializers/imports.py +++ b/arkindex/dataimport/serializers/imports.py @@ -462,13 +462,11 @@ class ProcessElementLightSerializer(serializers.ModelSerializer): """ Serialises an Element, using optimized query for ListProcessElement """ - type = serializers.CharField(source='type__slug') - class Meta: model = Element fields = ( 'id', - 'type', + 'type_id', 'name', ) read_only_fields = fields diff --git a/arkindex/dataimport/tests/test_process_elements.py b/arkindex/dataimport/tests/test_process_elements.py index b099ccea5db91fbf523f25d4a8eac15c496b7cc5..6aa4b6d8e72e80e0ebaf5bf4fb566761a31d1706 100644 --- a/arkindex/dataimport/tests/test_process_elements.py +++ b/arkindex/dataimport/tests/test_process_elements.py @@ -228,7 +228,7 @@ class TestProcessElements(FixtureAPITestCase): self.assertCountEqual(data["results"], [ { 'id': str(element.id), - 'type': element.type.slug, + 'type_id': str(element.type_id), 'name': element.name } for element in elements @@ -251,7 +251,7 @@ class TestProcessElements(FixtureAPITestCase): self.assertCountEqual(data["results"], [ { 'id': str(element.id), - 'type': element.type.slug, + 'type_id': str(element.type_id), 'name': element.name } for element in elements @@ -263,7 +263,12 @@ class TestProcessElements(FixtureAPITestCase): elements = [self.folder_1, self.folder_2] self.client.force_login(self.superuser) - with self.assertNumQueries(7): + with self.assertExactQueries('process_elements_filter_type.sql', skip=1, params={ + 'user_id': self.superuser.id, + 'dataimport_id': str(self.dataimport.id), + 'corpus_id': str(self.private_corpus.id), + 'type_id': str(self.folder_type.id), + }): response = self.client.get(reverse('api:process-elements-list', kwargs={'pk': self.dataimport.id})) self.assertEqual(response.status_code, status.HTTP_200_OK) data = response.json() @@ -272,7 +277,7 @@ class TestProcessElements(FixtureAPITestCase): self.assertCountEqual(data["results"], [ { 'id': str(element.id), - 'type': element.type.slug, + 'type_id': str(element.type_id), 'name': element.name } for element in elements @@ -295,7 +300,7 @@ class TestProcessElements(FixtureAPITestCase): self.assertCountEqual(data["results"], [ { 'id': str(element.id), - 'type': element.type.slug, + 'type_id': str(element.type_id), 'name': element.name } for element in elements @@ -315,7 +320,7 @@ class TestProcessElements(FixtureAPITestCase): self.assertCountEqual(data["results"], [ { 'id': str(self.page_1.id), - 'type': self.page_1.type.slug, + 'type_id': str(self.page_1.type_id), 'name': self.page_1.name } ]) @@ -333,12 +338,12 @@ class TestProcessElements(FixtureAPITestCase): self.assertCountEqual(data["results"], [ { 'id': str(self.page_1.id), - 'type': self.page_1.type.slug, + 'type_id': str(self.page_1.type_id), 'name': self.page_1.name }, { 'id': str(self.folder_2.id), - 'type': self.folder_2.type.slug, + 'type_id': str(self.folder_2.type_id), 'name': self.folder_2.name } ]) @@ -359,7 +364,7 @@ class TestProcessElements(FixtureAPITestCase): self.assertCountEqual(data["results"], [ { 'id': str(element.id), - 'type': element.type.slug, + 'type_id': str(element.type_id), 'name': element.name } for element in elements @@ -381,7 +386,7 @@ class TestProcessElements(FixtureAPITestCase): self.assertCountEqual(data["results"], [ { 'id': str(element.id), - 'type': element.type.slug, + 'type_id': str(element.type_id), 'name': element.name } for element in elements @@ -403,7 +408,7 @@ class TestProcessElements(FixtureAPITestCase): self.assertCountEqual(data["results"], [ { 'id': str(element.id), - 'type': element.type.slug, + 'type_id': str(element.type_id), 'name': element.name } for element in elements @@ -425,7 +430,7 @@ class TestProcessElements(FixtureAPITestCase): self.assertCountEqual(data["results"], [ { 'id': str(element.id), - 'type': element.type.slug, + 'type_id': str(element.type_id), 'name': element.name } for element in elements @@ -446,7 +451,7 @@ class TestProcessElements(FixtureAPITestCase): self.assertCountEqual(data["results"], [ { 'id': str(element.id), - 'type': element.type.slug, + 'type_id': str(element.type_id), 'name': element.name } for element in elements @@ -481,7 +486,7 @@ class TestProcessElements(FixtureAPITestCase): self.assertCountEqual(data["results"], [ { 'id': str(element.id), - 'type': element.type.slug, + 'type_id': str(element.type_id), 'name': element.name } for element in elements @@ -592,7 +597,11 @@ class TestProcessElements(FixtureAPITestCase): self.client.force_login(self.superuser) self.dataimport.load_children = True self.dataimport.save() - with self.assertNumQueries(6): + with self.assertExactQueries('process_elements_with_image.sql', skip=1, params={ + 'user_id': self.superuser.id, + 'dataimport_id': str(self.dataimport.id), + 'corpus_id': str(self.private_corpus.id), + }): response = self.client.get( reverse('api:process-elements-list', kwargs={'pk': self.dataimport.id}), {'with_image': True} @@ -603,7 +612,7 @@ class TestProcessElements(FixtureAPITestCase): { 'id': str(element.id), 'name': element.name, - 'type': element.type.slug, + 'type_id': str(element.type_id), 'image_id': str(element.image_id) if element.image_id else None, 'image_width': element.image.width if element.image else None, 'image_height': element.image.height if element.image else None, @@ -617,7 +626,11 @@ class TestProcessElements(FixtureAPITestCase): def test_corpus_top_level(self): self.client.force_login(self.superuser) - with self.assertNumQueries(6): + with self.assertExactQueries('process_elements_top_level.sql', skip=1, params={ + 'user_id': self.superuser.id, + 'dataimport_id': str(self.dataimport.id), + 'corpus_id': str(self.private_corpus.id), + }): response = self.client.get( reverse('api:process-elements-list', kwargs={'pk': self.dataimport.id}), ) @@ -625,11 +638,11 @@ class TestProcessElements(FixtureAPITestCase): { 'id': str(self.folder_1.id), 'name': 'Baba au rhum', - 'type': 'folder', + 'type_id': str(self.folder_1.type_id), }, { 'id': str(self.folder_2.id), 'name': 'Montgolfière', - 'type': 'folder', + 'type_id': str(self.folder_2.type_id), } ]) diff --git a/arkindex/sql_validation/process_elements_filter_type.sql b/arkindex/sql_validation/process_elements_filter_type.sql new file mode 100644 index 0000000000000000000000000000000000000000..74adacdd2e09f996c6605ab6ae56b3aa49a5f016 --- /dev/null +++ b/arkindex/sql_validation/process_elements_filter_type.sql @@ -0,0 +1,82 @@ +SELECT "users_user"."id", + "users_user"."password", + "users_user"."last_login", + "users_user"."email", + "users_user"."display_name", + "users_user"."transkribus_email", + "users_user"."is_active", + "users_user"."is_internal", + "users_user"."is_admin", + "users_user"."verified_email", + "users_user"."created", + "users_user"."updated" +FROM "users_user" +WHERE "users_user"."id" = {user_id} +LIMIT 21; + +SELECT "dataimport_dataimport"."id", + "dataimport_dataimport"."created", + "dataimport_dataimport"."updated", + "dataimport_dataimport"."name", + "dataimport_dataimport"."creator_id", + "dataimport_dataimport"."corpus_id", + "dataimport_dataimport"."mode", + "dataimport_dataimport"."revision_id", + "dataimport_dataimport"."workflow_id", + "dataimport_dataimport"."activity_state", + "dataimport_dataimport"."element_id", + "dataimport_dataimport"."folder_type_id", + "dataimport_dataimport"."element_type_id", + "dataimport_dataimport"."name_contains", + "dataimport_dataimport"."load_children", + "dataimport_dataimport"."collection_id", + "dataimport_dataimport"."build_entities", + "dataimport_dataimport"."use_cache", + "dataimport_dataimport"."use_gpu", + "dataimport_dataimport"."template_id" +FROM "dataimport_dataimport" +WHERE ("dataimport_dataimport"."corpus_id" IS NOT NULL + AND "dataimport_dataimport"."id" = '{dataimport_id}'::uuid) +LIMIT 21; + +SELECT "documents_corpus"."created", + "documents_corpus"."updated", + "documents_corpus"."id", + "documents_corpus"."name", + "documents_corpus"."description", + "documents_corpus"."repository_id", + "documents_corpus"."top_level_type_id", + "documents_corpus"."public", + "documents_corpus"."indexable", + "documents_corpus"."thumbnail_id" +FROM "documents_corpus" +WHERE "documents_corpus"."id" = '{corpus_id}'::uuid +LIMIT 21; + +SELECT (1) AS "a" +FROM "documents_element" +INNER JOIN "dataimport_dataimportelement" ON ("documents_element"."id" = "dataimport_dataimportelement"."element_id") +WHERE "dataimport_dataimportelement"."dataimport_id" = '{dataimport_id}'::uuid +LIMIT 1; + +SELECT "documents_elementtype"."id", + "documents_elementtype"."corpus_id", + "documents_elementtype"."slug", + "documents_elementtype"."display_name", + "documents_elementtype"."folder", + "documents_elementtype"."indexable" +FROM "documents_elementtype" +WHERE "documents_elementtype"."id" = '{type_id}'::uuid +LIMIT 21; + +SELECT "documents_element"."id", + "documents_element"."type_id", + "documents_element"."name" +FROM "documents_element" +INNER JOIN "documents_elementpath" ON ("documents_element"."id" = "documents_elementpath"."element_id") +WHERE ("documents_element"."corpus_id" = '{corpus_id}'::uuid + AND "documents_elementpath"."path" = (ARRAY[])::uuid[] + AND "documents_element"."corpus_id" = '{corpus_id}'::uuid + AND "documents_element"."type_id" = '{type_id}'::uuid) +ORDER BY "documents_element"."id" ASC +LIMIT 21 diff --git a/arkindex/sql_validation/process_elements_top_level.sql b/arkindex/sql_validation/process_elements_top_level.sql new file mode 100644 index 0000000000000000000000000000000000000000..a390baeedec910c3598ed2c75f5110e617b21da2 --- /dev/null +++ b/arkindex/sql_validation/process_elements_top_level.sql @@ -0,0 +1,71 @@ +SELECT "users_user"."id", + "users_user"."password", + "users_user"."last_login", + "users_user"."email", + "users_user"."display_name", + "users_user"."transkribus_email", + "users_user"."is_active", + "users_user"."is_internal", + "users_user"."is_admin", + "users_user"."verified_email", + "users_user"."created", + "users_user"."updated" +FROM "users_user" +WHERE "users_user"."id" = {user_id} +LIMIT 21; + +SELECT "dataimport_dataimport"."id", + "dataimport_dataimport"."created", + "dataimport_dataimport"."updated", + "dataimport_dataimport"."name", + "dataimport_dataimport"."creator_id", + "dataimport_dataimport"."corpus_id", + "dataimport_dataimport"."mode", + "dataimport_dataimport"."revision_id", + "dataimport_dataimport"."workflow_id", + "dataimport_dataimport"."activity_state", + "dataimport_dataimport"."element_id", + "dataimport_dataimport"."folder_type_id", + "dataimport_dataimport"."element_type_id", + "dataimport_dataimport"."name_contains", + "dataimport_dataimport"."load_children", + "dataimport_dataimport"."collection_id", + "dataimport_dataimport"."build_entities", + "dataimport_dataimport"."use_cache", + "dataimport_dataimport"."use_gpu", + "dataimport_dataimport"."template_id" +FROM "dataimport_dataimport" +WHERE ("dataimport_dataimport"."corpus_id" IS NOT NULL + AND "dataimport_dataimport"."id" = '{dataimport_id}'::uuid) +LIMIT 21; + +SELECT "documents_corpus"."created", + "documents_corpus"."updated", + "documents_corpus"."id", + "documents_corpus"."name", + "documents_corpus"."description", + "documents_corpus"."repository_id", + "documents_corpus"."top_level_type_id", + "documents_corpus"."public", + "documents_corpus"."indexable", + "documents_corpus"."thumbnail_id" +FROM "documents_corpus" +WHERE "documents_corpus"."id" = '{corpus_id}'::uuid +LIMIT 21; + +SELECT (1) AS "a" +FROM "documents_element" +INNER JOIN "dataimport_dataimportelement" ON ("documents_element"."id" = "dataimport_dataimportelement"."element_id") +WHERE "dataimport_dataimportelement"."dataimport_id" = '{dataimport_id}'::uuid +LIMIT 1; + +SELECT "documents_element"."id", + "documents_element"."type_id", + "documents_element"."name" +FROM "documents_element" +INNER JOIN "documents_elementpath" ON ("documents_element"."id" = "documents_elementpath"."element_id") +WHERE ("documents_element"."corpus_id" = '{corpus_id}'::uuid + AND "documents_elementpath"."path" = (ARRAY[])::uuid[] + AND "documents_element"."corpus_id" = '{corpus_id}'::uuid) +ORDER BY "documents_element"."id" ASC +LIMIT 21 diff --git a/arkindex/sql_validation/process_elements_with_image.sql b/arkindex/sql_validation/process_elements_with_image.sql new file mode 100644 index 0000000000000000000000000000000000000000..c4510483467549e1eaf1d4d4b4f8b6f36d295ba0 --- /dev/null +++ b/arkindex/sql_validation/process_elements_with_image.sql @@ -0,0 +1,78 @@ +SELECT "users_user"."id", + "users_user"."password", + "users_user"."last_login", + "users_user"."email", + "users_user"."display_name", + "users_user"."transkribus_email", + "users_user"."is_active", + "users_user"."is_internal", + "users_user"."is_admin", + "users_user"."verified_email", + "users_user"."created", + "users_user"."updated" +FROM "users_user" +WHERE "users_user"."id" = {user_id} +LIMIT 21; + +SELECT "dataimport_dataimport"."id", + "dataimport_dataimport"."created", + "dataimport_dataimport"."updated", + "dataimport_dataimport"."name", + "dataimport_dataimport"."creator_id", + "dataimport_dataimport"."corpus_id", + "dataimport_dataimport"."mode", + "dataimport_dataimport"."revision_id", + "dataimport_dataimport"."workflow_id", + "dataimport_dataimport"."activity_state", + "dataimport_dataimport"."element_id", + "dataimport_dataimport"."folder_type_id", + "dataimport_dataimport"."element_type_id", + "dataimport_dataimport"."name_contains", + "dataimport_dataimport"."load_children", + "dataimport_dataimport"."collection_id", + "dataimport_dataimport"."build_entities", + "dataimport_dataimport"."use_cache", + "dataimport_dataimport"."use_gpu", + "dataimport_dataimport"."template_id" +FROM "dataimport_dataimport" +WHERE ("dataimport_dataimport"."corpus_id" IS NOT NULL + AND "dataimport_dataimport"."id" = '{dataimport_id}'::uuid) +LIMIT 21; + +SELECT "documents_corpus"."created", + "documents_corpus"."updated", + "documents_corpus"."id", + "documents_corpus"."name", + "documents_corpus"."description", + "documents_corpus"."repository_id", + "documents_corpus"."top_level_type_id", + "documents_corpus"."public", + "documents_corpus"."indexable", + "documents_corpus"."thumbnail_id" +FROM "documents_corpus" +WHERE "documents_corpus"."id" = '{corpus_id}'::uuid +LIMIT 21; + +SELECT (1) AS "a" +FROM "documents_element" +INNER JOIN "dataimport_dataimportelement" ON ("documents_element"."id" = "dataimport_dataimportelement"."element_id") +WHERE "dataimport_dataimportelement"."dataimport_id" = '{dataimport_id}'::uuid +LIMIT 1; + +SELECT "documents_element"."id", + "documents_element"."type_id", + "documents_element"."name", + "documents_element"."image_id", + "images_image"."width", + "images_image"."height", + "documents_element"."polygon"::bytea, + "documents_element"."rotation_angle", + "documents_element"."mirrored", + NULLIF(CONCAT(RTRIM("images_imageserver"."url", '/'), CONCAT('/', ("images_image"."path")::varchar)), '/') AS "image_url" +FROM "documents_element" +LEFT OUTER JOIN "images_image" ON ("documents_element"."image_id" = "images_image"."id") +LEFT OUTER JOIN "images_imageserver" ON ("images_image"."server_id" = "images_imageserver"."id") +WHERE ("documents_element"."corpus_id" = '{corpus_id}'::uuid + AND "documents_element"."corpus_id" = '{corpus_id}'::uuid) +ORDER BY "documents_element"."id" ASC +LIMIT 21