From f381702cc59f2127972fd8bb7a42ced035ca0566 Mon Sep 17 00:00:00 2001 From: Erwan Rouchet <rouchet@teklia.com> Date: Tue, 18 May 2021 12:39:03 +0000 Subject: [PATCH] Optimize ListCorpusWorkerVersions --- arkindex/dataimport/api.py | 34 +++++-- arkindex/dataimport/tests/test_workers.py | 114 +++++++++++++++------- 2 files changed, 105 insertions(+), 43 deletions(-) diff --git a/arkindex/dataimport/api.py b/arkindex/dataimport/api.py index b8d053f61b..edbeed5ced 100644 --- a/arkindex/dataimport/api.py +++ b/arkindex/dataimport/api.py @@ -80,6 +80,7 @@ from arkindex.project.mixins import ( SelectionMixin, WorkerACLMixin, ) +from arkindex.project.pagination import CustomCursorPagination from arkindex.project.permissions import IsInternalUser, IsVerified, IsVerifiedOrReadOnly from arkindex.project.tools import RTrimChr from arkindex.users.models import OAuthCredentials, Role @@ -907,18 +908,27 @@ class WorkerVersionList(WorkerACLMixin, ListCreateAPIView): @extend_schema_view( get=extend_schema( - tags=['ml'], operation_id='ListCorpusWorkerVersions', + tags=['ml'], description=( 'List worker versions used by elements of a given corpus.\n\n' 'No check is performed on workers access level in order to allow any user to see versions.' ), + parameters=[ + OpenApiParameter( + 'with_element_count', + type=bool, + default=False, + description='Include element counts in the response.', + ) + ], ) ) class CorpusWorkerVersionList(CorpusACLMixin, ListAPIView): """ List worker versions used by elements of a given corpus. """ + pagination_class = CustomCursorPagination permission_classes = (IsVerifiedOrReadOnly, ) serializer_class = WorkerVersionSerializer # For OpenAPI type discovery @@ -929,12 +939,24 @@ class CorpusWorkerVersionList(CorpusACLMixin, ListAPIView): def get_queryset(self): corpus = self.get_corpus() - return WorkerVersion.objects \ + queryset = WorkerVersion.objects \ .filter(elements__corpus_id=corpus.id) \ - .select_related('revision__repo', 'worker', 'worker__repository') \ - .prefetch_related('revision__refs', 'revision__versions') \ - .order_by('-revision__created') \ - .annotate(element_count=Count('id')) + .prefetch_related( + 'revision__repo', + 'revision__refs', + 'revision__versions', + 'worker__repository', + ) \ + .order_by('-id') + + if self.request.query_params.get('with_element_count', '').lower() in ('true', '1'): + queryset = queryset.annotate(element_count=Count('id')) + else: + # The Count() causes Django to add a GROUP BY, and without a count we need a DISTINCT + # because filtering on `elements` causes worker versions to be duplicated. + queryset = queryset.distinct() + + return queryset @extend_schema(tags=['repos']) diff --git a/arkindex/dataimport/tests/test_workers.py b/arkindex/dataimport/tests/test_workers.py index abc232f04d..8a04e4b11c 100644 --- a/arkindex/dataimport/tests/test_workers.py +++ b/arkindex/dataimport/tests/test_workers.py @@ -842,45 +842,49 @@ class TestWorkersWorkerVersions(FixtureAPITestCase): self.assertEqual(self.version_1.docker_command, 'mysupercommand') self.version_1.configuration = {"test": "test1"} - def _assert_corpus_worker_version_list(self, response): - self.assertDictEqual(response.json(), { - 'count': 1, - 'next': None, - 'number': 1, - 'previous': None, - 'results': [{ - 'id': str(self.version_1.id), - 'configuration': {'test': 42}, - 'revision': { - 'id': str(self.version_1.revision_id), - 'hash': '1337', - 'author': 'Test user', - 'message': 'My w0rk3r', - 'created': '2020-02-02T01:23:45.678000Z', - 'commit_url': 'http://my_repo.fake/workers/worker/commit/1337', - 'refs': [] - }, - 'docker_image': str(self.version_1.docker_image_id), - 'docker_image_iid': None, - 'docker_image_name': self.version_1.docker_image_name, - 'state': 'available', - 'element_count': 9, - 'worker': { - 'id': str(self.worker_1.id), - 'name': self.worker_1.name, - 'type': self.worker_1.type, - 'slug': self.worker_1.slug, - } - }] - }) + def _serialize_worker_version(self, version, element_count=False): + data = { + 'id': str(version.id), + 'configuration': {'test': 42}, + 'revision': { + 'id': str(version.revision_id), + 'hash': '1337', + 'author': 'Test user', + 'message': 'My w0rk3r', + 'created': '2020-02-02T01:23:45.678000Z', + 'commit_url': 'http://my_repo.fake/workers/worker/commit/1337', + 'refs': [] + }, + 'docker_image': str(version.docker_image_id), + 'docker_image_iid': None, + 'docker_image_name': version.docker_image_name, + 'state': 'available', + 'worker': { + 'id': str(version.worker.id), + 'name': version.worker.name, + 'type': version.worker.type, + 'slug': version.worker.slug, + } + } + if element_count: + data['element_count'] = version.elements.filter(corpus=self.corpus).count() + return data def test_corpus_worker_version_no_login(self): self.corpus.elements.filter(type__slug='word').update(worker_version=self.version_1) - with self.assertNumQueries(7): + with self.assertNumQueries(8): response = self.client.get(reverse('api:corpus-versions', kwargs={'pk': self.corpus.id})) self.assertEqual(response.status_code, status.HTTP_200_OK) - self._assert_corpus_worker_version_list(response) + + self.assertDictEqual(response.json(), { + 'count': None, + 'previous': None, + 'next': None, + 'results': [ + self._serialize_worker_version(self.version_1) + ] + }) def test_corpus_worker_version_not_verified(self): self.user.verified_email = False @@ -888,16 +892,52 @@ class TestWorkersWorkerVersions(FixtureAPITestCase): self.client.force_login(self.user) self.corpus.elements.filter(type__slug='word').update(worker_version=self.version_1) - with self.assertNumQueries(11): + with self.assertNumQueries(12): response = self.client.get(reverse('api:corpus-versions', kwargs={'pk': self.corpus.id})) self.assertEqual(response.status_code, status.HTTP_200_OK) - self._assert_corpus_worker_version_list(response) + + self.assertDictEqual(response.json(), { + 'count': None, + 'previous': None, + 'next': None, + 'results': [ + self._serialize_worker_version(self.version_1) + ] + }) def test_corpus_worker_version_list(self): self.client.force_login(self.user) self.corpus.elements.filter(type__slug='word').update(worker_version=self.version_1) - with self.assertNumQueries(11): + with self.assertNumQueries(12): response = self.client.get(reverse('api:corpus-versions', kwargs={'pk': self.corpus.id})) self.assertEqual(response.status_code, status.HTTP_200_OK) - self._assert_corpus_worker_version_list(response) + + self.assertDictEqual(response.json(), { + 'count': None, + 'previous': None, + 'next': None, + 'results': [ + self._serialize_worker_version(self.version_1) + ] + }) + + def test_corpus_worker_version_list_with_element_count(self): + self.client.force_login(self.user) + self.corpus.elements.filter(type__slug='word').update(worker_version=self.version_1) + + with self.assertNumQueries(12): + response = self.client.get( + reverse('api:corpus-versions', kwargs={'pk': self.corpus.id}), + {'with_element_count': 'true'} + ) + self.assertEqual(response.status_code, status.HTTP_200_OK) + + self.assertDictEqual(response.json(), { + 'count': None, + 'previous': None, + 'next': None, + 'results': [ + self._serialize_worker_version(self.version_1, element_count=True) + ] + }) -- GitLab