diff --git a/arkindex/documents/api/elements.py b/arkindex/documents/api/elements.py index 3ace9a9f8f79f49404cd516fb60f28fe8df4579b..61b30042dcbaf657cf8ce4a8fc6655561925b783 100644 --- a/arkindex/documents/api/elements.py +++ b/arkindex/documents/api/elements.py @@ -1263,14 +1263,6 @@ class ElementRetrieve(ACLMixin, RetrieveUpdateDestroyAPIView): get=extend_schema( operation_id='ListElementNeighbors', tags=['elements'], - parameters=[ - OpenApiParameter( - 'n', - type={'type': 'integer', 'minimum': 1, 'maximum': 10}, - description='Number of neighbors to retrieve around the element', - required=False, - ) - ], ) ) class ElementNeighbors(ACLMixin, ListAPIView): @@ -1284,16 +1276,9 @@ class ElementNeighbors(ACLMixin, ListAPIView): queryset = Element.objects.none() def get_queryset(self): - n = self.request.query_params.get('n', 1) - try: - n = int(n) - except (TypeError, ValueError): - raise ValidationError({'n': 'Should be an integer between 1 and 10'}) - if not 1 <= n <= 10: - raise ValidationError({'n': 'Should be an integer between 1 and 10'}) element = get_object_or_404( - Element.objects.select_related('corpus'), + Element.objects.select_related('corpus').only('id', 'corpus__public'), id=self.kwargs['pk'] ) @@ -1301,7 +1286,7 @@ class ElementNeighbors(ACLMixin, ListAPIView): if not self.has_access(element.corpus, Role.Guest.value): raise PermissionDenied(detail='You do not have a read access to this element.') - return Element.objects.get_neighbors(element, n) + return Element.objects.get_neighbors(element) @extend_schema(tags=['elements'], request=None) diff --git a/arkindex/documents/managers.py b/arkindex/documents/managers.py index 7dae21c3e3d1ac0d4f02e499c1abe8a15a83bed9..7cedd13bd45f449eb4e480187fe2cf206cdec15d 100644 --- a/arkindex/documents/managers.py +++ b/arkindex/documents/managers.py @@ -138,59 +138,102 @@ class ElementManager(models.Manager): """ return self.filter(paths__path__contains=[parent_id], **filters).order_by('paths__ordering').distinct() - def get_neighbor_paths(self, element, n=1): + def get_neighbor_paths(self, element): """ - Get n neighbor paths: current, previous and next element paths + Get previous and next element IDs for each path related to this element. This uses a single raw SQL query to use row numbers, instead of the ordering numeric values, to avoid issues with duplicated or missing orderings. - * The `neighbors` subquery lists all paths for an element, and then list all possible neighbors - with the same parents. An ordering consistent with rows numbers is added as `position` - * The main query combines the `neighbors` subquery with itself to have a full product, and only retains - the combinations with a distance under the requested limit, with elements of the same type - * The output is ordered by parent path, then by position (row numbers) + Performance tests are available at https://gitlab.com/teklia/arkindex/backend/-/issues/1011 + + * The elt_path subquery lists all paths for the current element. + * The main query lists all neighbors for each path, retrieve the previous and next elements using + a window function. + * Paths are filtered depending on element corpus. That step requires another join, but allows top elements + to be positioned correctly as their paths are similar among each corpus (i.e. empty). + * The output is ordered by parent path, by ordering, then name and id for top elements. """ from arkindex.documents.models import ElementPath return ElementPath.objects.raw( """ - with neighbors as ( - select link.*, row_number() over (partition by p.id order by link.ordering) as position - from documents_elementpath p - inner join documents_elementpath link on (p.path=link.path) - inner join documents_element as e on (e.id=link.element_id and e.type_id = %(element_type)s) - where p.element_id = %(element_id)s + with elt_path as ( + select path + from documents_elementpath + where element_id = %(element_id)s + ), neighbors as ( + select + neighbor.id, + neighbor.path, + neighbor.ordering, + neighbor.element_id, + lag(element_id) OVER (partition BY elt_path.path order by neighbor.ordering) as previous, + lead(element_id) OVER (partition BY elt_path.path order by neighbor.ordering) as next, + lag(ordering) OVER (partition BY elt_path.path order by neighbor.ordering) as previous_ord, + lead(ordering) OVER (partition BY elt_path.path order by neighbor.ordering) as next_ord + from + documents_elementpath as neighbor + inner join elt_path + on neighbor.path = elt_path.path + inner join documents_element + on documents_element.id = neighbor.element_id + where + documents_element.corpus_id = %(element_corpus)s + order by path, ordering, name, id ) - - select distinct n.* - from neighbors as n - inner join neighbors as m on (m.element_id = %(element_id)s and m.path = n.path) - where abs(n.position - m.position) <= %(n)s - order by n.path, n.position, n.element_id + select * + from neighbors + where element_id = %(element_id)s """, params={ 'element_id': element.id, - 'element_type': element.type_id, - 'n': n, - } + 'element_corpus': element.corpus_id, + }, ) - def get_neighbors(self, element, n=1): + def get_neighbors(self, element): """ Returns a list of neighboring ElementPaths for an element, with a prefetched `element` attribute and a list of prefetched parent elements in the `parents` attribute. """ - paths = list(self.get_neighbor_paths(element, n)) - models.prefetch_related_objects(paths, 'element__type') - # Build a set of all IDs to load from all paths, then load them into a dict + paths = list(self.get_neighbor_paths(element)) + + # Build a set of all IDs to load related elements (neighbors, parents) then load them into a dict + related_elt_ids = set(chain( + (element.id,), + *((path.previous, path.next) for path in paths), + *(path.path for path in paths), + )) elements = { elt.id: elt - for elt in self.filter(id__in=set(chain(*(p.path for p in paths)))).select_related('type') + for elt in ( + self.filter(id__in=filter(None, related_elt_ids)) + .select_related('type') + .only('id', 'type__slug', 'name') + ) } - # Dispatch the elements by ID to each path + + # Generate an output corresponding to endpoint expectations (compatibility purpose) + output = [] for path in paths: - path.parents = list(map(elements.get, path.path)) - return paths + if path.previous: + output.append({ + 'ordering': path.previous_ord, + 'element': elements.get(path.previous), + 'parents': list(map(elements.get, path.path)), + }) + output.append({ + 'ordering': path.ordering, + 'element': elements.get(element.id), + 'parents': list(map(elements.get, path.path)), + }) + if path.next: + output.append({ + 'ordering': path.next_ord, + 'element': elements.get(path.next), + 'parents': list(map(elements.get, path.path)), + }) + return output class CorpusManager(models.Manager): diff --git a/arkindex/documents/serializers/elements.py b/arkindex/documents/serializers/elements.py index e125a4f39527403fc42ac0d57c97b1c60da84a63..5b4168afa43495adaa3f64c7523a0050e26feaa3 100644 --- a/arkindex/documents/serializers/elements.py +++ b/arkindex/documents/serializers/elements.py @@ -674,6 +674,7 @@ class ElementSerializer(ElementSlimSerializer): class ElementNeighborsSerializer(serializers.Serializer): + # position attribute is left for compatibility but represents the real path ordering position = serializers.IntegerField(source='ordering') element = ElementLightSerializer() parents = serializers.ListField( diff --git a/arkindex/documents/tests/test_element_manager.py b/arkindex/documents/tests/test_element_manager.py index 3c27ed533a5fd409278131864dae060bd8b9af48..2564de889501575b26ff191addb88c8a13d9659e 100644 --- a/arkindex/documents/tests/test_element_manager.py +++ b/arkindex/documents/tests/test_element_manager.py @@ -39,9 +39,9 @@ class TestElementManager(FixtureTestCase): self.assertCountEqual(ids, []) def test_get_neighbors(self): - with self.assertNumQueries(4): - self.assertEqual(len(Element.objects.get_neighbors(self.p1, 0)), 1) - with self.assertNumQueries(4): + with self.assertNumQueries(2): + self.assertEqual(len(Element.objects.get_neighbors(self.p1)), 2) + with self.assertNumQueries(2): self.assertEqual(len(Element.objects.get_neighbors(self.p2)), 3) - with self.assertNumQueries(4): - self.assertEqual(len(Element.objects.get_neighbors(self.p3, 5)), 3) + with self.assertNumQueries(2): + self.assertEqual(len(Element.objects.get_neighbors(self.p3)), 2) diff --git a/arkindex/documents/tests/test_neighbors.py b/arkindex/documents/tests/test_neighbors.py index 7ff6730b21fafedef3df465dee8dd0d263bc47e9..89ae801d9af786b78e0a1ff2ba389ea249817965 100644 --- a/arkindex/documents/tests/test_neighbors.py +++ b/arkindex/documents/tests/test_neighbors.py @@ -38,8 +38,13 @@ class TestElementNeighbors(FixtureAPITestCase): self.assertDictEqual(response.json(), {'detail': 'You do not have a read access to this element.'}) def test_element_neighbors(self): - """ + r""" A non authenticated user is able to list neighbors of a public element + Z + | + X Y + /|X \ + B A C D """ elements = build_tree( { @@ -47,31 +52,11 @@ class TestElementNeighbors(FixtureAPITestCase): 'B': 'X', 'A': ['X', 'Y'], 'C': 'X', + 'D': 'Y', }, corpus=self.corpus, type=self.volume_type, ) - # Neighbors of A with a different type - element = Element.objects.create( - corpus=self.corpus, - type=self.page_type, - name='D', - ) - element.add_parent(elements['Y']) - element = Element.objects.create( - corpus=self.corpus, - type=self.page_type, - name='E', - ) - element.add_parent(elements['Y']) - - # Neighbors of A with the same type - element = Element.objects.create( - corpus=self.corpus, - type=self.volume_type, - name='F', - ) - element.add_parent(elements['Y']) response = self.client.get(reverse('api:elements-neighbors', kwargs={'pk': str(elements['A'].id)})) self.assertEqual(response.status_code, status.HTTP_200_OK) @@ -79,6 +64,7 @@ class TestElementNeighbors(FixtureAPITestCase): self.assertListEqual( results, sorted([{ + # 2 neighbors via X (B, A, C) A has position 1 'position': 0, 'element': { 'id': str(elements['B'].id), @@ -115,6 +101,7 @@ class TestElementNeighbors(FixtureAPITestCase): 'name': elements['X'].name }] }, { + # 1 neighbors via Y (A, D) A has position 0 'position': 0, 'element': { 'id': str(elements['A'].id), @@ -131,11 +118,11 @@ class TestElementNeighbors(FixtureAPITestCase): 'name': elements['Y'].name }] }, { - 'position': 3, + 'position': 1, 'element': { - 'id': str(element.id), + 'id': str(elements['D'].id), 'type': 'volume', - 'name': element.name + 'name': elements['D'].name }, 'parents': [{ 'id': str(elements['Z'].id), @@ -151,63 +138,6 @@ class TestElementNeighbors(FixtureAPITestCase): key=lambda path: ''.join([p['id'] for p in path['parents']])) ) - def test_n_element_neighbors(self): - elements = build_tree( - { - 'A': 'W', - 'B': 'W', - 'C': 'W', - }, - corpus=self.corpus, - type=self.volume_type, - ) - - response = self.client.get( - reverse('api:elements-neighbors', kwargs={'pk': str(elements['A'].id)}), - data={'n': 2}) - self.assertEqual(response.status_code, status.HTTP_200_OK) - results = response.json()['results'] - self.assertListEqual( - results, - [{ - 'position': 0, - 'element': { - 'id': str(elements['A'].id), - 'type': 'volume', - 'name': elements['A'].name - }, - 'parents': [{ - 'id': str(elements['W'].id), - 'type': 'volume', - 'name': elements['W'].name - }] - }, { - 'position': 1, - 'element': { - 'id': str(elements['B'].id), - 'type': 'volume', - 'name': 'B' - }, - 'parents': [{ - 'id': str(elements['W'].id), - 'type': 'volume', - 'name': elements['W'].name - }] - }, { - 'position': 2, - 'element': { - 'id': str(elements['C'].id), - 'type': 'volume', - 'name': elements['C'].name - }, - 'parents': [{ - 'id': str(elements['W'].id), - 'type': 'volume', - 'name': elements['W'].name - }] - }] - ) - def test_element_neighbors_missing_ordering(self): """ Check ElementNeighbors handles non-sequential orderings @@ -278,15 +208,6 @@ class TestElementNeighbors(FixtureAPITestCase): }] ) - def test_neighbors_n_limit(self): - bad_values = ['lol', 'nan', 'inf', '-inf', 42, 0, -7] - for value in bad_values: - response = self.client.get( - reverse('api:elements-neighbors', kwargs={'pk': str(self.corpus.elements.first().id)}), - data={'n': value}, - ) - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - def test_sub_neighbors(self): r""" Ensure the children of some elements of the same type do not interfere with the neighboring elements