Skip to content
Snippets Groups Projects
Commit b52e1ddb authored by Valentin Rigal's avatar Valentin Rigal Committed by Erwan Rouchet
Browse files

Improve ListElementNeighbors performances

parent 50de85c8
No related branches found
No related tags found
1 merge request!2038Improve ListElementNeighbors performances
......@@ -1263,14 +1263,6 @@ class ElementRetrieve(ACLMixin, RetrieveUpdateDestroyAPIView):
get=extend_schema(
operation_id='ListElementNeighbors',
tags=['elements'],
parameters=[
OpenApiParameter(
'n',
type={'type': 'integer', 'minimum': 1, 'maximum': 10},
description='Number of neighbors to retrieve around the element',
required=False,
)
],
)
)
class ElementNeighbors(ACLMixin, ListAPIView):
......@@ -1284,16 +1276,9 @@ class ElementNeighbors(ACLMixin, ListAPIView):
queryset = Element.objects.none()
def get_queryset(self):
n = self.request.query_params.get('n', 1)
try:
n = int(n)
except (TypeError, ValueError):
raise ValidationError({'n': 'Should be an integer between 1 and 10'})
if not 1 <= n <= 10:
raise ValidationError({'n': 'Should be an integer between 1 and 10'})
element = get_object_or_404(
Element.objects.select_related('corpus'),
Element.objects.select_related('corpus').only('id', 'corpus__public'),
id=self.kwargs['pk']
)
......@@ -1301,7 +1286,7 @@ class ElementNeighbors(ACLMixin, ListAPIView):
if not self.has_access(element.corpus, Role.Guest.value):
raise PermissionDenied(detail='You do not have a read access to this element.')
return Element.objects.get_neighbors(element, n)
return Element.objects.get_neighbors(element)
@extend_schema(tags=['elements'], request=None)
......
......@@ -138,59 +138,102 @@ class ElementManager(models.Manager):
"""
return self.filter(paths__path__contains=[parent_id], **filters).order_by('paths__ordering').distinct()
def get_neighbor_paths(self, element, n=1):
def get_neighbor_paths(self, element):
"""
Get n neighbor paths: current, previous and next element paths
Get previous and next element IDs for each path related to this element.
This uses a single raw SQL query to use row numbers, instead of the ordering numeric values,
to avoid issues with duplicated or missing orderings.
* The `neighbors` subquery lists all paths for an element, and then list all possible neighbors
with the same parents. An ordering consistent with rows numbers is added as `position`
* The main query combines the `neighbors` subquery with itself to have a full product, and only retains
the combinations with a distance under the requested limit, with elements of the same type
* The output is ordered by parent path, then by position (row numbers)
Performance tests are available at https://gitlab.com/teklia/arkindex/backend/-/issues/1011
* The elt_path subquery lists all paths for the current element.
* The main query lists all neighbors for each path, retrieve the previous and next elements using
a window function.
* Paths are filtered depending on element corpus. That step requires another join, but allows top elements
to be positioned correctly as their paths are similar among each corpus (i.e. empty).
* The output is ordered by parent path, by ordering, then name and id for top elements.
"""
from arkindex.documents.models import ElementPath
return ElementPath.objects.raw(
"""
with neighbors as (
select link.*, row_number() over (partition by p.id order by link.ordering) as position
from documents_elementpath p
inner join documents_elementpath link on (p.path=link.path)
inner join documents_element as e on (e.id=link.element_id and e.type_id = %(element_type)s)
where p.element_id = %(element_id)s
with elt_path as (
select path
from documents_elementpath
where element_id = %(element_id)s
), neighbors as (
select
neighbor.id,
neighbor.path,
neighbor.ordering,
neighbor.element_id,
lag(element_id) OVER (partition BY elt_path.path order by neighbor.ordering) as previous,
lead(element_id) OVER (partition BY elt_path.path order by neighbor.ordering) as next,
lag(ordering) OVER (partition BY elt_path.path order by neighbor.ordering) as previous_ord,
lead(ordering) OVER (partition BY elt_path.path order by neighbor.ordering) as next_ord
from
documents_elementpath as neighbor
inner join elt_path
on neighbor.path = elt_path.path
inner join documents_element
on documents_element.id = neighbor.element_id
where
documents_element.corpus_id = %(element_corpus)s
order by path, ordering, name, id
)
select distinct n.*
from neighbors as n
inner join neighbors as m on (m.element_id = %(element_id)s and m.path = n.path)
where abs(n.position - m.position) <= %(n)s
order by n.path, n.position, n.element_id
select *
from neighbors
where element_id = %(element_id)s
""",
params={
'element_id': element.id,
'element_type': element.type_id,
'n': n,
}
'element_corpus': element.corpus_id,
},
)
def get_neighbors(self, element, n=1):
def get_neighbors(self, element):
"""
Returns a list of neighboring ElementPaths for an element, with a prefetched `element` attribute
and a list of prefetched parent elements in the `parents` attribute.
"""
paths = list(self.get_neighbor_paths(element, n))
models.prefetch_related_objects(paths, 'element__type')
# Build a set of all IDs to load from all paths, then load them into a dict
paths = list(self.get_neighbor_paths(element))
# Build a set of all IDs to load related elements (neighbors, parents) then load them into a dict
related_elt_ids = set(chain(
(element.id,),
*((path.previous, path.next) for path in paths),
*(path.path for path in paths),
))
elements = {
elt.id: elt
for elt in self.filter(id__in=set(chain(*(p.path for p in paths)))).select_related('type')
for elt in (
self.filter(id__in=filter(None, related_elt_ids))
.select_related('type')
.only('id', 'type__slug', 'name')
)
}
# Dispatch the elements by ID to each path
# Generate an output corresponding to endpoint expectations (compatibility purpose)
output = []
for path in paths:
path.parents = list(map(elements.get, path.path))
return paths
if path.previous:
output.append({
'ordering': path.previous_ord,
'element': elements.get(path.previous),
'parents': list(map(elements.get, path.path)),
})
output.append({
'ordering': path.ordering,
'element': elements.get(element.id),
'parents': list(map(elements.get, path.path)),
})
if path.next:
output.append({
'ordering': path.next_ord,
'element': elements.get(path.next),
'parents': list(map(elements.get, path.path)),
})
return output
class CorpusManager(models.Manager):
......
......@@ -674,6 +674,7 @@ class ElementSerializer(ElementSlimSerializer):
class ElementNeighborsSerializer(serializers.Serializer):
# position attribute is left for compatibility but represents the real path ordering
position = serializers.IntegerField(source='ordering')
element = ElementLightSerializer()
parents = serializers.ListField(
......
......@@ -39,9 +39,9 @@ class TestElementManager(FixtureTestCase):
self.assertCountEqual(ids, [])
def test_get_neighbors(self):
with self.assertNumQueries(4):
self.assertEqual(len(Element.objects.get_neighbors(self.p1, 0)), 1)
with self.assertNumQueries(4):
with self.assertNumQueries(2):
self.assertEqual(len(Element.objects.get_neighbors(self.p1)), 2)
with self.assertNumQueries(2):
self.assertEqual(len(Element.objects.get_neighbors(self.p2)), 3)
with self.assertNumQueries(4):
self.assertEqual(len(Element.objects.get_neighbors(self.p3, 5)), 3)
with self.assertNumQueries(2):
self.assertEqual(len(Element.objects.get_neighbors(self.p3)), 2)
......@@ -38,8 +38,13 @@ class TestElementNeighbors(FixtureAPITestCase):
self.assertDictEqual(response.json(), {'detail': 'You do not have a read access to this element.'})
def test_element_neighbors(self):
"""
r"""
A non authenticated user is able to list neighbors of a public element
Z
|
X Y
/|X \
B A C D
"""
elements = build_tree(
{
......@@ -47,31 +52,11 @@ class TestElementNeighbors(FixtureAPITestCase):
'B': 'X',
'A': ['X', 'Y'],
'C': 'X',
'D': 'Y',
},
corpus=self.corpus,
type=self.volume_type,
)
# Neighbors of A with a different type
element = Element.objects.create(
corpus=self.corpus,
type=self.page_type,
name='D',
)
element.add_parent(elements['Y'])
element = Element.objects.create(
corpus=self.corpus,
type=self.page_type,
name='E',
)
element.add_parent(elements['Y'])
# Neighbors of A with the same type
element = Element.objects.create(
corpus=self.corpus,
type=self.volume_type,
name='F',
)
element.add_parent(elements['Y'])
response = self.client.get(reverse('api:elements-neighbors', kwargs={'pk': str(elements['A'].id)}))
self.assertEqual(response.status_code, status.HTTP_200_OK)
......@@ -79,6 +64,7 @@ class TestElementNeighbors(FixtureAPITestCase):
self.assertListEqual(
results,
sorted([{
# 2 neighbors via X (B, A, C) A has position 1
'position': 0,
'element': {
'id': str(elements['B'].id),
......@@ -115,6 +101,7 @@ class TestElementNeighbors(FixtureAPITestCase):
'name': elements['X'].name
}]
}, {
# 1 neighbors via Y (A, D) A has position 0
'position': 0,
'element': {
'id': str(elements['A'].id),
......@@ -131,11 +118,11 @@ class TestElementNeighbors(FixtureAPITestCase):
'name': elements['Y'].name
}]
}, {
'position': 3,
'position': 1,
'element': {
'id': str(element.id),
'id': str(elements['D'].id),
'type': 'volume',
'name': element.name
'name': elements['D'].name
},
'parents': [{
'id': str(elements['Z'].id),
......@@ -151,63 +138,6 @@ class TestElementNeighbors(FixtureAPITestCase):
key=lambda path: ''.join([p['id'] for p in path['parents']]))
)
def test_n_element_neighbors(self):
elements = build_tree(
{
'A': 'W',
'B': 'W',
'C': 'W',
},
corpus=self.corpus,
type=self.volume_type,
)
response = self.client.get(
reverse('api:elements-neighbors', kwargs={'pk': str(elements['A'].id)}),
data={'n': 2})
self.assertEqual(response.status_code, status.HTTP_200_OK)
results = response.json()['results']
self.assertListEqual(
results,
[{
'position': 0,
'element': {
'id': str(elements['A'].id),
'type': 'volume',
'name': elements['A'].name
},
'parents': [{
'id': str(elements['W'].id),
'type': 'volume',
'name': elements['W'].name
}]
}, {
'position': 1,
'element': {
'id': str(elements['B'].id),
'type': 'volume',
'name': 'B'
},
'parents': [{
'id': str(elements['W'].id),
'type': 'volume',
'name': elements['W'].name
}]
}, {
'position': 2,
'element': {
'id': str(elements['C'].id),
'type': 'volume',
'name': elements['C'].name
},
'parents': [{
'id': str(elements['W'].id),
'type': 'volume',
'name': elements['W'].name
}]
}]
)
def test_element_neighbors_missing_ordering(self):
"""
Check ElementNeighbors handles non-sequential orderings
......@@ -278,15 +208,6 @@ class TestElementNeighbors(FixtureAPITestCase):
}]
)
def test_neighbors_n_limit(self):
bad_values = ['lol', 'nan', 'inf', '-inf', 42, 0, -7]
for value in bad_values:
response = self.client.get(
reverse('api:elements-neighbors', kwargs={'pk': str(self.corpus.elements.first().id)}),
data={'n': value},
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
def test_sub_neighbors(self):
r"""
Ensure the children of some elements of the same type do not interfere with the neighboring elements
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment