From dcf8aba6af5c5dfb624ca078e2016802d3f3fc0d Mon Sep 17 00:00:00 2001 From: Erwan Rouchet <rouchet@teklia.com> Date: Fri, 6 Jan 2023 08:30:27 +0000 Subject: [PATCH] Add polygon centroid sorting --- arkindex/documents/api/elements.py | 112 +++++++++++--- .../tests/test_element_list_centroid.py | 137 ++++++++++++++++++ arkindex/project/gis.py | 43 +++++- 3 files changed, 270 insertions(+), 22 deletions(-) create mode 100644 arkindex/documents/tests/test_element_list_centroid.py diff --git a/arkindex/documents/api/elements.py b/arkindex/documents/api/elements.py index 9a1ec1396d..db3699ce0e 100644 --- a/arkindex/documents/api/elements.py +++ b/arkindex/documents/api/elements.py @@ -8,7 +8,18 @@ from uuid import UUID from django.conf import settings from django.core.exceptions import ValidationError as DjangoValidationError from django.db import connection, transaction -from django.db.models import CharField, Count, FloatField, Max, Prefetch, Q, QuerySet, Value, prefetch_related_objects +from django.db.models import ( + CharField, + Count, + F, + FloatField, + Max, + Prefetch, + Q, + QuerySet, + Value, + prefetch_related_objects, +) from django.db.models.functions import Cast from django.shortcuts import get_object_or_404 from django.utils.functional import cached_property @@ -43,6 +54,7 @@ from arkindex.documents.models import ( MetaType, MLClass, Selection, + TextOrientation, Transcription, ) from arkindex.documents.serializers.elements import ( @@ -152,7 +164,38 @@ METADATA_OPERATORS = { # Operators that do not require the metadata value to be a number METADATA_STRING_OPERATORS = {'eq', 'contains'} -ELEMENT_ORDERING_FIELDS = {'name', 'created', 'random'} +# Ordering options available on element lists. +# The keys are the values that the user can set in `&order=…` +# and the values are tuples of fields sent as the arguments to QuerySet.order_by() for an ascending order. +ELEMENT_ORDERINGS = { + 'name': ('name', 'id'), + 'created': ('created', 'id'), + 'random': ('?', ), + # In PostgreSQL, ASC orderings implicitly use NULLS LAST, and DESC orderings use NULLS FIRST. + # Since we have to use a descending order for right-to-left orderings, nulls end up coming first. + # To stay consistent in what looks like an ascending order for the user, we explicitly set NULLS LAST + # using F() expressions. + TextOrientation.HorizontalLeftToRight.value: ( + F('polygon__centroid__y').asc(nulls_last=True), + F('polygon__centroid__x').asc(nulls_last=True), + 'id', + ), + TextOrientation.HorizontalRightToLeft.value: ( + F('polygon__centroid__y').asc(nulls_last=True), + F('polygon__centroid__x').desc(nulls_last=True), + 'id', + ), + TextOrientation.VerticalLeftToRight.value: ( + F('polygon__centroid__x').asc(nulls_last=True), + F('polygon__centroid__y').asc(nulls_last=True), + 'id', + ), + TextOrientation.VerticalRightToLeft.value: ( + F('polygon__centroid__x').desc(nulls_last=True), + F('polygon__centroid__y').asc(nulls_last=True), + 'id', + ), +} class ElementsListAutoSchema(AutoSchema): @@ -354,10 +397,34 @@ class ElementsListAutoSchema(AutoSchema): ), OpenApiParameter( 'order', - description='Sort elements by a specific field, or sort randomly using `random`. ' - 'Random sorting is stateless: an element can be returned multiple times or not at all ' - 'in different pages of results, and requesting the same page twice will return different results.', - enum=ELEMENT_ORDERING_FIELDS, + description=dedent(""" + Defines how the elements should be sorted: + + * `name` sorts the elements by name alphabetically. Elements with duplicate names are then sorted by UUID. + * `created` sorts the elements by creation date. Elements with duplicate creation dates are then sorted by UUID. + * `random` sorts the elements randomly. + + Random sorting is stateless: an element can be returned multiple times or not at all + in different pages of results, and requesting the same page twice will return different results. + The `order_direction` parameter will be ignored. + + The following options allow sorting by the centroid of the element's polygon: + + * `horizontal-lr` sorts from top to bottom, then left to right, which matches the text reading order of Western languages. + * `horizontal-rl` sorts from top to bottom, then right to left. + * `vertical-lr` sorts from left to right, then top to bottom. + * `vertical-rl` sorts from right to left, then top to bottom. + + For each of those options, elements with duplicate centroids are then sorted by UUID. + Elements without a polygon will be sorted last, by UUID. + + The `order_direction` parameter is supported on centroid sorts. + For example, when it is set to `desc`, `horizontal-lr` will sort from bottom to top, then right to left. + Elements without a polygon will come first, sorted by descending UUID. + + The `rotation_angle` and `mirrored` attributes do not affect the centroid ordering. + """).strip(), + enum=ELEMENT_ORDERINGS.keys(), default='name', required=False, ), @@ -798,24 +865,21 @@ class ElementsListBase(CorpusACLMixin, DestroyModelMixin, ListAPIView): return prefetch - def get_order_direction(self): + @property + def is_descending(self): direction = self.clean_params.get('order_direction', 'asc').lower() if direction not in ('asc', 'desc'): raise ValidationError({'order_direction': ['Unknown sorting direction']}) - return '-' if direction == 'desc' else '' + return direction == 'desc' def get_order_by(self): - sort_field = self.clean_params.get('order', 'name').lower() + ordering = self.clean_params.get('order', 'name').lower() - if sort_field not in ELEMENT_ORDERING_FIELDS: + order_expressions = ELEMENT_ORDERINGS.get(ordering) + if not order_expressions: raise ValidationError({'order': ['Unknown sorting field']}) - if sort_field == 'random': - return ('?', ) - - direction = self.get_order_direction() - # The sorting field is not unique; fallback on ordering by ID to ensure a consistent ordering - return (f'{direction}{sort_field}', f'{direction}id') + return order_expressions def filter_queryset(self, queryset): queryset = queryset \ @@ -823,6 +887,9 @@ class ElementsListBase(CorpusACLMixin, DestroyModelMixin, ListAPIView): .prefetch_related(*self.get_prefetch()) \ .order_by(*self.get_order_by()) + if self.is_descending: + queryset = queryset.reverse() + with_has_children = self.clean_params.get('with_has_children') if with_has_children and with_has_children.lower() not in ('false', '0'): queryset = BulkMap(_fetch_has_children, queryset) @@ -985,8 +1052,14 @@ class ElementParents(ElementsListBase): get=extend_schema(operation_id='ListElementChildren', parameters=[ OpenApiParameter( 'order', - description='Sort elements by a specific field.', - enum=ELEMENT_ORDERING_FIELDS | {'position'}, + description=dedent(""" + Sort elements by a specific field. + + In addition to the sorting options as for [ListElements](#operation/ListElements) and + [ListElementParents](#operation/ListElementParents), the `position` option is available + to sort elements by their position within the parent element. + """).strip(), + enum=set(ELEMENT_ORDERINGS.keys()) | {'position'}, default='position', required=False, ) @@ -1029,8 +1102,7 @@ class ElementChildren(ElementsListBase): is ListElementNeighbors, in a field named `position`. """ if self.clean_params.get('order', 'position').lower() == 'position': - direction = self.get_order_direction() - return (f'{direction}paths__ordering', f'{direction}id') + return ('paths__ordering', 'id') return super().get_order_by() def get_queryset(self): diff --git a/arkindex/documents/tests/test_element_list_centroid.py b/arkindex/documents/tests/test_element_list_centroid.py new file mode 100644 index 0000000000..65b375e06e --- /dev/null +++ b/arkindex/documents/tests/test_element_list_centroid.py @@ -0,0 +1,137 @@ +from uuid import UUID + +from django.urls import reverse +from rest_framework import status + +from arkindex.project.tests import FixtureAPITestCase + + +class TestElementListsCentroid(FixtureAPITestCase): + + @classmethod + def setUpTestData(cls): + super().setUpTestData() + cls.element_type = cls.corpus.types.create(slug='potatoid', display_name='Potatoid', folder=False) + cls.image = cls.imgsrv.images.get(path='img6') + + # We create some elements with centroids arranged on the image like so: + # +------+ + # | A B | + # | C D | + # +------+ + # Two elements will have the same D centroid, so that we can test the fallback UUID sorting, + # and two more elements will have no polygon so we can test that they are first or last, as well as their own UUID fallback. + a = cls.corpus.elements.create( + type=cls.element_type, + name='A', + image=cls.image, + polygon=((10, 10), (10, 30), (30, 30), (30, 10), (10, 10)), + ) + b = cls.corpus.elements.create( + type=cls.element_type, + name='B', + image=cls.image, + polygon=((110, 10), (110, 30), (130, 30), (130, 10), (110, 10)), + ) + c = cls.corpus.elements.create( + type=cls.element_type, + name='C', + image=cls.image, + polygon=((10, 110), (10, 130), (30, 130), (30, 110), (10, 110)), + ) + + d1 = cls.corpus.elements.create( + id=UUID('aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa'), + type=cls.element_type, + name='D1', + image=cls.image, + polygon=((110, 110), (110, 130), (130, 130), (130, 110), (110, 110)), + ) + d2 = cls.corpus.elements.create( + id=UUID('aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaab'), + type=cls.element_type, + name='D2', + image=cls.image, + # Different polygon, but just 10 pixels larger on all sides, so it has the same centroid + polygon=((100, 100), (100, 140), (140, 140), (140, 100), (100, 100)), + ) + + none1 = cls.corpus.elements.create( + id=UUID('aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa1'), + type=cls.element_type, + name='No polygon 1', + ) + none2 = cls.corpus.elements.create( + id=UUID('aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa2'), + type=cls.element_type, + name='No polygon 2', + ) + + cls.elements = [a, b, c, d1, d2, none1, none2] + + cls.cases = [ + ['horizontal-lr', 'asc', ['A', 'B', 'C', 'D1', 'D2', 'No polygon 1', 'No polygon 2']], + ['horizontal-rl', 'asc', ['B', 'A', 'D1', 'D2', 'C', 'No polygon 1', 'No polygon 2']], + ['vertical-lr', 'asc', ['A', 'C', 'B', 'D1', 'D2', 'No polygon 1', 'No polygon 2']], + ['vertical-rl', 'asc', ['B', 'D1', 'D2', 'A', 'C', 'No polygon 1', 'No polygon 2']], + ['horizontal-lr', 'desc', ['No polygon 2', 'No polygon 1', 'D2', 'D1', 'C', 'B', 'A']], + ['horizontal-rl', 'desc', ['No polygon 2', 'No polygon 1', 'C', 'D2', 'D1', 'A', 'B']], + ['vertical-lr', 'desc', ['No polygon 2', 'No polygon 1', 'D2', 'D1', 'B', 'C', 'A']], + ['vertical-rl', 'desc', ['No polygon 2', 'No polygon 1', 'C', 'A', 'D2', 'D1', 'B']], + ] + + def test_list_elements(self): + for order, order_direction, expected_elements in self.cases: + with self.subTest(order=order, order_direction=order_direction): + with self.assertNumQueries(6): + response = self.client.get( + reverse('api:corpus-elements', kwargs={'corpus': self.corpus.id}), + {'type': self.element_type.slug, 'order': order, 'order_direction': order_direction}, + ) + self.assertEqual(response.status_code, status.HTTP_200_OK) + + data = response.json() + # Pagination is not supported in this test, so any response with more than 1 page could cause trouble. + self.assertIsNone(data['next']) + actual_elements = [element['name'] for element in response.json()['results']] + self.assertListEqual(expected_elements, actual_elements) + + def test_list_element_children(self): + parent = self.corpus.elements.get(name='Volume 2') + for element in self.elements: + element.add_parent(parent) + + for order, order_direction, expected_elements in self.cases: + with self.subTest(order=order, order_direction=order_direction): + with self.assertNumQueries(6): + response = self.client.get( + reverse('api:elements-children', kwargs={'pk': str(parent.id)}), + {'type': self.element_type.slug, 'order': order, 'order_direction': order_direction}, + ) + self.assertEqual(response.status_code, status.HTTP_200_OK) + + data = response.json() + # Pagination is not supported in this test, so any response with more than 1 page could cause trouble. + self.assertIsNone(data['next']) + actual_elements = [element['name'] for element in response.json()['results']] + self.assertListEqual(expected_elements, actual_elements) + + def test_list_element_parents(self): + child = self.corpus.elements.get(name='Surface E') + for element in self.elements: + child.add_parent(element) + + for order, order_direction, expected_elements in self.cases: + with self.subTest(order=order, order_direction=order_direction): + with self.assertNumQueries(6): + response = self.client.get( + reverse('api:elements-parents', kwargs={'pk': str(child.id)}), + {'type': self.element_type.slug, 'order': order, 'order_direction': order_direction}, + ) + self.assertEqual(response.status_code, status.HTTP_200_OK) + + data = response.json() + # Pagination is not supported in this test, so any response with more than 1 page could cause trouble. + self.assertIsNone(data['next']) + actual_elements = [element['name'] for element in response.json()['results']] + self.assertListEqual(expected_elements, actual_elements) diff --git a/arkindex/project/gis.py b/arkindex/project/gis.py index aaad726b0a..94dee486c7 100644 --- a/arkindex/project/gis.py +++ b/arkindex/project/gis.py @@ -1,13 +1,13 @@ from collections.abc import Iterable from itertools import groupby -from django.contrib.gis.db.models.fields import BaseSpatialField, LineStringField +from django.contrib.gis.db.models.fields import BaseSpatialField, LineStringField, PointField from django.contrib.gis.db.models.functions import GeoFuncMixin, GeomOutputGeoFunc from django.contrib.gis.db.models.functions import MemSize as MemSizeFunc from django.contrib.gis.db.models.functions import NumPoints as NumPointsFunc from django.contrib.gis.db.models.lookups import EqualsLookup from django.contrib.gis.geos import GEOSGeometry, LineString, Polygon -from django.db.models import BooleanField, Transform +from django.db.models import BooleanField, FloatField, Transform def ensure_linear_ring(value): @@ -96,6 +96,45 @@ class MemSize(MemSizeFunc, Transform): lookup_name = 'memsize' +@BaseSpatialField.register_lookup +class Centroid(GeoFuncMixin, Transform): + """ + GeoDjango's Centroid function, extended just to be available as a transform: + Element.objects.order_by('polygon__centroid__x') + + https://postgis.net/docs/ST_Centroid.html + https://docs.djangoproject.com/en/4.1/ref/contrib/gis/functions/#centroid + """ + arity = 1 + lookup_name = 'centroid' + + # This does not support geometries with SRIDs. + # Using the GeomOutputGeoFunc base class would add SRID support, + # but would return a GeometryField instead of a PointField. + # This is not relevant for element polygons as they do not use an SRID. + output_field = PointField() + + +@PointField.register_lookup +class X(GeoFuncMixin, Transform): + """ + X coordinate of a point. + https://postgis.net/docs/ST_X.html + """ + lookup_name = 'x' + output_field = FloatField() + + +@PointField.register_lookup +class Y(GeoFuncMixin, Transform): + """ + Y coordinate of a point. + https://postgis.net/docs/ST_Y.html + """ + lookup_name = 'y' + output_field = FloatField() + + # Django uses the `~=` (same-as) operator to compare geometries by default; # however, in PostGIS >=1.5, this causes a comparison by bounding boxes only. # This line rewrites the `__exact` lookup (the default lookup) to be `ST_Equals`, -- GitLab