From 5846c9c466cdaee2ee3016ec0d64053c8a9f26aa Mon Sep 17 00:00:00 2001
From: Erwan Rouchet <rouchet@teklia.com>
Date: Wed, 28 Jun 2023 08:28:31 +0000
Subject: [PATCH] Replace ElementPath exclusion constraints with triggers

---
 .../migrations/0005_path_triggers.py          |  32 ++
 arkindex/documents/models.py                  |  78 +++--
 arkindex/documents/tests/test_neighbors.py    |  38 ++-
 .../documents/tests/test_path_constraints.py  | 273 ++++++++++++++++--
 4 files changed, 370 insertions(+), 51 deletions(-)
 create mode 100644 arkindex/documents/migrations/0005_path_triggers.py

diff --git a/arkindex/documents/migrations/0005_path_triggers.py b/arkindex/documents/migrations/0005_path_triggers.py
new file mode 100644
index 0000000000..e359d8493c
--- /dev/null
+++ b/arkindex/documents/migrations/0005_path_triggers.py
@@ -0,0 +1,32 @@
+# Generated by Django 4.1.7 on 2023-06-20 12:51
+
+from django.db import migrations
+
+import pgtrigger.compiler
+import pgtrigger.migrations
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('documents', '0004_initial'),
+    ]
+
+    operations = [
+        pgtrigger.migrations.AddTrigger(
+            model_name='elementpath',
+            trigger=pgtrigger.compiler.Trigger(name='unique_element_orderings', sql=pgtrigger.compiler.UpsertTriggerSql(constraint='CONSTRAINT', func="\n                IF EXISTS (\n                    SELECT 1\n                    FROM documents_elementpath p\n                    WHERE\n                        p.path && NEW.path\n                        AND p.path[array_length(p.path, 1)] = NEW.path[array_length(NEW.path, 1)]\n                        AND p.element_id = NEW.element_id\n                        AND p.ordering <> NEW.ordering\n                ) THEN\n                    RAISE EXCEPTION 'Each element may only have one ordering within the same parent';\n                END IF;\n                RETURN NULL;\n                ", hash='ede7bc92910da8dae8ea06625d9d3b08f216831c', operation='INSERT OR UPDATE', pgid='pgtrigger_unique_element_orderings_340d8', table='documents_elementpath', timing='DEFERRABLE INITIALLY DEFERRED', when='AFTER')),
+        ),
+        pgtrigger.migrations.AddTrigger(
+            model_name='elementpath',
+            trigger=pgtrigger.compiler.Trigger(name='unique_parent_orderings', sql=pgtrigger.compiler.UpsertTriggerSql(constraint='CONSTRAINT', func="\n                IF EXISTS (\n                    SELECT 1\n                    FROM documents_elementpath p\n                    WHERE\n                        p.path && NEW.path\n                        AND p.path[array_length(p.path, 1)] = NEW.path[array_length(NEW.path, 1)]\n                        AND p.ordering = NEW.ordering\n                        AND p.element_id <> NEW.element_id\n                ) THEN\n                    RAISE EXCEPTION 'Each element within a parent must have a distinct ordering';\n                END IF;\n                RETURN NULL;\n                ", hash='45b3745ab2856a2e48ff3e4fc8430b1abda961e5', operation='INSERT OR UPDATE', pgid='pgtrigger_unique_parent_orderings_bbcd7', table='documents_elementpath', timing='DEFERRABLE INITIALLY DEFERRED', when='AFTER')),
+        ),
+        migrations.RemoveConstraint(
+            model_name='elementpath',
+            name='unique_element_orderings',
+        ),
+        migrations.RemoveConstraint(
+            model_name='elementpath',
+            name='unique_parent_orderings',
+        ),
+    ]
diff --git a/arkindex/documents/models.py b/arkindex/documents/models.py
index 51d92a1347..0d87d139c6 100644
--- a/arkindex/documents/models.py
+++ b/arkindex/documents/models.py
@@ -15,6 +15,7 @@ from django.db.models.functions import Cast, Least
 from django.utils.functional import cached_property
 from enumfields import Enum, EnumField
 
+import pgtrigger
 from arkindex.documents.dates import InterpretedDateMixin
 from arkindex.documents.deletion import delete_element
 from arkindex.documents.managers import CorpusManager, ElementManager
@@ -120,34 +121,65 @@ class ElementPath(models.Model):
                 ],
                 deferrable=Deferrable.DEFERRED,
             ),
+        ]
+        triggers = [
             # An element cannot be in two positions within the same parent, but its paths can be duplicated
             # as there might be multiple paths to handle multiple grandparents on the same parent.
-            # We cannot use a unique constraint, but we can use an exclusion constraint to forbid different orderings.
-            ExclusionConstraint(
+            # We therefore cannot use a unique constraint. We could use an exclusion constraint to forbid
+            # different orderings, but that requires a GiST index which is heavy and slow. If the query planner
+            # decides to use it instead of the GIN index, this can result in severe slowdowns in element lists
+            # or element deletions. We use a deferrable trigger instead, which behaves very similarly to how
+            # PostgreSQL implements foreign key constraints.
+            # After one row is inserted or updated, we run a query to see if there is another path
+            # for the same element on the same parent that has a different ordering.
+            # If this element has two orderings on the same parent, then the trigger raises an exception.
+            pgtrigger.Trigger(
                 name='unique_element_orderings',
-                expressions=[
-                    # On the same element…
-                    ('element', RangeOperators.EQUAL),
-                    # Within the same parent…
-                    ('path__last', RangeOperators.EQUAL),
-                    # There cannot be any orderings different from each other.
-                    ('ordering', RangeOperators.NOT_EQUAL),
-                ],
-                deferrable=Deferrable.DEFERRED,
+                operation=pgtrigger.Insert | pgtrigger.Update,
+                when=pgtrigger.After,
+                level=pgtrigger.Row,
+                timing=pgtrigger.Deferred,
+                func="""
+                IF EXISTS (
+                    SELECT 1
+                    FROM documents_elementpath p
+                    WHERE
+                        p.path && NEW.path
+                        AND p.path[array_length(p.path, 1)] = NEW.path[array_length(NEW.path, 1)]
+                        AND p.element_id = NEW.element_id
+                        AND p.ordering <> NEW.ordering
+                ) THEN
+                    RAISE EXCEPTION 'Each element may only have one ordering within the same parent';
+                END IF;
+                RETURN NULL;
+                """,
             ),
-            # Within the same parent, there cannot be two elements with the same ordering, but since paths can be
-            # duplicated, there can be the same element twice with the same ordering, so exclusion constraint again.
-            ExclusionConstraint(
+            # Within the same parent, there cannot be two elements with the same ordering,
+            # but since paths can be duplicated, there can be the same element twice with the same ordering.
+            # We again use another trigger, but with a different condition.
+            # After one row is inserted or updated, we run a query to see if there is another path
+            # with the same parent and the same ordering but a different element ID.
+            # If there is another element in this parent with that same ordering, then the trigger raises an exception.
+            pgtrigger.Trigger(
                 name='unique_parent_orderings',
-                expressions=[
-                    # Within the same parent…
-                    ('path__last', RangeOperators.EQUAL),
-                    # At the same ordering…
-                    ('ordering', RangeOperators.EQUAL),
-                    # There cannot be any element IDs different from each other.
-                    ('element', RangeOperators.NOT_EQUAL),
-                ],
-                deferrable=Deferrable.DEFERRED,
+                operation=pgtrigger.Insert | pgtrigger.Update,
+                when=pgtrigger.After,
+                level=pgtrigger.Row,
+                timing=pgtrigger.Deferred,
+                func="""
+                IF EXISTS (
+                    SELECT 1
+                    FROM documents_elementpath p
+                    WHERE
+                        p.path && NEW.path
+                        AND p.path[array_length(p.path, 1)] = NEW.path[array_length(NEW.path, 1)]
+                        AND p.ordering = NEW.ordering
+                        AND p.element_id <> NEW.element_id
+                ) THEN
+                    RAISE EXCEPTION 'Each element within a parent must have a distinct ordering';
+                END IF;
+                RETURN NULL;
+                """,
             ),
         ]
 
diff --git a/arkindex/documents/tests/test_neighbors.py b/arkindex/documents/tests/test_neighbors.py
index 546467af3d..7ff6730b21 100644
--- a/arkindex/documents/tests/test_neighbors.py
+++ b/arkindex/documents/tests/test_neighbors.py
@@ -1,3 +1,4 @@
+from django.db import transaction
 from django.db.models import F
 from django.urls import reverse
 from rest_framework import status
@@ -222,9 +223,18 @@ class TestElementNeighbors(FixtureAPITestCase):
         )
         c_path = elements['C'].paths.get()
         c_path.ordering = 6584
-        c_path.save()
 
-        response = self.client.get(reverse('api:elements-neighbors', kwargs={'pk': str(elements['B'].id)}))
+        # The unique_element_orderings trigger will fail because an element is not supposed to have
+        # multiple orderings within the same parent. This trigger is deferred, meaning it will only run
+        # after the test ends, during teardown, so we cannot ignore it with pgtrigger.ignore().
+        # Instead, we use a savepoint that we rollback immediately after the request, so that the trigger never runs.
+        sid = transaction.savepoint()
+        try:
+            c_path.save()
+            response = self.client.get(reverse('api:elements-neighbors', kwargs={'pk': str(elements['B'].id)}))
+        finally:
+            transaction.savepoint_rollback(sid)
+
         self.assertEqual(response.status_code, status.HTTP_200_OK)
         results = response.json()['results']
         self.assertListEqual(
@@ -376,14 +386,24 @@ class TestElementNeighbors(FixtureAPITestCase):
             corpus=self.corpus,
             type=self.volume_type,
         )
-        # Put X at the orderings we want it to have, then add 1 to D E F G and L to account for the move
-        elements['X'].paths.filter(path__contains=[elements['A'].id]).update(ordering=1)
-        elements['X'].paths.filter(path__contains=[elements['B'].id]).update(ordering=4)
-        ElementPath.objects.filter(element__name__in=list('DEFGL')).update(ordering=F('ordering') + 1)
+        # The unique_element_orderings trigger will fail because an element is not supposed to have
+        # multiple orderings within the same parent. This trigger is deferred, meaning it will only run
+        # after the test ends, during teardown, so we cannot ignore it with pgtrigger.ignore().
+        # Instead, we use a savepoint that we rollback immediately after the request, so that the trigger never runs.
+        sid = transaction.savepoint()
+
+        try:
+            # Put X at the orderings we want it to have, then add 1 to D E F G and L to account for the move
+            elements['X'].paths.filter(path__contains=[elements['A'].id]).update(ordering=1)
+            elements['X'].paths.filter(path__contains=[elements['B'].id]).update(ordering=4)
+            ElementPath.objects.filter(element__name__in=list('DEFGL')).update(ordering=F('ordering') + 1)
+
+            # Ask for X's neighbors, should get C, X, D, K, X, and L.
+            # Without the fix, would get the 12 children
+            response = self.client.get(reverse('api:elements-neighbors', kwargs={'pk': str(elements['X'].id)}))
+        finally:
+            transaction.savepoint_rollback(sid)
 
-        # Ask for X's neighbors, should get C, X, D, K, X, and L.
-        # Without the fix, would get the 12 children
-        response = self.client.get(reverse('api:elements-neighbors', kwargs={'pk': str(elements['X'].id)}))
         self.assertEqual(response.status_code, status.HTTP_200_OK)
 
         # The endpoint is ordered by ID, so depending on the random UUIDs we get, we can have CXDKXL or KXLCXD
diff --git a/arkindex/documents/tests/test_path_constraints.py b/arkindex/documents/tests/test_path_constraints.py
index 3be218d728..e3b966df37 100644
--- a/arkindex/documents/tests/test_path_constraints.py
+++ b/arkindex/documents/tests/test_path_constraints.py
@@ -1,35 +1,270 @@
-from django.db import IntegrityError
+from django.db import IntegrityError, connections, transaction
+from django.db.utils import InternalError
 
+from arkindex.documents.models import ElementPath
 from arkindex.project.tests import FixtureTestCase
 
 
 class TestPathConstraints(FixtureTestCase):
 
-    def test_unique_paths(self):
-        """
-        Ensure an Element cannot have strictly the same paths twice
-        """
-        element_type = self.corpus.types.first()
-        parent1 = self.corpus.elements.get(name='Volume 1')
-        parent2 = self.corpus.elements.get(name='Volume 2')
-        element = self.corpus.elements.create(
-            type=element_type,
+    @classmethod
+    def setUpTestData(cls):
+        super().setUpTestData()
+        cls.element_type = cls.corpus.types.first()
+        cls.parent1 = cls.corpus.elements.get(name='Volume 1')
+        cls.parent2 = cls.corpus.elements.get(name='Volume 2')
+        cls.element = cls.corpus.elements.create(
+            type=cls.element_type,
             name='Test element',
         )
-        child = self.corpus.elements.create(
-            type=element_type,
+        cls.child = cls.corpus.elements.create(
+            type=cls.element_type,
             name='Test child',
         )
 
-        element.add_parent(parent1)
-        element.add_parent(parent2)
+        cls.element.add_parent(cls.parent1)
+        cls.element.add_parent(cls.parent2)
 
-        child.add_parent(element)
-        self.assertCountEqual(child.paths.values_list('path', 'ordering'), [
-            ([parent1.id, element.id], 0),
-            ([parent2.id, element.id], 0)
+        cls.child.add_parent(cls.element)
+
+    def test_unique_paths(self):
+        """
+        Ensure an Element cannot have strictly the same paths twice
+        """
+        self.assertCountEqual(self.child.paths.values_list('path', 'ordering'), [
+            ([self.parent1.id, self.element.id], 0),
+            ([self.parent2.id, self.element.id], 0)
         ])
 
         # Cannot create a path that already exists
         with self.assertRaises(IntegrityError):
-            child.paths.create(path=[parent1.id, element.id], ordering=42)
+            self.child.paths.create(path=[self.parent1.id, self.element.id], ordering=42)
+
+    def test_unique_element_orderings_insert(self):
+        self.child.paths.get(path=[self.parent2.id, self.element.id]).delete()
+
+        # Committing the transaction should cause an error since the trigger will raise an exception.
+        # Since we are within a Django TestCase, such a failure will cause the whole test class to fail,
+        # so we use a savepoint to handle the failure gracefully.
+        sid = transaction.savepoint()
+
+        # Creating a path should not raise anything: the trigger only runs on commit because it is deferred.
+        self.child.paths.create(
+            path=[self.parent2.id, self.element.id],
+            # Cause the child to have two orderings within self.element
+            ordering=11111,
+        )
+
+        with self.assertRaisesMessage(InternalError, 'Each element may only have one ordering within the same parent'):
+            # Committing the savepoint would not execute the deferred trigger, and committing the transaction
+            # would mess with the test class and any subsequent unit tests, so the next best thing is to act
+            # like we are about to commit by forcing all constraint checks to run.
+            connections['default'].check_constraints()
+
+        transaction.savepoint_rollback(sid)
+
+    def test_unique_element_orderings_update_element(self):
+        # self.child is the only child of self.element, it has two paths
+        # because self.element has two parents, but it only has one ordering
+        self.assertEqual(self.child.paths.count(), 2)
+        self.assertCountEqual(self.child.paths.values_list('ordering', flat=True).distinct(), [0])
+
+        path = self.child.paths.get(path=[self.parent1.id, self.element.id])
+
+        # Assign this path to another element, and assign a different ordering
+        path.element = self.corpus.elements.create(type=self.element_type, name='Another child')
+        path.ordering = 65535
+        path.save()
+
+        # Nothing is wrong with the constraints yet, the path is just a little strange.
+        connections['default'].check_constraints()
+
+        # Committing the transaction should cause an error since the trigger will raise an exception.
+        # Since we are within a Django TestCase, such a failure will cause the whole test class to fail,
+        # so we use a savepoint to handle the failure gracefully.
+        sid = transaction.savepoint()
+
+        # Reassign the path to the child, without touching the ordering, so that the child now has
+        # two different orderings within self.element.
+        path.element = self.child
+        # path.save() should not raise anything: the trigger only runs on commit because it is deferred.
+        # We save using update_fields to really ensure only the element gets updated.
+        path.save(update_fields=['element'])
+
+        with self.assertRaisesMessage(InternalError, 'Each element may only have one ordering within the same parent'):
+            # Committing the savepoint would not execute the deferred trigger, and committing the transaction
+            # would mess with the test class and any subsequent unit tests, so the next best thing is to act
+            # like we are about to commit by forcing all constraint checks to run.
+            connections['default'].check_constraints()
+
+        transaction.savepoint_rollback(sid)
+
+    def test_unique_element_orderings_update_path(self):
+        # self.child is the only child of self.element, it has two paths
+        # because self.element has two parents, but it only has one ordering
+        self.assertEqual(self.child.paths.count(), 2)
+        self.assertCountEqual(self.child.paths.values_list('ordering', flat=True).distinct(), [0])
+
+        path = self.child.paths.get(path=[self.parent1.id, self.element.id])
+        # Change the path in some way, so that this path no longer ends with self.element
+        path.path = [self.parent1.id]
+        # Change the ordering
+        path.ordering = 42
+        path.save()
+
+        # Nothing is wrong with the constraints yet, the path is just a little strange.
+        connections['default'].check_constraints()
+
+        # Committing the transaction should cause an error since the trigger will raise an exception.
+        # Since we are within a Django TestCase, such a failure will cause the whole test class to fail,
+        # so we use a savepoint to handle the failure gracefully.
+        sid = transaction.savepoint()
+
+        # Restore the old path: now we are only saving to update the path, not the ordering,
+        # but this will cause the child to have two orderings within self.element.
+        path.path = [self.parent1.id, self.element.id]
+        # path.save() should not raise anything: the trigger only runs on commit because it is deferred.
+        # We save using update_fields to really ensure only the path gets updated.
+        path.save(update_fields=['path'])
+
+        with self.assertRaisesMessage(InternalError, 'Each element may only have one ordering within the same parent'):
+            # Committing the savepoint would not execute the deferred trigger, and committing the transaction
+            # would mess with the test class and any subsequent unit tests, so the next best thing is to act
+            # like we are about to commit by forcing all constraint checks to run.
+            connections['default'].check_constraints()
+
+        transaction.savepoint_rollback(sid)
+
+    def test_unique_element_orderings_update_ordering(self):
+        # self.child is the only child of self.element, it has two paths
+        # because self.element has two parents, but it only has one ordering
+        self.assertEqual(self.child.paths.count(), 2)
+        self.assertCountEqual(self.child.paths.values_list('ordering', flat=True).distinct(), [0])
+
+        path = self.child.paths.first()
+        path.ordering = 42
+
+        # Committing the transaction should cause an error since the trigger will raise an exception.
+        # Since we are within a Django TestCase, such a failure will cause the whole test class to fail,
+        # so we use a savepoint to handle the failure gracefully.
+        sid = transaction.savepoint()
+
+        # path.save() should not raise anything: the trigger only runs on commit because it is deferred.
+        # We save using update_fields to really ensure only the ordering gets updated.
+        path.save(update_fields=['ordering'])
+
+        with self.assertRaisesMessage(InternalError, 'Each element may only have one ordering within the same parent'):
+            # Committing the savepoint would not execute the deferred trigger, and committing the transaction
+            # would mess with the test class and any subsequent unit tests, so the next best thing is to act
+            # like we are about to commit by forcing all constraint checks to run.
+            connections['default'].check_constraints()
+
+        transaction.savepoint_rollback(sid)
+
+    def test_unique_parent_orderings_insert(self):
+        self.assertEqual(self.element.paths.get(path=[self.parent2.id]).ordering, 3)
+
+        # Committing the transaction should cause an error since the trigger will raise an exception.
+        # Since we are within a Django TestCase, such a failure will cause the whole test class to fail,
+        # so we use a savepoint to handle the failure gracefully.
+        sid = transaction.savepoint()
+
+        # Creating a path should not raise anything: the trigger only runs on commit because it is deferred.
+        self.child.paths.create(
+            path=[self.parent2.id],
+            # Cause ordering 0 to be assigned to both self.element and self.child
+            ordering=3,
+        )
+
+        with self.assertRaisesMessage(InternalError, 'Each element within a parent must have a distinct ordering'):
+            # Committing the savepoint would not execute the deferred trigger, and committing the transaction
+            # would mess with the test class and any subsequent unit tests, so the next best thing is to act
+            # like we are about to commit by forcing all constraint checks to run.
+            connections['default'].check_constraints()
+
+        transaction.savepoint_rollback(sid)
+
+    def test_unique_parent_orderings_update_element(self):
+        self.assertEqual(self.child.paths.count(), 2)
+        self.assertCountEqual(self.child.paths.values_list('ordering', flat=True).distinct(), [0])
+
+        path = self.child.paths.first()
+        # Change the element so that one path is on self.child with ordering 0, and one path is on this element with the same ordering
+        path.element = self.corpus.elements.create(type=self.element_type, name='Another child')
+
+        # Committing the transaction should cause an error since the trigger will raise an exception.
+        # Since we are within a Django TestCase, such a failure will cause the whole test class to fail,
+        # so we use a savepoint to handle the failure gracefully.
+        sid = transaction.savepoint()
+
+        # path.save() should not raise anything: the trigger only runs on commit because it is deferred.
+        # We save using update_fields to really ensure only the element gets updated.
+        path.save(update_fields=['element'])
+
+        with self.assertRaisesMessage(InternalError, 'Each element within a parent must have a distinct ordering'):
+            # Committing the savepoint would not execute the deferred trigger, and committing the transaction
+            # would mess with the test class and any subsequent unit tests, so the next best thing is to act
+            # like we are about to commit by forcing all constraint checks to run.
+            connections['default'].check_constraints()
+
+        transaction.savepoint_rollback(sid)
+
+    def test_unique_parent_orderings_update_path(self):
+        connections['default'].check_constraints()
+
+        self.assertEqual(self.element.paths.get(path=[self.parent2.id]).ordering, 3)
+
+        # Replicate the path of self.element, first with just the ordering
+        # We update all of the child's paths to avoid breaking unique_element_orderings
+        self.child.paths.update(ordering=3)
+
+        # Nothing is wrong with the constraints yet, the paths are just a little strange.
+        connections['default'].check_constraints()
+
+        # Committing the transaction should cause an error since the trigger will raise an exception.
+        # Since we are within a Django TestCase, such a failure will cause the whole test class to fail,
+        # so we use a savepoint to handle the failure gracefully.
+        sid = transaction.savepoint()
+
+        path = self.child.paths.get(path=[self.parent2.id, self.element.id])
+        # Now we are updating the path alone, which will cause both self.element and self.child
+        # to have the same orderings within self.parent2
+        path.path = [self.parent2.id]
+        # path.save() should not raise anything: the trigger only runs on commit because it is deferred.
+        # We save using update_fields to really ensure only the path gets updated.
+        path.save(update_fields=['path'])
+
+        with self.assertRaisesMessage(InternalError, 'Each element within a parent must have a distinct ordering'):
+            # Committing the savepoint would not execute the deferred trigger, and committing the transaction
+            # would mess with the test class and any subsequent unit tests, so the next best thing is to act
+            # like we are about to commit by forcing all constraint checks to run.
+            connections['default'].check_constraints()
+
+        transaction.savepoint_rollback(sid)
+
+    def test_unique_parent_orderings_update_ordering(self):
+        connections['default'].check_constraints()
+
+        self.assertTrue(ElementPath.objects.filter(path__last=self.parent2.id, ordering=0).exists())
+        self.assertEqual(self.element.paths.get(path=[self.parent2.id]).ordering, 3)
+
+        path = self.element.paths.get(path=[self.parent2.id])
+        # Update the ordering so that conflicts with the existing path
+        path.ordering = 0
+
+        # Committing the transaction should cause an error since the trigger will raise an exception.
+        # Since we are within a Django TestCase, such a failure will cause the whole test class to fail,
+        # so we use a savepoint to handle the failure gracefully.
+        sid = transaction.savepoint()
+
+        # path.save() should not raise anything: the trigger only runs on commit because it is deferred.
+        # We save using update_fields to really ensure only the ordering gets updated.
+        path.save()
+
+        with self.assertRaisesMessage(InternalError, 'Each element within a parent must have a distinct ordering'):
+            # Committing the savepoint would not execute the deferred trigger, and committing the transaction
+            # would mess with the test class and any subsequent unit tests, so the next best thing is to act
+            # like we are about to commit by forcing all constraint checks to run.
+            connections['default'].check_constraints()
+
+        transaction.savepoint_rollback(sid)
-- 
GitLab