From 5846c9c466cdaee2ee3016ec0d64053c8a9f26aa Mon Sep 17 00:00:00 2001 From: Erwan Rouchet <rouchet@teklia.com> Date: Wed, 28 Jun 2023 08:28:31 +0000 Subject: [PATCH] Replace ElementPath exclusion constraints with triggers --- .../migrations/0005_path_triggers.py | 32 ++ arkindex/documents/models.py | 78 +++-- arkindex/documents/tests/test_neighbors.py | 38 ++- .../documents/tests/test_path_constraints.py | 273 ++++++++++++++++-- 4 files changed, 370 insertions(+), 51 deletions(-) create mode 100644 arkindex/documents/migrations/0005_path_triggers.py diff --git a/arkindex/documents/migrations/0005_path_triggers.py b/arkindex/documents/migrations/0005_path_triggers.py new file mode 100644 index 0000000000..e359d8493c --- /dev/null +++ b/arkindex/documents/migrations/0005_path_triggers.py @@ -0,0 +1,32 @@ +# Generated by Django 4.1.7 on 2023-06-20 12:51 + +from django.db import migrations + +import pgtrigger.compiler +import pgtrigger.migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('documents', '0004_initial'), + ] + + operations = [ + pgtrigger.migrations.AddTrigger( + model_name='elementpath', + trigger=pgtrigger.compiler.Trigger(name='unique_element_orderings', sql=pgtrigger.compiler.UpsertTriggerSql(constraint='CONSTRAINT', func="\n IF EXISTS (\n SELECT 1\n FROM documents_elementpath p\n WHERE\n p.path && NEW.path\n AND p.path[array_length(p.path, 1)] = NEW.path[array_length(NEW.path, 1)]\n AND p.element_id = NEW.element_id\n AND p.ordering <> NEW.ordering\n ) THEN\n RAISE EXCEPTION 'Each element may only have one ordering within the same parent';\n END IF;\n RETURN NULL;\n ", hash='ede7bc92910da8dae8ea06625d9d3b08f216831c', operation='INSERT OR UPDATE', pgid='pgtrigger_unique_element_orderings_340d8', table='documents_elementpath', timing='DEFERRABLE INITIALLY DEFERRED', when='AFTER')), + ), + pgtrigger.migrations.AddTrigger( + model_name='elementpath', + trigger=pgtrigger.compiler.Trigger(name='unique_parent_orderings', sql=pgtrigger.compiler.UpsertTriggerSql(constraint='CONSTRAINT', func="\n IF EXISTS (\n SELECT 1\n FROM documents_elementpath p\n WHERE\n p.path && NEW.path\n AND p.path[array_length(p.path, 1)] = NEW.path[array_length(NEW.path, 1)]\n AND p.ordering = NEW.ordering\n AND p.element_id <> NEW.element_id\n ) THEN\n RAISE EXCEPTION 'Each element within a parent must have a distinct ordering';\n END IF;\n RETURN NULL;\n ", hash='45b3745ab2856a2e48ff3e4fc8430b1abda961e5', operation='INSERT OR UPDATE', pgid='pgtrigger_unique_parent_orderings_bbcd7', table='documents_elementpath', timing='DEFERRABLE INITIALLY DEFERRED', when='AFTER')), + ), + migrations.RemoveConstraint( + model_name='elementpath', + name='unique_element_orderings', + ), + migrations.RemoveConstraint( + model_name='elementpath', + name='unique_parent_orderings', + ), + ] diff --git a/arkindex/documents/models.py b/arkindex/documents/models.py index 51d92a1347..0d87d139c6 100644 --- a/arkindex/documents/models.py +++ b/arkindex/documents/models.py @@ -15,6 +15,7 @@ from django.db.models.functions import Cast, Least from django.utils.functional import cached_property from enumfields import Enum, EnumField +import pgtrigger from arkindex.documents.dates import InterpretedDateMixin from arkindex.documents.deletion import delete_element from arkindex.documents.managers import CorpusManager, ElementManager @@ -120,34 +121,65 @@ class ElementPath(models.Model): ], deferrable=Deferrable.DEFERRED, ), + ] + triggers = [ # An element cannot be in two positions within the same parent, but its paths can be duplicated # as there might be multiple paths to handle multiple grandparents on the same parent. - # We cannot use a unique constraint, but we can use an exclusion constraint to forbid different orderings. - ExclusionConstraint( + # We therefore cannot use a unique constraint. We could use an exclusion constraint to forbid + # different orderings, but that requires a GiST index which is heavy and slow. If the query planner + # decides to use it instead of the GIN index, this can result in severe slowdowns in element lists + # or element deletions. We use a deferrable trigger instead, which behaves very similarly to how + # PostgreSQL implements foreign key constraints. + # After one row is inserted or updated, we run a query to see if there is another path + # for the same element on the same parent that has a different ordering. + # If this element has two orderings on the same parent, then the trigger raises an exception. + pgtrigger.Trigger( name='unique_element_orderings', - expressions=[ - # On the same element… - ('element', RangeOperators.EQUAL), - # Within the same parent… - ('path__last', RangeOperators.EQUAL), - # There cannot be any orderings different from each other. - ('ordering', RangeOperators.NOT_EQUAL), - ], - deferrable=Deferrable.DEFERRED, + operation=pgtrigger.Insert | pgtrigger.Update, + when=pgtrigger.After, + level=pgtrigger.Row, + timing=pgtrigger.Deferred, + func=""" + IF EXISTS ( + SELECT 1 + FROM documents_elementpath p + WHERE + p.path && NEW.path + AND p.path[array_length(p.path, 1)] = NEW.path[array_length(NEW.path, 1)] + AND p.element_id = NEW.element_id + AND p.ordering <> NEW.ordering + ) THEN + RAISE EXCEPTION 'Each element may only have one ordering within the same parent'; + END IF; + RETURN NULL; + """, ), - # Within the same parent, there cannot be two elements with the same ordering, but since paths can be - # duplicated, there can be the same element twice with the same ordering, so exclusion constraint again. - ExclusionConstraint( + # Within the same parent, there cannot be two elements with the same ordering, + # but since paths can be duplicated, there can be the same element twice with the same ordering. + # We again use another trigger, but with a different condition. + # After one row is inserted or updated, we run a query to see if there is another path + # with the same parent and the same ordering but a different element ID. + # If there is another element in this parent with that same ordering, then the trigger raises an exception. + pgtrigger.Trigger( name='unique_parent_orderings', - expressions=[ - # Within the same parent… - ('path__last', RangeOperators.EQUAL), - # At the same ordering… - ('ordering', RangeOperators.EQUAL), - # There cannot be any element IDs different from each other. - ('element', RangeOperators.NOT_EQUAL), - ], - deferrable=Deferrable.DEFERRED, + operation=pgtrigger.Insert | pgtrigger.Update, + when=pgtrigger.After, + level=pgtrigger.Row, + timing=pgtrigger.Deferred, + func=""" + IF EXISTS ( + SELECT 1 + FROM documents_elementpath p + WHERE + p.path && NEW.path + AND p.path[array_length(p.path, 1)] = NEW.path[array_length(NEW.path, 1)] + AND p.ordering = NEW.ordering + AND p.element_id <> NEW.element_id + ) THEN + RAISE EXCEPTION 'Each element within a parent must have a distinct ordering'; + END IF; + RETURN NULL; + """, ), ] diff --git a/arkindex/documents/tests/test_neighbors.py b/arkindex/documents/tests/test_neighbors.py index 546467af3d..7ff6730b21 100644 --- a/arkindex/documents/tests/test_neighbors.py +++ b/arkindex/documents/tests/test_neighbors.py @@ -1,3 +1,4 @@ +from django.db import transaction from django.db.models import F from django.urls import reverse from rest_framework import status @@ -222,9 +223,18 @@ class TestElementNeighbors(FixtureAPITestCase): ) c_path = elements['C'].paths.get() c_path.ordering = 6584 - c_path.save() - response = self.client.get(reverse('api:elements-neighbors', kwargs={'pk': str(elements['B'].id)})) + # The unique_element_orderings trigger will fail because an element is not supposed to have + # multiple orderings within the same parent. This trigger is deferred, meaning it will only run + # after the test ends, during teardown, so we cannot ignore it with pgtrigger.ignore(). + # Instead, we use a savepoint that we rollback immediately after the request, so that the trigger never runs. + sid = transaction.savepoint() + try: + c_path.save() + response = self.client.get(reverse('api:elements-neighbors', kwargs={'pk': str(elements['B'].id)})) + finally: + transaction.savepoint_rollback(sid) + self.assertEqual(response.status_code, status.HTTP_200_OK) results = response.json()['results'] self.assertListEqual( @@ -376,14 +386,24 @@ class TestElementNeighbors(FixtureAPITestCase): corpus=self.corpus, type=self.volume_type, ) - # Put X at the orderings we want it to have, then add 1 to D E F G and L to account for the move - elements['X'].paths.filter(path__contains=[elements['A'].id]).update(ordering=1) - elements['X'].paths.filter(path__contains=[elements['B'].id]).update(ordering=4) - ElementPath.objects.filter(element__name__in=list('DEFGL')).update(ordering=F('ordering') + 1) + # The unique_element_orderings trigger will fail because an element is not supposed to have + # multiple orderings within the same parent. This trigger is deferred, meaning it will only run + # after the test ends, during teardown, so we cannot ignore it with pgtrigger.ignore(). + # Instead, we use a savepoint that we rollback immediately after the request, so that the trigger never runs. + sid = transaction.savepoint() + + try: + # Put X at the orderings we want it to have, then add 1 to D E F G and L to account for the move + elements['X'].paths.filter(path__contains=[elements['A'].id]).update(ordering=1) + elements['X'].paths.filter(path__contains=[elements['B'].id]).update(ordering=4) + ElementPath.objects.filter(element__name__in=list('DEFGL')).update(ordering=F('ordering') + 1) + + # Ask for X's neighbors, should get C, X, D, K, X, and L. + # Without the fix, would get the 12 children + response = self.client.get(reverse('api:elements-neighbors', kwargs={'pk': str(elements['X'].id)})) + finally: + transaction.savepoint_rollback(sid) - # Ask for X's neighbors, should get C, X, D, K, X, and L. - # Without the fix, would get the 12 children - response = self.client.get(reverse('api:elements-neighbors', kwargs={'pk': str(elements['X'].id)})) self.assertEqual(response.status_code, status.HTTP_200_OK) # The endpoint is ordered by ID, so depending on the random UUIDs we get, we can have CXDKXL or KXLCXD diff --git a/arkindex/documents/tests/test_path_constraints.py b/arkindex/documents/tests/test_path_constraints.py index 3be218d728..e3b966df37 100644 --- a/arkindex/documents/tests/test_path_constraints.py +++ b/arkindex/documents/tests/test_path_constraints.py @@ -1,35 +1,270 @@ -from django.db import IntegrityError +from django.db import IntegrityError, connections, transaction +from django.db.utils import InternalError +from arkindex.documents.models import ElementPath from arkindex.project.tests import FixtureTestCase class TestPathConstraints(FixtureTestCase): - def test_unique_paths(self): - """ - Ensure an Element cannot have strictly the same paths twice - """ - element_type = self.corpus.types.first() - parent1 = self.corpus.elements.get(name='Volume 1') - parent2 = self.corpus.elements.get(name='Volume 2') - element = self.corpus.elements.create( - type=element_type, + @classmethod + def setUpTestData(cls): + super().setUpTestData() + cls.element_type = cls.corpus.types.first() + cls.parent1 = cls.corpus.elements.get(name='Volume 1') + cls.parent2 = cls.corpus.elements.get(name='Volume 2') + cls.element = cls.corpus.elements.create( + type=cls.element_type, name='Test element', ) - child = self.corpus.elements.create( - type=element_type, + cls.child = cls.corpus.elements.create( + type=cls.element_type, name='Test child', ) - element.add_parent(parent1) - element.add_parent(parent2) + cls.element.add_parent(cls.parent1) + cls.element.add_parent(cls.parent2) - child.add_parent(element) - self.assertCountEqual(child.paths.values_list('path', 'ordering'), [ - ([parent1.id, element.id], 0), - ([parent2.id, element.id], 0) + cls.child.add_parent(cls.element) + + def test_unique_paths(self): + """ + Ensure an Element cannot have strictly the same paths twice + """ + self.assertCountEqual(self.child.paths.values_list('path', 'ordering'), [ + ([self.parent1.id, self.element.id], 0), + ([self.parent2.id, self.element.id], 0) ]) # Cannot create a path that already exists with self.assertRaises(IntegrityError): - child.paths.create(path=[parent1.id, element.id], ordering=42) + self.child.paths.create(path=[self.parent1.id, self.element.id], ordering=42) + + def test_unique_element_orderings_insert(self): + self.child.paths.get(path=[self.parent2.id, self.element.id]).delete() + + # Committing the transaction should cause an error since the trigger will raise an exception. + # Since we are within a Django TestCase, such a failure will cause the whole test class to fail, + # so we use a savepoint to handle the failure gracefully. + sid = transaction.savepoint() + + # Creating a path should not raise anything: the trigger only runs on commit because it is deferred. + self.child.paths.create( + path=[self.parent2.id, self.element.id], + # Cause the child to have two orderings within self.element + ordering=11111, + ) + + with self.assertRaisesMessage(InternalError, 'Each element may only have one ordering within the same parent'): + # Committing the savepoint would not execute the deferred trigger, and committing the transaction + # would mess with the test class and any subsequent unit tests, so the next best thing is to act + # like we are about to commit by forcing all constraint checks to run. + connections['default'].check_constraints() + + transaction.savepoint_rollback(sid) + + def test_unique_element_orderings_update_element(self): + # self.child is the only child of self.element, it has two paths + # because self.element has two parents, but it only has one ordering + self.assertEqual(self.child.paths.count(), 2) + self.assertCountEqual(self.child.paths.values_list('ordering', flat=True).distinct(), [0]) + + path = self.child.paths.get(path=[self.parent1.id, self.element.id]) + + # Assign this path to another element, and assign a different ordering + path.element = self.corpus.elements.create(type=self.element_type, name='Another child') + path.ordering = 65535 + path.save() + + # Nothing is wrong with the constraints yet, the path is just a little strange. + connections['default'].check_constraints() + + # Committing the transaction should cause an error since the trigger will raise an exception. + # Since we are within a Django TestCase, such a failure will cause the whole test class to fail, + # so we use a savepoint to handle the failure gracefully. + sid = transaction.savepoint() + + # Reassign the path to the child, without touching the ordering, so that the child now has + # two different orderings within self.element. + path.element = self.child + # path.save() should not raise anything: the trigger only runs on commit because it is deferred. + # We save using update_fields to really ensure only the element gets updated. + path.save(update_fields=['element']) + + with self.assertRaisesMessage(InternalError, 'Each element may only have one ordering within the same parent'): + # Committing the savepoint would not execute the deferred trigger, and committing the transaction + # would mess with the test class and any subsequent unit tests, so the next best thing is to act + # like we are about to commit by forcing all constraint checks to run. + connections['default'].check_constraints() + + transaction.savepoint_rollback(sid) + + def test_unique_element_orderings_update_path(self): + # self.child is the only child of self.element, it has two paths + # because self.element has two parents, but it only has one ordering + self.assertEqual(self.child.paths.count(), 2) + self.assertCountEqual(self.child.paths.values_list('ordering', flat=True).distinct(), [0]) + + path = self.child.paths.get(path=[self.parent1.id, self.element.id]) + # Change the path in some way, so that this path no longer ends with self.element + path.path = [self.parent1.id] + # Change the ordering + path.ordering = 42 + path.save() + + # Nothing is wrong with the constraints yet, the path is just a little strange. + connections['default'].check_constraints() + + # Committing the transaction should cause an error since the trigger will raise an exception. + # Since we are within a Django TestCase, such a failure will cause the whole test class to fail, + # so we use a savepoint to handle the failure gracefully. + sid = transaction.savepoint() + + # Restore the old path: now we are only saving to update the path, not the ordering, + # but this will cause the child to have two orderings within self.element. + path.path = [self.parent1.id, self.element.id] + # path.save() should not raise anything: the trigger only runs on commit because it is deferred. + # We save using update_fields to really ensure only the path gets updated. + path.save(update_fields=['path']) + + with self.assertRaisesMessage(InternalError, 'Each element may only have one ordering within the same parent'): + # Committing the savepoint would not execute the deferred trigger, and committing the transaction + # would mess with the test class and any subsequent unit tests, so the next best thing is to act + # like we are about to commit by forcing all constraint checks to run. + connections['default'].check_constraints() + + transaction.savepoint_rollback(sid) + + def test_unique_element_orderings_update_ordering(self): + # self.child is the only child of self.element, it has two paths + # because self.element has two parents, but it only has one ordering + self.assertEqual(self.child.paths.count(), 2) + self.assertCountEqual(self.child.paths.values_list('ordering', flat=True).distinct(), [0]) + + path = self.child.paths.first() + path.ordering = 42 + + # Committing the transaction should cause an error since the trigger will raise an exception. + # Since we are within a Django TestCase, such a failure will cause the whole test class to fail, + # so we use a savepoint to handle the failure gracefully. + sid = transaction.savepoint() + + # path.save() should not raise anything: the trigger only runs on commit because it is deferred. + # We save using update_fields to really ensure only the ordering gets updated. + path.save(update_fields=['ordering']) + + with self.assertRaisesMessage(InternalError, 'Each element may only have one ordering within the same parent'): + # Committing the savepoint would not execute the deferred trigger, and committing the transaction + # would mess with the test class and any subsequent unit tests, so the next best thing is to act + # like we are about to commit by forcing all constraint checks to run. + connections['default'].check_constraints() + + transaction.savepoint_rollback(sid) + + def test_unique_parent_orderings_insert(self): + self.assertEqual(self.element.paths.get(path=[self.parent2.id]).ordering, 3) + + # Committing the transaction should cause an error since the trigger will raise an exception. + # Since we are within a Django TestCase, such a failure will cause the whole test class to fail, + # so we use a savepoint to handle the failure gracefully. + sid = transaction.savepoint() + + # Creating a path should not raise anything: the trigger only runs on commit because it is deferred. + self.child.paths.create( + path=[self.parent2.id], + # Cause ordering 0 to be assigned to both self.element and self.child + ordering=3, + ) + + with self.assertRaisesMessage(InternalError, 'Each element within a parent must have a distinct ordering'): + # Committing the savepoint would not execute the deferred trigger, and committing the transaction + # would mess with the test class and any subsequent unit tests, so the next best thing is to act + # like we are about to commit by forcing all constraint checks to run. + connections['default'].check_constraints() + + transaction.savepoint_rollback(sid) + + def test_unique_parent_orderings_update_element(self): + self.assertEqual(self.child.paths.count(), 2) + self.assertCountEqual(self.child.paths.values_list('ordering', flat=True).distinct(), [0]) + + path = self.child.paths.first() + # Change the element so that one path is on self.child with ordering 0, and one path is on this element with the same ordering + path.element = self.corpus.elements.create(type=self.element_type, name='Another child') + + # Committing the transaction should cause an error since the trigger will raise an exception. + # Since we are within a Django TestCase, such a failure will cause the whole test class to fail, + # so we use a savepoint to handle the failure gracefully. + sid = transaction.savepoint() + + # path.save() should not raise anything: the trigger only runs on commit because it is deferred. + # We save using update_fields to really ensure only the element gets updated. + path.save(update_fields=['element']) + + with self.assertRaisesMessage(InternalError, 'Each element within a parent must have a distinct ordering'): + # Committing the savepoint would not execute the deferred trigger, and committing the transaction + # would mess with the test class and any subsequent unit tests, so the next best thing is to act + # like we are about to commit by forcing all constraint checks to run. + connections['default'].check_constraints() + + transaction.savepoint_rollback(sid) + + def test_unique_parent_orderings_update_path(self): + connections['default'].check_constraints() + + self.assertEqual(self.element.paths.get(path=[self.parent2.id]).ordering, 3) + + # Replicate the path of self.element, first with just the ordering + # We update all of the child's paths to avoid breaking unique_element_orderings + self.child.paths.update(ordering=3) + + # Nothing is wrong with the constraints yet, the paths are just a little strange. + connections['default'].check_constraints() + + # Committing the transaction should cause an error since the trigger will raise an exception. + # Since we are within a Django TestCase, such a failure will cause the whole test class to fail, + # so we use a savepoint to handle the failure gracefully. + sid = transaction.savepoint() + + path = self.child.paths.get(path=[self.parent2.id, self.element.id]) + # Now we are updating the path alone, which will cause both self.element and self.child + # to have the same orderings within self.parent2 + path.path = [self.parent2.id] + # path.save() should not raise anything: the trigger only runs on commit because it is deferred. + # We save using update_fields to really ensure only the path gets updated. + path.save(update_fields=['path']) + + with self.assertRaisesMessage(InternalError, 'Each element within a parent must have a distinct ordering'): + # Committing the savepoint would not execute the deferred trigger, and committing the transaction + # would mess with the test class and any subsequent unit tests, so the next best thing is to act + # like we are about to commit by forcing all constraint checks to run. + connections['default'].check_constraints() + + transaction.savepoint_rollback(sid) + + def test_unique_parent_orderings_update_ordering(self): + connections['default'].check_constraints() + + self.assertTrue(ElementPath.objects.filter(path__last=self.parent2.id, ordering=0).exists()) + self.assertEqual(self.element.paths.get(path=[self.parent2.id]).ordering, 3) + + path = self.element.paths.get(path=[self.parent2.id]) + # Update the ordering so that conflicts with the existing path + path.ordering = 0 + + # Committing the transaction should cause an error since the trigger will raise an exception. + # Since we are within a Django TestCase, such a failure will cause the whole test class to fail, + # so we use a savepoint to handle the failure gracefully. + sid = transaction.savepoint() + + # path.save() should not raise anything: the trigger only runs on commit because it is deferred. + # We save using update_fields to really ensure only the ordering gets updated. + path.save() + + with self.assertRaisesMessage(InternalError, 'Each element within a parent must have a distinct ordering'): + # Committing the savepoint would not execute the deferred trigger, and committing the transaction + # would mess with the test class and any subsequent unit tests, so the next best thing is to act + # like we are about to commit by forcing all constraint checks to run. + connections['default'].check_constraints() + + transaction.savepoint_rollback(sid) -- GitLab