diff --git a/arkindex/documents/api/elements.py b/arkindex/documents/api/elements.py index 2116376656dfff08d3af6814acdc982f37ecf321..0be67b421e25618b7b03a0e2b918e6d16ea63094 100644 --- a/arkindex/documents/api/elements.py +++ b/arkindex/documents/api/elements.py @@ -88,6 +88,7 @@ from arkindex.project.permissions import IsVerified, IsVerifiedOrReadOnly from arkindex.project.tools import BulkMap from arkindex.project.triggers import ( corpus_delete, + element_delete, element_trash, move_element, selection_worker_results_delete, @@ -1231,12 +1232,13 @@ class ElementRetrieve(ACLMixin, RetrieveUpdateDestroyAPIView): corpora = Corpus.objects.readable(self.request.user) queryset = Element.objects.filter(corpus__in=corpora) if self.request and self.request.method == "DELETE": - # Only include corpus and creator for ACL check and ID for deletion + # Only include the corpus and creator for ACL check, + # the ID for the deletion task, and the type and name for the task's description. return ( queryset - .select_related("corpus") + .select_related("corpus", "type") .annotate(has_dataset=Exists(DatasetElement.objects.filter(element_id=OuterRef("pk")))) - .only("id", "creator_id", "corpus") + .only("id", "creator_id", "corpus", "type__display_name", "name") ) return ( @@ -1274,12 +1276,12 @@ class ElementRetrieve(ACLMixin, RetrieveUpdateDestroyAPIView): return context def delete(self, request, *args, **kwargs): - self.check_object_permissions(self.request, self.get_object()) + element = self.get_object() + self.check_object_permissions(self.request, element) - queryset = Element.objects.filter(id=self.kwargs["pk"]) delete_children = self.request.query_params.get("delete_children", "false").lower() not in ("false", "0") - element_trash(queryset, user_id=self.request.user.id, delete_children=delete_children) + element_delete(element, user_id=self.request.user.id, delete_children=delete_children) return Response(status=status.HTTP_204_NO_CONTENT) diff --git a/arkindex/documents/models.py b/arkindex/documents/models.py index b9595d920af4cb2e6afc6e177f90bc940f7e376d..656815442afd5adfcfa6e3d60c41397cfa7ad745 100644 --- a/arkindex/documents/models.py +++ b/arkindex/documents/models.py @@ -11,7 +11,7 @@ from django.contrib.postgres.indexes import GinIndex from django.core.exceptions import ValidationError from django.core.validators import MaxValueValidator, MinValueValidator, RegexValidator from django.db import connections, models, transaction -from django.db.models import Deferrable, Q +from django.db.models import Count, Deferrable, Q, Window from django.db.models.functions import Cast, Least from django.urls import reverse from django.utils.functional import cached_property @@ -645,6 +645,82 @@ class Element(IndexableModel): # Now that the child's descendants are handled, we can clean up the child's own paths. child.paths.filter(path__last=self.id).delete() + @transaction.atomic + def remove_children(self): + """ + Remove this parent element from all of its children at once + """ + # Fetch two values that we will need to detect which queries to run, and build them: + # - How many paths this parent has, so we can tell if we need to delete paths and not only update them + # - One path on the parent, so that we can perform the update. + # In the rare edge case where the element has zero paths, this returns nothing, so we'll act as if there was a top-level path. + first_parent_path, parent_paths_count = ( + self.paths + .using("default") + .annotate(count=Window(Count("*"))) + .values_list("path", "count") + # Unnecessary for this algorithm to work, but simplifies unit testing a lot. + .order_by("id") + .first() + ) or ([], 0) + + with connections["default"].cursor() as cursor: + if parent_paths_count > 1: + # Delete all child paths that are not the first parent path. + # If we tried to also update those, we would end up with duplicates. + cursor.execute( + """ + DELETE FROM documents_elementpath child_paths + USING documents_elementpath parent_paths + WHERE + parent_paths.element_id = %(parent_id)s + AND parent_paths.path <> %(first_parent_path)s + AND child_paths.path @> (parent_paths.path || %(parent_id)s) + """, + { + "parent_id": self.id, + "first_parent_path": first_parent_path, + }, + ) + + # For children that have other parents, delete all the paths for this parent. + # The paths for the other parents will preserve the structure of the child's descendants, + # so we will have no updates to make. + cursor.execute( + """ + DELETE FROM documents_elementpath parent_paths + USING documents_elementpath other_paths + WHERE + parent_paths.path && ARRAY[%(parent_id)s] + AND other_paths.element_id = parent_paths.element_id + AND NOT other_paths.path && ARRAY[%(parent_id)s] + """, + {"parent_id": self.id}, + ) + + # For the child elements that had no other parent, we have one path starting with `first_parent_path`. + # We strip that from the children's paths and their descendants', + # meaning each child will have a top-level path left (empty array) + # and its descendants will remain descendants of this child. + # As an extra precaution, we will check that the path really starts with this prefix before updating, + # since @> is really a set operation ([1,2,3,4] @> [3,1,4] is true). + prefix = first_parent_path + [self.id] + # TODO: In Django 5.1, rewrite this without raw SQL by using F() slicing + # See https://docs.djangoproject.com/en/dev/ref/models/expressions/#slicing-f-expressions + cursor.execute( + """ + UPDATE documents_elementpath + SET path = path[%(prefix_size)s + 1:] + WHERE + path @> %(prefix)s + AND path[:%(prefix_size)s] = %(prefix)s + """, + { + "prefix": prefix, + "prefix_size": len(prefix), + }, + ) + @cached_property def thumbnail(self): from arkindex.images.models import Thumbnail # Prevent circular imports diff --git a/arkindex/documents/tasks.py b/arkindex/documents/tasks.py index 1c42b0d360adf1e7e7f2c97f61f911629cfbedea..ce4c680ae9d483e6b94da0595336902701ed66c0 100644 --- a/arkindex/documents/tasks.py +++ b/arkindex/documents/tasks.py @@ -100,6 +100,21 @@ def corpus_delete(corpus_id: str) -> None: logger.info(f"Deleted corpus {corpus_id}") +@job("high", timeout=settings.RQ_TIMEOUTS["element_trash"]) +def element_delete(element: Element, delete_children: bool) -> None: + """ + Wrapper around the element_trash task that removes the element from its children's paths + when not deleting recursively. + """ + if not delete_children: + element.remove_children() + + element_trash( + Element.objects.filter(id=element.id), + delete_children=delete_children, + ) + + @job("high", timeout=settings.RQ_TIMEOUTS["element_trash"]) def element_trash(queryset: ElementQuerySet, delete_children: bool) -> None: queryset.trash(delete_children=delete_children) diff --git a/arkindex/documents/tests/test_destroy_elements.py b/arkindex/documents/tests/test_destroy_elements.py index af9fe5fb44f798e919d50a245102ab6a1c68a0f9..31b3cd60cd2d1572385cf97d4674afb1f234bc36 100644 --- a/arkindex/documents/tests/test_destroy_elements.py +++ b/arkindex/documents/tests/test_destroy_elements.py @@ -46,7 +46,7 @@ class TestDestroyElements(FixtureAPITestCase): {"detail": "You do not have admin access to this element."} ) - @patch("arkindex.project.triggers.documents_tasks.element_trash.delay") + @patch("arkindex.project.triggers.documents_tasks.element_delete.delay") def test_element_destroy(self, delay_mock): self.client.force_login(self.user) castle_story = self.corpus.elements.create( @@ -61,14 +61,14 @@ class TestDestroyElements(FixtureAPITestCase): self.assertEqual(delay_mock.call_count, 1) args, kwargs = delay_mock.call_args self.assertEqual(len(args), 0) - self.assertCountEqual(list(kwargs.pop("queryset")), list(self.corpus.elements.filter(id=castle_story.id))) self.assertDictEqual(kwargs, { + "element": castle_story, "delete_children": False, "user_id": self.user.id, - "description": "Element deletion", + "description": "Deletion of Volume: Castle story", }) - @patch("arkindex.project.triggers.documents_tasks.element_trash.delay") + @patch("arkindex.project.triggers.documents_tasks.element_delete.delay") def test_element_destroy_delete_children(self, delay_mock): self.client.force_login(self.user) castle_story = self.corpus.elements.create( @@ -77,7 +77,12 @@ class TestDestroyElements(FixtureAPITestCase): ) self.assertTrue(self.corpus.elements.filter(id=castle_story.id).exists()) - for delete_children in [True, False]: + cases = [ + (True, "Recursive deletion of Volume: Castle story"), + (False, "Deletion of Volume: Castle story"), + ] + + for delete_children, expected_description in cases: with self.subTest(delete_children=delete_children): delay_mock.reset_mock() @@ -91,14 +96,14 @@ class TestDestroyElements(FixtureAPITestCase): self.assertEqual(delay_mock.call_count, 1) args, kwargs = delay_mock.call_args self.assertEqual(len(args), 0) - self.assertCountEqual(list(kwargs.pop("queryset")), list(self.corpus.elements.filter(id=castle_story.id))) self.assertDictEqual(kwargs, { + "element": castle_story, "delete_children": delete_children, "user_id": self.user.id, - "description": "Element deletion", + "description": expected_description, }) - @patch("arkindex.project.triggers.documents_tasks.element_trash.delay") + @patch("arkindex.project.triggers.documents_tasks.element_delete.delay") def test_element_destroy_creator(self, delay_mock): """ An element's creator can delete the element if it has write access @@ -117,11 +122,11 @@ class TestDestroyElements(FixtureAPITestCase): self.assertEqual(delay_mock.call_count, 1) args, kwargs = delay_mock.call_args self.assertEqual(len(args), 0) - self.assertCountEqual(list(kwargs.pop("queryset")), list(self.private_corpus.elements.filter(id=castle_story.id))) self.assertDictEqual(kwargs, { + "element": castle_story, "delete_children": False, "user_id": self.user.id, - "description": "Element deletion", + "description": "Deletion of Volume: Castle story", }) @patch("arkindex.project.mixins.has_access", return_value=False) @@ -156,7 +161,7 @@ class TestDestroyElements(FixtureAPITestCase): self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) self.assertDictEqual(response.json(), {"detail": "You cannot delete an element that is part of a dataset."}) - @patch("arkindex.project.triggers.documents_tasks.element_trash.delay") + @patch("arkindex.project.triggers.documents_tasks.element_delete.delay") def test_non_empty_element(self, delay_mock): """ We can now delete a non-empty element @@ -169,11 +174,11 @@ class TestDestroyElements(FixtureAPITestCase): self.assertEqual(delay_mock.call_count, 1) args, kwargs = delay_mock.call_args self.assertEqual(len(args), 0) - self.assertCountEqual(list(kwargs.pop("queryset")), list(self.corpus.elements.filter(id=self.vol.id))) self.assertDictEqual(kwargs, { + "element": self.vol, "delete_children": False, "user_id": self.user.id, - "description": "Element deletion", + "description": "Deletion of Volume: Volume 1", }) def test_element_trash_dataset_failure(self): @@ -542,7 +547,6 @@ class TestDestroyElements(FixtureAPITestCase): """ test Element.delete method """ - self.maxDiff = None self.client.force_login(self.user) with self.assertExactQueries("element_dot_delete.sql", params={"id": str(self.vol.id)}): self.vol.delete() diff --git a/arkindex/documents/tests/test_edit_elementpath.py b/arkindex/documents/tests/test_edit_elementpath.py index 5633763ceace96fd5fa7a00c21313f4224cd3002..250072d979f3dc9905e5b675043f4da05bbfe8c5 100644 --- a/arkindex/documents/tests/test_edit_elementpath.py +++ b/arkindex/documents/tests/test_edit_elementpath.py @@ -469,3 +469,196 @@ class TestEditElementPath(FixtureTestCase): self.check_parents(elements, "Element", *[[parent_name] for parent_name in parent_names]) self.check_parents(elements, "Child 1", *[[parent_name, "Element"] for parent_name in parent_names]) self.check_parents(elements, "Child 2", *[[parent_name, "Element"] for parent_name in parent_names]) + + def test_remove_children_no_parent(self): + r""" + Test removing all children from parent A, with no parent paths at all. + K A + \ - - + B C + / \ \ + D E F + / + G + """ + elements = build_tree( + { + "C": "A", + "F": "C", + "B": ["A", "K"], + "D": "B", + "E": "B", + "G": "D", + }, + corpus=self.corpus, + type=self.element_type, + ) + + # Trigger the edge case where there are no paths at all, even top-level paths + elements["A"].paths.all().delete() + + self.check_parents(elements, "A") + self.check_parents(elements, "B", ["A"], + ["K"]) + self.check_parents(elements, "C", ["A"]) + self.check_parents(elements, "D", ["A", "B"], + ["K", "B"]) + self.check_parents(elements, "E", ["A", "B"], + ["K", "B"]) + self.check_parents(elements, "F", ["A", "C"]) + self.check_parents(elements, "G", ["A", "B", "D"], + ["K", "B", "D"]) + + with self.assertExactQueries( + "remove_children_no_parents.sql", params={ + # remove_children uses transaction.atomic(), and we are running in a unit test, which is already in a transaction. + # This will cause a savepoint to be created, with a name that is hard to mock. + "savepoint": f"s{_thread.get_ident()}_x{connections['default'].savepoint_state + 1}", + "A": elements["A"].id, + } + ): + elements["A"].remove_children() + + self.check_parents(elements, "K", []) + self.check_parents(elements, "A") + self.check_parents(elements, "B", ["K"]) + self.check_parents(elements, "C", []) + self.check_parents(elements, "D", ["K", "B"]) + self.check_parents(elements, "E", ["K", "B"]) + self.check_parents(elements, "F", ["C"]) + self.check_parents(elements, "G", ["K", "B", "D"]) + + + def test_remove_children_single_parent(self): + r""" + Test removing all children from parent A, with one parent path. + X + \ + K A + \ - - + B C + / \ \ + D E F + / + G + """ + elements = build_tree( + { + "A": "X", + "C": "A", + "F": "C", + "B": ["A", "K"], + "D": "B", + "E": "B", + "G": "D", + }, + corpus=self.corpus, + type=self.element_type, + ) + + self.check_parents(elements, "X", []) + self.check_parents(elements, "K", []) + self.check_parents(elements, "A", ["X"]) + self.check_parents(elements, "B", ["X", "A"], + ["K"]) + self.check_parents(elements, "C", ["X", "A"]) + self.check_parents(elements, "D", ["X", "A", "B"], + ["K", "B"]) + self.check_parents(elements, "E", ["X", "A", "B"], + ["K", "B"]) + self.check_parents(elements, "F", ["X", "A", "C"]) + self.check_parents(elements, "G", ["X", "A", "B", "D"], + ["K", "B", "D"]) + + with self.assertExactQueries( + "remove_children_single_parent.sql", params={ + # remove_children uses transaction.atomic(), and we are running in a unit test, which is already in a transaction. + # This will cause a savepoint to be created, with a name that is hard to mock. + "savepoint": f"s{_thread.get_ident()}_x{connections['default'].savepoint_state + 1}", + "A": elements["A"].id, + "X": elements["X"].id, + } + ): + elements["A"].remove_children() + + self.check_parents(elements, "X", []) + self.check_parents(elements, "K", []) + self.check_parents(elements, "A", ["X"]) + self.check_parents(elements, "B", ["K"]) + self.check_parents(elements, "C", []) + self.check_parents(elements, "D", ["K", "B"]) + self.check_parents(elements, "E", ["K", "B"]) + self.check_parents(elements, "F", ["C"]) + self.check_parents(elements, "G", ["K", "B", "D"]) + + def test_remove_children_multiple_parents(self): + r""" + Test removing all children from parent A, with multiple parent paths. + X Y + \ / + K A + \ - - + B C + / \ \ + D E F + / + G + """ + elements = build_tree( + { + "A": ["X", "Y"], + "C": "A", + "F": "C", + "B": ["A", "K"], + "D": "B", + "E": "B", + "G": "D", + }, + corpus=self.corpus, + type=self.element_type, + ) + + self.check_parents(elements, "X", []) + self.check_parents(elements, "Y", []) + self.check_parents(elements, "K", []) + self.check_parents(elements, "A", ["X"], + ["Y"]) + self.check_parents(elements, "B", ["X", "A"], + ["Y", "A"], + ["K"]) + self.check_parents(elements, "C", ["X", "A"], + ["Y", "A"]) + self.check_parents(elements, "D", ["X", "A", "B"], + ["Y", "A", "B"], + ["K", "B"]) + self.check_parents(elements, "E", ["X", "A", "B"], + ["Y", "A", "B"], + ["K", "B"]) + self.check_parents(elements, "F", ["X", "A", "C"], + ["Y", "A", "C"]) + self.check_parents(elements, "G", ["X", "A", "B", "D"], + ["Y", "A", "B", "D"], + ["K", "B", "D"]) + + with self.assertExactQueries( + "remove_children_multiple_parents.sql", params={ + # remove_children uses transaction.atomic(), and we are running in a unit test, which is already in a transaction. + # This will cause a savepoint to be created, with a name that is hard to mock. + "savepoint": f"s{_thread.get_ident()}_x{connections['default'].savepoint_state + 1}", + "A": elements["A"].id, + "first_parent": elements["A"].paths.order_by("id").first().path[0], + } + ): + elements["A"].remove_children() + + self.check_parents(elements, "X", []) + self.check_parents(elements, "Y", []) + self.check_parents(elements, "K", []) + self.check_parents(elements, "A", ["X"], + ["Y"]) + self.check_parents(elements, "B", ["K"]) + self.check_parents(elements, "C", []) + self.check_parents(elements, "D", ["K", "B"]) + self.check_parents(elements, "E", ["K", "B"]) + self.check_parents(elements, "F", ["C"]) + self.check_parents(elements, "G", ["K", "B", "D"]) diff --git a/arkindex/project/triggers.py b/arkindex/project/triggers.py index 67328f7c36ed0903384fc3395970e6084be06b75..8ff16f82dd730cc0c5d7578f8508fbae29dfde26 100644 --- a/arkindex/project/triggers.py +++ b/arkindex/project/triggers.py @@ -33,6 +33,22 @@ def corpus_delete(corpus: Union[Corpus, UUID, str], user_id: Optional[int] = Non documents_tasks.corpus_delete.delay(corpus_id=corpus_id, description=description, user_id=user_id) +def element_delete(element: Element, + delete_children: bool = True, + user_id: Optional[int] = None) -> None: + """ + Delete one element, and optionally delete all its child elements recursively. + """ + assert isinstance(element, Element), "Only an element can be deleted" + deletion_type = "Recursive deletion" if delete_children else "Deletion" + documents_tasks.element_delete.delay( + element=element, + delete_children=delete_children, + user_id=user_id, + description=f"{deletion_type} of {element}", + ) + + def element_trash(queryset: ElementQuerySet, delete_children: bool = True, user_id: Optional[int] = None) -> None: diff --git a/arkindex/sql_validation/remove_children_multiple_parents.sql b/arkindex/sql_validation/remove_children_multiple_parents.sql new file mode 100644 index 0000000000000000000000000000000000000000..d5670523cb70f2af9441da7ff753b592161be2fa --- /dev/null +++ b/arkindex/sql_validation/remove_children_multiple_parents.sql @@ -0,0 +1,29 @@ +SAVEPOINT "{savepoint}"; + +SELECT "documents_elementpath"."path", + COUNT(*) OVER () AS "count" +FROM "documents_elementpath" +WHERE "documents_elementpath"."element_id" = '{A}'::uuid +ORDER BY "documents_elementpath"."id" ASC +LIMIT 1; + +DELETE +FROM documents_elementpath child_paths USING documents_elementpath parent_paths +WHERE parent_paths.element_id = '{A}'::uuid + AND parent_paths.path <> ARRAY['{first_parent}'::uuid] + AND child_paths.path @ > (parent_paths.path || '{A}'::uuid) ; + +DELETE +FROM documents_elementpath parent_paths USING documents_elementpath other_paths +WHERE parent_paths.path && ARRAY['{A}'::uuid] + AND other_paths.element_id = parent_paths.element_id + AND NOT other_paths.path && ARRAY['{A}'::uuid] ; + +UPDATE documents_elementpath +SET path = path[2 + 1:] +WHERE path @ > ARRAY['{first_parent}'::uuid, + '{A}'::uuid] + AND path[:2] = ARRAY['{first_parent}'::uuid, + '{A}'::uuid] ; + +RELEASE SAVEPOINT "{savepoint}" diff --git a/arkindex/sql_validation/remove_children_no_parents.sql b/arkindex/sql_validation/remove_children_no_parents.sql new file mode 100644 index 0000000000000000000000000000000000000000..4d31a5ff820082b6dd3200c9cc501458aed6ca51 --- /dev/null +++ b/arkindex/sql_validation/remove_children_no_parents.sql @@ -0,0 +1,21 @@ +SAVEPOINT "{savepoint}"; + +SELECT "documents_elementpath"."path", + COUNT(*) OVER () AS "count" +FROM "documents_elementpath" +WHERE "documents_elementpath"."element_id" = '{A}'::uuid +ORDER BY "documents_elementpath"."id" ASC +LIMIT 1; + +DELETE +FROM documents_elementpath parent_paths USING documents_elementpath other_paths +WHERE parent_paths.path && ARRAY['{A}'::uuid] + AND other_paths.element_id = parent_paths.element_id + AND NOT other_paths.path && ARRAY['{A}'::uuid] ; + +UPDATE documents_elementpath +SET path = path[1 + 1:] +WHERE path @ > ARRAY['{A}'::uuid] + AND path[:1] = ARRAY['{A}'::uuid] ; + +RELEASE SAVEPOINT "{savepoint}" diff --git a/arkindex/sql_validation/remove_children_single_parent.sql b/arkindex/sql_validation/remove_children_single_parent.sql new file mode 100644 index 0000000000000000000000000000000000000000..c05f2af5dafc765f547ba9236ab10faffc24c08e --- /dev/null +++ b/arkindex/sql_validation/remove_children_single_parent.sql @@ -0,0 +1,23 @@ +SAVEPOINT "{savepoint}"; + +SELECT "documents_elementpath"."path", + COUNT(*) OVER () AS "count" +FROM "documents_elementpath" +WHERE "documents_elementpath"."element_id" = '{A}'::uuid +ORDER BY "documents_elementpath"."id" ASC +LIMIT 1; + +DELETE +FROM documents_elementpath parent_paths USING documents_elementpath other_paths +WHERE parent_paths.path && ARRAY['{A}'::uuid] + AND other_paths.element_id = parent_paths.element_id + AND NOT other_paths.path && ARRAY['{A}'::uuid] ; + +UPDATE documents_elementpath +SET path = path[2 + 1:] +WHERE path @ > ARRAY['{X}'::uuid, + '{A}'::uuid] + AND path[:2] = ARRAY['{X}'::uuid, + '{A}'::uuid] ; + +RELEASE SAVEPOINT "{savepoint}"