From 78ebea952171f5580dc4176526c3c74ac6086653 Mon Sep 17 00:00:00 2001 From: Erwan Rouchet <rouchet@teklia.com> Date: Wed, 12 Feb 2025 16:27:29 +0000 Subject: [PATCH] Switch to psycopg 3 --- .gitlab-ci.yml | 2 +- .isort.cfg | 2 +- Dockerfile | 4 +- arkindex/documents/api/elements.py | 13 ++- arkindex/documents/api/entities.py | 2 +- arkindex/documents/export/__init__.py | 2 +- arkindex/documents/indexer.py | 4 +- arkindex/documents/serializers/entities.py | 2 +- .../tests/tasks/test_corpus_delete.py | 4 +- .../tests/tasks/test_move_element.py | 12 +-- .../tests/tasks/test_worker_results_delete.py | 52 +++++----- .../documents/tests/test_corpus_elements.py | 2 +- .../documents/tests/test_destroy_elements.py | 10 +- .../documents/tests/test_edit_elementpath.py | 34 +++---- arkindex/documents/tests/test_indexer.py | 15 ++- .../documents/tests/test_path_constraints.py | 19 ++-- arkindex/process/tests/test_managers.py | 2 +- .../process/tests/test_process_elements.py | 20 ++-- .../tests/worker_activity/test_bulk_insert.py | 32 +++---- arkindex/project/tests/__init__.py | 2 +- arkindex/project/tests/test_gis.py | 6 +- arkindex/sql_validation/add_first_parent.sql | 15 ++- arkindex/sql_validation/add_second_parent.sql | 5 +- .../sql_validation/element_dot_delete.sql | 96 +++++++++++-------- .../element_move_with_children.sql | 12 +-- .../element_move_without_child.sql | 12 +-- arkindex/sql_validation/indexer_prefetch.sql | 6 +- arkindex/sql_validation/list_elements.sql | 2 +- .../process_elements_with_image.sql | 2 +- .../remove_child_last_parent.sql | 8 +- .../remove_children_multiple_parents.sql | 8 +- .../remove_children_no_parents.sql | 4 +- .../remove_children_single_parent.sql | 6 +- .../workeractivity_bulk_insert_no_model.sql | 2 +- arkindex/training/api.py | 15 ++- base/requirements.txt | 1 - requirements.txt | 1 + ruff.toml | 2 +- 38 files changed, 223 insertions(+), 215 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index b8a56570c6..6183a928f0 100644 
--- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -11,7 +11,7 @@ include: # For jobs that run backend scripts directly .backend-setup: - image: registry.gitlab.teklia.com/arkindex/backend/base:django-5.0.8 + image: registry.gitlab.teklia.com/arkindex/backend/base:psycopg3 cache: paths: diff --git a/.isort.cfg b/.isort.cfg index 0b8bd7b946..83a3f011bd 100644 --- a/.isort.cfg +++ b/.isort.cfg @@ -7,4 +7,4 @@ use_parentheses = True line_length = 120 default_section=FIRSTPARTY -known_third_party = SolrClient,bleach,boto3,botocore,cryptography,corsheaders,django,django_admin_hstore_widget,django_rq,drf_spectacular,enumfields,gitlab,psycopg2,requests,responses,rest_framework,rq,setuptools,sqlparse,teklia_toolbox,tenacity,tripoli,yaml +known_third_party = SolrClient,bleach,boto3,botocore,cryptography,corsheaders,django,django_admin_hstore_widget,django_rq,drf_spectacular,enumfields,gitlab,psycopg,requests,responses,rest_framework,rq,setuptools,sqlparse,teklia_toolbox,tenacity,tripoli,yaml diff --git a/Dockerfile b/Dockerfile index 01ec3163d4..b1092bf632 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,11 +1,11 @@ # syntax=docker/dockerfile:1 -FROM registry.gitlab.teklia.com/arkindex/backend/base:django-5.0.8 as build +FROM registry.gitlab.teklia.com/arkindex/backend/base:psycopg3 AS build RUN mkdir build ADD . 
build RUN cd build && python3 setup.py sdist -FROM registry.gitlab.teklia.com/arkindex/backend/base:django-5.0.8 +FROM registry.gitlab.teklia.com/arkindex/backend/base:psycopg3 # Install arkindex and its deps # Uses a source archive instead of full local copy to speedup docker build diff --git a/arkindex/documents/api/elements.py b/arkindex/documents/api/elements.py index 5a027a5179..d9644c226b 100644 --- a/arkindex/documents/api/elements.py +++ b/arkindex/documents/api/elements.py @@ -32,7 +32,6 @@ from drf_spectacular.utils import ( extend_schema_view, inline_serializer, ) -from psycopg2.extras import execute_values from rest_framework import permissions, serializers, status from rest_framework.exceptions import NotFound, PermissionDenied, ValidationError from rest_framework.generics import ( @@ -133,13 +132,17 @@ def _fetch_has_children(elements): if not elements: return elements + # psycopg 2 used to have `execute_values` as a clean way to manage a VALUES statement, but psycopg 3 doesn't, + # so we will add a variable number of placeholders to put each element ID separately. + # This results in `VALUES (%s),(%s),(%s)`. 
+ placeholders = ",".join("(%s)" for _ in range(len(elements))) + with connection.cursor() as cursor: - execute_values( - cursor, + cursor.execute( "SELECT DISTINCT ON (e.id) e.id, p.id is not null as has_children " - "FROM (VALUES %s) e (id) " + f"FROM (VALUES {placeholders}) e (id) " "LEFT JOIN documents_elementpath p ON ARRAY[e.id] && p.path AND p.path[array_length(p.path, 1)] = e.id", - tuple((element.id, ) for element in elements), + tuple(element.id for element in elements), ) has_children = dict(cursor.fetchall()) diff --git a/arkindex/documents/api/entities.py b/arkindex/documents/api/entities.py index 7c60ae8f43..32fd143739 100644 --- a/arkindex/documents/api/entities.py +++ b/arkindex/documents/api/entities.py @@ -6,7 +6,7 @@ from django.core.exceptions import ValidationError as DjangoValidationError from django.db.utils import OperationalError from django.shortcuts import get_object_or_404 from drf_spectacular.utils import OpenApiParameter, OpenApiResponse, extend_schema, extend_schema_view -from psycopg2.errors import ProgramLimitExceeded +from psycopg.errors import ProgramLimitExceeded from rest_framework import permissions, serializers, status from rest_framework.exceptions import NotFound, PermissionDenied, ValidationError from rest_framework.generics import CreateAPIView, ListAPIView, RetrieveUpdateDestroyAPIView diff --git a/arkindex/documents/export/__init__.py b/arkindex/documents/export/__init__.py index ac1ccd7814..bd51e010f4 100644 --- a/arkindex/documents/export/__init__.py +++ b/arkindex/documents/export/__init__.py @@ -48,7 +48,7 @@ EXPORT_QUERIES = [ def run_pg_query(query, source_db): """ Run a single Postgresql query and split the results into chunks. - When a name is given to a cursor, psycopg2 uses a server-side cursor; we just use a random string as a name. + When a name is given to a cursor, psycopg uses a server-side cursor; we just use a random string as a name. 
""" db = connections[source_db] diff --git a/arkindex/documents/indexer.py b/arkindex/documents/indexer.py index bee4bd7fc9..fa273647b4 100644 --- a/arkindex/documents/indexer.py +++ b/arkindex/documents/indexer.py @@ -30,7 +30,7 @@ SELECT element.name AS name, elementtype.display_name AS type_name, element.image_id AS image_id, - element.polygon::bytea AS polygon, + element.polygon AS polygon, element.worker_run_id AS worker_run_id FROM documents_element element INNER JOIN documents_elementtype elementtype ON (elementtype.id = element.type_id) @@ -50,7 +50,7 @@ SELECT element.name as name, elementtype.display_name as type_name, element.image_id AS image_id, - element.polygon::bytea AS polygon, + element.polygon AS polygon, element.worker_run_id AS worker_run_id FROM (SELECT * FROM parent LIMIT %(limit)s OFFSET %(offset)s) AS parent_chunk INNER JOIN documents_elementpath as elementpath ON (elementpath.path @> ARRAY[parent_chunk.id]) diff --git a/arkindex/documents/serializers/entities.py b/arkindex/documents/serializers/entities.py index bcc2778368..bd9a291d8e 100644 --- a/arkindex/documents/serializers/entities.py +++ b/arkindex/documents/serializers/entities.py @@ -3,7 +3,7 @@ from textwrap import dedent from django.db import transaction from django.db.utils import OperationalError -from psycopg2.errors import ProgramLimitExceeded +from psycopg.errors import ProgramLimitExceeded from rest_framework import serializers from rest_framework.exceptions import ValidationError diff --git a/arkindex/documents/tests/tasks/test_corpus_delete.py b/arkindex/documents/tests/tasks/test_corpus_delete.py index 7f0ff00642..071af6a3af 100644 --- a/arkindex/documents/tests/tasks/test_corpus_delete.py +++ b/arkindex/documents/tests/tasks/test_corpus_delete.py @@ -174,7 +174,7 @@ class TestDeleteCorpus(FixtureTestCase): def test_run(self): receivers = pre_delete.receivers - with force_constraints_immediate(), self.assertExactQueries("corpus_delete.sql", params={"corpus_id": 
self.corpus.id}): + with force_constraints_immediate(), self.assertExactQueries("corpus_delete.sql", params={"corpus_id": self.corpus.id.hex}): corpus_delete(self.corpus.id) # Ensure the task restores the signal receivers @@ -221,7 +221,7 @@ class TestDeleteCorpus(FixtureTestCase): self.corpus.top_level_type = self.corpus.types.first() self.corpus.save() - with force_constraints_immediate(), self.assertExactQueries("corpus_delete_top_level_type.sql", params={"corpus_id": self.corpus.id}): + with force_constraints_immediate(), self.assertExactQueries("corpus_delete_top_level_type.sql", params={"corpus_id": self.corpus.id.hex}): corpus_delete(self.corpus.id) # Ensure the task restores the signal receivers diff --git a/arkindex/documents/tests/tasks/test_move_element.py b/arkindex/documents/tests/tasks/test_move_element.py index 32d5a9cc78..84272f9ade 100644 --- a/arkindex/documents/tests/tasks/test_move_element.py +++ b/arkindex/documents/tests/tasks/test_move_element.py @@ -26,9 +26,9 @@ class TestMoveElement(FixtureTestCase): self.assertEqual(list(source_paths.values("path")), [{"path": [self.parent.id]}]) with self.assertExactQueries("element_move_without_child.sql", params={ - "source_id": str(self.source_without_child.id), - "parent_id": str(self.parent.id), - "destination_id": str(self.destination.id), + "source_id": self.source_without_child.id.hex, + "parent_id": self.parent.id.hex, + "destination_id": self.destination.id.hex, "savepoints": [f"s{_thread.get_ident()}_x{connections['default'].savepoint_state + 1}", f"s{_thread.get_ident()}_x{connections['default'].savepoint_state + 2}"] }): move_element(self.source_without_child, self.destination) @@ -52,9 +52,9 @@ class TestMoveElement(FixtureTestCase): self.assertEqual(list(source_paths.values("path")), [{"path": [self.parent.id]}]) with self.assertExactQueries("element_move_with_children.sql", params={ - "source_id": str(self.source_with_children.id), - "parent_id": str(self.parent.id), - "destination_id": 
str(self.destination.id), + "source_id": self.source_with_children.id.hex, + "parent_id": self.parent.id.hex, + "destination_id": self.destination.id.hex, "savepoints": [f"s{_thread.get_ident()}_x{connections['default'].savepoint_state + 1}", f"s{_thread.get_ident()}_x{connections['default'].savepoint_state + 2}"] }): move_element(self.source_with_children, self.destination) diff --git a/arkindex/documents/tests/tasks/test_worker_results_delete.py b/arkindex/documents/tests/tasks/test_worker_results_delete.py index 264d9fa821..0d3c429ecf 100644 --- a/arkindex/documents/tests/tasks/test_worker_results_delete.py +++ b/arkindex/documents/tests/tasks/test_worker_results_delete.py @@ -133,8 +133,8 @@ class TestDeleteWorkerResults(FixtureTestCase): def test_run_on_corpus(self): with self.assertExactQueries("worker_results_delete_in_corpus.sql", params={ - "corpus_id": str(self.corpus.id), - "version_id": str(self.version_1.id), + "corpus_id": self.corpus.id.hex, + "version_id": self.version_1.id.hex, }): worker_results_delete( corpus_id=self.corpus.id, @@ -153,9 +153,9 @@ class TestDeleteWorkerResults(FixtureTestCase): def test_run_on_parent(self): with self.assertExactQueries("worker_results_delete_under_parent.sql", params={ - "corpus_id": str(self.corpus.id), - "version_id": str(self.version_1.id), - "element_id": str(self.page1.id), + "corpus_id": self.corpus.id.hex, + "version_id": self.version_1.id.hex, + "element_id": self.page1.id.hex, }): worker_results_delete(corpus_id=self.corpus.id, version_id=self.version_1.id, element_id=self.page1.id) self.check_deleted( @@ -169,9 +169,9 @@ class TestDeleteWorkerResults(FixtureTestCase): The element itself is deleted after its related results from the same version """ with self.assertExactQueries("worker_results_delete_under_parent_included.sql", params={ - "corpus_id": str(self.corpus.id), - "version_id": str(self.version_1.id), - "element_id": str(self.page2.id), + "corpus_id": self.corpus.id.hex, + "version_id": 
self.version_1.id.hex, + "element_id": self.page2.id.hex, }): worker_results_delete(corpus_id=self.corpus.id, version_id=self.version_1.id, element_id=self.page2.id) self.check_deleted( @@ -183,9 +183,9 @@ class TestDeleteWorkerResults(FixtureTestCase): def test_run_model_version_filter_on_parent(self): with self.assertExactQueries("worker_results_delete_model_version_under_parent.sql", params={ - "corpus_id": str(self.corpus.id), - "element_id": str(self.page2.id), - "model_version_id": str(self.model_version.id), + "corpus_id": self.corpus.id.hex, + "element_id": self.page2.id.hex, + "model_version_id": self.model_version.id.hex, }): worker_results_delete( corpus_id=self.corpus.id, @@ -199,8 +199,8 @@ class TestDeleteWorkerResults(FixtureTestCase): def test_run_configuration_filter(self): with self.assertExactQueries("worker_results_delete_configuration_filter.sql", params={ - "corpus_id": str(self.corpus.id), - "configuration_id": str(self.configuration.id), + "corpus_id": self.corpus.id.hex, + "configuration_id": self.configuration.id.hex, }): worker_results_delete( corpus_id=self.corpus.id, @@ -219,8 +219,8 @@ class TestDeleteWorkerResults(FixtureTestCase): self.page2.worker_run = self.worker_run_2 self.page2.save() with self.assertExactQueries("worker_results_delete_unset_configuration.sql", params={ - "corpus_id": str(self.corpus.id), - "element_id": str(self.page2.id), + "corpus_id": self.corpus.id.hex, + "element_id": self.page2.id.hex, }): worker_results_delete( corpus_id=self.corpus.id, @@ -250,7 +250,7 @@ class TestDeleteWorkerResults(FixtureTestCase): def test_run_all_versions(self): with self.assertExactQueries("worker_results_delete_all_versions.sql", params={ - "corpus_id": str(self.corpus.id), + "corpus_id": self.corpus.id.hex, }): worker_results_delete(corpus_id=self.corpus.id) self.check_deleted( @@ -291,8 +291,8 @@ class TestDeleteWorkerResults(FixtureTestCase): def test_run_worker_run_on_corpus(self): with 
self.assertExactQueries("worker_results_delete_in_corpus_worker_run.sql", params={ - "corpus_id": str(self.corpus.id), - "worker_run_id": str(self.worker_run_1.id), + "corpus_id": self.corpus.id.hex, + "worker_run_id": self.worker_run_1.id.hex, }): worker_results_delete( corpus_id=self.corpus.id, @@ -306,9 +306,9 @@ class TestDeleteWorkerResults(FixtureTestCase): def test_run_worker_run_on_parent(self): with self.assertExactQueries("worker_results_delete_under_parent_worker_run.sql", params={ - "corpus_id": str(self.corpus.id), - "worker_run_id": str(self.worker_run_2.id), - "element_id": str(self.page1.id), + "corpus_id": self.corpus.id.hex, + "worker_run_id": self.worker_run_2.id.hex, + "element_id": self.page1.id.hex, }): worker_results_delete(corpus_id=self.corpus.id, worker_run_id=self.worker_run_2.id, element_id=self.page1.id) self.check_deleted( @@ -324,9 +324,9 @@ class TestDeleteWorkerResults(FixtureTestCase): self.page1.worker_version = self.version_2 self.page1.save() with self.assertExactQueries("worker_results_delete_under_parent_included_worker_run.sql", params={ - "corpus_id": str(self.corpus.id), - "worker_run_id": str(self.worker_run_2.id), - "element_id": str(self.page1.id), + "corpus_id": self.corpus.id.hex, + "worker_run_id": self.worker_run_2.id.hex, + "element_id": self.page1.id.hex, }): worker_results_delete(corpus_id=self.corpus.id, worker_run_id=self.worker_run_2.id, element_id=self.page1.id) self.check_deleted( @@ -339,8 +339,8 @@ class TestDeleteWorkerResults(FixtureTestCase): def test_run_worker_run_ignore_filters(self): with self.assertExactQueries("worker_results_delete_in_corpus_worker_run.sql", params={ - "corpus_id": str(self.corpus.id), - "worker_run_id": str(self.worker_run_1.id) + "corpus_id": self.corpus.id.hex, + "worker_run_id": self.worker_run_1.id.hex }): worker_results_delete( corpus_id=self.corpus.id, diff --git a/arkindex/documents/tests/test_corpus_elements.py b/arkindex/documents/tests/test_corpus_elements.py index 
78c3929e91..0a252144e8 100644 --- a/arkindex/documents/tests/test_corpus_elements.py +++ b/arkindex/documents/tests/test_corpus_elements.py @@ -88,7 +88,7 @@ class TestListElements(FixtureAPITestCase): expected_image_ids = set(filter(None, expected_elements.values_list("image_id", flat=True))) expected_type_ids = set(expected_elements.values_list("type_id", flat=True)) - with self.assertExactQueries("list_elements.sql", params={"corpus_id": self.corpus.id}) as ctx: + with self.assertExactQueries("list_elements.sql", params={"corpus_id": self.corpus.id.hex}) as ctx: response = self.client.get( reverse("api:corpus-elements", kwargs={"corpus": self.corpus.id}), ) diff --git a/arkindex/documents/tests/test_destroy_elements.py b/arkindex/documents/tests/test_destroy_elements.py index d6f39024b8..c875c06a21 100644 --- a/arkindex/documents/tests/test_destroy_elements.py +++ b/arkindex/documents/tests/test_destroy_elements.py @@ -219,7 +219,7 @@ class TestDestroyElements(FixtureAPITestCase): self.assertEqual(len(ids), 24) self.assertEqual(len(Element.objects.exclude(id__in=ids)), 5) - with self.assertExactQueries("element_trash_children.sql", params={"id": str(self.vol.id)}): + with self.assertExactQueries("element_trash_children.sql", params={"id": self.vol.id.hex}): Element.objects.filter(id=self.vol.id).trash() with self.assertRaises(Element.DoesNotExist): @@ -234,7 +234,7 @@ class TestDestroyElements(FixtureAPITestCase): ids = list(children.values_list("id", flat=True)) self.assertEqual(len(ids), 24) - with self.assertExactQueries("element_trash_no_children.sql", params={"id": str(self.vol.id)}): + with self.assertExactQueries("element_trash_no_children.sql", params={"id": self.vol.id.hex}): Element.objects.filter(id=self.vol.id).trash(delete_children=False) with self.assertRaises(Element.DoesNotExist): @@ -248,7 +248,7 @@ class TestDestroyElements(FixtureAPITestCase): ids = list(children.values_list("id", flat=True)) self.assertEqual(len(ids), 24) - with 
self.assertExactQueries("element_trash_children.sql", params={"id": str(self.vol.id)}): + with self.assertExactQueries("element_trash_children.sql", params={"id": self.vol.id.hex}): Element.objects.filter(id=self.vol.id).order_by("name").trash() with self.assertRaises(Element.DoesNotExist): @@ -324,7 +324,7 @@ class TestDestroyElements(FixtureAPITestCase): corpus=self.corpus, ) - with self.assertExactQueries("element_trash_deep.sql", params={"id": str(elements["A"].id)}): + with self.assertExactQueries("element_trash_deep.sql", params={"id": elements["A"].id.hex}): Element.objects.filter(id=elements["A"].id).trash() self.assertFalse(Element.objects.filter(id__in=[e.id for e in elements.values()]).exists()) @@ -563,7 +563,7 @@ class TestDestroyElements(FixtureAPITestCase): test Element.delete method """ self.client.force_login(self.user) - with self.assertExactQueries("element_dot_delete.sql", params={"id": str(self.vol.id)}): + with self.assertExactQueries("element_dot_delete.sql", params={"id": self.vol.id.hex}): self.vol.delete() with self.assertRaises(Element.DoesNotExist): self.vol.refresh_from_db() diff --git a/arkindex/documents/tests/test_edit_elementpath.py b/arkindex/documents/tests/test_edit_elementpath.py index 250072d979..44e20b7c87 100644 --- a/arkindex/documents/tests/test_edit_elementpath.py +++ b/arkindex/documents/tests/test_edit_elementpath.py @@ -96,14 +96,14 @@ class TestEditElementPath(FixtureTestCase): # add_parent uses transaction.atomic(), and we are running in a unit test, which is already in a transaction. # This will cause a savepoint to be created, with a name that is hard to mock. "savepoint": f"s{_thread.get_ident()}_x{connections['default'].savepoint_state + 1}", - "A": elements["A"].id, - "B": elements["B"].id, + "A": elements["A"].id.hex, + "B": elements["B"].id.hex, # Element A has two parents so it has two paths. 
# add_parent will pick the first one to perform updates on the new child's paths, # so it will be seen in the SQL queries. To avoid intermittent failures, # add_parent sorts parent paths by `path`, so we apply the same sort here. # The paths only contain one ID, X's or Y's. - "first_parent": elements["A"].paths.order_by("path").first().path[0], + "first_parent": elements["A"].paths.order_by("path").first().path[0].hex, } ): elements["B"].add_parent(elements["A"]) @@ -171,9 +171,9 @@ class TestEditElementPath(FixtureTestCase): # add_parent uses transaction.atomic(), and we are running in a unit test, which is already in a transaction. # This will cause a savepoint to be created, with a name that is hard to mock. "savepoint": f"s{_thread.get_ident()}_x{connections['default'].savepoint_state + 1}", - "A": elements["A"].id, - "B": elements["B"].id, - "K": elements["K"].id, + "A": elements["A"].id.hex, + "B": elements["B"].id.hex, + "K": elements["K"].id.hex, } ): elements["B"].add_parent(elements["A"]) @@ -285,8 +285,8 @@ class TestEditElementPath(FixtureTestCase): # remove_child uses transaction.atomic(), and we are running in a unit test, which is already in a transaction. # This will cause a savepoint to be created, with a name that is hard to mock. 
"savepoint": f"s{_thread.get_ident()}_x{connections['default'].savepoint_state + 1}", - "A": elements["A"].id, - "B": elements["B"].id, + "A": elements["A"].id.hex, + "B": elements["B"].id.hex, } ): elements["A"].remove_child(elements["B"]) @@ -362,9 +362,9 @@ class TestEditElementPath(FixtureTestCase): def __str__(self): path_id = elements["B"].paths.get().id if path1.id == path_id: - return str(path1.path[0]) + return path1.path[0].hex if path2.id == path_id: - return str(path2.path[0]) + return path2.path[0].hex raise AssertionError("Unexpected top-level path ID") with self.assertExactQueries( @@ -372,8 +372,8 @@ class TestEditElementPath(FixtureTestCase): # remove_child uses transaction.atomic(), and we are running in a unit test, which is already in a transaction. # This will cause a savepoint to be created, with a name that is hard to mock. "savepoint": f"s{_thread.get_ident()}_x{connections['default'].savepoint_state + 1}", - "A": elements["A"].id, - "B": elements["B"].id, + "A": elements["A"].id.hex, + "B": elements["B"].id.hex, "first_parent": FirstParent(), } ): @@ -514,7 +514,7 @@ class TestEditElementPath(FixtureTestCase): # remove_children uses transaction.atomic(), and we are running in a unit test, which is already in a transaction. # This will cause a savepoint to be created, with a name that is hard to mock. "savepoint": f"s{_thread.get_ident()}_x{connections['default'].savepoint_state + 1}", - "A": elements["A"].id, + "A": elements["A"].id.hex, } ): elements["A"].remove_children() @@ -575,8 +575,8 @@ class TestEditElementPath(FixtureTestCase): # remove_children uses transaction.atomic(), and we are running in a unit test, which is already in a transaction. # This will cause a savepoint to be created, with a name that is hard to mock. 
"savepoint": f"s{_thread.get_ident()}_x{connections['default'].savepoint_state + 1}", - "A": elements["A"].id, - "X": elements["X"].id, + "A": elements["A"].id.hex, + "X": elements["X"].id.hex, } ): elements["A"].remove_children() @@ -645,8 +645,8 @@ class TestEditElementPath(FixtureTestCase): # remove_children uses transaction.atomic(), and we are running in a unit test, which is already in a transaction. # This will cause a savepoint to be created, with a name that is hard to mock. "savepoint": f"s{_thread.get_ident()}_x{connections['default'].savepoint_state + 1}", - "A": elements["A"].id, - "first_parent": elements["A"].paths.order_by("id").first().path[0], + "A": elements["A"].id.hex, + "first_parent": elements["A"].paths.order_by("id").first().path[0].hex, } ): elements["A"].remove_children() diff --git a/arkindex/documents/tests/test_indexer.py b/arkindex/documents/tests/test_indexer.py index 8fd06cb9e4..2cd29e02e2 100644 --- a/arkindex/documents/tests/test_indexer.py +++ b/arkindex/documents/tests/test_indexer.py @@ -295,14 +295,13 @@ class TestIndexerCommand(FixtureTestCase): indexer = Indexer(self.private_corpus.id) with self.assertExactQueries("indexer_prefetch.sql", params={ - "corpus_id": self.private_corpus.id, - "page_id": self.page.id, - "image_id": self.page.image_id, - "worker_run_id": self.worker_run.id, - "worker_version_id": self.worker_version.id, - "worker_id": self.worker.id, - "transcription_id": tr.id, - "type_id": location_type.id + "corpus_id": self.private_corpus.id.hex, + "page_id": self.page.id.hex, + "worker_run_id": self.worker_run.id.hex, + "worker_version_id": self.worker_version.id.hex, + "worker_id": self.worker.id.hex, + "transcription_id": tr.id.hex, + "type_id": location_type.id.hex, }): indexer.index() self.assertEqual(mock_solr.index.call_count, 1) diff --git a/arkindex/documents/tests/test_path_constraints.py b/arkindex/documents/tests/test_path_constraints.py index 751270a221..0e627cdb87 100644 --- 
a/arkindex/documents/tests/test_path_constraints.py +++ b/arkindex/documents/tests/test_path_constraints.py @@ -1,5 +1,4 @@ -from django.db import IntegrityError, connections, transaction -from django.db.utils import InternalError +from django.db import IntegrityError, ProgrammingError, connections, transaction from arkindex.documents.models import ElementPath from arkindex.project.tests import FixtureTestCase @@ -55,7 +54,7 @@ class TestPathConstraints(FixtureTestCase): ordering=11111, ) - with self.assertRaisesMessage(InternalError, "Each element may only have one ordering within the same parent"): + with self.assertRaisesMessage(ProgrammingError, "Each element may only have one ordering within the same parent"): # Committing the savepoint would not execute the deferred trigger, and committing the transaction # would mess with the test class and any subsequent unit tests, so the next best thing is to act # like we are about to commit by forcing all constraint checks to run. @@ -91,7 +90,7 @@ class TestPathConstraints(FixtureTestCase): # We save using update_fields to really ensure only the element gets updated. path.save(update_fields=["element"]) - with self.assertRaisesMessage(InternalError, "Each element may only have one ordering within the same parent"): + with self.assertRaisesMessage(ProgrammingError, "Each element may only have one ordering within the same parent"): # Committing the savepoint would not execute the deferred trigger, and committing the transaction # would mess with the test class and any subsequent unit tests, so the next best thing is to act # like we are about to commit by forcing all constraint checks to run. @@ -127,7 +126,7 @@ class TestPathConstraints(FixtureTestCase): # We save using update_fields to really ensure only the path gets updated. 
path.save(update_fields=["path"]) - with self.assertRaisesMessage(InternalError, "Each element may only have one ordering within the same parent"): + with self.assertRaisesMessage(ProgrammingError, "Each element may only have one ordering within the same parent"): # Committing the savepoint would not execute the deferred trigger, and committing the transaction # would mess with the test class and any subsequent unit tests, so the next best thing is to act # like we are about to commit by forcing all constraint checks to run. @@ -153,7 +152,7 @@ class TestPathConstraints(FixtureTestCase): # We save using update_fields to really ensure only the ordering gets updated. path.save(update_fields=["ordering"]) - with self.assertRaisesMessage(InternalError, "Each element may only have one ordering within the same parent"): + with self.assertRaisesMessage(ProgrammingError, "Each element may only have one ordering within the same parent"): # Committing the savepoint would not execute the deferred trigger, and committing the transaction # would mess with the test class and any subsequent unit tests, so the next best thing is to act # like we are about to commit by forcing all constraint checks to run. @@ -176,7 +175,7 @@ class TestPathConstraints(FixtureTestCase): ordering=3, ) - with self.assertRaisesMessage(InternalError, "Each element within a parent must have a distinct ordering"): + with self.assertRaisesMessage(ProgrammingError, "Each element within a parent must have a distinct ordering"): # Committing the savepoint would not execute the deferred trigger, and committing the transaction # would mess with the test class and any subsequent unit tests, so the next best thing is to act # like we are about to commit by forcing all constraint checks to run. @@ -201,7 +200,7 @@ class TestPathConstraints(FixtureTestCase): # We save using update_fields to really ensure only the element gets updated. 
path.save(update_fields=["element"]) - with self.assertRaisesMessage(InternalError, "Each element within a parent must have a distinct ordering"): + with self.assertRaisesMessage(ProgrammingError, "Each element within a parent must have a distinct ordering"): # Committing the savepoint would not execute the deferred trigger, and committing the transaction # would mess with the test class and any subsequent unit tests, so the next best thing is to act # like we are about to commit by forcing all constraint checks to run. @@ -234,7 +233,7 @@ class TestPathConstraints(FixtureTestCase): # We save using update_fields to really ensure only the path gets updated. path.save(update_fields=["path"]) - with self.assertRaisesMessage(InternalError, "Each element within a parent must have a distinct ordering"): + with self.assertRaisesMessage(ProgrammingError, "Each element within a parent must have a distinct ordering"): # Committing the savepoint would not execute the deferred trigger, and committing the transaction # would mess with the test class and any subsequent unit tests, so the next best thing is to act # like we are about to commit by forcing all constraint checks to run. @@ -261,7 +260,7 @@ class TestPathConstraints(FixtureTestCase): # We save using update_fields to really ensure only the ordering gets updated. path.save() - with self.assertRaisesMessage(InternalError, "Each element within a parent must have a distinct ordering"): + with self.assertRaisesMessage(ProgrammingError, "Each element within a parent must have a distinct ordering"): # Committing the savepoint would not execute the deferred trigger, and committing the transaction # would mess with the test class and any subsequent unit tests, so the next best thing is to act # like we are about to commit by forcing all constraint checks to run. 
diff --git a/arkindex/process/tests/test_managers.py b/arkindex/process/tests/test_managers.py index aa5c3c12f9..26f0feada5 100644 --- a/arkindex/process/tests/test_managers.py +++ b/arkindex/process/tests/test_managers.py @@ -105,5 +105,5 @@ class TestManagers(FixtureTestCase): Ensure the in_use method iterates over all related items """ worker_run_id = uuid4() - with self.assertExactQueries("worker_run_in_use.sql", params={"id": worker_run_id}): + with self.assertExactQueries("worker_run_in_use.sql", params={"id": worker_run_id.hex}): self.assertFalse(WorkerRun.objects.filter(id=worker_run_id).in_use()) diff --git a/arkindex/process/tests/test_process_elements.py b/arkindex/process/tests/test_process_elements.py index 0881b4fe1c..e2c60dd541 100644 --- a/arkindex/process/tests/test_process_elements.py +++ b/arkindex/process/tests/test_process_elements.py @@ -304,9 +304,9 @@ class TestProcessElements(FixtureAPITestCase): self.client.force_login(self.superuser) with self.assertExactQueries("process_elements_filter_type.sql", skip=1, params={ "user_id": self.superuser.id, - "process_id": str(self.process.id), - "corpus_id": str(self.private_corpus.id), - "type_id": str(self.folder_type.id), + "process_id": self.process.id.hex, + "corpus_id": self.private_corpus.id.hex, + "type_id": self.folder_type.id.hex, }): response = self.client.get(reverse("api:process-elements-list", kwargs={"pk": self.process.id})) @@ -333,9 +333,9 @@ class TestProcessElements(FixtureAPITestCase): self.client.force_login(self.superuser) with self.assertExactQueries("process_elements_filter_ml_class.sql", skip=1, params={ "user_id": self.superuser.id, - "process_id": str(self.process.id), - "corpus_id": str(self.private_corpus.id), - "ml_class_id": str(self.ml_class.id), + "process_id": self.process.id.hex, + "corpus_id": self.private_corpus.id.hex, + "ml_class_id": self.ml_class.id.hex, }): response = self.client.get(reverse("api:process-elements-list", kwargs={"pk": self.process.id})) 
self.assertEqual(response.status_code, status.HTTP_200_OK) @@ -719,8 +719,8 @@ class TestProcessElements(FixtureAPITestCase): self.process.save() with self.assertExactQueries("process_elements_with_image.sql", skip=1, params={ "user_id": self.superuser.id, - "process_id": str(self.process.id), - "corpus_id": str(self.private_corpus.id), + "process_id": self.process.id.hex, + "corpus_id": self.private_corpus.id.hex, }): response = self.client.get( reverse("api:process-elements-list", kwargs={"pk": self.process.id}), @@ -749,8 +749,8 @@ class TestProcessElements(FixtureAPITestCase): self.client.force_login(self.superuser) with self.assertExactQueries("process_elements_top_level.sql", skip=1, params={ "user_id": self.superuser.id, - "process_id": str(self.process.id), - "corpus_id": str(self.private_corpus.id), + "process_id": self.process.id.hex, + "corpus_id": self.private_corpus.id.hex, }): response = self.client.get( reverse("api:process-elements-list", kwargs={"pk": self.process.id}), diff --git a/arkindex/process/tests/worker_activity/test_bulk_insert.py b/arkindex/process/tests/worker_activity/test_bulk_insert.py index 026292cd54..0e2ac8c808 100644 --- a/arkindex/process/tests/worker_activity/test_bulk_insert.py +++ b/arkindex/process/tests/worker_activity/test_bulk_insert.py @@ -57,9 +57,9 @@ class TestWorkerActivityBulkInsert(FixtureAPITestCase): elements_qs = Element.objects.filter(type__slug="act", type__corpus_id=self.corpus.id) params = { - "worker_version_id": self.worker_version.id, - "corpus_id": self.corpus.id, - "process_id": self.process.id, + "worker_version_id": self.worker_version.id.hex, + "corpus_id": self.corpus.id.hex, + "process_id": self.process.id.hex, } with self.assertExactQueries("workeractivity_bulk_insert_worker_version_only.sql", params=params): WorkerActivity.objects.bulk_insert( @@ -87,10 +87,10 @@ class TestWorkerActivityBulkInsert(FixtureAPITestCase): elements_qs = Element.objects.filter(type__slug="act", 
type__corpus_id=self.corpus.id) params = { - "worker_version_id": self.worker_version.id, - "corpus_id": self.corpus.id, - "process_id": self.process.id, - "configuration_id": self.configuration.id, + "worker_version_id": self.worker_version.id.hex, + "corpus_id": self.corpus.id.hex, + "process_id": self.process.id.hex, + "configuration_id": self.configuration.id.hex, } with self.assertExactQueries("workeractivity_bulk_insert_no_model.sql", params=params): WorkerActivity.objects.bulk_insert( @@ -118,10 +118,10 @@ class TestWorkerActivityBulkInsert(FixtureAPITestCase): elements_qs = Element.objects.filter(type__slug="act", type__corpus_id=self.corpus.id) params = { - "worker_version_id": self.worker_version.id, - "model_version_id": self.model_version.id, - "corpus_id": self.corpus.id, - "process_id": self.process.id, + "worker_version_id": self.worker_version.id.hex, + "model_version_id": self.model_version.id.hex, + "corpus_id": self.corpus.id.hex, + "process_id": self.process.id.hex, } with self.assertExactQueries("workeractivity_bulk_insert_no_configuration.sql", params=params): WorkerActivity.objects.bulk_insert( @@ -184,11 +184,11 @@ class TestWorkerActivityBulkInsert(FixtureAPITestCase): elements_qs = Element.objects.filter(type__slug="act", type__corpus_id=self.corpus.id) params = { - "worker_version_id": self.worker_version.id, - "configuration_id": self.configuration.id, - "model_version_id": self.model_version.id, - "corpus_id": self.corpus.id, - "process_id": self.process.id, + "worker_version_id": self.worker_version.id.hex, + "configuration_id": self.configuration.id.hex, + "model_version_id": self.model_version.id.hex, + "corpus_id": self.corpus.id.hex, + "process_id": self.process.id.hex, } with self.assertExactQueries("workeractivity_bulk_insert.sql", params=params): WorkerActivity.objects.bulk_insert( diff --git a/arkindex/project/tests/__init__.py b/arkindex/project/tests/__init__.py index f1bc9e90ba..fc326f20e0 100644 --- 
a/arkindex/project/tests/__init__.py +++ b/arkindex/project/tests/__init__.py @@ -77,7 +77,7 @@ class _AssertExactQueriesContext(CaptureQueriesContext): items = enumerate(self.params) for name, value in items: - actual_sql = actual_sql.replace(value, f"{{{name}}}") + actual_sql = actual_sql.replace(str(value), f"{{{name}}}") self.path.write_text(actual_sql) except OSError as e: diff --git a/arkindex/project/tests/test_gis.py b/arkindex/project/tests/test_gis.py index 1e0dfde315..9d3c7647aa 100644 --- a/arkindex/project/tests/test_gis.py +++ b/arkindex/project/tests/test_gis.py @@ -1,5 +1,5 @@ from django.contrib.gis.geos import LinearRing, LineString, Point -from psycopg2 import Binary +from psycopg.sql import quote from arkindex.documents.models import Element from arkindex.project.gis import ensure_linear_ring @@ -69,8 +69,8 @@ class TestGis(FixtureTestCase): """ polygon = LinearRing((0, 0), (0, 10), (10, 10), (10, 0), (0, 0)) - # psycopg2.Binary provides the encoding from a geometry's Extended Well Known Binary to a PostgreSQL bytea - encoded_polygon = str(Binary(bytes(polygon.ewkb))) + # psycopg.Binary provides the encoding from a geometry's Extended Well Known Binary to a PostgreSQL bytea + encoded_polygon = quote(bytes(polygon.ewkb)) self.assertEqual( str(Element.objects.filter(polygon=polygon).only("id").query), diff --git a/arkindex/sql_validation/add_first_parent.sql b/arkindex/sql_validation/add_first_parent.sql index 296ee48210..31f0ed72bc 100644 --- a/arkindex/sql_validation/add_first_parent.sql +++ b/arkindex/sql_validation/add_first_parent.sql @@ -30,7 +30,7 @@ SELECT EXISTS (SELECT COALESCE(MAX(ordering) + 1, 0) FROM documents_elementpath WHERE path @> ARRAY['{A}'::uuid] - AND path[array_length(path, 1)] = '{A}'::uuid ) ; + AND path[array_length(path, 1)] = '{A}'::uuid ) ; INSERT INTO documents_elementpath (id, element_id, path, ordering) SELECT gen_random_uuid(), @@ -39,16 +39,15 @@ SELECT gen_random_uuid(), 1 FROM documents_elementpath WHERE 
element_id = '{A}'::uuid - AND path <> ARRAY['{first_parent}'::uuid]; + AND path <> '{{{first_parent}}}'::uuid[]; UPDATE "documents_elementpath" -SET "path" = ARRAY['{first_parent}'::uuid, - '{A}'::uuid]::uuid[], "ordering" = 1 +SET "path" = '{{{first_parent},{A}}}'::uuid[]::uuid[], "ordering" = 1 WHERE ("documents_elementpath"."element_id" = '{B}'::uuid AND "documents_elementpath"."path" = '{{}}'::uuid[]); UPDATE "documents_elementpath" -SET "path" = array_cat(ARRAY['{first_parent}'::uuid, '{A}'::uuid], "documents_elementpath"."path")::uuid[] +SET "path" = array_cat('{{{first_parent},{A}}}'::uuid[], "documents_elementpath"."path")::uuid[] WHERE "documents_elementpath"."path" && (ARRAY['{B}'::uuid])::uuid[]; INSERT INTO documents_elementpath (id, element_id, path, ordering) @@ -58,10 +57,8 @@ SELECT gen_random_uuid(), child_paths.ordering FROM documents_elementpath child_paths, documents_elementpath new_parent_paths -WHERE child_paths.path @> ARRAY['{first_parent}'::uuid, - '{A}'::uuid, - '{B}'::uuid] +WHERE child_paths.path @> '{{{first_parent},{A},{B}}}'::uuid[] AND new_parent_paths.element_id = '{A}'::uuid - AND new_parent_paths.path <> ARRAY['{first_parent}'::uuid]; + AND new_parent_paths.path <> '{{{first_parent}}}'::uuid[]; RELEASE SAVEPOINT "{savepoint}" diff --git a/arkindex/sql_validation/add_second_parent.sql b/arkindex/sql_validation/add_second_parent.sql index 27375cfede..ad0fb7c23e 100644 --- a/arkindex/sql_validation/add_second_parent.sql +++ b/arkindex/sql_validation/add_second_parent.sql @@ -30,7 +30,7 @@ SELECT EXISTS (SELECT COALESCE(MAX(ordering) + 1, 0) FROM documents_elementpath WHERE path @> ARRAY['{A}'::uuid] - AND path[array_length(path, 1)] = '{A}'::uuid ) ; + AND path[array_length(path, 1)] = '{A}'::uuid ) ; INSERT INTO documents_elementpath (id, element_id, path, ordering) SELECT gen_random_uuid(), @@ -47,8 +47,7 @@ SELECT gen_random_uuid(), child_paths.ordering FROM documents_elementpath child_paths, documents_elementpath 
new_parent_paths -WHERE child_paths.path @> ARRAY['{K}'::uuid, - '{B}'::uuid] +WHERE child_paths.path @> '{{{K},{B}}}'::uuid[] AND new_parent_paths.element_id = '{A}'::uuid ; RELEASE SAVEPOINT "{savepoint}" diff --git a/arkindex/sql_validation/element_dot_delete.sql b/arkindex/sql_validation/element_dot_delete.sql index 31722f6a59..c28d23ce6e 100644 --- a/arkindex/sql_validation/element_dot_delete.sql +++ b/arkindex/sql_validation/element_dot_delete.sql @@ -1,59 +1,75 @@ -DELETE FROM documents_transcriptionentity WHERE transcription_id IN ( - SELECT t.id FROM documents_transcription t - LEFT JOIN documents_elementpath elementpath USING (element_id) - WHERE t.element_id = '{id}'::uuid OR elementpath.path && ARRAY['{id}'::uuid] -) ; +DELETE +FROM documents_transcriptionentity +WHERE transcription_id IN + (SELECT t.id + FROM documents_transcription t + LEFT JOIN documents_elementpath elementpath USING (element_id) + WHERE t.element_id = '{id}'::uuid + OR elementpath.path && ARRAY['{id}'::uuid] ) ; -DELETE FROM documents_transcription +DELETE +FROM documents_transcription WHERE element_id = '{id}'::uuid -OR element_id IN ( - SELECT element_id FROM documents_elementpath WHERE path && ARRAY['{id}'::uuid] -) ; + OR element_id IN + (SELECT element_id + FROM documents_elementpath + WHERE path && ARRAY['{id}'::uuid] ) ; -DELETE FROM documents_classification +DELETE +FROM documents_classification WHERE element_id = '{id}'::uuid -OR element_id IN ( - SELECT element_id FROM documents_elementpath WHERE path && ARRAY['{id}'::uuid] -) ; + OR element_id IN + (SELECT element_id + FROM documents_elementpath + WHERE path && ARRAY['{id}'::uuid] ) ; -DELETE FROM documents_metadata +DELETE +FROM documents_metadata WHERE element_id = '{id}'::uuid -OR element_id IN ( - SELECT element_id FROM documents_elementpath WHERE path && ARRAY['{id}'::uuid] - ) ; + OR element_id IN + (SELECT element_id + FROM documents_elementpath + WHERE path && ARRAY['{id}'::uuid] ) ; -DELETE FROM 
process_processelement +DELETE +FROM process_processelement WHERE element_id = '{id}'::uuid -OR element_id IN ( - SELECT element_id FROM documents_elementpath WHERE path && ARRAY['{id}'::uuid] -) ; + OR element_id IN + (SELECT element_id + FROM documents_elementpath + WHERE path && ARRAY['{id}'::uuid] ) ; UPDATE process_process SET element_id = NULL WHERE element_id = '{id}'::uuid -OR element_id IN ( - SELECT element_id FROM documents_elementpath WHERE path && ARRAY['{id}'::uuid] -) ; + OR element_id IN + (SELECT element_id + FROM documents_elementpath + WHERE path && ARRAY['{id}'::uuid] ) ; -DELETE FROM documents_selection selection +DELETE +FROM documents_selection selection WHERE element_id = '{id}'::uuid -OR element_id IN ( - SELECT element_id FROM documents_elementpath WHERE path && ARRAY['{id}'::uuid] -) ; + OR element_id IN + (SELECT element_id + FROM documents_elementpath + WHERE path && ARRAY['{id}'::uuid] ) ; -DELETE FROM process_workeractivity +DELETE +FROM process_workeractivity WHERE element_id = '{id}'::uuid -OR element_id IN ( - SELECT element_id FROM documents_elementpath WHERE path && ARRAY['{id}'::uuid] -) ; - -WITH children_ids (id) AS ( - DELETE FROM documents_elementpath - WHERE element_id = '{id}'::uuid OR path && ARRAY['{id}'::uuid] - RETURNING element_id -) -DELETE FROM documents_element element -USING children_ids + OR element_id IN + (SELECT element_id + FROM documents_elementpath + WHERE path && ARRAY['{id}'::uuid] ) ; + +WITH children_ids (id) AS + (DELETE + FROM documents_elementpath + WHERE element_id = '{id}'::uuid + OR path && ARRAY['{id}'::uuid] RETURNING element_id) +DELETE +FROM documents_element element USING children_ids WHERE element.id = children_ids.id ; DELETE diff --git a/arkindex/sql_validation/element_move_with_children.sql b/arkindex/sql_validation/element_move_with_children.sql index 354e208d60..9466ccaeb2 100644 --- a/arkindex/sql_validation/element_move_with_children.sql +++ 
b/arkindex/sql_validation/element_move_with_children.sql @@ -21,10 +21,8 @@ WHERE element_id = '{source_id}'::uuid ; UPDATE documents_elementpath SET path = path[2:] -WHERE path @> ARRAY['{parent_id}'::uuid, - '{source_id}'::uuid] - AND path[0:2] = ARRAY['{parent_id}'::uuid, - '{source_id}'::uuid] ; +WHERE path @> '{{{parent_id},{source_id}}}'::uuid[] + AND path[0:2] = '{{{parent_id},{source_id}}}'::uuid[] ; UPDATE "documents_elementpath" SET "path" = '{{}}'::uuid[] @@ -65,15 +63,15 @@ SELECT EXISTS (SELECT COALESCE(MAX(ordering) + 1, 0) FROM documents_elementpath WHERE path @> ARRAY['{destination_id}'::uuid] - AND path[array_length(path, 1)] = '{destination_id}'::uuid ) ; + AND path[array_length(path, 1)] = '{destination_id}'::uuid ) ; UPDATE "documents_elementpath" -SET "path" = ARRAY['{destination_id}'::uuid]::uuid[], "ordering" = 3 +SET "path" = '{{{destination_id}}}'::uuid[]::uuid[], "ordering" = 3 WHERE ("documents_elementpath"."element_id" = '{source_id}'::uuid AND "documents_elementpath"."path" = '{{}}'::uuid[]); UPDATE "documents_elementpath" -SET "path" = array_cat(ARRAY['{destination_id}'::uuid], "documents_elementpath"."path")::uuid[] +SET "path" = array_cat('{{{destination_id}}}'::uuid[], "documents_elementpath"."path")::uuid[] WHERE "documents_elementpath"."path" && (ARRAY['{source_id}'::uuid])::uuid[]; RELEASE SAVEPOINT "{savepoints[1]}" diff --git a/arkindex/sql_validation/element_move_without_child.sql b/arkindex/sql_validation/element_move_without_child.sql index 354e208d60..9466ccaeb2 100644 --- a/arkindex/sql_validation/element_move_without_child.sql +++ b/arkindex/sql_validation/element_move_without_child.sql @@ -21,10 +21,8 @@ WHERE element_id = '{source_id}'::uuid ; UPDATE documents_elementpath SET path = path[2:] -WHERE path @> ARRAY['{parent_id}'::uuid, - '{source_id}'::uuid] - AND path[0:2] = ARRAY['{parent_id}'::uuid, - '{source_id}'::uuid] ; +WHERE path @> '{{{parent_id},{source_id}}}'::uuid[] + AND path[0:2] = 
'{{{parent_id},{source_id}}}'::uuid[] ; UPDATE "documents_elementpath" SET "path" = '{{}}'::uuid[] @@ -65,15 +63,15 @@ SELECT EXISTS (SELECT COALESCE(MAX(ordering) + 1, 0) FROM documents_elementpath WHERE path @> ARRAY['{destination_id}'::uuid] - AND path[array_length(path, 1)] = '{destination_id}'::uuid ) ; + AND path[array_length(path, 1)] = '{destination_id}'::uuid ) ; UPDATE "documents_elementpath" -SET "path" = ARRAY['{destination_id}'::uuid]::uuid[], "ordering" = 3 +SET "path" = '{{{destination_id}}}'::uuid[]::uuid[], "ordering" = 3 WHERE ("documents_elementpath"."element_id" = '{source_id}'::uuid AND "documents_elementpath"."path" = '{{}}'::uuid[]); UPDATE "documents_elementpath" -SET "path" = array_cat(ARRAY['{destination_id}'::uuid], "documents_elementpath"."path")::uuid[] +SET "path" = array_cat('{{{destination_id}}}'::uuid[], "documents_elementpath"."path")::uuid[] WHERE "documents_elementpath"."path" && (ARRAY['{source_id}'::uuid])::uuid[]; RELEASE SAVEPOINT "{savepoints[1]}" diff --git a/arkindex/sql_validation/indexer_prefetch.sql b/arkindex/sql_validation/indexer_prefetch.sql index 74c6e2bc86..aab9525166 100644 --- a/arkindex/sql_validation/indexer_prefetch.sql +++ b/arkindex/sql_validation/indexer_prefetch.sql @@ -5,7 +5,7 @@ SELECT element.id AS parent_id, element.name AS name, elementtype.display_name AS type_name, element.image_id AS image_id, - element.polygon::bytea AS polygon, + element.polygon AS polygon, element.worker_run_id AS worker_run_id FROM documents_element element INNER JOIN documents_elementtype elementtype ON (elementtype.id = element.type_id) @@ -21,7 +21,7 @@ WITH parent AS element.name AS name, elementtype.display_name AS type_name, element.image_id AS image_id, - element.polygon::bytea AS polygon, + element.polygon AS polygon, element.worker_run_id AS worker_run_id FROM documents_element element INNER JOIN documents_elementtype elementtype ON (elementtype.id = element.type_id) @@ -35,7 +35,7 @@ SELECT parent_id, element.name 
as name, elementtype.display_name as type_name, element.image_id AS image_id, - element.polygon::bytea AS polygon, + element.polygon AS polygon, element.worker_run_id AS worker_run_id FROM (SELECT * diff --git a/arkindex/sql_validation/list_elements.sql b/arkindex/sql_validation/list_elements.sql index 2cca682e6a..546d2c19cb 100644 --- a/arkindex/sql_validation/list_elements.sql +++ b/arkindex/sql_validation/list_elements.sql @@ -27,7 +27,7 @@ SELECT "documents_element"."id", "documents_element"."worker_version_id", "documents_element"."worker_run_id", "documents_element"."image_id", - "documents_element"."polygon"::bytea, + "documents_element"."polygon", "documents_element"."rotation_angle", "documents_element"."mirrored", "documents_element"."confidence", diff --git a/arkindex/sql_validation/process_elements_with_image.sql b/arkindex/sql_validation/process_elements_with_image.sql index c2197f8318..75a1488391 100644 --- a/arkindex/sql_validation/process_elements_with_image.sql +++ b/arkindex/sql_validation/process_elements_with_image.sql @@ -71,7 +71,7 @@ SELECT "documents_element"."id", "documents_element"."image_id", "images_image"."width", "images_image"."height", - "documents_element"."polygon"::bytea, + "documents_element"."polygon", "documents_element"."rotation_angle", "documents_element"."mirrored", NULLIF(CONCAT((RTRIM("images_imageserver"."url", '/'))::text, (CONCAT(('/')::text, ("images_image"."path")::text))::text), '/') AS "image_url" diff --git a/arkindex/sql_validation/remove_child_last_parent.sql b/arkindex/sql_validation/remove_child_last_parent.sql index fa81733d12..c44943c5c5 100644 --- a/arkindex/sql_validation/remove_child_last_parent.sql +++ b/arkindex/sql_validation/remove_child_last_parent.sql @@ -14,12 +14,8 @@ WHERE element_id = '{B}'::uuid ; UPDATE documents_elementpath SET path = path[3:] -WHERE path @> ARRAY['{first_parent}'::uuid, - '{A}'::uuid, - '{B}'::uuid] - AND path[0:3] = ARRAY['{first_parent}'::uuid, - '{A}'::uuid, - 
'{B}'::uuid] ; +WHERE path @> '{{{first_parent},{A},{B}}}'::uuid[] + AND path[0:3] = '{{{first_parent},{A},{B}}}'::uuid[] ; UPDATE "documents_elementpath" SET "path" = '{{}}'::uuid[] diff --git a/arkindex/sql_validation/remove_children_multiple_parents.sql b/arkindex/sql_validation/remove_children_multiple_parents.sql index c1565c51de..03cfbf7a93 100644 --- a/arkindex/sql_validation/remove_children_multiple_parents.sql +++ b/arkindex/sql_validation/remove_children_multiple_parents.sql @@ -10,7 +10,7 @@ LIMIT 1; DELETE FROM documents_elementpath child_paths USING documents_elementpath parent_paths WHERE parent_paths.element_id = '{A}'::uuid - AND parent_paths.path <> ARRAY['{first_parent}'::uuid] + AND parent_paths.path <> '{{{first_parent}}}'::uuid[] AND child_paths.path @> (parent_paths.path || '{A}'::uuid) ; DELETE @@ -21,9 +21,7 @@ WHERE parent_paths.path && ARRAY['{A}'::uuid] UPDATE documents_elementpath SET path = path[2 + 1:] -WHERE path @> ARRAY['{first_parent}'::uuid, - '{A}'::uuid] - AND path[:2] = ARRAY['{first_parent}'::uuid, - '{A}'::uuid] ; +WHERE path @> '{{{first_parent},{A}}}'::uuid[] + AND path[:2] = '{{{first_parent},{A}}}'::uuid[] ; RELEASE SAVEPOINT "{savepoint}" diff --git a/arkindex/sql_validation/remove_children_no_parents.sql b/arkindex/sql_validation/remove_children_no_parents.sql index dba5386430..1c939384a7 100644 --- a/arkindex/sql_validation/remove_children_no_parents.sql +++ b/arkindex/sql_validation/remove_children_no_parents.sql @@ -15,7 +15,7 @@ WHERE parent_paths.path && ARRAY['{A}'::uuid] UPDATE documents_elementpath SET path = path[1 + 1:] -WHERE path @> ARRAY['{A}'::uuid] - AND path[:1] = ARRAY['{A}'::uuid] ; +WHERE path @> '{{{A}}}'::uuid[] + AND path[:1] = '{{{A}}}'::uuid[] ; RELEASE SAVEPOINT "{savepoint}" diff --git a/arkindex/sql_validation/remove_children_single_parent.sql b/arkindex/sql_validation/remove_children_single_parent.sql index 7d86e0d019..e884f1d34f 100644 --- 
a/arkindex/sql_validation/remove_children_single_parent.sql +++ b/arkindex/sql_validation/remove_children_single_parent.sql @@ -15,9 +15,7 @@ WHERE parent_paths.path && ARRAY['{A}'::uuid] UPDATE documents_elementpath SET path = path[2 + 1:] -WHERE path @> ARRAY['{X}'::uuid, - '{A}'::uuid] - AND path[:2] = ARRAY['{X}'::uuid, - '{A}'::uuid] ; +WHERE path @> '{{{X},{A}}}'::uuid[] + AND path[:2] = '{{{X},{A}}}'::uuid[] ; RELEASE SAVEPOINT "{savepoint}" diff --git a/arkindex/sql_validation/workeractivity_bulk_insert_no_model.sql b/arkindex/sql_validation/workeractivity_bulk_insert_no_model.sql index cd539dd953..461456a39e 100644 --- a/arkindex/sql_validation/workeractivity_bulk_insert_no_model.sql +++ b/arkindex/sql_validation/workeractivity_bulk_insert_no_model.sql @@ -15,7 +15,7 @@ FROM WHERE ("documents_elementtype"."corpus_id" = '{corpus_id}'::uuid AND "documents_elementtype"."slug" = 'act')) AS elt ON CONFLICT (worker_version_id, element_id, - configuration_id) + configuration_id) WHERE configuration_id IS NOT NULL AND model_version_id IS NULL DO UPDATE diff --git a/arkindex/training/api.py b/arkindex/training/api.py index b23502c88f..fd56c44f0a 100644 --- a/arkindex/training/api.py +++ b/arkindex/training/api.py @@ -69,9 +69,16 @@ def _fetch_datasetelement_neighbors(datasetelements): """ if not datasetelements: return datasetelements + + # psycopg 3 does not support `IN %s`, so we use a variable amount of placeholders for each ID instead. 
+ # https://www.psycopg.org/psycopg3/docs/basic/from_pg2.html#you-cannot-use-in-s-with-a-tuple + # We use named placeholders with list indices so that we can refer to the same ID twice, + # because we are filtering on DatasetElement IDs twice + placeholders = ",".join(f"%({i})s" for i in range(len(datasetelements))) + with connection.cursor() as cursor: cursor.execute( - """ + f""" WITH neighbors AS ( SELECT n.id, @@ -89,7 +96,7 @@ def _fetch_datasetelement_neighbors(datasetelements): WHERE set_id IN ( SELECT set_id FROM training_datasetelement - WHERE id IN %(ids)s + WHERE id IN ({placeholders}) ) ORDER BY n.element_id ) @@ -97,9 +104,9 @@ def _fetch_datasetelement_neighbors(datasetelements): neighbors.id, neighbors.previous, neighbors.next FROM neighbors - WHERE neighbors.id in %(ids)s + WHERE neighbors.id in ({placeholders}) """, - {"ids": tuple(datasetelement.id for datasetelement in datasetelements)} + {str(i): datasetelement.id for i, datasetelement in enumerate(datasetelements)}, ) neighbors = { diff --git a/base/requirements.txt b/base/requirements.txt index 3d48b30e39..1ff54d70b0 100644 --- a/base/requirements.txt +++ b/base/requirements.txt @@ -3,4 +3,3 @@ cryptography==3.4.7 Django==5.0.8 ed25519==1.5 lxml==4.9.2 -psycopg2-binary==2.9.1 diff --git a/requirements.txt b/requirements.txt index fdfda8d85c..811e308e35 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,6 +11,7 @@ djangorestframework==3.15.2 djangorestframework-simplejwt==5.2.2 docker==7.0.0 drf-spectacular==0.27.2 +psycopg[binary]==3.2.4 python-magic==0.4.27 python-memcached==1.59 PyYAML==6.0 diff --git a/ruff.toml b/ruff.toml index 8a02c65695..453bb5b5c9 100644 --- a/ruff.toml +++ b/ruff.toml @@ -45,7 +45,7 @@ known-third-party = [ "drf_spectacular", "enumfields", "gitlab", - "psycopg2", + "psycopg", "requests", "responses", "rest_framework", -- GitLab