diff --git a/.isort.cfg b/.isort.cfg index a33fb368115d5f8730f3c1a8d1eeffdd9f848a7c..255d5f5fe3a1c46d3dab4df8b8b25b1cf87391af 100644 --- a/.isort.cfg +++ b/.isort.cfg @@ -8,4 +8,4 @@ line_length = 120 default_section=FIRSTPARTY known_first_party = arkindex_common,ponos,transkribus -known_third_party = boto3,botocore,corsheaders,django,django_admin_hstore_widget,django_rq,elasticsearch,elasticsearch_dsl,enumfields,gitlab,psycopg2,requests,responses,rest_framework,setuptools,tenacity,tripoli,yaml +known_third_party = boto3,botocore,corsheaders,django,django_admin_hstore_widget,django_rq,elasticsearch,elasticsearch_dsl,enumfields,gitlab,psycopg2,requests,responses,rest_framework,setuptools,sqlparse,tenacity,tripoli,yaml diff --git a/arkindex/documents/tests/tasks/test_corpus_delete.py b/arkindex/documents/tests/tasks/test_corpus_delete.py index 6beb2e39ecea6b6bc60fb0ffdecbeeb99af733ce..a9d7b0c3b5713cf0bb5c65cc8a636c332e3e82da 100644 --- a/arkindex/documents/tests/tasks/test_corpus_delete.py +++ b/arkindex/documents/tests/tasks/test_corpus_delete.py @@ -128,7 +128,7 @@ class TestDeleteCorpus(FixtureTestCase): self.corpus.repository = self.repo self.corpus.save() - with self.assertNumQueries(24): + with self.assertExactQueries('corpus_delete.sql', params={'corpus_id': self.corpus.id}): corpus_delete(self.corpus.id) # Ensure the command restores the signal receivers diff --git a/arkindex/project/settings.py b/arkindex/project/settings.py index 7e13be7000de0b8dfbd37f48e9704623b4b0ba65..30621b7971813bed9f9f6ccb1baa00fcff587708 100644 --- a/arkindex/project/settings.py +++ b/arkindex/project/settings.py @@ -459,6 +459,8 @@ FRONTEND_SENTRY_DSN = conf['sentry']['frontend_dsn'] # You can override its value through local_settings.py file by assigning it 'http://YOURDOMAIN.pagekite.me' BACKEND_PUBLIC_URL_OAUTH = None +SQL_VALIDATION_DIR = BASE_DIR / 'sql_validation' + if TEST_ENV: # Overrides for unit tests AWS_ACCESS_KEY = 'test' diff --git a/arkindex/project/tests/__init__.py 
b/arkindex/project/tests/__init__.py index 4f676e1da83c1e9c19c5626f5787b3847289fb23..237513bb36dce8e334945563bd228560d4a83826 100644 --- a/arkindex/project/tests/__init__.py +++ b/arkindex/project/tests/__init__.py @@ -1,4 +1,11 @@ +from pathlib import Path +from typing import Any, Callable, Iterable, Mapping, Optional, Union + +import sqlparse +from django.conf import settings +from django.db import DEFAULT_DB_ALIAS, connections from django.test import TestCase +from django.test.utils import CaptureQueriesContext from django.utils.functional import cached_property from rest_framework.test import APITestCase @@ -7,6 +14,60 @@ from arkindex.images.models import ImageServer from arkindex.users.models import User +class _AssertExactQueriesContext(CaptureQueriesContext): + """ + Context manager for assertExactQueries. + The implementation is inspired by assertNumQueries's own implementation. + """ + + def __init__(self, test_case, path, params, connection): + self.test_case = test_case + self.path = settings.SQL_VALIDATION_DIR / Path(path) + self.params = params + super().__init__(connection) + + def __exit__(self, exc_type, exc_value, traceback): + super().__exit__(exc_type, exc_value, traceback) + if exc_type is not None: + return + + # Django's logged queries are each on a single line without semicolons + # so we lint them and assemble them to build a more readable diff for humans. + actual_sql = sqlparse.format( + ';'.join(query['sql'] for query in self), + reindent=True, + use_space_around_operators=True, + indent_width=4, + ) + + # When the file does not exist, try to create it with the current queries. + # This avoids having to write all of the queries by hand when adding assertExactQueries for the first time. + if not self.path.exists(): + try: + self.path.write_text(actual_sql) + except IOError as e: + raise AssertionError( + f'Could not assert on SQL queries; the file at {self.path} did not exist. 
' + 'A new file could not be created with the current SQL queries.' + ) from e + else: + raise AssertionError( + f'Could not assert on SQL queries; the file at {self.path} did not exist. ' + 'A file has been created with the current SQL queries, please check it manually.' + ) + + expected_sql = self.path.read_text().strip() + # Apply str.format on the SQL when needed to handle variable IDs + if self.params: + if isinstance(self.params, Mapping): + expected_sql = expected_sql.format(**self.params) + else: + expected_sql = expected_sql.format(*self.params) + + # Use the base TestCase's assertions, since they can already handle displaying a diff. + self.test_case.assertEqual(expected_sql, actual_sql) + + class FixtureMixin(object): """ Add the database fixture to a test case @@ -25,6 +86,47 @@ class FixtureMixin(object): def internal_user(self): return User.objects.get(email='internal@internal.fr') + def assertExactQueries( + self, + path: Union[Path, str], + func: Optional[Callable] = None, + *args, + using: str = DEFAULT_DB_ALIAS, + params: Union[Iterable, Mapping[str, Any]] = [], + **kwargs) -> Optional[_AssertExactQueriesContext]: + """ + Assert that a function call causes exactly the SQL queries specified in a given file. + When used as a context manager, asserts that any queries occurring while in the context match the given file. + This function supports all of Django's `assertNumQueries` syntax: + + >>> self.assertExactQueries('path/to/file.sql', do_something) + >>> with self.assertExactQueries('path/to/file.sql'): + ... do_something() + + Paths will be relative to the SQL_VALIDATION_DIR setting. + SQL queries can often hold IDs or other dynamic values that depend on the test context. + To avoid having to update IDs or rewrite the fixture file every time, you can use the `params` argument + and give it anything supported by `str.format`; either an iterable or a mapping. 
For example: + + ``` + SELECT "documents_element"."name" FROM "documents_element" WHERE "documents_element"."id" = '{id}'::uuid; + ``` + + >>> with self.assertExactQueries('path/to/file.sql', params={'id': element_id}): + ... Element.objects.filter(id=element_id).values('name') + + To make first runs easier, when the expected SQL file does not exist, assertExactQueries will try to write + the current SQL queries to it and warn you using an `AssertionError`. Make sure to check this new SQL file + as subsequent runs of unit tests will no longer fail. + """ + context = _AssertExactQueriesContext(self, path, params, connections[using]) + + if func is None: + return context + + with context: + func(*args, **kwargs) + class FixtureTestCase(FixtureMixin, TestCase): """ diff --git a/arkindex/sql_validation/corpus_delete.sql b/arkindex/sql_validation/corpus_delete.sql new file mode 100644 index 0000000000000000000000000000000000000000..8f0dd3e31f82fdc4a5cec25de38f1d2691b251b0 --- /dev/null +++ b/arkindex/sql_validation/corpus_delete.sql @@ -0,0 +1,155 @@ +SELECT "documents_corpus"."created", + "documents_corpus"."updated", + "documents_corpus"."id", + "documents_corpus"."name", + "documents_corpus"."description", + "documents_corpus"."repository_id", + "documents_corpus"."public" +FROM "documents_corpus" +WHERE "documents_corpus"."id" = '78d45b37-36d1-4576-8bb2-8e5844f31feb'::uuid +LIMIT 21; + +DELETE +FROM "documents_allowedmetadata" +WHERE "documents_allowedmetadata"."corpus_id" = '78d45b37-36d1-4576-8bb2-8e5844f31feb'::uuid; + +DELETE +FROM "dataimport_workerrun" +WHERE "dataimport_workerrun"."id" IN + (SELECT U0."id" + FROM "dataimport_workerrun" U0 + INNER JOIN "dataimport_dataimport" U1 ON (U0."dataimport_id" = U1."id") + WHERE U1."corpus_id" = '78d45b37-36d1-4576-8bb2-8e5844f31feb'::uuid); + +DELETE +FROM "dataimport_dataimportelement" +WHERE "dataimport_dataimportelement"."id" IN + (SELECT U0."id" + FROM "dataimport_dataimportelement" U0 + INNER JOIN 
"dataimport_dataimport" U1 ON (U0."dataimport_id" = U1."id") + WHERE U1."corpus_id" = '78d45b37-36d1-4576-8bb2-8e5844f31feb'::uuid); + +DELETE +FROM "dataimport_dataimportelement" +WHERE "dataimport_dataimportelement"."id" IN + (SELECT U0."id" + FROM "dataimport_dataimportelement" U0 + INNER JOIN "documents_element" U1 ON (U0."element_id" = U1."id") + WHERE U1."corpus_id" = '78d45b37-36d1-4576-8bb2-8e5844f31feb'::uuid); + +DELETE +FROM "dataimport_dataimport_files" +WHERE "dataimport_dataimport_files"."id" IN + (SELECT U0."id" + FROM "dataimport_dataimport_files" U0 + INNER JOIN "dataimport_dataimport" U1 ON (U0."dataimport_id" = U1."id") + WHERE U1."corpus_id" = '78d45b37-36d1-4576-8bb2-8e5844f31feb'::uuid); + +DELETE +FROM "dataimport_dataimport_files" +WHERE "dataimport_dataimport_files"."id" IN + (SELECT U0."id" + FROM "dataimport_dataimport_files" U0 + INNER JOIN "dataimport_datafile" U1 ON (U0."datafile_id" = U1."id") + WHERE U1."corpus_id" = '78d45b37-36d1-4576-8bb2-8e5844f31feb'::uuid); + +DELETE +FROM "dataimport_dataimport" +WHERE "dataimport_dataimport"."corpus_id" = '78d45b37-36d1-4576-8bb2-8e5844f31feb'::uuid; + +DELETE +FROM "dataimport_datafile" +WHERE "dataimport_datafile"."corpus_id" = '78d45b37-36d1-4576-8bb2-8e5844f31feb'::uuid; + +DELETE +FROM "documents_metadata" +WHERE "documents_metadata"."id" IN + (SELECT U0."id" + FROM "documents_metadata" U0 + INNER JOIN "documents_element" U1 ON (U0."element_id" = U1."id") + WHERE U1."corpus_id" = '78d45b37-36d1-4576-8bb2-8e5844f31feb'::uuid); + +DELETE +FROM "documents_entitylink" +WHERE "documents_entitylink"."id" IN + (SELECT U0."id" + FROM "documents_entitylink" U0 + INNER JOIN "documents_entityrole" U1 ON (U0."role_id" = U1."id") + WHERE U1."corpus_id" = '78d45b37-36d1-4576-8bb2-8e5844f31feb'::uuid); + +DELETE +FROM "documents_entityrole" +WHERE "documents_entityrole"."corpus_id" = '78d45b37-36d1-4576-8bb2-8e5844f31feb'::uuid; + +DELETE +FROM "documents_transcriptionentity" +WHERE 
"documents_transcriptionentity"."id" IN + (SELECT U0."id" + FROM "documents_transcriptionentity" U0 + INNER JOIN "documents_entity" U1 ON (U0."entity_id" = U1."id") + WHERE U1."corpus_id" = '78d45b37-36d1-4576-8bb2-8e5844f31feb'::uuid); + +DELETE +FROM "documents_transcriptionentity" +WHERE "documents_transcriptionentity"."id" IN + (SELECT U0."id" + FROM "documents_transcriptionentity" U0 + INNER JOIN "documents_transcription" U1 ON (U0."transcription_id" = U1."id") + INNER JOIN "documents_element" U2 ON (U1."element_id" = U2."id") + WHERE U2."corpus_id" = '78d45b37-36d1-4576-8bb2-8e5844f31feb'::uuid); + +DELETE +FROM "documents_entity" +WHERE "documents_entity"."corpus_id" = '78d45b37-36d1-4576-8bb2-8e5844f31feb'::uuid; + +DELETE +FROM "documents_classification" +WHERE "documents_classification"."id" IN + (SELECT U0."id" + FROM "documents_classification" U0 + INNER JOIN "documents_element" U1 ON (U0."element_id" = U1."id") + WHERE U1."corpus_id" = '78d45b37-36d1-4576-8bb2-8e5844f31feb'::uuid); + +DELETE +FROM "documents_mlclass" +WHERE "documents_mlclass"."corpus_id" = '78d45b37-36d1-4576-8bb2-8e5844f31feb'::uuid; + +DELETE +FROM "documents_transcription" +WHERE "documents_transcription"."id" IN + (SELECT U0."id" + FROM "documents_transcription" U0 + INNER JOIN "documents_element" U1 ON (U0."element_id" = U1."id") + WHERE U1."corpus_id" = '78d45b37-36d1-4576-8bb2-8e5844f31feb'::uuid); + +DELETE +FROM "documents_elementpath" +WHERE "documents_elementpath"."id" IN + (SELECT U0."id" + FROM "documents_elementpath" U0 + INNER JOIN "documents_element" U1 ON (U0."element_id" = U1."id") + WHERE U1."corpus_id" = '78d45b37-36d1-4576-8bb2-8e5844f31feb'::uuid); + +DELETE +FROM "documents_selection" +WHERE "documents_selection"."id" IN + (SELECT U0."id" + FROM "documents_selection" U0 + INNER JOIN "documents_element" U1 ON (U0."element_id" = U1."id") + WHERE U1."corpus_id" = '78d45b37-36d1-4576-8bb2-8e5844f31feb'::uuid); + +DELETE +FROM "documents_element" +WHERE 
"documents_element"."corpus_id" = '78d45b37-36d1-4576-8bb2-8e5844f31feb'::uuid; + +DELETE +FROM "documents_elementtype" +WHERE "documents_elementtype"."corpus_id" = '78d45b37-36d1-4576-8bb2-8e5844f31feb'::uuid; + +DELETE +FROM "users_corpusright" +WHERE "users_corpusright"."corpus_id" = '78d45b37-36d1-4576-8bb2-8e5844f31feb'::uuid; + +DELETE +FROM "documents_corpus" +WHERE "documents_corpus"."id" = '78d45b37-36d1-4576-8bb2-8e5844f31feb'::uuid \ No newline at end of file