From e630651bf96b5cee64b2e77336cde192cd899409 Mon Sep 17 00:00:00 2001 From: ml bonhomme <bonhomme@teklia.com> Date: Tue, 20 Feb 2024 08:46:11 +0000 Subject: [PATCH] Support worker_run_id in DestroyWorkerResults --- arkindex/documents/api/elements.py | 98 ++-- arkindex/documents/tasks.py | 27 +- .../test_selection_worker_results_delete.py | 61 ++- .../tests/tasks/test_worker_results_delete.py | 77 ++- .../tests/test_destroy_worker_results.py | 140 +++++- arkindex/project/triggers.py | 59 ++- ...er_results_delete_in_corpus_worker_run.sql | 144 ++++++ ...elete_under_parent_included_worker_run.sql | 472 ++++++++++++++++++ ...results_delete_under_parent_worker_run.sql | 337 +++++++++++++ 9 files changed, 1357 insertions(+), 58 deletions(-) create mode 100644 arkindex/sql_validation/worker_results_delete_in_corpus_worker_run.sql create mode 100644 arkindex/sql_validation/worker_results_delete_under_parent_included_worker_run.sql create mode 100644 arkindex/sql_validation/worker_results_delete_under_parent_worker_run.sql diff --git a/arkindex/documents/api/elements.py b/arkindex/documents/api/elements.py index 8178827464..6b5f1925ba 100644 --- a/arkindex/documents/api/elements.py +++ b/arkindex/documents/api/elements.py @@ -2193,6 +2193,14 @@ class CorpusSelectionDestroy(CorpusACLMixin, SelectionMixin, DestroyAPIView): delete=extend_schema( operation_id="DestroyWorkerResults", parameters=[ + OpenApiParameter( + "worker_run_id", + type=UUID, + required=False, + description="Only delete Worker Results produced by a specific worker run. " + "If this parameter is set, any `worker_version_id`, `model_version_id` " + "or `configuration_id` parameters will be ignored.", + ), OpenApiParameter( "worker_version_id", type=UUID, @@ -2233,38 +2241,30 @@ class CorpusSelectionDestroy(CorpusACLMixin, SelectionMixin, DestroyAPIView): ) class WorkerResultsDestroy(CorpusACLMixin, DestroyAPIView): """ - Delete all Worker Results from all WorkerVersions or a specific one - on a Corpus or under a specified parent element (parent element included) + Delete all Worker Results, or Worker Results produced by specific WorkerRuns or WorkerVersions + (the results to delete can also be filtered by ModelVersion and Configuration) + on a Corpus, the selection, or under a specified parent element (parent element included) """ permission_classes = (IsVerified, ) - # https://github.com/tfranzel/drf-spectacular/issues/308 - @extend_schema(responses={204: None}) - def delete(self, request, *args, **kwargs): - corpus = self.get_corpus(self.kwargs["corpus"], role=Role.Admin) - + def results_filters(self): errors = defaultdict(list) - use_selection = self.request.query_params.get("use_selection", "false").lower() not in ("false", "0") - if use_selection: - # Only check for selected elements if the selection feature is enabled - if settings.ARKINDEX_FEATURES["selection"]: - if "element_id" in self.request.query_params: - errors["use_selection"].append("use_selection and element_id cannot be used simultaneously.") - if not self.request.user.selected_elements.filter(corpus=corpus).exists(): - errors["use_selection"].append("No elements of the specified corpus have been selected.") - else: - errors["use_selection"].append("Selection is not available on this instance.") - - element_id = None - if "element_id" in self.request.query_params: + if "worker_run_id" in self.request.query_params: try: - element_id = UUID(self.request.query_params["element_id"]) + worker_run_id = UUID(self.request.query_params["worker_run_id"]) except (TypeError, ValueError): - errors["element_id"].append("Invalid UUID.") + raise ValidationError({"worker_run_id": ["Invalid UUID."]}) else: - if not corpus.elements.filter(id=element_id).exists(): - errors["element_id"].append("This element does not exist in the specified corpus.") + try: + worker_run = WorkerRun.objects.get(id=worker_run_id) + except WorkerRun.DoesNotExist: + raise ValidationError({"worker_run_id": ["This worker run does not exist."]}) + + # Ignore the other parameters when a worker run ID is set + return { + "worker_run": worker_run, + } worker_version = None if "worker_version_id" in self.request.query_params: @@ -2314,22 +2314,60 @@ class WorkerResultsDestroy(CorpusACLMixin, DestroyAPIView): if errors: raise ValidationError(errors) + return { + "version": worker_version, + "model_version": model_version, + "configuration": configuration + } + + # https://github.com/tfranzel/drf-spectacular/issues/308 + @extend_schema(responses={204: None}) + def delete(self, request, *args, **kwargs): + corpus = self.get_corpus(self.kwargs["corpus"], role=Role.Admin) + + errors = defaultdict(list) + + use_selection = self.request.query_params.get("use_selection", "false").lower() not in ("false", "0") + if use_selection: + # Only check for selected elements if the selection feature is enabled + if settings.ARKINDEX_FEATURES["selection"]: + if "element_id" in self.request.query_params: + errors["use_selection"].append("use_selection and element_id cannot be used simultaneously.") + if not self.request.user.selected_elements.filter(corpus=corpus).exists(): + errors["use_selection"].append("No elements of the specified corpus have been selected.") + else: + errors["use_selection"].append("Selection is not available on this instance.") + + element_id = None + if "element_id" in self.request.query_params: + try: + element_id = UUID(self.request.query_params["element_id"]) + except (TypeError, ValueError): + errors["element_id"].append("Invalid UUID.") + else: + if not corpus.elements.filter(id=element_id).exists(): + errors["element_id"].append("This element does not exist in the specified corpus.") + + try: + filters = self.results_filters() + except ValidationError as errs: + errors = errors | errs.detail + + if errors: + raise ValidationError(errors) + if use_selection: selection_worker_results_delete( corpus=corpus, - version=worker_version, - model_version=model_version, - configuration=configuration, user_id=self.request.user.id, + **filters ) else: worker_results_delete( corpus_id=corpus.id, - version=worker_version, element_id=element_id, - model_version=model_version, - configuration=configuration, user_id=self.request.user.id, + **filters ) return Response(status=status.HTTP_204_NO_CONTENT) diff --git a/arkindex/documents/tasks.py b/arkindex/documents/tasks.py index fcf517c06b..7e0421e4e4 100644 --- a/arkindex/documents/tasks.py +++ b/arkindex/documents/tasks.py @@ -104,6 +104,7 @@ def element_trash(queryset: ElementQuerySet, delete_children: bool) -> None: @job("high", timeout=settings.RQ_TIMEOUTS["worker_results_delete"]) def selection_worker_results_delete( corpus_id: str, + worker_run_id: Optional[str] = None, model_version_id: Optional[str] = None, configuration_id: Optional[str | Literal[False]] = None, version_id: Optional[str] = None, @@ -123,6 +124,7 @@ def selection_worker_results_delete( worker_results_delete( corpus_id=corpus_id, element_id=element_id, + worker_run_id=worker_run_id, version_id=version_id, model_version_id=model_version_id, configuration_id=configuration_id, @@ -132,6 +134,7 @@ def selection_worker_results_delete( @job("high", timeout=settings.RQ_TIMEOUTS["worker_results_delete"]) def worker_results_delete( corpus_id: str, + worker_run_id: Optional[str] = None, version_id: Optional[str] = None, element_id: Optional[str] = None, model_version_id: Optional[str] = None, @@ -142,6 +145,8 @@ def worker_results_delete( whole corpus, under a specified parent element (parent element included), or on a single element. Results can be filtered depending on a specific model version and a specific or unset configuration. """ + assert (not worker_run_id or not version_id), "The worker_run_id and version_id parameters are mutually exclusive." + elements = Element.objects.filter(corpus_id=corpus_id) classifications = Classification.objects.filter(element__corpus_id=corpus_id) transcriptions = Transcription.objects.filter(element__corpus_id=corpus_id) @@ -155,8 +160,19 @@ def worker_results_delete( metadata = MetaData.objects.filter(element__corpus_id=corpus_id) worker_activities = WorkerActivity.objects.filter(element__corpus_id=corpus_id) + # When a worker run ID is defined, filter by that worker run ID + if worker_run_id: + elements = elements.filter(worker_run_id=worker_run_id) + classifications = classifications.filter(worker_run_id=worker_run_id) + transcriptions = transcriptions.filter(worker_run_id=worker_run_id) + transcription_entities = transcription_entities.filter(transcription__worker_run_id=worker_run_id) + worker_transcription_entities = worker_transcription_entities.filter(worker_run_id=worker_run_id) + metadata = metadata.filter(worker_run_id=worker_run_id) + # There is no worker_run_id on Worker Activities so the best thing we can do is delete the worker activities + # attached to the elements produced with that worker run, and they are already being deleted by elements.trash() + worker_activities = worker_activities.none() # When a version ID is defined, filter by the exact version ID - if version_id: + elif version_id: elements = elements.filter(worker_version_id=version_id) classifications = classifications.filter(worker_version_id=version_id) transcriptions = transcriptions.filter(worker_version_id=version_id) @@ -164,7 +180,9 @@ def worker_results_delete( worker_transcription_entities = worker_transcription_entities.filter(worker_version_id=version_id) metadata = metadata.filter(worker_version_id=version_id) worker_activities = worker_activities.filter(worker_version_id=version_id) - # Otherwise, select everything that has any worker version ID. + # Otherwise, select everything that has any worker version ID. (When something has been created + # by a worker run, it always has a worker version; however we have things that were created with + # a worker version but without a worker run.) # We use worker_version_id != None and not worker_version_id__isnull=False, # because isnull would cause an unnecessary LEFT JOIN query. else: @@ -202,7 +220,7 @@ def worker_results_delete( metadata = metadata.filter(element_id=element_id) worker_activities = worker_activities.filter(element_id=element_id) - if model_version_id: + if not worker_run_id and model_version_id: elements = elements.filter(worker_run__model_version_id=model_version_id) classifications = classifications.filter(worker_run__model_version_id=model_version_id) transcriptions = transcriptions.filter(worker_run__model_version_id=model_version_id) @@ -212,7 +230,7 @@ def worker_results_delete( # Activities are not linked to a worker run and cannot be filtered by model version worker_activities = worker_activities.none() - if configuration_id is not None: + if not worker_run_id and configuration_id is not None: if configuration_id is False: # Only delete results generated on a worker run with no configuration elements = elements.filter(worker_run__configuration_id=None) @@ -247,6 +265,7 @@ def worker_results_delete( # we were supposed to delete worker results on. worker_results_delete( corpus_id=corpus_id, + worker_run_id=worker_run_id, version_id=version_id, element_id=element_id, model_version_id=model_version_id, diff --git a/arkindex/documents/tests/tasks/test_selection_worker_results_delete.py b/arkindex/documents/tests/tasks/test_selection_worker_results_delete.py index 420b4c33c4..1f25a1aa07 100644 --- a/arkindex/documents/tests/tasks/test_selection_worker_results_delete.py +++ b/arkindex/documents/tests/tasks/test_selection_worker_results_delete.py @@ -4,7 +4,7 @@ from django.db import connections from django.db.utils import IntegrityError from arkindex.documents.tasks import selection_worker_results_delete -from arkindex.process.models import Worker, WorkerVersion +from arkindex.process.models import Worker, WorkerRun, WorkerVersion from arkindex.project.tests import FixtureTestCase from arkindex.training.models import Dataset, Model, ModelVersionState @@ -18,6 +18,7 @@ class TestDeleteSelectionWorkerResults(FixtureTestCase): cls.page2 = cls.corpus.elements.get(name="Volume 1, page 1v") cls.page3 = cls.corpus.elements.get(name="Volume 1, page 2r") cls.version = WorkerVersion.objects.first() + cls.worker_run = WorkerRun.objects.first() cls.model = Model.objects.create(name="Generic model", public=False) cls.model_version = cls.model.versions.create( state=ModelVersionState.Available, @@ -63,6 +64,7 @@ class TestDeleteSelectionWorkerResults(FixtureTestCase): self.assertCountEqual(worker_results_delete_mock.call_args_list, [ call( corpus_id=self.corpus.id, + worker_run_id=None, version_id=None, model_version_id=None, configuration_id=None, @@ -70,6 +72,7 @@ class TestDeleteSelectionWorkerResults(FixtureTestCase): ), call( corpus_id=self.corpus.id, + worker_run_id=None, version_id=None, model_version_id=None, configuration_id=None, @@ -102,6 +105,7 @@ class TestDeleteSelectionWorkerResults(FixtureTestCase): self.assertCountEqual(worker_results_delete_mock.call_args_list, [ call( corpus_id=self.corpus.id, + worker_run_id=None, version_id=self.version.id, model_version_id=self.model_version.id, configuration_id=self.configuration.id, @@ -109,6 +113,7 @@ class TestDeleteSelectionWorkerResults(FixtureTestCase): ), call( corpus_id=self.corpus.id, + worker_run_id=None, version_id=self.version.id, model_version_id=self.model_version.id, configuration_id=self.configuration.id, @@ -116,6 +121,60 @@ class TestDeleteSelectionWorkerResults(FixtureTestCase): ), ]) + @patch("arkindex.documents.tasks.get_current_job") + def test_run_worker_run_or_version(self, job_mock): + self.user.selected_elements.set([self.page1, self.page2]) + self.superuser.selected_elements.set([self.page3]) + job_mock.return_value.user_id = self.user.id + + with self.assertRaisesMessage(AssertionError, "The worker_run_id and version_id parameters are mutually exclusive."): + selection_worker_results_delete( + corpus_id=self.corpus.id, + worker_run_id=self.worker_run.id, + version_id=self.version.id, + model_version_id=self.model_version.id, + configuration_id=self.configuration.id, + ) + + @patch("arkindex.documents.tasks.get_current_job") + @patch("arkindex.documents.tasks.worker_results_delete") + def test_run_worker_run_filter(self, worker_results_delete_mock, job_mock): + self.user.selected_elements.set([self.page1, self.page2]) + self.superuser.selected_elements.set([self.page3]) + job_mock.return_value.user_id = self.user.id + + selection_worker_results_delete( + corpus_id=self.corpus.id, + worker_run_id=self.worker_run.id + ) + + self.assertEqual(job_mock.call_count, 1) + self.assertEqual(job_mock().set_progress.call_count, 2) + self.assertListEqual(job_mock().set_progress.call_args_list, [ + call(.0), + call(.5), + ]) + + self.assertEqual(worker_results_delete_mock.call_count, 2) + self.assertCountEqual(worker_results_delete_mock.call_args_list, [ + call( + corpus_id=self.corpus.id, + worker_run_id=self.worker_run.id, + version_id=None, + model_version_id=None, + configuration_id=None, + element_id=self.page1.id, + ), + call( + corpus_id=self.corpus.id, + worker_run_id=self.worker_run.id, + version_id=None, + model_version_id=None, + configuration_id=None, + element_id=self.page2.id, + ), + ]) + @patch("arkindex.documents.tasks.get_current_job") def test_run_dataset_failure(self, job_mock): """ diff --git a/arkindex/documents/tests/tasks/test_worker_results_delete.py b/arkindex/documents/tests/tasks/test_worker_results_delete.py index 4c71e41296..0fb898cf35 100644 --- a/arkindex/documents/tests/tasks/test_worker_results_delete.py +++ b/arkindex/documents/tests/tasks/test_worker_results_delete.py @@ -157,7 +157,7 @@ class TestDeleteWorkerResults(FixtureTestCase): "version_id": str(self.version_1.id), "element_id": str(self.page1.id), }): - worker_results_delete(self.corpus.id, self.version_1.id, self.page1.id) + worker_results_delete(corpus_id=self.corpus.id, version_id=self.version_1.id, element_id=self.page1.id) self.check_deleted( self.classification2, self.transcription1, @@ -173,7 +173,7 @@ class TestDeleteWorkerResults(FixtureTestCase): "version_id": str(self.version_1.id), "element_id": str(self.page2.id), }): - worker_results_delete(self.corpus.id, self.version_1.id, self.page2.id) + worker_results_delete(corpus_id=self.corpus.id, version_id=self.version_1.id, element_id=self.page2.id) self.check_deleted( self.classification3, self.transcription2, @@ -278,3 +278,76 @@ class TestDeleteWorkerResults(FixtureTestCase): # https://code.djangoproject.com/ticket/11665 with self.assertRaises(IntegrityError): connections["default"].check_constraints() + + def test_run_worker_run_or_version(self): + with self.assertRaisesMessage(AssertionError, "The worker_run_id and version_id parameters are mutually exclusive."): + worker_results_delete( + corpus_id=self.corpus.id, + version_id=self.version_1.id, + worker_run_id=self.worker_run_1.id + ) + + def test_run_worker_run_on_corpus(self): + with self.assertExactQueries("worker_results_delete_in_corpus_worker_run.sql", params={ + "corpus_id": str(self.corpus.id), + "worker_run_id": str(self.worker_run_1.id), + }): + worker_results_delete( + corpus_id=self.corpus.id, + worker_run_id=self.worker_run_1.id, + ) + self.check_deleted( + self.classification1, + self.classification2, + self.classification3 + ) + + def test_run_worker_run_on_parent(self): + with self.assertExactQueries("worker_results_delete_under_parent_worker_run.sql", params={ + "corpus_id": str(self.corpus.id), + "worker_run_id": str(self.worker_run_2.id), + "element_id": str(self.page1.id), + }): + worker_results_delete(corpus_id=self.corpus.id, worker_run_id=self.worker_run_2.id, element_id=self.page1.id) + self.check_deleted( + self.transcription1, + self.transcription_entity1, + ) + + def test_run_worker_run_on_parent_delete_element(self): + """ + The element itself is deleted after its related results from the same worker run + """ + self.page1.worker_run = self.worker_run_2 + self.page1.worker_version = self.version_2 + self.page1.save() + with self.assertExactQueries("worker_results_delete_under_parent_included_worker_run.sql", params={ + "corpus_id": str(self.corpus.id), + "worker_run_id": str(self.worker_run_2.id), + "element_id": str(self.page1.id), + }): + worker_results_delete(corpus_id=self.corpus.id, worker_run_id=self.worker_run_2.id, element_id=self.page1.id) + self.check_deleted( + self.transcription1, + self.transcription_entity1, + self.page1, + # self.classifications2 is deleted as well since it's on self.page1 + self.classification2 + ) + + def test_run_worker_run_ignore_filters(self): + with self.assertExactQueries("worker_results_delete_in_corpus_worker_run.sql", params={ + "corpus_id": str(self.corpus.id), + "worker_run_id": str(self.worker_run_1.id) + }): + worker_results_delete( + corpus_id=self.corpus.id, + worker_run_id=self.worker_run_1.id, + model_version_id=self.model_version.id, + configuration_id=self.configuration.id + ) + self.check_deleted( + self.classification1, + self.classification2, + self.classification3 + ) diff --git a/arkindex/documents/tests/test_destroy_worker_results.py b/arkindex/documents/tests/test_destroy_worker_results.py index 1f590c4c14..2aedc94152 100644 --- a/arkindex/documents/tests/test_destroy_worker_results.py +++ b/arkindex/documents/tests/test_destroy_worker_results.py @@ -5,7 +5,7 @@ from django.urls import reverse from rest_framework import status from arkindex.documents.models import Corpus -from arkindex.process.models import Worker, WorkerVersion +from arkindex.process.models import Worker, WorkerRun, WorkerVersion from arkindex.project.tests import FixtureAPITestCase from arkindex.training.models import Model, ModelVersionState @@ -16,6 +16,7 @@ class TestDestroyWorkerResults(FixtureAPITestCase): def setUpTestData(cls): super().setUpTestData() cls.version = WorkerVersion.objects.get(worker__slug="reco") + cls.worker_run = WorkerRun.objects.get(version=cls.version) cls.page = cls.corpus.elements.get(name="Volume 1, page 2r") cls.private_corpus = Corpus.objects.create(name="private", public=False) cls.model = Model.objects.create(name="Generic model", public=False) @@ -72,6 +73,7 @@ class TestDestroyWorkerResults(FixtureAPITestCase): self.assertEqual(delay_mock.call_count, 1) self.assertEqual(delay_mock.call_args, call( corpus_id=self.corpus.id, + worker_run_id=None, version_id=None, element_id=None, model_version_id=None, @@ -93,6 +95,7 @@ class TestDestroyWorkerResults(FixtureAPITestCase): self.assertEqual(delay_mock.call_count, 1) self.assertEqual(delay_mock.call_args, call( corpus_id=self.corpus.id, + worker_run_id=None, element_id=None, version_id=self.version.id, model_version_id=None, @@ -101,6 +104,59 @@ class TestDestroyWorkerResults(FixtureAPITestCase): description=f"Deletion of worker results produced by {self.version}", )) + @patch("arkindex.project.triggers.documents_tasks.worker_results_delete.delay") + def test_filter_worker_run_ignore_filters(self, delay_mock): + """ + When worker_run_id is passed, worker_version_id, model_version_id and configuration_id + are ignored + """ + self.client.force_login(self.user) + with self.assertNumQueries(7): + response = self.client.delete( + reverse("api:worker-delete-results", kwargs={"corpus": str(self.corpus.id)}) + + ( + f"?worker_version_id={self.version.id}" + f"&worker_run_id={self.worker_run.id}" + f"&model_version_id={self.model_version.id}" + f"&configuration_id=false" + ) + ) + self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) + + self.assertEqual(delay_mock.call_count, 1) + self.assertEqual(delay_mock.call_args, call( + corpus_id=self.corpus.id, + version_id=None, + element_id=None, + worker_run_id=self.worker_run.id, + model_version_id=None, + configuration_id=None, + user_id=self.user.id, + description=f"Deletion of worker results produced by {self.worker_run.summary}", + )) + + @patch("arkindex.project.triggers.documents_tasks.worker_results_delete.delay") + def test_filter_worker_run(self, delay_mock): + self.client.force_login(self.user) + with self.assertNumQueries(7): + response = self.client.delete( + reverse("api:worker-delete-results", kwargs={"corpus": str(self.corpus.id)}) + + f"?worker_run_id={self.worker_run.id}", + ) + self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) + + self.assertEqual(delay_mock.call_count, 1) + self.assertEqual(delay_mock.call_args, call( + corpus_id=self.corpus.id, + version_id=None, + element_id=None, + worker_run_id=self.worker_run.id, + model_version_id=None, + configuration_id=None, + user_id=self.user.id, + description=f"Deletion of worker results produced by {self.worker_run.summary}", + )) + @patch("arkindex.project.triggers.documents_tasks.worker_results_delete.delay") def test_filter_element(self, delay_mock): self.client.force_login(self.user) @@ -115,6 +171,7 @@ class TestDestroyWorkerResults(FixtureAPITestCase): self.assertEqual(delay_mock.call_args, call( corpus_id=self.corpus.id, element_id=self.page.id, + worker_run_id=None, version_id=None, model_version_id=None, configuration_id=None, @@ -122,6 +179,31 @@ class TestDestroyWorkerResults(FixtureAPITestCase): description="Deletion of worker results", )) + @patch("arkindex.project.triggers.documents_tasks.worker_results_delete.delay") + def test_filter_element_worker_run(self, delay_mock): + self.client.force_login(self.user) + with self.assertNumQueries(8): + response = self.client.delete( + reverse("api:worker-delete-results", kwargs={"corpus": str(self.corpus.id)}) + + ( + f"?element_id={self.page.id}" + f"&worker_run_id={self.worker_run.id}" + ) + ) + self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) + + self.assertEqual(delay_mock.call_count, 1) + self.assertEqual(delay_mock.call_args, call( + corpus_id=self.corpus.id, + element_id=self.page.id, + worker_run_id=self.worker_run.id, + version_id=None, + model_version_id=None, + configuration_id=None, + user_id=self.user.id, + description=f"Deletion of worker results produced by {self.worker_run.summary}", + )) + @patch("arkindex.project.triggers.documents_tasks.worker_results_delete.delay") def test_filter_unset_configuration(self, delay_mock): self.client.force_login(self.user) @@ -136,6 +218,7 @@ class TestDestroyWorkerResults(FixtureAPITestCase): self.assertEqual(delay_mock.call_args, call( corpus_id=self.corpus.id, element_id=None, + worker_run_id=None, version_id=None, model_version_id=None, configuration_id=False, @@ -157,6 +240,7 @@ class TestDestroyWorkerResults(FixtureAPITestCase): self.assertEqual(delay_mock.call_args, call( corpus_id=self.corpus.id, element_id=None, + worker_run_id=None, version_id=None, model_version_id=self.model_version.id, configuration_id=None, @@ -183,6 +267,7 @@ class TestDestroyWorkerResults(FixtureAPITestCase): self.assertEqual(delay_mock.call_args, call( corpus_id=self.corpus.id, element_id=self.page.id, + worker_run_id=None, version_id=self.version.id, model_version_id=self.model_version.id, configuration_id=self.configuration.id, @@ -221,6 +306,32 @@ class TestDestroyWorkerResults(FixtureAPITestCase): {"worker_version_id": ["This worker version does not exist."]} ) + def test_invalid_worker_run_id(self): + self.client.force_login(self.user) + with self.assertNumQueries(6): + response = self.client.delete( + reverse("api:worker-delete-results", kwargs={"corpus": str(self.corpus.id)}) + + "?worker_run_id=lol" + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertDictEqual( + response.json(), + {"worker_run_id": ["Invalid UUID."]}, + ) + + def test_wrong_worker_run_id(self): + self.client.force_login(self.user) + with self.assertNumQueries(7): + response = self.client.delete( + reverse("api:worker-delete-results", kwargs={"corpus": str(self.corpus.id)}) + + "?worker_run_id=12341234-1234-1234-1234-123412341234" + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertDictEqual( + response.json(), + {"worker_run_id": ["This worker run does not exist."]} + ) + def test_invalid_element_id(self): self.client.force_login(self.user) with self.assertNumQueries(6): @@ -377,6 +488,7 @@ class TestDestroyWorkerResults(FixtureAPITestCase): self.assertEqual(delay_mock.call_count, 1) self.assertEqual(delay_mock.call_args, call( corpus_id=self.corpus.id, + worker_run_id=None, version_id=None, user_id=self.user.id, model_version_id=None, @@ -400,6 +512,7 @@ class TestDestroyWorkerResults(FixtureAPITestCase): self.assertEqual(delay_mock.call_count, 1) self.assertEqual(delay_mock.call_args, call( corpus_id=self.corpus.id, + worker_run_id=None, version_id=self.version.id, user_id=self.user.id, model_version_id=None, @@ -407,3 +520,28 @@ class TestDestroyWorkerResults(FixtureAPITestCase): description=f"Deletion of worker results on selected elements in {self.corpus.name} " f"produced by {self.version}" )) + + @override_settings(ARKINDEX_FEATURES={"selection": True}) + @patch("arkindex.project.triggers.documents_tasks.selection_worker_results_delete.delay") + def test_selection_worker_run_filter(self, delay_mock): + self.user.selected_elements.add(self.page) + self.client.force_login(self.user) + + with self.assertNumQueries(8): + response = self.client.delete( + reverse("api:worker-delete-results", kwargs={"corpus": str(self.corpus.id)}) + + f"?use_selection=true&worker_run_id={self.worker_run.id}" + ) + self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) + + self.assertEqual(delay_mock.call_count, 1) + self.assertEqual(delay_mock.call_args, call( + corpus_id=self.corpus.id, + worker_run_id=self.worker_run.id, + version_id=None, + user_id=self.user.id, + model_version_id=None, + configuration_id=None, + description=f"Deletion of worker results on selected elements in {self.corpus.name} " + f"produced by {self.worker_run.summary}" + )) diff --git a/arkindex/project/triggers.py b/arkindex/project/triggers.py index 08eb366194..46eee4a05a 100644 --- a/arkindex/project/triggers.py +++ b/arkindex/project/triggers.py @@ -13,7 +13,14 @@ from arkindex.documents.models import Corpus, CorpusExport, Element from arkindex.ponos import tasks as ponos_tasks from arkindex.ponos.models import State, Task from arkindex.process import tasks as process_tasks -from arkindex.process.models import Process, ProcessMode, WorkerActivityState, WorkerConfiguration, WorkerVersion +from arkindex.process.models import ( + Process, + ProcessMode, + WorkerActivityState, + WorkerConfiguration, + WorkerRun, + WorkerVersion, +) from arkindex.training.models import ModelVersion @@ -48,28 +55,34 @@ def element_trash(queryset: ElementQuerySet, def worker_results_delete(corpus_id: UUID, + worker_run: Optional[WorkerRun] = None, version: Optional[WorkerVersion] = None, element_id: Optional[UUID] = None, model_version: Optional[ModelVersion] = None, configuration: Optional[WorkerConfiguration | Literal[False]] = None, user_id: Optional[int] = None) -> None: """ - Delete all Worker Results produced by a specific WorkerVersion on a whole corpus or under + Delete all Worker Results produced by a specific WorkerRun or WorkerVersion + (+ ModelVersion and Configuration), or any worker, on a whole corpus or under a specified parent element (parent element included). """ description = "Deletion of worker results" - if version is not None: - description += f" produced by {version}" - if model_version is not None: - description += f" based on model version {model_version}" - if configuration is False: - description += " with no configuration" - elif configuration is not None: - description += f" configured with {configuration}" + if worker_run is not None: + description += f" produced by {worker_run.summary}" + else: + if version is not None: + description += f" produced by {version}" + if model_version is not None: + description += f" based on model version {model_version}" + if configuration is False: + description += " with no configuration" + elif configuration is not None: + description += f" configured with {configuration}" documents_tasks.worker_results_delete.delay( corpus_id=corpus_id, element_id=element_id, + worker_run_id=worker_run.id if worker_run else None, version_id=version.id if version else None, model_version_id=model_version.id if model_version else None, configuration_id=configuration.id if configuration else configuration, @@ -80,25 +93,31 @@ def worker_results_delete(corpus_id: UUID, def selection_worker_results_delete(corpus: Corpus, user_id: int = None, + worker_run: Optional[WorkerRun] = None, version: Optional[WorkerVersion] = None, model_version: Optional[ModelVersion] = None, configuration: Optional[WorkerConfiguration | Literal[False]] = None) -> None: """ - Delete all Worker Results produced by any WorkerVersion or a specific one on all elements - selected by a user in a corpus, including their child elements. + Delete all Worker Results by a specific WorkerRun or WorkerVersion + (+ ModelVersion and Configuration), or any worker, on all elements + selected by a user in a corpus, including their children elements. """ description = f"Deletion of worker results on selected elements in {corpus.name}" - if version is not None: - description += f" produced by {version}" - if model_version is not None: - description += f" based on model version {model_version}" - if configuration is False: - description += " with no configuration" - elif configuration is not None: - description += f" configured with {configuration}" + if worker_run is not None: + description += f" produced by {worker_run.summary}" + else: + if version is not None: + description += f" produced by {version}" + if model_version is not None: + description += f" based on model version {model_version}" + if configuration is False: + description += " with no configuration" + elif configuration is not None: + description += f" configured with {configuration}" documents_tasks.selection_worker_results_delete.delay( corpus_id=corpus.id, + worker_run_id=worker_run.id if worker_run else None, version_id=version.id if version else None, model_version_id=model_version.id if model_version else None, configuration_id=configuration.id if configuration else configuration, diff --git a/arkindex/sql_validation/worker_results_delete_in_corpus_worker_run.sql b/arkindex/sql_validation/worker_results_delete_in_corpus_worker_run.sql new file mode 100644 index 0000000000..e01a89ead2 --- /dev/null +++ b/arkindex/sql_validation/worker_results_delete_in_corpus_worker_run.sql @@ -0,0 +1,144 @@ +select min(length), + max(length) +FROM + (select array_length(p.path, 1) as length + from documents_elementpath as p + inner join + (SELECT "documents_element"."id" + FROM "documents_element" + WHERE ("documents_element"."corpus_id" = '{corpus_id}'::uuid + AND "documents_element"."worker_run_id" = '{worker_run_id}'::uuid)) as input on (array[input.id] && p.path)) as lengths ; + +DELETE +FROM "documents_transcriptionentity" +WHERE "documents_transcriptionentity"."id" IN + (SELECT V0."id" + FROM "documents_transcriptionentity" V0 + INNER JOIN "documents_transcription" V1 ON (V0."transcription_id" = V1."id") + WHERE V1."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid))); + +DELETE +FROM "documents_transcription" +WHERE "documents_transcription"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid)); + +DELETE +FROM "process_workeractivity" +WHERE "process_workeractivity"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid)); + +DELETE +FROM "documents_metadata" +WHERE "documents_metadata"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid)); + +DELETE +FROM "documents_classification" +WHERE "documents_classification"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid)); + +DELETE +FROM "documents_selection" +WHERE "documents_selection"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid)); + +DELETE +FROM "process_processelement" +WHERE "process_processelement"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid)); + +UPDATE "process_process" +SET "element_id" = NULL +WHERE "process_process"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid)); + +WITH element_ids (id) AS + (DELETE + FROM documents_elementpath + WHERE element_id IN + (SELECT "documents_element"."id" + FROM "documents_element" + WHERE ("documents_element"."corpus_id" = '{corpus_id}'::uuid + AND "documents_element"."worker_run_id" = '{worker_run_id}'::uuid)) RETURNING element_id) +DELETE +FROM documents_element element USING element_ids +WHERE element.id = element_ids.id ; + +SELECT "documents_element"."id" +FROM "documents_element" +WHERE ("documents_element"."corpus_id" = '{corpus_id}'::uuid + AND "documents_element"."worker_run_id" = '{worker_run_id}'::uuid); + +DELETE +FROM "documents_classification" +WHERE "documents_classification"."id" IN + (SELECT U0."id" + FROM "documents_classification" U0 + INNER JOIN "documents_element" U1 ON (U0."element_id" = U1."id") + WHERE (U1."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid)); + +DELETE +FROM "documents_transcriptionentity" +WHERE "documents_transcriptionentity"."id" IN + (SELECT U0."id" + FROM "documents_transcriptionentity" U0 + INNER JOIN "documents_transcription" U1 ON (U0."transcription_id" = U1."id") + INNER JOIN "documents_element" U2 ON (U1."element_id" = U2."id") + WHERE (U2."corpus_id" = '{corpus_id}'::uuid + AND U1."worker_run_id" = '{worker_run_id}'::uuid)); + +DELETE +FROM "documents_transcriptionentity" +WHERE "documents_transcriptionentity"."id" IN + (SELECT U0."id" + FROM "documents_transcriptionentity" U0 + INNER JOIN "documents_transcription" U1 ON (U0."transcription_id" = U1."id") + INNER JOIN "documents_element" U2 ON (U1."element_id" = U2."id") + INNER JOIN "documents_entity" U4 ON (U0."entity_id" = U4."id") + WHERE ((U2."corpus_id" = '{corpus_id}'::uuid + OR U4."corpus_id" = '{corpus_id}'::uuid) + AND U0."worker_run_id" = '{worker_run_id}'::uuid)); + +DELETE +FROM "documents_transcription" +WHERE "documents_transcription"."id" IN + (SELECT U0."id" + FROM "documents_transcription" U0 + INNER JOIN "documents_element" U1 ON (U0."element_id" = U1."id") + WHERE (U1."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid)); + +DELETE +FROM "documents_metadata" +WHERE "documents_metadata"."id" IN + (SELECT U0."id" + FROM "documents_metadata" U0 + INNER JOIN "documents_element" U1 ON (U0."element_id" = U1."id") + WHERE (U1."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid)) diff --git a/arkindex/sql_validation/worker_results_delete_under_parent_included_worker_run.sql b/arkindex/sql_validation/worker_results_delete_under_parent_included_worker_run.sql new file mode 100644 index 0000000000..eec3d75ca8 --- /dev/null +++ b/arkindex/sql_validation/worker_results_delete_under_parent_included_worker_run.sql @@ -0,0 +1,472 @@ +select min(length), + max(length) +FROM + (select array_length(p.path, 1) as length + from documents_elementpath as p + inner join + (SELECT "documents_element"."id" + FROM "documents_element" + INNER JOIN "documents_elementpath" ON ("documents_element"."id" = "documents_elementpath"."element_id") + WHERE ("documents_element"."corpus_id" = '{corpus_id}'::uuid + AND "documents_element"."worker_run_id" = '{worker_run_id}'::uuid + AND "documents_elementpath"."path" && (ARRAY['{element_id}'::uuid])::uuid[])) as input on (array[input.id] && p.path)) as lengths ; + +DELETE +FROM "documents_transcriptionentity" +WHERE "documents_transcriptionentity"."id" IN + (SELECT V0."id" + FROM "documents_transcriptionentity" V0 + INNER JOIN "documents_transcription" V1 ON (V0."transcription_id" = V1."id") + WHERE V1."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "documents_elementpath" U3 ON (U0."id" = U3."element_id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U3."path" && (ARRAY['{element_id}'::uuid])::uuid[]))); + +DELETE +FROM "documents_transcription" +WHERE "documents_transcription"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "documents_elementpath" U3 ON (U0."id" = U3."element_id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U3."path" && (ARRAY['{element_id}'::uuid])::uuid[])); + +DELETE +FROM "process_workeractivity" +WHERE "process_workeractivity"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "documents_elementpath" U3 ON (U0."id" = U3."element_id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U3."path" && (ARRAY['{element_id}'::uuid])::uuid[])); + +DELETE +FROM "documents_metadata" +WHERE "documents_metadata"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "documents_elementpath" U3 ON (U0."id" = U3."element_id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U3."path" && (ARRAY['{element_id}'::uuid])::uuid[])); + +DELETE +FROM "documents_classification" +WHERE "documents_classification"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "documents_elementpath" U3 ON (U0."id" = U3."element_id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U3."path" && (ARRAY['{element_id}'::uuid])::uuid[])); + +DELETE +FROM "documents_selection" +WHERE "documents_selection"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "documents_elementpath" U3 ON (U0."id" = U3."element_id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U3."path" && (ARRAY['{element_id}'::uuid])::uuid[])); + +DELETE +FROM "process_processelement" +WHERE "process_processelement"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "documents_elementpath" U3 ON (U0."id" = U3."element_id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U3."path" && (ARRAY['{element_id}'::uuid])::uuid[])); + +UPDATE "process_process" +SET "element_id" = NULL +WHERE "process_process"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "documents_elementpath" U3 ON (U0."id" = U3."element_id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U3."path" && (ARRAY['{element_id}'::uuid])::uuid[])); + +WITH element_ids (id) AS + (DELETE + FROM documents_elementpath + WHERE element_id IN + (SELECT "documents_element"."id" + FROM "documents_element" + INNER JOIN "documents_elementpath" ON ("documents_element"."id" = "documents_elementpath"."element_id") + WHERE ("documents_element"."corpus_id" = '{corpus_id}'::uuid + AND "documents_element"."worker_run_id" = '{worker_run_id}'::uuid + AND "documents_elementpath"."path" && (ARRAY['{element_id}'::uuid])::uuid[])) RETURNING element_id) +DELETE +FROM documents_element element USING element_ids +WHERE element.id = element_ids.id ; + +SELECT "documents_element"."id" +FROM "documents_element" +INNER JOIN "documents_elementpath" ON ("documents_element"."id" = "documents_elementpath"."element_id") +WHERE ("documents_element"."corpus_id" = '{corpus_id}'::uuid + AND "documents_element"."worker_run_id" = '{worker_run_id}'::uuid + AND "documents_elementpath"."path" && (ARRAY['{element_id}'::uuid])::uuid[]); + +DELETE +FROM "documents_classification" +WHERE "documents_classification"."id" IN + (SELECT U0."id" + FROM "documents_classification" U0 + INNER JOIN "documents_element" U1 ON (U0."element_id" = U1."id") + INNER JOIN "documents_elementpath" U4 ON (U1."id" = U4."element_id") + WHERE (U1."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U4."path" && (ARRAY['{element_id}'::uuid])::uuid[])); + +DELETE +FROM "documents_transcriptionentity" +WHERE "documents_transcriptionentity"."id" IN + (SELECT U0."id" + FROM "documents_transcriptionentity" U0 + INNER JOIN "documents_transcription" U1 ON (U0."transcription_id" = U1."id") + INNER JOIN "documents_element" U2 ON (U1."element_id" = U2."id") + INNER JOIN "documents_elementpath" U5 ON (U2."id" = U5."element_id") + WHERE (U2."corpus_id" = '{corpus_id}'::uuid + AND U1."worker_run_id" = '{worker_run_id}'::uuid + AND U5."path" && (ARRAY['{element_id}'::uuid])::uuid[])); + +DELETE +FROM "documents_transcriptionentity" +WHERE "documents_transcriptionentity"."id" IN + (SELECT U0."id" + FROM "documents_transcriptionentity" U0 + INNER JOIN "documents_transcription" U1 ON (U0."transcription_id" = U1."id") + INNER JOIN "documents_element" U2 ON (U1."element_id" = U2."id") + INNER JOIN "documents_entity" U4 ON (U0."entity_id" = U4."id") + INNER JOIN "documents_elementpath" U7 ON (U2."id" = U7."element_id") + WHERE ((U2."corpus_id" = '{corpus_id}'::uuid + OR U4."corpus_id" = '{corpus_id}'::uuid) + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U7."path" && (ARRAY['{element_id}'::uuid])::uuid[])); + +DELETE +FROM "documents_transcription" +WHERE "documents_transcription"."id" IN + (SELECT U0."id" + FROM "documents_transcription" U0 + INNER JOIN "documents_element" U1 ON (U0."element_id" = U1."id") + INNER JOIN "documents_elementpath" U4 ON (U1."id" = U4."element_id") + WHERE (U1."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U4."path" && (ARRAY['{element_id}'::uuid])::uuid[])); + +DELETE +FROM "documents_metadata" +WHERE "documents_metadata"."id" IN + (SELECT U0."id" + FROM "documents_metadata" U0 + INNER JOIN "documents_element" U1 ON (U0."element_id" = U1."id") + INNER JOIN "documents_elementpath" U4 ON (U1."id" = U4."element_id") + WHERE (U1."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U4."path" && (ARRAY['{element_id}'::uuid])::uuid[])); + +select min(length), + max(length) +FROM + (select array_length(p.path, 1) as length + from documents_elementpath as p + inner join + (SELECT "documents_element"."id" + FROM "documents_element" + WHERE ("documents_element"."corpus_id" = '{corpus_id}'::uuid + AND "documents_element"."worker_run_id" = '{worker_run_id}'::uuid + AND "documents_element"."id" = '{element_id}'::uuid)) as input on (array[input.id] && p.path)) as lengths ; + +DELETE +FROM "documents_transcriptionentity" +WHERE "documents_transcriptionentity"."id" IN + (SELECT W0."id" + FROM "documents_transcriptionentity" W0 + INNER JOIN "documents_transcription" W1 ON (W0."transcription_id" = W1."id") + WHERE W1."element_id" IN + (SELECT V0."id" + FROM "documents_element" V0 + INNER JOIN "documents_elementpath" V1 ON (V0."id" = V1."element_id") + WHERE V1."path"[2] IN + (SELECT U0."id" + FROM "documents_element" U0 + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U0."id" = '{element_id}'::uuid)))); + +DELETE +FROM "documents_transcription" +WHERE "documents_transcription"."element_id" IN + (SELECT V0."id" + FROM "documents_element" V0 + INNER JOIN "documents_elementpath" V1 ON (V0."id" = V1."element_id") + WHERE V1."path"[2] IN + (SELECT U0."id" + FROM "documents_element" U0 + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U0."id" = '{element_id}'::uuid))); + +DELETE +FROM "process_workeractivity" +WHERE "process_workeractivity"."element_id" IN + (SELECT V0."id" + FROM "documents_element" V0 + INNER JOIN "documents_elementpath" V1 ON (V0."id" = V1."element_id") + WHERE V1."path"[2] IN + (SELECT U0."id" + FROM "documents_element" U0 + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U0."id" = '{element_id}'::uuid))); + +DELETE +FROM "documents_metadata" +WHERE "documents_metadata"."element_id" IN + (SELECT V0."id" + FROM "documents_element" V0 + INNER JOIN "documents_elementpath" V1 ON (V0."id" = V1."element_id") + WHERE V1."path"[2] IN + (SELECT U0."id" + FROM "documents_element" U0 + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U0."id" = '{element_id}'::uuid))); + +DELETE +FROM "documents_classification" +WHERE "documents_classification"."element_id" IN + (SELECT V0."id" + FROM "documents_element" V0 + INNER JOIN "documents_elementpath" V1 ON (V0."id" = V1."element_id") + WHERE V1."path"[2] IN + (SELECT U0."id" + FROM "documents_element" U0 + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U0."id" = '{element_id}'::uuid))); + +DELETE +FROM "documents_selection" +WHERE "documents_selection"."element_id" IN + (SELECT V0."id" + FROM "documents_element" V0 + INNER JOIN "documents_elementpath" V1 ON (V0."id" = V1."element_id") + WHERE V1."path"[2] IN + (SELECT U0."id" + FROM "documents_element" U0 + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U0."id" = '{element_id}'::uuid))); + +DELETE +FROM "process_processelement" +WHERE "process_processelement"."element_id" IN + (SELECT V0."id" + FROM "documents_element" V0 + INNER JOIN "documents_elementpath" V1 ON (V0."id" = V1."element_id") + WHERE V1."path"[2] IN + (SELECT U0."id" + FROM "documents_element" U0 + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U0."id" = '{element_id}'::uuid))); + +UPDATE "process_process" +SET "element_id" = NULL +WHERE "process_process"."element_id" IN + (SELECT V0."id" + FROM "documents_element" V0 + INNER JOIN "documents_elementpath" V1 ON (V0."id" = V1."element_id") + WHERE V1."path"[2] IN + (SELECT U0."id" + FROM "documents_element" U0 + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U0."id" = '{element_id}'::uuid))); + +WITH element_ids (id) AS + (DELETE + FROM documents_elementpath + WHERE element_id IN + (SELECT "documents_element"."id" + FROM "documents_element" + INNER JOIN "documents_elementpath" ON ("documents_element"."id" = "documents_elementpath"."element_id") + WHERE "documents_elementpath"."path"[2] IN + (SELECT U0."id" + FROM "documents_element" U0 + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U0."id" = '{element_id}'::uuid))) RETURNING element_id) +DELETE +FROM documents_element element USING element_ids +WHERE element.id = element_ids.id ; + +SELECT "documents_element"."id" +FROM "documents_element" +INNER JOIN "documents_elementpath" ON ("documents_element"."id" = "documents_elementpath"."element_id") +WHERE "documents_elementpath"."path"[2] IN + (SELECT U0."id" + FROM "documents_element" U0 + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U0."id" = '{element_id}'::uuid)); + +DELETE +FROM "documents_transcriptionentity" +WHERE "documents_transcriptionentity"."id" IN + (SELECT V0."id" + FROM "documents_transcriptionentity" V0 + INNER JOIN "documents_transcription" V1 ON (V0."transcription_id" = V1."id") + WHERE V1."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U0."id" = '{element_id}'::uuid))); + +DELETE +FROM "documents_transcription" +WHERE "documents_transcription"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U0."id" = '{element_id}'::uuid)); + +DELETE +FROM "process_workeractivity" +WHERE "process_workeractivity"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U0."id" = '{element_id}'::uuid)); + +DELETE +FROM "documents_metadata" +WHERE "documents_metadata"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U0."id" = '{element_id}'::uuid)); + +DELETE +FROM "documents_classification" +WHERE "documents_classification"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U0."id" = '{element_id}'::uuid)); + +DELETE +FROM "documents_selection" +WHERE "documents_selection"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U0."id" = '{element_id}'::uuid)); + +DELETE +FROM "process_processelement" +WHERE "process_processelement"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U0."id" = '{element_id}'::uuid)); + +UPDATE "process_process" +SET "element_id" = NULL +WHERE "process_process"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U0."id" = '{element_id}'::uuid)); + +WITH element_ids (id) AS + (DELETE + FROM documents_elementpath + WHERE element_id IN + (SELECT "documents_element"."id" + FROM "documents_element" + WHERE ("documents_element"."corpus_id" = '{corpus_id}'::uuid + AND "documents_element"."worker_run_id" = '{worker_run_id}'::uuid + AND "documents_element"."id" = '{element_id}'::uuid)) RETURNING element_id) +DELETE +FROM documents_element element USING element_ids +WHERE element.id = element_ids.id ; + +SELECT "documents_element"."id" +FROM "documents_element" +WHERE ("documents_element"."corpus_id" = '{corpus_id}'::uuid + AND "documents_element"."worker_run_id" = '{worker_run_id}'::uuid + AND "documents_element"."id" = '{element_id}'::uuid); + +DELETE +FROM "documents_classification" +WHERE "documents_classification"."id" IN + (SELECT U0."id" + FROM "documents_classification" U0 + INNER JOIN "documents_element" U1 ON (U0."element_id" = U1."id") + WHERE (U1."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U0."element_id" = '{element_id}'::uuid)); + +DELETE +FROM "documents_transcriptionentity" +WHERE "documents_transcriptionentity"."id" IN + (SELECT U0."id" + FROM "documents_transcriptionentity" U0 + INNER JOIN "documents_transcription" U1 ON (U0."transcription_id" = U1."id") + INNER JOIN "documents_element" U2 ON (U1."element_id" = U2."id") + WHERE (U2."corpus_id" = '{corpus_id}'::uuid + AND U1."worker_run_id" = '{worker_run_id}'::uuid + AND U1."element_id" = '{element_id}'::uuid)); + +DELETE +FROM "documents_transcriptionentity" +WHERE "documents_transcriptionentity"."id" IN + (SELECT U0."id" + FROM "documents_transcriptionentity" U0 + INNER JOIN "documents_transcription" U1 ON (U0."transcription_id" = U1."id") + INNER JOIN "documents_element" U2 ON (U1."element_id" = U2."id") + INNER JOIN "documents_entity" U4 ON (U0."entity_id" = U4."id") + WHERE ((U2."corpus_id" = '{corpus_id}'::uuid + OR U4."corpus_id" = '{corpus_id}'::uuid) + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U1."element_id" = '{element_id}'::uuid)); + +DELETE +FROM "documents_transcription" +WHERE "documents_transcription"."id" IN + (SELECT U0."id" + FROM "documents_transcription" U0 + INNER JOIN "documents_element" U1 ON (U0."element_id" = U1."id") + WHERE (U1."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U0."element_id" = '{element_id}'::uuid)); + +DELETE +FROM "documents_metadata" +WHERE "documents_metadata"."id" IN + (SELECT U0."id" + FROM "documents_metadata" U0 + INNER JOIN "documents_element" U1 ON (U0."element_id" = U1."id") + WHERE (U1."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U0."element_id" = '{element_id}'::uuid)) diff --git a/arkindex/sql_validation/worker_results_delete_under_parent_worker_run.sql b/arkindex/sql_validation/worker_results_delete_under_parent_worker_run.sql new file mode 100644 index 0000000000..ca7b324df3 --- /dev/null +++ b/arkindex/sql_validation/worker_results_delete_under_parent_worker_run.sql @@ -0,0 +1,337 @@ +select min(length), + max(length) +FROM + (select array_length(p.path, 1) as length + from documents_elementpath as p + inner join + (SELECT "documents_element"."id" + FROM "documents_element" + INNER JOIN "documents_elementpath" ON ("documents_element"."id" = "documents_elementpath"."element_id") + WHERE ("documents_element"."corpus_id" = '{corpus_id}'::uuid + AND "documents_element"."worker_run_id" = '{worker_run_id}'::uuid + AND "documents_elementpath"."path" && (ARRAY['{element_id}'::uuid])::uuid[])) as input on (array[input.id] && p.path)) as lengths ; + +DELETE +FROM "documents_transcriptionentity" +WHERE "documents_transcriptionentity"."id" IN + (SELECT V0."id" + FROM "documents_transcriptionentity" V0 + INNER JOIN "documents_transcription" V1 ON (V0."transcription_id" = V1."id") + WHERE V1."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "documents_elementpath" U3 ON (U0."id" = U3."element_id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U3."path" && (ARRAY['{element_id}'::uuid])::uuid[]))); + +DELETE +FROM "documents_transcription" +WHERE "documents_transcription"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "documents_elementpath" U3 ON (U0."id" = U3."element_id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U3."path" && (ARRAY['{element_id}'::uuid])::uuid[])); + +DELETE +FROM "process_workeractivity" +WHERE "process_workeractivity"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "documents_elementpath" U3 ON (U0."id" = U3."element_id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U3."path" && (ARRAY['{element_id}'::uuid])::uuid[])); + +DELETE +FROM "documents_metadata" +WHERE "documents_metadata"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "documents_elementpath" U3 ON (U0."id" = U3."element_id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U3."path" && (ARRAY['{element_id}'::uuid])::uuid[])); + +DELETE +FROM "documents_classification" +WHERE "documents_classification"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "documents_elementpath" U3 ON (U0."id" = U3."element_id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U3."path" && (ARRAY['{element_id}'::uuid])::uuid[])); + +DELETE +FROM "documents_selection" +WHERE "documents_selection"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "documents_elementpath" U3 ON (U0."id" = U3."element_id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U3."path" && (ARRAY['{element_id}'::uuid])::uuid[])); + +DELETE +FROM "process_processelement" +WHERE "process_processelement"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "documents_elementpath" U3 ON (U0."id" = U3."element_id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U3."path" && (ARRAY['{element_id}'::uuid])::uuid[])); + +UPDATE "process_process" +SET "element_id" = NULL +WHERE "process_process"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "documents_elementpath" U3 ON (U0."id" = U3."element_id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U3."path" && (ARRAY['{element_id}'::uuid])::uuid[])); + +WITH element_ids (id) AS + (DELETE + FROM documents_elementpath + WHERE element_id IN + (SELECT "documents_element"."id" + FROM "documents_element" + INNER JOIN "documents_elementpath" ON ("documents_element"."id" = "documents_elementpath"."element_id") + WHERE ("documents_element"."corpus_id" = '{corpus_id}'::uuid + AND "documents_element"."worker_run_id" = '{worker_run_id}'::uuid + AND "documents_elementpath"."path" && (ARRAY['{element_id}'::uuid])::uuid[])) RETURNING element_id) +DELETE +FROM documents_element element USING element_ids +WHERE element.id = element_ids.id ; + +SELECT "documents_element"."id" +FROM "documents_element" +INNER JOIN "documents_elementpath" ON ("documents_element"."id" = "documents_elementpath"."element_id") +WHERE ("documents_element"."corpus_id" = '{corpus_id}'::uuid + AND "documents_element"."worker_run_id" = '{worker_run_id}'::uuid + AND "documents_elementpath"."path" && (ARRAY['{element_id}'::uuid])::uuid[]); + +DELETE +FROM "documents_classification" +WHERE "documents_classification"."id" IN + (SELECT U0."id" + FROM "documents_classification" U0 + INNER JOIN "documents_element" U1 ON (U0."element_id" = U1."id") + INNER JOIN "documents_elementpath" U4 ON (U1."id" = U4."element_id") + WHERE (U1."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U4."path" && (ARRAY['{element_id}'::uuid])::uuid[])); + +DELETE +FROM "documents_transcriptionentity" +WHERE "documents_transcriptionentity"."id" IN + (SELECT U0."id" + FROM "documents_transcriptionentity" U0 + INNER JOIN "documents_transcription" U1 ON (U0."transcription_id" = U1."id") + INNER JOIN "documents_element" U2 ON (U1."element_id" = U2."id") + INNER JOIN "documents_elementpath" U5 ON (U2."id" = U5."element_id") + WHERE (U2."corpus_id" = '{corpus_id}'::uuid + AND U1."worker_run_id" = '{worker_run_id}'::uuid + AND U5."path" && (ARRAY['{element_id}'::uuid])::uuid[])); + +DELETE +FROM "documents_transcriptionentity" +WHERE "documents_transcriptionentity"."id" IN + (SELECT U0."id" + FROM "documents_transcriptionentity" U0 + INNER JOIN "documents_transcription" U1 ON (U0."transcription_id" = U1."id") + INNER JOIN "documents_element" U2 ON (U1."element_id" = U2."id") + INNER JOIN "documents_entity" U4 ON (U0."entity_id" = U4."id") + INNER JOIN "documents_elementpath" U7 ON (U2."id" = U7."element_id") + WHERE ((U2."corpus_id" = '{corpus_id}'::uuid + OR U4."corpus_id" = '{corpus_id}'::uuid) + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U7."path" && (ARRAY['{element_id}'::uuid])::uuid[])); + +DELETE +FROM "documents_transcription" +WHERE "documents_transcription"."id" IN + (SELECT U0."id" + FROM "documents_transcription" U0 + INNER JOIN "documents_element" U1 ON (U0."element_id" = U1."id") + INNER JOIN "documents_elementpath" U4 ON (U1."id" = U4."element_id") + WHERE (U1."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U4."path" && (ARRAY['{element_id}'::uuid])::uuid[])); + +DELETE +FROM "documents_metadata" +WHERE "documents_metadata"."id" IN + (SELECT U0."id" + FROM "documents_metadata" U0 + INNER JOIN "documents_element" U1 ON (U0."element_id" = U1."id") + INNER JOIN "documents_elementpath" U4 ON (U1."id" = U4."element_id") + WHERE (U1."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U4."path" && (ARRAY['{element_id}'::uuid])::uuid[])); + +select min(length), + max(length) +FROM + (select array_length(p.path, 1) as length + from documents_elementpath as p + inner join + (SELECT "documents_element"."id" + FROM "documents_element" + WHERE ("documents_element"."corpus_id" = '{corpus_id}'::uuid + AND "documents_element"."worker_run_id" = '{worker_run_id}'::uuid + AND "documents_element"."id" = '{element_id}'::uuid)) as input on (array[input.id] && p.path)) as lengths ; + +DELETE +FROM "documents_transcriptionentity" +WHERE "documents_transcriptionentity"."id" IN + (SELECT V0."id" + FROM "documents_transcriptionentity" V0 + INNER JOIN "documents_transcription" V1 ON (V0."transcription_id" = V1."id") + WHERE V1."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U0."id" = '{element_id}'::uuid))); + +DELETE +FROM "documents_transcription" +WHERE "documents_transcription"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U0."id" = '{element_id}'::uuid)); + +DELETE +FROM "process_workeractivity" +WHERE "process_workeractivity"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U0."id" = '{element_id}'::uuid)); + +DELETE +FROM "documents_metadata" +WHERE "documents_metadata"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U0."id" = '{element_id}'::uuid)); + +DELETE +FROM "documents_classification" +WHERE "documents_classification"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U0."id" = '{element_id}'::uuid)); + +DELETE +FROM "documents_selection" +WHERE "documents_selection"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U0."id" = '{element_id}'::uuid)); + +DELETE +FROM "process_processelement" +WHERE "process_processelement"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U0."id" = '{element_id}'::uuid)); + +UPDATE "process_process" +SET "element_id" = NULL +WHERE "process_process"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U0."id" = '{element_id}'::uuid)); + +WITH element_ids (id) AS + (DELETE + FROM documents_elementpath + WHERE element_id IN + (SELECT "documents_element"."id" + FROM "documents_element" + WHERE ("documents_element"."corpus_id" = '{corpus_id}'::uuid + AND "documents_element"."worker_run_id" = '{worker_run_id}'::uuid + AND "documents_element"."id" = '{element_id}'::uuid)) RETURNING element_id) +DELETE +FROM documents_element element USING element_ids +WHERE element.id = element_ids.id ; + +SELECT "documents_element"."id" +FROM "documents_element" +WHERE ("documents_element"."corpus_id" = '{corpus_id}'::uuid + AND "documents_element"."worker_run_id" = '{worker_run_id}'::uuid + AND "documents_element"."id" = '{element_id}'::uuid); + +DELETE +FROM "documents_classification" +WHERE "documents_classification"."id" IN + (SELECT U0."id" + FROM "documents_classification" U0 + INNER JOIN "documents_element" U1 ON (U0."element_id" = U1."id") + WHERE (U1."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U0."element_id" = '{element_id}'::uuid)); + +DELETE +FROM "documents_transcriptionentity" +WHERE "documents_transcriptionentity"."id" IN + (SELECT U0."id" + FROM "documents_transcriptionentity" U0 + INNER JOIN "documents_transcription" U1 ON (U0."transcription_id" = U1."id") + INNER JOIN "documents_element" U2 ON (U1."element_id" = U2."id") + WHERE (U2."corpus_id" = '{corpus_id}'::uuid + AND U1."worker_run_id" = '{worker_run_id}'::uuid + AND U1."element_id" = '{element_id}'::uuid)); + +DELETE +FROM "documents_transcriptionentity" +WHERE "documents_transcriptionentity"."id" IN + (SELECT U0."id" + FROM "documents_transcriptionentity" U0 + INNER JOIN "documents_transcription" U1 ON (U0."transcription_id" = U1."id") + INNER JOIN "documents_element" U2 ON (U1."element_id" = U2."id") + INNER JOIN "documents_entity" U4 ON (U0."entity_id" = U4."id") + WHERE ((U2."corpus_id" = '{corpus_id}'::uuid + OR U4."corpus_id" = '{corpus_id}'::uuid) + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U1."element_id" = '{element_id}'::uuid)); + +DELETE +FROM "documents_transcription" +WHERE "documents_transcription"."id" IN + (SELECT U0."id" + FROM "documents_transcription" U0 + INNER JOIN "documents_element" U1 ON (U0."element_id" = U1."id") + WHERE (U1."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U0."element_id" = '{element_id}'::uuid)); + +DELETE +FROM "documents_metadata" +WHERE "documents_metadata"."id" IN + (SELECT U0."id" + FROM "documents_metadata" U0 + INNER JOIN "documents_element" U1 ON (U0."element_id" = U1."id") + WHERE (U1."corpus_id" = '{corpus_id}'::uuid + AND U0."worker_run_id" = '{worker_run_id}'::uuid + AND U0."element_id" = '{element_id}'::uuid)) -- GitLab