diff --git a/arkindex/documents/api/elements.py b/arkindex/documents/api/elements.py index f62c30678518c3bf5bcbde60adda47a0048c292c..9ae8e392cda46ca78c8b37ea33ea0f2abe49d7bc 100644 --- a/arkindex/documents/api/elements.py +++ b/arkindex/documents/api/elements.py @@ -68,7 +68,7 @@ from arkindex.documents.serializers.light import CorpusAllowedMetaDataSerializer from arkindex.documents.serializers.ml import ElementTranscriptionSerializer from arkindex.images.models import Image from arkindex.ponos.utils import is_admin_or_ponos_task -from arkindex.process.models import WorkerRun, WorkerVersion +from arkindex.process.models import WorkerConfiguration, WorkerRun, WorkerVersion from arkindex.project.fields import Unnest from arkindex.project.mixins import ACLMixin, CorpusACLMixin, SelectionMixin from arkindex.project.openapi import UUID_OR_FALSE, AutoSchema @@ -82,6 +82,7 @@ from arkindex.project.triggers import ( selection_worker_results_delete, worker_results_delete, ) +from arkindex.training.models import ModelVersion from arkindex.users.models import Role from arkindex.users.utils import filter_rights @@ -2179,6 +2180,21 @@ class CorpusSelectionDestroy(CorpusACLMixin, SelectionMixin, DestroyAPIView): description='Only delete Worker Results on selected elements in this corpus. ' 'Cannot be used together with `element_id`.', ), + OpenApiParameter( + 'model_version_id', + type=UUID, + required=False, + description='Only delete Worker Results produced by a specific model version.', + ), + OpenApiParameter( + 'configuration_id', + type=UUID_OR_FALSE, + required=False, + description=dedent(""" + Only delete Worker Results produced by a specific worker configuration. + If set to false, only delete results that use no specific configuration. + """) + ), ], tags=['ml'], ) @@ -2231,6 +2247,38 @@ class WorkerResultsDestroy(CorpusACLMixin, DestroyAPIView): except WorkerVersion.DoesNotExist: errors['worker_version_id'].append('This worker version does not exist.') + model_version = None + if 'model_version_id' in self.request.query_params: + try: + model_version_id = UUID(self.request.query_params['model_version_id']) + except (TypeError, ValueError): + errors['model_version_id'].append('Invalid UUID.') + else: + try: + model_version = ModelVersion.objects.get(id=model_version_id) + except ModelVersion.DoesNotExist: + errors['model_version_id'].append('This model version does not exist.') + + configuration = None + if 'configuration_id' in self.request.query_params: + conf_id = self.request.query_params['configuration_id'] + if conf_id.lower() in ('false', '0'): + configuration = False + else: + try: + conf_id = UUID(conf_id) + except (TypeError, ValueError): + errors['configuration_id'].append( + 'Invalid UUID. You can set "false" to exclude results with a configuration.' + ) + else: + try: + configuration = WorkerConfiguration.objects.get(id=conf_id) + except WorkerConfiguration.DoesNotExist: + errors['configuration_id'].append( + 'This worker configuration does not exist.' + ) + if errors: raise ValidationError(errors) @@ -2238,6 +2286,8 @@ class WorkerResultsDestroy(CorpusACLMixin, DestroyAPIView): selection_worker_results_delete( corpus=corpus, version=worker_version, + model_version=model_version, + configuration=configuration, user_id=self.request.user.id, ) else: @@ -2245,6 +2295,8 @@ class WorkerResultsDestroy(CorpusACLMixin, DestroyAPIView): corpus_id=corpus.id, version=worker_version, element_id=element_id, + model_version=model_version, + configuration=configuration, user_id=self.request.user.id, ) diff --git a/arkindex/documents/tasks.py b/arkindex/documents/tasks.py index da80cf858e21e24ef91d0abb80f481ec48eb7be0..baf3d86ee81922b37c666fae155968da97cc124a 100644 --- a/arkindex/documents/tasks.py +++ b/arkindex/documents/tasks.py @@ -1,5 +1,5 @@ import logging -from typing import Optional +from typing import Literal, Optional from uuid import UUID from django.conf import settings @@ -98,7 +98,12 @@ def element_trash(queryset: ElementQuerySet, delete_children: bool) -> None: @job('high', timeout=settings.RQ_TIMEOUTS['worker_results_delete']) -def selection_worker_results_delete(corpus_id: str, version_id: Optional[str] = None) -> None: +def selection_worker_results_delete( + corpus_id: str, + model_version_id: Optional[str] = None, + configuration_id: Optional[str | Literal[False]] = None, + version_id: Optional[str] = None, +) -> None: """ Delete all Worker Results produced by any WorkerVersion or a specific one on a user's selected elements, including their children. @@ -111,18 +116,27 @@ def selection_worker_results_delete(corpus_id: str, version_id: Optional[str] = total = queryset.count() for i, element_id in enumerate(queryset.values_list('element_id', flat=True).iterator()): rq_job.set_progress(i / total) - worker_results_delete(corpus_id=corpus_id, version_id=version_id, element_id=element_id) + worker_results_delete( + corpus_id=corpus_id, + element_id=element_id, + version_id=version_id, + model_version_id=model_version_id, + configuration_id=configuration_id, + ) @job('high', timeout=settings.RQ_TIMEOUTS['worker_results_delete']) def worker_results_delete( corpus_id: str, - version_id: Optional[str], - element_id: Optional[str], + version_id: Optional[str] = None, + element_id: Optional[str] = None, + model_version_id: Optional[str] = None, + configuration_id: Optional[str | Literal[False]] = None, include_children: bool = True) -> None: """ - Recursively delete all Worker Results produced by any WorkerVersion or a specific one - on a whole corpus, under a specified parent element (parent element included), or on a single element. + Recursively delete all Worker Results produced by any WorkerVersion or a specific one on a + whole corpus, under a specified parent element (parent element included), or on a single element. + Results can be filtered depending on a specific model version and a specific or unset configuration. """ elements = Element.objects.filter(corpus_id=corpus_id) classifications = Classification.objects.filter(element__corpus_id=corpus_id) @@ -184,6 +198,35 @@ def worker_results_delete( metadata = metadata.filter(element_id=element_id) worker_activities = worker_activities.filter(element_id=element_id) + if model_version_id: + elements = elements.filter(worker_run__model_version_id=model_version_id) + classifications = classifications.filter(worker_run__model_version_id=model_version_id) + transcriptions = transcriptions.filter(worker_run__model_version_id=model_version_id) + transcription_entities = transcription_entities.filter(transcription__worker_run__model_version_id=model_version_id) + worker_transcription_entities = worker_transcription_entities.filter(worker_run__model_version_id=model_version_id) + metadata = metadata.filter(worker_run__model_version_id=model_version_id) + # Activities are not linked to a worker run and cannot be filtered by model version + worker_activities = worker_activities.none() + + if configuration_id is not None: + if configuration_id is False: + # Only delete results generated on a worker run with no configuration + elements = elements.filter(worker_run__configuration_id=None) + classifications = classifications.filter(worker_run__configuration_id=None) + transcriptions = transcriptions.filter(worker_run__configuration_id=None) + transcription_entities = transcription_entities.filter(transcription__worker_run__configuration_id=None) + worker_transcription_entities = worker_transcription_entities.filter(worker_run__configuration_id=None) + metadata = metadata.filter(worker_run__configuration_id=None) + worker_activities = worker_activities.filter(configuration_id=None) + else: + elements = elements.filter(worker_run__configuration_id=configuration_id) + classifications = classifications.filter(worker_run__configuration_id=configuration_id) + transcriptions = transcriptions.filter(worker_run__configuration_id=configuration_id) + transcription_entities = transcription_entities.filter(transcription__worker_run__configuration_id=configuration_id) + worker_transcription_entities = worker_transcription_entities.filter(worker_run__configuration_id=configuration_id) + metadata = metadata.filter(worker_run__configuration_id=configuration_id) + worker_activities = worker_activities.filter(configuration_id=configuration_id) + elements.trash() classifications.delete() # Delete TranscriptionEntities before transcriptions so that we can delete transcriptions using a single DELETE query @@ -202,6 +245,8 @@ def worker_results_delete( corpus_id=corpus_id, version_id=version_id, element_id=element_id, + model_version_id=model_version_id, + configuration_id=configuration_id, include_children=False, ) diff --git a/arkindex/documents/tests/tasks/test_selection_worker_results_delete.py b/arkindex/documents/tests/tasks/test_selection_worker_results_delete.py index 3c305b3b5f4fef18cc8d83e03c467e8ceacf158b..3c24b5c2a70daa35aafe14f6860a359e6866982e 100644 --- a/arkindex/documents/tests/tasks/test_selection_worker_results_delete.py +++ b/arkindex/documents/tests/tasks/test_selection_worker_results_delete.py @@ -1,8 +1,9 @@ from unittest.mock import call, patch from arkindex.documents.tasks import selection_worker_results_delete -from arkindex.process.models import WorkerVersion +from arkindex.process.models import Worker, WorkerVersion from arkindex.project.tests import FixtureTestCase +from arkindex.training.models import Model, ModelVersionState class TestDeleteSelectionWorkerResults(FixtureTestCase): @@ -14,6 +15,17 @@ class TestDeleteSelectionWorkerResults(FixtureTestCase): cls.page2 = cls.corpus.elements.get(name='Volume 1, page 1v') cls.page3 = cls.corpus.elements.get(name='Volume 1, page 2r') cls.version = WorkerVersion.objects.first() + cls.model = Model.objects.create(name='Generic model', public=False) + cls.model_version = cls.model.versions.create( + state=ModelVersionState.Available, + tag='Test', + hash='A' * 32, + archive_hash='42', + size=1337, + ) + cls.configuration = Worker.objects.get(slug='dla').configurations.create( + name="DLA configuration", configuration={"value": "test"} + ) def test_no_rq_job(self): with self.assertRaises(AssertionError) as ctx: @@ -46,18 +58,35 @@ class TestDeleteSelectionWorkerResults(FixtureTestCase): self.assertEqual(worker_results_delete_mock.call_count, 2) self.assertCountEqual(worker_results_delete_mock.call_args_list, [ - call(corpus_id=self.corpus.id, version_id=None, element_id=self.page1.id), - call(corpus_id=self.corpus.id, version_id=None, element_id=self.page2.id), + call( + corpus_id=self.corpus.id, + version_id=None, + model_version_id=None, + configuration_id=None, + element_id=self.page1.id, + ), + call( + corpus_id=self.corpus.id, + version_id=None, + model_version_id=None, + configuration_id=None, + element_id=self.page2.id, + ), ]) @patch('arkindex.documents.tasks.get_current_job') @patch('arkindex.documents.tasks.worker_results_delete') - def test_run_version_filter(self, worker_results_delete_mock, job_mock): + def test_run_filters(self, worker_results_delete_mock, job_mock): self.user.selected_elements.set([self.page1, self.page2]) self.superuser.selected_elements.set([self.page3]) job_mock.return_value.user_id = self.user.id - selection_worker_results_delete(corpus_id=self.corpus.id, version_id=self.version.id) + selection_worker_results_delete( + corpus_id=self.corpus.id, + version_id=self.version.id, + model_version_id=self.model_version.id, + configuration_id=self.configuration.id, + ) self.assertEqual(job_mock.call_count, 1) self.assertEqual(job_mock().set_progress.call_count, 2) @@ -68,6 +97,18 @@ class TestDeleteSelectionWorkerResults(FixtureTestCase): self.assertEqual(worker_results_delete_mock.call_count, 2) self.assertCountEqual(worker_results_delete_mock.call_args_list, [ - call(corpus_id=self.corpus.id, version_id=self.version.id, element_id=self.page1.id), - call(corpus_id=self.corpus.id, version_id=self.version.id, element_id=self.page2.id), + call( + corpus_id=self.corpus.id, + version_id=self.version.id, + model_version_id=self.model_version.id, + configuration_id=self.configuration.id, + element_id=self.page1.id, + ), + call( + corpus_id=self.corpus.id, + version_id=self.version.id, + model_version_id=self.model_version.id, + configuration_id=self.configuration.id, + element_id=self.page2.id, + ), ]) diff --git a/arkindex/documents/tests/tasks/test_worker_results_delete.py b/arkindex/documents/tests/tasks/test_worker_results_delete.py index 5873bbd2d4778237ff703554484521f136883e69..52a9cda5878f4b4bd48862d8951566d8bb8b22cf 100644 --- a/arkindex/documents/tests/tasks/test_worker_results_delete.py +++ b/arkindex/documents/tests/tasks/test_worker_results_delete.py @@ -1,7 +1,10 @@ +from django.core.exceptions import ObjectDoesNotExist + from arkindex.documents.models import Entity, EntityType, MLClass, TranscriptionEntity from arkindex.documents.tasks import worker_results_delete -from arkindex.process.models import WorkerVersion +from arkindex.process.models import ProcessMode, WorkerVersion from arkindex.project.tests import FixtureTestCase +from arkindex.training.models import Model, ModelVersionState class TestDeleteWorkerResults(FixtureTestCase): @@ -12,10 +15,36 @@ class TestDeleteWorkerResults(FixtureTestCase): cls.text = MLClass.objects.create(name='text', corpus=cls.corpus) cls.version_1 = WorkerVersion.objects.get(worker__slug='reco') cls.version_2 = WorkerVersion.objects.get(worker__slug='dla') + cls.process = cls.version_1.processes.get(mode=ProcessMode.Workers) + cls.model = Model.objects.create(name='Generic model', public=False) + cls.model_version = cls.model.versions.create( + state=ModelVersionState.Available, + tag='Test', + hash='A' * 32, + archive_hash='42', + size=1337, + ) + cls.configuration = cls.version_2.worker.configurations.create( + name="DLA configuration", configuration={"value": "test"} + ) + cls.process_2 = cls.user.processes.create( + mode=ProcessMode.Workers, + corpus=cls.corpus, + name='Second process', + ) + cls.worker_run_1 = cls.version_1.worker_runs.get(process=cls.process) + cls.worker_run_2 = cls.version_1.worker_runs.create( + process=cls.process_2, + version=cls.version_1, + model_version=cls.model_version, + configuration=cls.configuration, + parents=[], + ) cls.vol = cls.corpus.elements.get(name='Volume 1') cls.classification1 = cls.vol.classifications.create( worker_version=cls.version_1, + worker_run=cls.worker_run_1, ml_class=cls.text, confidence=.42 ) @@ -23,12 +52,14 @@ class TestDeleteWorkerResults(FixtureTestCase): cls.page1 = cls.corpus.elements.get(name='Volume 1, page 1r') cls.classification2 = cls.page1.classifications.create( worker_version=cls.version_1, + worker_run=cls.worker_run_1, ml_class=cls.text, confidence=.42 ) cls.transcription1 = cls.page1.transcriptions.create( text='something', worker_version=cls.version_1, + worker_run=cls.worker_run_2, confidence=0.42, ) cls.person_type = EntityType.objects.get( @@ -40,32 +71,62 @@ class TestDeleteWorkerResults(FixtureTestCase): corpus=cls.corpus, name='entity 1', worker_version=cls.version_2, + worker_run=cls.worker_run_2, ) cls.transcription_entity1 = TranscriptionEntity.objects.create( transcription=cls.transcription1, entity=cls.entity, offset=2, - length=len(cls.entity.name) + length=len(cls.entity.name), + worker_version=cls.version_1, + worker_run=cls.worker_run_2, ) cls.page2 = cls.corpus.elements.get(name='Volume 1, page 2r') cls.page2.worker_version = cls.version_1 cls.page2.save() cls.classification3 = cls.page2.classifications.create( - worker_version=cls.version_2, ml_class=cls.text, - confidence=.42 + confidence=.42, + worker_version=cls.version_2, + worker_run=cls.worker_run_1, ) cls.transcription2 = cls.page2.transcriptions.create( text='something', - worker_version=cls.version_1, confidence=0.42, + worker_version=cls.version_1, + worker_run=cls.worker_run_2, ) cls.transcription_entity2 = TranscriptionEntity.objects.create( transcription=cls.transcription2, entity=cls.entity, offset=2, - length=len(cls.entity.name) + length=len(cls.entity.name), + worker_version=cls.version_1, + worker_run=cls.worker_run_2, + ) + + def check_deleted(self, *deleted_items): + """ + Helper function to check which items are deleted and which items are preserved + """ + items = (self.vol, self.page1, self.page2, self.classification1, self.classification2, self.classification3, self.transcription1, self.transcription2, self.entity, self.transcription_entity1, self.transcription_entity2) + expected_del_ids = tuple(i.id for i in deleted_items) + + non_deleted = [] + missing = [] + for item in items: + try: + item.refresh_from_db() + except ObjectDoesNotExist: + if item.id not in expected_del_ids: + missing.append(item) + else: + if item.id in expected_del_ids: + non_deleted.append(item) + self.assertFalse( + bool(non_deleted or missing), + f'Missing items: {missing}. Non deleted items: {non_deleted}.', ) def test_run_on_corpus(self): @@ -73,21 +134,20 @@ class TestDeleteWorkerResults(FixtureTestCase): 'corpus_id': str(self.corpus.id), 'version_id': str(self.version_1.id), }): - worker_results_delete(self.corpus.id, self.version_1.id, None) - - self.corpus.refresh_from_db() - - self.assertEqual(self.corpus.elements.filter(id=self.vol.id).exists(), True) - self.assertEqual(self.vol.classifications.filter(id=self.classification1.id).exists(), False) - - self.assertEqual(self.corpus.elements.filter(id=self.page1.id).exists(), True) - self.page1.refresh_from_db() - self.assertEqual(self.page1.classifications.filter(id=self.classification2.id).exists(), False) - self.assertEqual(self.page1.transcriptions.filter(id=self.transcription1.id).exists(), False) - self.assertEqual(TranscriptionEntity.objects.filter(id=self.transcription_entity1.id).exists(), False) - self.assertEqual(Entity.objects.filter(id=self.entity.id).exists(), True) - - self.assertEqual(self.corpus.elements.filter(id=self.page2.id).exists(), False) + worker_results_delete( + corpus_id=self.corpus.id, + version_id=self.version_1.id, + ) + self.check_deleted( + self.classification1, + self.classification2, + self.classification3, + self.transcription1, + self.transcription2, + self.transcription_entity1, + self.transcription_entity2, + self.page2, + ) def test_run_on_parent(self): with self.assertExactQueries('worker_results_delete_under_parent.sql', params={ @@ -96,64 +156,107 @@ class TestDeleteWorkerResults(FixtureTestCase): 'element_id': str(self.page1.id), }): worker_results_delete(self.corpus.id, self.version_1.id, self.page1.id) - - self.corpus.refresh_from_db() - - self.assertEqual(self.corpus.elements.filter(id=self.vol.id).exists(), True) - self.assertEqual(self.vol.classifications.filter(id=self.classification1.id).exists(), True) - - self.assertEqual(self.corpus.elements.filter(id=self.page1.id).exists(), True) - self.page1.refresh_from_db() - self.assertEqual(self.page1.classifications.filter(id=self.classification2.id).exists(), False) - self.assertEqual(self.page1.transcriptions.filter(id=self.transcription1.id).exists(), False) - self.assertEqual(TranscriptionEntity.objects.filter(id=self.transcription_entity1.id).exists(), False) - self.assertEqual(Entity.objects.filter(id=self.entity.id).exists(), True) - - self.assertEqual(self.corpus.elements.filter(id=self.page2.id).exists(), True) - self.page2.refresh_from_db() - self.assertEqual(self.page2.classifications.filter(id=self.classification3.id).exists(), True) - self.assertEqual(self.page2.transcriptions.filter(id=self.transcription2.id).exists(), True) - self.assertEqual(TranscriptionEntity.objects.filter(id=self.transcription_entity2.id).exists(), True) - self.assertEqual(Entity.objects.filter(id=self.entity.id).exists(), True) + self.check_deleted( + self.classification2, + self.transcription1, + self.transcription_entity1, + ) def test_run_on_parent_delete_element(self): + """ + The element itself is deleted after its related results from the same version + """ with self.assertExactQueries('worker_results_delete_under_parent_included.sql', params={ 'corpus_id': str(self.corpus.id), 'version_id': str(self.version_1.id), 'element_id': str(self.page2.id), }): worker_results_delete(self.corpus.id, self.version_1.id, self.page2.id) + self.check_deleted( + self.classification3, + self.transcription2, + self.transcription_entity2, + self.page2, + ) - self.corpus.refresh_from_db() + def test_run_model_version_filter_on_parent(self): + with self.assertExactQueries('worker_results_delete_model_version_under_parent.sql', params={ + 'corpus_id': str(self.corpus.id), + 'element_id': str(self.page2.id), + 'model_version_id': str(self.model_version.id), + }): + worker_results_delete( + corpus_id=self.corpus.id, + element_id=self.page2.id, + model_version_id=self.model_version.id, + ) + self.check_deleted( + self.transcription2, + self.transcription_entity2, + ) - self.assertEqual(self.corpus.elements.filter(id=self.vol.id).exists(), True) - self.assertEqual(self.vol.classifications.filter(id=self.classification1.id).exists(), True) + def test_run_configuration_filter(self): + with self.assertExactQueries('worker_results_delete_configuration_filter.sql', params={ + 'corpus_id': str(self.corpus.id), + 'configuration_id': str(self.configuration.id), + }): + worker_results_delete( + corpus_id=self.corpus.id, + configuration_id=self.configuration.id, + ) + self.check_deleted( + self.transcription1, + self.transcription2, + self.transcription_entity1, + self.transcription_entity2, + ) - self.assertEqual(self.corpus.elements.filter(id=self.page1.id).exists(), True) - self.page1.refresh_from_db() - self.assertEqual(self.page1.classifications.filter(id=self.classification2.id).exists(), True) - self.assertEqual(self.page1.transcriptions.filter(id=self.transcription1.id).exists(), True) - self.assertEqual(TranscriptionEntity.objects.filter(id=self.transcription_entity1.id).exists(), True) - self.assertEqual(Entity.objects.filter(id=self.entity.id).exists(), True) + def test_run_no_configuration_filter(self): + # Results generated with worker run 2 are preserved + self.page2.worker_run = self.worker_run_2 + self.page2.save() + with self.assertExactQueries('worker_results_delete_unset_configuration.sql', params={ + 'corpus_id': str(self.corpus.id), + 'element_id': str(self.page2.id), + }): + worker_results_delete( + corpus_id=self.corpus.id, + element_id=self.page2.id, + configuration_id=False, + ) + self.check_deleted( + self.classification3, + ) - self.assertEqual(self.corpus.elements.filter(id=self.page2.id).exists(), False) + def test_run_model_version_configuration_worker_version_filter(self): + """ + Model version, configuration and worker version can be used simultaneously + """ + worker_results_delete( + corpus_id=self.corpus.id, + version_id=self.version_1.id, + model_version_id=self.model_version.id, + configuration_id=self.configuration.id, + ) + self.check_deleted( + self.transcription1, + self.transcription_entity1, + self.transcription2, + self.transcription_entity2, + ) def test_run_all_versions(self): with self.assertExactQueries('worker_results_delete_all_versions.sql', params={ 'corpus_id': str(self.corpus.id), }): - worker_results_delete(self.corpus.id, None, None) - - self.corpus.refresh_from_db() - - self.assertEqual(self.corpus.elements.filter(id=self.vol.id).exists(), True) - self.assertEqual(self.vol.classifications.filter(id=self.classification1.id).exists(), False) - - self.assertEqual(self.corpus.elements.filter(id=self.page1.id).exists(), True) - self.page1.refresh_from_db() - self.assertEqual(self.page1.classifications.filter(id=self.classification2.id).exists(), False) - self.assertEqual(self.page1.transcriptions.filter(id=self.transcription1.id).exists(), False) - self.assertEqual(TranscriptionEntity.objects.filter(id=self.transcription_entity1.id).exists(), False) - self.assertEqual(Entity.objects.filter(id=self.entity.id).exists(), True) - - self.assertEqual(self.corpus.elements.filter(id=self.page2.id).exists(), False) + worker_results_delete(corpus_id=self.corpus.id) + self.check_deleted( + self.classification1, + self.classification2, + self.classification3, + self.transcription1, + self.transcription2, + self.transcription_entity1, + self.transcription_entity2, + self.page2, + ) diff --git a/arkindex/documents/tests/test_destroy_worker_results.py b/arkindex/documents/tests/test_destroy_worker_results.py index 92e3095bf92c9031446775329df0679f58c89b8d..ac90a4caa4abe2f1303db6cdbefe01725473c72d 100644 --- a/arkindex/documents/tests/test_destroy_worker_results.py +++ b/arkindex/documents/tests/test_destroy_worker_results.py @@ -5,8 +5,9 @@ from django.urls import reverse from rest_framework import status from arkindex.documents.models import Corpus -from arkindex.process.models import WorkerVersion +from arkindex.process.models import Worker, WorkerVersion from arkindex.project.tests import FixtureAPITestCase +from arkindex.training.models import Model, ModelVersionState class TestDestroyWorkerResults(FixtureAPITestCase): @@ -17,6 +18,17 @@ class TestDestroyWorkerResults(FixtureAPITestCase): cls.version = WorkerVersion.objects.get(worker__slug='reco') cls.page = cls.corpus.elements.get(name='Volume 1, page 2r') cls.private_corpus = Corpus.objects.create(name='private', public=False) + cls.model = Model.objects.create(name='Generic model', public=False) + cls.model_version = cls.model.versions.create( + state=ModelVersionState.Available, + tag='Test', + hash='A' * 32, + archive_hash='42', + size=1337, + ) + cls.configuration = Worker.objects.get(slug='dla').configurations.create( + name="DLA configuration", configuration={"value": "test"} + ) def test_requires_login(self): with self.assertNumQueries(0): @@ -62,8 +74,10 @@ class TestDestroyWorkerResults(FixtureAPITestCase): corpus_id=self.corpus.id, version_id=None, element_id=None, + model_version_id=None, + configuration_id=None, user_id=self.user.id, - description='Worker results deletion', + description='Deletion of worker results', )) @patch('arkindex.project.triggers.documents_tasks.worker_results_delete.delay') @@ -79,10 +93,12 @@ class TestDestroyWorkerResults(FixtureAPITestCase): self.assertEqual(delay_mock.call_count, 1) self.assertEqual(delay_mock.call_args, call( corpus_id=self.corpus.id, - version_id=self.version.id, element_id=None, + version_id=self.version.id, + model_version_id=None, + configuration_id=None, user_id=self.user.id, - description=f"Deletion of results produced by worker version: {self.version}" + description=f"Deletion of worker results produced by {self.version}", )) @patch('arkindex.project.triggers.documents_tasks.worker_results_delete.delay') @@ -98,29 +114,85 @@ class TestDestroyWorkerResults(FixtureAPITestCase): self.assertEqual(delay_mock.call_count, 1) self.assertEqual(delay_mock.call_args, call( corpus_id=self.corpus.id, - version_id=None, element_id=self.page.id, + version_id=None, + model_version_id=None, + configuration_id=None, user_id=self.user.id, - description='Worker results deletion', + description='Deletion of worker results', )) @patch('arkindex.project.triggers.documents_tasks.worker_results_delete.delay') - def test_filter_version_element(self, delay_mock): + def test_filter_unset_configuration(self, delay_mock): self.client.force_login(self.user) - with self.assertNumQueries(8): + with self.assertNumQueries(6): response = self.client.delete( reverse('api:worker-delete-results', kwargs={'corpus': str(self.corpus.id)}) - + f'?worker_version_id={self.version.id}&element_id={self.page.id}' + + '?configuration_id=false', + ) + self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) + + self.assertEqual(delay_mock.call_count, 1) + self.assertEqual(delay_mock.call_args, call( + corpus_id=self.corpus.id, + element_id=None, + version_id=None, + model_version_id=None, + configuration_id=False, + user_id=self.user.id, + description="Deletion of worker results with no configuration", + )) + + @patch('arkindex.project.triggers.documents_tasks.worker_results_delete.delay') + def test_filter_model_version(self, delay_mock): + self.client.force_login(self.user) + with self.assertNumQueries(7): + response = self.client.delete( + reverse('api:worker-delete-results', kwargs={'corpus': str(self.corpus.id)}) + + f'?model_version_id={self.model_version.id}', + ) + self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) + + self.assertEqual(delay_mock.call_count, 1) + self.assertEqual(delay_mock.call_args, call( + corpus_id=self.corpus.id, + element_id=None, + version_id=None, + model_version_id=self.model_version.id, + configuration_id=None, + user_id=self.user.id, + description="Deletion of worker results based on model version Test (aaaaaaaa…)", + )) + + @patch('arkindex.project.triggers.documents_tasks.worker_results_delete.delay') + def test_filter_element_worker_version_model_version_configuration(self, delay_mock): + self.client.force_login(self.user) + with self.assertNumQueries(10): + response = self.client.delete( + reverse('api:worker-delete-results', kwargs={'corpus': str(self.corpus.id)}) + + ( + f'?worker_version_id={self.version.id}' + f'&element_id={self.page.id}' + f'&model_version_id={self.model_version.id}' + f'&configuration_id={self.configuration.id}' + ) ) self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) self.assertEqual(delay_mock.call_count, 1) self.assertEqual(delay_mock.call_args, call( corpus_id=self.corpus.id, - version_id=self.version.id, element_id=self.page.id, + version_id=self.version.id, + model_version_id=self.model_version.id, + configuration_id=self.configuration.id, user_id=self.user.id, - description=f"Deletion of results produced by worker version: {self.version}" + description=( + 'Deletion of worker results produced by Recognizer' + ' for revision 1337 "My w0rk3r" by Test user' + ' based on model version Test (aaaaaaaa…)' + ' configured with DLA configuration' + ) )) def test_invalid_version_id(self): @@ -175,6 +247,58 @@ class TestDestroyWorkerResults(FixtureAPITestCase): {'element_id': ['This element does not exist in the specified corpus.']} ) + def test_invalid_model_version_id(self): + self.client.force_login(self.user) + with self.assertNumQueries(6): + response = self.client.delete( + reverse('api:worker-delete-results', kwargs={'corpus': str(self.corpus.id)}) + + '?model_version_id=lol' + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertDictEqual( + response.json(), + {'model_version_id': ['Invalid UUID.']} + ) + + def test_wrong_model_version_id(self): + self.client.force_login(self.user) + with self.assertNumQueries(7): + response = self.client.delete( + reverse('api:worker-delete-results', kwargs={'corpus': str(self.corpus.id)}) + + '?model_version_id=12341234-1234-1234-1234-123412341234' + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertDictEqual( + response.json(), + {'model_version_id': ['This model version does not exist.']} + ) + + def test_invalid_configuration_id(self): + self.client.force_login(self.user) + with self.assertNumQueries(6): + response = self.client.delete( + reverse('api:worker-delete-results', kwargs={'corpus': str(self.corpus.id)}) + + '?configuration_id=true' + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertDictEqual( + response.json(), + {'configuration_id': ['Invalid UUID. You can set "false" to exclude results with a configuration.']} + ) + + def test_wrong_configuration_id(self): + self.client.force_login(self.user) + with self.assertNumQueries(7): + response = self.client.delete( + reverse('api:worker-delete-results', kwargs={'corpus': str(self.corpus.id)}) + + '?configuration_id=12341234-1234-1234-1234-123412341234' + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertDictEqual( + response.json(), + {'configuration_id': ['This worker configuration does not exist.']} + ) + @override_settings(ARKINDEX_FEATURES={'selection': False}) def test_selection_feature_flag(self): self.client.force_login(self.user) @@ -255,6 +379,8 @@ class TestDestroyWorkerResults(FixtureAPITestCase): corpus_id=self.corpus.id, version_id=None, user_id=self.user.id, + model_version_id=None, + configuration_id=None, description=f"Deletion of worker results on selected elements in {self.corpus.name}" )) @@ -276,6 +402,8 @@ class TestDestroyWorkerResults(FixtureAPITestCase): corpus_id=self.corpus.id, version_id=self.version.id, user_id=self.user.id, + model_version_id=None, + configuration_id=None, description=f"Deletion of worker results on selected elements in {self.corpus.name} " f"produced by {self.version}" )) diff --git a/arkindex/project/triggers.py b/arkindex/project/triggers.py index c73338e817b5a15f8f752b9015f30eb03914f44e..65df843411476a3604778450818152ce767c8815 100644 --- a/arkindex/project/triggers.py +++ b/arkindex/project/triggers.py @@ -1,7 +1,7 @@ """ Helper methods to trigger tasks in asynchronous workers """ -from typing import Optional, Union +from typing import Literal, Optional, Union from uuid import UUID from arkindex.documents import export @@ -9,7 +9,8 @@ from arkindex.documents import tasks as documents_tasks from arkindex.documents.managers import ElementQuerySet from arkindex.documents.models import Corpus, CorpusExport, Element from arkindex.process import tasks as process_tasks -from arkindex.process.models import Process, WorkerActivityState, WorkerVersion +from arkindex.process.models import Process, WorkerActivityState, WorkerConfiguration, WorkerVersion +from arkindex.training.models import ModelVersion def corpus_delete(corpus: Union[Corpus, UUID, str], user_id: Optional[int] = None) -> None: @@ -45,28 +46,39 @@ def element_trash(queryset: ElementQuerySet, def worker_results_delete(corpus_id: UUID, version: Optional[WorkerVersion] = None, element_id: Optional[UUID] = None, + model_version: Optional[ModelVersion] = None, + configuration: Optional[WorkerConfiguration | Literal[False]] = None, user_id: Optional[int] = None) -> None: """ Delete all Worker Results produced by a specific WorkerVersion on a whole corpus or under a specified parent element (parent element included). """ - if version is None: - description = 'Worker results deletion' - else: - description = f"Deletion of results produced by worker version: {version}" + description = 'Deletion of worker results' + if version is not None: + description += f' produced by {version}' + if model_version is not None: + description += f' based on model version {model_version}' + if configuration is False: + description += ' with no configuration' + elif configuration is not None: + description += f' configured with {configuration}' documents_tasks.worker_results_delete.delay( corpus_id=corpus_id, - version_id=version.id if version else None, element_id=element_id, + version_id=version.id if version else None, + model_version_id=model_version.id if model_version else None, + configuration_id=configuration.id if configuration else configuration, user_id=user_id, description=description, ) def selection_worker_results_delete(corpus: Corpus, - user_id: int, - version: Optional[WorkerVersion] = None) -> None: + user_id: int = None, + version: Optional[WorkerVersion] = None, + model_version: Optional[ModelVersion] = None, + configuration: Optional[WorkerConfiguration | Literal[False]] = None) -> None: """ Delete all Worker Results produced by any WorkerVersion or a specific one on all elements selected by a user in a corpus, including their child elements. @@ -74,10 +86,18 @@ def selection_worker_results_delete(corpus: Corpus, description = f'Deletion of worker results on selected elements in {corpus.name}' if version is not None: description += f' produced by {version}' + if model_version is not None: + description += f' based on model version {model_version}' + if configuration is False: + description += ' with no configuration' + elif configuration is not None: + description += f' configured with {configuration}' documents_tasks.selection_worker_results_delete.delay( corpus_id=corpus.id, version_id=version.id if version else None, + model_version_id=model_version.id if model_version else None, + configuration_id=configuration.id if configuration else configuration, user_id=user_id, description=description, ) diff --git a/arkindex/sql_validation/worker_results_delete_configuration_filter.sql b/arkindex/sql_validation/worker_results_delete_configuration_filter.sql new file mode 100644 index 0000000000000000000000000000000000000000..1cf0d1e0af4d0029455b140bc8b796cf429caa10 --- /dev/null +++ b/arkindex/sql_validation/worker_results_delete_configuration_filter.sql @@ -0,0 +1,216 @@ +select min(length), + max(length) +FROM + (select array_length(p.path, 1) as length + from documents_elementpath as p + inner join + (SELECT "documents_element"."id" + FROM "documents_element" + INNER JOIN "process_workerrun" ON ("documents_element"."worker_run_id" = "process_workerrun"."id") + WHERE ("documents_element"."corpus_id" = '{corpus_id}'::uuid + AND NOT ("documents_element"."worker_version_id" IS NULL) + AND "process_workerrun"."configuration_id" = '{configuration_id}'::uuid)) as input on (array[input.id] && p.path)) as lengths ; + +DELETE +FROM "documents_transcriptionentity" +WHERE "documents_transcriptionentity"."id" IN + (SELECT V0."id" + FROM "documents_transcriptionentity" V0 + INNER JOIN "documents_transcription" V1 ON (V0."transcription_id" = V1."id") + WHERE V1."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "process_workerrun" U3 ON (U0."worker_run_id" = U3."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U3."configuration_id" = '{configuration_id}'::uuid))); + +DELETE +FROM "documents_transcription" +WHERE "documents_transcription"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "process_workerrun" U3 ON (U0."worker_run_id" = U3."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U3."configuration_id" = '{configuration_id}'::uuid)); + +DELETE +FROM "process_workeractivity" +WHERE "process_workeractivity"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "process_workerrun" U3 ON (U0."worker_run_id" = U3."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U3."configuration_id" = '{configuration_id}'::uuid)); + +DELETE +FROM "documents_metadata" +WHERE "documents_metadata"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "process_workerrun" U3 ON (U0."worker_run_id" = U3."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U3."configuration_id" = '{configuration_id}'::uuid)); + +DELETE +FROM "documents_classification" +WHERE "documents_classification"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "process_workerrun" U3 ON (U0."worker_run_id" = U3."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U3."configuration_id" = '{configuration_id}'::uuid)); + +DELETE +FROM "documents_selection" +WHERE "documents_selection"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "process_workerrun" U3 ON (U0."worker_run_id" = U3."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U3."configuration_id" = '{configuration_id}'::uuid)); + +DELETE +FROM "process_processelement" +WHERE "process_processelement"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "process_workerrun" U3 ON (U0."worker_run_id" = U3."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U3."configuration_id" = '{configuration_id}'::uuid)); + +UPDATE "process_process" +SET "element_id" = NULL +WHERE "process_process"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "process_workerrun" U3 ON (U0."worker_run_id" = U3."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U3."configuration_id" = '{configuration_id}'::uuid)); + +UPDATE "process_process" +SET "train_folder_id" = NULL +WHERE "process_process"."train_folder_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "process_workerrun" U3 ON (U0."worker_run_id" = U3."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U3."configuration_id" = '{configuration_id}'::uuid)); + +UPDATE "process_process" +SET "validation_folder_id" = NULL +WHERE "process_process"."validation_folder_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "process_workerrun" U3 ON (U0."worker_run_id" = U3."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U3."configuration_id" = '{configuration_id}'::uuid)); + +UPDATE "process_process" +SET "test_folder_id" = NULL +WHERE "process_process"."test_folder_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "process_workerrun" U3 ON (U0."worker_run_id" = U3."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U3."configuration_id" = '{configuration_id}'::uuid)); + +WITH element_ids (id) AS + (DELETE + FROM documents_elementpath + WHERE element_id IN + (SELECT "documents_element"."id" + FROM "documents_element" + INNER JOIN "process_workerrun" ON ("documents_element"."worker_run_id" = "process_workerrun"."id") + WHERE ("documents_element"."corpus_id" = '{corpus_id}'::uuid + AND NOT ("documents_element"."worker_version_id" IS NULL) + AND "process_workerrun"."configuration_id" = '{configuration_id}'::uuid)) RETURNING element_id) +DELETE +FROM documents_element element USING element_ids +WHERE element.id = element_ids.id ; + +SELECT "documents_element"."id" +FROM "documents_element" +INNER JOIN "process_workerrun" ON ("documents_element"."worker_run_id" = "process_workerrun"."id") +WHERE ("documents_element"."corpus_id" = '{corpus_id}'::uuid + AND NOT ("documents_element"."worker_version_id" IS NULL) + AND "process_workerrun"."configuration_id" = '{configuration_id}'::uuid); + +DELETE +FROM "documents_classification" +WHERE "documents_classification"."id" IN + (SELECT U0."id" + FROM "documents_classification" U0 + INNER JOIN "documents_element" U1 ON (U0."element_id" = U1."id") + INNER JOIN "process_workerrun" U4 ON (U0."worker_run_id" = U4."id") + WHERE (U1."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U4."configuration_id" = '{configuration_id}'::uuid)); + +DELETE +FROM "documents_transcriptionentity" +WHERE "documents_transcriptionentity"."id" IN + (SELECT U0."id" + FROM "documents_transcriptionentity" U0 + INNER JOIN "documents_transcription" U1 ON (U0."transcription_id" = U1."id") + INNER JOIN "documents_element" U2 ON (U1."element_id" = U2."id") + INNER JOIN "process_workerrun" U5 ON (U1."worker_run_id" = U5."id") + WHERE (U2."corpus_id" = '{corpus_id}'::uuid + AND NOT (U1."worker_version_id" IS NULL) + AND U5."configuration_id" = '{configuration_id}'::uuid)); + +DELETE +FROM "documents_transcriptionentity" +WHERE "documents_transcriptionentity"."id" IN + (SELECT U0."id" + FROM "documents_transcriptionentity" U0 + INNER JOIN "documents_transcription" U1 ON (U0."transcription_id" = U1."id") + INNER JOIN "documents_element" U2 ON (U1."element_id" = U2."id") + INNER JOIN "documents_entity" U4 ON (U0."entity_id" = U4."id") + INNER JOIN "process_workerrun" U7 ON (U0."worker_run_id" = U7."id") + WHERE ((U2."corpus_id" = '{corpus_id}'::uuid + OR U4."corpus_id" = '{corpus_id}'::uuid) + AND NOT (U0."worker_version_id" IS NULL) + AND U7."configuration_id" = '{configuration_id}'::uuid)); + +DELETE +FROM "documents_transcription" +WHERE "documents_transcription"."id" IN + (SELECT U0."id" + FROM "documents_transcription" U0 + INNER JOIN "documents_element" U1 ON (U0."element_id" = U1."id") + INNER JOIN "process_workerrun" U4 ON (U0."worker_run_id" = U4."id") + WHERE (U1."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U4."configuration_id" = '{configuration_id}'::uuid)); + +DELETE +FROM "documents_metadata" +WHERE "documents_metadata"."id" IN + (SELECT U0."id" + FROM "documents_metadata" U0 + INNER JOIN "documents_element" U1 ON (U0."element_id" = U1."id") + INNER JOIN "process_workerrun" U4 ON (U0."worker_run_id" = U4."id") + WHERE (U1."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U4."configuration_id" = '{configuration_id}'::uuid)); + +DELETE +FROM "process_workeractivity" +WHERE "process_workeractivity"."id" IN + (SELECT U0."id" + FROM "process_workeractivity" U0 + INNER JOIN "documents_element" U1 ON (U0."element_id" = U1."id") + WHERE (U1."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U0."configuration_id" = '{configuration_id}'::uuid)) diff --git a/arkindex/sql_validation/worker_results_delete_model_version_under_parent.sql b/arkindex/sql_validation/worker_results_delete_model_version_under_parent.sql new file mode 100644 index 0000000000000000000000000000000000000000..5739d8c0316553a9a43f1d74efff98578ce36b0c --- /dev/null +++ b/arkindex/sql_validation/worker_results_delete_model_version_under_parent.sql @@ -0,0 +1,470 @@ +select min(length), + max(length) +FROM + (select array_length(p.path, 1) as length + from documents_elementpath as p + inner join + (SELECT "documents_element"."id" + FROM "documents_element" + INNER JOIN "documents_elementpath" ON ("documents_element"."id" = "documents_elementpath"."element_id") + INNER JOIN "process_workerrun" ON ("documents_element"."worker_run_id" = "process_workerrun"."id") + WHERE ("documents_element"."corpus_id" = '{corpus_id}'::uuid + AND NOT ("documents_element"."worker_version_id" IS NULL) + AND "documents_elementpath"."path" && (ARRAY['{element_id}'::uuid])::uuid[] + AND "process_workerrun"."model_version_id" = '{model_version_id}'::uuid)) as input on (array[input.id] && p.path)) as lengths ; + +DELETE +FROM "documents_transcriptionentity" +WHERE "documents_transcriptionentity"."id" IN + (SELECT V0."id" + FROM "documents_transcriptionentity" V0 + INNER JOIN "documents_transcription" V1 ON (V0."transcription_id" = V1."id") + WHERE V1."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "documents_elementpath" U3 ON (U0."id" = U3."element_id") + INNER JOIN "process_workerrun" U4 ON (U0."worker_run_id" = U4."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U3."path" && (ARRAY['{element_id}'::uuid])::uuid[] + AND U4."model_version_id" = '{model_version_id}'::uuid))); + +DELETE +FROM "documents_transcription" +WHERE "documents_transcription"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "documents_elementpath" U3 ON (U0."id" = U3."element_id") + INNER JOIN "process_workerrun" U4 ON (U0."worker_run_id" = U4."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U3."path" && (ARRAY['{element_id}'::uuid])::uuid[] + AND U4."model_version_id" = '{model_version_id}'::uuid)); + +DELETE +FROM "process_workeractivity" +WHERE "process_workeractivity"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "documents_elementpath" U3 ON (U0."id" = U3."element_id") + INNER JOIN "process_workerrun" U4 ON (U0."worker_run_id" = U4."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U3."path" && (ARRAY['{element_id}'::uuid])::uuid[] + AND U4."model_version_id" = '{model_version_id}'::uuid)); + +DELETE +FROM "documents_metadata" +WHERE "documents_metadata"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "documents_elementpath" U3 ON (U0."id" = U3."element_id") + INNER JOIN "process_workerrun" U4 ON (U0."worker_run_id" = U4."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U3."path" && (ARRAY['{element_id}'::uuid])::uuid[] + AND U4."model_version_id" = '{model_version_id}'::uuid)); + +DELETE +FROM "documents_classification" +WHERE "documents_classification"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "documents_elementpath" U3 ON (U0."id" = U3."element_id") + INNER JOIN "process_workerrun" U4 ON (U0."worker_run_id" = U4."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U3."path" && (ARRAY['{element_id}'::uuid])::uuid[] + AND U4."model_version_id" = '{model_version_id}'::uuid)); + +DELETE +FROM "documents_selection" +WHERE "documents_selection"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "documents_elementpath" U3 ON (U0."id" = U3."element_id") + INNER JOIN "process_workerrun" U4 ON (U0."worker_run_id" = U4."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U3."path" && (ARRAY['{element_id}'::uuid])::uuid[] + AND U4."model_version_id" = '{model_version_id}'::uuid)); + +DELETE +FROM "process_processelement" +WHERE "process_processelement"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "documents_elementpath" U3 ON (U0."id" = U3."element_id") + INNER JOIN "process_workerrun" U4 ON (U0."worker_run_id" = U4."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U3."path" && (ARRAY['{element_id}'::uuid])::uuid[] + AND U4."model_version_id" = '{model_version_id}'::uuid)); + +UPDATE "process_process" +SET "element_id" = NULL +WHERE "process_process"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "documents_elementpath" U3 ON (U0."id" = U3."element_id") + INNER JOIN "process_workerrun" U4 ON (U0."worker_run_id" = U4."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U3."path" && (ARRAY['{element_id}'::uuid])::uuid[] + AND U4."model_version_id" = '{model_version_id}'::uuid)); + +UPDATE "process_process" +SET "train_folder_id" = NULL +WHERE "process_process"."train_folder_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "documents_elementpath" U3 ON (U0."id" = U3."element_id") + INNER JOIN "process_workerrun" U4 ON (U0."worker_run_id" = U4."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U3."path" && (ARRAY['{element_id}'::uuid])::uuid[] + AND U4."model_version_id" = '{model_version_id}'::uuid)); + +UPDATE "process_process" +SET "validation_folder_id" = NULL +WHERE "process_process"."validation_folder_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "documents_elementpath" U3 ON (U0."id" = U3."element_id") + INNER JOIN "process_workerrun" U4 ON (U0."worker_run_id" = U4."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U3."path" && (ARRAY['{element_id}'::uuid])::uuid[] + AND U4."model_version_id" = '{model_version_id}'::uuid)); + +UPDATE "process_process" +SET "test_folder_id" = NULL +WHERE "process_process"."test_folder_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "documents_elementpath" U3 ON (U0."id" = U3."element_id") + INNER JOIN "process_workerrun" U4 ON (U0."worker_run_id" = U4."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U3."path" && (ARRAY['{element_id}'::uuid])::uuid[] + AND U4."model_version_id" = '{model_version_id}'::uuid)); + +WITH element_ids (id) AS + (DELETE + FROM documents_elementpath + WHERE element_id IN + (SELECT "documents_element"."id" + FROM "documents_element" + INNER JOIN "documents_elementpath" ON ("documents_element"."id" = "documents_elementpath"."element_id") + INNER JOIN "process_workerrun" ON ("documents_element"."worker_run_id" = "process_workerrun"."id") + WHERE ("documents_element"."corpus_id" = '{corpus_id}'::uuid + AND NOT ("documents_element"."worker_version_id" IS NULL) + AND "documents_elementpath"."path" && (ARRAY['{element_id}'::uuid])::uuid[] + AND "process_workerrun"."model_version_id" = '{model_version_id}'::uuid)) RETURNING element_id) +DELETE +FROM documents_element element USING element_ids +WHERE element.id = element_ids.id ; + +SELECT "documents_element"."id" +FROM "documents_element" +INNER JOIN "documents_elementpath" ON ("documents_element"."id" = "documents_elementpath"."element_id") +INNER JOIN "process_workerrun" ON ("documents_element"."worker_run_id" = "process_workerrun"."id") +WHERE ("documents_element"."corpus_id" = '{corpus_id}'::uuid + AND NOT ("documents_element"."worker_version_id" IS NULL) + AND "documents_elementpath"."path" && (ARRAY['{element_id}'::uuid])::uuid[] + AND "process_workerrun"."model_version_id" = '{model_version_id}'::uuid); + +DELETE +FROM "documents_classification" +WHERE "documents_classification"."id" IN + (SELECT U0."id" + FROM "documents_classification" U0 + INNER JOIN "documents_element" U1 ON (U0."element_id" = U1."id") + INNER JOIN "documents_elementpath" U4 ON (U1."id" = U4."element_id") + INNER JOIN "process_workerrun" U5 ON (U0."worker_run_id" = U5."id") + WHERE (U1."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U4."path" && (ARRAY['{element_id}'::uuid])::uuid[] + AND U5."model_version_id" = '{model_version_id}'::uuid)); + +DELETE +FROM "documents_transcriptionentity" +WHERE "documents_transcriptionentity"."id" IN + (SELECT U0."id" + FROM "documents_transcriptionentity" U0 + INNER JOIN "documents_transcription" U1 ON (U0."transcription_id" = U1."id") + INNER JOIN "documents_element" U2 ON (U1."element_id" = U2."id") + INNER JOIN "documents_elementpath" U5 ON (U2."id" = U5."element_id") + INNER JOIN "process_workerrun" U6 ON (U1."worker_run_id" = U6."id") + WHERE (U2."corpus_id" = '{corpus_id}'::uuid + AND NOT (U1."worker_version_id" IS NULL) + AND U5."path" && (ARRAY['{element_id}'::uuid])::uuid[] + AND U6."model_version_id" = '{model_version_id}'::uuid)); + +DELETE +FROM "documents_transcriptionentity" +WHERE "documents_transcriptionentity"."id" IN + (SELECT U0."id" + FROM "documents_transcriptionentity" U0 + INNER JOIN "documents_transcription" U1 ON (U0."transcription_id" = U1."id") + INNER JOIN "documents_element" U2 ON (U1."element_id" = U2."id") + INNER JOIN "documents_entity" U4 ON (U0."entity_id" = U4."id") + INNER JOIN "documents_elementpath" U7 ON (U2."id" = U7."element_id") + INNER JOIN "process_workerrun" U8 ON (U0."worker_run_id" = U8."id") + WHERE ((U2."corpus_id" = '{corpus_id}'::uuid + OR U4."corpus_id" = '{corpus_id}'::uuid) + AND NOT (U0."worker_version_id" IS NULL) + AND U7."path" && (ARRAY['{element_id}'::uuid])::uuid[] + AND U8."model_version_id" = '{model_version_id}'::uuid)); + +DELETE +FROM "documents_transcription" +WHERE "documents_transcription"."id" IN + (SELECT U0."id" + FROM "documents_transcription" U0 + INNER JOIN "documents_element" U1 ON (U0."element_id" = U1."id") + INNER JOIN "documents_elementpath" U4 ON (U1."id" = U4."element_id") + INNER JOIN "process_workerrun" U5 ON (U0."worker_run_id" = U5."id") + WHERE (U1."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U4."path" && (ARRAY['{element_id}'::uuid])::uuid[] + AND U5."model_version_id" = '{model_version_id}'::uuid)); + +DELETE +FROM "documents_metadata" +WHERE "documents_metadata"."id" IN + (SELECT U0."id" + FROM "documents_metadata" U0 + INNER JOIN "documents_element" U1 ON (U0."element_id" = U1."id") + INNER JOIN "documents_elementpath" U4 ON (U1."id" = U4."element_id") + INNER JOIN "process_workerrun" U5 ON (U0."worker_run_id" = U5."id") + WHERE (U1."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U4."path" && (ARRAY['{element_id}'::uuid])::uuid[] + AND U5."model_version_id" = '{model_version_id}'::uuid)); + +select min(length), + max(length) +FROM + (select array_length(p.path, 1) as length + from documents_elementpath as p + inner join + (SELECT "documents_element"."id" + FROM "documents_element" + INNER JOIN "process_workerrun" ON ("documents_element"."worker_run_id" = "process_workerrun"."id") + WHERE ("documents_element"."corpus_id" = '{corpus_id}'::uuid + AND NOT ("documents_element"."worker_version_id" IS NULL) + AND "documents_element"."id" = '{element_id}'::uuid + AND "process_workerrun"."model_version_id" = '{model_version_id}'::uuid)) as input on (array[input.id] && p.path)) as lengths ; + +DELETE +FROM "documents_transcriptionentity" +WHERE "documents_transcriptionentity"."id" IN + (SELECT V0."id" + FROM "documents_transcriptionentity" V0 + INNER JOIN "documents_transcription" V1 ON (V0."transcription_id" = V1."id") + WHERE V1."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "process_workerrun" U3 ON (U0."worker_run_id" = U3."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U0."id" = '{element_id}'::uuid + AND U3."model_version_id" = '{model_version_id}'::uuid))); + +DELETE +FROM "documents_transcription" +WHERE "documents_transcription"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "process_workerrun" U3 ON (U0."worker_run_id" = U3."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U0."id" = '{element_id}'::uuid + AND U3."model_version_id" = '{model_version_id}'::uuid)); + +DELETE +FROM "process_workeractivity" +WHERE "process_workeractivity"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "process_workerrun" U3 ON (U0."worker_run_id" = U3."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U0."id" = '{element_id}'::uuid + AND U3."model_version_id" = '{model_version_id}'::uuid)); + +DELETE +FROM "documents_metadata" +WHERE "documents_metadata"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "process_workerrun" U3 ON (U0."worker_run_id" = U3."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U0."id" = '{element_id}'::uuid + AND U3."model_version_id" = '{model_version_id}'::uuid)); + +DELETE +FROM "documents_classification" +WHERE "documents_classification"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "process_workerrun" U3 ON (U0."worker_run_id" = U3."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U0."id" = '{element_id}'::uuid + AND U3."model_version_id" = '{model_version_id}'::uuid)); + +DELETE +FROM "documents_selection" +WHERE "documents_selection"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "process_workerrun" U3 ON (U0."worker_run_id" = U3."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U0."id" = '{element_id}'::uuid + AND U3."model_version_id" = '{model_version_id}'::uuid)); + +DELETE +FROM "process_processelement" +WHERE "process_processelement"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "process_workerrun" U3 ON (U0."worker_run_id" = U3."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U0."id" = '{element_id}'::uuid + AND U3."model_version_id" = '{model_version_id}'::uuid)); + +UPDATE "process_process" +SET "element_id" = NULL +WHERE "process_process"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "process_workerrun" U3 ON (U0."worker_run_id" = U3."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U0."id" = '{element_id}'::uuid + AND U3."model_version_id" = '{model_version_id}'::uuid)); + +UPDATE "process_process" +SET "train_folder_id" = NULL +WHERE "process_process"."train_folder_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "process_workerrun" U3 ON (U0."worker_run_id" = U3."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U0."id" = '{element_id}'::uuid + AND U3."model_version_id" = '{model_version_id}'::uuid)); + +UPDATE "process_process" +SET "validation_folder_id" = NULL +WHERE "process_process"."validation_folder_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "process_workerrun" U3 ON (U0."worker_run_id" = U3."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U0."id" = '{element_id}'::uuid + AND U3."model_version_id" = '{model_version_id}'::uuid)); + +UPDATE "process_process" +SET "test_folder_id" = NULL +WHERE "process_process"."test_folder_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "process_workerrun" U3 ON (U0."worker_run_id" = U3."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U0."id" = '{element_id}'::uuid + AND U3."model_version_id" = '{model_version_id}'::uuid)); + +WITH element_ids (id) AS + (DELETE + FROM documents_elementpath + WHERE element_id IN + (SELECT "documents_element"."id" + FROM "documents_element" + INNER JOIN "process_workerrun" ON ("documents_element"."worker_run_id" = "process_workerrun"."id") + WHERE ("documents_element"."corpus_id" = '{corpus_id}'::uuid + AND NOT ("documents_element"."worker_version_id" IS NULL) + AND "documents_element"."id" = '{element_id}'::uuid + AND "process_workerrun"."model_version_id" = '{model_version_id}'::uuid)) RETURNING element_id) +DELETE +FROM documents_element element USING element_ids +WHERE element.id = element_ids.id ; + +SELECT "documents_element"."id" +FROM "documents_element" +INNER JOIN "process_workerrun" ON ("documents_element"."worker_run_id" = "process_workerrun"."id") +WHERE ("documents_element"."corpus_id" = '{corpus_id}'::uuid + AND NOT ("documents_element"."worker_version_id" IS NULL) + AND "documents_element"."id" = '{element_id}'::uuid + AND "process_workerrun"."model_version_id" = '{model_version_id}'::uuid); + +DELETE +FROM "documents_classification" +WHERE "documents_classification"."id" IN + (SELECT U0."id" + FROM "documents_classification" U0 + INNER JOIN "documents_element" U1 ON (U0."element_id" = U1."id") + INNER JOIN "process_workerrun" U4 ON (U0."worker_run_id" = U4."id") + WHERE (U1."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U0."element_id" = '{element_id}'::uuid + AND U4."model_version_id" = '{model_version_id}'::uuid)); + +DELETE +FROM "documents_transcriptionentity" +WHERE "documents_transcriptionentity"."id" IN + (SELECT U0."id" + FROM "documents_transcriptionentity" U0 + INNER JOIN "documents_transcription" U1 ON (U0."transcription_id" = U1."id") + INNER JOIN "documents_element" U2 ON (U1."element_id" = U2."id") + INNER JOIN "process_workerrun" U5 ON (U1."worker_run_id" = U5."id") + WHERE (U2."corpus_id" = '{corpus_id}'::uuid + AND NOT (U1."worker_version_id" IS NULL) + AND U1."element_id" = '{element_id}'::uuid + AND U5."model_version_id" = '{model_version_id}'::uuid)); + +DELETE +FROM "documents_transcriptionentity" +WHERE "documents_transcriptionentity"."id" IN + (SELECT U0."id" + FROM "documents_transcriptionentity" U0 + INNER JOIN "documents_transcription" U1 ON (U0."transcription_id" = U1."id") + INNER JOIN "documents_element" U2 ON (U1."element_id" = U2."id") + INNER JOIN "documents_entity" U4 ON (U0."entity_id" = U4."id") + INNER JOIN "process_workerrun" U7 ON (U0."worker_run_id" = U7."id") + WHERE ((U2."corpus_id" = '{corpus_id}'::uuid + OR U4."corpus_id" = '{corpus_id}'::uuid) + AND NOT (U0."worker_version_id" IS NULL) + AND U1."element_id" = '{element_id}'::uuid + AND U7."model_version_id" = '{model_version_id}'::uuid)); + +DELETE +FROM "documents_transcription" +WHERE "documents_transcription"."id" IN + (SELECT U0."id" + FROM "documents_transcription" U0 + INNER JOIN "documents_element" U1 ON (U0."element_id" = U1."id") + INNER JOIN "process_workerrun" U4 ON (U0."worker_run_id" = U4."id") + WHERE (U1."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U0."element_id" = '{element_id}'::uuid + AND U4."model_version_id" = '{model_version_id}'::uuid)); + +DELETE +FROM "documents_metadata" +WHERE "documents_metadata"."id" IN + (SELECT U0."id" + FROM "documents_metadata" U0 + INNER JOIN "documents_element" U1 ON (U0."element_id" = U1."id") + INNER JOIN "process_workerrun" U4 ON (U0."worker_run_id" = U4."id") + WHERE (U1."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U0."element_id" = '{element_id}'::uuid + AND U4."model_version_id" = '{model_version_id}'::uuid)) diff --git a/arkindex/sql_validation/worker_results_delete_unset_configuration.sql b/arkindex/sql_validation/worker_results_delete_unset_configuration.sql new file mode 100644 index 0000000000000000000000000000000000000000..f7a7131be0db64eca6fde8985fcefbdd3a9edc27 --- /dev/null +++ b/arkindex/sql_validation/worker_results_delete_unset_configuration.sql @@ -0,0 +1,493 @@ +select min(length), + max(length) +FROM + (select array_length(p.path, 1) as length + from documents_elementpath as p + inner join + (SELECT "documents_element"."id" + FROM "documents_element" + INNER JOIN "documents_elementpath" ON ("documents_element"."id" = "documents_elementpath"."element_id") + LEFT OUTER JOIN "process_workerrun" ON ("documents_element"."worker_run_id" = "process_workerrun"."id") + WHERE ("documents_element"."corpus_id" = '{corpus_id}'::uuid + AND NOT ("documents_element"."worker_version_id" IS NULL) + AND "documents_elementpath"."path" && (ARRAY['{element_id}'::uuid])::uuid[] + AND "process_workerrun"."configuration_id" IS NULL)) as input on (array[input.id] && p.path)) as lengths ; + +DELETE +FROM "documents_transcriptionentity" +WHERE "documents_transcriptionentity"."id" IN + (SELECT V0."id" + FROM "documents_transcriptionentity" V0 + INNER JOIN "documents_transcription" V1 ON (V0."transcription_id" = V1."id") + WHERE V1."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "documents_elementpath" U3 ON (U0."id" = U3."element_id") + LEFT OUTER JOIN "process_workerrun" U4 ON (U0."worker_run_id" = U4."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U3."path" && (ARRAY['{element_id}'::uuid])::uuid[] + AND U4."configuration_id" IS NULL))); + +DELETE +FROM "documents_transcription" +WHERE "documents_transcription"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "documents_elementpath" U3 ON (U0."id" = U3."element_id") + LEFT OUTER JOIN "process_workerrun" U4 ON (U0."worker_run_id" = U4."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U3."path" && (ARRAY['{element_id}'::uuid])::uuid[] + AND U4."configuration_id" IS NULL)); + +DELETE +FROM "process_workeractivity" +WHERE "process_workeractivity"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "documents_elementpath" U3 ON (U0."id" = U3."element_id") + LEFT OUTER JOIN "process_workerrun" U4 ON (U0."worker_run_id" = U4."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U3."path" && (ARRAY['{element_id}'::uuid])::uuid[] + AND U4."configuration_id" IS NULL)); + +DELETE +FROM "documents_metadata" +WHERE "documents_metadata"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "documents_elementpath" U3 ON (U0."id" = U3."element_id") + LEFT OUTER JOIN "process_workerrun" U4 ON (U0."worker_run_id" = U4."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U3."path" && (ARRAY['{element_id}'::uuid])::uuid[] + AND U4."configuration_id" IS NULL)); + +DELETE +FROM "documents_classification" +WHERE "documents_classification"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "documents_elementpath" U3 ON (U0."id" = U3."element_id") + LEFT OUTER JOIN "process_workerrun" U4 ON (U0."worker_run_id" = U4."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U3."path" && (ARRAY['{element_id}'::uuid])::uuid[] + AND U4."configuration_id" IS NULL)); + +DELETE +FROM "documents_selection" +WHERE "documents_selection"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "documents_elementpath" U3 ON (U0."id" = U3."element_id") + LEFT OUTER JOIN "process_workerrun" U4 ON (U0."worker_run_id" = U4."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U3."path" && (ARRAY['{element_id}'::uuid])::uuid[] + AND U4."configuration_id" IS NULL)); + +DELETE +FROM "process_processelement" +WHERE "process_processelement"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "documents_elementpath" U3 ON (U0."id" = U3."element_id") + LEFT OUTER JOIN "process_workerrun" U4 ON (U0."worker_run_id" = U4."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U3."path" && (ARRAY['{element_id}'::uuid])::uuid[] + AND U4."configuration_id" IS NULL)); + +UPDATE "process_process" +SET "element_id" = NULL +WHERE "process_process"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "documents_elementpath" U3 ON (U0."id" = U3."element_id") + LEFT OUTER JOIN "process_workerrun" U4 ON (U0."worker_run_id" = U4."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U3."path" && (ARRAY['{element_id}'::uuid])::uuid[] + AND U4."configuration_id" IS NULL)); + +UPDATE "process_process" +SET "train_folder_id" = NULL +WHERE "process_process"."train_folder_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "documents_elementpath" U3 ON (U0."id" = U3."element_id") + LEFT OUTER JOIN "process_workerrun" U4 ON (U0."worker_run_id" = U4."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U3."path" && (ARRAY['{element_id}'::uuid])::uuid[] + AND U4."configuration_id" IS NULL)); + +UPDATE "process_process" +SET "validation_folder_id" = NULL +WHERE "process_process"."validation_folder_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "documents_elementpath" U3 ON (U0."id" = U3."element_id") + LEFT OUTER JOIN "process_workerrun" U4 ON (U0."worker_run_id" = U4."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U3."path" && (ARRAY['{element_id}'::uuid])::uuid[] + AND U4."configuration_id" IS NULL)); + +UPDATE "process_process" +SET "test_folder_id" = NULL +WHERE "process_process"."test_folder_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + INNER JOIN "documents_elementpath" U3 ON (U0."id" = U3."element_id") + LEFT OUTER JOIN "process_workerrun" U4 ON (U0."worker_run_id" = U4."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U3."path" && (ARRAY['{element_id}'::uuid])::uuid[] + AND U4."configuration_id" IS NULL)); + +WITH element_ids (id) AS + (DELETE + FROM documents_elementpath + WHERE element_id IN + (SELECT "documents_element"."id" + FROM "documents_element" + INNER JOIN "documents_elementpath" ON ("documents_element"."id" = "documents_elementpath"."element_id") + LEFT OUTER JOIN "process_workerrun" ON ("documents_element"."worker_run_id" = "process_workerrun"."id") + WHERE ("documents_element"."corpus_id" = '{corpus_id}'::uuid + AND NOT ("documents_element"."worker_version_id" IS NULL) + AND "documents_elementpath"."path" && (ARRAY['{element_id}'::uuid])::uuid[] + AND "process_workerrun"."configuration_id" IS NULL)) RETURNING element_id) +DELETE +FROM documents_element element USING element_ids +WHERE element.id = element_ids.id ; + +SELECT "documents_element"."id" +FROM "documents_element" +INNER JOIN "documents_elementpath" ON ("documents_element"."id" = "documents_elementpath"."element_id") +LEFT OUTER JOIN "process_workerrun" ON ("documents_element"."worker_run_id" = "process_workerrun"."id") +WHERE ("documents_element"."corpus_id" = '{corpus_id}'::uuid + AND NOT ("documents_element"."worker_version_id" IS NULL) + AND "documents_elementpath"."path" && (ARRAY['{element_id}'::uuid])::uuid[] + AND "process_workerrun"."configuration_id" IS NULL); + +DELETE +FROM "documents_classification" +WHERE "documents_classification"."id" IN + (SELECT U0."id" + FROM "documents_classification" U0 + INNER JOIN "documents_element" U1 ON (U0."element_id" = U1."id") + INNER JOIN "documents_elementpath" U4 ON (U1."id" = U4."element_id") + LEFT OUTER JOIN "process_workerrun" U5 ON (U0."worker_run_id" = U5."id") + WHERE (U1."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U4."path" && (ARRAY['{element_id}'::uuid])::uuid[] + AND U5."configuration_id" IS NULL)); + +DELETE +FROM "documents_transcriptionentity" +WHERE "documents_transcriptionentity"."id" IN + (SELECT U0."id" + FROM "documents_transcriptionentity" U0 + INNER JOIN "documents_transcription" U1 ON (U0."transcription_id" = U1."id") + INNER JOIN "documents_element" U2 ON (U1."element_id" = U2."id") + INNER JOIN "documents_elementpath" U5 ON (U2."id" = U5."element_id") + LEFT OUTER JOIN "process_workerrun" U6 ON (U1."worker_run_id" = U6."id") + WHERE (U2."corpus_id" = '{corpus_id}'::uuid + AND NOT (U1."worker_version_id" IS NULL) + AND U5."path" && (ARRAY['{element_id}'::uuid])::uuid[] + AND U6."configuration_id" IS NULL)); + +DELETE +FROM "documents_transcriptionentity" +WHERE "documents_transcriptionentity"."id" IN + (SELECT U0."id" + FROM "documents_transcriptionentity" U0 + INNER JOIN "documents_transcription" U1 ON (U0."transcription_id" = U1."id") + INNER JOIN "documents_element" U2 ON (U1."element_id" = U2."id") + INNER JOIN "documents_entity" U4 ON (U0."entity_id" = U4."id") + INNER JOIN "documents_elementpath" U7 ON (U2."id" = U7."element_id") + LEFT OUTER JOIN "process_workerrun" U8 ON (U0."worker_run_id" = U8."id") + WHERE ((U2."corpus_id" = '{corpus_id}'::uuid + OR U4."corpus_id" = '{corpus_id}'::uuid) + AND NOT (U0."worker_version_id" IS NULL) + AND U7."path" && (ARRAY['{element_id}'::uuid])::uuid[] + AND U8."configuration_id" IS NULL)); + +DELETE +FROM "documents_transcription" +WHERE "documents_transcription"."id" IN + (SELECT U0."id" + FROM "documents_transcription" U0 + INNER JOIN "documents_element" U1 ON (U0."element_id" = U1."id") + INNER JOIN "documents_elementpath" U4 ON (U1."id" = U4."element_id") + LEFT OUTER JOIN "process_workerrun" U5 ON (U0."worker_run_id" = U5."id") + WHERE (U1."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U4."path" && (ARRAY['{element_id}'::uuid])::uuid[] + AND U5."configuration_id" IS NULL)); + +DELETE +FROM "documents_metadata" +WHERE "documents_metadata"."id" IN + (SELECT U0."id" + FROM "documents_metadata" U0 + INNER JOIN "documents_element" U1 ON (U0."element_id" = U1."id") + INNER JOIN "documents_elementpath" U4 ON (U1."id" = U4."element_id") + LEFT OUTER JOIN "process_workerrun" U5 ON (U0."worker_run_id" = U5."id") + WHERE (U1."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U4."path" && (ARRAY['{element_id}'::uuid])::uuid[] + AND U5."configuration_id" IS NULL)); + +DELETE +FROM "process_workeractivity" +WHERE "process_workeractivity"."id" IN + (SELECT U0."id" + FROM "process_workeractivity" U0 + INNER JOIN "documents_element" U1 ON (U0."element_id" = U1."id") + INNER JOIN "documents_elementpath" U4 ON (U1."id" = U4."element_id") + WHERE (U1."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U4."path" && (ARRAY['{element_id}'::uuid])::uuid[] + AND U0."configuration_id" IS NULL)); + +select min(length), + max(length) +FROM + (select array_length(p.path, 1) as length + from documents_elementpath as p + inner join + (SELECT "documents_element"."id" + FROM "documents_element" + LEFT OUTER JOIN "process_workerrun" ON ("documents_element"."worker_run_id" = "process_workerrun"."id") + WHERE ("documents_element"."corpus_id" = '{corpus_id}'::uuid + AND NOT ("documents_element"."worker_version_id" IS NULL) + AND "documents_element"."id" = '{element_id}'::uuid + AND "process_workerrun"."configuration_id" IS NULL)) as input on (array[input.id] && p.path)) as lengths ; + +DELETE +FROM "documents_transcriptionentity" +WHERE "documents_transcriptionentity"."id" IN + (SELECT V0."id" + FROM "documents_transcriptionentity" V0 + INNER JOIN "documents_transcription" V1 ON (V0."transcription_id" = V1."id") + WHERE V1."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + LEFT OUTER JOIN "process_workerrun" U3 ON (U0."worker_run_id" = U3."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U0."id" = '{element_id}'::uuid + AND U3."configuration_id" IS NULL))); + +DELETE +FROM "documents_transcription" +WHERE "documents_transcription"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + LEFT OUTER JOIN "process_workerrun" U3 ON (U0."worker_run_id" = U3."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U0."id" = '{element_id}'::uuid + AND U3."configuration_id" IS NULL)); + +DELETE +FROM "process_workeractivity" +WHERE "process_workeractivity"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + LEFT OUTER JOIN "process_workerrun" U3 ON (U0."worker_run_id" = U3."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U0."id" = '{element_id}'::uuid + AND U3."configuration_id" IS NULL)); + +DELETE +FROM "documents_metadata" +WHERE "documents_metadata"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + LEFT OUTER JOIN "process_workerrun" U3 ON (U0."worker_run_id" = U3."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U0."id" = '{element_id}'::uuid + AND U3."configuration_id" IS NULL)); + +DELETE +FROM "documents_classification" +WHERE "documents_classification"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + LEFT OUTER JOIN "process_workerrun" U3 ON (U0."worker_run_id" = U3."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U0."id" = '{element_id}'::uuid + AND U3."configuration_id" IS NULL)); + +DELETE +FROM "documents_selection" +WHERE "documents_selection"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + LEFT OUTER JOIN "process_workerrun" U3 ON (U0."worker_run_id" = U3."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U0."id" = '{element_id}'::uuid + AND U3."configuration_id" IS NULL)); + +DELETE +FROM "process_processelement" +WHERE "process_processelement"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + LEFT OUTER JOIN "process_workerrun" U3 ON (U0."worker_run_id" = U3."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U0."id" = '{element_id}'::uuid + AND U3."configuration_id" IS NULL)); + +UPDATE "process_process" +SET "element_id" = NULL +WHERE "process_process"."element_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + LEFT OUTER JOIN "process_workerrun" U3 ON (U0."worker_run_id" = U3."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U0."id" = '{element_id}'::uuid + AND U3."configuration_id" IS NULL)); + +UPDATE "process_process" +SET "train_folder_id" = NULL +WHERE "process_process"."train_folder_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + LEFT OUTER JOIN "process_workerrun" U3 ON (U0."worker_run_id" = U3."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U0."id" = '{element_id}'::uuid + AND U3."configuration_id" IS NULL)); + +UPDATE "process_process" +SET "validation_folder_id" = NULL +WHERE "process_process"."validation_folder_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + LEFT OUTER JOIN "process_workerrun" U3 ON (U0."worker_run_id" = U3."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U0."id" = '{element_id}'::uuid + AND U3."configuration_id" IS NULL)); + +UPDATE "process_process" +SET "test_folder_id" = NULL +WHERE "process_process"."test_folder_id" IN + (SELECT U0."id" + FROM "documents_element" U0 + LEFT OUTER JOIN "process_workerrun" U3 ON (U0."worker_run_id" = U3."id") + WHERE (U0."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U0."id" = '{element_id}'::uuid + AND U3."configuration_id" IS NULL)); + +WITH element_ids (id) AS + (DELETE + FROM documents_elementpath + WHERE element_id IN + (SELECT "documents_element"."id" + FROM "documents_element" + LEFT OUTER JOIN "process_workerrun" ON ("documents_element"."worker_run_id" = "process_workerrun"."id") + WHERE ("documents_element"."corpus_id" = '{corpus_id}'::uuid + AND NOT ("documents_element"."worker_version_id" IS NULL) + AND "documents_element"."id" = '{element_id}'::uuid + AND "process_workerrun"."configuration_id" IS NULL)) RETURNING element_id) +DELETE +FROM documents_element element USING element_ids +WHERE element.id = element_ids.id ; + +SELECT "documents_element"."id" +FROM "documents_element" +LEFT OUTER JOIN "process_workerrun" ON ("documents_element"."worker_run_id" = "process_workerrun"."id") +WHERE ("documents_element"."corpus_id" = '{corpus_id}'::uuid + AND NOT ("documents_element"."worker_version_id" IS NULL) + AND "documents_element"."id" = '{element_id}'::uuid + AND "process_workerrun"."configuration_id" IS NULL); + +DELETE +FROM "documents_classification" +WHERE "documents_classification"."id" IN + (SELECT U0."id" + FROM "documents_classification" U0 + INNER JOIN "documents_element" U1 ON (U0."element_id" = U1."id") + LEFT OUTER JOIN "process_workerrun" U4 ON (U0."worker_run_id" = U4."id") + WHERE (U1."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U0."element_id" = '{element_id}'::uuid + AND U4."configuration_id" IS NULL)); + +DELETE +FROM "documents_transcriptionentity" +WHERE "documents_transcriptionentity"."id" IN + (SELECT U0."id" + FROM "documents_transcriptionentity" U0 + INNER JOIN "documents_transcription" U1 ON (U0."transcription_id" = U1."id") + INNER JOIN "documents_element" U2 ON (U1."element_id" = U2."id") + LEFT OUTER JOIN "process_workerrun" U5 ON (U1."worker_run_id" = U5."id") + WHERE (U2."corpus_id" = '{corpus_id}'::uuid + AND NOT (U1."worker_version_id" IS NULL) + AND U1."element_id" = '{element_id}'::uuid + AND U5."configuration_id" IS NULL)); + +DELETE +FROM "documents_transcriptionentity" +WHERE "documents_transcriptionentity"."id" IN + (SELECT U0."id" + FROM "documents_transcriptionentity" U0 + INNER JOIN "documents_transcription" U1 ON (U0."transcription_id" = U1."id") + INNER JOIN "documents_element" U2 ON (U1."element_id" = U2."id") + INNER JOIN "documents_entity" U4 ON (U0."entity_id" = U4."id") + LEFT OUTER JOIN "process_workerrun" U7 ON (U0."worker_run_id" = U7."id") + WHERE ((U2."corpus_id" = '{corpus_id}'::uuid + OR U4."corpus_id" = '{corpus_id}'::uuid) + AND NOT (U0."worker_version_id" IS NULL) + AND U1."element_id" = '{element_id}'::uuid + AND U7."configuration_id" IS NULL)); + +DELETE +FROM "documents_transcription" +WHERE "documents_transcription"."id" IN + (SELECT U0."id" + FROM "documents_transcription" U0 + INNER JOIN "documents_element" U1 ON (U0."element_id" = U1."id") + LEFT OUTER JOIN "process_workerrun" U4 ON (U0."worker_run_id" = U4."id") + WHERE (U1."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U0."element_id" = '{element_id}'::uuid + AND U4."configuration_id" IS NULL)); + +DELETE +FROM "documents_metadata" +WHERE "documents_metadata"."id" IN + (SELECT U0."id" + FROM "documents_metadata" U0 + INNER JOIN "documents_element" U1 ON (U0."element_id" = U1."id") + LEFT OUTER JOIN "process_workerrun" U4 ON (U0."worker_run_id" = U4."id") + WHERE (U1."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U0."element_id" = '{element_id}'::uuid + AND U4."configuration_id" IS NULL)); + +DELETE +FROM "process_workeractivity" +WHERE "process_workeractivity"."id" IN + (SELECT U0."id" + FROM "process_workeractivity" U0 + INNER JOIN "documents_element" U1 ON (U0."element_id" = U1."id") + WHERE (U1."corpus_id" = '{corpus_id}'::uuid + AND NOT (U0."worker_version_id" IS NULL) + AND U0."element_id" = '{element_id}'::uuid + AND U0."configuration_id" IS NULL)) diff --git a/arkindex/training/models.py b/arkindex/training/models.py index 10d7f20066be94ba0857870d92ead6696a187081..1a2ae3e955e64635eb1332cdfbd50e68c8c72860 100644 --- a/arkindex/training/models.py +++ b/arkindex/training/models.py @@ -162,6 +162,9 @@ class ModelVersion(S3FileMixin, IndexableModel): """ return sha256((str(self.id) + self.hash + settings.SECRET_KEY).encode('utf-8')).hexdigest() + def __str__(self): + return f'{self.tag} ({self.hash[:8]}…)' + class MetricMode(Enum): Series = 'series'