Commit b4969adf authored by ml bonhomme

fix delete corpus

parent 9467a95d
This commit is part of merge request !2256.
@@ -2,6 +2,8 @@ SELECT
     dataset.id,
     dataset.name,
     dataset.state,
-    ARRAY_TO_STRING(dataset.sets, ',', '')
+    string_agg(datasetset.name, ',')
 FROM training_dataset dataset
+INNER JOIN training_datasetset datasetset ON datasetset.dataset_id = dataset.id
 WHERE dataset.corpus_id = '{corpus_id}'::uuid
+GROUP BY dataset.id
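The dataset export query now derives set names from the new training_datasetset table instead of an array column on the dataset row itself. A minimal sketch (not from this commit) of an equivalent Django queryset, assuming the DatasetSet model exposes a "sets" related name on Dataset, as the rest of the diff suggests:

    # Hedged sketch only; the export actually runs the raw SQL above.
    # corpus_id is a placeholder variable.
    from django.contrib.postgres.aggregates import StringAgg
    from arkindex.training.models import Dataset

    datasets = (
        Dataset.objects.filter(corpus_id=corpus_id)
        # mirrors string_agg(datasetset.name, ',') in the SQL above
        .annotate(set_names=StringAgg("sets__name", delimiter=","))
        .values_list("id", "name", "state", "set_names")
    )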
@@ ... @@
 SELECT
     dataset_element.id,
     dataset_element.element_id,
-    dataset_element.dataset_id,
-    dataset_element.set
+    dataset_set.dataset_id,
+    dataset_set.name
 FROM training_datasetelement dataset_element
-INNER JOIN training_dataset dataset ON (dataset_element.dataset_id = dataset.id)
+INNER JOIN training_datasetset dataset_set ON (dataset_element.set_id = dataset_set.id)
+INNER JOIN training_dataset dataset ON (dataset_set.dataset_id = dataset.id)
 WHERE dataset.corpus_id = '{corpus_id}'::uuid
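The element export query follows suit: the dataset id and set name now come from training_datasetset, reached through the element's set_id foreign key. A hypothetical ORM equivalent (not part of this commit):

    from arkindex.training.models import DatasetElement

    rows = DatasetElement.objects.filter(
        set__dataset__corpus_id=corpus_id
    ).values_list("id", "element_id", "set__dataset_id", "set__name")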
@@ -24,7 +24,7 @@ from arkindex.documents.models import (
 )
 from arkindex.ponos.models import Task
 from arkindex.process.models import Process, ProcessDataset, ProcessElement, WorkerActivity, WorkerRun
-from arkindex.training.models import DatasetElement
+from arkindex.training.models import DatasetElement, DatasetSet
 from arkindex.users.models import User

 logger = logging.getLogger(__name__)
@@ -73,7 +73,8 @@ def corpus_delete(corpus_id: str) -> None:
         # ProcessDataset M2M
         ProcessDataset.objects.filter(dataset__corpus_id=corpus_id),
         ProcessDataset.objects.filter(process__corpus_id=corpus_id),
-        DatasetElement.objects.filter(dataset__corpus_id=corpus_id),
+        DatasetElement.objects.filter(set__dataset__corpus_id=corpus_id),
+        DatasetSet.objects.filter(dataset__corpus_id=corpus_id),
         corpus.datasets.all(),
         # Delete the hidden M2M task parents table
         Task.parents.through.objects.filter(from_task__process__corpus_id=corpus_id),
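Ordering matters in the hunk above: assuming corpus_delete removes these querysets sequentially, DatasetElement rows (which reference sets through set_id) must go before DatasetSet rows, which in turn must go before the datasets themselves. A standalone sketch of the dependency chain being respected:

    # Children first, parents last (hypothetical equivalent, not the task's code):
    DatasetElement.objects.filter(set__dataset__corpus_id=corpus_id).delete()  # references DatasetSet
    DatasetSet.objects.filter(dataset__corpus_id=corpus_id).delete()           # references Dataset
    corpus.datasets.all().delete()                                             # the Dataset rows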
@@ -5,7 +5,7 @@ from arkindex.documents.tasks import corpus_delete
 from arkindex.ponos.models import Farm, State, Task
 from arkindex.process.models import CorpusWorkerVersion, ProcessDataset, ProcessMode, Repository, WorkerVersion
 from arkindex.project.tests import FixtureTestCase, force_constraints_immediate
-from arkindex.training.models import Dataset
+from arkindex.training.models import Dataset, DatasetSet


 class TestDeleteCorpus(FixtureTestCase):
@@ -114,18 +114,25 @@ class TestDeleteCorpus(FixtureTestCase):
         cls.corpus2 = Corpus.objects.create(name="Other corpus")
         dataset1 = Dataset.objects.get(name="First Dataset")
-        dataset1.dataset_elements.create(element=element, set="test")
+        test_set_1 = dataset1.sets.get(name="test")
+        test_set_1.set_elements.create(element=element)
         cls.dataset2 = Dataset.objects.create(name="Dead Sea Scrolls", description="How to trigger a Third Impact", creator=cls.user, corpus=cls.corpus2)
+        DatasetSet.objects.bulk_create(
+            DatasetSet(
+                dataset=cls.dataset2,
+                name=set_name
+            ) for set_name in ["test", "training", "validation"]
+        )

         # Process on cls.corpus and with a dataset from cls.corpus
         dataset_process1 = cls.corpus.processes.create(creator=cls.user, mode=ProcessMode.Dataset)
-        ProcessDataset.objects.create(process=dataset_process1, dataset=dataset1, sets=dataset1.sets)
+        ProcessDataset.objects.create(process=dataset_process1, dataset=dataset1, sets=list(dataset1.sets.values_list("name", flat=True)))
         # Process on cls.corpus with a dataset from another corpus
         dataset_process2 = cls.corpus.processes.create(creator=cls.user, mode=ProcessMode.Dataset)
-        ProcessDataset.objects.create(process=dataset_process2, dataset=dataset1, sets=dataset1.sets)
-        ProcessDataset.objects.create(process=dataset_process2, dataset=cls.dataset2, sets=cls.dataset2.sets)
+        ProcessDataset.objects.create(process=dataset_process2, dataset=dataset1, sets=list(dataset1.sets.values_list("name", flat=True)))
+        ProcessDataset.objects.create(process=dataset_process2, dataset=cls.dataset2, sets=list(cls.dataset2.sets.values_list("name", flat=True)))
         # Process on another corpus with a dataset from another corpus and none from cls.corpus
         cls.dataset_process3 = cls.corpus2.processes.create(creator=cls.user, mode=ProcessMode.Dataset)
-        ProcessDataset.objects.create(process=cls.dataset_process3, dataset=cls.dataset2, sets=cls.dataset2.sets)
+        ProcessDataset.objects.create(process=cls.dataset_process3, dataset=cls.dataset2, sets=list(cls.dataset2.sets.values_list("name", flat=True)))

         cls.rev = cls.repo.revisions.create(
             hash="42",
@@ -25,6 +25,7 @@ from arkindex.images.models import Image, ImageServer
 from arkindex.ponos.models import Artifact
 from arkindex.process.models import Repository, WorkerType, WorkerVersion, WorkerVersionState
 from arkindex.project.tests import FixtureTestCase
+from arkindex.training.models import DatasetElement

 TABLE_NAMES = {
     "export_version",
@@ -132,8 +133,9 @@ class TestExport(FixtureTestCase):
         )
         dataset = self.corpus.datasets.get(name="First Dataset")
-        dataset.dataset_elements.create(element=element, set="train")
-        dataset.dataset_elements.create(element=element, set="validation")
+        _, train_set, validation_set = dataset.sets.all().order_by("name")
+        train_set.set_elements.create(element=element)
+        validation_set.set_elements.create(element=element)

         export = self.corpus.exports.create(user=self.user)
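The tuple unpacking above relies on the alphabetical order of the fixture's set names: assuming the default sets are "test", "train" and "validation", order_by("name") yields them in that order, so the discarded first item is the test set. A sketch of that assumption:

    names = list(dataset.sets.order_by("name").values_list("name", flat=True))
    assert names == ["test", "train", "validation"]  # assumed fixture contents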
@@ -489,7 +491,7 @@ class TestExport(FixtureTestCase):
                 (
                     str(dataset.id),
                     dataset.name,
-                    ",".join(dataset.sets),
+                    ",".join(list(dataset.sets.values_list("name", flat=True))),
                 ) for dataset in self.corpus.datasets.all()
             ]
         )
@@ -507,9 +509,9 @@ class TestExport(FixtureTestCase):
                 (
                     str(dataset_element.id),
                     str(dataset_element.element_id),
-                    str(dataset_element.dataset_id),
-                    dataset_element.set
-                ) for dataset_element in dataset.dataset_elements.all()
+                    str(dataset_element.set.dataset_id),
+                    dataset_element.set.name
+                ) for dataset_element in DatasetElement.objects.filter(set__dataset_id=dataset.id)
             ]
         )
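Since DatasetElement no longer carries dataset_id or set directly, the expected rows traverse set.dataset_id and set.name. A hedged, equivalent formulation that builds the same tuples in a single query instead of per-object attribute access:

    expected = [
        (str(pk), str(element_id), str(dataset_id), name)
        for pk, element_id, dataset_id, name in DatasetElement.objects
        .filter(set__dataset_id=dataset.id)
        .values_list("id", "element_id", "set__dataset_id", "set__name")
    ]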
@@ -183,7 +183,8 @@ class TestDeleteSelectionWorkerResults(FixtureTestCase):
         job_mock.return_value.user_id = self.user.id
         self.page1.worker_version = self.version
         self.page1.save()
-        Dataset.objects.get(name="First Dataset").dataset_elements.create(element=self.page1, set="test")
+        dataset = Dataset.objects.get(name="First Dataset")
+        dataset.sets.get(name="test").set_elements.create(element=self.page1)
         self.user.selected_elements.set([self.page1])

         selection_worker_results_delete(corpus_id=self.corpus.id, version_id=self.version.id)
@@ -6,7 +6,7 @@ from arkindex.documents.models import Entity, EntityType, MLClass, Transcription
 from arkindex.documents.tasks import worker_results_delete
 from arkindex.process.models import ProcessMode, WorkerVersion
 from arkindex.project.tests import FixtureTestCase
-from arkindex.training.models import Dataset, Model, ModelVersionState
+from arkindex.training.models import DatasetSet, Model, ModelVersionState


 class TestDeleteWorkerResults(FixtureTestCase):
@@ -270,7 +270,7 @@ class TestDeleteWorkerResults(FixtureTestCase):
         self.page1.worker_run = self.worker_run_1
         self.page1.worker_version = self.version_1
         self.page1.save()
-        Dataset.objects.get(name="First Dataset").dataset_elements.create(element=self.page1, set="test")
+        DatasetSet.objects.get(name="test", dataset__name="First Dataset").set_elements.create(element=self.page1)

         worker_results_delete(corpus_id=self.corpus.id)
         # Prevent delaying constraints check at end of the test transaction
@@ -148,7 +148,8 @@ class TestDestroyElements(FixtureAPITestCase):
         """
         An element cannot be deleted via the API if linked to a dataset
         """
-        Dataset.objects.get(name="First Dataset").dataset_elements.create(element=self.vol, set="test")
+        dataset = Dataset.objects.get(name="First Dataset")
+        dataset.sets.get(name="test").set_elements.create(element=self.vol)
         self.client.force_login(self.user)
         with self.assertNumQueries(3):
             response = self.client.delete(reverse("api:element-retrieve", kwargs={"pk": str(self.vol.id)}))
@@ -179,9 +180,9 @@ class TestDestroyElements(FixtureAPITestCase):
         """
         Elements that are part of a dataset cannot be deleted
         """
-        Dataset.objects.get(name="First Dataset").dataset_elements.create(
-            element=Element.objects.get_descending(self.vol.id).first(),
-            set="test",
+        dataset = Dataset.objects.get(name="First Dataset")
+        dataset.sets.get(name="test").set_elements.create(
+            element=Element.objects.get_descending(self.vol.id).first()
         )
         Element.objects.filter(id=self.vol.id).trash()
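The same two-step lookup recurs throughout these tests; a hypothetical helper (not introduced by this commit, names are illustrative) that would factor it out:

    def add_element_to_set(element, dataset_name="First Dataset", set_name="test"):
        """Link an element to a named set of a named dataset."""
        dataset = Dataset.objects.get(name=dataset_name)
        return dataset.sets.get(name=set_name).set_elements.create(element=element)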
@@ -185,6 +185,15 @@ FROM "training_datasetelement"
 WHERE "training_datasetelement"."id" IN
     (SELECT U0."id"
      FROM "training_datasetelement" U0
-     INNER JOIN "training_dataset" U1 ON (U0."dataset_id" = U1."id")
-     WHERE U1."corpus_id" = '{corpus_id}'::uuid);
+     INNER JOIN "training_datasetset" U1 ON (U0."set_id" = U1."id")
+     INNER JOIN "training_dataset" U2 ON (U1."dataset_id" = U2."id")
+     WHERE U2."corpus_id" = '{corpus_id}'::uuid);
+
+DELETE
+FROM "training_datasetset"
+WHERE "training_datasetset"."id" IN
+    (SELECT U0."id"
+     FROM "training_datasetset" U0
+     INNER JOIN "training_dataset" U1 ON (U0."dataset_id" = U1."id")
+     WHERE U1."corpus_id" = '{corpus_id}'::uuid);

@@ -189,8 +189,17 @@ FROM "training_datasetelement"
 WHERE "training_datasetelement"."id" IN
     (SELECT U0."id"
      FROM "training_datasetelement" U0
-     INNER JOIN "training_dataset" U1 ON (U0."dataset_id" = U1."id")
-     WHERE U1."corpus_id" = '{corpus_id}'::uuid);
+     INNER JOIN "training_datasetset" U1 ON (U0."set_id" = U1."id")
+     INNER JOIN "training_dataset" U2 ON (U1."dataset_id" = U2."id")
+     WHERE U2."corpus_id" = '{corpus_id}'::uuid);
+
+DELETE
+FROM "training_datasetset"
+WHERE "training_datasetset"."id" IN
+    (SELECT U0."id"
+     FROM "training_datasetset" U0
+     INNER JOIN "training_dataset" U1 ON (U0."dataset_id" = U1."id")
+     WHERE U1."corpus_id" = '{corpus_id}'::uuid);

 DELETE
 FROM "training_dataset"