Commit 2217829b authored by ml bonhomme

fix delete corpus

parent 0b9d79bf
1 merge request: !2256 New DatasetSet model
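For context, everything below adapts corpus deletion and the SQLite export to the DatasetSet model this merge request introduces. As a reading aid only, here is a hypothetical sketch of the relationship, inferred from the ORM lookups used in the diff (dataset.sets, set_elements, set__dataset__corpus_id); the real models live in arkindex.training.models and may differ:

    # Hypothetical sketch -- inferred from the ORM lookups in this diff,
    # not copied from the merge request.
    from django.db import models

    class Dataset(models.Model):
        # A dataset still belongs to a corpus.
        corpus = models.ForeignKey("documents.Corpus", related_name="datasets", on_delete=models.CASCADE)
        name = models.CharField(max_length=100)

    class DatasetSet(models.Model):
        # New: each set ("test", "training", "validation") is a row of its
        # own, replacing the old array of set names stored on Dataset.
        dataset = models.ForeignKey(Dataset, related_name="sets", on_delete=models.CASCADE)
        name = models.CharField(max_length=50)

    class DatasetElement(models.Model):
        # Elements now reference a DatasetSet (set_id) instead of carrying
        # a dataset_id plus a raw set name.
        set = models.ForeignKey(DatasetSet, related_name="set_elements", on_delete=models.CASCADE)
        element = models.ForeignKey("documents.Element", on_delete=models.CASCADE)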
@@ -2,6 +2,8 @@ SELECT
     dataset.id,
     dataset.name,
     dataset.state,
-    ARRAY_TO_STRING(dataset.sets, ',', '')
+    string_agg(datasetset.name, ',')
 FROM training_dataset dataset
+INNER JOIN training_datasetset datasetset ON datasetset.dataset_id = dataset.id
 WHERE dataset.corpus_id = '{corpus_id}'::uuid
+GROUP BY dataset.id
 SELECT
     dataset_element.id,
     dataset_element.element_id,
-    dataset_element.dataset_id,
-    dataset_element.set
+    dataset_set.dataset_id,
+    dataset_set.name
 FROM training_datasetelement dataset_element
-INNER JOIN training_dataset dataset ON (dataset_element.dataset_id = dataset.id)
+INNER JOIN training_datasetset dataset_set ON (dataset_element.set_id = dataset_set.id)
+INNER JOIN training_dataset dataset ON (dataset_set.dataset_id = dataset.id)
 WHERE dataset.corpus_id = '{corpus_id}'::uuid
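A rough ORM equivalent of the updated dataset export query, as a sketch under the model assumptions above (not code from this commit), may make the string_agg + GROUP BY change easier to read:

    # One row per dataset, with its set names aggregated into a
    # comma-separated string; corpus_id is assumed to be bound already.
    from django.contrib.postgres.aggregates import StringAgg
    from arkindex.training.models import Dataset

    rows = (
        Dataset.objects.filter(corpus_id=corpus_id)
        .annotate(set_names=StringAgg("sets__name", delimiter=","))
        .values_list("id", "name", "state", "set_names")
    )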
@@ -24,7 +24,7 @@ from arkindex.documents.models import (
 )
 from arkindex.ponos.models import Task
 from arkindex.process.models import Process, ProcessDataset, ProcessElement, WorkerActivity, WorkerRun
-from arkindex.training.models import DatasetElement
+from arkindex.training.models import DatasetElement, DatasetSet
 from arkindex.users.models import User

 logger = logging.getLogger(__name__)
@@ -73,7 +73,8 @@ def corpus_delete(corpus_id: str) -> None:
         # ProcessDataset M2M
         ProcessDataset.objects.filter(dataset__corpus_id=corpus_id),
         ProcessDataset.objects.filter(process__corpus_id=corpus_id),
-        DatasetElement.objects.filter(dataset__corpus_id=corpus_id),
+        DatasetElement.objects.filter(set__dataset__corpus_id=corpus_id),
+        DatasetSet.objects.filter(dataset__corpus_id=corpus_id),
         corpus.datasets.all(),
         # Delete the hidden M2M task parents table
         Task.parents.through.objects.filter(from_task__process__corpus_id=corpus_id),
......
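The order of the two new querysets above matters: DatasetElement rows point at DatasetSet through set_id, so elements are deleted first, then the sets, then corpus.datasets.all(). A minimal sketch of the pattern; the _raw_delete call is an assumption based on the single-statement DELETEs in the SQL snapshots further down, not something visible in this hunk:

    # Delete child rows before the rows they reference; raw deletes bypass
    # Django's cascade handling, so the ordering is load-bearing.
    querysets = [
        DatasetElement.objects.filter(set__dataset__corpus_id=corpus_id),  # leaves first
        DatasetSet.objects.filter(dataset__corpus_id=corpus_id),           # then their sets
        corpus.datasets.all(),                                             # then the datasets
    ]
    for queryset in querysets:
        queryset._raw_delete(using="default")  # private Django API; assumed here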
@@ -5,7 +5,7 @@ from arkindex.documents.tasks import corpus_delete
 from arkindex.ponos.models import Farm, State, Task
 from arkindex.process.models import CorpusWorkerVersion, Process, ProcessDataset, ProcessMode, Repository, WorkerVersion
 from arkindex.project.tests import FixtureTestCase, force_constraints_immediate
-from arkindex.training.models import Dataset
+from arkindex.training.models import Dataset, DatasetSet


 class TestDeleteCorpus(FixtureTestCase):
@@ -114,18 +114,25 @@ class TestDeleteCorpus(FixtureTestCase):
         cls.corpus2 = Corpus.objects.create(name="Other corpus")
         dataset1 = Dataset.objects.get(name="First Dataset")
-        dataset1.dataset_elements.create(element=element, set="test")
+        test_set_1 = dataset1.sets.get(name="test")
+        test_set_1.set_elements.create(element=element)
         cls.dataset2 = Dataset.objects.create(name="Dead Sea Scrolls", description="How to trigger a Third Impact", creator=cls.user, corpus=cls.corpus2)
+        DatasetSet.objects.bulk_create(
+            DatasetSet(
+                dataset=cls.dataset2,
+                name=set_name
+            ) for set_name in ["test", "training", "validation"]
+        )
         # Process on cls.corpus and with a dataset from cls.corpus
         dataset_process1 = cls.corpus.processes.create(creator=cls.user, mode=ProcessMode.Dataset)
-        ProcessDataset.objects.create(process=dataset_process1, dataset=dataset1, sets=dataset1.sets)
+        ProcessDataset.objects.create(process=dataset_process1, dataset=dataset1, sets=list(dataset1.sets.values_list("name", flat=True)))
         # Process on cls.corpus with a dataset from another corpus
         dataset_process2 = cls.corpus.processes.create(creator=cls.user, mode=ProcessMode.Dataset)
-        ProcessDataset.objects.create(process=dataset_process2, dataset=dataset1, sets=dataset1.sets)
-        ProcessDataset.objects.create(process=dataset_process2, dataset=cls.dataset2, sets=cls.dataset2.sets)
+        ProcessDataset.objects.create(process=dataset_process2, dataset=dataset1, sets=list(dataset1.sets.values_list("name", flat=True)))
+        ProcessDataset.objects.create(process=dataset_process2, dataset=cls.dataset2, sets=list(cls.dataset2.sets.values_list("name", flat=True)))
         # Process on another corpus with a dataset from another corpus and none from cls.corpus
         cls.dataset_process3 = cls.corpus2.processes.create(creator=cls.user, mode=ProcessMode.Dataset)
-        ProcessDataset.objects.create(process=cls.dataset_process3, dataset=cls.dataset2, sets=cls.dataset2.sets)
+        ProcessDataset.objects.create(process=cls.dataset_process3, dataset=cls.dataset2, sets=list(cls.dataset2.sets.values_list("name", flat=True)))
         cls.rev = cls.repo.revisions.create(
             hash="42",
......
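The repeated list(dataset.sets.values_list("name", flat=True)) above is the mechanical consequence of the schema change: Dataset.sets used to be a plain list of names and is now a reverse foreign-key manager, while ProcessDataset.sets still stores bare names. For instance, with the fixture sets assumed in this test:

    # dataset1.sets is now a RelatedManager, not a list; flatten it back
    # into the list of names that ProcessDataset.sets expects.
    set_names = list(dataset1.sets.values_list("name", flat=True))
    # e.g. ["test", "training", "validation"]
    ProcessDataset.objects.create(process=dataset_process1, dataset=dataset1, sets=set_names)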
@@ -24,6 +24,7 @@ from arkindex.documents.models import (
 from arkindex.images.models import Image, ImageServer
 from arkindex.process.models import Repository, WorkerType, WorkerVersion, WorkerVersionState
 from arkindex.project.tests import FixtureTestCase
+from arkindex.training.models import DatasetElement

 TABLE_NAMES = {
     "export_version",
@@ -131,8 +132,9 @@ class TestExport(FixtureTestCase):
         )
         dataset = self.corpus.datasets.get(name="First Dataset")
-        dataset.dataset_elements.create(element=element, set="train")
-        dataset.dataset_elements.create(element=element, set="validation")
+        _, train_set, validation_set = dataset.sets.all().order_by("name")
+        train_set.set_elements.create(element=element)
+        validation_set.set_elements.create(element=element)
         export = self.corpus.exports.create(user=self.user)
@@ -488,7 +490,7 @@ class TestExport(FixtureTestCase):
                 (
                     str(dataset.id),
                     dataset.name,
-                    ",".join(dataset.sets),
+                    ",".join(list(dataset.sets.values_list("name", flat=True))),
                 ) for dataset in self.corpus.datasets.all()
             ]
         )
@@ -506,9 +508,9 @@ class TestExport(FixtureTestCase):
                 (
                     str(dataset_element.id),
                     str(dataset_element.element_id),
-                    str(dataset_element.dataset_id),
-                    dataset_element.set
-                ) for dataset_element in dataset.dataset_elements.all()
+                    str(dataset_element.set.dataset_id),
+                    dataset_element.set.name
+                ) for dataset_element in DatasetElement.objects.filter(set__dataset_id=dataset.id)
             ]
         )
......
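A side note on the rewritten assertion above: dataset_element.set.dataset_id and .set.name each follow the new foreign key, so a variant like the sketch below (not part of the commit) would keep the comparison to a single query:

    # select_related fetches the DatasetSet in the same query, avoiding one
    # extra lookup per element when reading .set.dataset_id and .set.name.
    dataset_elements = (
        DatasetElement.objects.filter(set__dataset_id=dataset.id)
        .select_related("set")
    )
    rows = [
        (str(de.id), str(de.element_id), str(de.set.dataset_id), de.set.name)
        for de in dataset_elements
    ]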
......@@ -183,7 +183,8 @@ class TestDeleteSelectionWorkerResults(FixtureTestCase):
job_mock.return_value.user_id = self.user.id
self.page1.worker_version = self.version
self.page1.save()
Dataset.objects.get(name="First Dataset").dataset_elements.create(element=self.page1, set="test")
dataset = Dataset.objects.get(name="First Dataset")
dataset.sets.get(name="test").set_elements.create(element=self.page1)
self.user.selected_elements.set([self.page1])
selection_worker_results_delete(corpus_id=self.corpus.id, version_id=self.version.id)
......
@@ -6,7 +6,7 @@ from arkindex.documents.models import Entity, EntityType, MLClass, Transcription
 from arkindex.documents.tasks import worker_results_delete
 from arkindex.process.models import ProcessMode, WorkerVersion
 from arkindex.project.tests import FixtureTestCase
-from arkindex.training.models import Dataset, Model, ModelVersionState
+from arkindex.training.models import DatasetSet, Model, ModelVersionState


 class TestDeleteWorkerResults(FixtureTestCase):
@@ -270,7 +270,7 @@ class TestDeleteWorkerResults(FixtureTestCase):
         self.page1.worker_run = self.worker_run_1
         self.page1.worker_version = self.version_1
         self.page1.save()
-        Dataset.objects.get(name="First Dataset").dataset_elements.create(element=self.page1, set="test")
+        DatasetSet.objects.get(name="test", dataset__name="First Dataset").set_elements.create(element=self.page1)
         worker_results_delete(corpus_id=self.corpus.id)

         # Prevent delaying constraints check at end of the test transaction
......
@@ -148,7 +148,8 @@ class TestDestroyElements(FixtureAPITestCase):
         """
         An element cannot be deleted via the API if linked to a dataset
         """
-        Dataset.objects.get(name="First Dataset").dataset_elements.create(element=self.vol, set="test")
+        dataset = Dataset.objects.get(name="First Dataset")
+        dataset.sets.get(name="test").set_elements.create(element=self.vol)
         self.client.force_login(self.user)
         with self.assertNumQueries(3):
             response = self.client.delete(reverse("api:element-retrieve", kwargs={"pk": str(self.vol.id)}))
@@ -179,9 +180,9 @@ class TestDestroyElements(FixtureAPITestCase):
         """
         Elements that are part of a dataset cannot be deleted
         """
-        Dataset.objects.get(name="First Dataset").dataset_elements.create(
-            element=Element.objects.get_descending(self.vol.id).first(),
-            set="test",
+        dataset = Dataset.objects.get(name="First Dataset")
+        dataset.sets.get(name="test").set_elements.create(
+            element=Element.objects.get_descending(self.vol.id).first()
         )
         Element.objects.filter(id=self.vol.id).trash()
......
@@ -185,6 +185,15 @@ FROM "training_datasetelement"
 WHERE "training_datasetelement"."id" IN
         (SELECT U0."id"
          FROM "training_datasetelement" U0
+         INNER JOIN "training_datasetset" U1 ON (U0."set_id" = U1."id")
+         INNER JOIN "training_dataset" U2 ON (U1."dataset_id" = U2."id")
+         WHERE U2."corpus_id" = '{corpus_id}'::uuid);
+DELETE
+FROM "training_datasetset"
+WHERE "training_datasetset"."id" IN
+        (SELECT U0."id"
+         FROM "training_datasetset" U0
          INNER JOIN "training_dataset" U1 ON (U0."dataset_id" = U1."id")
          WHERE U1."corpus_id" = '{corpus_id}'::uuid);
......
@@ -189,8 +189,17 @@ FROM "training_datasetelement"
 WHERE "training_datasetelement"."id" IN
         (SELECT U0."id"
          FROM "training_datasetelement" U0
-         INNER JOIN "training_dataset" U1 ON (U0."dataset_id" = U1."id")
-         WHERE U1."corpus_id" = '{corpus_id}'::uuid);
+         INNER JOIN "training_datasetset" U1 ON (U0."set_id" = U1."id")
+         INNER JOIN "training_dataset" U2 ON (U1."dataset_id" = U2."id")
+         WHERE U2."corpus_id" = '{corpus_id}'::uuid);
+DELETE
+FROM "training_datasetset"
+WHERE "training_datasetset"."id" IN
+        (SELECT U0."id"
+         FROM "training_datasetset" U0
+         INNER JOIN "training_dataset" U1 ON (U0."dataset_id" = U1."id")
+         WHERE U1."corpus_id" = '{corpus_id}'::uuid);
 DELETE
 FROM "training_dataset"
......