From 879db1123709026d0cd5e70afc96f76334b95b23 Mon Sep 17 00:00:00 2001 From: mlbonhomme <bonhomme@teklia.com> Date: Wed, 6 Mar 2024 11:52:41 +0100 Subject: [PATCH] update models + data migration --- .../migrations/0007_datasetset_model.py | 101 ++++++++++++++++++ arkindex/training/models.py | 45 ++++---- 2 files changed, 120 insertions(+), 26 deletions(-) create mode 100644 arkindex/training/migrations/0007_datasetset_model.py diff --git a/arkindex/training/migrations/0007_datasetset_model.py b/arkindex/training/migrations/0007_datasetset_model.py new file mode 100644 index 0000000000..94e1b35bfc --- /dev/null +++ b/arkindex/training/migrations/0007_datasetset_model.py @@ -0,0 +1,101 @@ +# Generated by Django 4.1.7 on 2024-03-05 16:28 + +import uuid + +import django.core.validators +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("training", "0006_model_archived"), + ] + + operations = [ + migrations.CreateModel( + name="DatasetSet", + fields=[ + ("id", models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)), + ("name", models.CharField(max_length=50, validators=[django.core.validators.MinLengthValidator(1)])), + ("dataset", models.ForeignKey(on_delete=django.db.models.deletion.DO_NOTHING, related_name="tmp_sets", to="training.dataset")), + ], + ), + migrations.AddConstraint( + model_name="datasetset", + constraint=models.UniqueConstraint(fields=("dataset", "name"), name="unique_dataset_sets"), + ), + migrations.AddField( + model_name="datasetelement", + name="set_id", + field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name="dataset_elements", to="training.datasetset"), + ), + migrations.RunSQL( + """ + INSERT INTO training_datasetset (id, dataset_id, name) + SELECT uuid_generate_v4(), ds.id, ds.set + FROM ( + SELECT DISTINCT id, unnest(sets) AS set + FROM training_dataset + ) ds + """, + 
reverse_sql=migrations.RunSQL.noop, ), migrations.RunSQL( """ INSERT INTO training_datasetset (id, dataset_id, name) SELECT uuid_generate_v4(), sets.dataset_id, sets.set FROM ( SELECT DISTINCT dataset_id, set FROM training_datasetelement ) sets ON CONFLICT DO NOTHING """, reverse_sql=migrations.RunSQL.noop, ), migrations.RunSQL( """ UPDATE training_datasetelement de SET set_id_id = ds.id FROM training_datasetset ds WHERE de.dataset_id = ds.dataset_id AND de.set = ds.name """, reverse_sql=migrations.RunSQL.noop, ), migrations.RemoveConstraint( model_name="datasetelement", name="unique_dataset_elements", ), migrations.RemoveField( model_name="datasetelement", name="dataset" ), migrations.RemoveField( model_name="datasetelement", name="set" ), migrations.RenameField( model_name="datasetelement", old_name="set_id", new_name="set" ), migrations.AlterField( model_name="datasetelement", name="set", field=models.ForeignKey(on_delete=django.db.models.deletion.DO_NOTHING, related_name="dataset_elements", to="training.datasetset"), ), migrations.RemoveField( model_name="dataset", name="sets" ), migrations.RemoveField( model_name="dataset", name="elements", ), migrations.AlterField( model_name="datasetset", name="dataset", field=models.ForeignKey(on_delete=django.db.models.deletion.DO_NOTHING, related_name="sets", to="training.dataset"), ), ] diff --git a/arkindex/training/models.py b/arkindex/training/models.py index adf03e8193..363393d2b8 100644 --- a/arkindex/training/models.py +++ b/arkindex/training/models.py @@ -4,7 +4,6 @@ from hashlib import sha256 from django.conf import settings from django.contrib.contenttypes.fields import GenericRelation -from django.contrib.postgres.fields import ArrayField from django.core.exceptions import ValidationError from django.core.validators import MinLengthValidator from django.db import models @@ -278,21 +277,6 @@ class
Dataset(models.Model): description = models.TextField(validators=[MinLengthValidator(1)]) state = EnumField(DatasetState, default=DatasetState.Open, max_length=50) - sets = ArrayField( - models.CharField(max_length=50, validators=[MinLengthValidator(1)]), - validators=[ - MinLengthValidator(1), - validate_unique_set_names, - ], - default=default_sets, - ) - - elements = models.ManyToManyField( - "documents.Element", - through="training.DatasetElement", - related_name="datasets", - ) - class Meta: constraints = [ models.UniqueConstraint( @@ -305,24 +289,33 @@ class Dataset(models.Model): return self.name -class DatasetElement(models.Model): +class DatasetSet(models.Model): id = models.UUIDField(default=uuid.uuid4, primary_key=True, editable=False) + name = models.CharField(max_length=50, validators=[MinLengthValidator(1)]) dataset = models.ForeignKey( Dataset, - related_name="dataset_elements", - on_delete=models.DO_NOTHING, - ) - element = models.ForeignKey( - "documents.Element", - related_name="dataset_elements", + related_name="sets", on_delete=models.DO_NOTHING, ) - set = models.CharField(max_length=50, validators=[MinLengthValidator(1)]) class Meta: constraints = [ models.UniqueConstraint( - fields=["dataset", "element", "set"], - name="unique_dataset_elements", + fields=["dataset", "name"], + name="unique_dataset_sets", ), ] + + +class DatasetElement(models.Model): + id = models.UUIDField(default=uuid.uuid4, primary_key=True, editable=False) + element = models.ForeignKey( + "documents.Element", + related_name="dataset_elements", + on_delete=models.DO_NOTHING, + ) + set = models.ForeignKey( + DatasetSet, + related_name="dataset_elements", + on_delete=models.DO_NOTHING, + ) -- GitLab