Skip to content
Snippets Groups Projects
Verified Commit 7a216aab authored by Erwan Rouchet's avatar Erwan Rouchet
Browse files

Split and add data migration

parent e1ec1ab9
No related branches found
No related tags found
No related merge requests found
# Generated by Django 3.1.3 on 2020-11-30 10:40
import os
from django.db import migrations, models
from arkindex.dataimport.models import RepositoryType
def migrate_data_sources(apps, schema_editor):
    """
    Convert every DataSource still in use into a worker version.

    Objects attached to the ``manual`` source simply lose their source, since
    a manual worker version is represented by ``None``. For each remaining
    DataSource referenced by at least one element, classification,
    transcription or entity, a Repository, a Worker, a placeholder revision
    and a worker version are fetched or created, and the related objects are
    re-pointed from the source to that worker version.

    The repository URL prefix is read from the ``REPOSITORY_PREFIX``
    environment variable and falls back to the Teklia workers group on GitLab.
    """
    url_prefix = os.environ.get('REPOSITORY_PREFIX', 'https://gitlab.com/teklia/workers')

    get_model = apps.get_model
    DataSource = get_model('documents', 'DataSource')
    Element = get_model('documents', 'Element')
    Classification = get_model('documents', 'Classification')
    Transcription = get_model('documents', 'Transcription')
    Entity = get_model('documents', 'Entity')
    Repository = get_model('dataimport', 'Repository')
    Worker = get_model('dataimport', 'Worker')

    # Every model carrying a `source` foreign key, in processing order
    sourced_models = (Element, Classification, Transcription, Entity)

    # A manual worker version is just None, so dropping the manual source is enough
    print('Migrating manual sources…')
    for model in sourced_models:
        model.objects.filter(source__slug='manual').update(source=None)

    # Collect the IDs of the sources that still have related objects;
    # sources that nothing points to are not migrated at all.
    used_source_ids = sourced_models[0].objects.values('source_id')
    for model in sourced_models[1:]:
        used_source_ids = used_source_ids.union(model.objects.values('source_id'))

    for data_source in DataSource.objects.filter(id__in=used_source_ids):
        print(f'Migrating {data_source.name} {data_source.revision} ({data_source.id})…')

        repository = Repository.objects.get_or_create(
            type=RepositoryType.Worker,
            url=f'{url_prefix}/{data_source.slug}',
            # The hook token is supposed to be unique, so derive it from the source ID
            defaults={'hook_token': str(data_source.id)},
        )[0]

        worker = Worker.objects.get_or_create(
            repository=repository,
            slug=data_source.slug,
            defaults={'name': data_source.name, 'type': str(data_source.type)},
        )[0]

        placeholder_revision = repository.revisions.get_or_create(
            hash='0' * 32,
            message='Migrated DataSource',
            author='Arkindex',
        )[0]

        # Several DataSources may share a slug and revision while differing
        # only by type, hence get_or_create instead of a plain create.
        worker_version = worker.workerversion_set.get_or_create(
            revision=placeholder_revision,
            defaults={'configuration': {}},
        )[0]

        # Swap the source for the worker version on every related object
        for related_name in ('elements', 'classifications', 'transcriptions', 'entities'):
            getattr(data_source, related_name).update(source=None, worker_version=worker_version)
class Migration(migrations.Migration):
    """
    Prepare the schema, then move DataSource data over to worker versions.

    The DataSource type becomes a plain CharField and three constraints are
    removed before the data migration runs. The data migration has no
    reverse step (noop) and is marked as elidable.
    """

    dependencies = [('documents', '0023_remove_transcription_type')]

    operations = [
        migrations.AlterField('DataSource', 'type', models.CharField(max_length=50)),
        migrations.RemoveConstraint('classification', 'classification_unique_manual'),
        migrations.RemoveConstraint('classification', 'classification_unique_worker_version'),
        migrations.RemoveConstraint('transcription', 'transcription_source_not_worker_version'),
        migrations.RunPython(
            migrate_data_sources,
            migrations.RunPython.noop,
            elidable=True,
        ),
        # Deleting the old structure is left to a separate migration: updating
        # data and then altering the structure in the same migration causes
        # errors with 'pending trigger events'.
    ]
# Generated by Django 3.1.3 on 2020-11-30 10:40
# Generated by Django 3.1.3 on 2020-12-10 13:57
from django.db import migrations, models
......@@ -6,7 +6,7 @@ from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('documents', '0023_remove_transcription_type'),
('documents', '0024_migrate_datasource'),
]
operations = [
......@@ -14,18 +14,6 @@ class Migration(migrations.Migration):
name='datasource',
unique_together=None,
),
migrations.RemoveConstraint(
model_name='classification',
name='classification_unique_manual',
),
migrations.RemoveConstraint(
model_name='classification',
name='classification_unique_worker_version',
),
migrations.RemoveConstraint(
model_name='transcription',
name='transcription_source_not_worker_version',
),
migrations.RemoveField(
model_name='classification',
name='source',
......@@ -47,7 +35,7 @@ class Migration(migrations.Migration):
constraint=models.UniqueConstraint(
condition=models.Q(worker_version_id__isnull=True),
fields=('element', 'ml_class'),
name='classification_unique_manual'
name='classification_unique_manual',
),
),
migrations.AddConstraint(
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment