From c7d1e95101b82b72d006ffb9f27607b946d4d83f Mon Sep 17 00:00:00 2001
From: manon blanco <blanco@teklia.com>
Date: Thu, 4 Nov 2021 11:59:02 +0000
Subject: [PATCH] Build WorkerConfiguration model and links

---
 arkindex/dataimport/admin.py                  | 24 ++++++-
 .../migrations/0039_worker_configuration.py   | 65 +++++++++++++++++++
 arkindex/dataimport/models.py                 | 45 +++++++++++--
 arkindex/dataimport/serializers/imports.py    |  2 +-
 arkindex/dataimport/signals.py                | 11 +++-
 arkindex/dataimport/tests/test_workerruns.py  |  4 +-
 6 files changed, 141 insertions(+), 10 deletions(-)
 create mode 100644 arkindex/dataimport/migrations/0039_worker_configuration.py

diff --git a/arkindex/dataimport/admin.py b/arkindex/dataimport/admin.py
index 47a93e390a..9e3efcea10 100644
--- a/arkindex/dataimport/admin.py
+++ b/arkindex/dataimport/admin.py
@@ -2,7 +2,15 @@ from django.contrib import admin
 from django.db.models import Max
 from enumfields.admin import EnumFieldListFilter
 
-from arkindex.dataimport.models import DataFile, DataImport, Repository, Revision, Worker, WorkerVersion
+from arkindex.dataimport.models import (
+    DataFile,
+    DataImport,
+    Repository,
+    Revision,
+    Worker,
+    WorkerConfiguration,
+    WorkerVersion,
+)
 from arkindex.users.admin import GroupMembershipInline, UserMembershipInline
 
 
@@ -85,11 +93,16 @@ class WorkerVersionInline(admin.StackedInline):
         return super().get_queryset(*args, **kwargs).prefetch_related('worker', 'revision')
 
 
+class WorkerConfigurationInline(admin.StackedInline):
+    model = WorkerConfiguration
+    readonly_fields = ('configuration_hash', )
+
+
 class WorkerAdmin(admin.ModelAdmin):
     list_display = ('id', 'name', 'slug', 'type', 'repository')
     field = ('id', 'name', 'slug', 'type', 'repository')
     readonly_fields = ('id', )
-    inlines = [WorkerVersionInline, UserMembershipInline, GroupMembershipInline]
+    inlines = [WorkerVersionInline, UserMembershipInline, GroupMembershipInline, WorkerConfigurationInline]
 
 
 class WorkerVersionAdmin(admin.ModelAdmin):
@@ -99,9 +112,16 @@ class WorkerVersionAdmin(admin.ModelAdmin):
     readonly_fields = ('id', )
 
 
+class WorkerConfigurationAdmin(admin.ModelAdmin):
+    list_display = ('id', 'name', 'worker')
+    list_filter = ('worker', )
+    readonly_fields = ('id', 'configuration_hash')
+
+
 admin.site.register(DataImport, DataImportAdmin)
 admin.site.register(DataFile, DataFileAdmin)
 admin.site.register(Revision, RevisionAdmin)
 admin.site.register(Repository, RepositoryAdmin)
 admin.site.register(Worker, WorkerAdmin)
 admin.site.register(WorkerVersion, WorkerVersionAdmin)
+admin.site.register(WorkerConfiguration, WorkerConfigurationAdmin)
diff --git a/arkindex/dataimport/migrations/0039_worker_configuration.py b/arkindex/dataimport/migrations/0039_worker_configuration.py
new file mode 100644
index 0000000000..6dcdee4016
--- /dev/null
+++ b/arkindex/dataimport/migrations/0039_worker_configuration.py
@@ -0,0 +1,65 @@
+# Generated by Django 3.2.5 on 2021-11-04 09:42
+
+import uuid
+
+import django.db.models.deletion
+from django.db import migrations, models
+
+import arkindex.project.fields
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('dataimport', '0038_dataimport_use_gpu'),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='WorkerConfiguration',
+            fields=[
+                ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
+                ('created', models.DateTimeField(auto_now_add=True)),
+                ('updated', models.DateTimeField(auto_now=True)),
+                ('name', models.CharField(max_length=250)),
+                ('configuration', models.JSONField(blank=True, default=dict)),
+                ('configuration_hash', arkindex.project.fields.MD5HashField(max_length=32)),
+            ],
+        ),
+        migrations.RemoveConstraint(
+            model_name='workerrun',
+            name='worker_run_configuration_objects',
+        ),
+        migrations.RenameField(
+            model_name='workerrun',
+            old_name='configuration',
+            new_name='old_configuration',
+        ),
+        migrations.AddConstraint(
+            model_name='workerrun',
+            constraint=models.CheckConstraint(check=models.Q(('old_configuration__typeof', 'object')), name='worker_run_old_configuration_objects'),
+        ),
+        migrations.AddField(
+            model_name='workerconfiguration',
+            name='worker',
+            field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='configurations', to='dataimport.worker'),
+        ),
+        migrations.AddField(
+            model_name='workeractivity',
+            name='configuration',
+            field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='worker_activities', to='dataimport.workerconfiguration'),
+        ),
+        migrations.AddField(
+            model_name='workerrun',
+            name='configuration',
+            field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='worker_runs', to='dataimport.workerconfiguration'),
+        ),
+        migrations.AddConstraint(
+            model_name='workerconfiguration',
+            constraint=models.CheckConstraint(check=models.Q(('configuration__typeof', 'object')), name='worker_configuration_configuration_objects'),
+        ),
+        migrations.AlterUniqueTogether(
+            name='workerconfiguration',
+            unique_together={('worker', 'name'), ('worker', 'configuration_hash')},
+        ),
+    ]
diff --git a/arkindex/dataimport/models.py b/arkindex/dataimport/models.py
index 93441cd618..a614f52413 100644
--- a/arkindex/dataimport/models.py
+++ b/arkindex/dataimport/models.py
@@ -18,7 +18,7 @@ from arkindex.dataimport.providers import get_provider, git_providers
 from arkindex.dataimport.utils import get_default_farm_id
 from arkindex.documents.models import ClassificationState, Element
 from arkindex.project.aws import S3FileMixin, S3FileStatus
-from arkindex.project.fields import ArrayField
+from arkindex.project.fields import ArrayField, MD5HashField
 from arkindex.project.models import IndexableModel
 from ponos.models import Artifact, State, Workflow
 
@@ -601,19 +601,49 @@ class WorkerVersion(models.Model):
         return self.configuration['docker'].get('command')
 
 
+class WorkerConfiguration(IndexableModel):
+    name = models.CharField(max_length=250)
+    configuration = models.JSONField(default=dict, blank=True)
+    configuration_hash = MD5HashField()
+    worker = models.ForeignKey(
+        Worker,
+        on_delete=models.CASCADE,
+        related_name='configurations',
+    )
+
+    class Meta:
+        unique_together = (
+            ('worker', 'configuration_hash'),
+            ('worker', 'name')
+        )
+        constraints = [
+            models.CheckConstraint(
+                check=models.Q(configuration__typeof='object'),
+                name='worker_configuration_configuration_objects',
+            )
+        ]
+
+
 class WorkerRun(models.Model):
     id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
     dataimport = models.ForeignKey('dataimport.DataImport', on_delete=models.CASCADE, related_name='worker_runs')
     version = models.ForeignKey('dataimport.WorkerVersion', on_delete=models.CASCADE, related_name='worker_runs')
     parents = ArrayField(models.UUIDField())
-    configuration = models.JSONField(default=dict)
+    old_configuration = models.JSONField(default=dict)
+    configuration = models.ForeignKey(
+        WorkerConfiguration,
+        related_name='worker_runs',
+        on_delete=models.SET_NULL,
+        null=True,
+        blank=True,
+    )
 
     class Meta:
         unique_together = (('version', 'dataimport'),)
         constraints = [
             models.CheckConstraint(
-                check=models.Q(configuration__typeof='object'),
-                name='worker_run_configuration_objects',
+                check=models.Q(old_configuration__typeof='object'),
+                name='worker_run_old_configuration_objects',
             )
         ]
 
@@ -692,6 +722,13 @@ class WorkerActivity(IndexableModel):
         null=True,
         blank=True,
     )
+    configuration = models.ForeignKey(
+        WorkerConfiguration,
+        related_name='worker_activities',
+        on_delete=models.SET_NULL,
+        null=True,
+        blank=True,
+    )
 
     # Specific WorkerActivity manager
     objects = ActivityManager()
diff --git a/arkindex/dataimport/serializers/imports.py b/arkindex/dataimport/serializers/imports.py
index dba792affb..a71de604a5 100644
--- a/arkindex/dataimport/serializers/imports.py
+++ b/arkindex/dataimport/serializers/imports.py
@@ -323,7 +323,7 @@ class WorkerRunSerializer(serializers.ModelSerializer):
     # Serialize worker with its basic informations
     worker = WorkerLightSerializer(source='version.worker', read_only=True)
     # A DictField will require valid dicts, but without a child= argument, it will allow any value
-    configuration = serializers.DictField(allow_empty=True, default={})
+    configuration = serializers.DictField(source='old_configuration', allow_empty=True, default={})
 
     class Meta:
         model = WorkerRun
diff --git a/arkindex/dataimport/signals.py b/arkindex/dataimport/signals.py
index be1b8bb260..362549955c 100644
--- a/arkindex/dataimport/signals.py
+++ b/arkindex/dataimport/signals.py
@@ -1,8 +1,11 @@
+import json
+from hashlib import md5
+
 from django.db.models.signals import pre_save
 from django.dispatch import receiver
 from rest_framework.exceptions import ValidationError
 
-from arkindex.dataimport.models import WorkerRun
+from arkindex.dataimport.models import WorkerConfiguration, WorkerRun
 
 
 def _list_ancestors(graph, parents):
@@ -39,3 +42,9 @@ def check_parents(sender, instance, **kwargs):
     ancestors = _list_ancestors(graph, instance.parents)
     if instance.id in ancestors:
         raise ValidationError(f"Can't add or update WorkerRun {instance.id} because parents field isn't properly defined. It would create a cycle.")
+
+
+@receiver(pre_save, sender=WorkerConfiguration)
+def update_configuration_hash(sender, instance, **kwargs):
+    configuration_json = json.dumps(instance.configuration, sort_keys=True).encode('utf-8')
+    instance.configuration_hash = md5(configuration_json).hexdigest()
diff --git a/arkindex/dataimport/tests/test_workerruns.py b/arkindex/dataimport/tests/test_workerruns.py
index d6a37b5b09..5a66da262c 100644
--- a/arkindex/dataimport/tests/test_workerruns.py
+++ b/arkindex/dataimport/tests/test_workerruns.py
@@ -437,7 +437,7 @@ class TestWorkerRuns(FixtureAPITestCase):
 
     def test_update_run_configuration(self):
         self.client.force_login(self.user)
-        self.assertDictEqual(self.run_1.configuration, {})
+        self.assertDictEqual(self.run_1.old_configuration, {})
         with self.assertNumQueries(8):
             response = self.client.patch(
                 reverse('api:worker-run-details', kwargs={'pk': self.run_1.id}),
@@ -453,7 +453,7 @@ class TestWorkerRuns(FixtureAPITestCase):
         )
         self.assertEqual(response.status_code, status.HTTP_200_OK)
         self.run_1.refresh_from_db()
-        self.assertDictEqual(self.run_1.configuration, {
+        self.assertDictEqual(self.run_1.old_configuration, {
             'a': 'b',
             'c': {
                 'd': 42
-- 
GitLab