From c7d1e95101b82b72d006ffb9f27607b946d4d83f Mon Sep 17 00:00:00 2001
From: manon blanco <blanco@teklia.com>
Date: Thu, 4 Nov 2021 11:59:02 +0000
Subject: [PATCH] Build WorkerConfiguration model and links

---
 arkindex/dataimport/admin.py                  | 24 ++++++-
 .../migrations/0039_worker_configuration.py   | 65 +++++++++++++++++++
 arkindex/dataimport/models.py                 | 45 +++++++++++--
 arkindex/dataimport/serializers/imports.py    |  2 +-
 arkindex/dataimport/signals.py                | 11 +++-
 arkindex/dataimport/tests/test_workerruns.py  |  4 +-
 6 files changed, 141 insertions(+), 10 deletions(-)
 create mode 100644 arkindex/dataimport/migrations/0039_worker_configuration.py

diff --git a/arkindex/dataimport/admin.py b/arkindex/dataimport/admin.py
index 47a93e390a..9e3efcea10 100644
--- a/arkindex/dataimport/admin.py
+++ b/arkindex/dataimport/admin.py
@@ -2,7 +2,15 @@ from django.contrib import admin
 from django.db.models import Max
 from enumfields.admin import EnumFieldListFilter
 
-from arkindex.dataimport.models import DataFile, DataImport, Repository, Revision, Worker, WorkerVersion
+from arkindex.dataimport.models import (
+    DataFile,
+    DataImport,
+    Repository,
+    Revision,
+    Worker,
+    WorkerConfiguration,
+    WorkerVersion,
+)
 from arkindex.users.admin import GroupMembershipInline, UserMembershipInline
 
 
@@ -85,11 +93,16 @@ class WorkerVersionInline(admin.StackedInline):
         return super().get_queryset(*args, **kwargs).prefetch_related('worker', 'revision')
 
 
+class WorkerConfigurationInline(admin.StackedInline):
+    model = WorkerConfiguration
+    readonly_fields = ('configuration_hash', )  # set by the pre_save receiver in dataimport/signals.py
+
+
 class WorkerAdmin(admin.ModelAdmin):
     list_display = ('id', 'name', 'slug', 'type', 'repository')
     field = ('id', 'name', 'slug', 'type', 'repository')
     readonly_fields = ('id', )
-    inlines = [WorkerVersionInline, UserMembershipInline, GroupMembershipInline]
+    inlines = [WorkerVersionInline, UserMembershipInline, GroupMembershipInline, WorkerConfigurationInline]
 
 
 class WorkerVersionAdmin(admin.ModelAdmin):
@@ -99,9 +112,16 @@ class WorkerVersionAdmin(admin.ModelAdmin):
     readonly_fields = ('id', )
 
 
+class WorkerConfigurationAdmin(admin.ModelAdmin):
+    list_display = ('id', 'name', 'worker')
+    list_filter = ('worker', )
+    readonly_fields = ('id', 'configuration_hash')  # the hash is set by the pre_save receiver in dataimport/signals.py
+
+
 admin.site.register(DataImport, DataImportAdmin)
 admin.site.register(DataFile, DataFileAdmin)
 admin.site.register(Revision, RevisionAdmin)
 admin.site.register(Repository, RepositoryAdmin)
 admin.site.register(Worker, WorkerAdmin)
 admin.site.register(WorkerVersion, WorkerVersionAdmin)
+admin.site.register(WorkerConfiguration, WorkerConfigurationAdmin)
diff --git a/arkindex/dataimport/migrations/0039_worker_configuration.py b/arkindex/dataimport/migrations/0039_worker_configuration.py
new file mode 100644
index 0000000000..6dcdee4016
--- /dev/null
+++ b/arkindex/dataimport/migrations/0039_worker_configuration.py
@@ -0,0 +1,65 @@
+# Generated by Django 3.2.5 on 2021-11-04 09:42
+
+import uuid
+
+import django.db.models.deletion
+from django.db import migrations, models
+
+import arkindex.project.fields
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('dataimport', '0038_dataimport_use_gpu'),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='WorkerConfiguration',
+            fields=[
+                ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
+                ('created', models.DateTimeField(auto_now_add=True)),
+                ('updated', models.DateTimeField(auto_now=True)),
+                ('name', models.CharField(max_length=250)),
+                ('configuration', models.JSONField(blank=True, default=dict)),
+                ('configuration_hash', arkindex.project.fields.MD5HashField(max_length=32)),
+            ],
+        ),
+        migrations.RemoveConstraint(  # drop the check on WorkerRun.configuration before that field is renamed
+            model_name='workerrun',
+            name='worker_run_configuration_objects',
+        ),
+        migrations.RenameField(  # the existing JSON field is kept under the name old_configuration
+            model_name='workerrun',
+            old_name='configuration',
+            new_name='old_configuration',
+        ),
+        migrations.AddConstraint(  # same check as the removed one, now targeting the renamed field
+            model_name='workerrun',
+            constraint=models.CheckConstraint(check=models.Q(('old_configuration__typeof', 'object')), name='worker_run_old_configuration_objects'),
+        ),
+        migrations.AddField(
+            model_name='workerconfiguration',
+            name='worker',
+            field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='configurations', to='dataimport.worker'),
+        ),
+        migrations.AddField(
+            model_name='workeractivity',
+            name='configuration',
+            field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='worker_activities', to='dataimport.workerconfiguration'),
+        ),
+        migrations.AddField(  # WorkerRun.configuration is re-created as a nullable FK to WorkerConfiguration
+            model_name='workerrun',
+            name='configuration',
+            field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='worker_runs', to='dataimport.workerconfiguration'),
+        ),
+        migrations.AddConstraint(
+            model_name='workerconfiguration',
+            constraint=models.CheckConstraint(check=models.Q(('configuration__typeof', 'object')), name='worker_configuration_configuration_objects'),
+        ),
+        migrations.AlterUniqueTogether(  # per worker: unique name and unique configuration hash
+            name='workerconfiguration',
+            unique_together={('worker', 'name'), ('worker', 'configuration_hash')},
+        ),
+    ]
diff --git a/arkindex/dataimport/models.py b/arkindex/dataimport/models.py
index 93441cd618..a614f52413 100644
--- a/arkindex/dataimport/models.py
+++ b/arkindex/dataimport/models.py
@@ -18,7 +18,7 @@ from arkindex.dataimport.providers import get_provider, git_providers
 from arkindex.dataimport.utils import get_default_farm_id
 from arkindex.documents.models import ClassificationState, Element
 from arkindex.project.aws import S3FileMixin, S3FileStatus
-from arkindex.project.fields import ArrayField
+from arkindex.project.fields import ArrayField, MD5HashField
 from arkindex.project.models import IndexableModel
 from ponos.models import Artifact, State, Workflow
 
@@ -601,19 +601,49 @@ class WorkerVersion(models.Model):
         return self.configuration['docker'].get('command')
 
 
+class WorkerConfiguration(IndexableModel):  # a named JSON configuration attached to one Worker
+    name = models.CharField(max_length=250)
+    configuration = models.JSONField(default=dict, blank=True)
+    configuration_hash = MD5HashField()  # set by the pre_save receiver in dataimport/signals.py
+    worker = models.ForeignKey(
+        Worker,
+        on_delete=models.CASCADE,  # configurations are deleted together with their worker
+        related_name='configurations',
+    )
+
+    class Meta:
+        unique_together = (  # per worker: unique configuration hash and unique name
+            ('worker', 'configuration_hash'),
+            ('worker', 'name')
+        )
+        constraints = [
+            models.CheckConstraint(
+                check=models.Q(configuration__typeof='object'),  # NOTE(review): presumably only JSON objects pass; confirm the `typeof` lookup
+                name='worker_configuration_configuration_objects',
+            )
+        ]
+
+
 class WorkerRun(models.Model):
     id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
     dataimport = models.ForeignKey('dataimport.DataImport', on_delete=models.CASCADE, related_name='worker_runs')
     version = models.ForeignKey('dataimport.WorkerVersion', on_delete=models.CASCADE, related_name='worker_runs')
     parents = ArrayField(models.UUIDField())
-    configuration = models.JSONField(default=dict)
+    old_configuration = models.JSONField(default=dict)
+    configuration = models.ForeignKey(
+        WorkerConfiguration,
+        related_name='worker_runs',
+        on_delete=models.SET_NULL,
+        null=True,
+        blank=True,
+    )
 
     class Meta:
         unique_together = (('version', 'dataimport'),)
         constraints = [
             models.CheckConstraint(
-                check=models.Q(configuration__typeof='object'),
-                name='worker_run_configuration_objects',
+                check=models.Q(old_configuration__typeof='object'),
+                name='worker_run_old_configuration_objects',
             )
         ]
 
@@ -692,6 +722,13 @@ class WorkerActivity(IndexableModel):
         null=True,
         blank=True,
     )
+    configuration = models.ForeignKey(
+        WorkerConfiguration,
+        related_name='worker_activities',
+        on_delete=models.SET_NULL,
+        null=True,
+        blank=True,
+    )
 
     # Specific WorkerActivity manager
     objects = ActivityManager()
diff --git a/arkindex/dataimport/serializers/imports.py b/arkindex/dataimport/serializers/imports.py
index dba792affb..a71de604a5 100644
--- a/arkindex/dataimport/serializers/imports.py
+++ b/arkindex/dataimport/serializers/imports.py
@@ -323,7 +323,7 @@ class WorkerRunSerializer(serializers.ModelSerializer):
     # Serialize worker with its basic informations
     worker = WorkerLightSerializer(source='version.worker', read_only=True)
     # A DictField will require valid dicts, but without a child= argument, it will allow any value
-    configuration = serializers.DictField(allow_empty=True, default={})
+    configuration = serializers.DictField(source='old_configuration', allow_empty=True, default={})
 
     class Meta:
         model = WorkerRun
diff --git a/arkindex/dataimport/signals.py b/arkindex/dataimport/signals.py
index be1b8bb260..362549955c 100644
--- a/arkindex/dataimport/signals.py
+++ b/arkindex/dataimport/signals.py
@@ -1,8 +1,11 @@
+import json
+from hashlib import md5
+
 from django.db.models.signals import pre_save
 from django.dispatch import receiver
 from rest_framework.exceptions import ValidationError
 
-from arkindex.dataimport.models import WorkerRun
+from arkindex.dataimport.models import WorkerConfiguration, WorkerRun
 
 
 def _list_ancestors(graph, parents):
@@ -39,3 +42,9 @@ def check_parents(sender, instance, **kwargs):
     ancestors = _list_ancestors(graph, instance.parents)
     if instance.id in ancestors:
         raise ValidationError(f"Can't add or update WorkerRun {instance.id} because parents field isn't properly defined. It would create a cycle.")
+
+
+@receiver(pre_save, sender=WorkerConfiguration)
+def update_configuration_hash(sender, instance, **kwargs):
+    """Set ``configuration_hash`` to the MD5 hex digest of the key-sorted JSON dump of ``instance.configuration``."""
+    instance.configuration_hash = md5(json.dumps(instance.configuration, sort_keys=True).encode('utf-8')).hexdigest()
diff --git a/arkindex/dataimport/tests/test_workerruns.py b/arkindex/dataimport/tests/test_workerruns.py
index d6a37b5b09..5a66da262c 100644
--- a/arkindex/dataimport/tests/test_workerruns.py
+++ b/arkindex/dataimport/tests/test_workerruns.py
@@ -437,7 +437,7 @@ class TestWorkerRuns(FixtureAPITestCase):
 
     def test_update_run_configuration(self):
         self.client.force_login(self.user)
-        self.assertDictEqual(self.run_1.configuration, {})
+        self.assertDictEqual(self.run_1.old_configuration, {})
         with self.assertNumQueries(8):
             response = self.client.patch(
                 reverse('api:worker-run-details', kwargs={'pk': self.run_1.id}),
@@ -453,7 +453,7 @@ class TestWorkerRuns(FixtureAPITestCase):
             )
             self.assertEqual(response.status_code, status.HTTP_200_OK)
         self.run_1.refresh_from_db()
-        self.assertDictEqual(self.run_1.configuration, {
+        self.assertDictEqual(self.run_1.old_configuration, {
             'a': 'b',
             'c': {
                 'd': 42
-- 
GitLab