From c656593d9de39677f833563c4eae506b237f935f Mon Sep 17 00:00:00 2001
From: Erwan Rouchet <rouchet@teklia.com>
Date: Thu, 31 Aug 2023 14:25:32 +0200
Subject: [PATCH] Require WorkerRun.parents to be unique

---
 arkindex/process/serializers/worker_runs.py |  4 ++
 arkindex/process/signals.py                 |  3 ++
 arkindex/process/tests/test_workerruns.py   | 47 ++++++++++++++++++---
 3 files changed, 48 insertions(+), 6 deletions(-)

diff --git a/arkindex/process/serializers/worker_runs.py b/arkindex/process/serializers/worker_runs.py
index 6c34b0474b..3ffb1fcc3b 100644
--- a/arkindex/process/serializers/worker_runs.py
+++ b/arkindex/process/serializers/worker_runs.py
@@ -118,12 +118,16 @@ class WorkerRunSerializer(WorkerACLMixin, serializers.ModelSerializer):
         worker_version = data.get('version', worker_version)
         model_version = data.get('model_version', model_version)
         configuration = data.get('configuration', configuration)
+        parents = data.get('parents', [])
 
         if process.mode not in (ProcessMode.Workers, ProcessMode.Dataset):
             errors['process_id'].append('WorkerRuns can only be created or updated on Workers or Dataset processes.')
         if process.has_tasks:
             errors['process_id'].append('WorkerRuns cannot be added or updated on processes that have already started.')
 
+        if len(parents) != len(set(parents)):
+            errors['parents'].append('The parents of a WorkerRun must be unique.')
+
         # The worker version is not editable on existing WorkerRun instances, so we only validate when creating a new one
         if not self.instance:
             # Set self.request to please the WorkerACLMixin
diff --git a/arkindex/process/signals.py b/arkindex/process/signals.py
index 6d5b51cce9..661bf35172 100644
--- a/arkindex/process/signals.py
+++ b/arkindex/process/signals.py
@@ -33,6 +33,9 @@ def check_parents(sender, instance, **kwargs):
     if not instance.parents:
         return
 
+    if len(instance.parents) != len(set(instance.parents)):
+        raise ValidationError(f"Can't add or update WorkerRun {instance.id} because it has duplicate parents.")
+
     # Use `process_id` and not `process` to avoid an extra SQL query that would use the replica
     parents = WorkerRun.objects.using('default') \
         .filter(id__in=instance.parents, process_id=instance.process_id) \
diff --git a/arkindex/process/tests/test_workerruns.py b/arkindex/process/tests/test_workerruns.py
index 43569554af..f80d59f57c 100644
--- a/arkindex/process/tests/test_workerruns.py
+++ b/arkindex/process/tests/test_workerruns.py
@@ -4,6 +4,7 @@ from unittest.mock import patch
 from django.test import override_settings
 from django.urls import reverse
 from rest_framework import status
+from rest_framework.exceptions import ValidationError
 
 from arkindex.ponos.models import Artifact, State
 from arkindex.process.models import GitRefType, ProcessMode, Revision, WorkerRun, WorkerVersion, WorkerVersionState
@@ -28,6 +29,7 @@ class TestWorkerRuns(FixtureAPITestCase):
         cls.process_1 = cls.corpus.processes.create(creator=cls.user, mode=ProcessMode.Workers)
         cls.version_1 = WorkerVersion.objects.get(worker__slug='reco')
         cls.worker_1 = cls.version_1.worker
+        cls.version_2 = WorkerVersion.objects.get(worker__slug='dla')
         cls.repo = cls.worker_1.repository
         cls.run_1 = cls.process_1.worker_runs.create(version=cls.version_1, parents=[])
         cls.configuration_1 = cls.worker_1.configurations.create(name="My config", configuration={"key": "value"})
@@ -964,6 +966,41 @@ class TestWorkerRuns(FixtureAPITestCase):
             " same Process than this WorkerRun."
         ])
 
+    def test_update_duplicate_parents(self):
+        self.client.force_login(self.user)
+        run_2 = self.process_1.worker_runs.create(version=self.version_2)
+
+        with self.assertNumQueries(8):
+            response = self.client.put(
+                reverse('api:worker-run-details', kwargs={'pk': str(self.run_1.id)}),
+                data={
+                    'parents': [
+                        str(run_2.id),
+                        str(run_2.id),
+                    ],
+                },
+                format='json',
+            )
+            self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+
+        self.assertEqual(response.json(), {
+            'parents': ['The parents of a WorkerRun must be unique.'],
+        })
+
+    def test_duplicate_parents_signal(self):
+        """
+        Duplicates in WorkerRun.parents are also detected outside of the WorkerRun APIs
+        """
+        run_2 = self.process_1.worker_runs.create(
+            version=self.version_2,
+            parents=[],
+        )
+
+        run_2.parents = [self.run_1.id, self.run_1.id]
+        with self.assertRaisesRegex(ValidationError, f"Can't add or update WorkerRun {run_2.id} because it has duplicate parents."):
+            run_2.parents = [self.run_1.id, self.run_1.id]
+            run_2.save()
+
     def test_update_process_id(self):
         """
         Process field cannot be updated
@@ -1032,13 +1069,12 @@ class TestWorkerRuns(FixtureAPITestCase):
         Version field cannot be updated
         """
         self.client.force_login(self.user)
-        dla_version = WorkerVersion.objects.get(worker__slug='dla')
 
         with self.assertNumQueries(11):
             response = self.client.put(
                 reverse('api:worker-run-details', kwargs={'pk': str(self.run_1.id)}),
                 data={
-                    'worker_version_id': str(dla_version.id),
+                    'worker_version_id': str(self.version_2.id),
                 },
                 format='json',
             )
@@ -1089,7 +1125,7 @@ class TestWorkerRuns(FixtureAPITestCase):
             },
         })
         self.run_1.refresh_from_db()
-        self.assertNotEqual(self.run_1.version_id, dla_version.id)
+        self.assertNotEqual(self.run_1.version_id, self.version_2.id)
 
     def test_update_configuration(self):
         self.client.force_login(self.user)
@@ -1842,13 +1878,12 @@ class TestWorkerRuns(FixtureAPITestCase):
         Version field cannot be updated
         """
         self.client.force_login(self.user)
-        dla_version = WorkerVersion.objects.get(worker__slug='dla')
 
         with self.assertNumQueries(11):
             response = self.client.patch(
                 reverse('api:worker-run-details', kwargs={'pk': str(self.run_1.id)}),
                 data={
-                    'worker_version_id': str(dla_version.id),
+                    'worker_version_id': str(self.version_2.id),
                 },
                 format='json',
             )
@@ -1899,7 +1934,7 @@ class TestWorkerRuns(FixtureAPITestCase):
             },
         })
         self.run_1.refresh_from_db()
-        self.assertNotEqual(self.run_1.version_id, dla_version.id)
+        self.assertNotEqual(self.run_1.version_id, self.version_2.id)
 
     def test_partial_update_configuration(self):
         self.client.force_login(self.user)
-- 
GitLab