From 14d2f39a9b4895932329e6fdcc915929292e00c0 Mon Sep 17 00:00:00 2001
From: Erwan Rouchet <rouchet@teklia.com>
Date: Mon, 9 Sep 2024 11:59:28 +0200
Subject: [PATCH] Remove configurations before cleaning up archived workers

---
 arkindex/documents/management/commands/cleanup.py |  4 ++++
 arkindex/documents/tests/commands/test_cleanup.py | 14 ++++++++++++++
 2 files changed, 18 insertions(+)

diff --git a/arkindex/documents/management/commands/cleanup.py b/arkindex/documents/management/commands/cleanup.py
index 9911d73042..850b764d86 100644
--- a/arkindex/documents/management/commands/cleanup.py
+++ b/arkindex/documents/management/commands/cleanup.py
@@ -304,6 +304,10 @@ class Command(BaseCommand):
                 continue
 
             self.stdout.write(f"Removing worker {worker.name} ({worker.id})")
+            # worker.delete() would not delete the WorkerRuns using this worker's configurations: it would only
+            # try to set their configuration to None, which could violate unique constraints. We delete those runs
+            # beforehand instead, with _raw_delete so that nothing other than worker runs is updated or deleted.
+            WorkerRun.objects.filter(configuration__worker=worker)._raw_delete(using="default")
             worker.delete()
             deleted += 1
 
diff --git a/arkindex/documents/tests/commands/test_cleanup.py b/arkindex/documents/tests/commands/test_cleanup.py
index 9c016911af..2e795af566 100644
--- a/arkindex/documents/tests/commands/test_cleanup.py
+++ b/arkindex/documents/tests/commands/test_cleanup.py
@@ -1124,6 +1124,20 @@ class TestCleanupCommand(FixtureTestCase):
         removable_worker.archived = datetime.now(timezone.utc) - timedelta(days=11)
         removable_worker.save()
 
+        # Create a process where a worker version is used both with and without a configuration from this worker.
+        # This could lead to integrity errors with the default Django removal: the configuration would be set to None
+        # on the WorkerRuns, leaving 2 runs with the same version and no configuration when they should be unique.
+        process = self.corpus.processes.create(mode=ProcessMode.Workers, creator=self.superuser)
+        version = removable_worker.versions.first()
+        process.worker_runs.create(version=version)
+        process.worker_runs.create(
+            version=version,
+            configuration=removable_worker.configurations.create(
+                name="Some configuration",
+                configuration={},
+            ),
+        )
+
         # This worker cannot be cleaned up because it is used in ML results
         used_worker = Worker.objects.get(slug="reco")
         self.assertTrue(used_worker.versions.all().in_use())
-- 
GitLab