From f183c5c300b77e39b08ace6488f7c7368d21cdeb Mon Sep 17 00:00:00 2001
From: Erwan Rouchet <rouchet@teklia.com>
Date: Thu, 21 Mar 2024 09:40:50 +0100
Subject: [PATCH] Avoid filling up the RAM with dataset elements when cloning

---
 arkindex/training/api.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arkindex/training/api.py b/arkindex/training/api.py
index 454663899e..71f274f34c 100644
--- a/arkindex/training/api.py
+++ b/arkindex/training/api.py
@@ -1003,11 +1003,14 @@ class DatasetClone(CorpusACLMixin, CreateAPIView):
             DatasetSet(dataset_id=clone.id, name=set.name)
             for set in dataset.sets.all()
         ])
+        set_map = {set.name: set.id for set in cloned_sets}
+
         # Associate all elements to the clone
         DatasetElement.objects.bulk_create([
-            DatasetElement(element_id=elt_id, set=next(new_set for new_set in cloned_sets if new_set.name == set_name))
+            DatasetElement(element_id=elt_id, set=set_map[set_name])
             for elt_id, set_name in DatasetElement.objects.filter(set__dataset_id=dataset.id)
             .values_list("element_id", "set__name")
+            .iterator()
         ])
 
         # Add the set counts to the API response
-- 
GitLab