From f183c5c300b77e39b08ace6488f7c7368d21cdeb Mon Sep 17 00:00:00 2001
From: Erwan Rouchet <rouchet@teklia.com>
Date: Thu, 21 Mar 2024 09:40:50 +0100
Subject: [PATCH] Avoid filling up the RAM with dataset elements when cloning

Look up the cloned set for each element through a name -> ID mapping
built once, instead of scanning cloned_sets for every element, and read
the source rows with .iterator().

The mapping holds primary keys, not DatasetSet instances, so the new
DatasetElement rows are built with set_id= rather than set=.
---
 arkindex/training/api.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arkindex/training/api.py b/arkindex/training/api.py
index 454663899e..71f274f34c 100644
--- a/arkindex/training/api.py
+++ b/arkindex/training/api.py
@@ -1003,11 +1003,14 @@ class DatasetClone(CorpusACLMixin, CreateAPIView):
             DatasetSet(dataset_id=clone.id, name=set.name)
             for set in dataset.sets.all()
         ])
+        set_map = {set.name: set.id for set in cloned_sets}
+
         # Associate all elements to the clone
         DatasetElement.objects.bulk_create([
-            DatasetElement(element_id=elt_id, set=next(new_set for new_set in cloned_sets if new_set.name == set_name))
+            DatasetElement(element_id=elt_id, set_id=set_map[set_name])
             for elt_id, set_name in DatasetElement.objects.filter(set__dataset_id=dataset.id)
                 .values_list("element_id", "set__name")
+                .iterator()
         ])
 
         # Add the set counts to the API response
-- 
GitLab