From 6158f1c7eb786d1099e1e7e7a09628ac3713bbb2 Mon Sep 17 00:00:00 2001
From: Yoann Schneider <yschneider@teklia.com>
Date: Mon, 28 Aug 2023 07:33:48 +0000
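Subject: [PATCH] Fix parsing of labels.json when generating stats

The statistics code now uses each value of a split in labels.json directly
as the label text, instead of reading it from a nested "text" key. The
training test fixture is flattened to the same "image path": "text" mapping.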

---
 dan/datasets/analyze/statistics.py            |  4 +---
 .../training/training_dataset/labels.json     | 24 +++++--------------
 2 files changed, 7 insertions(+), 21 deletions(-)

diff --git a/dan/datasets/analyze/statistics.py b/dan/datasets/analyze/statistics.py
index 880eb1d3..c5f62664 100644
--- a/dan/datasets/analyze/statistics.py
+++ b/dan/datasets/analyze/statistics.py
@@ -1,6 +1,5 @@
 # -*- coding: utf-8 -*-
 from collections import Counter, defaultdict
-from operator import itemgetter
 from pathlib import Path
 from typing import Dict, List, Optional
 
@@ -156,8 +155,7 @@ class Statistics:
             # Path to the images are the key of the dict
             self.create_image_statistics(images=split_data.keys())
 
-            # The text is actually under the "text" key of the values
-            labels = list(map(itemgetter("text"), split_data.values()))
+            labels = list(split_data.values())
             # Text statistics
             self.create_label_statistics(labels=labels)
 
diff --git a/tests/data/training/training_dataset/labels.json b/tests/data/training/training_dataset/labels.json
index ebd62c6a..da3b3df7 100644
--- a/tests/data/training/training_dataset/labels.json
+++ b/tests/data/training/training_dataset/labels.json
@@ -1,26 +1,14 @@
 {
     "train": {
-        "tests/data/training/training_dataset/images/0a34e13a-4ab0-4a91-8d7c-b1d8fee32628.png": {
-            "text": "The latter do not regard"
-        },
-        "tests/data/training/training_dataset/images/0a70e14f-feda-4607-989c-36cf581ddff5.png": {
-            "text": "At the beginning of"
-        }
+        "tests/data/training/training_dataset/images/0a34e13a-4ab0-4a91-8d7c-b1d8fee32628.png": "The latter do not regard",
+        "tests/data/training/training_dataset/images/0a70e14f-feda-4607-989c-36cf581ddff5.png": "At the beginning of"
     },
     "val": {
-        "tests/data/training/training_dataset/images/0a576062-303c-4893-a729-c09c92865d31.png": {
-            "text": "One can remember with"
-        },
-        "tests/data/training/training_dataset/images/0b2457c8-81f1-4600-84d9-f8bf2822a991.png": {
-            "text": "The play was no more"
-        }
+        "tests/data/training/training_dataset/images/0a576062-303c-4893-a729-c09c92865d31.png": "One can remember with",
+        "tests/data/training/training_dataset/images/0b2457c8-81f1-4600-84d9-f8bf2822a991.png": "The play was no more"
     },
     "test": {
-        "tests/data/training/training_dataset/images/fb3edb59-3678-49f8-8e16-8e32e3b0f051.png": {
-            "text": "Both her wrists bore"
-        },
-        "tests/data/training/training_dataset/images/fe498de2-ece4-4fbe-8b53-edfce1b820f0.png": {
-            "text": "SOME years ago a contemporary"
-        }
+        "tests/data/training/training_dataset/images/fb3edb59-3678-49f8-8e16-8e32e3b0f051.png": "Both her wrists bore",
+        "tests/data/training/training_dataset/images/fe498de2-ece4-4fbe-8b53-edfce1b820f0.png": "SOME years ago a contemporary"
     }
 }
-- 
GitLab