From 92bcc25db61b9e438e583538e76f81f7552f1644 Mon Sep 17 00:00:00 2001
From: Nolan <nboukachab@teklia.com>
Date: Mon, 15 May 2023 11:11:28 +0000
Subject: [PATCH] Remove nb_cols from dataset file

---
 dan/manager/dataset.py | 3 ---
 dan/manager/ocr.py     | 4 ----
 dan/manager/utils.py   | 2 --
 3 files changed, 9 deletions(-)

diff --git a/dan/manager/dataset.py b/dan/manager/dataset.py
index 6e467b13..b93cae10 100644
--- a/dan/manager/dataset.py
+++ b/dan/manager/dataset.py
@@ -268,9 +268,6 @@ class GenericDataset(Dataset):
                         "label": label,
                         "unchanged_label": label,
                         "path": os.path.abspath(filename),
-                        "nb_cols": 1
-                        if "nb_cols" not in gt[filename]
-                        else gt[filename]["nb_cols"],
                     }
                 )
                 if load_in_memory:
diff --git a/dan/manager/ocr.py b/dan/manager/ocr.py
index 777b2079..6d72ca06 100644
--- a/dan/manager/ocr.py
+++ b/dan/manager/ocr.py
@@ -445,7 +445,6 @@ class OCRDataset(GenericDataset):
             sample["label_begin"] = pages[0][1]["begin"]
             sample["label_sem"] = pages[0][1]["sem"]
             sample["label"] = pages[0][1]
-            sample["nb_cols"] = pages[0][2]
         else:
             if pages[0][0].shape[0] != pages[1][0].shape[0]:
                 max_height = max(pages[0][0].shape[0], pages[1][0].shape[0])
@@ -459,7 +458,6 @@ class OCRDataset(GenericDataset):
             sample["label_begin"] = pages[0][1]["begin"] + pages[1][1]["begin"]
             sample["label_sem"] = pages[0][1]["sem"] + pages[1][1]["sem"]
             sample["img"] = np.concatenate([pages[0][0], pages[1][0]], axis=1)
-            sample["nb_cols"] = pages[0][2] + pages[1][2]
         sample["label"] = sample["label_raw"]
         if "ⓑ" in self.charset:
             sample["label"] = sample["label_begin"]
@@ -587,7 +585,6 @@ class OCRCollateFunction:
             batch_data[i]["unchanged_label"] for i in range(len(batch_data))
         ]
 
-        nb_cols = [batch_data[i]["nb_cols"] for i in range(len(batch_data))]
         nb_lines = [batch_data[i]["nb_lines"] for i in range(len(batch_data))]
         line_raw = [batch_data[i]["line_label"] for i in range(len(batch_data))]
         line_token = [batch_data[i]["token_line_label"] for i in range(len(batch_data))]
@@ -652,7 +649,6 @@ class OCRCollateFunction:
             "names": names,
             "ids": ids,
             "nb_lines": nb_lines,
-            "nb_cols": nb_cols,
             "labels": labels,
             "reverse_labels": reverse_labels,
             "raw_labels": raw_labels,
diff --git a/dan/manager/utils.py b/dan/manager/utils.py
index b5b852b7..a6885038 100644
--- a/dan/manager/utils.py
+++ b/dan/manager/utils.py
@@ -54,7 +54,6 @@ class OCRManager(GenericTrainingManager):
                                 {
                                     "path": sample["path"],
                                     "label": chunk,
-                                    "nb_cols": 1,
                                 }
                             )
 
@@ -67,7 +66,6 @@ class OCRManager(GenericTrainingManager):
                 Image.fromarray(img).save(img_path)
                 gt[set_name][img_name] = {
                     "text": sample["label"],
-                    "nb_cols": sample["nb_cols"] if "nb_cols" in sample else 1,
                 }
                 if "line_label" in sample:
                     gt[set_name][img_name]["lines"] = sample["line_label"]
-- 
GitLab