From 92bcc25db61b9e438e583538e76f81f7552f1644 Mon Sep 17 00:00:00 2001 From: Nolan <nboukachab@teklia.com> Date: Mon, 15 May 2023 11:11:28 +0000 Subject: [PATCH] Remove nb_cols from dataset file --- dan/manager/dataset.py | 3 --- dan/manager/ocr.py | 4 ---- dan/manager/utils.py | 2 -- 3 files changed, 9 deletions(-) diff --git a/dan/manager/dataset.py b/dan/manager/dataset.py index 6e467b13..b93cae10 100644 --- a/dan/manager/dataset.py +++ b/dan/manager/dataset.py @@ -268,9 +268,6 @@ class GenericDataset(Dataset): "label": label, "unchanged_label": label, "path": os.path.abspath(filename), - "nb_cols": 1 - if "nb_cols" not in gt[filename] - else gt[filename]["nb_cols"], } ) if load_in_memory: diff --git a/dan/manager/ocr.py b/dan/manager/ocr.py index 777b2079..6d72ca06 100644 --- a/dan/manager/ocr.py +++ b/dan/manager/ocr.py @@ -445,7 +445,6 @@ class OCRDataset(GenericDataset): sample["label_begin"] = pages[0][1]["begin"] sample["label_sem"] = pages[0][1]["sem"] sample["label"] = pages[0][1] - sample["nb_cols"] = pages[0][2] else: if pages[0][0].shape[0] != pages[1][0].shape[0]: max_height = max(pages[0][0].shape[0], pages[1][0].shape[0]) @@ -459,7 +458,6 @@ class OCRDataset(GenericDataset): sample["label_begin"] = pages[0][1]["begin"] + pages[1][1]["begin"] sample["label_sem"] = pages[0][1]["sem"] + pages[1][1]["sem"] sample["img"] = np.concatenate([pages[0][0], pages[1][0]], axis=1) - sample["nb_cols"] = pages[0][2] + pages[1][2] sample["label"] = sample["label_raw"] if "ⓑ" in self.charset: sample["label"] = sample["label_begin"] @@ -587,7 +585,6 @@ class OCRCollateFunction: batch_data[i]["unchanged_label"] for i in range(len(batch_data)) ] - nb_cols = [batch_data[i]["nb_cols"] for i in range(len(batch_data))] nb_lines = [batch_data[i]["nb_lines"] for i in range(len(batch_data))] line_raw = [batch_data[i]["line_label"] for i in range(len(batch_data))] line_token = [batch_data[i]["token_line_label"] for i in range(len(batch_data))] @@ -652,7 +649,6 @@ class 
OCRCollateFunction: "names": names, "ids": ids, "nb_lines": nb_lines, - "nb_cols": nb_cols, "labels": labels, "reverse_labels": reverse_labels, "raw_labels": raw_labels, diff --git a/dan/manager/utils.py b/dan/manager/utils.py index b5b852b7..a6885038 100644 --- a/dan/manager/utils.py +++ b/dan/manager/utils.py @@ -54,7 +54,6 @@ class OCRManager(GenericTrainingManager): { "path": sample["path"], "label": chunk, - "nb_cols": 1, } ) @@ -67,7 +66,6 @@ class OCRManager(GenericTrainingManager): Image.fromarray(img).save(img_path) gt[set_name][img_name] = { "text": sample["label"], - "nb_cols": sample["nb_cols"] if "nb_cols" in sample else 1, } if "line_label" in sample: gt[set_name][img_name]["lines"] = sample["line_label"] -- GitLab