From 88f53144557c93ffce766a4a87712e4d0b29bbc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A9lodie=20Boillet?= <boillet@teklia.com> Date: Wed, 24 May 2023 14:09:36 +0200 Subject: [PATCH] Remove padding for ctc loss --- dan/manager/ocr.py | 31 ------------------------------- 1 file changed, 31 deletions(-) diff --git a/dan/manager/ocr.py b/dan/manager/ocr.py index ec98755a..45d851c3 100644 --- a/dan/manager/ocr.py +++ b/dan/manager/ocr.py @@ -14,7 +14,6 @@ from dan.ocr.utils import LM_str_to_ind from dan.utils import ( pad_image, pad_image_width_random, - pad_image_width_right, pad_images, pad_sequences_1D, rand, @@ -154,37 +153,7 @@ class OCRDataset(GenericDataset): sample["img_shape"] / self.reduce_dims_factor ).astype(int) - # Padding to handle CTC requirements if self.set_name == "train": - max_label_len = 0 - height = 1 - ctc_padding = False - if "CTC_line" in self.params["config"]["constraints"]: - max_label_len = sample["label_len"] - ctc_padding = True - if "CTC_va" in self.params["config"]["constraints"]: - max_label_len = max(sample["line_label_len"]) - ctc_padding = True - if "CTC_pg" in self.params["config"]["constraints"]: - max_label_len = sample["label_len"] - height = max(sample["img_reduced_shape"][0], 1) - ctc_padding = True - if ( - ctc_padding - and 2 * max_label_len + 1 > sample["img_reduced_shape"][1] * height - ): - sample["img"] = pad_image_width_right( - sample["img"], - int( - np.ceil((2 * max_label_len + 1) / height) - * self.reduce_dims_factor[1] - ), - self.padding_value, - ) - sample["img_shape"] = sample["img"].shape - sample["img_reduced_shape"] = np.ceil( - sample["img_shape"] / self.reduce_dims_factor - ).astype(int) sample["img_reduced_shape"] = [ max(1, t) for t in sample["img_reduced_shape"] ] -- GitLab