diff --git a/dan/manager/dataset.py b/dan/manager/dataset.py index 9d987c6827af887d9da6e6a6b1af6236a028e89b..525e637cdc5e369828d8e49262b3ddd1be345ef0 100644 --- a/dan/manager/dataset.py +++ b/dan/manager/dataset.py @@ -18,7 +18,6 @@ class DatasetManager: def __init__(self, params, device: str): self.params = params self.dataset_class = None - self.img_padding_value = params["config"]["padding_value"] self.my_collate_function = None # Whether data should be copied on GPU via https://pytorch.org/docs/stable/generated/torch.Tensor.pin_memory.html @@ -224,13 +223,6 @@ class GenericDataset(Dataset): if self.load_in_memory: self.apply_preprocessing(params["config"]["preprocessings"]) - self.padding_value = params["config"]["padding_value"] - if self.padding_value == "mean": - if self.mean is None: - _, _ = self.compute_std_mean() - self.padding_value = self.mean - self.params["config"]["padding_value"] = self.padding_value - self.curriculum_config = None def __len__(self): diff --git a/dan/manager/ocr.py b/dan/manager/ocr.py index 533d074cc72495b10f38a72b492a00ca4f7a1d06..6bd7926838609c041bfa91353e5362cddfc66a48 100644 --- a/dan/manager/ocr.py +++ b/dan/manager/ocr.py @@ -25,14 +25,9 @@ class OCRDatasetManager(DatasetManager): params["charset"] if "charset" in params else self.get_merged_charsets() ) - self.tokens = { - "pad": params["config"]["padding_token"], - } + self.tokens = {"pad": len(self.charset) + 2} self.tokens["end"] = len(self.charset) self.tokens["start"] = len(self.charset) + 1 - self.tokens["pad"] = ( - self.tokens["pad"] if self.tokens["pad"] else len(self.charset) + 2 - ) self.params["config"]["padding_token"] = self.tokens["pad"] def get_merged_charsets(self): @@ -143,7 +138,6 @@ class OCRDataset(GenericDataset): sample["img"], sample["img_position"] = pad_image( sample["img"], - padding_value=self.padding_value, new_width=self.params["config"]["padding"]["min_width"], new_height=self.params["config"]["padding"]["min_height"], pad_width=pad_width, @@ -177,7 +171,6 @@ class OCRCollateFunction: """ def __init__(self, config): - self.img_padding_value = float(config["padding_value"]) self.label_padding_value = config["padding_token"] self.config = config @@ -190,9 +183,7 @@ class OCRCollateFunction: self.config["padding_mode"] if "padding_mode" in self.config else "br" ) imgs = [batch_data[i]["img"] for i in range(len(batch_data))] - imgs = pad_images( - imgs, padding_value=self.img_padding_value, padding_mode=padding_mode - ) + imgs = pad_images(imgs, padding_mode=padding_mode) imgs = torch.tensor(imgs).float().permute(0, 3, 1, 2) formatted_batch_data = { diff --git a/dan/ocr/document/train.py b/dan/ocr/document/train.py index d0a5fc479236911debe3ac5cfc3cf57d120e5d33..ebfa34de1ef577cbb4d20c8a41a8e10704954b23 100644 --- a/dan/ocr/document/train.py +++ b/dan/ocr/document/train.py @@ -107,8 +107,6 @@ def get_config(): "worker_per_gpu": 4, # Num of parallel processes per gpu for data loading "width_divisor": 8, # Image width will be divided by 8 "height_divisor": 32, # Image height will be divided by 32 - "padding_value": 0, # Image padding value - "padding_token": None, # Label padding value "preprocessings": [ { "type": "to_RGB", diff --git a/dan/utils.py b/dan/utils.py index 8bffd18a71b002651f5875dd5d37f2324d1ce015..147bcee0bb5174310039fa92cbc84bda6996ab36 100644 --- a/dan/utils.py +++ b/dan/utils.py @@ -25,7 +25,7 @@ def pad_sequences_1D(data, padding_value): return padded_data -def pad_images(data, padding_value, padding_mode="br"): +def pad_images(data, padding_mode="br"): """ data: list of numpy array mode: "br"/"tl"/"random" (bottom-right, top-left, random) @@ -34,9 +34,7 @@ def pad_images(data, padding_value, padding_mode="br"): y_lengths = [x.shape[1] for x in data] longest_x = max(x_lengths) longest_y = max(y_lengths) - padded_data = ( - np.ones((len(data), longest_x, longest_y, data[0].shape[2])) * padding_value - ) + padded_data = np.ones((len(data), longest_x, longest_y, data[0].shape[2])) * 0 for i, xy_len in enumerate(zip(x_lengths, y_lengths)): x_len, y_len = xy_len if padding_mode == "br": @@ -56,7 +54,6 @@ def pad_images(data, padding_value, padding_mode="br"): def pad_image( image, - padding_value, new_height=None, new_width=None, pad_width=None, @@ -90,7 +87,7 @@ def pad_image( ) if not (pad_width == 0 and pad_height == 0): - padded_image = np.ones((h + pad_height, w + pad_width, c)) * padding_value + padded_image = np.ones((h + pad_height, w + pad_width, c)) * 0 if padding_mode == "br": hi, wi = 0, 0 elif padding_mode == "tl": diff --git a/docs/usage/train/parameters.md b/docs/usage/train/parameters.md index 8d97ae637cb1a47bd467f07f1f6b4ae743d9f714..12cba43d55511a6053d07635367d331f0e2174b6 100644 --- a/docs/usage/train/parameters.md +++ b/docs/usage/train/parameters.md @@ -16,8 +16,6 @@ All hyperparameters are specified and editable in the training scripts (meaning | `dataset_params.config.worker_per_gpu` | Number of parallel processes per gpu for data loading. | `int` | `4` | | `dataset_params.config.height_divisor` | Factor to reduce the width of the feature vector before feeding the decoder. | `int` | `8` | | `dataset_params.config.width_divisor` | Factor to reduce the height of the feature vector before feeding the decoder. | `int` | `32` | -| `dataset_params.config.padding_value` | Image padding value. | `int` | `0` | -| `dataset_params.config.padding_token` | Transcription padding value. | `int` | `None` | | `dataset_params.config.preprocessings` | List of pre-processing functions to apply to input images. | `list` | (see [dedicated section](#data-preprocessing)) | | `dataset_params.config.augmentation` | Configuration for data augmentation. | `dict` | (see [dedicated section](#data-augmentation)) | diff --git a/tests/conftest.py b/tests/conftest.py index e660869c0f5c0fb80b79a2d8f133ee246e66da52..8fa1e79c674d9a73444796978b6a92031dbe43ac 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -70,8 +70,6 @@ def training_config(): "load_in_memory": True, # Load all images in CPU memory "width_divisor": 8, # Image width will be divided by 8 "height_divisor": 32, # Image height will be divided by 32 - "padding_value": 0, # Image padding value - "padding_token": None, # Label padding value "preprocessings": [ { "type": "to_RGB",