Skip to content
Snippets Groups Projects
Commit f3718eb1 authored by Manon Blanco
Browse files

Remove padding value and padding token parameters from training configuration

parent a93bc055
No related branches found
No related tags found
No related merge requests found
......@@ -18,7 +18,6 @@ class DatasetManager:
def __init__(self, params, device: str):
self.params = params
self.dataset_class = None
self.img_padding_value = params["config"]["padding_value"]
self.my_collate_function = None
# Whether data should be copied on GPU via https://pytorch.org/docs/stable/generated/torch.Tensor.pin_memory.html
......@@ -224,13 +223,6 @@ class GenericDataset(Dataset):
if self.load_in_memory:
self.apply_preprocessing(params["config"]["preprocessings"])
self.padding_value = params["config"]["padding_value"]
if self.padding_value == "mean":
if self.mean is None:
_, _ = self.compute_std_mean()
self.padding_value = self.mean
self.params["config"]["padding_value"] = self.padding_value
self.curriculum_config = None
def __len__(self):
......
......@@ -25,14 +25,9 @@ class OCRDatasetManager(DatasetManager):
params["charset"] if "charset" in params else self.get_merged_charsets()
)
self.tokens = {
"pad": params["config"]["padding_token"],
}
self.tokens = {"pad": len(self.charset) + 2}
self.tokens["end"] = len(self.charset)
self.tokens["start"] = len(self.charset) + 1
self.tokens["pad"] = (
self.tokens["pad"] if self.tokens["pad"] else len(self.charset) + 2
)
self.params["config"]["padding_token"] = self.tokens["pad"]
def get_merged_charsets(self):
......@@ -143,7 +138,6 @@ class OCRDataset(GenericDataset):
sample["img"], sample["img_position"] = pad_image(
sample["img"],
padding_value=self.padding_value,
new_width=self.params["config"]["padding"]["min_width"],
new_height=self.params["config"]["padding"]["min_height"],
pad_width=pad_width,
......@@ -177,7 +171,6 @@ class OCRCollateFunction:
"""
def __init__(self, config):
self.img_padding_value = float(config["padding_value"])
self.label_padding_value = config["padding_token"]
self.config = config
......@@ -190,9 +183,7 @@ class OCRCollateFunction:
self.config["padding_mode"] if "padding_mode" in self.config else "br"
)
imgs = [batch_data[i]["img"] for i in range(len(batch_data))]
imgs = pad_images(
imgs, padding_value=self.img_padding_value, padding_mode=padding_mode
)
imgs = pad_images(imgs, padding_mode=padding_mode)
imgs = torch.tensor(imgs).float().permute(0, 3, 1, 2)
formatted_batch_data = {
......
......@@ -107,8 +107,6 @@ def get_config():
"worker_per_gpu": 4, # Num of parallel processes per gpu for data loading
"width_divisor": 8, # Image width will be divided by 8
"height_divisor": 32, # Image height will be divided by 32
"padding_value": 0, # Image padding value
"padding_token": None, # Label padding value
"preprocessings": [
{
"type": "to_RGB",
......
......@@ -25,7 +25,7 @@ def pad_sequences_1D(data, padding_value):
return padded_data
def pad_images(data, padding_value, padding_mode="br"):
def pad_images(data, padding_mode="br"):
"""
data: list of numpy array
mode: "br"/"tl"/"random" (bottom-right, top-left, random)
......@@ -34,9 +34,7 @@ def pad_images(data, padding_value, padding_mode="br"):
y_lengths = [x.shape[1] for x in data]
longest_x = max(x_lengths)
longest_y = max(y_lengths)
padded_data = (
np.ones((len(data), longest_x, longest_y, data[0].shape[2])) * padding_value
)
padded_data = np.ones((len(data), longest_x, longest_y, data[0].shape[2])) * 0
for i, xy_len in enumerate(zip(x_lengths, y_lengths)):
x_len, y_len = xy_len
if padding_mode == "br":
......@@ -56,7 +54,6 @@ def pad_images(data, padding_value, padding_mode="br"):
def pad_image(
image,
padding_value,
new_height=None,
new_width=None,
pad_width=None,
......@@ -90,7 +87,7 @@ def pad_image(
)
if not (pad_width == 0 and pad_height == 0):
padded_image = np.ones((h + pad_height, w + pad_width, c)) * padding_value
padded_image = np.ones((h + pad_height, w + pad_width, c)) * 0
if padding_mode == "br":
hi, wi = 0, 0
elif padding_mode == "tl":
......
......@@ -16,8 +16,6 @@ All hyperparameters are specified and editable in the training scripts (meaning
| `dataset_params.config.worker_per_gpu` | Number of parallel processes per gpu for data loading. | `int` | `4` |
| `dataset_params.config.height_divisor` | Factor to reduce the height of the feature vector before feeding the decoder. | `int` | `32` |
| `dataset_params.config.width_divisor` | Factor to reduce the width of the feature vector before feeding the decoder. | `int` | `8` |
| `dataset_params.config.padding_value` | Image padding value. | `int` | `0` |
| `dataset_params.config.padding_token` | Transcription padding value. | `int` | `None` |
| `dataset_params.config.preprocessings` | List of pre-processing functions to apply to input images. | `list` | (see [dedicated section](#data-preprocessing)) |
| `dataset_params.config.augmentation` | Configuration for data augmentation. | `dict` | (see [dedicated section](#data-augmentation)) |
......
......@@ -70,8 +70,6 @@ def training_config():
"load_in_memory": True, # Load all images in CPU memory
"width_divisor": 8, # Image width will be divided by 8
"height_divisor": 32, # Image height will be divided by 32
"padding_value": 0, # Image padding value
"padding_token": None, # Label padding value
"preprocessings": [
{
"type": "to_RGB",
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment