diff --git a/dan/manager/dataset.py b/dan/manager/dataset.py
index 9d987c6827af887d9da6e6a6b1af6236a028e89b..525e637cdc5e369828d8e49262b3ddd1be345ef0 100644
--- a/dan/manager/dataset.py
+++ b/dan/manager/dataset.py
@@ -18,7 +18,6 @@ class DatasetManager:
     def __init__(self, params, device: str):
         self.params = params
         self.dataset_class = None
-        self.img_padding_value = params["config"]["padding_value"]
 
         self.my_collate_function = None
         # Whether data should be copied on GPU via https://pytorch.org/docs/stable/generated/torch.Tensor.pin_memory.html
@@ -224,13 +223,6 @@ class GenericDataset(Dataset):
         if self.load_in_memory:
             self.apply_preprocessing(params["config"]["preprocessings"])
 
-        self.padding_value = params["config"]["padding_value"]
-        if self.padding_value == "mean":
-            if self.mean is None:
-                _, _ = self.compute_std_mean()
-            self.padding_value = self.mean
-            self.params["config"]["padding_value"] = self.padding_value
-
         self.curriculum_config = None
 
     def __len__(self):
diff --git a/dan/manager/ocr.py b/dan/manager/ocr.py
index 533d074cc72495b10f38a72b492a00ca4f7a1d06..6bd7926838609c041bfa91353e5362cddfc66a48 100644
--- a/dan/manager/ocr.py
+++ b/dan/manager/ocr.py
@@ -25,14 +25,9 @@ class OCRDatasetManager(DatasetManager):
             params["charset"] if "charset" in params else self.get_merged_charsets()
         )
 
-        self.tokens = {
-            "pad": params["config"]["padding_token"],
-        }
+        self.tokens = {"pad": len(self.charset) + 2}
         self.tokens["end"] = len(self.charset)
         self.tokens["start"] = len(self.charset) + 1
-        self.tokens["pad"] = (
-            self.tokens["pad"] if self.tokens["pad"] else len(self.charset) + 2
-        )
         self.params["config"]["padding_token"] = self.tokens["pad"]
 
     def get_merged_charsets(self):
@@ -143,7 +138,6 @@ class OCRDataset(GenericDataset):
 
                 sample["img"], sample["img_position"] = pad_image(
                     sample["img"],
-                    padding_value=self.padding_value,
                     new_width=self.params["config"]["padding"]["min_width"],
                     new_height=self.params["config"]["padding"]["min_height"],
                     pad_width=pad_width,
@@ -177,7 +171,6 @@ class OCRCollateFunction:
     """
 
     def __init__(self, config):
-        self.img_padding_value = float(config["padding_value"])
         self.label_padding_value = config["padding_token"]
         self.config = config
 
@@ -190,9 +183,7 @@ class OCRCollateFunction:
             self.config["padding_mode"] if "padding_mode" in self.config else "br"
         )
         imgs = [batch_data[i]["img"] for i in range(len(batch_data))]
-        imgs = pad_images(
-            imgs, padding_value=self.img_padding_value, padding_mode=padding_mode
-        )
+        imgs = pad_images(imgs, padding_mode=padding_mode)
         imgs = torch.tensor(imgs).float().permute(0, 3, 1, 2)
 
         formatted_batch_data = {
diff --git a/dan/ocr/document/train.py b/dan/ocr/document/train.py
index d0a5fc479236911debe3ac5cfc3cf57d120e5d33..ebfa34de1ef577cbb4d20c8a41a8e10704954b23 100644
--- a/dan/ocr/document/train.py
+++ b/dan/ocr/document/train.py
@@ -107,8 +107,6 @@ def get_config():
                 "worker_per_gpu": 4,  # Num of parallel processes per gpu for data loading
                 "width_divisor": 8,  # Image width will be divided by 8
                 "height_divisor": 32,  # Image height will be divided by 32
-                "padding_value": 0,  # Image padding value
-                "padding_token": None,  # Label padding value
                 "preprocessings": [
                     {
                         "type": "to_RGB",
diff --git a/dan/utils.py b/dan/utils.py
index 8bffd18a71b002651f5875dd5d37f2324d1ce015..147bcee0bb5174310039fa92cbc84bda6996ab36 100644
--- a/dan/utils.py
+++ b/dan/utils.py
@@ -25,7 +25,7 @@ def pad_sequences_1D(data, padding_value):
     return padded_data
 
 
-def pad_images(data, padding_value, padding_mode="br"):
+def pad_images(data, padding_mode="br"):
     """
     data: list of numpy array
     mode: "br"/"tl"/"random" (bottom-right, top-left, random)
@@ -34,9 +34,7 @@ def pad_images(data, padding_value, padding_mode="br"):
     y_lengths = [x.shape[1] for x in data]
     longest_x = max(x_lengths)
     longest_y = max(y_lengths)
-    padded_data = (
-        np.ones((len(data), longest_x, longest_y, data[0].shape[2])) * padding_value
-    )
+    padded_data = np.ones((len(data), longest_x, longest_y, data[0].shape[2])) * 0
     for i, xy_len in enumerate(zip(x_lengths, y_lengths)):
         x_len, y_len = xy_len
         if padding_mode == "br":
@@ -56,7 +54,6 @@ def pad_images(data, padding_value, padding_mode="br"):
 
 def pad_image(
     image,
-    padding_value,
     new_height=None,
     new_width=None,
     pad_width=None,
@@ -90,7 +87,7 @@ def pad_image(
     )
 
     if not (pad_width == 0 and pad_height == 0):
-        padded_image = np.ones((h + pad_height, w + pad_width, c)) * padding_value
+        padded_image = np.ones((h + pad_height, w + pad_width, c)) * 0
         if padding_mode == "br":
             hi, wi = 0, 0
         elif padding_mode == "tl":
diff --git a/docs/usage/train/parameters.md b/docs/usage/train/parameters.md
index 8d97ae637cb1a47bd467f07f1f6b4ae743d9f714..12cba43d55511a6053d07635367d331f0e2174b6 100644
--- a/docs/usage/train/parameters.md
+++ b/docs/usage/train/parameters.md
@@ -16,8 +16,6 @@ All hyperparameters are specified and editable in the training scripts (meaning
 | `dataset_params.config.worker_per_gpu`  | Number of parallel processes per gpu for data loading.                                 | `int`        | `4`                                            |
 | `dataset_params.config.height_divisor`  | Factor to reduce the height of the feature vector before feeding the decoder.          | `int`        | `32`                                           |
 | `dataset_params.config.width_divisor`   | Factor to reduce the width of the feature vector before feeding the decoder.           | `int`        | `8`                                            |
-| `dataset_params.config.padding_value`   | Image padding value.                                                                   | `int`        | `0`                                            |
-| `dataset_params.config.padding_token`   | Transcription padding value.                                                           | `int`        | `None`                                         |
 | `dataset_params.config.preprocessings`  | List of pre-processing functions to apply to input images.                             | `list`       | (see [dedicated section](#data-preprocessing)) |
 | `dataset_params.config.augmentation`    | Configuration for data augmentation.                                                   | `dict`       | (see [dedicated section](#data-augmentation))  |
 
diff --git a/tests/conftest.py b/tests/conftest.py
index e660869c0f5c0fb80b79a2d8f133ee246e66da52..8fa1e79c674d9a73444796978b6a92031dbe43ac 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -70,8 +70,6 @@ def training_config():
                 "load_in_memory": True,  # Load all images in CPU memory
                 "width_divisor": 8,  # Image width will be divided by 8
                 "height_divisor": 32,  # Image height will be divided by 32
-                "padding_value": 0,  # Image padding value
-                "padding_token": None,  # Label padding value
                 "preprocessings": [
                     {
                         "type": "to_RGB",