Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • atr/dan
1 result
Show changes
Commits on Source (5)
...@@ -6,10 +6,10 @@ import pickle ...@@ -6,10 +6,10 @@ import pickle
import cv2 import cv2
import numpy as np import numpy as np
import torch import torch
from torch import randint
from dan.manager.dataset import DatasetManager, GenericDataset, apply_preprocessing from dan.manager.dataset import DatasetManager, GenericDataset, apply_preprocessing
from dan.ocr.utils import LM_str_to_ind from dan.utils import pad_image, pad_images, pad_sequences_1D, token_to_ind
from dan.utils import pad_image, pad_images, pad_sequences_1D, randint
class OCRDatasetManager(DatasetManager): class OCRDatasetManager(DatasetManager):
...@@ -111,9 +111,8 @@ class OCRDataset(GenericDataset): ...@@ -111,9 +111,8 @@ class OCRDataset(GenericDataset):
) )
sample["img"] = cv2.resize(sample["img"], (new_w, new_h)) sample["img"] = cv2.resize(sample["img"], (new_w, new_h))
# Normalization if requested # Normalization
if "normalize" in self.params["config"] and self.params["config"]["normalize"]: sample["img"] = (sample["img"] - self.mean) / self.std
sample["img"] = (sample["img"] - self.mean) / self.std
sample["img_reduced_shape"] = np.ceil( sample["img_reduced_shape"] = np.ceil(
sample["img"].shape / self.reduce_dims_factor sample["img"].shape / self.reduce_dims_factor
...@@ -137,12 +136,12 @@ class OCRDataset(GenericDataset): ...@@ -137,12 +136,12 @@ class OCRDataset(GenericDataset):
min_pad = self.params["config"]["padding"]["min_pad"] min_pad = self.params["config"]["padding"]["min_pad"]
max_pad = self.params["config"]["padding"]["max_pad"] max_pad = self.params["config"]["padding"]["max_pad"]
pad_width = ( pad_width = (
randint(min_pad, max_pad) randint(min_pad, max_pad, (1,))
if min_pad is not None and max_pad is not None if min_pad is not None and max_pad is not None
else None else None
) )
pad_height = ( pad_height = (
randint(min_pad, max_pad) randint(min_pad, max_pad, (1,))
if min_pad is not None and max_pad is not None if min_pad is not None and max_pad is not None
else None else None
) )
...@@ -174,12 +173,10 @@ class OCRDataset(GenericDataset): ...@@ -174,12 +173,10 @@ class OCRDataset(GenericDataset):
full_label = label full_label = label
sample["label"] = full_label sample["label"] = full_label
sample["token_label"] = LM_str_to_ind(self.charset, full_label) sample["token_label"] = token_to_ind(self.charset, full_label)
if "add_eot" in self.params["config"]["constraints"]: sample["token_label"].append(self.tokens["end"])
sample["token_label"].append(self.tokens["end"])
sample["label_len"] = len(sample["token_label"]) sample["label_len"] = len(sample["token_label"])
if "add_sot" in self.params["config"]["constraints"]: sample["token_label"].insert(0, self.tokens["start"])
sample["token_label"].insert(0, self.tokens["start"])
return sample return sample
......
...@@ -20,8 +20,8 @@ from tqdm import tqdm ...@@ -20,8 +20,8 @@ from tqdm import tqdm
from dan.manager.metrics import MetricManager from dan.manager.metrics import MetricManager
from dan.manager.ocr import OCRDatasetManager from dan.manager.ocr import OCRDatasetManager
from dan.mlflow import MLFLOW_AVAILABLE, logging_metrics, logging_tags_metrics from dan.mlflow import MLFLOW_AVAILABLE, logging_metrics, logging_tags_metrics
from dan.ocr.utils import LM_ind_to_str
from dan.schedulers import DropoutScheduler from dan.schedulers import DropoutScheduler
from dan.utils import ind_to_token
if MLFLOW_AVAILABLE: if MLFLOW_AVAILABLE:
import mlflow import mlflow
...@@ -1010,7 +1010,7 @@ class Manager(OCRManager): ...@@ -1010,7 +1010,7 @@ class Manager(OCRManager):
predicted_tokens = torch.argmax(pred, dim=1).detach().cpu().numpy() predicted_tokens = torch.argmax(pred, dim=1).detach().cpu().numpy()
predicted_tokens = [predicted_tokens[i, : y_len[i]] for i in range(b)] predicted_tokens = [predicted_tokens[i, : y_len[i]] for i in range(b)]
str_x = [ str_x = [
LM_ind_to_str(self.dataset.charset, t, oov_symbol="") ind_to_token(self.dataset.charset, t, oov_symbol="")
for t in predicted_tokens for t in predicted_tokens
] ]
...@@ -1130,7 +1130,7 @@ class Manager(OCRManager): ...@@ -1130,7 +1130,7 @@ class Manager(OCRManager):
confidence_scores[i, : prediction_len[i]].tolist() for i in range(b) confidence_scores[i, : prediction_len[i]].tolist() for i in range(b)
] ]
str_x = [ str_x = [
LM_ind_to_str(self.dataset.charset, t, oov_symbol="") ind_to_token(self.dataset.charset, t, oov_symbol="")
for t in predicted_tokens for t in predicted_tokens
] ]
......
...@@ -109,11 +109,7 @@ def get_config(): ...@@ -109,11 +109,7 @@ def get_config():
"height_divisor": 32, # Image height will be divided by 32 "height_divisor": 32, # Image height will be divided by 32
"padding_value": 0, # Image padding value "padding_value": 0, # Image padding value
"padding_token": None, # Label padding value "padding_token": None, # Label padding value
"constraints": [ "constraints": [],
"add_eot",
"add_sot",
], # add end-of-transcription and start-of-transcription tokens in labels
"normalize": True, # Normalize with mean and variance of training dataset
"preprocessings": [ "preprocessings": [
{ {
"type": "to_RGB", "type": "to_RGB",
......
# -*- coding: utf-8 -*-
# Charset / labels conversion
def LM_str_to_ind(labels, str):
    """
    Encode a sequence of characters as their positions in the charset.

    :param labels: Charset; only its ``index`` method is used here.
    :param str: Iterable of characters to encode. The name shadows the
        builtin but is kept because it is part of the signature.
    :return: List holding one index per input character, in input order.
    :raises ValueError: Propagated from ``labels.index`` when ``labels`` is a
        list and a character is not part of it.
    """
    # Each character is looked up independently through labels.index.
    return list(map(labels.index, str))
def LM_ind_to_str(labels, ind, oov_symbol=None):
    """
    Decode a sequence of indices into a string using the charset.

    :param labels: Charset, indexable by position; its entries are joined
        into the result.
    :param ind: Iterable of indices to decode.
    :param oov_symbol: Replacement emitted for any index that is not smaller
        than ``len(labels)``. When ``None``, no bound check is done and every
        index is looked up directly in ``labels``.
    :return: Concatenation of the decoded entries, in input order.
    """
    if oov_symbol is None:
        # Strict mode: an out-of-range index fails in the lookup itself.
        return "".join(labels[position] for position in ind)

    charset_size = len(labels)
    # Only the upper bound is checked; other indices use normal indexing.
    return "".join(
        labels[position] if position < charset_size else oov_symbol
        for position in ind
    )
...@@ -6,7 +6,6 @@ import numpy as np ...@@ -6,7 +6,6 @@ import numpy as np
from PIL import Image from PIL import Image
from dan import logger from dan import logger
from dan.utils import round_floats
def parse_delimiters(delimiters): def parse_delimiters(delimiters):
...@@ -78,7 +77,7 @@ def split_text_and_confidences( ...@@ -78,7 +77,7 @@ def split_text_and_confidences(
offset = 1 offset = 1
else: else:
logger.error("Level should be either 'char', 'word', or 'line'") logger.error("Level should be either 'char', 'word', or 'line'")
return texts, round_floats(probs), offset return texts, [np.around(num, 2) for num in probs], offset
def get_predicted_polygons_with_confidence( def get_predicted_polygons_with_confidence(
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
import os import os
import pickle import pickle
from itertools import pairwise
from pathlib import Path from pathlib import Path
import cv2 import cv2
...@@ -13,14 +14,13 @@ from dan import logger ...@@ -13,14 +14,13 @@ from dan import logger
from dan.datasets.extract.utils import save_json from dan.datasets.extract.utils import save_json
from dan.decoder import GlobalHTADecoder from dan.decoder import GlobalHTADecoder
from dan.encoder import FCN_Encoder from dan.encoder import FCN_Encoder
from dan.ocr.utils import LM_ind_to_str
from dan.predict.attention import ( from dan.predict.attention import (
get_predicted_polygons_with_confidence, get_predicted_polygons_with_confidence,
parse_delimiters, parse_delimiters,
plot_attention, plot_attention,
split_text_and_confidences, split_text_and_confidences,
) )
from dan.utils import pairwise, read_image from dan.utils import ind_to_token, read_image
class DAN: class DAN:
...@@ -220,7 +220,7 @@ class DAN: ...@@ -220,7 +220,7 @@ class DAN:
# Transform tokens to characters # Transform tokens to characters
predicted_text = [ predicted_text = [
LM_ind_to_str(self.charset, t, oov_symbol="") for t in predicted_tokens ind_to_token(self.charset, t, oov_symbol="") for t in predicted_tokens
] ]
logger.info("Images processed") logger.info("Images processed")
......
...@@ -9,6 +9,8 @@ import numpy as np ...@@ -9,6 +9,8 @@ import numpy as np
from cv2 import dilate, erode, normalize from cv2 import dilate, erode, normalize
from numpy import random from numpy import random
from PIL import Image from PIL import Image
from torch import rand, randint
from torch.distributions.uniform import Uniform
from torchvision.transforms import ( from torchvision.transforms import (
ColorJitter, ColorJitter,
GaussianBlur, GaussianBlur,
...@@ -17,8 +19,6 @@ from torchvision.transforms import ( ...@@ -17,8 +19,6 @@ from torchvision.transforms import (
) )
from torchvision.transforms.functional import InterpolationMode from torchvision.transforms.functional import InterpolationMode
from dan.utils import rand, rand_uniform, randint
class DPIAdjusting: class DPIAdjusting:
""" """
...@@ -173,14 +173,14 @@ def get_list_augmenters(img, aug_configs, fill_value): ...@@ -173,14 +173,14 @@ def get_list_augmenters(img, aug_configs, fill_value):
""" """
augmenters = list() augmenters = list()
for aug_config in aug_configs: for aug_config in aug_configs:
if rand() > aug_config["proba"]: if rand((1,)) > aug_config["proba"]:
continue continue
if aug_config["type"] == "dpi": if aug_config["type"] == "dpi":
valid_factor = False valid_factor = False
while not valid_factor: while not valid_factor:
factor = rand_uniform( factor = Uniform(
aug_config["min_factor"], aug_config["max_factor"] aug_config["min_factor"], aug_config["max_factor"]
) ).sample()
valid_factor = not ( valid_factor = not (
( (
"max_width" in aug_config "max_width" in aug_config
...@@ -202,8 +202,12 @@ def get_list_augmenters(img, aug_configs, fill_value): ...@@ -202,8 +202,12 @@ def get_list_augmenters(img, aug_configs, fill_value):
augmenters.append(DPIAdjusting(factor)) augmenters.append(DPIAdjusting(factor))
elif aug_config["type"] == "zoom_ratio": elif aug_config["type"] == "zoom_ratio":
ratio_h = rand_uniform(aug_config["min_ratio_h"], aug_config["max_ratio_h"]) ratio_h = Uniform(
ratio_w = rand_uniform(aug_config["min_ratio_w"], aug_config["max_ratio_w"]) aug_config["min_ratio_h"], aug_config["max_ratio_h"]
).sample()
ratio_w = Uniform(
aug_config["min_ratio_w"], aug_config["max_ratio_w"]
).sample()
augmenters.append( augmenters.append(
ZoomRatio( ZoomRatio(
ratio_h=ratio_h, ratio_w=ratio_w, keep_dim=aug_config["keep_dim"] ratio_h=ratio_h, ratio_w=ratio_w, keep_dim=aug_config["keep_dim"]
...@@ -211,7 +215,7 @@ def get_list_augmenters(img, aug_configs, fill_value): ...@@ -211,7 +215,7 @@ def get_list_augmenters(img, aug_configs, fill_value):
) )
elif aug_config["type"] == "perspective": elif aug_config["type"] == "perspective":
scale = rand_uniform(aug_config["min_factor"], aug_config["max_factor"]) scale = Uniform(aug_config["min_factor"], aug_config["max_factor"]).sample()
augmenters.append( augmenters.append(
RandomPerspective( RandomPerspective(
distortion_scale=scale, distortion_scale=scale,
...@@ -223,13 +227,20 @@ def get_list_augmenters(img, aug_configs, fill_value): ...@@ -223,13 +227,20 @@ def get_list_augmenters(img, aug_configs, fill_value):
elif aug_config["type"] == "elastic_distortion": elif aug_config["type"] == "elastic_distortion":
kernel_size = ( kernel_size = (
randint(aug_config["min_kernel_size"], aug_config["max_kernel_size"]) randint(
// 2 aug_config["min_kernel_size"], aug_config["max_kernel_size"], (1,)
* 2 ).item()
+ 1 ) // 2 * 2 + 1
sigma = (
Uniform(aug_config["min_sigma"], aug_config["max_sigma"])
.sample()
.item()
)
alpha = (
Uniform(aug_config["min_alpha"], aug_config["max_alpha"])
.sample()
.item()
) )
sigma = rand_uniform(aug_config["min_sigma"], aug_config["max_sigma"])
alpha = rand_uniform(aug_config["min_alpha"], aug_config["max_alpha"])
augmenters.append( augmenters.append(
ElasticDistortion( ElasticDistortion(
kernel_size=(kernel_size, kernel_size), sigma=sigma, alpha=alpha kernel_size=(kernel_size, kernel_size), sigma=sigma, alpha=alpha
...@@ -237,9 +248,13 @@ def get_list_augmenters(img, aug_configs, fill_value): ...@@ -237,9 +248,13 @@ def get_list_augmenters(img, aug_configs, fill_value):
) )
elif aug_config["type"] == "dilation_erosion": elif aug_config["type"] == "dilation_erosion":
kernel_h = randint(aug_config["min_kernel"], aug_config["max_kernel"] + 1) kernel_h = randint(
kernel_w = randint(aug_config["min_kernel"], aug_config["max_kernel"] + 1) aug_config["min_kernel"], aug_config["max_kernel"] + 1, (1,)
if randint(0, 2) == 0: )
kernel_w = randint(
aug_config["min_kernel"], aug_config["max_kernel"] + 1, (1,)
)
if randint(0, 2, (1,)) == 0:
augmenters.append( augmenters.append(
Erosion((kernel_w, kernel_h), aug_config["iterations"]) Erosion((kernel_w, kernel_h), aug_config["iterations"])
) )
...@@ -261,9 +276,17 @@ def get_list_augmenters(img, aug_configs, fill_value): ...@@ -261,9 +276,17 @@ def get_list_augmenters(img, aug_configs, fill_value):
elif aug_config["type"] == "gaussian_blur": elif aug_config["type"] == "gaussian_blur":
max_kernel_h = min(aug_config["max_kernel"], img.size[1]) max_kernel_h = min(aug_config["max_kernel"], img.size[1])
max_kernel_w = min(aug_config["max_kernel"], img.size[0]) max_kernel_w = min(aug_config["max_kernel"], img.size[0])
kernel_h = randint(aug_config["min_kernel"], max_kernel_h + 1) // 2 * 2 + 1 kernel_h = (
kernel_w = randint(aug_config["min_kernel"], max_kernel_w + 1) // 2 * 2 + 1 randint(aug_config["min_kernel"], max_kernel_h + 1, (1,)).item()
sigma = rand_uniform(aug_config["min_sigma"], aug_config["max_sigma"]) ) // 2 * 2 + 1
kernel_w = (
randint(aug_config["min_kernel"], max_kernel_w + 1, (1,)).item()
) // 2 * 2 + 1
sigma = (
Uniform(aug_config["min_sigma"], aug_config["max_sigma"])
.sample()
.item()
)
augmenters.append( augmenters.append(
GaussianBlur(kernel_size=(kernel_w, kernel_h), sigma=sigma) GaussianBlur(kernel_size=(kernel_w, kernel_h), sigma=sigma)
) )
...@@ -272,10 +295,10 @@ def get_list_augmenters(img, aug_configs, fill_value): ...@@ -272,10 +295,10 @@ def get_list_augmenters(img, aug_configs, fill_value):
augmenters.append(GaussianNoise(std=aug_config["std"])) augmenters.append(GaussianNoise(std=aug_config["std"]))
elif aug_config["type"] == "sharpen": elif aug_config["type"] == "sharpen":
alpha = rand_uniform(aug_config["min_alpha"], aug_config["max_alpha"]) alpha = Uniform(aug_config["min_alpha"], aug_config["max_alpha"]).sample()
strength = rand_uniform( strength = Uniform(
aug_config["min_strength"], aug_config["max_strength"] aug_config["min_strength"], aug_config["max_strength"]
) ).sample()
augmenters.append(Sharpen(alpha=alpha, strength=strength)) augmenters.append(Sharpen(alpha=alpha, strength=strength))
else: else:
...@@ -289,7 +312,7 @@ def apply_data_augmentation(img, da_config): ...@@ -289,7 +312,7 @@ def apply_data_augmentation(img, da_config):
""" """
Apply data augmentation strategy on input image Apply data augmentation strategy on input image
""" """
if da_config["proba"] != 1 and rand() > da_config["proba"]: if da_config["proba"] != 1 and rand((1,)) > da_config["proba"]:
return img return img
# Convert to PIL Image # Convert to PIL Image
......
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from itertools import tee
import cv2 import cv2
import numpy as np import numpy as np
import torch from torch import randint
from torch.distributions.uniform import Uniform
# Layout begin-token to end-token # Layout begin-token to end-token
SEM_MATCHING_TOKENS = {"": "", "": "", "": "", "": "", "": "", "": ""} SEM_MATCHING_TOKENS = {"": "", "": "", "": "", "": "", "": "", "": ""}
...@@ -16,27 +13,6 @@ class MLflowNotInstalled(Exception): ...@@ -16,27 +13,6 @@ class MLflowNotInstalled(Exception):
""" """
def randint(low, high):
    """
    Draw one random integer through ``torch.randint(low, high, (1,))``.

    The draw goes through torch (per the original note: to preserve
    randomness among dataloader workers) and is converted to a plain ``int``.

    :param low: Lower bound forwarded to ``torch.randint``.
    :param high: Upper bound forwarded to ``torch.randint``.
    :return: The sampled value as a Python ``int``.
    """
    drawn = torch.randint(low=low, high=high, size=(1,))
    return int(drawn)
def rand():
    """
    Draw one random number through ``torch.rand((1,))``.

    The draw goes through torch (per the original note: to preserve
    randomness among dataloader workers) and is converted to a plain ``float``.

    :return: The sampled value as a Python ``float``.
    """
    drawn = torch.rand(size=(1,))
    return float(drawn)
def rand_uniform(low, high):
    """
    Draw one sample from ``Uniform(low, high)`` and return it as a ``float``.

    The draw goes through torch (per the original note: to preserve
    randomness among dataloader workers).

    :param low: Lower bound handed to the ``Uniform`` distribution.
    :param high: Upper bound handed to the ``Uniform`` distribution.
    :return: The sampled value as a Python ``float``.
    """
    distribution = Uniform(low, high)
    drawn = distribution.sample()
    return float(drawn)
def pad_sequences_1D(data, padding_value): def pad_sequences_1D(data, padding_value):
""" """
Pad data with padding_value to get same length Pad data with padding_value to get same length
...@@ -70,8 +46,8 @@ def pad_images(data, padding_value, padding_mode="br"): ...@@ -70,8 +46,8 @@ def pad_images(data, padding_value, padding_mode="br"):
elif padding_mode == "random": elif padding_mode == "random":
xmax = longest_x - x_len xmax = longest_x - x_len
ymax = longest_y - y_len ymax = longest_y - y_len
xi = randint(0, xmax) if xmax >= 1 else 0 xi = randint(0, xmax, (1,)) if xmax >= 1 else 0
yi = randint(0, ymax) if ymax >= 1 else 0 yi = randint(0, ymax, (1,)) if ymax >= 1 else 0
padded_data[i, xi : xi + x_len, yi : yi + y_len, ...] = data[i] padded_data[i, xi : xi + x_len, yi : yi + y_len, ...] = data[i]
else: else:
raise NotImplementedError("Undefined padding mode: {}".format(padding_mode)) raise NotImplementedError("Undefined padding mode: {}".format(padding_mode))
...@@ -120,8 +96,8 @@ def pad_image( ...@@ -120,8 +96,8 @@ def pad_image(
elif padding_mode == "tl": elif padding_mode == "tl":
hi, wi = pad_height, pad_width hi, wi = pad_height, pad_width
elif padding_mode == "random": elif padding_mode == "random":
hi = randint(0, pad_height) if pad_height >= 1 else 0 hi = randint(0, pad_height, (1,)) if pad_height >= 1 else 0
wi = randint(0, pad_width) if pad_width >= 1 else 0 wi = randint(0, pad_width, (1,)) if pad_width >= 1 else 0
else: else:
raise NotImplementedError("Undefined padding mode: {}".format(padding_mode)) raise NotImplementedError("Undefined padding mode: {}".format(padding_mode))
padded_image[hi : hi + h, wi : wi + w, ...] = image padded_image[hi : hi + h, wi : wi + w, ...] = image
...@@ -156,11 +132,19 @@ def round_floats(float_list, decimals=2): ...@@ -156,11 +132,19 @@ def round_floats(float_list, decimals=2):
return [np.around(num, decimals) for num in float_list] return [np.around(num, decimals) for num in float_list]
def pairwise(iterable): # Charset / labels conversion
""" def token_to_ind(labels, str):
Not necessary when using 3.10. See https://docs.python.org/3/library/itertools.html#itertools.pairwise. return [labels.index(c) for c in str]
"""
# pairwise('ABCDEFG') --> AB BC CD DE EF FG
a, b = tee(iterable) def ind_to_token(labels, ind, oov_symbol=None):
next(b, None) if oov_symbol is not None:
return zip(a, b) res = []
for i in ind:
if i < len(labels):
res.append(labels[i])
else:
res.append(oov_symbol)
else:
res = [labels[i] for i in ind]
return "".join(res)
# Utils
::: dan.ocr.utils
...@@ -8,7 +8,7 @@ All hyperparameters are specified and editable in the training scripts (meaning ...@@ -8,7 +8,7 @@ All hyperparameters are specified and editable in the training scripts (meaning
| `dataset_name` | Name of the dataset. | `str` | | | `dataset_name` | Name of the dataset. | `str` | |
| `dataset_level` | Level of the dataset. Should be named after the element type. | `str` | | | `dataset_level` | Level of the dataset. Should be named after the element type. | `str` | |
| `dataset_variant` | Variant of the dataset. Usually empty for HTR datasets, `"_sem"` for HTR+NER datasets. | `str` | | | `dataset_variant` | Variant of the dataset. Usually empty for HTR datasets, `"_sem"` for HTR+NER datasets. | `str` | |
| `dataset_path` | Path to the dataset. | `str` | | `dataset_path` | Path to the dataset. | `str` | |
| `dataset_params.config.dataset_manager` | Dataset manager class. | custom class | `OCRDatasetManager` | | `dataset_params.config.dataset_manager` | Dataset manager class. | custom class | `OCRDatasetManager` |
| `dataset_params.config.dataset_class` | Dataset class. | custom class | `OCRDataset` | | `dataset_params.config.dataset_class` | Dataset class. | custom class | `OCRDataset` |
| `dataset_params.config.datasets` | Dataset dictionary with the dataset name as key and dataset path as value. | `dict` | | | `dataset_params.config.datasets` | Dataset dictionary with the dataset name as key and dataset path as value. | `dict` | |
...@@ -18,8 +18,7 @@ All hyperparameters are specified and editable in the training scripts (meaning ...@@ -18,8 +18,7 @@ All hyperparameters are specified and editable in the training scripts (meaning
| `dataset_params.config.width_divisor` | Factor to reduce the height of the feature vector before feeding the decoder. | `int` | `32` | | `dataset_params.config.width_divisor` | Factor to reduce the height of the feature vector before feeding the decoder. | `int` | `32` |
| `dataset_params.config.padding_value` | Image padding value. | `int` | `0` | | `dataset_params.config.padding_value` | Image padding value. | `int` | `0` |
| `dataset_params.config.padding_token` | Transcription padding value. | `int` | `None` | | `dataset_params.config.padding_token` | Transcription padding value. | `int` | `None` |
| `dataset_params.config.constraints` | Whether to add end-of-transcription and start-of-transcription tokens in labels. | `list` | `["add_eot", "add_sot"]` | | `dataset_params.config.constraints` | Whether to add end-of-transcription and start-of-transcription tokens in labels. | `list` | `[]` |
| `dataset_params.config.normalize` | Normalize with mean and variance of training dataset. | `bool` | `True` |
| `dataset_params.config.preprocessings` | List of pre-processing functions to apply to input images. | `list` | (see [dedicated section](#data-preprocessing)) | | `dataset_params.config.preprocessings` | List of pre-processing functions to apply to input images. | `list` | (see [dedicated section](#data-preprocessing)) |
| `dataset_params.config.augmentation` | Configuration for data augmentation. | `dict` | (see [dedicated section](#data-augmentation)) | | `dataset_params.config.augmentation` | Configuration for data augmentation. | `dict` | (see [dedicated section](#data-augmentation)) |
......
...@@ -86,7 +86,6 @@ nav: ...@@ -86,7 +86,6 @@ nav:
- Training managers: ref/managers/training.md - Training managers: ref/managers/training.md
- OCR: - OCR:
- ref/ocr/index.md - ref/ocr/index.md
- Utils: ref/ocr/utils.md
- Document: - Document:
- ref/ocr/document/index.md - ref/ocr/document/index.md
- Training: ref/ocr/document/train.md - Training: ref/ocr/document/train.md
......
...@@ -72,11 +72,7 @@ def training_config(): ...@@ -72,11 +72,7 @@ def training_config():
"height_divisor": 32, # Image height will be divided by 32 "height_divisor": 32, # Image height will be divided by 32
"padding_value": 0, # Image padding value "padding_value": 0, # Image padding value
"padding_token": None, # Label padding value "padding_token": None, # Label padding value
"constraints": [ "constraints": [],
"add_eot",
"add_sot",
], # add end-of-transcription and start-of-transcription tokens in labels
"normalize": True, # Normalize with mean and variance of training dataset
"preprocessings": [ "preprocessings": [
{ {
"type": "to_RGB", "type": "to_RGB",
......