training.md

output/
├── charset.pkl
├── labels.json
├── split.json
├── images
│   ├── train
│   ├── val
│   └── test
└── labels
    ├── train
    ├── val
    └── test
# Version string. Quoted so it always loads as a string: a future two-part
# value such as 1.0 would otherwise be parsed as a float.
version: '0.0.1'
# Parameter structure. Each leaf value is the bare word int, float or str,
# apparently naming the expected type rather than giving a concrete value
# (as plain YAML these words load as strings).
parameters:
  max_char_prediction: int
  # Encoder settings.
  encoder:
    input_channels: int
    dropout: float
  # Decoder settings.
  decoder:
    enc_dim: int
    l_max: int
    dec_pred_dropout: float
    attention_win: int
    vocab_size: int
    h_max: int
    w_max: int
    dec_num_layers: int
    dec_dim_feedforward: int
    dec_num_heads: int
    dec_att_dropout: float
    dec_res_dropout: float
  # Sequence of preprocessing entries; each entry is a mapping with the keys
  # shown in the single example item below.
  preprocessings:
    # NOTE(review): presumably only the size keys relevant to the chosen
    # `type` are needed in a given entry - confirm against the consumer.
    - type: str
      max_height: int
      max_width: int
      fixed_height: int
      fixed_width: int