config.md

You must replace the pseudo-variables `$dataset_name` and `$dataset_path` with the name of your dataset and the relative or absolute path to it, respectively.
- Linebreaks are treated as spaces by language models; as a result, predictions will not include linebreaks.
folder/
├── <model.lm.path> # Path to the language model
├── lexicon.txt
└── tokens.txt
[
    {
        "type": "fixed_height_resize",
        "fixed_height": 1500
    }
]
[
    {
        "type": "fixed_width_resize",
        "fixed_width": 1500
    }
]
[
    {
        "type": "fixed_resize",
        "fixed_height": 1900,
        "fixed_width": 1250
    }
]
[
    {
        "type": "max_resize",
        "max_height": 2000,
        "max_width": 2000
    }
]
[
    {
        "type": "fixed_height_resize",
        "fixed_height": 2000
    },
    {
        "type": "fixed_width_resize",
        "fixed_width": 2000
    }
]
# Augmentation pipeline built from two stages: a random downscale followed by
# a group of candidate transforms wrapped in `A.SomeOf`.
# `A`, `cv2` and the individual transform classes are defined outside this excerpt.
transforms = A.Compose(
    [
        # Scale between 0.75 and 1.0
        RandomScale(scale_limit=[-0.25, 0], p=1, interpolation=cv2.INTER_AREA),
        # NOTE(review): presumably picks n=2 of the candidates below each time
        # this group runs (group probability p=0.9) — confirm against the
        # `SomeOf` documentation.
        A.SomeOf(
            [
                ErosionDilation(min_kernel=1, max_kernel=4, iterations=1),
                Perspective(scale=(0.05, 0.09), fit_output=True, p=0.4),
                GaussianBlur(sigma_limit=2.5, p=1),
                GaussNoise(var_limit=50**2, p=1),
                ColorJitter(
                    contrast=0.2, brightness=0.2, saturation=0.2, hue=0.2, p=1
                ),
                ElasticTransform(
                    alpha=20.0, sigma=5.0, border_mode=0, p=1
                ),
                Sharpen(alpha=(0.0, 1.0), p=1),
                # Shear range is set on the x axis only; the y range is fixed to (0, 0)
                Affine(shear={"x": (-20, 20), "y": (0, 0)}, p=1),
                CoarseDropout(p=1),
                ToGray(p=0.5),
            ],
            n=2,
            p=0.9,
        ),
    ],
    # Probability passed to the outer Compose (separate from the SomeOf p above)
    p=0.9,
)
$ pip install .[mlflow]
wandb login
{
  "wandb": {
    "init": {
      "id": "<unique_ID>",
      "resume": "auto"
    }
  }
}
{
  "wandb": {
    "init": {
      "mode": "offline"
    }
  }
}
wandb sync --project <wandb_project> --sync-all --append
#!/bin/bash

# Runs `wandb sync` in an endless loop, pausing 5 minutes between runs.
# Replace <wandb_project> with your own project name before using this script.

# Print the current date and time as "YYYY-MM-DD HH:MM:SS".
timestamp() {
    date '+%Y-%m-%d %H:%M:%S'
}

while true
do
    echo "[$(timestamp)] Publishing W&B runs..."
    wandb sync --project <wandb_project> --sync-all --append
    echo "[$(timestamp)] W&B runs published."

    # Wait 5 minutes before publishing again
    sleep 5m
done