Commit 19a0518e authored by Yoann Schneider, committed by Mélodie Boillet

Dedicated evaluate command

parent 23bb21f9
1 merge request: !303 Dedicated evaluate command
Showing 373 additions and 132 deletions
{
"dataset": {
"datasets": {
"training": "tests/data/training/training_dataset"
},
"train": {
"name": "training-train",
"datasets": [
["training", "train"]
]
},
"val": {
"training-val": [
["training", "val"]
]
},
"test": {
"training-test": [
["training", "test"]
]
},
"max_char_prediction": 30,
"tokens": null
},
"model": {
"transfered_charset": true,
"additional_tokens": 1,
"encoder": {
"dropout": 0.5,
"nb_layers": 5
},
"h_max": 500,
"w_max": 1000,
"decoder": {
"l_max": 15000,
"dec_num_layers": 8,
"dec_num_heads": 4,
"dec_res_dropout": 0.1,
"dec_pred_dropout": 0.1,
"dec_att_dropout": 0.1,
"dec_dim_feedforward": 256,
"attention_win": 100,
"enc_dim": 256
}
},
"training": {
"data": {
"batch_size": 2,
"load_in_memory": true,
"worker_per_gpu": 4,
"preprocessings": [
{
"type": "max_resize",
"max_width": 2000,
"max_height": 2000
}
],
"augmentation": true
},
"device": {
"use_ddp": false,
"ddp_port": "20027",
"use_amp": true,
"nb_gpu": 0,
"force": "cpu"
},
"metrics": {
"train": [
"loss_ce",
"cer",
"wer",
"wer_no_punct"
],
"eval": [
"cer",
"wer",
"wer_no_punct"
]
},
"validation": {
"eval_on_valid": true,
"eval_on_valid_interval": 2,
"set_name_focus_metric": "training-val"
},
"output_folder": "tests/data/evaluate",
"gradient_clipping": {},
"max_nb_epochs": 4,
"load_epoch": "best",
"optimizers": {
"all": {
"args": {
"lr": 0.0001,
"amsgrad": false
}
}
},
"lr_schedulers": null,
"label_noise_scheduler": {
"min_error_rate": 0.2,
"max_error_rate": 0.2,
"total_num_steps": 5e4
},
"transfer_learning": null
}
}
@@ -3,7 +3,7 @@ import argparse
import errno
from dan.datasets import add_dataset_parser
-from dan.ocr import add_predict_parser, add_train_parser
+from dan.ocr import add_evaluate_parser, add_predict_parser, add_train_parser
def get_parser():
@@ -12,6 +12,7 @@ def get_parser():
add_dataset_parser(subcommands)
add_train_parser(subcommands)
add_evaluate_parser(subcommands)
add_predict_parser(subcommands)
return parser
......
@@ -3,6 +3,8 @@
Train a new DAN model.
"""
from dan.ocr.evaluate import add_evaluate_parser # noqa
from dan.ocr.predict import add_predict_parser # noqa
from dan.ocr.train import run
from dan.utils import read_json
......
# -*- coding: utf-8 -*-
"""
Evaluate a trained DAN model.
"""
import logging
import random
import numpy as np
import torch
import torch.multiprocessing as mp
from dan.ocr.manager.training import Manager
from dan.ocr.utils import update_config
from dan.utils import read_json
logger = logging.getLogger(__name__)
def add_evaluate_parser(subcommands) -> None:
parser = subcommands.add_parser(
"evaluate",
description=__doc__,
help=__doc__,
)
parser.add_argument(
"--config",
type=read_json,
required=True,
help="Configuration file.",
)
parser.set_defaults(func=run)
def eval(rank, config, mlflow_logging):
torch.manual_seed(0)
torch.cuda.manual_seed(0)
np.random.seed(0)
random.seed(0)
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True
config["training"]["device"]["ddp_rank"] = rank
# Load best checkpoint
config["training"]["load_epoch"] = "best"
model = Manager(config)
model.load_model()
metrics = ["cer", "wer", "wer_no_punct", "time"]
for dataset_name in config["dataset"]["datasets"]:
for set_name in ["test", "val", "train"]:
logger.info(f"Evaluating on set `{set_name}`")
model.evaluate(
"{}-{}".format(dataset_name, set_name),
[
(dataset_name, set_name),
],
metrics,
output=True,
mlflow_logging=mlflow_logging,
)
def run(config: dict):
update_config(config)
mlflow_logging = bool(config.get("mlflow"))
if mlflow_logging:
logger.info("MLflow logging enabled")
if (
config["training"]["device"]["use_ddp"]
and config["training"]["device"]["force"] in [None, "cuda"]
and torch.cuda.is_available()
):
mp.spawn(
eval,
args=(config, mlflow_logging),
nprocs=config["training"]["device"]["nb_gpu"],
)
else:
eval(0, config, mlflow_logging)
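For orientation, here is a minimal sketch of driving this new command from Python rather than the CLI. It only uses functions shown above (`read_json`, `run`); the configuration path is illustrative:

```python
# Minimal sketch: programmatic evaluation with the module above.
from dan.ocr.evaluate import run
from dan.utils import read_json

# Parse the JSON configuration (illustrative path), then let run()
# complete it, load the best checkpoint, and evaluate the train,
# val and test splits, spawning one process per GPU when DDP is enabled.
config = read_json("configs/eval.json")
run(config)
```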
@@ -3,18 +3,14 @@ import json
import logging
import random
from copy import deepcopy
-from pathlib import Path
import numpy as np
import torch
import torch.multiprocessing as mp
-from torch.optim import Adam
-from dan.ocr.decoder import GlobalHTADecoder
-from dan.ocr.encoder import FCN_Encoder
from dan.ocr.manager.training import Manager
from dan.ocr.mlflow import MLFLOW_AVAILABLE
-from dan.ocr.transforms import Preprocessing
+from dan.ocr.utils import update_config
from dan.utils import MLflowNotInstalled
if MLFLOW_AVAILABLE:
@@ -26,7 +22,7 @@ if MLFLOW_AVAILABLE:
logger = logging.getLogger(__name__)
-def train_and_test(rank, params, mlflow_logging=False):
+def train(rank, params, mlflow_logging=False):
torch.manual_seed(0)
torch.cuda.manual_seed(0)
np.random.seed(0)
@@ -43,67 +39,6 @@ def train_and_test(rank, params, mlflow_logging=False):
model.train(mlflow_logging=mlflow_logging)
-    # load weights giving best CER on valid set
-    model.params["training"]["load_epoch"] = "best"
-    model.load_model()
-
-    metrics = ["cer", "wer", "wer_no_punct", "time"]
-    for dataset_name in params["dataset"]["datasets"]:
-        for set_name in ["test", "val", "train"]:
-            model.evaluate(
-                "{}-{}".format(dataset_name, set_name),
-                [
-                    (dataset_name, set_name),
-                ],
-                metrics,
-                output=True,
-                mlflow_logging=mlflow_logging,
-            )
-
-
-def update_config(config: dict):
-    """
-    Update some fields for easier
-    """
-    # .dataset.datasets cast all values to Path
-    config["dataset"]["datasets"] = {
-        name: Path(path) for name, path in config["dataset"]["datasets"].items()
-    }
-
-    # .model.encoder.class = FCN_ENCODER
-    config["model"]["encoder"]["class"] = FCN_Encoder
-
-    # .model.decoder.class = GlobalHTADecoder
-    config["model"]["decoder"]["class"] = GlobalHTADecoder
-
-    # Update preprocessing type
-    for prepro in config["training"]["data"]["preprocessings"]:
-        prepro["type"] = Preprocessing(prepro["type"])
-
-    # .training.output_folder to Path
-    config["training"]["output_folder"] = Path(config["training"]["output_folder"])
-
-    if config["training"]["transfer_learning"]:
-        # .training.transfer_learning.encoder[1]
-        config["training"]["transfer_learning"]["encoder"][1] = Path(
-            config["training"]["transfer_learning"]["encoder"][1]
-        )
-
-        # .training.transfer_learning.decoder[1]
-        config["training"]["transfer_learning"]["decoder"][1] = Path(
-            config["training"]["transfer_learning"]["decoder"][1]
-        )
-
-    # Parse optimizers
-    for optimizer_setup in config["training"]["optimizers"].values():
-        # Only supported optimizer is Adam
-        optimizer_setup["class"] = Adam
-
-    # set nb_gpu if not present
-    if config["training"]["device"]["nb_gpu"] is None:
-        config["training"]["device"]["nb_gpu"] = torch.cuda.device_count()
def serialize_config(config):
"""
@@ -150,12 +85,12 @@ def start_training(config, mlflow_logging: bool) -> None:
and torch.cuda.is_available()
):
mp.spawn(
-            train_and_test,
+            train,
args=(config, mlflow_logging),
nprocs=config["training"]["device"]["nb_gpu"],
)
else:
-        train_and_test(0, config, mlflow_logging)
+        train(0, config, mlflow_logging)
def run(config: dict):
......
# -*- coding: utf-8 -*-
from pathlib import Path
import torch
from torch.optim import Adam
from dan.ocr.decoder import GlobalHTADecoder
from dan.ocr.encoder import FCN_Encoder
from dan.ocr.transforms import Preprocessing
def update_config(config: dict):
"""
Complete the fields that are not JSON serializable.
"""
# .dataset.datasets cast all values to Path
config["dataset"]["datasets"] = {
name: Path(path) for name, path in config["dataset"]["datasets"].items()
}
# .model.encoder.class = FCN_ENCODER
config["model"]["encoder"]["class"] = FCN_Encoder
# .model.decoder.class = GlobalHTADecoder
config["model"]["decoder"]["class"] = GlobalHTADecoder
# Update preprocessing type
for prepro in config["training"]["data"]["preprocessings"]:
prepro["type"] = Preprocessing(prepro["type"])
# .training.output_folder to Path
config["training"]["output_folder"] = Path(config["training"]["output_folder"])
if config["training"]["transfer_learning"]:
# .training.transfer_learning.encoder[1]
config["training"]["transfer_learning"]["encoder"][1] = Path(
config["training"]["transfer_learning"]["encoder"][1]
)
# .training.transfer_learning.decoder[1]
config["training"]["transfer_learning"]["decoder"][1] = Path(
config["training"]["transfer_learning"]["decoder"][1]
)
# Parse optimizers
for optimizer_setup in config["training"]["optimizers"].values():
# Only supported optimizer is Adam
optimizer_setup["class"] = Adam
# set nb_gpu if not present
if config["training"]["device"]["nb_gpu"] is None:
config["training"]["device"]["nb_gpu"] = torch.cuda.device_count()
@@ -46,6 +46,12 @@ teklia-dan predict \
--output /tmp/dan-predict
```
The library already includes everything needed to run the [evaluation command](../usage/evaluate/index.md) on a minimal dataset. You can use the configuration available at `configs/eval.json`; it is already populated with the parameters used in the unit tests.
```shell
teklia-dan evaluate --config configs/eval.json
```
## Documentation
This documentation uses [Sphinx](http://www.sphinx-doc.org/) and was generated using [MkDocs](https://mkdocs.org/) and [mkdocstrings](https://mkdocstrings.github.io/).
......
# Evaluation
::: dan.ocr.evaluate
# Evaluation
Use the `teklia-dan evaluate` command to evaluate a trained DAN model.
To evaluate DAN on your dataset:
1. Create a JSON configuration file. You can base it on the training configuration; refer to the [dedicated page](../train/config.md) for a description of the parameters.
1. Run `teklia-dan evaluate --config path/to/your/config.json`.
1. Evaluation results for every split are available in the `results` subfolder of the output folder indicated in your configuration.
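For example, with a configuration stored at `configs/eval.json` (illustrative path), a run and its outputs look like this:

```shell
teklia-dan evaluate --config configs/eval.json
# Reports are written to <output_folder>/results/, one YAML file
# per split, e.g. predict_training-val_0.yaml for the validation set.
```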
@@ -8,5 +8,8 @@ When `teklia-dan` is installed in your environment, you may use the following commands:
`teklia-dan train`
: To train a new DAN model. More details in the [dedicated page](./train/index.md).
`teklia-dan evaluate`
: To evaluate a trained DAN model. More details in the [dedicated page](./evaluate/index.md).
`teklia-dan predict`
: To predict an image using a trained DAN model. More details in the [dedicated page](./predict/index.md).
-# Predict
+# Prediction
Use the `teklia-dan predict` command to apply a trained DAN model on an image.
......
# Train
-Use the `teklia-dan train` command to train a new DAN model. It is able to train a DAN model at line or document-level and evaluate it.
+Use the `teklia-dan train` command to train a new DAN model. It is able to train a DAN model at line or document-level.
To train DAN on your dataset:
1. Create a training JSON configuration file. Refer to the [dedicated page](config.md) for a description of parameters.
1. Run `teklia-dan train --config path/to/your/config.json`.
-1. Look into evaluation results in the output folder indicated in your configuration:
-   - `checkpoints` contains model weights for the last trained epoch and for the epoch giving the best valid CER.
-   - `results` contains the tensorboard log file, the parameters file, and the evaluation results for the best epoch.
1. (Optional) Train a language model. Refer to the [dedicated page](language_model.md).
+1. Look into the training results in the output folder indicated in your configuration:
+   - `checkpoints` contains model weights for the last trained epoch and for the epoch giving the best valid CER.
+   - `results` contains the tensorboard log file and the parameters file.
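For example (illustrative configuration path):

```shell
teklia-dan train --config path/to/your/config.json
# Then inspect the output folder declared in the configuration:
#   checkpoints/  weights for the last epoch and the best valid CER epoch
#   results/      tensorboard log file and parameters file
```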
## Additional pages
......
@@ -71,7 +71,8 @@ nav:
- Data augmentation: usage/train/augmentation.md
- Language model: usage/train/language_model.md
- Jean Zay tutorial: usage/train/jeanzay.md
-    - Predict: usage/predict/index.md
+    - Evaluation: usage/evaluate/index.md
+    - Prediction: usage/predict/index.md
- Python Reference:
- Datasets:
@@ -101,6 +102,7 @@ nav:
- OCR managers: ref/ocr/managers/ocr.md
- Training managers: ref/ocr/managers/training.md
- Training: ref/ocr/train.md
- Evaluation: ref/ocr/evaluate.md
- Prediction:
- ref/ocr/predict/index.md
- Inference: ref/ocr/predict/inference.md
......
@@ -19,7 +19,6 @@ from arkindex_export import (
WorkerVersion,
database,
)
-from dan.ocr.train import update_config
from tests import FIXTURES
@@ -184,9 +183,12 @@ def mock_database(tmp_path_factory):
@pytest.fixture
def training_config():
-    config = json.loads((FIXTURES.parent.parent / "configs" / "tests.json").read_text())
-    update_config(config)
-    return config
+    return json.loads((FIXTURES.parent.parent / "configs" / "tests.json").read_text())
@pytest.fixture
def evaluate_config():
return json.loads((FIXTURES.parent.parent / "configs" / "eval.json").read_text())
@pytest.fixture
......
Source diff could not be displayed: it is stored in LFS.
Source diff could not be displayed: it is stored in LFS.
# -*- coding: utf-8 -*-
import shutil
import pytest
import yaml
from dan.ocr import evaluate
from tests import FIXTURES
@pytest.mark.parametrize(
"training_res, val_res, test_res",
(
(
{
"nb_chars": 43,
"cer": 1.3023,
"nb_words": 9,
"wer": 1.0,
"nb_words_no_punct": 9,
"wer_no_punct": 1.0,
"nb_samples": 2,
},
{
"nb_chars": 41,
"cer": 1.2683,
"nb_words": 9,
"wer": 1.0,
"nb_words_no_punct": 9,
"wer_no_punct": 1.0,
"nb_samples": 2,
},
{
"nb_chars": 49,
"cer": 1.1224,
"nb_words": 9,
"wer": 1.0,
"nb_words_no_punct": 9,
"wer_no_punct": 1.0,
"nb_samples": 2,
},
),
),
)
def test_evaluate(training_res, val_res, test_res, evaluate_config):
# Use a fixed output folder under the test fixtures directory
evaluate_config["training"]["output_folder"] = FIXTURES / "evaluate"
evaluate.run(evaluate_config)
# Check that the evaluation results are correct
for split_name, expected_res in zip(
["train", "val", "test"], [training_res, val_res, test_res]
):
filename = (
evaluate_config["training"]["output_folder"]
/ "results"
/ f"predict_training-{split_name}_0.yaml"
)
with filename.open() as f:
# Remove the times from the results as they vary
res = {
metric: value
for metric, value in yaml.safe_load(f).items()
if "time" not in metric
}
assert res == expected_res
# Remove results files
shutil.rmtree(evaluate_config["training"]["output_folder"] / "results")
@@ -6,43 +6,17 @@ import pytest
import torch
import yaml
-from dan.ocr.train import train_and_test
+from dan.ocr.train import train
+from dan.ocr.utils import update_config
from tests.conftest import FIXTURES
@pytest.mark.parametrize(
"expected_best_model_name, expected_last_model_name, training_res, val_res, test_res, params_res",
"expected_best_model_name, expected_last_model_name, params_res",
(
(
"best_0.pt",
"last_3.pt",
-        {
-            "nb_chars": 43,
-            "cer": 1.3023,
-            "nb_words": 9,
-            "wer": 1.0,
-            "nb_words_no_punct": 9,
-            "wer_no_punct": 1.0,
-            "nb_samples": 2,
-        },
-        {
-            "nb_chars": 41,
-            "cer": 1.2683,
-            "nb_words": 9,
-            "wer": 1.0,
-            "nb_words_no_punct": 9,
-            "wer_no_punct": 1.0,
-            "nb_samples": 2,
-        },
-        {
-            "nb_chars": 49,
-            "cer": 1.1224,
-            "nb_words": 9,
-            "wer": 1.0,
-            "nb_words_no_punct": 9,
-            "wer_no_punct": 1.0,
-            "nb_samples": 2,
-        },
{
"parameters": {
"max_char_prediction": 30,
@@ -79,22 +53,21 @@ from tests.conftest import FIXTURES
),
),
)
-def test_train_and_test(
+def test_train(
expected_best_model_name,
expected_last_model_name,
-    training_res,
-    val_res,
-    test_res,
params_res,
training_config,
tmp_path,
):
update_config(training_config)
# Use the tmp_path as base folder
training_config["training"]["output_folder"] = (
tmp_path / training_config["training"]["output_folder"]
)
-    train_and_test(0, training_config)
+    train(0, training_config)
# There should only be two checkpoints left
checkpoints = (
@@ -175,24 +148,6 @@ def test_train_and_test(
]:
assert trained_model[elt] == expected_model[elt]
-    # Check that the evaluation results are correct
-    for split_name, expected_res in zip(
-        ["train", "val", "test"], [training_res, val_res, test_res]
-    ):
-        with (
-            tmp_path
-            / training_config["training"]["output_folder"]
-            / "results"
-            / f"predict_training-{split_name}_0.yaml"
-        ).open() as f:
-            # Remove the times from the results as they vary
-            res = {
-                metric: value
-                for metric, value in yaml.safe_load(f).items()
-                if "time" not in metric
-            }
-        assert res == expected_res
# Check that the inference parameters file is correct
res = yaml.safe_load(
(
......