Commit 20161125 authored by Yoann Schneider

Merge branch 'save-detailed-results-evaluation' into 'main'

Add an option to save all results in the `evaluate` command

Closes #277

See merge request !404
parents 823416d4 c5ede812
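In short, this MR threads a new optional `--output-json` path from the CLI down to `eval()`, which dumps all inference results to that file. A hypothetical invocation, assuming the `teklia-dan` entry point and a local `config.json` (only the two flags are confirmed by this diff):

```sh
teklia-dan evaluate --config config.json --nerval-threshold 0.3 --output-json results.json
```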
@@ -6,6 +6,7 @@
 Evaluate a trained DAN model.
 """
 
+import json
 import logging
 import random
 from argparse import ArgumentTypeError
@@ -73,6 +74,13 @@ def add_evaluate_parser(subcommands) -> None:
         type=parse_threshold,
     )
+    parser.add_argument(
+        "--output-json",
+        help="Where to save evaluation results in JSON format.",
+        default=None,
+        type=Path,
+    )
     parser.set_defaults(func=run)
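For reference, a self-contained sketch (not the project's parser) of how `type=Path` with `default=None` behaves: argparse converts the supplied string to a `pathlib.Path`, and the attribute stays `None` when the flag is omitted, which is what later disables saving.

```python
import argparse
from pathlib import Path

parser = argparse.ArgumentParser()
parser.add_argument("--output-json", type=Path, default=None)

print(parser.parse_args([]).output_json)  # None: saving is skipped
print(parser.parse_args(["--output-json", "out.json"]).output_json)  # Path("out.json")
```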
@@ -161,7 +169,13 @@ def eval_nerval(
     print_results(scores)
 
 
-def eval(rank, config: dict, nerval_threshold: float, mlflow_logging: bool):
+def eval(
+    rank,
+    config: dict,
+    nerval_threshold: float,
+    output_json: Path | None,
+    mlflow_logging: bool,
+):
     torch.manual_seed(0)
     torch.cuda.manual_seed(0)
     np.random.seed(0)
@@ -218,8 +232,12 @@ def eval(rank, config: dict, nerval_threshold: float, mlflow_logging: bool):
     print_worst_predictions(all_inferences)
 
+    # Save to JSON
+    if output_json is not None:
+        output_json.write_text(json.dumps(all_inferences, indent=2))
+
 
-def run(config: dict, nerval_threshold: float):
+def run(config: dict, nerval_threshold: float, output_json: Path | None):
     update_config(config)
 
     mlflow_logging = bool(config.get("mlflow"))
@@ -234,8 +252,8 @@ def run(config: dict, nerval_threshold: float):
     ):
         mp.spawn(
             eval,
-            args=(config, nerval_threshold, mlflow_logging),
+            args=(config, nerval_threshold, output_json, mlflow_logging),
             nprocs=config["training"]["device"]["nb_gpu"],
         )
     else:
-        eval(0, config, nerval_threshold, mlflow_logging)
+        eval(0, config, nerval_threshold, output_json, mlflow_logging)
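Note why `output_json` is inserted before `mlflow_logging` in both call sites: `mp.spawn` prepends the process rank to `args`, so the tuple must match `eval`'s positional order exactly. A minimal standalone sketch of that behavior (`worker` and `message` are illustrative names, not from the project):

```python
import torch.multiprocessing as mp

def worker(rank, message):
    # mp.spawn passes the process index as the first positional argument,
    # followed by the contents of the `args` tuple.
    print(f"rank={rank}: {message}")

if __name__ == "__main__":
    # Spawns 2 processes; each receives its rank plus the extra args.
    mp.spawn(worker, args=("hello",), nprocs=2)
```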
@@ -23,6 +23,7 @@ This will, for each evaluated split:
 | -------------------- | ------------------------------------------------------------ | -------------- | ------- |
 | `--config`           | Path to the configuration file.                               | `pathlib.Path` |         |
 | `--nerval-threshold` | Distance threshold for the match between gold and predicted entity during Nerval evaluation. `0` would impose perfect matches, `1` would allow completely different strings to be considered as a match. | `float` | `0.3` |
+| `--output-json`      | Where to save evaluation results in JSON format.              | `pathlib.Path` | `None`  |
 
 ## Examples
[New file: the `inference.json` fixture with the expected evaluation output, compared against in the tests below]
{
"train": [
[
"0a56e8b3-95cd-4fa5-a17b-5b0ff9e6ea84.png",
"\u24c8Bellisson \u24bbGeorges \u24b791 \u24c1P \u24b8M \u24c0Ch \u24c4Plombier \u24c512241",
"\u24c8Bellisson \u24bbGeorges \u24b791 \u24c1P \u24b8M \u24c0Ch \u24c4Plombier \u24c5Patron?12241",
"",
0.125
],
[
"0dfe8bcd-ed0b-453e-bf19-cc697012296e.png",
"\u24c8Templi\u00e9 \u24bbMarcelle \u24b793 \u24c1J \u24c0ch \u24c4E dachyle",
"\u24c8Templi\u00e9 \u24bbMarcelle \u24b793 \u24c1S \u24c0ch \u24c4E dactylo \u24c518376",
"",
0.4286
]
],
"val": [
[
"2c242f5c-e979-43c4-b6f2-a6d4815b651d.png",
"\u24c8A \u24bbCharles \u24b711 \u24c1P \u24b8C \u24c0F \u24c4A \u24c514331",
"\u24c8d \u24bbCharles \u24b711 \u24c1P \u24b8C \u24c0F \u24c4d \u24c514 31",
"",
0.5
]
],
"test": [
[
"ffdec445-7f14-4f5f-be44-68d0844d0df1.png",
"\u24c8Naudin \u24bbMarie \u24b753 \u24c1S \u24b8V \u24c0Belle m\u00e8re",
"\u24c8Naudin \u24bbMarie \u24b753 \u24c1S \u24b8v \u24c0Belle m\u00e8re",
"",
0.1429
]
]
}
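Each record above appears to be `[image_name, ground_truth, prediction, <empty string>, score]`; judging by the values, the score is most likely a character error rate, and this diff does not show what the fourth field holds. A hedged sketch of consuming the saved file (`results.json` is the hypothetical path from the invocation above):

```python
import json
from pathlib import Path

results = json.loads(Path("results.json").read_text())

# Report the mean per-sample score (likely CER) for each split.
for split, records in results.items():
    scores = [record[-1] for record in records]
    print(f"{split}: {sum(scores) / len(scores):.4f} over {len(scores)} samples")
```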
@@ -3,6 +3,7 @@
 # -*- coding: utf-8 -*-
 
+import json
 import shutil
 from pathlib import Path
@@ -199,11 +200,23 @@ def test_eval_nerval(capsys, evaluate_config):
         ),
     ),
 )
-def test_evaluate(capsys, training_res, val_res, test_res, evaluate_config):
-    evaluate_config["training"]["output_folder"] = FIXTURES / "evaluate"
+@pytest.mark.parametrize("is_output_json", ((True, False)))
+def test_evaluate(
+    capsys, training_res, val_res, test_res, is_output_json, evaluate_config, tmp_path
+):
+    evaluate_path = FIXTURES / "evaluate"
+
+    # Use the tmp_path as base folder
+    evaluate_config["training"]["output_folder"] = evaluate_path
 
-    evaluate.run(evaluate_config, evaluate.NERVAL_THRESHOLD)
+    output_json = tmp_path / "inference.json" if is_output_json else None
+    evaluate.run(evaluate_config, evaluate.NERVAL_THRESHOLD, output_json=output_json)
+
+    if is_output_json:
+        assert json.loads(output_json.read_text()) == json.loads(
+            (evaluate_path / "inference.json").read_text()
+        )
 
     # Check that the evaluation results are correct
     for split_name, expected_res in zip(
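The parametrization runs `test_evaluate` twice, once writing JSON under pytest's per-test `tmp_path` and once with saving disabled. A stripped-down sketch of the same pattern (the test body here is illustrative, not the project's):

```python
import json
import pytest

@pytest.mark.parametrize("is_output_json", (True, False))
def test_optional_json_output(is_output_json, tmp_path):
    output_json = tmp_path / "inference.json" if is_output_json else None
    if output_json is not None:
        output_json.write_text(json.dumps({"val": []}, indent=2))
        # Compare parsed JSON rather than raw text to ignore formatting.
        assert json.loads(output_json.read_text()) == {"val": []}
```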
@@ -365,7 +378,7 @@ def test_evaluate_language_model(
         "weight": language_model_weight,
     }
 
-    evaluate.run(evaluate_config, evaluate.NERVAL_THRESHOLD)
+    evaluate.run(evaluate_config, evaluate.NERVAL_THRESHOLD, output_json=None)
 
     # Check that the evaluation results are correct
     for split_name, expected_res in [