Commit ed481c6f authored by Yoann Schneider

Merge branch 'eval-support-empty-strings' into 'main'

Support empty strings during evaluation

Closes #261

See merge request !390
parents bada606d 72c96bc6
```diff
@@ -43,7 +43,7 @@ repos:
     rev: 0.7.17
     hooks:
       - id: mdformat
-        exclude: tests/data/analyze|tests/data/evaluate/metrics_table.md
+        exclude: tests/data/analyze|tests/data/evaluate/.*.md
         # Optionally add plugins
         additional_dependencies:
           - mdformat-mkdocs[recommended]
```
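The broadened `exclude` pattern keeps mdformat away from every Markdown fixture under `tests/data/evaluate/`, not just `metrics_table.md`. Pre-commit interprets `exclude` as a Python regular expression matched against each file path, so the effect can be sanity-checked directly (a quick sketch; the new fixture names are taken from the tests further down):

```python
import re

# Pre-commit matches the `exclude` regex against each candidate file path.
pattern = re.compile(r"tests/data/analyze|tests/data/evaluate/.*.md")

assert pattern.search("tests/data/evaluate/metrics_table.md")       # previously excluded
assert pattern.search("tests/data/evaluate/worst_predictions.md")   # new fixture
assert pattern.search("tests/data/evaluate/eval_nerval.md")         # new fixture
assert not pattern.search("tests/data/evaluate/predictions.json")   # non-Markdown files still formatted... or rather, still in scope
```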
```diff
@@ -31,6 +31,8 @@ logger = logging.getLogger(__name__)
 NERVAL_THRESHOLD = 0.30
 NB_WORST_PREDICTIONS = 5
+
+EMPTY_STRING = "∅"
 
 
 def parse_threshold(value: str) -> float:
     """
@@ -87,14 +89,24 @@ def print_worst_predictions(all_inferences: Dict[str, List[Inference]]):
         reverse=True,
     )[:NB_WORST_PREDICTIONS]
     for inference in worst_inferences:
+        if not inference.ground_truth:
+            logger.warning(
+                f"Ground truth is empty for {inference.image}. `{EMPTY_STRING}` will be displayed"
+            )
+        if not inference.prediction:
+            logger.warning(
+                f"Prediction is empty for {inference.image}. `{EMPTY_STRING}` will be displayed"
+            )
         alignment = getNiceAlignment(
             align(
-                inference.ground_truth,
-                inference.prediction,
+                inference.ground_truth or EMPTY_STRING,
+                inference.prediction or EMPTY_STRING,
                 task="path",
             ),
-            inference.ground_truth,
-            inference.prediction,
+            inference.ground_truth or EMPTY_STRING,
+            inference.prediction or EMPTY_STRING,
         )
         alignment_str = f'{alignment["query_aligned"]}\n{alignment["matched_aligned"]}\n{alignment["target_aligned"]}'
         table.add_row([inference.image, round(inference.wer * 100, 2), alignment_str])
```
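Falling back to `EMPTY_STRING` matters because the alignment gives the table something visible to align against when one side is empty. A minimal reproduction of the empty-prediction case (assuming `align` and `getNiceAlignment` come from edlib, as their signatures suggest; expected output taken from the fixture below):

```python
from edlib import align, getNiceAlignment

EMPTY_STRING = "∅"

ground_truth = "Some text"
prediction = ""  # empty prediction, as in the new test case

result = align(
    ground_truth or EMPTY_STRING,
    prediction or EMPTY_STRING,
    task="path",  # keep the alignment path so it can be pretty-printed
)
alignment = getNiceAlignment(
    result,
    ground_truth or EMPTY_STRING,
    prediction or EMPTY_STRING,
)
print(alignment["query_aligned"])    # Some text
print(alignment["matched_aligned"])  # .--------
print(alignment["target_aligned"])   # ∅--------
```

The `ffdec445-…` row in the fixture is the degenerate case where both sides collapse to `∅` and the alignment is a single match.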
The new expected-output fixtures in `tests/data/evaluate/`:
#### Nerval evaluation
##### test
| tag | predicted | matched | Precision | Recall | F1 | Support |
|:---------:|:---------:|:-------:|:---------:|:------:|:-----:|:-------:|
| Surname | 1 | 1 | 100.0 | 100.0 | 100.0 | 1 |
| Patron | 1 | 0 | 0.0 | 0.0 | 0 | 1 |
| Operai | 1 | 1 | 100.0 | 100.0 | 100.0 | 1 |
| Louche | 1 | 1 | 100.0 | 100.0 | 100.0 | 1 |
| Koala | 1 | 1 | 100.0 | 100.0 | 100.0 | 1 |
| Firstname | 1 | 1 | 100.0 | 100.0 | 100.0 | 1 |
| Chalumeau | 1 | 1 | 100.0 | 100.0 | 100.0 | 1 |
| Batiment | 1 | 1 | 100.0 | 100.0 | 100.0 | 1 |
| All | 8 | 7 | 87.5 | 87.5 | 87.5 | 8 |
#### 5 worst prediction(s)
| Image name | WER | Alignment between ground truth - prediction |
|:----------------------------------------:|:----:|:---------------------------------------------------------:|
| 0dfe8bcd-ed0b-453e-bf19-cc697012296e.png | 100 | Some text |
| | | .-------- |
| | | ∅-------- |
| 2c242f5c-e979-43c4-b6f2-a6d4815b651d.png | 100 | ∅-------- |
| | | .-------- |
| | | Some text |
| 0a56e8b3-95cd-4fa5-a17b-5b0ff9e6ea84.png | 12.5 | ⓈBellisson ⒻGeorges Ⓑ91 ⓁP ⒸM ⓀCh ⓄPlombier Ⓟ-------12241 |
| | | |||||||||||||||||||||||||||||||||||||||||||||-------||||| |
| | | ⓈBellisson ⒻGeorges Ⓑ91 ⓁP ⒸM ⓀCh ⓄPlombier ⓅPatron?12241 |
| ffdec445-7f14-4f5f-be44-68d0844d0df1.png | 0 | ∅ |
| | | | |
| | | ∅ |
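The WER column is easy to verify by hand: the Bellisson line substitutes one word (`Ⓟ12241` → `ⓅPatron?12241`) out of eight, i.e. 1/8 = 12.5%. A throwaway word-level checker, not the project's implementation (which is not part of this diff), assuming a non-empty reference:

```python
def word_error_rate(reference: str, hypothesis: str) -> float:
    """Word-level Levenshtein distance divided by the reference length (naive sketch)."""
    ref, hyp = reference.split(), hypothesis.split()
    dist = list(range(len(hyp) + 1))  # DP row: distance to each hypothesis prefix
    for i, r in enumerate(ref, start=1):
        prev, dist[0] = dist[0], i
        for j, h in enumerate(hyp, start=1):
            prev, dist[j] = dist[j], min(
                dist[j] + 1,      # delete a reference word
                dist[j - 1] + 1,  # insert a hypothesis word
                prev + (r != h),  # substitute (free if the words match)
            )
    return dist[-1] / len(ref)  # assumes the reference is non-empty


gt = "ⓈBellisson ⒻGeorges Ⓑ91 ⓁP ⒸM ⓀCh ⓄPlombier Ⓟ12241"
pred = "ⓈBellisson ⒻGeorges Ⓑ91 ⓁP ⒸM ⓀCh ⓄPlombier ⓅPatron?12241"
print(round(word_error_rate(gt, pred) * 100, 2))  # 12.5
```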
```diff
@@ -50,6 +50,100 @@ def test_add_metrics_table_row():
     assert metrics_table.rows == [["train", 130.23, 100, ""]]
 
 
+def test_print_worst_predictions(capsys):
+    evaluate.print_worst_predictions(
+        all_inferences={
+            "test": [
+                Inference(
+                    image="0a56e8b3-95cd-4fa5-a17b-5b0ff9e6ea84.png",
+                    ground_truth="ⓈBellisson ⒻGeorges Ⓑ91 ⓁP ⒸM ⓀCh ⓄPlombier Ⓟ12241",
+                    prediction="ⓈBellisson ⒻGeorges Ⓑ91 ⓁP ⒸM ⓀCh ⓄPlombier ⓅPatron?12241",
+                    lm_prediction="ⓈBellisson ⒻGeorges Ⓑ91 ⓁP ⒸM ⓀCh ⓄPlombier ⓅPatron?12241",
+                    wer=0.125,
+                ),
+                # Test with empty strings
+                Inference(
+                    image="0dfe8bcd-ed0b-453e-bf19-cc697012296e.png",
+                    ground_truth="Some text",
+                    prediction="",
+                    lm_prediction="",
+                    wer=1,
+                ),
+                Inference(
+                    image="2c242f5c-e979-43c4-b6f2-a6d4815b651d.png",
+                    ground_truth="",
+                    prediction="Some text",
+                    lm_prediction="",
+                    wer=1,
+                ),
+                Inference(
+                    image="ffdec445-7f14-4f5f-be44-68d0844d0df1.png",
+                    ground_truth="",
+                    prediction="",
+                    lm_prediction="Some text",
+                    wer=0,
+                ),
+            ]
+        }
+    )
+
+    # Check the metrics Markdown table
+    captured_std = capsys.readouterr()
+    last_printed_lines = captured_std.out.split("\n")
+    assert (
+        "\n".join(last_printed_lines)
+        == Path(FIXTURES / "evaluate" / "worst_predictions.md").read_text()
+    )
+
+
+def test_eval_nerval(capsys, evaluate_config):
+    evaluate.eval_nerval(
+        all_inferences={
+            "test": [
+                Inference(
+                    image="0a56e8b3-95cd-4fa5-a17b-5b0ff9e6ea84.png",
+                    ground_truth="ⓈBellisson ⒻGeorges Ⓑ91 ⓁP ⒸM ⓀCh ⓄPlombier Ⓟ12241",
+                    prediction="ⓈBellisson ⒻGeorges Ⓑ91 ⓁP ⒸM ⓀCh ⓄPlombier ⓅPatron?12241",
+                    lm_prediction="ⓈBellisson ⒻGeorges Ⓑ91 ⓁP ⒸM ⓀCh ⓄPlombier ⓅPatron?12241",
+                    wer=0.125,
+                ),
+                # Test with empty strings
+                Inference(
+                    image="0dfe8bcd-ed0b-453e-bf19-cc697012296e.png",
+                    ground_truth="Some text",
+                    prediction="",
+                    lm_prediction="",
+                    wer=1,
+                ),
+                Inference(
+                    image="2c242f5c-e979-43c4-b6f2-a6d4815b651d.png",
+                    ground_truth="",
+                    prediction="Some text",
+                    lm_prediction="",
+                    wer=1,
+                ),
+                Inference(
+                    image="ffdec445-7f14-4f5f-be44-68d0844d0df1.png",
+                    ground_truth="",
+                    prediction="",
+                    lm_prediction="Some text",
+                    wer=0,
+                ),
+            ]
+        },
+        tokens=evaluate_config["dataset"]["tokens"],
+        threshold=evaluate.NERVAL_THRESHOLD,
+    )
+
+    # Check the metrics Markdown table
+    captured_std = capsys.readouterr()
+    last_printed_lines = captured_std.out.split("\n")
+    assert (
+        "\n".join(last_printed_lines)
+        == Path(FIXTURES / "evaluate" / "eval_nerval.md").read_text()
+    )
+
+
 @pytest.mark.parametrize(
     "training_res, val_res, test_res",
     (
```
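For readers without the codebase at hand, the `Inference` records built above only need a handful of fields. A hypothetical stand-in with the shape these tests rely on (the real definition lives in the project's evaluation module and may differ):

```python
from typing import NamedTuple


class Inference(NamedTuple):
    # Hypothetical stand-in; mirrors only the fields exercised by the tests above.
    image: str          # image file name shown in the worst-predictions table
    ground_truth: str   # may be empty, hence this merge request
    prediction: str     # may be empty, hence this merge request
    lm_prediction: str  # presumably the prediction rescored with a language model
    wer: float          # word error rate, between 0.0 and 1.0
```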