diff --git a/README.md b/README.md
index cc24d3a2a38e44d7982c654a70b7d2d3acc311c9..f3526541b6e47690b0d4d5cf1b5e941206ef0a7c 100644
--- a/README.md
+++ b/README.md
@@ -11,6 +11,7 @@ This is an open-source project, licensed using [the MIT license](https://opensou
 For development and testing purposes, it may be useful to install the project as an editable package with pip.
 
 - Use a virtualenv (e.g. with virtualenvwrapper `mkvirtualenv -a . dan`)
+- Initialize the [`Nerval`](https://gitlab.teklia.com/ner/nerval) submodule (e.g. `git submodule update --init --recursive`)
 - Install `dan` as a package (e.g. `pip install -e .`)
 
 ### Linter
diff --git a/dan/ocr/evaluate.py b/dan/ocr/evaluate.py
index 079b0e5d11455426132c410adf0f5383e9329e1f..f8f1fc743803967384f217b2c8f7f35e48525ca0 100644
--- a/dan/ocr/evaluate.py
+++ b/dan/ocr/evaluate.py
@@ -6,12 +6,16 @@ Evaluate a trained DAN model.
 import logging
 import random
 from argparse import ArgumentTypeError
+from itertools import chain
+from operator import attrgetter
 from pathlib import Path
 from typing import Dict, List
 
 import numpy as np
 import torch
 import torch.multiprocessing as mp
+from edlib import align, getNiceAlignment
+from prettytable import MARKDOWN, PrettyTable
 
 from dan.bio import convert
 from dan.ocr.manager.metrics import Inference
@@ -25,6 +29,7 @@ from nerval.utils import print_results
 logger = logging.getLogger(__name__)
 
 NERVAL_THRESHOLD = 0.30
+NB_WORST_PREDICTIONS = 5
 
 
 def parse_threshold(value: str) -> float:
@@ -66,6 +71,38 @@ def add_evaluate_parser(subcommands) -> None:
     parser.set_defaults(func=run)
 
 
+def print_worst_predictions(all_inferences: Dict[str, List[Inference]]):
+    table = PrettyTable(
+        field_names=[
+            "Image name",
+            "WER",
+            "Alignment between ground truth - prediction",
+        ]
+    )
+    table.set_style(MARKDOWN)
+
+    worst_inferences = sorted(
+        chain.from_iterable(all_inferences.values()),
+        key=attrgetter("wer"),
+        reverse=True,
+    )[:NB_WORST_PREDICTIONS]
+    for inference in worst_inferences:
+        alignment = getNiceAlignment(
+            align(
+                inference.ground_truth,
+                inference.prediction,
+                task="path",
+            ),
+            inference.ground_truth,
+            inference.prediction,
+        )
+        alignment_str = f'{alignment["query_aligned"]}\n{alignment["matched_aligned"]}\n{alignment["target_aligned"]}'
+        table.add_row([inference.image, round(inference.wer * 100, 2), alignment_str])
+
+    print(f"\n#### {NB_WORST_PREDICTIONS} worst prediction(s)\n")
+    print(table)
+
+
 def eval_nerval(
     all_inferences: Dict[str, List[Inference]],
     tokens: Path,
@@ -159,6 +196,8 @@ def eval(rank, config: dict, nerval_threshold: float, mlflow_logging: bool):
             threshold=nerval_threshold,
         )
 
+    print_worst_predictions(all_inferences)
+
 
 def run(config: dict, nerval_threshold: float):
     update_config(config)
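Note on the `edlib` calls introduced in `print_worst_predictions` above: `align` with `task="path"` returns the alignment path, and `getNiceAlignment` expands it into the three strings shown per table row (ground truth, match markers, prediction). A minimal standalone sketch of that call pattern, with a made-up ground truth / prediction pair:

```python
from edlib import align, getNiceAlignment

# Made-up sample pair, for illustration only
ground_truth = "Bellisson Georges 91 Plombier"
prediction = "Belisson Georges 91 Plombier"

# task="path" is required: getNiceAlignment needs the alignment path,
# not just the edit distance
result = align(ground_truth, prediction, task="path")
alignment = getNiceAlignment(result, ground_truth, prediction)

print(alignment["query_aligned"])    # ground truth, with "-" for gaps
print(alignment["matched_aligned"])  # "|" match, "." mismatch, "-" gap
print(alignment["target_aligned"])   # prediction, with "-" for gaps
```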
diff --git a/dan/ocr/manager/metrics.py b/dan/ocr/manager/metrics.py
index 988c90d073c95cb80a682cf3a916972da540233f..07cece06993cea371d6c4f3b34970ac63f8aa924 100644
--- a/dan/ocr/manager/metrics.py
+++ b/dan/ocr/manager/metrics.py
@@ -29,8 +29,10 @@ class Inference(NamedTuple):
     inferring again when we need to compute new metrics
     """
 
+    image: str
     ground_truth: str
     prediction: str
+    wer: float
 
 
 class MetricManager:
diff --git a/dan/ocr/manager/training.py b/dan/ocr/manager/training.py
index ad35f823e312c51a21975229fff0da38bac0545d..d436ead177c6f959fcc757c27722f970477df784 100644
--- a/dan/ocr/manager/training.py
+++ b/dan/ocr/manager/training.py
@@ -4,6 +4,7 @@ import os
 import random
 from copy import deepcopy
 from enum import Enum
+from itertools import repeat
 from pathlib import Path
 from time import time
 from typing import Dict, List
@@ -768,7 +769,9 @@ class GenericTrainingManager:
                 tokens=self.tokens,
             )
 
-        # Keep inferences in memory to evaluate with Nerval
+        # Keep inferences in memory to:
+        # - evaluate with Nerval
+        # - display worst predictions
         inferences = []
 
         with tqdm(total=len(loader.dataset)) as pbar:
@@ -796,7 +799,13 @@
                     pbar.update(len(batch_data["names"]) * self.nb_workers)
 
                     inferences.extend(
-                        map(Inference, batch_values["str_y"], batch_values["str_x"])
+                        map(
+                            Inference,
+                            batch_data["names"],
+                            batch_values["str_y"],
+                            batch_values["str_x"],
+                            repeat(display_values["wer"]),
+                        )
                     )
 
                     # log metrics in MLflow
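Note on the `map(Inference, ...)` change above: `batch_data["names"]` and the two transcription lists are per-image, but `display_values["wer"]` is computed once per batch, so `itertools.repeat` pairs that single value with every image in the batch (which is also why the docs below warn about `batch_size` greater than `1`). A minimal standalone sketch with dummy values:

```python
from itertools import repeat
from typing import NamedTuple


class Inference(NamedTuple):
    """Mirrors dan.ocr.manager.metrics.Inference after this change."""

    image: str
    ground_truth: str
    prediction: str
    wer: float


# Dummy per-image values and a single batch-level WER
names = ["0a56e8b3.png", "2c242f5c.png"]
str_y = ["ⓈNaudin ⒻMarie", "ⓈA ⒻCharles"]  # ground truths
str_x = ["ⓈNaudin ⒻMarie", "Ⓢd ⒻCharles"]  # predictions
batch_wer = 0.125

# repeat() yields the same batch-level WER for every image
inferences = list(map(Inference, names, str_y, str_x, repeat(batch_wer)))
assert inferences[0].wer == inferences[1].wer
```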
diff --git a/docs/get_started/index.md b/docs/get_started/index.md
index ddc34d0fad9aca53dba03f1abc732d6e13057585..90e88151d4bc0991179d4e8068660404c251b803 100644
--- a/docs/get_started/index.md
+++ b/docs/get_started/index.md
@@ -26,6 +26,12 @@ To install DAN manually, you need to first clone via:
 git clone git@gitlab.teklia.com:atr/dan.git
 ```
 
+Then you can initialize the [`Nerval`](https://gitlab.teklia.com/ner/nerval) submodule:
+
+```shell
+git submodule update --init --recursive
+```
+
 Then you can install it via pip:
 
 ```shell
diff --git a/docs/usage/evaluate/index.md b/docs/usage/evaluate/index.md
index 2153ebd776c0767b8e55edf8ffaa9cec2422f307..94d57d18c812acbe184c721c0de3e52ad196d8a2 100644
--- a/docs/usage/evaluate/index.md
+++ b/docs/usage/evaluate/index.md
@@ -12,15 +12,19 @@ To evaluate DAN on your dataset:
 This will, for each evaluated split:
 
 1. Create a YAML file with the evaluation results in the `results` subfolder of the `training.output_folder` indicated in your configuration.
-1. Print in the console a metrics Markdown table (see [table example below](#htr-evaluation)).
-1. Print in the console a [Nerval](https://gitlab.teklia.com/ner/nerval) metrics Markdown table, if the `dataset.tokens` parameter in your configuration is defined (see [table example below](#htr-and-ner-evaluation)).
+1. Print in the console a metrics Markdown table (see [HTR example below](#htr-evaluation)).
+1. Print in the console a [Nerval](https://gitlab.teklia.com/ner/nerval) metrics Markdown table, if the `dataset.tokens` parameter in your configuration is defined (see [HTR and NER example below](#htr-and-ner-evaluation)).
+1. Print in the console the 5 worst predictions (see [examples below](#examples)).
+
+!!! warning
+    The display of the worst predictions does not support batch evaluation. If the `training.data.batch_size` parameter is not equal to `1`, then the `WER` displayed is the `WER` of the **whole batch** and not just the image.
 
 | Parameter            | Description                                                                                                                                                                                               | Type           | Default |
 | -------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------- | ------- |
 | `--config`           | Path to the configuration file.                                                                                                                                                                           | `pathlib.Path` |         |
 | `--nerval-threshold` | Distance threshold for the match between gold and predicted entity during Nerval evaluation. `0` would impose perfect matches, `1` would allow completely different strings to be considered as a match. | `float`        | `0.3`   |
 
-## Example output
+## Examples
 
 ### HTR evaluation
 
@@ -32,6 +36,14 @@ This will, for each evaluated split:
 | train | x | x | x | x | x |
 | val   | x | x | x | x | x |
 | test  | x | x | x | x | x |
+
+#### 5 worst prediction(s)
+
+| Image name     | WER | Alignment between ground truth - prediction |
+| :------------: | :-: | :-----------------------------------------: |
+| <image_id>.png | x   | x                                            |
+| | | | |
+| | | x |
 ```
 
 ### HTR and NER evaluation
@@ -67,4 +79,12 @@ This will, for each evaluated split:
 | :-----: | :-------: | :-----: | :-------: | :----: | :-: | :-----: |
 | Surname | x | x | x | x | x | x |
 | All     | x | x | x | x | x | x |
+
+#### 5 worst prediction(s)
+
+| Image name     | WER | Alignment between ground truth - prediction |
+| :------------: | :-: | :-----------------------------------------: |
+| <image_id>.png | x   | x                                            |
+| | | | |
+| | | x |
 ```
diff --git a/mkdocs.yml b/mkdocs.yml
index 758964cef9c45bec8799828241470a9f196ca57a..3ac3aea259d4d272fc8975909b3f723d811cddef 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -144,7 +144,7 @@ extra:
       link: https://teklia.com
     - icon: fontawesome/brands/gitlab
       name: Git repository for this project
-      link: https://gitlab.com/teklia/atr/dan
+      link: https://gitlab.teklia.com/atr/dan
    - icon: fontawesome/brands/linkedin
      name: Teklia @ LinkedIn
      link: https://www.linkedin.com/company/teklia
diff --git a/tests/data/evaluate/metrics_table.md b/tests/data/evaluate/metrics_table.md
index 107bef4188e44a6f8e5509c74b7ceb8d4ded4625..27bf53ad329cbd0441b030695b322728ce62b3f0 100644
--- a/tests/data/evaluate/metrics_table.md
+++ b/tests/data/evaluate/metrics_table.md
@@ -47,3 +47,20 @@
 | Chalumeau | 1 | 0 | 0.0   | 0.0   | 0     | 1 |
 | Batiment  | 1 | 1 | 100.0 | 100.0 | 100.0 | 1 |
 | All       | 6 | 5 | 83.33 | 83.33 | 83.33 | 6 |
+
+#### 5 worst prediction(s)
+
+| Image name | WER | Alignment between ground truth - prediction |
+|:----------------------------------------:|:-----:|:---------------------------------------------------------:|
+| 2c242f5c-e979-43c4-b6f2-a6d4815b651d.png | 50.0  | ⓈA ⒻCharles Ⓑ11 ⓁP ⒸC ⓀF ⓄA Ⓟ14331 |
+| | | |.||||||||||||||||||||||||.||||.|| |
+| | | Ⓢd ⒻCharles Ⓑ11 ⓁP ⒸC ⓀF Ⓞd Ⓟ14 31 |
+| 0dfe8bcd-ed0b-453e-bf19-cc697012296e.png | 26.67 | ⓈTemplié ⒻMarcelle Ⓑ93 ⓁJ Ⓚch ⓄE dachyle------- |
+| | | ||||||||||||||||||||||||.|||||||||||.||.------- |
+| | | ⓈTemplié ⒻMarcelle Ⓑ93 ⓁS Ⓚch ⓄE dactylo Ⓟ18376 |
+| ffdec445-7f14-4f5f-be44-68d0844d0df1.png | 14.29 | ⓈNaudin ⒻMarie Ⓑ53 ⓁS ⒸV ⓀBelle mère |
+| | | |||||||||||||||||||||||.|||||||||||| |
+| | | ⓈNaudin ⒻMarie Ⓑ53 ⓁS Ⓒv ⓀBelle mère |
+| 0a56e8b3-95cd-4fa5-a17b-5b0ff9e6ea84.png | 12.5  | ⓈBellisson ⒻGeorges Ⓑ91 ⓁP ⒸM ⓀCh ⓄPlombier Ⓟ-------12241 |
+| | | |||||||||||||||||||||||||||||||||||||||||||||-------||||| |
+| | | ⓈBellisson ⒻGeorges Ⓑ91 ⓁP ⒸM ⓀCh ⓄPlombier ⓅPatron?12241 |
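For reference, the fixture above is the shape of output `PrettyTable` produces once `set_style(MARKDOWN)` is applied: a `\n`-joined alignment cell spans three physical table rows. A minimal sketch reproducing that shape, with a made-up row:

```python
from prettytable import MARKDOWN, PrettyTable

table = PrettyTable(
    field_names=["Image name", "WER", "Alignment between ground truth - prediction"]
)
table.set_style(MARKDOWN)

# A multi-line cell becomes three physical table rows, as in the fixture above
alignment_str = "ⓈNaudin ⒻMarie\n|||||||||.||||\nⓈNaudin Ⓕmarie"
table.add_row(["ffdec445.png", 14.29, alignment_str])

print(table)
```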