Compare revisions (atr/dan)

Changes are shown as if the source revision was being merged into the target revision.

Commits on Source (6)
[submodule "nerval"]
path = nerval
url = ../../ner/nerval.git
@@ -7,7 +7,12 @@ RUN apt-get -y update && \
 WORKDIR /src
 
-# Install DAN as a package
+# Copy submodule data
+COPY nerval nerval
+
+# Copy DAN data
 COPY dan dan
 COPY requirements.txt *-requirements.txt setup.py VERSION README.md ./
+
+# Install DAN as a package
 RUN pip install . --no-cache-dir
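The `COPY nerval nerval` step assumes the nerval submodule is present in the build context, so a fresh checkout needs `git submodule update --init` before the image is built.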
@@ -282,6 +282,10 @@ class ArkindexExtractor:
                 )
                 continue
 
+        # Extract the train set first to correctly build the `self.charset` variable
+        splits.remove(TRAIN_NAME)
+        splits.insert(0, TRAIN_NAME)
+
         # Iterate over the subsets to find the page images and labels.
         for split in splits:
             with tqdm(
...
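The reorder matters because, per the comment, `self.charset` is built while the splits are extracted, so the train split has to be processed first. A self-contained sketch of the idea, with a hypothetical `labels_by_split` mapping standing in for the extractor's real data:

```python
# Hypothetical stand-in for the per-split transcriptions handled by ArkindexExtractor
labels_by_split = {
    "val": ["jean dupont"],
    "test": ["marie curie"],
    "train": ["dan", "nerval", "teklia"],
}

splits = list(labels_by_split)  # e.g. ["val", "test", "train"]

# Extract the train set first so the charset is seeded from training labels
splits.remove("train")
splits.insert(0, "train")

charset = set()
for split in splits:
    for label in labels_by_split[split]:
        charset.update(label)  # characters seen so far; complete once the train split is done

print(sorted(charset))
```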
@@ -5,17 +5,42 @@ Evaluate a trained DAN model.
 
 import logging
 import random
+from argparse import ArgumentTypeError
+from pathlib import Path
+from typing import Dict, List
 
 import numpy as np
 import torch
 import torch.multiprocessing as mp
 
+from dan.bio import convert
+from dan.ocr.manager.metrics import Inference
 from dan.ocr.manager.training import Manager
 from dan.ocr.utils import add_metrics_table_row, create_metrics_table, update_config
-from dan.utils import read_json
+from dan.utils import parse_tokens, read_json
+from nerval.evaluate import evaluate
+from nerval.parse import parse_bio
+from nerval.utils import print_results
 
 logger = logging.getLogger(__name__)
 
+NERVAL_THRESHOLD = 0.30
+
+
+def parse_threshold(value: str) -> float:
+    """
+    Check that the string passed as parameter is a correct floating point number between 0 and 1
+    """
+    try:
+        value = float(value)
+    except ValueError:
+        raise ArgumentTypeError("Must be a floating point number.")
+    if value < 0 or value > 1:
+        raise ArgumentTypeError("Must be between 0 and 1.")
+    return value
+
 
 def add_evaluate_parser(subcommands) -> None:
     parser = subcommands.add_parser(
@@ -31,10 +56,55 @@ def add_evaluate_parser(subcommands) -> None:
         help="Configuration file.",
     )
 
+    parser.add_argument(
+        "--nerval-threshold",
+        help="Distance threshold for the match between gold and predicted entity during Nerval evaluation.",
+        default=NERVAL_THRESHOLD,
+        type=parse_threshold,
+    )
+
     parser.set_defaults(func=run)
 
 
-def eval(rank, config, mlflow_logging):
+def eval_nerval(
+    all_inferences: Dict[str, List[Inference]],
+    tokens: Path,
+    threshold: float,
+):
+    print("\n#### Nerval evaluation")
+
+    def inferences_to_parsed_bio(attr: str):
+        bio_values = []
+        for inference in inferences:
+            value = getattr(inference, attr)
+            bio_value = convert(value, ner_tokens=tokens)
+            bio_values.extend(bio_value.split("\n"))
+
+        # Parse this BIO format
+        return parse_bio(bio_values)
+
+    # Evaluate with Nerval
+    tokens = parse_tokens(tokens)
+    for split_name, inferences in all_inferences.items():
+        ground_truths = inferences_to_parsed_bio("ground_truth")
+        predictions = inferences_to_parsed_bio("prediction")
+
+        if not (ground_truths and predictions):
+            continue
+
+        scores = {
+            key: {
+                k: round(value * 100, 2) if k in ["P", "R", "F1"] else value
+                for k, value in values.items()
+            }
+            for key, values in evaluate(ground_truths, predictions, threshold).items()
+        }
+
+        print(f"\n##### {split_name}\n")
+        print_results(scores)
+
+
+def eval(rank, config: dict, nerval_threshold: float, mlflow_logging: bool):
     torch.manual_seed(0)
     torch.cuda.manual_seed(0)
     np.random.seed(0)
@@ -62,10 +132,12 @@ def eval(rank, config, mlflow_logging):
         metric_names.append("ner")
 
     metrics_table = create_metrics_table(metric_names)
+    all_inferences = {}
+
     for dataset_name in config["dataset"]["datasets"]:
         for set_name in ["train", "val", "test"]:
             logger.info(f"Evaluating on set `{set_name}`")
-            metrics = model.evaluate(
+            metrics, inferences = model.evaluate(
                 "{}-{}".format(dataset_name, set_name),
                 [
                     (dataset_name, set_name),
@@ -75,11 +147,20 @@ def eval(rank, config, mlflow_logging):
             )
             add_metrics_table_row(metrics_table, set_name, metrics)
+            all_inferences[set_name] = inferences
 
+    print("\n#### DAN evaluation\n")
     print(metrics_table)
 
+    if "ner" in metric_names:
+        eval_nerval(
+            all_inferences,
+            tokens=config["dataset"]["tokens"],
+            threshold=nerval_threshold,
+        )
+
 
-def run(config: dict):
+def run(config: dict, nerval_threshold: float):
     update_config(config)
 
     mlflow_logging = bool(config.get("mlflow"))
@@ -94,8 +175,8 @@ def run(config: dict):
     ):
         mp.spawn(
             eval,
-            args=(config, mlflow_logging),
+            args=(config, nerval_threshold, mlflow_logging),
             nprocs=config["training"]["device"]["nb_gpu"],
         )
     else:
-        eval(0, config, mlflow_logging)
+        eval(0, config, nerval_threshold, mlflow_logging)
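The `scores` comprehension in `eval_nerval` only reshapes what `nerval.evaluate.evaluate` returns: precision, recall and F1 become percentages while the counts are kept as-is. A small self-contained illustration of that rounding step with hand-made input (the exact key set returned by Nerval may differ slightly):

```python
# Hand-made stand-in for the mapping returned by nerval.evaluate.evaluate():
# one entry per entity tag plus an "All" summary row.
raw_scores = {
    "Surname": {"predicted": 2, "matched": 1, "P": 0.5, "R": 0.5, "F1": 0.5, "Support": 2},
    "All": {"predicted": 2, "matched": 1, "P": 0.5, "R": 0.5, "F1": 0.5, "Support": 2},
}

scores = {
    key: {
        k: round(value * 100, 2) if k in ["P", "R", "F1"] else value
        for k, value in values.items()
    }
    for key, values in raw_scores.items()
}

print(scores["All"])  # {'predicted': 2, 'matched': 1, 'P': 50.0, 'R': 50.0, 'F1': 50.0, 'Support': 2}
```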
@@ -3,7 +3,7 @@ import re
 from collections import defaultdict
 from operator import attrgetter
 from pathlib import Path
-from typing import Dict, List
+from typing import Dict, List, NamedTuple
 
 import editdistance
 import numpy as np
@@ -23,6 +23,16 @@ REGEX_ONLY_ONE_SPACE = re.compile(r"\s+")
 METRICS_KEYWORD = {"cer": "chars", "wer": "words", "ner": "tokens"}
 
 
+class Inference(NamedTuple):
+    """
+    Store a prediction with its ground truth to avoid
+    inferring again when we need to compute new metrics
+    """
+
+    ground_truth: str
+    prediction: str
+
+
 class MetricManager:
     def __init__(self, metric_names: List[str], dataset_name: str, tokens: Path | None):
         self.dataset_name: str = dataset_name
...
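Since `Inference` is a plain `NamedTuple`, instances can be built positionally from the ground-truth and prediction string lists, which is how the training manager accumulates them below with `map(Inference, batch_values["str_y"], batch_values["str_x"])`. A minimal sketch with made-up strings (the class is redefined here only to keep the snippet self-contained):

```python
from typing import NamedTuple


class Inference(NamedTuple):
    """Pair a prediction with its ground truth (same fields as dan.ocr.manager.metrics.Inference)."""

    ground_truth: str
    prediction: str


str_y = ["jean dupont", "marie curie"]  # made-up ground truths
str_x = ["jean dupond", "marie curie"]  # made-up predictions

inferences = list(map(Inference, str_y, str_x))
assert inferences[0] == Inference(ground_truth="jean dupont", prediction="jean dupond")
```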
@@ -6,7 +6,7 @@ from copy import deepcopy
 from enum import Enum
 from pathlib import Path
 from time import time
-from typing import Dict
+from typing import Dict, List, Tuple
 
 import numpy as np
 import torch
@@ -20,7 +20,7 @@ from torch.nn.parallel import DistributedDataParallel as DDP
 from torch.utils.tensorboard import SummaryWriter
 from tqdm import tqdm
 
-from dan.ocr.manager.metrics import MetricManager
+from dan.ocr.manager.metrics import Inference, MetricManager
 from dan.ocr.manager.ocr import OCRDatasetManager
 from dan.ocr.mlflow import MLFLOW_AVAILABLE, logging_metrics, logging_tags_metrics
 from dan.ocr.schedulers import DropoutScheduler
@@ -750,7 +750,7 @@ class GenericTrainingManager:
     def evaluate(
         self, custom_name, sets_list, metric_names, mlflow_logging=False
-    ) -> Dict[str, int | float]:
+    ) -> Tuple[Dict[str, int | float], List[Inference]]:
         """
         Main loop for evaluation
         """
@@ -768,6 +768,9 @@ class GenericTrainingManager:
             tokens=self.tokens,
         )
 
+        # Keep inferences in memory to evaluate with Nerval
+        inferences = []
+
         with tqdm(total=len(loader.dataset)) as pbar:
             pbar.set_description("Evaluation")
             with torch.no_grad():
@@ -792,6 +795,10 @@ class GenericTrainingManager:
                     pbar.set_postfix(values=str(display_values))
                     pbar.update(len(batch_data["names"]) * self.nb_workers)
 
+                    inferences.extend(
+                        map(Inference, batch_values["str_y"], batch_values["str_x"])
+                    )
+
         # log metrics in MLflow
         logging_name = custom_name.split("-")[1]
         logging_tags_metrics(
@@ -810,7 +817,7 @@ class GenericTrainingManager:
             # Log mlflow artifacts
             mlflow.log_artifact(path, "predictions")
 
-        return metrics
+        return metrics, inferences
 
     def output_pred(self, name):
         path = self.paths["results"] / "predict_{}_{}.yaml".format(
...
@@ -198,6 +198,9 @@ def split_text_and_confidences(
                 return [], [], []
 
             indices = build_ner_indices(text, tokens)
+            if not indices:
+                return [], [], []
+
             texts, confidences = compute_prob_by_ner(text, confidences, indices)
         case _:
             logger.error(f"Level should be either {list(map(str, Level))}")
...
@@ -4,8 +4,6 @@
 Use the `teklia-dan dataset analyze` command to analyze a dataset. This will display statistics in [Markdown](https://www.markdownguide.org/) format.
 
-The available arguments are:
-
 | Parameter  | Description                     | Type           | Default |
 | ---------- | ------------------------------- | -------------- | ------- |
 | `--labels` | Path to the `labels.json` file. | `pathlib.Path` |         |
...
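A typical invocation is `teklia-dan dataset analyze --labels path/to/labels.json`, which prints the dataset statistics as a Markdown table in the console; the command's other options are not shown in this hunk.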
 # Evaluation
 
+## Description
+
 Use the `teklia-dan evaluate` command to evaluate a trained DAN model.
 
 To evaluate DAN on your dataset:
 
 1. Create a JSON configuration file. You can base the configuration file off the training one. Refer to the [dedicated page](../train/config.md) for a description of parameters.
 1. Run `teklia-dan evaluate --config path/to/your/config.json`.
-1. Evaluation results for every split are available in the `results` subfolder of the output folder indicated in your configuration.
-1. A metrics Markdown table, providing results for each evaluated split, is also printed in the console (see table example below).
 
-### Example output - Metrics Markdown table
+This will, for each evaluated split:
+
+1. Create a YAML file with the evaluation results in the `results` subfolder of the `training.output_folder` indicated in your configuration.
+1. Print in the console a metrics Markdown table (see [table example below](#htr-evaluation)).
+1. Print in the console a [Nerval](https://gitlab.teklia.com/ner/nerval) metrics Markdown table, if the `dataset.tokens` parameter in your configuration is defined (see [table example below](#htr-and-ner-evaluation)).
+
+| Parameter            | Description                                                                                                                                                                                               | Type           | Default |
+| -------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -------------- | ------- |
+| `--config`           | Path to the configuration file.                                                                                                                                                                           | `pathlib.Path` |         |
+| `--nerval-threshold` | Distance threshold for the match between gold and predicted entity during Nerval evaluation. `0` would impose perfect matches, `1` would allow completely different strings to be considered as a match. | `float`        | `0.3`   |
+
+## Example output
+
+### HTR evaluation
+
+```
+#### DAN evaluation
+
+| Split | CER (HTR-NER) | CER (HTR) | WER (HTR-NER) | WER (HTR) | WER (HTR no punct) |
+| :---: | :-----------: | :-------: | :-----------: | :-------: | :----------------: |
+| train | x | x | x | x | x |
+| val | x | x | x | x | x |
+| test | x | x | x | x | x |
+```
+
+### HTR and NER evaluation
+
+```
+#### DAN evaluation
+
 | Split | CER (HTR-NER) | CER (HTR) | WER (HTR-NER) | WER (HTR) | WER (HTR no punct) | NER |
 | :---: | :-----------: | :-------: | :-----------: | :-------: | :----------------: | :-: |
 | train | x | x | x | x | x | x |
 | val | x | x | x | x | x | x |
 | test | x | x | x | x | x | x |
+
+#### Nerval evaluation
+
+##### train
+
+| tag | predicted | matched | Precision | Recall | F1 | Support |
+| :-----: | :-------: | :-----: | :-------: | :----: | :-: | :-----: |
+| Surname | x | x | x | x | x | x |
+| All | x | x | x | x | x | x |
+
+##### val
+
+| tag | predicted | matched | Precision | Recall | F1 | Support |
+| :-----: | :-------: | :-----: | :-------: | :----: | :-: | :-----: |
+| Surname | x | x | x | x | x | x |
+| All | x | x | x | x | x | x |
+
+##### test
+
+| tag | predicted | matched | Precision | Recall | F1 | Support |
+| :-----: | :-------: | :-----: | :-------: | :----: | :-: | :-----: |
+| Surname | x | x | x | x | x | x |
+| All | x | x | x | x | x | x |
+```
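In practice this means a command like `teklia-dan evaluate --config path/to/config.json --nerval-threshold 0.5` relaxes the entity matching (per the parameter description above, `0` only accepts exact matches and `1` accepts any string), while omitting the flag keeps the default threshold of `0.3`.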
 # Prediction
 
-Use the `teklia-dan predict` command to apply a trained DAN model on an image.
-
 ## Description
 
+Use the `teklia-dan predict` command to apply a trained DAN model on an image.
+
 | Parameter     | Description                                                                                      | Type           | Default |
 | ------------- | ------------------------------------------------------------------------------------------------ | -------------- | ------- |
 | `--image-dir` | Path to the folder where the images to predict are stored. Must not be provided with `--image`.  | `pathlib.Path` |         |
...
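In other words, a folder of images is passed with `--image-dir` and, per its description, this cannot be combined with `--image`, which presumably targets a single image.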
Subproject commit 525c1a9e6d5a33075669085148247e2604dd092f
+-e ./nerval
 albumentations==1.3.1
 arkindex-export==0.1.9
 boto3==1.26.124
-editdistance==0.6.2
 flashlight-text==0.0.4
 imageio==2.26.1
 imagesize==1.4.1
@@ -9,7 +9,6 @@ lxml==4.9.3
 mdutils==1.6.0
 nltk==3.8.1
 numpy==1.24.3
-prettytable==3.8.0
 PyYAML==6.0
 scipy==1.10.1
 sentencepiece==0.1.99
...
@@ -21,7 +21,7 @@ from arkindex_export import (
     WorkerVersion,
     database,
 )
-from dan.datasets.extract.arkindex import SPLIT_NAMES
+from dan.datasets.extract.arkindex import TEST_NAME, TRAIN_NAME, VAL_NAME
 from tests import FIXTURES
@@ -181,15 +181,16 @@ def mock_database(tmp_path_factory):
     )
 
     # Create dataset
+    split_names = [VAL_NAME, TEST_NAME, TRAIN_NAME]
     dataset = Dataset.create(
         id="dataset_id",
         name="Dataset",
         state="complete",
-        sets=",".join(SPLIT_NAMES),
+        sets=",".join(split_names),
     )
 
     # Create dataset elements
-    for split in SPLIT_NAMES:
+    for split in split_names:
         element_path = (FIXTURES / "extraction" / "elements" / split).with_suffix(
             ".json"
         )
...
#### DAN evaluation
| Split | CER (HTR-NER) | CER (HTR) | WER (HTR-NER) | WER (HTR) | WER (HTR no punct) | NER  |
|:-----:|:-------------:|:---------:|:-------------:|:---------:|:------------------:|:----:|
| train | 18.89 | 21.05 | 26.67 | 26.67 | 26.67 | 7.14 |
| val | 8.82 | 11.54 | 50.0 | 50.0 | 50.0 | 0.0 |
| test | 2.78 | 3.33 | 14.29 | 14.29 | 14.29 | 0.0 |
#### Nerval evaluation
##### train
| tag | predicted | matched | Precision | Recall | F1 | Support |
|:---------:|:---------:|:-------:|:---------:|:------:|:-----:|:-------:|
| Surname | 2 | 2 | 100.0 | 100.0 | 100.0 | 2 |
| Patron | 2 | 0 | 0.0 | 0.0 | 0 | 1 |
| Operai | 2 | 2 | 100.0 | 100.0 | 100.0 | 2 |
| Louche | 2 | 1 | 50.0 | 50.0 | 50.0 | 2 |
| Koala | 2 | 2 | 100.0 | 100.0 | 100.0 | 2 |
| Firstname | 2 | 2 | 100.0 | 100.0 | 100.0 | 2 |
| Chalumeau | 1 | 1 | 100.0 | 100.0 | 100.0 | 1 |
| Batiment | 2 | 2 | 100.0 | 100.0 | 100.0 | 2 |
| All | 15 | 12 | 80.0 | 85.71 | 82.76 | 14 |
##### val
| tag | predicted | matched | Precision | Recall | F1 | Support |
|:---------:|:---------:|:-------:|:---------:|:------:|:-----:|:-------:|
| Surname | 1 | 0 | 0.0 | 0.0 | 0 | 1 |
| Patron | 1 | 1 | 100.0 | 100.0 | 100.0 | 1 |
| Operai | 1 | 0 | 0.0 | 0.0 | 0 | 1 |
| Louche | 1 | 1 | 100.0 | 100.0 | 100.0 | 1 |
| Koala | 1 | 1 | 100.0 | 100.0 | 100.0 | 1 |
| Firstname | 1 | 1 | 100.0 | 100.0 | 100.0 | 1 |
| Chalumeau | 1 | 1 | 100.0 | 100.0 | 100.0 | 1 |
| Batiment | 1 | 1 | 100.0 | 100.0 | 100.0 | 1 |
| All | 8 | 6 | 75.0 | 75.0 | 75.0 | 8 |
##### test
| tag | predicted | matched | Precision | Recall | F1 | Support |
|:---------:|:---------:|:-------:|:---------:|:------:|:-----:|:-------:|
| Surname | 1 | 1 | 100.0 | 100.0 | 100.0 | 1 |
| Louche | 1 | 1 | 100.0 | 100.0 | 100.0 | 1 |
| Koala | 1 | 1 | 100.0 | 100.0 | 100.0 | 1 |
| Firstname | 1 | 1 | 100.0 | 100.0 | 100.0 | 1 |
| Chalumeau | 1 | 0 | 0.0 | 0.0 | 0 | 1 |
| Batiment | 1 | 1 | 100.0 | 100.0 | 100.0 | 1 |
| All | 6 | 5 | 83.33 | 83.33 | 83.33 | 6 |
@@ -103,7 +103,7 @@ def test_evaluate(capsys, training_res, val_res, test_res, evaluate_config):
     # Use the tmp_path as base folder
     evaluate_config["training"]["output_folder"] = FIXTURES / "evaluate"
 
-    evaluate.run(evaluate_config)
+    evaluate.run(evaluate_config, evaluate.NERVAL_THRESHOLD)
 
     # Check that the evaluation results are correct
     for split_name, expected_res in zip(
@@ -129,7 +129,7 @@ def test_evaluate(capsys, training_res, val_res, test_res, evaluate_config):
     # Check the metrics Markdown table
     captured_std = capsys.readouterr()
-    last_printed_lines = captured_std.out.split("\n")[-6:]
+    last_printed_lines = captured_std.out.split("\n")[10:]
     assert (
         "\n".join(last_printed_lines)
         == Path(FIXTURES / "evaluate" / "metrics_table.md").read_text()
...
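The slice change follows from the fixture update above: the expected output now covers the `#### DAN evaluation` heading, the metrics table and the Nerval tables rather than only the last six printed lines, so the test compares everything after the first ten lines of captured output (presumably log and progress lines) against `metrics_table.md`.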