Evaluate predictions with nerval

Merged Manon Blanco requested to merge nerval-evaluate into main
4 files  +62  −32
@@ -6,12 +6,15 @@ Evaluate a trained DAN model.
 import logging
 import random
 from argparse import ArgumentTypeError
+from pathlib import Path
+from typing import Dict, List
 
 import numpy as np
 import torch
 import torch.multiprocessing as mp
 
 from dan.bio import convert
+from dan.ocr.manager.metrics import Inference
 from dan.ocr.manager.training import Manager
 from dan.ocr.utils import add_metrics_table_row, create_metrics_table, update_config
 from dan.utils import parse_tokens, read_json
@@ -60,6 +63,37 @@ def add_evaluate_parser(subcommands) -> None:
     parser.set_defaults(func=run)
 
 
+def eval_nerval(
+    all_inferences: Dict[str, List[Inference]],
+    tokens: Path,
+    threshold: float,
+):
+    print("\n#### Nerval evaluation")
+
+    def inferences_to_parsed_bio(attr: str):
+        bio_values = []
+        for inference in inferences:
+            value = getattr(inference, attr)
+            bio_value = convert(value, ner_tokens=tokens)
+            bio_values.extend(bio_value.split("\n"))
+
+        # Parse this BIO format
+        return parse_bio(bio_values)
+
+    # Evaluate with Nerval
+    tokens = parse_tokens(tokens)
+    for split_name, inferences in all_inferences.items():
+        ground_truths = inferences_to_parsed_bio("ground_truth")
+        predictions = inferences_to_parsed_bio("prediction")
+
+        if not (ground_truths and predictions):
+            continue
+
+        scores = evaluate(ground_truths, predictions, threshold)
+        print(f"\n##### {split_name}\n")
+        print_results(scores)
+
+
 def eval(rank, config: dict, nerval_threshold: float, mlflow_logging: bool):
     torch.manual_seed(0)
     torch.cuda.manual_seed(0)
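For context on the intermediate format: convert serializes a transcription into BIO lines (one "token TAG" pair per line), and parse_bio reads those lines back for Nerval to score. A minimal sketch of the kind of value inferences_to_parsed_bio accumulates, with purely illustrative words and entity names:

# Illustrative only: BIO lines of the shape produced by convert() and
# consumed by parse_bio(). Words and entity names are made up.
bio_lines = [
    "Jean B-name",    # beginning of a "name" entity
    "Dupont I-name",  # inside (continuation of) the same entity
    "born O",         # outside any entity
    "in O",
    "Paris B-city",   # single-token "city" entity
]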
@@ -105,33 +139,15 @@ def eval(rank, config: dict, nerval_threshold: float, mlflow_logging: bool):
         add_metrics_table_row(metrics_table, set_name, metrics)
         all_inferences[set_name] = inferences
 
+    print("\n#### DAN evaluation\n")
     print(metrics_table)
 
-    if "ner" not in metric_names:
-        return
-
-    def inferences_to_parsed_bio(attr: str):
-        bio_values = []
-        for inference in inferences:
-            value = getattr(inference, attr)
-            bio_value = convert(value, ner_tokens=tokens)
-            bio_values.extend(bio_value.split("\n"))
-
-        # Parse this BIO format
-        return parse_bio(bio_values)
-
-    # Evaluate with Nerval
-    tokens = parse_tokens(config["dataset"]["tokens"])
-    for set_name, inferences in all_inferences.items():
-        ground_truths = inferences_to_parsed_bio("ground_truth")
-        predictions = inferences_to_parsed_bio("prediction")
-
-        if not (ground_truths and predictions):
-            continue
-
-        scores = evaluate(ground_truths, predictions, nerval_threshold)
-        print(f"\n#### {set_name}\n")
-        print_results(scores)
+    if "ner" in metric_names:
+        eval_nerval(
+            all_inferences,
+            tokens=config["dataset"]["tokens"],
+            threshold=nerval_threshold,
+        )
 
 
 def run(config: dict, nerval_threshold: float):
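As a usage illustration (not part of this MR): eval_nerval only reads the ground_truth and prediction attributes of each Inference, so a lightweight stub is enough to exercise it; the tokens path, transcription strings, and threshold below are all hypothetical.

# Minimal sketch, not from the MR: calling the new helper directly.
# Assumes a tokens YAML file that parse_tokens understands; the path,
# transcription strings, and threshold value are hypothetical.
from pathlib import Path
from typing import NamedTuple


class StubInference(NamedTuple):
    ground_truth: str
    prediction: str


all_inferences = {
    "val": [StubInference(ground_truth="…", prediction="…")],
}

eval_nerval(
    all_inferences,
    tokens=Path("tokens.yml"),  # hypothetical path
    threshold=0.30,             # hypothetical threshold
)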