Evaluate predictions with nerval

Merged Manon Blanco requested to merge nerval-evaluate into main
4 files  +62  −32
@@ -6,12 +6,15 @@ Evaluate a trained DAN model.
 import logging
 import random
 from argparse import ArgumentTypeError
+from pathlib import Path
+from typing import Dict, List
 
 import numpy as np
 import torch
 import torch.multiprocessing as mp
 
 from dan.bio import convert
+from dan.ocr.manager.metrics import Inference
 from dan.ocr.manager.training import Manager
 from dan.ocr.utils import add_metrics_table_row, create_metrics_table, update_config
 from dan.utils import parse_tokens, read_json
@@ -60,6 +63,37 @@ def add_evaluate_parser(subcommands) -> None:
     parser.set_defaults(func=run)
 
 
+def eval_nerval(
+    all_inferences: Dict[str, List[Inference]],
+    tokens: Path,
+    threshold: float,
+):
+    print("\n#### Nerval evaluation")
+
+    def inferences_to_parsed_bio(attr: str):
+        bio_values = []
+        for inference in inferences:
+            value = getattr(inference, attr)
+            bio_value = convert(value, ner_tokens=tokens)
+            bio_values.extend(bio_value.split("\n"))
+
+        # Parse this BIO format
+        return parse_bio(bio_values)
+
+    # Evaluate with Nerval
+    tokens = parse_tokens(tokens)
+    for split_name, inferences in all_inferences.items():
+        ground_truths = inferences_to_parsed_bio("ground_truth")
+        predictions = inferences_to_parsed_bio("prediction")
+
+        if not (ground_truths and predictions):
+            continue
+
+        scores = evaluate(ground_truths, predictions, threshold)
+        print(f"\n##### {split_name}\n")
+        print_results(scores)
+
+
 def eval(rank, config: dict, nerval_threshold: float, mlflow_logging: bool):
     torch.manual_seed(0)
     torch.cuda.manual_seed(0)
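For context on the intermediate format: convert serializes a transcription into BIO lines (one "token TAG" pair per line), and parse_bio reads those lines back for Nerval to score. A minimal sketch of the kind of value inferences_to_parsed_bio accumulates, with purely illustrative words and entity names:

# Illustrative only: BIO lines of the shape produced by convert() and
# consumed by parse_bio(). Words and entity names are made up.
bio_lines = [
    "Jean B-name",    # beginning of a "name" entity
    "Dupont I-name",  # inside (continuation of) the same entity
    "born O",         # outside any entity
    "in O",
    "Paris B-city",   # single-token "city" entity
]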
@@ -105,33 +139,15 @@ def eval(rank, config: dict, nerval_threshold: float, mlflow_logging: bool):
         add_metrics_table_row(metrics_table, set_name, metrics)
         all_inferences[set_name] = inferences
 
+    print("\n#### DAN evaluation\n")
     print(metrics_table)
 
-    if "ner" not in metric_names:
-        return
-
-    def inferences_to_parsed_bio(attr: str):
-        bio_values = []
-        for inference in inferences:
-            value = getattr(inference, attr)
-            bio_value = convert(value, ner_tokens=tokens)
-            bio_values.extend(bio_value.split("\n"))
-
-        # Parse this BIO format
-        return parse_bio(bio_values)
-
-    # Evaluate with Nerval
-    tokens = parse_tokens(config["dataset"]["tokens"])
-    for set_name, inferences in all_inferences.items():
-        ground_truths = inferences_to_parsed_bio("ground_truth")
-        predictions = inferences_to_parsed_bio("prediction")
-
-        if not (ground_truths and predictions):
-            continue
-
-        scores = evaluate(ground_truths, predictions, nerval_threshold)
-        print(f"\n#### {set_name}\n")
-        print_results(scores)
+    if "ner" in metric_names:
+        eval_nerval(
+            all_inferences,
+            tokens=config["dataset"]["tokens"],
+            threshold=nerval_threshold,
+        )
 
 
 def run(config: dict, nerval_threshold: float):
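As a usage illustration (not part of this MR): eval_nerval only reads the ground_truth and prediction attributes of each Inference, so a lightweight stub is enough to exercise it; the tokens path, transcription strings, and threshold below are all hypothetical.

# Minimal sketch, not from the MR: calling the new helper directly.
# Assumes a tokens YAML file that parse_tokens understands; the path,
# transcription strings, and threshold value are hypothetical.
from pathlib import Path
from typing import NamedTuple


class StubInference(NamedTuple):
    ground_truth: str
    prediction: str


all_inferences = {
    "val": [StubInference(ground_truth="…", prediction="…")],
}

eval_nerval(
    all_inferences,
    tokens=Path("tokens.yml"),  # hypothetical path
    threshold=0.30,             # hypothetical threshold
)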