From 378d61773c579569ecff7d3ae1d404740c0ea310 Mon Sep 17 00:00:00 2001
From: kermorvant <kermorvant@gmail.com>
Date: Sun, 23 May 2021 09:46:08 +0200
Subject: [PATCH] add more info on output and reformat in md

---
 .isort.cfg                   |  2 +-
 nerval/evaluate.py           | 33 ++++++++++++++++++++++-----------
 requirements.txt             |  1 +
 tests/test_compute_scores.py | 31 ++++++++++++++++++++++++++-----
 tests/test_run.py            | 32 +++++++++++++++++++++++++++-----
 5 files changed, 77 insertions(+), 22 deletions(-)

diff --git a/.isort.cfg b/.isort.cfg
index 0b0cc35..5cf2203 100644
--- a/.isort.cfg
+++ b/.isort.cfg
@@ -7,4 +7,4 @@ use_parentheses = True
 line_length = 120
 
 default_section=FIRSTPARTY
-known_third_party = editdistance,edlib,pytest,setuptools
+known_third_party = editdistance,edlib,pytest,setuptools,termtables
diff --git a/nerval/evaluate.py b/nerval/evaluate.py
index f0cea43..e48d398 100644
--- a/nerval/evaluate.py
+++ b/nerval/evaluate.py
@@ -10,6 +10,7 @@ import re
 
 import editdistance
 import edlib
+import termtables as tt
 
 THRESHOLD = 0.30
 NOT_ENTITY_TAG = "O"
@@ -22,7 +23,7 @@ def parse_bio(path: str) -> dict:
 
     Output format : { "words": str, "tags": list; "entity_count" : { tag : int} }
     """
-    assert os.path.exists(path)
+    assert os.path.exists(path), f"Error: Input file {path} does not exist"
     words = []
     tags = []
 
@@ -269,9 +270,12 @@ def compute_scores(
             else 2 * (prec * rec) / (prec + rec)
         )
 
+        scores[tag]["predicted"] = nb_predict
+        scores[tag]["matched"] = nb_match
         scores[tag]["P"] = prec
         scores[tag]["R"] = rec
         scores[tag]["F1"] = f1
+        scores[tag]["Support"] = nb_annot
 
     return scores
 
@@ -281,18 +285,25 @@ def print_results(scores: dict):
 
     None values are kept to indicate the absence of a certain tag in either annotation or prediction.
     """
-    logging.info("-- Results --")
-
+    header = ["tag", "predicted", "matched", "Precision", "Recall", "F1", "Support"]
+    results = []
     for tag in sorted(scores.keys())[::-1]:
+        prec = None if scores[tag]["P"] is None else round(scores[tag]["P"], 3)
+        rec = None if scores[tag]["R"] is None else round(scores[tag]["R"], 3)
+        f1 = None if scores[tag]["F1"] is None else round(scores[tag]["F1"], 3)
 
-        prec = None if scores[tag]["P"] is None else round(scores[tag]["P"], 2)
-        rec = None if scores[tag]["R"] is None else round(scores[tag]["R"], 2)
-        f1 = None if scores[tag]["F1"] is None else round(scores[tag]["F1"], 2)
-
-        logging.info(f"{tag} :")
-        logging.info(f"P = {prec}")
-        logging.info(f"R = {rec}")
-        logging.info(f"F1 = {f1}")
+        results.append(
+            [
+                tag,
+                scores[tag]["predicted"],
+                scores[tag]["matched"],
+                prec,
+                rec,
+                f1,
+                scores[tag]["Support"],
+            ]
+        )
+    tt.print(results, header, style=tt.styles.markdown)
 
 
 def run(annotation: str, prediction: str) -> dict:
diff --git a/requirements.txt b/requirements.txt
index 65d95a6..d6af2d0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,3 @@
 editdistance==0.5.3
 edlib==1.3.8.post2
+termtables==0.2.3
diff --git a/tests/test_compute_scores.py b/tests/test_compute_scores.py
index a6c86a1..02e26cf 100644
--- a/tests/test_compute_scores.py
+++ b/tests/test_compute_scores.py
@@ -8,11 +8,32 @@ fake_predict_entity_count = {"All": 3, "DAT": 1, "***": 1, "PER": 1}
 fake_matches = {"All": 1, "PER": 1, "LOC": 0, "DAT": 0}
 
 expected_scores = {
-    "***": {"P": 0.0, "R": None, "F1": None},
-    "DAT": {"P": 0.0, "R": 0.0, "F1": 0},
-    "All": {"P": 0.3333333333333333, "R": 0.3333333333333333, "F1": 0.3333333333333333},
-    "PER": {"P": 1.0, "R": 1.0, "F1": 1.0},
-    "LOC": {"P": None, "R": 0.0, "F1": None},
+    "***": {
+        "P": 0.0,
+        "R": None,
+        "F1": None,
+        "predicted": 1,
+        "matched": 0,
+        "Support": None,
+    },
+    "DAT": {"P": 0.0, "R": 0.0, "F1": 0, "predicted": 1, "matched": 0, "Support": 1},
+    "All": {
+        "P": 0.3333333333333333,
+        "R": 0.3333333333333333,
+        "F1": 0.3333333333333333,
+        "predicted": 3,
+        "matched": 1,
+        "Support": 3,
+    },
+    "PER": {"P": 1.0, "R": 1.0, "F1": 1.0, "predicted": 1, "matched": 1, "Support": 1},
+    "LOC": {
+        "P": None,
+        "R": 0.0,
+        "F1": None,
+        "predicted": None,
+        "matched": 0,
+        "Support": 1,
+    },
 }
 
 
diff --git a/tests/test_run.py b/tests/test_run.py
index fd426aa..8cf9c84 100644
--- a/tests/test_run.py
+++ b/tests/test_run.py
@@ -8,11 +8,32 @@ FAKE_PREDICT_BIO = "tests/test_predict.bio"
 EMPTY_BIO = "tests/test_empty.bio"
 
 expected_scores = {
-    "***": {"P": 0.0, "R": None, "F1": None},
-    "DAT": {"P": 0.0, "R": 0.0, "F1": 0},
-    "All": {"P": 0.3333333333333333, "R": 0.3333333333333333, "F1": 0.3333333333333333},
-    "PER": {"P": 1.0, "R": 1.0, "F1": 1.0},
-    "LOC": {"P": None, "R": 0.0, "F1": None},
+    "***": {
+        "P": 0.0,
+        "R": None,
+        "F1": None,
+        "predicted": 1,
+        "matched": 0,
+        "Support": None,
+    },
+    "DAT": {"P": 0.0, "R": 0.0, "F1": 0, "predicted": 1, "matched": 0, "Support": 1},
+    "All": {
+        "P": 0.3333333333333333,
+        "R": 0.3333333333333333,
+        "F1": 0.3333333333333333,
+        "predicted": 3,
+        "matched": 1,
+        "Support": 3,
+    },
+    "PER": {"P": 1.0, "R": 1.0, "F1": 1.0, "predicted": 1, "matched": 1, "Support": 1},
+    "LOC": {
+        "P": None,
+        "R": 0.0,
+        "F1": None,
+        "predicted": None,
+        "matched": 0,
+        "Support": 1,
+    },
 }
 
 
@@ -20,6 +41,7 @@ expected_scores = {
     "test_input, expected", [((FAKE_ANNOT_BIO, FAKE_PREDICT_BIO), expected_scores)]
 )
 def test_run(test_input, expected):
+    # print(evaluate.run(*test_input))
     assert evaluate.run(*test_input) == expected
 
 
-- 
GitLab