Commit 378d6177 authored by kermorvant

add more info on output and reformat in md

parent 18e03763
Merge request !3: add more info on output and reformat in md
Pipeline #103783 passed
@@ -7,4 +7,4 @@ use_parentheses = True
 line_length = 120
 default_section=FIRSTPARTY
-known_third_party = editdistance,edlib,pytest,setuptools
+known_third_party = editdistance,edlib,pytest,setuptools,termtables
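The hunk above registers termtables in the isort configuration so the new import stays grouped with the other third-party packages. Assuming the evaluation module also imports logging, os and re (all three are used elsewhere in this diff), the sorted import block would plausibly look like:

```python
import logging
import os
import re

import editdistance
import edlib
import termtables as tt
```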
@@ -10,6 +10,7 @@ import re
 import editdistance
 import edlib
+import termtables as tt

 THRESHOLD = 0.30
 NOT_ENTITY_TAG = "O"
@@ -22,7 +23,7 @@ def parse_bio(path: str) -> dict:
     Output format : { "words": str, "tags": list; "entity_count" : { tag : int} }
     """
-    assert os.path.exists(path)
+    assert os.path.exists(path), f"Error: Input file {path} does not exist"
     words = []
     tags = []
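The assertion in parse_bio now carries a message, so a missing input file fails with the offending path instead of a bare AssertionError. A minimal standalone sketch of the new behaviour (function and file name made up):

```python
import os

def check_input(path: str) -> None:
    # Same guard as the updated parse_bio(): fail fast and name the file.
    assert os.path.exists(path), f"Error: Input file {path} does not exist"

check_input("does_not_exist.bio")
# AssertionError: Error: Input file does_not_exist.bio does not exist
```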
@@ -269,9 +270,12 @@ def compute_scores(
             else 2 * (prec * rec) / (prec + rec)
         )
+        scores[tag]["predicted"] = nb_predict
+        scores[tag]["matched"] = nb_match
         scores[tag]["P"] = prec
         scores[tag]["R"] = rec
         scores[tag]["F1"] = f1
+        scores[tag]["Support"] = nb_annot

     return scores
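compute_scores now also exposes the raw counts behind each metric. Consistent with the test fixtures below, precision is matched/predicted, recall is matched/Support, and F1 is their harmonic mean; here is a worked example for the "All" row (3 predicted, 1 matched, 3 annotated), with the zero-division guard the fixtures imply:

```python
nb_predict, nb_match, nb_annot = 3, 1, 3  # "All" row of the fixtures

prec = nb_match / nb_predict  # 1/3
rec = nb_match / nb_annot     # 1/3
# Harmonic mean; guard against prec + rec == 0 (the DAT row yields F1 = 0).
f1 = 0 if prec + rec == 0 else 2 * (prec * rec) / (prec + rec)  # also 1/3 here

assert abs(f1 - 0.3333333333333333) < 1e-12
```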
@@ -281,18 +285,25 @@ def print_results(scores: dict):
     None values are kept to indicate the absence of a certain tag in either annotation or prediction.
     """
-    logging.info("-- Results --")
+    header = ["tag", "predicted", "matched", "Precision", "Recall", "F1", "Support"]
+    results = []
     for tag in sorted(scores.keys())[::-1]:
-        prec = None if scores[tag]["P"] is None else round(scores[tag]["P"], 2)
-        rec = None if scores[tag]["R"] is None else round(scores[tag]["R"], 2)
-        f1 = None if scores[tag]["F1"] is None else round(scores[tag]["F1"], 2)
-
-        logging.info(f"{tag} :")
-        logging.info(f"P = {prec}")
-        logging.info(f"R = {rec}")
-        logging.info(f"F1 = {f1}")
+        prec = None if scores[tag]["P"] is None else round(scores[tag]["P"], 3)
+        rec = None if scores[tag]["R"] is None else round(scores[tag]["R"], 3)
+        f1 = None if scores[tag]["F1"] is None else round(scores[tag]["F1"], 3)
+        results.append(
+            [
+                tag,
+                scores[tag]["predicted"],
+                scores[tag]["matched"],
+                prec,
+                rec,
+                f1,
+                scores[tag]["Support"],
+            ]
+        )
+    tt.print(results, header, style=tt.styles.markdown)

 def run(annotation: str, prediction: str) -> dict:
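print_results now renders one row per tag through termtables instead of four logging lines per tag. A self-contained sketch of the same call, with two rows lifted from the test fixtures below:

```python
import termtables as tt

header = ["tag", "predicted", "matched", "Precision", "Recall", "F1", "Support"]
results = [
    ["PER", 1, 1, 1.0, 1.0, 1.0, 1],  # values from expected_scores["PER"]
    ["DAT", 1, 0, 0.0, 0.0, 0, 1],    # values from expected_scores["DAT"]
]

# Same call as in the diff: prints a markdown-style table to stdout.
tt.print(results, header, style=tt.styles.markdown)
```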
 editdistance==0.5.3
 edlib==1.3.8.post2
+termtables==0.2.3
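The new dependency is pinned in the requirements file; after pulling this change, reinstall with `pip install -r requirements.txt` so termtables 0.2.3 is available before running the evaluation.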
@@ -8,11 +8,32 @@ fake_predict_entity_count = {"All": 3, "DAT": 1, "***": 1, "PER": 1}
 fake_matches = {"All": 1, "PER": 1, "LOC": 0, "DAT": 0}

 expected_scores = {
-    "***": {"P": 0.0, "R": None, "F1": None},
-    "DAT": {"P": 0.0, "R": 0.0, "F1": 0},
-    "All": {"P": 0.3333333333333333, "R": 0.3333333333333333, "F1": 0.3333333333333333},
-    "PER": {"P": 1.0, "R": 1.0, "F1": 1.0},
-    "LOC": {"P": None, "R": 0.0, "F1": None},
+    "***": {
+        "P": 0.0,
+        "R": None,
+        "F1": None,
+        "predicted": 1,
+        "matched": 0,
+        "Support": None,
+    },
+    "DAT": {"P": 0.0, "R": 0.0, "F1": 0, "predicted": 1, "matched": 0, "Support": 1},
+    "All": {
+        "P": 0.3333333333333333,
+        "R": 0.3333333333333333,
+        "F1": 0.3333333333333333,
+        "predicted": 3,
+        "matched": 1,
+        "Support": 3,
+    },
+    "PER": {"P": 1.0, "R": 1.0, "F1": 1.0, "predicted": 1, "matched": 1, "Support": 1},
+    "LOC": {
+        "P": None,
+        "R": 0.0,
+        "F1": None,
+        "predicted": None,
+        "matched": 0,
+        "Support": 1,
+    },
 }
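The placement of None in these fixtures follows the print_results docstring: a tag never predicted has no "predicted" count and an undefined Precision, and a tag never annotated has no Support and an undefined Recall. Two spot checks against the dict above:

```python
# LOC: annotated once, never predicted -> P undefined, R = 0/1.
assert expected_scores["LOC"]["predicted"] is None
assert expected_scores["LOC"]["P"] is None and expected_scores["LOC"]["R"] == 0.0

# "***": predicted once, never annotated -> Support and R undefined, P = 0/1.
assert expected_scores["***"]["Support"] is None
assert expected_scores["***"]["P"] == 0.0 and expected_scores["***"]["R"] is None
```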
@@ -8,11 +8,32 @@ FAKE_PREDICT_BIO = "tests/test_predict.bio"
 EMPTY_BIO = "tests/test_empty.bio"

 expected_scores = {
-    "***": {"P": 0.0, "R": None, "F1": None},
-    "DAT": {"P": 0.0, "R": 0.0, "F1": 0},
-    "All": {"P": 0.3333333333333333, "R": 0.3333333333333333, "F1": 0.3333333333333333},
-    "PER": {"P": 1.0, "R": 1.0, "F1": 1.0},
-    "LOC": {"P": None, "R": 0.0, "F1": None},
+    "***": {
+        "P": 0.0,
+        "R": None,
+        "F1": None,
+        "predicted": 1,
+        "matched": 0,
+        "Support": None,
+    },
+    "DAT": {"P": 0.0, "R": 0.0, "F1": 0, "predicted": 1, "matched": 0, "Support": 1},
+    "All": {
+        "P": 0.3333333333333333,
+        "R": 0.3333333333333333,
+        "F1": 0.3333333333333333,
+        "predicted": 3,
+        "matched": 1,
+        "Support": 3,
+    },
+    "PER": {"P": 1.0, "R": 1.0, "F1": 1.0, "predicted": 1, "matched": 1, "Support": 1},
+    "LOC": {
+        "P": None,
+        "R": 0.0,
+        "F1": None,
+        "predicted": None,
+        "matched": 0,
+        "Support": 1,
+    },
 }

@@ -20,6 +41,7 @@ expected_scores = {
     "test_input, expected", [((FAKE_ANNOT_BIO, FAKE_PREDICT_BIO), expected_scores)]
 )
 def test_run(test_input, expected):
+    # print(evaluate.run(*test_input))
     assert evaluate.run(*test_input) == expected
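The commented-out print is a debugging hook: when expected_scores drifts from the implementation, uncommenting it shows the actual output. The same check can also be run outside pytest from within the test module, reusing its constants:

```python
import evaluate

# Direct equivalent of test_run(), handy for interactive debugging.
scores = evaluate.run(FAKE_ANNOT_BIO, FAKE_PREDICT_BIO)
print(scores["All"])  # e.g. {'P': 0.333..., 'predicted': 3, 'matched': 1, 'Support': 3, ...}
assert scores == expected_scores
```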