From 378d61773c579569ecff7d3ae1d404740c0ea310 Mon Sep 17 00:00:00 2001
From: kermorvant <kermorvant@gmail.com>
Date: Sun, 23 May 2021 09:46:08 +0200
Subject: [PATCH] add more info on output and reformat in md

---
 .isort.cfg                   |  2 +-
 nerval/evaluate.py           | 33 ++++++++++++++++++++++-----------
 requirements.txt             |  1 +
 tests/test_compute_scores.py | 31 ++++++++++++++++++++++++++-----
 tests/test_run.py            | 32 +++++++++++++++++++++++++++-----
 5 files changed, 77 insertions(+), 22 deletions(-)

diff --git a/.isort.cfg b/.isort.cfg
index 0b0cc35..5cf2203 100644
--- a/.isort.cfg
+++ b/.isort.cfg
@@ -7,4 +7,4 @@ use_parentheses = True
 line_length = 120
 
 default_section=FIRSTPARTY
-known_third_party = editdistance,edlib,pytest,setuptools
+known_third_party = editdistance,edlib,pytest,setuptools,termtables
diff --git a/nerval/evaluate.py b/nerval/evaluate.py
index f0cea43..e48d398 100644
--- a/nerval/evaluate.py
+++ b/nerval/evaluate.py
@@ -10,6 +10,7 @@ import re
 
 import editdistance
 import edlib
+import termtables as tt
 
 THRESHOLD = 0.30
 NOT_ENTITY_TAG = "O"
@@ -22,7 +23,7 @@ def parse_bio(path: str) -> dict:
 
     Output format : { "words": str, "tags": list; "entity_count" : { tag : int} }
     """
-    assert os.path.exists(path)
+    assert os.path.exists(path), f"Error: Input file {path} does not exist"
     words = []
     tags = []
 
@@ -269,9 +270,12 @@ def compute_scores(
             else 2 * (prec * rec) / (prec + rec)
         )
 
+        scores[tag]["predicted"] = nb_predict
+        scores[tag]["matched"] = nb_match
         scores[tag]["P"] = prec
         scores[tag]["R"] = rec
         scores[tag]["F1"] = f1
+        scores[tag]["Support"] = nb_annot
 
     return scores
 
@@ -281,18 +285,25 @@ def print_results(scores: dict):
 
     None values are kept to indicate the absence of a certain tag in either annotation or prediction.
     """
-    logging.info("-- Results --")
-
+    header = ["tag", "predicted", "matched", "Precision", "Recall", "F1", "Support"]
+    results = []
     for tag in sorted(scores.keys())[::-1]:
+        prec = None if scores[tag]["P"] is None else round(scores[tag]["P"], 3)
+        rec = None if scores[tag]["R"] is None else round(scores[tag]["R"], 3)
+        f1 = None if scores[tag]["F1"] is None else round(scores[tag]["F1"], 3)
 
-        prec = None if scores[tag]["P"] is None else round(scores[tag]["P"], 2)
-        rec = None if scores[tag]["R"] is None else round(scores[tag]["R"], 2)
-        f1 = None if scores[tag]["F1"] is None else round(scores[tag]["F1"], 2)
-
-        logging.info(f"{tag} :")
-        logging.info(f"P = {prec}")
-        logging.info(f"R = {rec}")
-        logging.info(f"F1 = {f1}")
+        results.append(
+            [
+                tag,
+                scores[tag]["predicted"],
+                scores[tag]["matched"],
+                prec,
+                rec,
+                f1,
+                scores[tag]["Support"],
+            ]
+        )
+    tt.print(results, header, style=tt.styles.markdown)
 
 
 def run(annotation: str, prediction: str) -> dict:
diff --git a/requirements.txt b/requirements.txt
index 65d95a6..d6af2d0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,3 @@
 editdistance==0.5.3
 edlib==1.3.8.post2
+termtables==0.2.3
diff --git a/tests/test_compute_scores.py b/tests/test_compute_scores.py
index a6c86a1..02e26cf 100644
--- a/tests/test_compute_scores.py
+++ b/tests/test_compute_scores.py
@@ -8,11 +8,32 @@ fake_predict_entity_count = {"All": 3, "DAT": 1, "***": 1, "PER": 1}
 fake_matches = {"All": 1, "PER": 1, "LOC": 0, "DAT": 0}
 
 expected_scores = {
-    "***": {"P": 0.0, "R": None, "F1": None},
-    "DAT": {"P": 0.0, "R": 0.0, "F1": 0},
-    "All": {"P": 0.3333333333333333, "R": 0.3333333333333333, "F1": 0.3333333333333333},
-    "PER": {"P": 1.0, "R": 1.0, "F1": 1.0},
-    "LOC": {"P": None, "R": 0.0, "F1": None},
+    "***": {
+        "P": 0.0,
+        "R": None,
+        "F1": None,
+        "predicted": 1,
+        "matched": 0,
+        "Support": None,
+    },
+    "DAT": {"P": 0.0, "R": 0.0, "F1": 0, "predicted": 1, "matched": 0, "Support": 1},
+    "All": {
+        "P": 0.3333333333333333,
+        "R": 0.3333333333333333,
+        "F1": 0.3333333333333333,
+        "predicted": 3,
+        "matched": 1,
+        "Support": 3,
+    },
+    "PER": {"P": 1.0, "R": 1.0, "F1": 1.0, "predicted": 1, "matched": 1, "Support": 1},
+    "LOC": {
+        "P": None,
+        "R": 0.0,
+        "F1": None,
+        "predicted": None,
+        "matched": 0,
+        "Support": 1,
+    },
 }
 
 
diff --git a/tests/test_run.py b/tests/test_run.py
index fd426aa..8cf9c84 100644
--- a/tests/test_run.py
+++ b/tests/test_run.py
@@ -8,11 +8,32 @@ FAKE_PREDICT_BIO = "tests/test_predict.bio"
 EMPTY_BIO = "tests/test_empty.bio"
 
 expected_scores = {
-    "***": {"P": 0.0, "R": None, "F1": None},
-    "DAT": {"P": 0.0, "R": 0.0, "F1": 0},
-    "All": {"P": 0.3333333333333333, "R": 0.3333333333333333, "F1": 0.3333333333333333},
-    "PER": {"P": 1.0, "R": 1.0, "F1": 1.0},
-    "LOC": {"P": None, "R": 0.0, "F1": None},
+    "***": {
+        "P": 0.0,
+        "R": None,
+        "F1": None,
+        "predicted": 1,
+        "matched": 0,
+        "Support": None,
+    },
+    "DAT": {"P": 0.0, "R": 0.0, "F1": 0, "predicted": 1, "matched": 0, "Support": 1},
+    "All": {
+        "P": 0.3333333333333333,
+        "R": 0.3333333333333333,
+        "F1": 0.3333333333333333,
+        "predicted": 3,
+        "matched": 1,
+        "Support": 3,
+    },
+    "PER": {"P": 1.0, "R": 1.0, "F1": 1.0, "predicted": 1, "matched": 1, "Support": 1},
+    "LOC": {
+        "P": None,
+        "R": 0.0,
+        "F1": None,
+        "predicted": None,
+        "matched": 0,
+        "Support": 1,
+    },
 }
 
 
@@ -20,6 +41,7 @@ expected_scores = {
     "test_input, expected", [((FAKE_ANNOT_BIO, FAKE_PREDICT_BIO), expected_scores)]
 )
 def test_run(test_input, expected):
+    # print(evaluate.run(*test_input))
     assert evaluate.run(*test_input) == expected
 
 
-- 
GitLab