diff --git a/nerval/evaluate.py b/nerval/evaluate.py
index 00d9e4343e1bc13098849aa208755edfab156a35..ab11dde34444b3ecbf8fcc3e7c5c694be8b63e76 100644
--- a/nerval/evaluate.py
+++ b/nerval/evaluate.py
@@ -492,7 +492,24 @@ def print_results(scores: dict):
     tt.print(results, header, style=tt.styles.markdown)
 
 
-def run(annotation: str, prediction: str, threshold: int) -> dict:
+def print_result_compact(scores: dict):
+    result = []
+    header = ["tag", "predicted", "matched", "Precision", "Recall", "F1", "Support"]
+    result.append(
+        [
+            "All",
+            scores["All"]["predicted"],
+            scores["All"]["matched"],
+            round(scores["All"]["P"], 3),
+            round(scores["All"]["R"], 3),
+            round(scores["All"]["F1"], 3),
+            scores["All"]["Support"],
+        ]
+    )
+    tt.print(result, header, style=tt.styles.markdown)
+
+
+def run(annotation: str, prediction: str, threshold: int, verbose: bool) -> dict:
     """Compute recall and precision for each entity type found in annotation and/or prediction.
 
     Each measure is given at document level, global score is a micro-average across entity types.
@@ -535,12 +552,15 @@ def run(annotation: str, prediction: str, threshold: int) -> dict:
     scores = compute_scores(annot["entity_count"], predict["entity_count"], matches)
 
     # Print results
-    print_results(scores)
+    if verbose:
+        print_results(scores)
+    else:
+        print_result_compact(scores)
 
     return scores
 
 
-def run_multiple(file_csv, folder, threshold):
+def run_multiple(file_csv, folder, threshold, verbose):
     """Run the program for multiple files (correlation indicated in the csv file)"""
     # Read the csv in a list
     with open(file_csv, "r") as read_obj:
@@ -563,7 +583,7 @@ def run_multiple(file_csv, folder, threshold):
 
             if annot and predict:
                 print(os.path.basename(predict))
-                run(annot, predict, threshold)
+                run(annot, predict, threshold, verbose)
                 print()
             else:
                 raise f"No file found for files {annot}, {predict}"
@@ -624,6 +644,12 @@ def main():
         help="Folder containing the bio files referred to in the csv file",
         type=Path,
     )
+    parser.add_argument(
+        "-v",
+        "--verbose",
+        help="Print only the global recap instead of the full per-tag results",
+        action="store_false",
+    )
     args = parser.parse_args()
 
     if args.multiple == 1 or args.multiple == 2:
@@ -633,14 +659,14 @@ def main():
             if not args.csv:
                 raise argparse.ArgumentError(args.folder, "-c must be given if -m is 2")
             if args.folder and args.csv:
-                run_multiple(args.csv, args.folder, args.threshold)
+                run_multiple(args.csv, args.folder, args.threshold, args.verbose)
         if args.multiple == 1:
             if not args.annot:
                 raise argparse.ArgumentError(args.folder, "-a must be given if -m is 1")
             if not args.predict:
                 raise argparse.ArgumentError(args.folder, "-p must be given if -m is 1")
             if args.annot and args.predict:
-                run(args.annot, args.predict, args.threshold)
+                run(args.annot, args.predict, args.threshold, args.verbose)
     else:
         raise argparse.ArgumentTypeError("Value has to be 1 or 2")
 
diff --git a/tests/test_run.py b/tests/test_run.py
index cedbd0d169f8336763927606f744569ef940f1d3..4a6e9d598c758b3eb26e538f62407b5601747256 100644
--- a/tests/test_run.py
+++ b/tests/test_run.py
@@ -66,8 +66,8 @@ expected_scores = {
 @pytest.mark.parametrize(
     "test_input, expected",
     [
-        ((FAKE_ANNOT_BIO, FAKE_PREDICT_BIO, THRESHOLD), expected_scores),
-        ((FAKE_BIO_NESTED, FAKE_BIO_NESTED, THRESHOLD), expected_scores_nested),
+        ((FAKE_ANNOT_BIO, FAKE_PREDICT_BIO, THRESHOLD, True), expected_scores),
+        ((FAKE_BIO_NESTED, FAKE_BIO_NESTED, THRESHOLD, True), expected_scores_nested),
     ],
 )
 def test_run(test_input, expected):
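
A minimal usage sketch of the updated run() signature, for trying the change outside the CLI; the file paths and threshold value below are illustrative and not taken from this patch:

    from nerval.evaluate import run

    # verbose=True prints the full per-tag table, verbose=False only the compact "All" recap
    scores = run("annotation.bio", "prediction.bio", threshold=30, verbose=True)
    print(scores["All"]["F1"])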