diff --git a/nerval/evaluate.py b/nerval/evaluate.py
index 6e2fef9d3631ce2545243188b68fb95224eb0a27..60c9a9906197545dcdf16a2c5eccb19500830ca8 100644
--- a/nerval/evaluate.py
+++ b/nerval/evaluate.py
@@ -485,7 +485,24 @@ def print_results(scores: dict):
     tt.print(results, header, style=tt.styles.markdown)
 
 
-def run(annotation: str, prediction: str, threshold: int) -> dict:
+def print_result_compact(scores: dict):
+    result = []
+    header = ["tag", "predicted", "matched", "Precision", "Recall", "F1", "Support"]
+    result.append(
+        [
+            "All",
+            scores["All"]["predicted"],
+            scores["All"]["matched"],
+            round(scores["All"]["P"], 3),
+            round(scores["All"]["R"], 3),
+            round(scores["All"]["F1"], 3),
+            scores["All"]["Support"],
+        ]
+    )
+    tt.print(result, header, style=tt.styles.markdown)
+
+
+def run(annotation: str, prediction: str, threshold: int, verbose: bool) -> dict:
     """Compute recall and precision for each entity type found in annotation and/or prediction.
 
     Each measure is given at document level, global score is a micro-average across entity types.
@@ -528,7 +545,10 @@ def run(annotation: str, prediction: str, threshold: int) -> dict:
     scores = compute_scores(annot["entity_count"], predict["entity_count"], matches)
 
     # Print results
-    print_results(scores)
+    if verbose:
+        print_results(scores)
+    else:
+        print_result_compact(scores)
 
     return scores
 
@@ -564,9 +584,16 @@ def main():
         default=THRESHOLD,
         type=threshold_float_type,
     )
+    parser.add_argument(
+        "-v",
+        "--verbose",
+        help="Print only the recap if False",
+        action="store_false",
+    )
+
     args = parser.parse_args()
 
-    run(args.annot, args.predict, args.threshold)
+    run(args.annot, args.predict, args.threshold, args.verbose)
 
 
 if __name__ == "__main__":
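
For context, a minimal usage sketch of the updated `run()` signature is given below. The BIO file paths and the 0.30 threshold are hypothetical placeholders, not values taken from the diff; the score keys (`"All"`, `"P"`, `"R"`, `"F1"`) mirror the ones read by `print_result_compact` above.

```python
# Minimal sketch of the updated interface, assuming the signatures shown in the diff.
# The file paths and threshold value below are hypothetical placeholders.
from nerval.evaluate import run

# verbose=True keeps the previous behaviour: one row per entity type.
scores = run("annotation.bio", "prediction.bio", threshold=0.30, verbose=True)

# verbose=False prints only the global "All" row via print_result_compact().
run("annotation.bio", "prediction.bio", threshold=0.30, verbose=False)

# The returned dict is unchanged either way, so callers can still read per-tag metrics.
print(round(scores["All"]["F1"], 3))
```

Note that the new argparse flag uses `action="store_false"`, so from the command line the default remains the full per-tag table, and passing `-v`/`--verbose` switches the output to the compact recap.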