diff --git a/.isort.cfg b/.isort.cfg
index 5cf2203f71ccb398688527867bd52171af457f75..4fec7efb83e5dd9baca683fbc29282a494f4ec93 100644
--- a/.isort.cfg
+++ b/.isort.cfg
@@ -7,4 +7,4 @@
 use_parentheses = True
 line_length = 120
 default_section=FIRSTPARTY
-known_third_party = editdistance,edlib,pytest,setuptools,termtables
+known_third_party = editdistance,edlib,pytest,setuptools,prettytable
diff --git a/README.md b/README.md
index 31bb747ec3aed743ec38937032af3f47836b568b..be8e0ee694b4fc3ae814ac3a03ef6d3ce3f4acf0 100644
--- a/README.md
+++ b/README.md
@@ -50,7 +50,7 @@ Counting the spaces, 7 characters differ over 24 characters in the reference ent
 ### Demo
 
 ```
-$ nerval -a demo/bio_folder/demo_annot.bio -p demo/bio_folder/demo_predict.bio
+$ nerval -a demo/demo_annot.bio -p demo/demo_predict.bio
 ```
 
 We also provide two annotation and prediction toy files, which are identical for now and produce perfect scores. Feel free to play with the the text and entity tags in the prediction file to see the impact on the score.
@@ -62,13 +62,13 @@ $ nerval -a demo/toy_test_annot.bio -p demo/toy_test_predict.bio
 You can also indicate a folder and a csv file to have multiple evaluation at once.
 
 ```
-$ nerval -c demo/mapping_file.csv -f demo/bio_folder
+$ nerval -c demo/mapping_file.csv -f demo
 ```
 
 And with the verbose option that's triggered by -v
 
 ```
-$ nerval -a demo/bio_folder/demo_annot.bio -p demo/bio_folder/demo_predict.bio -v
+$ nerval -a demo/demo_annot.bio -p demo/demo_predict.bio -v
 ```
 
 ## Metric
diff --git a/nerval/evaluate.py b/nerval/evaluate.py
index 5cbe3abb11ef955955cbaa5a5142b3191373deb6..a2af0bd904f25b2e7d328cb1fb84c8f171a682be 100644
--- a/nerval/evaluate.py
+++ b/nerval/evaluate.py
@@ -1,6 +1,5 @@
 # -*- coding: utf-8 -*-
 
-import glob
 import logging
 import os
 from csv import reader
@@ -8,7 +7,7 @@ from pathlib import Path
 
 import editdistance
 import edlib
-import termtables as tt
+from prettytable import MARKDOWN, PrettyTable
 
 from nerval.parse import (
     BEGINNING_POS,
@@ -324,7 +323,7 @@ def run_multiple(file_csv, folder, threshold, verbose):
         list_cor = list(csv_reader)
 
     if os.path.isdir(folder):
-        list_bio_file = glob.glob(str(folder) + "/**/*.bio", recursive=True)
+        list_bio_file = list(folder.rglob("*.bio"))
 
         count = 0
         precision = 0
@@ -351,17 +350,19 @@ def run_multiple(file_csv, folder, threshold, verbose):
                 raise Exception(f"No file found for files {annot}, {predict}")
         if count:
             logger.info("Average score on all corpus")
-            tt.print(
+            table = PrettyTable()
+            table.field_names = ["Precision", "Recall", "F1"]
+            table.set_style(MARKDOWN)
+            table.add_rows(
                 [
                     [
                         round(precision / count, 3),
                         round(recall / count, 3),
                         round(f1 / count, 3),
                     ]
-                ],
-                ["Precision", "Recall", "F1"],
-                style=tt.styles.markdown,
+                ]
             )
+            print(table)
         else:
             raise Exception("No file were counted")
     else:
diff --git a/nerval/utils.py b/nerval/utils.py
index 595299e3712b856732892d09522f458c940ed844..21c6c20a9af7893c39353709f6645dbdfafefff6 100644
--- a/nerval/utils.py
+++ b/nerval/utils.py
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-import termtables as tt
+from prettytable import MARKDOWN, PrettyTable
 
 
 def print_results(scores: dict):
@@ -25,7 +25,12 @@ def print_results(scores: dict):
                 scores[tag]["Support"],
             ]
         )
-    tt.print(results, header, style=tt.styles.markdown)
+
+    table = PrettyTable()
+    table.field_names = header
+    table.set_style(MARKDOWN)
+    table.add_rows(results)
+    print(table)
 
 
 def print_result_compact(scores: dict):
@@ -41,4 +46,9 @@ def print_result_compact(scores: dict):
             scores["All"]["Support"],
         ]
     ]
-    tt.print(result, header, style=tt.styles.markdown)
+
+    table = PrettyTable()
+    table.field_names = header
+    table.set_style(MARKDOWN)
+    table.add_rows(result)
+    print(table)
diff --git a/requirements.txt b/requirements.txt
index 56ec7b8865a6acaae6aeb608b6801ab06baa7af3..ec0fabeefc9613bf3a18807cedb95a4f5f2fb6a8 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,3 @@
 editdistance==0.6.2
 edlib==1.3.9
-termtables==0.2.4
+prettytable==3.9.0
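For reference, a minimal standalone sketch of the prettytable usage this patch switches to (row values are illustrative, not part of the patch):

    # Sketch only: mirrors the PrettyTable calls introduced above.
    # The row values are illustrative; the real code fills them from nerval's score dict.
    from prettytable import MARKDOWN, PrettyTable

    table = PrettyTable()
    table.field_names = ["Precision", "Recall", "F1"]
    table.set_style(MARKDOWN)  # pipe-delimited output that renders as a Markdown table
    table.add_rows([[0.927, 0.857, 0.891]])
    print(table)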