From 282c315164d8147e69f386e88c3cd516c3858bc3 Mon Sep 17 00:00:00 2001 From: Charlotte Mauvezin <charlotte.mauvezin@irht.cnrs.fr> Date: Fri, 24 Dec 2021 13:38:19 +0100 Subject: [PATCH] Correction Chistopher --- README.md | 2 +- demo/{cor.csv => mapping_file.csv} | 1 - nerval/evaluate.py | 14 +++-- requirements.txt | 1 - tests/test_folder/test_demo_annot.bio | 82 +++++++++++++++++++++++++ tests/test_folder/test_demo_predict.bio | 81 ++++++++++++++++++++++++ tests/test_folder/test_toy_annot.bio | 39 ++++++++++++ tests/test_folder/test_toy_predict.bio | 39 ++++++++++++ tests/test_mapping_file.csv | 2 + tests/test_run.py | 7 +++ 10 files changed, 259 insertions(+), 9 deletions(-) rename demo/{cor.csv => mapping_file.csv} (83%) create mode 100644 tests/test_folder/test_demo_annot.bio create mode 100644 tests/test_folder/test_demo_predict.bio create mode 100644 tests/test_folder/test_toy_annot.bio create mode 100644 tests/test_folder/test_toy_predict.bio create mode 100644 tests/test_mapping_file.csv diff --git a/README.md b/README.md index f20bca8..2a3848e 100644 --- a/README.md +++ b/README.md @@ -62,7 +62,7 @@ $ nerval -a demo/toy_test_annot.bio -p demo/toy_test_predict.bio You can also indicate a folder and a csv file to have multiple evaluation at once. ``` -$ nerval -a demo/annot/ -p demo/pred/ -c demo/cor.csv +$ nerval -c demo/mapping_file.csv -f demo/bio_folder ``` ## Metric diff --git a/demo/cor.csv b/demo/mapping_file.csv similarity index 83% rename from demo/cor.csv rename to demo/mapping_file.csv index a0f41c6..5a2ce92 100644 --- a/demo/cor.csv +++ b/demo/mapping_file.csv @@ -1,3 +1,2 @@ -annot,predict demo_annot.bio,demo_predict.bio toy_test_annot.bio,toy_test_predict.bio \ No newline at end of file diff --git a/nerval/evaluate.py b/nerval/evaluate.py index ebdba10..7fa887f 100644 --- a/nerval/evaluate.py +++ b/nerval/evaluate.py @@ -5,11 +5,11 @@ import glob import logging import os import re +from csv import reader from pathlib import Path import editdistance import edlib -import pandas as pd import termtables as tt NOT_ENTITY_TAG = "O" @@ -542,13 +542,15 @@ def run(annotation: str, prediction: str, threshold: int) -> dict: def run_multiple(file_csv, folder, threshold): """Run the program for multiple files (correlation indicated in the csv file)""" - # Read the csv in a dataframe - df_cor = pd.read_csv(file_csv) + # Read the csv in a list + with open(file_csv, "r") as read_obj: + csv_reader = reader(read_obj) + list_cor = list(csv_reader) if os.path.isdir(folder): list_bio_file = glob.glob(str(folder) + "/**/*.bio", recursive=True) - for index, row in df_cor.iterrows(): + for row in list_cor: annot = None predict = None @@ -560,11 +562,11 @@ def run_multiple(file_csv, folder, threshold): predict = file if annot and predict: + print(os.path.basename(predict)) run(annot, predict, threshold) print() else: - raise f"No file found for row {index}" - + raise f"No file found for files {annot}, {predict}" else: raise "This is no folder" diff --git a/requirements.txt b/requirements.txt index 7ae8377..d6af2d0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,3 @@ editdistance==0.5.3 edlib==1.3.8.post2 -pandas==1.3.4 termtables==0.2.3 diff --git a/tests/test_folder/test_demo_annot.bio b/tests/test_folder/test_demo_annot.bio new file mode 100644 index 0000000..cf16200 --- /dev/null +++ b/tests/test_folder/test_demo_annot.bio @@ -0,0 +1,82 @@ +Césaire B-PER +Alphonse I-PER +Garon I-PER +marraine O +Adeline B-PER +Dionne I-PER +, O +soussignés O +Lecture O +faite O +Adéline O +Dionne O +Arsène O +Côté O +Arpin O +R O +Le O +onze B-DAT +aout I-DAT +mil I-DAT +neuf I-DAT +cent I-DAT +un I-DAT +nous O +prêtre O +soussigné O +avons O +baptisé O +Marie B-PER +Luce I-PER +Louise I-PER +, O +née O +la B-DAT +veille I-DAT +, O +fille O +légitime O +de O +Carmel B-PER +Côté I-PER +, O +cordonnier B-OCC +, O +pré O +- O +sent O +, O +déclarant O +ne O +savoir O +signer O +, O +et O +de O +Eugé B-PER +nie I-PER +Fréchette I-PER +, O +de O +cette B-LOC +paroisse I-LOC +. O +Parrain O +Napoléon B-PER +Fréchette I-PER +, O +marraine O +Adeline B-PER +Tremblay I-PER +, O +soussignés O +, O +de O +Ste B-LOC +Luce I-LOC +, O +Lec O +- O +ture O +faite O +. O diff --git a/tests/test_folder/test_demo_predict.bio b/tests/test_folder/test_demo_predict.bio new file mode 100644 index 0000000..7e01c2d --- /dev/null +++ b/tests/test_folder/test_demo_predict.bio @@ -0,0 +1,81 @@ +Césaire B-PER +Alphonse O +Garon B-PER +marraine O +Adeline B-PER +Dionne I-PER +, O +soussignés O +Lecture O +faite O +Adéline O +Dionne O +Arsène O +Côté O +Arpin O +R O +Le O +onze B-DAT +aout I-DAT +mil I-DAT +neuf I-DAT +cent I-DAT +un O +nous O +pretre O +soussigné O +avons O +baptisé O +Marie B-PER +Luce I-PER +Louise I-PER +, O +née O +la B-DAT +veille I-DAT +, O +fille O +légitime O +de O +Carmel B-PER +Côté I-PER +, O +cordonnier B-OCC +, O +pré O +- O +sent O +, O +déclarant O +ne O +savoir O +signer O +, O +et O +de O +Eugé B-PER +nie I-PER +Fréchette I-PER +, O +de O +cette B-LOC +paroisse I-LOC +. O +Parrain O +Napoléon B-PER +Fréchette I-PER +, O +marraine O +Adéline B-PER +Tremblay I-PER +, O +sousignés O +, O +de O +St B-LOC +. I-LOC +Luce O +, O +Lec O +ture O +faite O diff --git a/tests/test_folder/test_toy_annot.bio b/tests/test_folder/test_toy_annot.bio new file mode 100644 index 0000000..5a941ee --- /dev/null +++ b/tests/test_folder/test_toy_annot.bio @@ -0,0 +1,39 @@ +John B-PER +Ronald I-PER +Reuel I-PER +Tolkien I-PER +was O +born O +on O +three B-DAT +January I-DAT +eighteen I-DAT +ninety I-DAT +- I-DAT +two I-DAT +in O +Bloemfontein B-LOC +in O +the O +Orange B-LOC +Free I-LOC +State I-LOC +, O +to O +Arthur B-PER +Reuel I-PER +Tolkien I-PER +, O +an O +English O +bank B-OCC +manager I-OCC +, O +and O +his O +wife O +Mabel B-PER +, O +née O +Suffield B-PER +. O diff --git a/tests/test_folder/test_toy_predict.bio b/tests/test_folder/test_toy_predict.bio new file mode 100644 index 0000000..5a941ee --- /dev/null +++ b/tests/test_folder/test_toy_predict.bio @@ -0,0 +1,39 @@ +John B-PER +Ronald I-PER +Reuel I-PER +Tolkien I-PER +was O +born O +on O +three B-DAT +January I-DAT +eighteen I-DAT +ninety I-DAT +- I-DAT +two I-DAT +in O +Bloemfontein B-LOC +in O +the O +Orange B-LOC +Free I-LOC +State I-LOC +, O +to O +Arthur B-PER +Reuel I-PER +Tolkien I-PER +, O +an O +English O +bank B-OCC +manager I-OCC +, O +and O +his O +wife O +Mabel B-PER +, O +née O +Suffield B-PER +. O diff --git a/tests/test_mapping_file.csv b/tests/test_mapping_file.csv new file mode 100644 index 0000000..5a2ce92 --- /dev/null +++ b/tests/test_mapping_file.csv @@ -0,0 +1,2 @@ +demo_annot.bio,demo_predict.bio +toy_test_annot.bio,toy_test_predict.bio \ No newline at end of file diff --git a/tests/test_run.py b/tests/test_run.py index d0ea6d5..cedbd0d 100644 --- a/tests/test_run.py +++ b/tests/test_run.py @@ -9,6 +9,8 @@ FAKE_ANNOT_BIO = "tests/test_annot.bio" FAKE_PREDICT_BIO = "tests/test_predict.bio" EMPTY_BIO = "tests/test_empty.bio" FAKE_BIO_NESTED = "tests/test_nested.bio" +BIO_FOLDER = "test_folder" +CSV_FILE = "test_mapping_file.csv" expected_scores_nested = { "All": { @@ -81,3 +83,8 @@ def test_run_empty_bio(): def test_run_empty_entry(): with pytest.raises(TypeError): evaluate.run(None, None, THRESHOLD) + + +def test_run_multiple(): + with pytest.raises(Exception): + evaluate.run_multiple(CSV_FILE, BIO_FOLDER, THRESHOLD) -- GitLab