From 282c315164d8147e69f386e88c3cd516c3858bc3 Mon Sep 17 00:00:00 2001
From: Charlotte Mauvezin <charlotte.mauvezin@irht.cnrs.fr>
Date: Fri, 24 Dec 2021 13:38:19 +0100
Subject: [PATCH] Correction Christopher

---
 README.md                               |  2 +-
 demo/{cor.csv => mapping_file.csv}      |  1 -
 nerval/evaluate.py                      | 14 +++--
 requirements.txt                        |  1 -
 tests/test_folder/test_demo_annot.bio   | 82 +++++++++++++++++++++++++
 tests/test_folder/test_demo_predict.bio | 81 ++++++++++++++++++++++++
 tests/test_folder/test_toy_annot.bio    | 39 ++++++++++++
 tests/test_folder/test_toy_predict.bio  | 39 ++++++++++++
 tests/test_mapping_file.csv             |  2 +
 tests/test_run.py                       |  7 +++
 10 files changed, 259 insertions(+), 9 deletions(-)
 rename demo/{cor.csv => mapping_file.csv} (83%)
 create mode 100644 tests/test_folder/test_demo_annot.bio
 create mode 100644 tests/test_folder/test_demo_predict.bio
 create mode 100644 tests/test_folder/test_toy_annot.bio
 create mode 100644 tests/test_folder/test_toy_predict.bio
 create mode 100644 tests/test_mapping_file.csv

diff --git a/README.md b/README.md
index f20bca8..2a3848e 100644
--- a/README.md
+++ b/README.md
@@ -62,7 +62,7 @@ $ nerval -a demo/toy_test_annot.bio -p demo/toy_test_predict.bio
 You can also indicate a folder and a csv file to have multiple evaluation at once.
 
 ```
-$ nerval -a demo/annot/ -p demo/pred/ -c demo/cor.csv
+$ nerval -c demo/mapping_file.csv -f demo/bio_folder
 ```
 
 ## Metric
diff --git a/demo/cor.csv b/demo/mapping_file.csv
similarity index 83%
rename from demo/cor.csv
rename to demo/mapping_file.csv
index a0f41c6..5a2ce92 100644
--- a/demo/cor.csv
+++ b/demo/mapping_file.csv
@@ -1,3 +1,2 @@
-annot,predict
 demo_annot.bio,demo_predict.bio
 toy_test_annot.bio,toy_test_predict.bio
\ No newline at end of file
diff --git a/nerval/evaluate.py b/nerval/evaluate.py
index ebdba10..7fa887f 100644
--- a/nerval/evaluate.py
+++ b/nerval/evaluate.py
@@ -5,11 +5,11 @@ import glob
 import logging
 import os
 import re
+from csv import reader
 from pathlib import Path
 
 import editdistance
 import edlib
-import pandas as pd
 import termtables as tt
 
 NOT_ENTITY_TAG = "O"
@@ -542,13 +542,15 @@ def run(annotation: str, prediction: str, threshold: int) -> dict:
 
 def run_multiple(file_csv, folder, threshold):
     """Run the program for multiple files (correlation indicated in the csv file)"""
-    # Read the csv in a dataframe
-    df_cor = pd.read_csv(file_csv)
+    # Read the CSV rows into a list
+    with open(file_csv, "r") as read_obj:
+        csv_reader = reader(read_obj)
+        list_cor = list(csv_reader)
 
     if os.path.isdir(folder):
         list_bio_file = glob.glob(str(folder) + "/**/*.bio", recursive=True)
 
-        for index, row in df_cor.iterrows():
+        for row in list_cor:
             annot = None
             predict = None
 
@@ -560,11 +562,11 @@ def run_multiple(file_csv, folder, threshold):
                     predict = file
 
             if annot and predict:
+                print(os.path.basename(predict))
                 run(annot, predict, threshold)
                 print()
             else:
-                raise f"No file found for row {index}"
-
+                raise f"No file found for files {annot}, {predict}"
     else:
         raise "This is no folder"
 
diff --git a/requirements.txt b/requirements.txt
index 7ae8377..d6af2d0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,3 @@
 editdistance==0.5.3
 edlib==1.3.8.post2
-pandas==1.3.4
 termtables==0.2.3
diff --git a/tests/test_folder/test_demo_annot.bio b/tests/test_folder/test_demo_annot.bio
new file mode 100644
index 0000000..cf16200
--- /dev/null
+++ b/tests/test_folder/test_demo_annot.bio
@@ -0,0 +1,82 @@
+Césaire B-PER
+Alphonse I-PER
+Garon I-PER
+marraine O
+Adeline B-PER
+Dionne I-PER
+, O
+soussignés O
+Lecture O
+faite O
+Adéline O
+Dionne O
+Arsène O
+Côté O
+Arpin O
+R O
+Le O
+onze B-DAT
+aout I-DAT
+mil I-DAT
+neuf I-DAT
+cent I-DAT
+un I-DAT
+nous O
+prêtre O
+soussigné O
+avons O
+baptisé O
+Marie B-PER
+Luce I-PER
+Louise I-PER
+, O
+née O
+la B-DAT
+veille I-DAT
+, O
+fille O
+légitime O
+de O
+Carmel B-PER
+Côté I-PER
+, O
+cordonnier B-OCC
+, O
+pré O
+- O
+sent O
+, O
+déclarant O
+ne O
+savoir O
+signer O
+, O
+et O
+de O
+Eugé B-PER
+nie I-PER
+Fréchette I-PER
+, O
+de O
+cette B-LOC
+paroisse I-LOC
+. O
+Parrain O
+Napoléon B-PER
+Fréchette I-PER
+, O
+marraine O
+Adeline B-PER
+Tremblay I-PER
+, O
+soussignés O
+, O
+de O
+Ste B-LOC
+Luce I-LOC
+, O
+Lec O
+- O
+ture O
+faite O
+. O
diff --git a/tests/test_folder/test_demo_predict.bio b/tests/test_folder/test_demo_predict.bio
new file mode 100644
index 0000000..7e01c2d
--- /dev/null
+++ b/tests/test_folder/test_demo_predict.bio
@@ -0,0 +1,81 @@
+Césaire B-PER
+Alphonse O
+Garon B-PER
+marraine O
+Adeline B-PER
+Dionne I-PER
+, O
+soussignés O
+Lecture O
+faite O
+Adéline O
+Dionne O
+Arsène O
+Côté O
+Arpin O
+R O
+Le O
+onze B-DAT
+aout I-DAT
+mil I-DAT
+neuf I-DAT
+cent I-DAT
+un O
+nous O
+pretre O
+soussigné O
+avons O
+baptisé O
+Marie B-PER
+Luce I-PER
+Louise I-PER
+, O
+née O
+la B-DAT
+veille I-DAT
+, O
+fille O
+légitime O
+de O
+Carmel B-PER
+Côté I-PER
+, O
+cordonnier B-OCC
+, O
+pré O
+- O
+sent O
+, O
+déclarant O
+ne O
+savoir O
+signer O
+, O
+et O
+de O
+Eugé B-PER
+nie I-PER
+Fréchette I-PER
+, O
+de O
+cette B-LOC
+paroisse I-LOC
+. O
+Parrain O
+Napoléon B-PER
+Fréchette I-PER
+, O
+marraine O
+Adéline B-PER
+Tremblay I-PER
+, O
+sousignés O
+, O
+de O
+St B-LOC
+. I-LOC
+Luce O
+, O
+Lec O
+ture O
+faite O
diff --git a/tests/test_folder/test_toy_annot.bio b/tests/test_folder/test_toy_annot.bio
new file mode 100644
index 0000000..5a941ee
--- /dev/null
+++ b/tests/test_folder/test_toy_annot.bio
@@ -0,0 +1,39 @@
+John B-PER
+Ronald I-PER
+Reuel I-PER
+Tolkien I-PER
+was O
+born O
+on O
+three B-DAT
+January I-DAT
+eighteen I-DAT
+ninety I-DAT
+- I-DAT
+two I-DAT
+in O
+Bloemfontein B-LOC
+in O
+the O
+Orange B-LOC
+Free I-LOC
+State I-LOC
+, O
+to O
+Arthur B-PER
+Reuel I-PER
+Tolkien I-PER
+, O
+an O
+English O
+bank B-OCC
+manager I-OCC
+, O
+and O
+his O
+wife O
+Mabel B-PER
+, O
+née O
+Suffield B-PER
+. O
diff --git a/tests/test_folder/test_toy_predict.bio b/tests/test_folder/test_toy_predict.bio
new file mode 100644
index 0000000..5a941ee
--- /dev/null
+++ b/tests/test_folder/test_toy_predict.bio
@@ -0,0 +1,39 @@
+John B-PER
+Ronald I-PER
+Reuel I-PER
+Tolkien I-PER
+was O
+born O
+on O
+three B-DAT
+January I-DAT
+eighteen I-DAT
+ninety I-DAT
+- I-DAT
+two I-DAT
+in O
+Bloemfontein B-LOC
+in O
+the O
+Orange B-LOC
+Free I-LOC
+State I-LOC
+, O
+to O
+Arthur B-PER
+Reuel I-PER
+Tolkien I-PER
+, O
+an O
+English O
+bank B-OCC
+manager I-OCC
+, O
+and O
+his O
+wife O
+Mabel B-PER
+, O
+née O
+Suffield B-PER
+. O
diff --git a/tests/test_mapping_file.csv b/tests/test_mapping_file.csv
new file mode 100644
index 0000000..5a2ce92
--- /dev/null
+++ b/tests/test_mapping_file.csv
@@ -0,0 +1,2 @@
+demo_annot.bio,demo_predict.bio
+toy_test_annot.bio,toy_test_predict.bio
\ No newline at end of file
diff --git a/tests/test_run.py b/tests/test_run.py
index d0ea6d5..cedbd0d 100644
--- a/tests/test_run.py
+++ b/tests/test_run.py
@@ -9,6 +9,8 @@ FAKE_ANNOT_BIO = "tests/test_annot.bio"
 FAKE_PREDICT_BIO = "tests/test_predict.bio"
 EMPTY_BIO = "tests/test_empty.bio"
 FAKE_BIO_NESTED = "tests/test_nested.bio"
+BIO_FOLDER = "test_folder"
+CSV_FILE = "test_mapping_file.csv"
 
 expected_scores_nested = {
     "All": {
@@ -81,3 +83,8 @@ def test_run_empty_bio():
 def test_run_empty_entry():
     with pytest.raises(TypeError):
         evaluate.run(None, None, THRESHOLD)
+
+
+def test_run_multiple():
+    with pytest.raises(Exception):
+        evaluate.run_multiple(CSV_FILE, BIO_FOLDER, THRESHOLD)
-- 
GitLab