Rebase

d9eb9063 · Charlotte Mauvezin · 0c57d17a · d9eb9063
Commit d9eb9063 authored 3 years ago by Charlotte Mauvezin
--- a/nerval/evaluate.py
+++ b/nerval/evaluate.py
 # -*- coding: utf-8 -*-

 import argparse
+import glob
 import logging
 import os
 import re
+from csv import reader
+from pathlib import Path

 import editdistance
 import edlib
@@ -81,7 +84,11 @@ def parse_bio(path: str) -> dict:
        try:
            word, label = line.split()
        except ValueError:
-            raise (Exception(f"The file {path} given in input is not in BIO format."))
+            raise (
+                Exception(
+                    f"The file {path} given in input is not in BIO format: check line {index} ({line})"
+                )
+            )

        # Preserve hyphens to avoid confusion with the hyphens added later during alignment
        word = word.replace("-", "§")
@@ -553,6 +560,37 @@ def run(annotation: str, prediction: str, threshold: int, verbose: bool) -> dict
    return scores


+def run_multiple(file_csv, folder, threshold, verbose):
+    """Run the evaluation on every annotation/prediction pair listed in a csv file.
+
+    Each non-empty csv row names an annotation file (first column) and a
+    prediction file (second column). Both names are looked up by basename
+    among the ``.bio`` files found recursively under ``folder``, and ``run``
+    is called on each resolved pair.
+
+    Args:
+        file_csv: Path of the csv file giving the correlation between files.
+        folder: Directory searched recursively for ``.bio`` files.
+        threshold: Forwarded unchanged to ``run``.
+        verbose: Forwarded unchanged to ``run``.
+
+    Raises:
+        Exception: If ``folder`` is not a directory, if a csv row has fewer
+            than two columns, or if a file named in the csv cannot be found
+            under ``folder``.
+    """
+    # newline="" lets the csv module handle line endings itself.
+    with open(file_csv, newline="") as read_obj:
+        list_cor = list(reader(read_obj))
+
+    if not os.path.isdir(folder):
+        raise Exception("the path indicated does not lead to a folder.")
+
+    # Index the bio files by basename once instead of rescanning the list for
+    # every row. The last match wins when the same basename appears in
+    # several sub-folders.
+    bio_files = {
+        os.path.basename(path): path
+        for path in glob.glob(str(folder) + "/**/*.bio", recursive=True)
+    }
+
+    for row in list_cor:
+        # csv.reader yields an empty list for blank lines: nothing to evaluate.
+        if not row:
+            continue
+        if len(row) < 2:
+            raise Exception(
+                f"Each row of {file_csv} must give an annotation file and a prediction file: {row}"
+            )
+
+        # Report the names requested in the csv, since the resolved paths do
+        # not exist for exactly the entries that are missing.
+        missing = [name for name in row[:2] if name not in bio_files]
+        if missing:
+            raise Exception(f"No file found for files {', '.join(missing)}")
+
+        annot = bio_files[row[0]]
+        predict = bio_files[row[1]]
+
+        print(os.path.basename(predict))
+        run(annot, predict, threshold, verbose)
+        print()
+
+
 def threshold_float_type(arg):
    """Type function for argparse."""
    try:
@@ -571,30 +609,66 @@ def main():

    parser = argparse.ArgumentParser(description="Compute score of NER on predict.")
    parser.add_argument(
-        "-a", "--annot", help="Annotation in BIO format.", required=True
+        "-m",
+        "--multiple",
+        help="Single if 1, multiple 2",
+        type=int,
+        required=True,
+    )
+    parser.add_argument(
+        "-a",
+        "--annot",
+        help="Annotation in BIO format.",
    )
    parser.add_argument(
-        "-p", "--predict", help="Prediction in BIO format.", required=True
+        "-p",
+        "--predict",
+        help="Prediction in BIO format.",
    )
    parser.add_argument(
        "-t",
        "--threshold",
        help="Set a distance threshold for the match between gold and predicted entity.",
-        required=False,
        default=THRESHOLD,
        type=threshold_float_type,
    )
+    parser.add_argument(
+        "-c",
+        "--csv",
+        help="Csv with the correlation between the annotation bio files and the predict bio files",
+        type=Path,
+    )
+    parser.add_argument(
+        "-f",
+        "--folder",
+        help="Folder containing the bio files referred to in the csv file",
+        type=Path,
+    )
    parser.add_argument(
        "-v",
        "--verbose",
-        help="Print only the recap if False and detailed results if True (default)",
+        help="Print only the recap if False",
        action="store_false",
-        default="True",
    )
-
    args = parser.parse_args()

-    run(args.annot, args.predict, args.threshold, args.verbose)
+    if args.multiple == 1 or args.multiple == 2:
+        if args.multiple == 2:
+            if not args.folder:
+                raise argparse.ArgumentError(args.folder, "-f must be given if -m is 2")
+            if not args.csv:
+                raise argparse.ArgumentError(args.folder, "-c must be given if -m is 2")
+            if args.folder and args.csv:
+                run_multiple(args.csv, args.folder, args.threshold, args.verbose)
+        if args.multiple == 1:
+            if not args.annot:
+                raise argparse.ArgumentError(args.folder, "-a must be given if -m is 1")
+            if not args.predict:
+                raise argparse.ArgumentError(args.folder, "-p must be given if -m is 1")
+            if args.annot and args.predict:
+                run(args.annot, args.predict, args.threshold, args.verbose)
+    else:
+        raise argparse.ArgumentTypeError("Value has to be 1 or 2")


 if __name__ == "__main__":