Use csv.DictReader rather than csv.reader

e5bf32a6 · Eva Bardou · 525c1a9e · e5bf32a6 · e5bf32a6
Commit e5bf32a6 authored 1 year ago by Eva Bardou
--- a/demo/mapping_file.csv
+++ b/demo/mapping_file.csv
+Annotation,Prediction
 demo_annot.bio,demo_predict.bio
 toy_test_annot.bio,toy_test_predict.bio
\ No newline at end of file
--- a/nerval/evaluate.py
+++ b/nerval/evaluate.py
+import csv
 import logging
-from csv import reader
 from pathlib import Path
 from typing import List

@@ -19,6 +19,10 @@ from nerval.utils import print_result_compact, print_results

 logger = logging.getLogger(__name__)

+ANNO_COLUMN = "Annotation"
+PRED_COLUMN = "Prediction"
+CSV_HEADER = [ANNO_COLUMN, PRED_COLUMN]
+

 def compute_matches(
    annotation: str,
@@ -346,7 +350,7 @@ def run_multiple(file_csv: Path, folder: Path, threshold: int, verbose: bool):
    """Run the program for multiple files (correlation indicated in the csv file)"""
    # Read the csv in a list
    with file_csv.open() as read_obj:
-        csv_reader = reader(read_obj)
+        csv_reader = csv.DictReader(read_obj, fieldnames=CSV_HEADER)
        list_cor = list(csv_reader)

    if folder.is_dir():
@@ -361,10 +365,10 @@ def run_multiple(file_csv: Path, folder: Path, threshold: int, verbose: bool):
            predict = None

            for file in list_bio_file:
-                if row[0] == file.name:
+                if row[ANNO_COLUMN] == file.name:
                    annot = file
            for file in list_bio_file:
-                if row[1] == file.name:
+                if row[PRED_COLUMN] == file.name:
                    predict = file

            if annot and predict:
@@ -374,7 +378,9 @@ def run_multiple(file_csv: Path, folder: Path, threshold: int, verbose: bool):
                recall += scores["All"]["R"]
                f1 += scores["All"]["F1"]
            else:
-                raise Exception(f"No file found for files {row[0]}, {row[1]}")
+                raise Exception(
+                    f"No file found for files {row[ANNO_COLUMN]}, {row[PRED_COLUMN]}"
+                )
        if count:
            logger.info("Average score on all corpus")
            table = PrettyTable()