Skip to content
Snippets Groups Projects
Commit 1925e390 authored by Charlotte Mauvezin's avatar Charlotte Mauvezin
Browse files

Multiple input

parent 5940c8d9
No related branches found
No related tags found
1 merge request!10Multiple input
Pipeline #103815 failed
Césaire B-PER
Alphonse I-PER
Garon I-PER
marraine O
Adeline B-PER
Dionne I-PER
, O
soussignés O
Lecture O
faite O
Adéline O
Dionne O
Arsène O
Côté O
Arpin O
R O
Le O
onze B-DAT
aout I-DAT
mil I-DAT
neuf I-DAT
cent I-DAT
un I-DAT
nous O
prêtre O
soussigné O
avons O
baptisé O
Marie B-PER
Luce I-PER
Louise I-PER
, O
née O
la B-DAT
veille I-DAT
, O
fille O
légitime O
de O
Carmel B-PER
Côté I-PER
, O
cordonnier B-OCC
, O
pré O
- O
sent O
, O
déclarant O
ne O
savoir O
signer O
, O
et O
de O
Eugé B-PER
nie I-PER
Fréchette I-PER
, O
de O
cette B-LOC
paroisse I-LOC
. O
Parrain O
Napoléon B-PER
Fréchette I-PER
, O
marraine O
Adeline B-PER
Tremblay I-PER
, O
soussignés O
, O
de O
Ste B-LOC
Luce I-LOC
, O
Lec O
- O
ture O
faite O
. O
John B-PER
Ronald I-PER
Reuel I-PER
Tolkien I-PER
was O
born O
on O
three B-DAT
January I-DAT
eighteen I-DAT
ninety I-DAT
- I-DAT
two I-DAT
in O
Bloemfontein B-LOC
in O
the O
Orange B-LOC
Free I-LOC
State I-LOC
, O
to O
Arthur B-PER
Reuel I-PER
Tolkien I-PER
, O
an O
English O
bank B-OCC
manager I-OCC
, O
and O
his O
wife O
Mabel B-PER
, O
née O
Suffield B-PER
. O
annot,predict
demo_annot.bio,demo_predict.bio
toy_test_annot.bio,toy_test_predict.bio
\ No newline at end of file
Césaire B-PER
Alphonse O
Garon B-PER
marraine O
Adeline B-PER
Dionne I-PER
, O
soussignés O
Lecture O
faite O
Adéline O
Dionne O
Arsène O
Côté O
Arpin O
R O
Le O
onze B-DAT
aout I-DAT
mil I-DAT
neuf I-DAT
cent I-DAT
un O
nous O
pretre O
soussigné O
avons O
baptisé O
Marie B-PER
Luce I-PER
Louise I-PER
, O
née O
la B-DAT
veille I-DAT
, O
fille O
légitime O
de O
Carmel B-PER
Côté I-PER
, O
cordonnier B-OCC
, O
pré O
- O
sent O
, O
déclarant O
ne O
savoir O
signer O
, O
et O
de O
Eugé B-PER
nie I-PER
Fréchette I-PER
, O
de O
cette B-LOC
paroisse I-LOC
. O
Parrain O
Napoléon B-PER
Fréchette I-PER
, O
marraine O
Adéline B-PER
Tremblay I-PER
, O
sousignés O
, O
de O
St B-LOC
. I-LOC
Luce O
, O
Lec O
ture O
faite O
John B-PER
Ronald I-PER
Reuel I-PER
Tolkien I-PER
was O
born O
on O
three B-DAT
January I-DAT
eighteen I-DAT
ninety I-DAT
- I-DAT
two I-DAT
in O
Bloemfontein B-LOC
in O
the O
Orange B-LOC
Free I-LOC
State I-LOC
, O
to O
Arthur B-PER
Reuel I-PER
Tolkien I-PER
, O
an O
English O
bank B-OCC
manager I-OCC
, O
and O
his O
wife O
Mabel B-PER
, O
née O
Suffield B-PER
. O
......@@ -9,6 +9,11 @@ import editdistance
import edlib
import termtables as tt
import glob
from pathlib import Path
import pandas as pd
NOT_ENTITY_TAG = "O"
THRESHOLD = 0.30
......@@ -533,6 +538,39 @@ def run(annotation: str, prediction: str, threshold: int) -> dict:
return scores
def _index_bio_files(folder):
    """Map the basename of every ``.bio`` file found under *folder* to its path.

    The folder is searched recursively. When several files share a basename,
    the one listed last by ``glob`` is kept.
    """
    # os.fspath lets callers pass either a str or a pathlib.Path.
    pattern = os.path.join(os.fspath(folder), "**", "*.bio")
    return {
        os.path.basename(path): path
        for path in glob.glob(pattern, recursive=True)
    }


def run_multiple(file_csv, annot_folder, predict_folder, threshold) -> None:
    """Evaluate every annotation/prediction pair listed in a CSV file.

    The CSV must have an ``annot`` and a ``predict`` column; each row gives
    the basename of an annotation ``.bio`` file and of the prediction
    ``.bio`` file to compare it with. Both folders are searched recursively.

    Args:
        file_csv: Path of the CSV file describing the pairs.
        annot_folder: Folder (str or path-like) holding the annotation files.
        predict_folder: Folder (str or path-like) holding the prediction files.
        threshold: Value forwarded unchanged to ``run`` for each pair.

    A message is printed, and nothing is evaluated, when either folder is not
    a directory; a per-row message is printed when either file of a pair
    cannot be found.
    """
    # Read the csv in a dataframe
    df_cor = pd.read_csv(file_csv)

    # Both arguments must be existing folders.
    if not (os.path.isdir(annot_folder) and os.path.isdir(predict_folder)):
        print("Error this is no folder")
        return

    # Index both trees once instead of rescanning the file lists per row.
    annot_by_name = _index_bio_files(annot_folder)
    predict_by_name = _index_bio_files(predict_folder)

    for index, row in df_cor.iterrows():
        annot = annot_by_name.get(row["annot"])
        predict = predict_by_name.get(row["predict"])

        # Apply the evaluation only when both files of the pair exist.
        if annot and predict:
            run(annot, predict, threshold)
            print("")
        else:
            print(f"No file found for row {index}")
def threshold_float_type(arg):
"""Type function for argparse."""
try:
......@@ -564,9 +602,19 @@ def main():
default=THRESHOLD,
type=threshold_float_type,
)
parser.add_argument(
"-c",
"--csv",
help="csv with the correlation between the annotation bio file and the predict bio file",
required=False,
type=Path,
)
args = parser.parse_args()
run(args.annot, args.predict, args.threshold)
if args.csv:
run_multiple(args.csv, args.annot, args.predict, args.threshold)
else:
run(args.annot, args.predict, args.threshold)
if __name__ == "__main__":
......
editdistance==0.5.3
edlib==1.3.8.post2
termtables==0.2.3
pandas==1.3.4
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment