Skip to content
Snippets Groups Projects
Commit 61aec3df authored by Charlotte Mauvezin's avatar Charlotte Mauvezin Committed by Blanche Miret
Browse files

Better parser

parent a3752ba3
No related branches found
No related tags found
1 merge request!13Better parser
......@@ -50,7 +50,7 @@ Counting the spaces, 7 characters differ over 24 characters in the reference ent
### Demo
```
$ nerval -a demo/demo_annot.bio -p demo/demo_predict.bio
$ nerval -a demo/bio_folder/demo_annot.bio -p demo/bio_folder/demo_predict.bio
```
We also provide two annotation and prediction toy files, which are identical for now and produce perfect scores. Feel free to play with the text and entity tags in the prediction file to see the impact on the score.
......@@ -65,6 +65,12 @@ You can also indicate a folder and a csv file to have multiple evaluation at onc
$ nerval -c demo/mapping_file.csv -f demo/bio_folder
```
And with the verbose option, which is triggered by -v:
```
$ nerval -a demo/bio_folder/demo_annot.bio -p demo/bio_folder/demo_predict.bio -v
```
## Metric
This metric uses string alignment at character level.
......
Césaire B-PER
Alphonse I-PER
Garon I-PER
marraine O
Adeline B-PER
Dionne I-PER
, O
soussignés O
Lecture O
faite O
Adéline O
Dionne O
Arsène O
Côté O
Arpin O
R O
Le O
onze B-DAT
aout I-DAT
mil I-DAT
neuf I-DAT
cent I-DAT
un I-DAT
nous O
prêtre O
soussigné O
avons O
baptisé O
Marie B-PER
Luce I-PER
Louise I-PER
, O
née O
la B-DAT
veille I-DAT
, O
fille O
légitime O
de O
Carmel B-PER
Côté I-PER
, O
cordonnier B-OCC
, O
pré O
- O
sent O
, O
déclarant O
ne O
savoir O
signer O
, O
et O
de O
Eugé B-PER
nie I-PER
Fréchette I-PER
, O
de O
cette B-LOC
paroisse I-LOC
. O
Parrain O
Napoléon B-PER
Fréchette I-PER
, O
marraine O
Adeline B-PER
Tremblay I-PER
, O
soussignés O
, O
de O
Ste B-LOC
Luce I-LOC
, O
Lec O
- O
ture O
faite O
. O
Césaire B-PER
Alphonse O
Garon B-PER
marraine O
Adeline B-PER
Dionne I-PER
, O
soussignés O
Lecture O
faite O
Adéline O
Dionne O
Arsène O
Côté O
Arpin O
R O
Le O
onze B-DAT
aout I-DAT
mil I-DAT
neuf I-DAT
cent I-DAT
un O
nous O
pretre O
soussigné O
avons O
baptisé O
Marie B-PER
Luce I-PER
Louise I-PER
, O
née O
la B-DAT
veille I-DAT
, O
fille O
légitime O
de O
Carmel B-PER
Côté I-PER
, O
cordonnier B-OCC
, O
pré O
- O
sent O
, O
déclarant O
ne O
savoir O
signer O
, O
et O
de O
Eugé B-PER
nie I-PER
Fréchette I-PER
, O
de O
cette B-LOC
paroisse I-LOC
. O
Parrain O
Napoléon B-PER
Fréchette I-PER
, O
marraine O
Adéline B-PER
Tremblay I-PER
, O
sousignés O
, O
de O
St B-LOC
. I-LOC
Luce O
, O
Lec O
ture O
faite O
John B-PER
Ronald I-PER
Reuel I-PER
Tolkien I-PER
was O
born O
on O
three B-DAT
January I-DAT
eighteen I-DAT
ninety I-DAT
- I-DAT
two I-DAT
in O
Bloemfontein B-LOC
in O
the O
Orange B-LOC
Free I-LOC
State I-LOC
, O
to O
Arthur B-PER
Reuel I-PER
Tolkien I-PER
, O
an O
English O
bank B-OCC
manager I-OCC
, O
and O
his O
wife O
Mabel B-PER
, O
née O
Suffield B-PER
. O
John B-PER
Ronald I-PER
Reuel I-PER
Tolkien I-PER
was O
born O
on O
three B-DAT
January I-DAT
eighteen I-DAT
ninety I-DAT
- I-DAT
two I-DAT
in O
Bloemfontein B-LOC
in O
the O
Orange B-LOC
Free I-LOC
State I-LOC
, O
to O
Arthur B-PER
Reuel I-PER
Tolkien I-PER
, O
an O
English O
bank B-OCC
manager I-OCC
, O
and O
his O
wife O
Mabel B-PER
, O
née O
Suffield B-PER
. O
......@@ -595,11 +595,17 @@ def run_multiple(file_csv, folder, threshold, verbose):
else:
raise Exception(f"No file found for files {annot}, {predict}")
if count:
print(
"Average scores in all corpus (mean of final files scores)\n"
f" * Precision: {round(precision/count, 3)}\n"
f" * Recall: {round(recall/count, 3)}\n"
f" * F1: {round(f1/count, 3)}\n"
print("Average score on all corpus")
tt.print(
[
[
round(precision / count, 3),
round(recall / count, 3),
round(f1 / count, 3),
]
],
["Precision", "Recall", "F1"],
style=tt.styles.markdown,
)
else:
raise Exception("No file were counted")
......@@ -624,36 +630,24 @@ def main():
logging.basicConfig(level=logging.INFO)
parser = argparse.ArgumentParser(description="Compute score of NER on predict.")
parser.add_argument(
"-m",
"--multiple",
help="Single if 1, multiple 2",
type=int,
required=True,
)
parser.add_argument(
group = parser.add_mutually_exclusive_group()
group.add_argument(
"-a",
"--annot",
help="Annotation in BIO format.",
)
parser.add_argument(
"-p",
"--predict",
help="Prediction in BIO format.",
)
parser.add_argument(
"-t",
"--threshold",
help="Set a distance threshold for the match between gold and predicted entity.",
default=THRESHOLD,
type=threshold_float_type,
)
parser.add_argument(
group.add_argument(
"-c",
"--csv",
help="Csv with the correlation between the annotation bio files and the predict bio files",
type=Path,
)
parser.add_argument(
"-p",
"--predict",
help="Prediction in BIO format.",
)
parser.add_argument(
"-f",
"--folder",
......@@ -666,25 +660,30 @@ def main():
help="Print only the recap if False",
action="store_false",
)
parser.add_argument(
"-t",
"--threshold",
help="Set a distance threshold for the match between gold and predicted entity.",
default=THRESHOLD,
type=threshold_float_type,
)
args = parser.parse_args()
if args.multiple == 1 or args.multiple == 2:
if args.multiple == 2:
if not args.folder:
raise argparse.ArgumentError(args.folder, "-f must be given if -m is 2")
if not args.csv:
raise argparse.ArgumentError(args.folder, "-c must be given if -m is 2")
if args.folder and args.csv:
run_multiple(args.csv, args.folder, args.threshold, args.verbose)
if args.multiple == 1:
if not args.annot:
raise argparse.ArgumentError(args.folder, "-a must be given if -m is 1")
if not args.predict:
raise argparse.ArgumentError(args.folder, "-p must be given if -m is 1")
if args.annot and args.predict:
run(args.annot, args.predict, args.threshold, args.verbose)
if args.annot:
if not args.predict:
raise parser.error("You need to specify the path to a predict file with -p")
if args.annot and args.predict:
run(args.annot, args.predict, args.threshold, args.verbose)
elif args.csv:
if not args.folder:
raise parser.error(
"You need to specify the path to a folder of bio files with -f"
)
if args.folder and args.csv:
run_multiple(args.csv, args.folder, args.threshold, args.verbose)
else:
raise argparse.ArgumentTypeError("Value has to be 1 or 2")
raise parser.error("You need to specify the argument of input file")
if __name__ == "__main__":
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment