Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
nerval
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Named Entity Recognition
nerval
Commits
d9eb9063
Commit
d9eb9063
authored
3 years ago
by
Charlotte Mauvezin
Browse files
Options
Downloads
Patches
Plain Diff
Rebase
parent
0c57d17a
No related branches found
No related tags found
1 merge request
!11
Adding verbose option lunched by adding -v
Pipeline
#103826
passed
3 years ago
Stage: test
Changes
1
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
nerval/evaluate.py
+82
-8
82 additions, 8 deletions
nerval/evaluate.py
with
82 additions
and
8 deletions
nerval/evaluate.py
+
82
−
8
View file @
d9eb9063
# -*- coding: utf-8 -*-
import
argparse
import
glob
import
logging
import
os
import
re
from
csv
import
reader
from
pathlib
import
Path
import
editdistance
import
edlib
...
...
@@ -81,7 +84,11 @@ def parse_bio(path: str) -> dict:
try
:
word
,
label
=
line
.
split
()
except
ValueError
:
raise
(
Exception
(
f
"
The file
{
path
}
given in input is not in BIO format.
"
))
raise
(
Exception
(
f
"
The file
{
path
}
given in input is not in BIO format: check line
{
index
}
(
{
line
}
)
"
)
)
# Preserve hyphens to avoid confusion with the hyphens added later during alignment
word
=
word
.
replace
(
"
-
"
,
"
§
"
)
...
...
@@ -553,6 +560,37 @@ def run(annotation: str, prediction: str, threshold: int, verbose: bool) -> dict
return
scores
def
run_multiple
(
file_csv
,
folder
,
threshold
,
verbose
):
"""
Run the program for multiple files (correlation indicated in the csv file)
"""
# Read the csv in a list
with
open
(
file_csv
,
"
r
"
)
as
read_obj
:
csv_reader
=
reader
(
read_obj
)
list_cor
=
list
(
csv_reader
)
if
os
.
path
.
isdir
(
folder
):
list_bio_file
=
glob
.
glob
(
str
(
folder
)
+
"
/**/*.bio
"
,
recursive
=
True
)
for
row
in
list_cor
:
annot
=
None
predict
=
None
for
file
in
list_bio_file
:
if
row
[
0
]
==
os
.
path
.
basename
(
file
):
annot
=
file
for
file
in
list_bio_file
:
if
row
[
1
]
==
os
.
path
.
basename
(
file
):
predict
=
file
if
annot
and
predict
:
print
(
os
.
path
.
basename
(
predict
))
run
(
annot
,
predict
,
threshold
,
verbose
)
print
()
else
:
raise
f
"
No file found for files
{
annot
}
,
{
predict
}
"
else
:
raise
Exception
(
"
the path indicated does not lead to a folder.
"
)
def
threshold_float_type
(
arg
):
"""
Type function for argparse.
"""
try
:
...
...
@@ -571,30 +609,66 @@ def main():
parser
=
argparse
.
ArgumentParser
(
description
=
"
Compute score of NER on predict.
"
)
parser
.
add_argument
(
"
-a
"
,
"
--annot
"
,
help
=
"
Annotation in BIO format.
"
,
required
=
True
"
-m
"
,
"
--multiple
"
,
help
=
"
Single if 1, multiple 2
"
,
type
=
int
,
required
=
True
,
)
parser
.
add_argument
(
"
-a
"
,
"
--annot
"
,
help
=
"
Annotation in BIO format.
"
,
)
parser
.
add_argument
(
"
-p
"
,
"
--predict
"
,
help
=
"
Prediction in BIO format.
"
,
required
=
True
"
-p
"
,
"
--predict
"
,
help
=
"
Prediction in BIO format.
"
,
)
parser
.
add_argument
(
"
-t
"
,
"
--threshold
"
,
help
=
"
Set a distance threshold for the match between gold and predicted entity.
"
,
required
=
False
,
default
=
THRESHOLD
,
type
=
threshold_float_type
,
)
parser
.
add_argument
(
"
-c
"
,
"
--csv
"
,
help
=
"
Csv with the correlation between the annotation bio files and the predict bio files
"
,
type
=
Path
,
)
parser
.
add_argument
(
"
-f
"
,
"
--folder
"
,
help
=
"
Folder containing the bio files referred to in the csv file
"
,
type
=
Path
,
)
parser
.
add_argument
(
"
-v
"
,
"
--verbose
"
,
help
=
"
Print only the recap if False
and detailed results if True (default)
"
,
help
=
"
Print only the recap if False
"
,
action
=
"
store_false
"
,
default
=
"
True
"
,
)
args
=
parser
.
parse_args
()
run
(
args
.
annot
,
args
.
predict
,
args
.
threshold
,
args
.
verbose
)
if
args
.
multiple
==
1
or
args
.
multiple
==
2
:
if
args
.
multiple
==
2
:
if
not
args
.
folder
:
raise
argparse
.
ArgumentError
(
args
.
folder
,
"
-f must be given if -m is 2
"
)
if
not
args
.
csv
:
raise
argparse
.
ArgumentError
(
args
.
folder
,
"
-c must be given if -m is 2
"
)
if
args
.
folder
and
args
.
csv
:
run_multiple
(
args
.
csv
,
args
.
folder
,
args
.
threshold
,
args
.
verbose
)
if
args
.
multiple
==
1
:
if
not
args
.
annot
:
raise
argparse
.
ArgumentError
(
args
.
folder
,
"
-a must be given if -m is 1
"
)
if
not
args
.
predict
:
raise
argparse
.
ArgumentError
(
args
.
folder
,
"
-p must be given if -m is 1
"
)
if
args
.
annot
and
args
.
predict
:
run
(
args
.
annot
,
args
.
predict
,
args
.
threshold
,
args
.
verbose
)
else
:
raise
argparse
.
ArgumentTypeError
(
"
Value has to be 1 or 2
"
)
if
__name__
==
"
__main__
"
:
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment