Skip to content
Snippets Groups Projects
Commit b903e7f7 authored by Eva Bardou's avatar Eva Bardou :frog: Committed by Yoann Schneider
Browse files

Set default distance threshold to `0` instead of `0.3`

parent 0dbbb0b5
No related branches found
No related tags found
1 merge request: !58 Set default distance threshold to `0` instead of `0.3`
Pipeline #207116 passed
......@@ -35,7 +35,7 @@ $ nerval -a/--annot <annot_file.bio> -p/--predict <predict-file.bio> \
[-t/--threshold <threshold_value>] [-c/--csv <correspondence_file.csv>]
```
The threshold value should be between 0 and 1. It designates the acceptable ratio of differing characters between an annotated and a predicted entity (the number of differing characters divided by the number of characters in the annotated entity) for the pair to be considered a match. The default value is 0.30. A threshold of 0 imposes perfect matches, whereas a threshold of 1 allows completely different strings to be considered a match.
The threshold value should be between 0 and 1. It designates the acceptable ratio of differing characters between an annotated and a predicted entity (the number of differing characters divided by the number of characters in the annotated entity) for the pair to be considered a match. The default value is 0. A threshold of 0 imposes perfect matches, whereas a threshold of 1 allows completely different strings to be considered a match.
For instance, if we consider the following case:
......
......@@ -3,7 +3,7 @@ from pathlib import Path
from nerval.evaluate import run, run_multiple
THRESHOLD = 0.30
THRESHOLD = 0
def threshold_float_type(arg):
......
TEST_THRESHOLD = 0.3
import pytest
from nerval import ALL_ENTITIES, evaluate
THRESHOLD = 0.30
from tests import TEST_THRESHOLD
fake_tags_aligned_nested_perfect = [
# Labels 1
......@@ -368,7 +366,7 @@ fake_predict_tags_bk_boundary_2 = [
"G*rard de *N*erval ----bo*rn in Paris in 1833 *.",
fake_annot_tags_aligned,
fake_predict_tags_aligned,
THRESHOLD,
TEST_THRESHOLD,
),
{ALL_ENTITIES: 1, "PER": 1, "LOC": 0, "DAT": 0},
),
......@@ -378,7 +376,7 @@ fake_predict_tags_bk_boundary_2 = [
"Louis par la grâce de Dieu roy de France et de Navarre.",
fake_tags_aligned_nested_perfect,
fake_tags_aligned_nested_perfect,
THRESHOLD,
TEST_THRESHOLD,
),
{ALL_ENTITIES: 3, "PER": 1, "LOC": 2},
),
......@@ -388,7 +386,7 @@ fake_predict_tags_bk_boundary_2 = [
"Louis par la grâce de Dieu roy de France et de Navarre.",
fake_tags_aligned_nested_perfect,
fake_tags_aligned_nested_false,
THRESHOLD,
TEST_THRESHOLD,
),
{ALL_ENTITIES: 2, "PER": 1, "LOC": 1},
),
......@@ -398,7 +396,7 @@ fake_predict_tags_bk_boundary_2 = [
"The red dragon",
fake_annot_tags_bk_boundary,
fake_predict_tags_bk_boundary,
THRESHOLD,
TEST_THRESHOLD,
),
{ALL_ENTITIES: 0, "PER": 0},
),
......@@ -408,7 +406,7 @@ fake_predict_tags_bk_boundary_2 = [
"A red dragon",
fake_annot_tags_bk_boundary_2,
fake_predict_tags_bk_boundary_2,
THRESHOLD,
TEST_THRESHOLD,
),
{ALL_ENTITIES: 1, "PER": 1},
),
......
......@@ -4,6 +4,7 @@ from pathlib import Path
import pytest
from nerval import ALL_ENTITIES, evaluate
from tests import TEST_THRESHOLD
@pytest.mark.parametrize(
......@@ -92,7 +93,7 @@ def test_run(annotation, prediction, expected):
evaluate.run(
annotation=annotation,
prediction=prediction,
threshold=0.3,
threshold=TEST_THRESHOLD,
verbose=False,
)
== expected
......@@ -104,7 +105,7 @@ def test_run_empty_bio(empty_bio):
Exception,
match="No content found in annotation or prediction files.",
):
evaluate.run(empty_bio, empty_bio, 0.3, False)
evaluate.run(empty_bio, empty_bio, TEST_THRESHOLD, False)
def test_run_empty_entry():
......@@ -112,7 +113,7 @@ def test_run_empty_entry():
AssertionError,
match=re.escape("Error: Input file invalid.bio does not exist"),
):
evaluate.run(Path("invalid.bio"), Path("invalid.bio"), 0.3, False)
evaluate.run(Path("invalid.bio"), Path("invalid.bio"), TEST_THRESHOLD, False)
def test_run_invalid_header(csv_file_error, folder_bio):
......@@ -120,7 +121,7 @@ def test_run_invalid_header(csv_file_error, folder_bio):
Exception,
match="Columns in the CSV mapping should be: Annotation,Prediction",
):
evaluate.run_multiple(csv_file_error, folder_bio, 0.3, False)
evaluate.run_multiple(csv_file_error, folder_bio, TEST_THRESHOLD, False)
def test_run_multiple(csv_file, folder_bio):
......@@ -128,4 +129,4 @@ def test_run_multiple(csv_file, folder_bio):
Exception,
match="No file found for files demo_annot.bio, demo_predict.bio",
):
evaluate.run_multiple(csv_file, folder_bio, 0.3, False)
evaluate.run_multiple(csv_file, folder_bio, TEST_THRESHOLD, False)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment