Skip to content
Snippets Groups Projects
Commit b903e7f7 authored by Eva Bardou's avatar Eva Bardou :frog: Committed by Yoann Schneider
Browse files

Set default distance threshold to `0` instead of `0.3`

parent 0dbbb0b5
No related branches found
No related tags found
1 merge request: !58 "Set default distance threshold to `0` instead of `0.3`"
Pipeline #207116 passed
...@@ -35,7 +35,7 @@ $ nerval -a/--annot <annot_file.bio> -p/--predict <predict-file.bio> \ ...@@ -35,7 +35,7 @@ $ nerval -a/--annot <annot_file.bio> -p/--predict <predict-file.bio> \
[-t/--threshold <threshold_value>] [-c/--csv <correspondence_file.csv>] [-t/--threshold <threshold_value>] [-c/--csv <correspondence_file.csv>]
``` ```
The threshold value should be between 0 and 1. It is the maximum acceptable ratio of the number of characters that differ between an annotated and a predicted entity to the number of characters in the annotated entity for the pair to be considered a match. The default value is 0.30. A value of 0 imposes perfect matches; a value of 1 allows completely different strings to be considered a match. The threshold value should be between 0 and 1. It is the maximum acceptable ratio of the number of characters that differ between an annotated and a predicted entity to the number of characters in the annotated entity for the pair to be considered a match. The default value is 0. A value of 0 imposes perfect matches; a value of 1 allows completely different strings to be considered a match.
For instance, if we consider the following case: For instance, if we consider the following case:
......
...@@ -3,7 +3,7 @@ from pathlib import Path ...@@ -3,7 +3,7 @@ from pathlib import Path
from nerval.evaluate import run, run_multiple from nerval.evaluate import run, run_multiple
THRESHOLD = 0.30 THRESHOLD = 0
def threshold_float_type(arg): def threshold_float_type(arg):
......
TEST_THRESHOLD = 0.3
import pytest import pytest
from nerval import ALL_ENTITIES, evaluate from nerval import ALL_ENTITIES, evaluate
from tests import TEST_THRESHOLD
THRESHOLD = 0.30
fake_tags_aligned_nested_perfect = [ fake_tags_aligned_nested_perfect = [
# Labels 1 # Labels 1
...@@ -368,7 +366,7 @@ fake_predict_tags_bk_boundary_2 = [ ...@@ -368,7 +366,7 @@ fake_predict_tags_bk_boundary_2 = [
"G*rard de *N*erval ----bo*rn in Paris in 1833 *.", "G*rard de *N*erval ----bo*rn in Paris in 1833 *.",
fake_annot_tags_aligned, fake_annot_tags_aligned,
fake_predict_tags_aligned, fake_predict_tags_aligned,
THRESHOLD, TEST_THRESHOLD,
), ),
{ALL_ENTITIES: 1, "PER": 1, "LOC": 0, "DAT": 0}, {ALL_ENTITIES: 1, "PER": 1, "LOC": 0, "DAT": 0},
), ),
...@@ -378,7 +376,7 @@ fake_predict_tags_bk_boundary_2 = [ ...@@ -378,7 +376,7 @@ fake_predict_tags_bk_boundary_2 = [
"Louis par la grâce de Dieu roy de France et de Navarre.", "Louis par la grâce de Dieu roy de France et de Navarre.",
fake_tags_aligned_nested_perfect, fake_tags_aligned_nested_perfect,
fake_tags_aligned_nested_perfect, fake_tags_aligned_nested_perfect,
THRESHOLD, TEST_THRESHOLD,
), ),
{ALL_ENTITIES: 3, "PER": 1, "LOC": 2}, {ALL_ENTITIES: 3, "PER": 1, "LOC": 2},
), ),
...@@ -388,7 +386,7 @@ fake_predict_tags_bk_boundary_2 = [ ...@@ -388,7 +386,7 @@ fake_predict_tags_bk_boundary_2 = [
"Louis par la grâce de Dieu roy de France et de Navarre.", "Louis par la grâce de Dieu roy de France et de Navarre.",
fake_tags_aligned_nested_perfect, fake_tags_aligned_nested_perfect,
fake_tags_aligned_nested_false, fake_tags_aligned_nested_false,
THRESHOLD, TEST_THRESHOLD,
), ),
{ALL_ENTITIES: 2, "PER": 1, "LOC": 1}, {ALL_ENTITIES: 2, "PER": 1, "LOC": 1},
), ),
...@@ -398,7 +396,7 @@ fake_predict_tags_bk_boundary_2 = [ ...@@ -398,7 +396,7 @@ fake_predict_tags_bk_boundary_2 = [
"The red dragon", "The red dragon",
fake_annot_tags_bk_boundary, fake_annot_tags_bk_boundary,
fake_predict_tags_bk_boundary, fake_predict_tags_bk_boundary,
THRESHOLD, TEST_THRESHOLD,
), ),
{ALL_ENTITIES: 0, "PER": 0}, {ALL_ENTITIES: 0, "PER": 0},
), ),
...@@ -408,7 +406,7 @@ fake_predict_tags_bk_boundary_2 = [ ...@@ -408,7 +406,7 @@ fake_predict_tags_bk_boundary_2 = [
"A red dragon", "A red dragon",
fake_annot_tags_bk_boundary_2, fake_annot_tags_bk_boundary_2,
fake_predict_tags_bk_boundary_2, fake_predict_tags_bk_boundary_2,
THRESHOLD, TEST_THRESHOLD,
), ),
{ALL_ENTITIES: 1, "PER": 1}, {ALL_ENTITIES: 1, "PER": 1},
), ),
......
...@@ -4,6 +4,7 @@ from pathlib import Path ...@@ -4,6 +4,7 @@ from pathlib import Path
import pytest import pytest
from nerval import ALL_ENTITIES, evaluate from nerval import ALL_ENTITIES, evaluate
from tests import TEST_THRESHOLD
@pytest.mark.parametrize( @pytest.mark.parametrize(
...@@ -92,7 +93,7 @@ def test_run(annotation, prediction, expected): ...@@ -92,7 +93,7 @@ def test_run(annotation, prediction, expected):
evaluate.run( evaluate.run(
annotation=annotation, annotation=annotation,
prediction=prediction, prediction=prediction,
threshold=0.3, threshold=TEST_THRESHOLD,
verbose=False, verbose=False,
) )
== expected == expected
...@@ -104,7 +105,7 @@ def test_run_empty_bio(empty_bio): ...@@ -104,7 +105,7 @@ def test_run_empty_bio(empty_bio):
Exception, Exception,
match="No content found in annotation or prediction files.", match="No content found in annotation or prediction files.",
): ):
evaluate.run(empty_bio, empty_bio, 0.3, False) evaluate.run(empty_bio, empty_bio, TEST_THRESHOLD, False)
def test_run_empty_entry(): def test_run_empty_entry():
...@@ -112,7 +113,7 @@ def test_run_empty_entry(): ...@@ -112,7 +113,7 @@ def test_run_empty_entry():
AssertionError, AssertionError,
match=re.escape("Error: Input file invalid.bio does not exist"), match=re.escape("Error: Input file invalid.bio does not exist"),
): ):
evaluate.run(Path("invalid.bio"), Path("invalid.bio"), 0.3, False) evaluate.run(Path("invalid.bio"), Path("invalid.bio"), TEST_THRESHOLD, False)
def test_run_invalid_header(csv_file_error, folder_bio): def test_run_invalid_header(csv_file_error, folder_bio):
...@@ -120,7 +121,7 @@ def test_run_invalid_header(csv_file_error, folder_bio): ...@@ -120,7 +121,7 @@ def test_run_invalid_header(csv_file_error, folder_bio):
Exception, Exception,
match="Columns in the CSV mapping should be: Annotation,Prediction", match="Columns in the CSV mapping should be: Annotation,Prediction",
): ):
evaluate.run_multiple(csv_file_error, folder_bio, 0.3, False) evaluate.run_multiple(csv_file_error, folder_bio, TEST_THRESHOLD, False)
def test_run_multiple(csv_file, folder_bio): def test_run_multiple(csv_file, folder_bio):
...@@ -128,4 +129,4 @@ def test_run_multiple(csv_file, folder_bio): ...@@ -128,4 +129,4 @@ def test_run_multiple(csv_file, folder_bio):
Exception, Exception,
match="No file found for files demo_annot.bio, demo_predict.bio", match="No file found for files demo_annot.bio, demo_predict.bio",
): ):
evaluate.run_multiple(csv_file, folder_bio, 0.3, False) evaluate.run_multiple(csv_file, folder_bio, TEST_THRESHOLD, False)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment