diff --git a/README.md b/README.md
index be8e0ee694b4fc3ae814ac3a03ef6d3ce3f4acf0..d87feac6199fc70c9cf3b3a855eed1d8789b2dff 100644
--- a/README.md
+++ b/README.md
@@ -35,7 +35,7 @@ $ nerval -a/--annot <annot_file.bio> -p/--predict <predict-file.bio> \
 [-t/--threshold <threshold_value>] [-c/--csv <correspondence_file.csv>]
 ```
 
-The threshold value should be between 0 and 1. It designates the acceptable number of characters differing between an annotated and a predicted entity - over the number of characters in the annotated entity - to consider it as a match. Default value is 0.30. 0 would impose perfect matches, 1 would allow completely different strings to be considered as a match.
+The threshold value should be between 0 and 1. It designates the acceptable number of characters differing between an annotated and a predicted entity - over the number of characters in the annotated entity - to consider it as a match. Default value is 0. 0 would impose perfect matches, 1 would allow completely different strings to be considered as a match.
 
 For instance, if we consider the following case:
 
diff --git a/nerval/cli.py b/nerval/cli.py
index 0b838a18b059cd3ea6a1af4fc6dda8b56b0c4c64..72b45548200aadf237a753b3a9cf7ffb17ec28be 100644
--- a/nerval/cli.py
+++ b/nerval/cli.py
@@ -3,7 +3,7 @@ from pathlib import Path
 
 from nerval.evaluate import run, run_multiple
 
-THRESHOLD = 0.30
+THRESHOLD = 0
 
 
 def threshold_float_type(arg):
diff --git a/tests/__init__.py b/tests/__init__.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..3a86a59974b371882d428d98075e506afb1252e8 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -0,0 +1 @@
+TEST_THRESHOLD = 0.3
diff --git a/tests/test_compute_matches.py b/tests/test_compute_matches.py
index c230f1366e4c782cf27241a80342498232ca435f..2708417c9fc8e7fb48538dee833e83d50741e093 100644
--- a/tests/test_compute_matches.py
+++ b/tests/test_compute_matches.py
@@ -1,9 +1,7 @@
 import pytest
 
 from nerval import ALL_ENTITIES, evaluate
-
-THRESHOLD = 0.30
-
+from tests import TEST_THRESHOLD
 
 fake_tags_aligned_nested_perfect = [
     # Labels 1
@@ -368,7 +366,7 @@ fake_predict_tags_bk_boundary_2 = [
                 "G*rard de *N*erval ----bo*rn in Paris in 1833 *.",
                 fake_annot_tags_aligned,
                 fake_predict_tags_aligned,
-                THRESHOLD,
+                TEST_THRESHOLD,
             ),
             {ALL_ENTITIES: 1, "PER": 1, "LOC": 0, "DAT": 0},
         ),
@@ -378,7 +376,7 @@ fake_predict_tags_bk_boundary_2 = [
                 "Louis par la grâce de Dieu roy de France et de Navarre.",
                 fake_tags_aligned_nested_perfect,
                 fake_tags_aligned_nested_perfect,
-                THRESHOLD,
+                TEST_THRESHOLD,
             ),
             {ALL_ENTITIES: 3, "PER": 1, "LOC": 2},
         ),
@@ -388,7 +386,7 @@ fake_predict_tags_bk_boundary_2 = [
                 "Louis par la grâce de Dieu roy de France et de Navarre.",
                 fake_tags_aligned_nested_perfect,
                 fake_tags_aligned_nested_false,
-                THRESHOLD,
+                TEST_THRESHOLD,
             ),
             {ALL_ENTITIES: 2, "PER": 1, "LOC": 1},
         ),
@@ -398,7 +396,7 @@ fake_predict_tags_bk_boundary_2 = [
                 "The red dragon",
                 fake_annot_tags_bk_boundary,
                 fake_predict_tags_bk_boundary,
-                THRESHOLD,
+                TEST_THRESHOLD,
             ),
             {ALL_ENTITIES: 0, "PER": 0},
         ),
@@ -408,7 +406,7 @@ fake_predict_tags_bk_boundary_2 = [
                 "A red dragon",
                 fake_annot_tags_bk_boundary_2,
                 fake_predict_tags_bk_boundary_2,
-                THRESHOLD,
+                TEST_THRESHOLD,
             ),
             {ALL_ENTITIES: 1, "PER": 1},
         ),
diff --git a/tests/test_run.py b/tests/test_run.py
index 421c51cf7a681faa5b6c7ad3c16fc06cc12736f4..f8b22fd6f25ef2414ffd78fc205589a0258a520a 100644
--- a/tests/test_run.py
+++ b/tests/test_run.py
@@ -4,6 +4,7 @@ from pathlib import Path
 import pytest
 
 from nerval import ALL_ENTITIES, evaluate
+from tests import TEST_THRESHOLD
 
 
 @pytest.mark.parametrize(
@@ -92,7 +93,7 @@ def test_run(annotation, prediction, expected):
         evaluate.run(
             annotation=annotation,
             prediction=prediction,
-            threshold=0.3,
+            threshold=TEST_THRESHOLD,
             verbose=False,
         )
         == expected
@@ -104,7 +105,7 @@ def test_run_empty_bio(empty_bio):
         Exception,
         match="No content found in annotation or prediction files.",
     ):
-        evaluate.run(empty_bio, empty_bio, 0.3, False)
+        evaluate.run(empty_bio, empty_bio, TEST_THRESHOLD, False)
 
 
 def test_run_empty_entry():
@@ -112,7 +113,7 @@ def test_run_empty_entry():
         AssertionError,
         match=re.escape("Error: Input file invalid.bio does not exist"),
     ):
-        evaluate.run(Path("invalid.bio"), Path("invalid.bio"), 0.3, False)
+        evaluate.run(Path("invalid.bio"), Path("invalid.bio"), TEST_THRESHOLD, False)
 
 
 def test_run_invalid_header(csv_file_error, folder_bio):
@@ -120,7 +121,7 @@ def test_run_invalid_header(csv_file_error, folder_bio):
         Exception,
         match="Columns in the CSV mapping should be: Annotation,Prediction",
     ):
-        evaluate.run_multiple(csv_file_error, folder_bio, 0.3, False)
+        evaluate.run_multiple(csv_file_error, folder_bio, TEST_THRESHOLD, False)
 
 
 def test_run_multiple(csv_file, folder_bio):
@@ -128,4 +129,4 @@ def test_run_multiple(csv_file, folder_bio):
         Exception,
         match="No file found for files demo_annot.bio, demo_predict.bio",
     ):
-        evaluate.run_multiple(csv_file, folder_bio, 0.3, False)
+        evaluate.run_multiple(csv_file, folder_bio, TEST_THRESHOLD, False)
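
For context on what the new default means in practice: nerval's actual matching logic lives in `nerval/evaluate.py` and is not touched by this patch. The sketch below is only an illustration of the criterion the README paragraph describes (characters differing between the two entities, divided by the length of the annotated entity) and of the behavioral difference between the new default `0` and the test value `0.3`. The helpers `levenshtein` and `is_match` are hypothetical names written for this note, not the library's API.

```python
def levenshtein(a: str, b: str) -> int:
    """Classic dynamic-programming edit distance between two strings."""
    previous = list(range(len(b) + 1))
    for i, ca in enumerate(a, start=1):
        current = [i]
        for j, cb in enumerate(b, start=1):
            current.append(
                min(
                    previous[j] + 1,               # deletion
                    current[j - 1] + 1,            # insertion
                    previous[j - 1] + (ca != cb),  # substitution
                )
            )
        previous = current
    return previous[-1]


def is_match(annotated: str, predicted: str, threshold: float) -> bool:
    """Accept the pair when the number of differing characters, relative to
    the annotated entity's length, does not exceed the threshold."""
    return levenshtein(annotated, predicted) <= threshold * len(annotated)


# threshold=0 (the new default) only accepts exact string matches;
# threshold=0.3 (kept as TEST_THRESHOLD for the test suite) tolerates
# roughly one differing character in three.
assert is_match("Nerval", "Nerval", threshold=0)
assert not is_match("Nerval", "N*erval", threshold=0)
assert is_match("Nerval", "N*erval", threshold=0.3)
```

Under this reading, the patch makes strict matching the out-of-the-box behavior while the fuzzier 0.3 tolerance moves into `tests/__init__.py`, so every test site references the single `TEST_THRESHOLD` constant instead of a scattered `0.3` literal.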