Set default distance threshold to `0` instead of `0.3`

b903e7f7 · Eva Bardou · Yoann Schneider · 0dbbb0b5 · b903e7f7 · b903e7f7
Commit b903e7f7, authored 2 months ago by Eva Bardou; committed by Yoann Schneider 2 months ago
--- a/README.md
+++ b/README.md
@@ -35,7 +35,7 @@ $ nerval -a/--annot <annot_file.bio> -p/--predict <predict-file.bio> \
 		 [-t/--threshold <threshold_value>] [-c/--csv <correspondence_file.csv>]
 ```

-The threshold value should be between 0 and 1. It designates the acceptable number of characters differing between an annotated and a predicted entity - over the number of characters in the annotated entity - to consider it as a match. Default value is 0.30. 0 would impose perfect matches, 1 would allow completely different strings to be considered as a match.
+The threshold value should be between 0 and 1. It designates the acceptable number of characters differing between an annotated and a predicted entity - over the number of characters in the annotated entity - to consider it as a match. The default value is 0. A value of 0 imposes perfect matches, while a value of 1 allows completely different strings to be considered as a match.

 For instance, if we consider the following case:


--- a/nerval/cli.py
+++ b/nerval/cli.py
@@ -3,7 +3,7 @@ from pathlib import Path

 from nerval.evaluate import run, run_multiple

-THRESHOLD = 0.30
+THRESHOLD = 0


 def threshold_float_type(arg):

--- a/tests/__init__.py
+++ b/tests/__init__.py
+TEST_THRESHOLD = 0.3
--- a/tests/test_compute_matches.py
+++ b/tests/test_compute_matches.py
 import pytest

 from nerval import ALL_ENTITIES, evaluate
-
-THRESHOLD = 0.30
-
+from tests import TEST_THRESHOLD

 fake_tags_aligned_nested_perfect = [
    # Labels 1
@@ -368,7 +366,7 @@ fake_predict_tags_bk_boundary_2 = [
                "G*rard de *N*erval ----bo*rn in Paris in 1833 *.",
                fake_annot_tags_aligned,
                fake_predict_tags_aligned,
-                THRESHOLD,
+                TEST_THRESHOLD,
            ),
            {ALL_ENTITIES: 1, "PER": 1, "LOC": 0, "DAT": 0},
        ),
@@ -378,7 +376,7 @@ fake_predict_tags_bk_boundary_2 = [
                "Louis par la grâce de Dieu roy de France et de Navarre.",
                fake_tags_aligned_nested_perfect,
                fake_tags_aligned_nested_perfect,
-                THRESHOLD,
+                TEST_THRESHOLD,
            ),
            {ALL_ENTITIES: 3, "PER": 1, "LOC": 2},
        ),
@@ -388,7 +386,7 @@ fake_predict_tags_bk_boundary_2 = [
                "Louis par la grâce de Dieu roy de France et de Navarre.",
                fake_tags_aligned_nested_perfect,
                fake_tags_aligned_nested_false,
-                THRESHOLD,
+                TEST_THRESHOLD,
            ),
            {ALL_ENTITIES: 2, "PER": 1, "LOC": 1},
        ),
@@ -398,7 +396,7 @@ fake_predict_tags_bk_boundary_2 = [
                "The red dragon",
                fake_annot_tags_bk_boundary,
                fake_predict_tags_bk_boundary,
-                THRESHOLD,
+                TEST_THRESHOLD,
            ),
            {ALL_ENTITIES: 0, "PER": 0},
        ),
@@ -408,7 +406,7 @@ fake_predict_tags_bk_boundary_2 = [
                "A red dragon",
                fake_annot_tags_bk_boundary_2,
                fake_predict_tags_bk_boundary_2,
-                THRESHOLD,
+                TEST_THRESHOLD,
            ),
            {ALL_ENTITIES: 1, "PER": 1},
        ),

--- a/tests/test_run.py
+++ b/tests/test_run.py
@@ -4,6 +4,7 @@ from pathlib import Path
 import pytest

 from nerval import ALL_ENTITIES, evaluate
+from tests import TEST_THRESHOLD


 @pytest.mark.parametrize(
@@ -92,7 +93,7 @@ def test_run(annotation, prediction, expected):
        evaluate.run(
            annotation=annotation,
            prediction=prediction,
-            threshold=0.3,
+            threshold=TEST_THRESHOLD,
            verbose=False,
        )
        == expected
@@ -104,7 +105,7 @@ def test_run_empty_bio(empty_bio):
        Exception,
        match="No content found in annotation or prediction files.",
    ):
-        evaluate.run(empty_bio, empty_bio, 0.3, False)
+        evaluate.run(empty_bio, empty_bio, TEST_THRESHOLD, False)


 def test_run_empty_entry():
@@ -112,7 +113,7 @@ def test_run_empty_entry():
        AssertionError,
        match=re.escape("Error: Input file invalid.bio does not exist"),
    ):
-        evaluate.run(Path("invalid.bio"), Path("invalid.bio"), 0.3, False)
+        evaluate.run(Path("invalid.bio"), Path("invalid.bio"), TEST_THRESHOLD, False)


 def test_run_invalid_header(csv_file_error, folder_bio):
@@ -120,7 +121,7 @@ def test_run_invalid_header(csv_file_error, folder_bio):
        Exception,
        match="Columns in the CSV mapping should be: Annotation,Prediction",
    ):
-        evaluate.run_multiple(csv_file_error, folder_bio, 0.3, False)
+        evaluate.run_multiple(csv_file_error, folder_bio, TEST_THRESHOLD, False)


 def test_run_multiple(csv_file, folder_bio):
@@ -128,4 +129,4 @@ def test_run_multiple(csv_file, folder_bio):
        Exception,
        match="No file found for files demo_annot.bio, demo_predict.bio",
    ):
-        evaluate.run_multiple(csv_file, folder_bio, 0.3, False)
+        evaluate.run_multiple(csv_file, folder_bio, TEST_THRESHOLD, False)