From 21fc63db60a895e304908c06de2328fabd33be5e Mon Sep 17 00:00:00 2001 From: manonBlanco <blanco@teklia.com> Date: Thu, 8 Feb 2024 10:43:14 +0100 Subject: [PATCH] Support empty lines in BIO parser --- nerval/parse.py | 3 +++ tests/conftest.py | 5 +++++ tests/fixtures/test_annot_with_empty_lines.bio | 13 +++++++++++++ tests/test_parse_bio.py | 1 + 4 files changed, 22 insertions(+) create mode 100644 tests/fixtures/test_annot_with_empty_lines.bio diff --git a/nerval/parse.py b/nerval/parse.py index fda2b66..65f8d94 100644 --- a/nerval/parse.py +++ b/nerval/parse.py @@ -72,6 +72,9 @@ def parse_bio(lines: List[str]) -> dict: containing_tag = None for index, line in enumerate(lines): + if not line: + continue + word, label = parse_line(index, line) # Preserve hyphens to avoid confusion with the hyphens added later during alignment diff --git a/tests/conftest.py b/tests/conftest.py index 2d934c3..fbce7f1 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -10,6 +10,11 @@ def fake_annot_bio(): return FIXTURES / "test_annot.bio" +@pytest.fixture() +def fake_annot_with_empty_lines_bio(): + return FIXTURES / "test_annot_with_empty_lines.bio" + + @pytest.fixture() def fake_predict_bio(): return FIXTURES / "test_predict.bio" diff --git a/tests/fixtures/test_annot_with_empty_lines.bio b/tests/fixtures/test_annot_with_empty_lines.bio new file mode 100644 index 0000000..6988d56 --- /dev/null +++ b/tests/fixtures/test_annot_with_empty_lines.bio @@ -0,0 +1,13 @@ +Gérard B-PER +de I-PER + +Nerval I-PER +was O +born O + +in O +Paris B-LOC +in O + +1808 B-DAT +. O diff --git a/tests/test_parse_bio.py b/tests/test_parse_bio.py index 5f80734..e50f6af 100644 --- a/tests/test_parse_bio.py +++ b/tests/test_parse_bio.py @@ -175,6 +175,7 @@ expected_parsed_end_of_file = { (pytest.lazy_fixture("fake_annot_bio"), expected_parsed_annot), (pytest.lazy_fixture("fake_predict_bio"), expected_parsed_predict), (pytest.lazy_fixture("empty_bio"), None), + (pytest.lazy_fixture("fake_annot_with_empty_lines_bio"), expected_parsed_annot), (pytest.lazy_fixture("bioeslu_bio"), expected_parsed_annot), (pytest.lazy_fixture("end_of_file_bio"), expected_parsed_end_of_file), ], -- GitLab