diff --git a/nerval/parse.py b/nerval/parse.py index fda2b66cbbd8057942ede7911f82e925b33fdd11..65f8d943897e87bbfe50cced07fb5cfbe2ad5c0f 100644 --- a/nerval/parse.py +++ b/nerval/parse.py @@ -72,6 +72,9 @@ def parse_bio(lines: List[str]) -> dict: containing_tag = None for index, line in enumerate(lines): + if not line: + continue + word, label = parse_line(index, line) # Preserve hyphens to avoid confusion with the hyphens added later during alignment diff --git a/tests/conftest.py b/tests/conftest.py index 2d934c3add4f78e9762e6abc35a7acf722e55111..fbce7f1a477bf0f4c128cc37d82aa53c902399d9 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -10,6 +10,11 @@ def fake_annot_bio(): return FIXTURES / "test_annot.bio" +@pytest.fixture() +def fake_annot_with_empty_lines_bio(): + return FIXTURES / "test_annot_with_empty_lines.bio" + + @pytest.fixture() def fake_predict_bio(): return FIXTURES / "test_predict.bio" diff --git a/tests/fixtures/test_annot_with_empty_lines.bio b/tests/fixtures/test_annot_with_empty_lines.bio new file mode 100644 index 0000000000000000000000000000000000000000..6988d56cf6f272d747d6e9eda65e01da030fe1e8 --- /dev/null +++ b/tests/fixtures/test_annot_with_empty_lines.bio @@ -0,0 +1,13 @@ +Gérard B-PER +de I-PER + +Nerval I-PER +was O +born O + +in O +Paris B-LOC +in O + +1808 B-DAT +. O diff --git a/tests/test_parse_bio.py b/tests/test_parse_bio.py index 5f807341a0bb05c5b20bd839c5a77c5eb30c5a7d..e50f6af54dfd49be4fce01d9416a465f6a7d2a5a 100644 --- a/tests/test_parse_bio.py +++ b/tests/test_parse_bio.py @@ -175,6 +175,7 @@ expected_parsed_end_of_file = { (pytest.lazy_fixture("fake_annot_bio"), expected_parsed_annot), (pytest.lazy_fixture("fake_predict_bio"), expected_parsed_predict), (pytest.lazy_fixture("empty_bio"), None), + (pytest.lazy_fixture("fake_annot_with_empty_lines_bio"), expected_parsed_annot), (pytest.lazy_fixture("bioeslu_bio"), expected_parsed_annot), (pytest.lazy_fixture("end_of_file_bio"), expected_parsed_end_of_file), ],