From 21fc63db60a895e304908c06de2328fabd33be5e Mon Sep 17 00:00:00 2001
From: manonBlanco <blanco@teklia.com>
Date: Thu, 8 Feb 2024 10:43:14 +0100
Subject: [PATCH] Support empty lines in BIO parser

---
 nerval/parse.py                                |  3 +++
 tests/conftest.py                              |  5 +++++
 tests/fixtures/test_annot_with_empty_lines.bio | 13 +++++++++++++
 tests/test_parse_bio.py                        |  1 +
 4 files changed, 22 insertions(+)
 create mode 100644 tests/fixtures/test_annot_with_empty_lines.bio

diff --git a/nerval/parse.py b/nerval/parse.py
index fda2b66..65f8d94 100644
--- a/nerval/parse.py
+++ b/nerval/parse.py
@@ -72,6 +72,9 @@ def parse_bio(lines: List[str]) -> dict:
     containing_tag = None
 
     for index, line in enumerate(lines):
+        if not line:
+            continue
+
         word, label = parse_line(index, line)
 
         # Preserve hyphens to avoid confusion with the hyphens added later during alignment
diff --git a/tests/conftest.py b/tests/conftest.py
index 2d934c3..fbce7f1 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -10,6 +10,11 @@ def fake_annot_bio():
     return FIXTURES / "test_annot.bio"
 
 
+@pytest.fixture()
+def fake_annot_with_empty_lines_bio():
+    return FIXTURES / "test_annot_with_empty_lines.bio"  # annotation fixture with blank lines between tokens; expected to parse like test_annot.bio
+
+
 @pytest.fixture()
 def fake_predict_bio():
     return FIXTURES / "test_predict.bio"
diff --git a/tests/fixtures/test_annot_with_empty_lines.bio b/tests/fixtures/test_annot_with_empty_lines.bio
new file mode 100644
index 0000000..6988d56
--- /dev/null
+++ b/tests/fixtures/test_annot_with_empty_lines.bio
@@ -0,0 +1,13 @@
+Gérard B-PER
+de I-PER
+
+Nerval I-PER
+was O
+born O
+
+in O
+Paris B-LOC
+in O
+
+1808 B-DAT
+. O
diff --git a/tests/test_parse_bio.py b/tests/test_parse_bio.py
index 5f80734..e50f6af 100644
--- a/tests/test_parse_bio.py
+++ b/tests/test_parse_bio.py
@@ -175,6 +175,7 @@ expected_parsed_end_of_file = {
         (pytest.lazy_fixture("fake_annot_bio"), expected_parsed_annot),
         (pytest.lazy_fixture("fake_predict_bio"), expected_parsed_predict),
         (pytest.lazy_fixture("empty_bio"), None),
+        (pytest.lazy_fixture("fake_annot_with_empty_lines_bio"), expected_parsed_annot),
         (pytest.lazy_fixture("bioeslu_bio"), expected_parsed_annot),
         (pytest.lazy_fixture("end_of_file_bio"), expected_parsed_end_of_file),
     ],
-- 
GitLab