diff --git a/nerval/parse.py b/nerval/parse.py index 5b9276301da07918b503f823a96bcfe4f5efff17..edf27155bda3036bb4c23e7c6adf84b6e6cf34b4 100644 --- a/nerval/parse.py +++ b/nerval/parse.py @@ -45,11 +45,7 @@ def parse_line(index: int, line: str, path: Path): assert match_iob - word, label = match_iob.group(1, 2) - # We should have either one - (BLIU-) or none at all (O) - assert label.count("-") <= 1 - - return word, label + return match_iob.group(1, 2) except AssertionError: raise ( Exception( diff --git a/tests/test_parse_bio.py b/tests/test_parse_bio.py index 625e9a25f30228b0d3d19cab9a0ca7e6cf14b1e2..09c3cb589f80ccec8386bd8794e441447299d619 100644 --- a/tests/test_parse_bio.py +++ b/tests/test_parse_bio.py @@ -200,6 +200,7 @@ def test_parse_bio_no_input(): ("Hi B-ORG", "Hi", "B-ORG"), ("Hi B-Org or maybe not org", "Hi", "B-Org or maybe not org"), ("1258 B-Date et Lieu", "1258", "B-Date et Lieu"), + ("Devoti B-Sous-titre", "Devoti", "B-Sous-titre"), ), ) def test_parse_line(line, word, label): @@ -208,7 +209,7 @@ def test_parse_line(line, word, label): @pytest.mark.parametrize( "line", - (("HiB-ORG"), ("HiB-ORG or maybe not"), ("Hello B-surname and L-ocation")), + (("HiB-ORG"), ("HiB-ORG or maybe not")), ) def test_parse_line_crash(line): with pytest.raises(Exception): @@ -221,6 +222,7 @@ def test_parse_line_crash(line): ("B-ORG", "ORG"), ("B-Date et Lieu", "Date et Lieu"), ("I-Date et Lieu", "Date et Lieu"), + ("B-Sous-titre", "Sous-titre"), ), ) def test_get_type_label(label, expected_type):