From 68c879317b2d6ee9946dcc4d803d752b4e2da779 Mon Sep 17 00:00:00 2001 From: Yoann Schneider <yschneider@teklia.com> Date: Mon, 6 Mar 2023 15:19:53 +0000 Subject: [PATCH] Allow - in labels again --- nerval/parse.py | 6 +----- tests/test_parse_bio.py | 4 +++- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/nerval/parse.py b/nerval/parse.py index 5b92763..edf2715 100644 --- a/nerval/parse.py +++ b/nerval/parse.py @@ -45,11 +45,7 @@ def parse_line(index: int, line: str, path: Path): assert match_iob - word, label = match_iob.group(1, 2) - # We should have either one - (BLIU-) or none at all (O) - assert label.count("-") <= 1 - - return word, label + return match_iob.group(1, 2) except AssertionError: raise ( Exception( diff --git a/tests/test_parse_bio.py b/tests/test_parse_bio.py index 625e9a2..09c3cb5 100644 --- a/tests/test_parse_bio.py +++ b/tests/test_parse_bio.py @@ -200,6 +200,7 @@ def test_parse_bio_no_input(): ("Hi B-ORG", "Hi", "B-ORG"), ("Hi B-Org or maybe not org", "Hi", "B-Org or maybe not org"), ("1258 B-Date et Lieu", "1258", "B-Date et Lieu"), + ("Devoti B-Sous-titre", "Devoti", "B-Sous-titre"), ), ) def test_parse_line(line, word, label): @@ -208,7 +209,7 @@ def test_parse_line(line, word, label): @pytest.mark.parametrize( "line", - (("HiB-ORG"), ("HiB-ORG or maybe not"), ("Hello B-surname and L-ocation")), + (("HiB-ORG"), ("HiB-ORG or maybe not")), ) def test_parse_line_crash(line): with pytest.raises(Exception): @@ -221,6 +222,7 @@ def test_parse_line_crash(line): ("B-ORG", "ORG"), ("B-Date et Lieu", "Date et Lieu"), ("I-Date et Lieu", "Date et Lieu"), + ("B-Sous-titre", "Sous-titre"), ), ) def test_get_type_label(label, expected_type): -- GitLab