From 68c879317b2d6ee9946dcc4d803d752b4e2da779 Mon Sep 17 00:00:00 2001
From: Yoann Schneider <yschneider@teklia.com>
Date: Mon, 6 Mar 2023 15:19:53 +0000
Subject: [PATCH] Allow hyphens (-) in labels again

---
 nerval/parse.py         | 6 +-----
 tests/test_parse_bio.py | 4 +++-
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/nerval/parse.py b/nerval/parse.py
index 5b92763..edf2715 100644
--- a/nerval/parse.py
+++ b/nerval/parse.py
@@ -45,11 +45,7 @@ def parse_line(index: int, line: str, path: Path):
 
         assert match_iob
 
-        word, label = match_iob.group(1, 2)
-        # We should have either one - (BLIU-) or none at all (O)
-        assert label.count("-") <= 1
-
-        return word, label
+        return match_iob.group(1, 2)
     except AssertionError:
         raise (
             Exception(
diff --git a/tests/test_parse_bio.py b/tests/test_parse_bio.py
index 625e9a2..09c3cb5 100644
--- a/tests/test_parse_bio.py
+++ b/tests/test_parse_bio.py
@@ -200,6 +200,7 @@ def test_parse_bio_no_input():
         ("Hi B-ORG", "Hi", "B-ORG"),
         ("Hi B-Org or maybe not org", "Hi", "B-Org or maybe not org"),
         ("1258 B-Date et Lieu", "1258", "B-Date et Lieu"),
+        ("Devoti B-Sous-titre", "Devoti", "B-Sous-titre"),
     ),
 )
 def test_parse_line(line, word, label):
@@ -208,7 +209,7 @@ def test_parse_line(line, word, label):
 
 @pytest.mark.parametrize(
     "line",
-    (("HiB-ORG"), ("HiB-ORG or maybe not"), ("Hello B-surname and L-ocation")),
+    (("HiB-ORG"), ("HiB-ORG or maybe not")),
 )
 def test_parse_line_crash(line):
     with pytest.raises(Exception):
@@ -221,6 +222,7 @@ def test_parse_line_crash(line):
         ("B-ORG", "ORG"),
         ("B-Date et Lieu", "Date et Lieu"),
         ("I-Date et Lieu", "Date et Lieu"),
+        ("B-Sous-titre", "Sous-titre"),
     ),
 )
 def test_get_type_label(label, expected_type):
-- 
GitLab