Skip to content
Snippets Groups Projects
Commit 032c2267 authored by Yoann Schneider :tennis:
Browse files

Allow spaces in label (followup)

parent de1275d2
No related branches found
No related tags found
1 merge request: !25 Allow spaces in label (followup)
Pipeline #104020 passed
......@@ -5,7 +5,7 @@ from pathlib import Path
NOT_ENTITY_TAG = "O"
BEGINNING_POS = ["B", "S", "U"]
REGEX_IOB_LINE = re.compile(r"^(.*) ([BIESLUO]-?.*)$")
REGEX_IOB_LINE = re.compile(r"^(\S*) ((?:[BIESLU]-|O).*)$")
REGEX_LABEL = re.compile(r"[BIESLU]-(.*)$")
......@@ -45,7 +45,11 @@ def parse_line(index: int, line: str, path: Path):
assert match_iob
return match_iob.group(1, 2)
word, label = match_iob.group(1, 2)
# We should have either one - (BLIU-) or none at all (O)
assert label.count("-") <= 1
return word, label
except AssertionError:
raise (
Exception(
......
......@@ -4,7 +4,7 @@ from pathlib import Path
import pytest
from nerval import evaluate
from nerval.parse import parse_line
from nerval.parse import get_type_label, parse_line
expected_parsed_annot = {
"entity_count": {"All": 3, "DAT": 1, "LOC": 1, "PER": 1},
......@@ -199,6 +199,7 @@ def test_parse_bio_no_input():
(
("Hi B-ORG", "Hi", "B-ORG"),
("Hi B-Org or maybe not org", "Hi", "B-Org or maybe not org"),
("1258 B-Date et Lieu", "1258", "B-Date et Lieu"),
),
)
def test_parse_line(line, word, label):
......@@ -207,11 +208,20 @@ def test_parse_line(line, word, label):
@pytest.mark.parametrize(
"line",
(
("HiB-ORG"),
("HiB-ORG or maybe not"),
),
(("HiB-ORG"), ("HiB-ORG or maybe not"), ("Hello B-surname and L-ocation")),
)
def test_parse_line_crash(line):
with pytest.raises(Exception):
parse_line(index=0, line=line, path=Path(""))
@pytest.mark.parametrize(
"label, expected_type",
(
("B-ORG", "ORG"),
("B-Date et Lieu", "Date et Lieu"),
("I-Date et Lieu", "Date et Lieu"),
),
)
def test_get_type_label(label, expected_type):
assert get_type_label(label) == expected_type
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment