Skip to content
Snippets Groups Projects

Allow spaces in label (follow-up)

Merged Yoann Schneider requested to merge better-iob-parsing into master
2 files
+ 7
- 6
Compare changes
  • Side-by-side
  • Inline
Files
2
+ 6
- 2
@@ -5,7 +5,7 @@ from pathlib import Path
NOT_ENTITY_TAG = "O"
BEGINNING_POS = ["B", "S", "U"]
REGEX_IOB_LINE = re.compile(r"^(.*) ((?:[BIESLU]-|O).*)$")
REGEX_IOB_LINE = re.compile(r"^(\S*) ((?:[BIESLU]-|O).*)$")
REGEX_LABEL = re.compile(r"[BIESLU]-(.*)$")
@@ -45,7 +45,11 @@ def parse_line(index: int, line: str, path: Path):
assert match_iob
return match_iob.group(1, 2)
word, label = match_iob.group(1, 2)
# The label should contain either exactly one "-" (from the B-/L-/I-/U- prefix) or none at all (O)
assert label.count("-") <= 1
return word, label
except AssertionError:
raise (
Exception(
Loading