# test_parse_bio.py (4.10 KiB)
# Authored by Yoann Schneider
# -*- coding: utf-8 -*-
from pathlib import Path
import pytest
from nerval import evaluate
from nerval.parse import parse_line
# Expected parse of the reference annotation. "labels" holds one tag per
# character of "words" (44 of each), written here as run-length segments.
expected_parsed_annot = {
    "entity_count": {"All": 3, "DAT": 1, "LOC": 1, "PER": 1},
    "labels": (
        ["B-PER"] + ["I-PER"] * 15  # "Gérard de Nerval"
        + ["O"] * 13  # " was born in "
        + ["B-LOC"] + ["I-LOC"] * 4  # "Paris"
        + ["O"] * 4  # " in "
        + ["B-DAT"] + ["I-DAT"] * 3  # "1808"
        + ["O"] * 2  # " ."
    ),
    "words": "Gérard de Nerval was born in Paris in 1808 .",
}
# Expected parse of the prediction sample. "labels" holds one tag per
# character of "words" (44 of each), written here as run-length segments.
expected_parsed_predict = {
    "entity_count": {"All": 3, "DAT": 1, "***": 1, "PER": 1},
    "labels": (
        ["B-PER"] + ["I-PER"] * 17  # "G*rard de *N*erval"
        + ["O"] * 10  # " bo*rn in "
        + ["B-***"] + ["I-***"] * 4  # "Paris", tagged with the "***" label
        + ["O"] * 4  # " in "
        + ["B-DAT"] + ["I-DAT"] * 3  # "1833"
        + ["O"] * 3  # " *."
    ),
    "words": "G*rard de *N*erval bo*rn in Paris in 1833 *.",
}
# Expected parse of the "end of file" sample. "labels" holds one tag per
# character of "words" (54 of each), written here as run-length segments.
expected_parsed_end_of_file = {
    "entity_count": {"All": 3, "LOC": 2, "PER": 1},
    "labels": (
        ["B-PER"] + ["I-PER"] * 33  # "Louis par la grâce de Dieu roy de "
        + ["B-LOC"] + ["I-LOC"] * 5  # "France"
        + ["I-PER"] * 6  # " et de" continues with PER tags
        + ["O"]  # single separating space
        + ["B-LOC"] + ["I-LOC"] * 6  # "Navarre"
    ),
    "words": "Louis par la grâce de Dieu roy de France et de Navarre",
}
@pytest.mark.parametrize(
    "test_input, expected",
    [
        # Reference annotation sample.
        (pytest.lazy_fixture("fake_annot_bio"), expected_parsed_annot),
        # Prediction sample.
        (pytest.lazy_fixture("fake_predict_bio"), expected_parsed_predict),
        # The "empty_bio" fixture is expected to parse to None.
        (pytest.lazy_fixture("empty_bio"), None),
        # The "bioeslu_bio" fixture must parse to the same result as the
        # reference annotation.
        (pytest.lazy_fixture("bioeslu_bio"), expected_parsed_annot),
        # Sample exercising the end-of-file case.
        (pytest.lazy_fixture("end_of_file_bio"), expected_parsed_end_of_file),
    ],
)
def test_parse_bio(test_input, expected):
    """Check that ``evaluate.parse_bio`` returns the expected result for each fixture."""
    parsed = evaluate.parse_bio(test_input)
    assert parsed == expected
def test_parse_bio_bad_input(bad_bio):
    """Check that ``evaluate.parse_bio`` raises on the ``bad_bio`` fixture."""
    # Only the fact that an exception is raised is checked, not its type.
    with pytest.raises(Exception):
        evaluate.parse_bio(bad_bio)
def test_parse_bio_no_input():
    """Check that ``evaluate.parse_bio`` raises ``AssertionError`` for ``Path("not_a_bio")``."""
    # NOTE(review): presumably "not_a_bio" is a path that does not exist — confirm.
    missing_path = Path("not_a_bio")
    with pytest.raises(AssertionError):
        evaluate.parse_bio(missing_path)
@pytest.mark.parametrize(
    "line, word, label",
    (
        # Plain "word label" line.
        ("Hi B-ORG", "Hi", "B-ORG"),
        # Everything after the first token is expected back as the label.
        ("Hi B-Org or maybe not org", "Hi", "B-Org or maybe not org"),
    ),
)
def test_parse_line(line, word, label):
    """Check that ``parse_line`` splits a line into the expected ``(word, label)`` pair."""
    parsed = parse_line(index=0, line=line, path=Path(""))
    assert parsed == (word, label)
@pytest.mark.parametrize(
    "line",
    (
        # Neither line has a space between the word and the tag.
        "HiB-ORG",
        "HiB-ORG or maybe not",
    ),
)
def test_parse_line_crash(line):
    """Check that ``parse_line`` raises on each of the parametrized lines."""
    # Only the fact that an exception is raised is checked, not its type.
    with pytest.raises(Exception):
        parse_line(index=0, line=line, path=Path(""))