# -*- coding: utf-8 -*-
import pytest

from nerval import evaluate

# Paths of the BIO files handed to evaluate.parse_bio by the tests below.
# NOTE(review): the "tests/..." paths are relative, so they presumably assume
# pytest is launched from the repository root — confirm.

# Passed to parse_bio by the test that expects an AssertionError.
NO_EXIST_BIO = "no_exist.bio"
# Parametrized case whose expected parse result is None.
EMPTY_BIO = "tests/test_empty.bio"
# Passed to parse_bio by the test that expects an exception.
BAD_BIO = "tests/test_bad.bio"
# Expected to parse to expected_parsed_annot.
FAKE_ANNOT_BIO = "tests/test_annot.bio"
# Expected to parse to expected_parsed_predict.
FAKE_PREDICT_BIO = "tests/test_predict.bio"
# Expected to parse to the same result as FAKE_ANNOT_BIO.
BIOESLU_BIO = "tests/bioeslu.bio"
# Expected to parse to expected_parsed_end_of_file.
END_OF_FILE_BIO = "tests/end_of_file.bio"


# Expected result of parsing the annotation fixture. "labels" holds one tag per
# character of "words" (44 tags for 44 characters); each run below is annotated
# with the slice of "words" it covers.
expected_parsed_annot = {
    "entity_count": {"All": 3, "DAT": 1, "LOC": 1, "PER": 1},
    "labels": (
        ["B-PER"]
        + ["I-PER"] * 15  # "Gérard de Nerval"
        + ["O"] * 13  # " was born in "
        + ["B-LOC"]
        + ["I-LOC"] * 4  # "Paris"
        + ["O"] * 4  # " in "
        + ["B-DAT"]
        + ["I-DAT"] * 3  # "1808"
        + ["O"] * 2  # " ."
    ),
    "words": "Gérard de Nerval was born in Paris in 1808 .",
}

# Expected result of parsing the prediction fixture. "labels" holds one tag per
# character of "words" (44 tags for 44 characters); each run below is annotated
# with the slice of "words" it covers.
expected_parsed_predict = {
    "entity_count": {"All": 3, "DAT": 1, "***": 1, "PER": 1},
    "labels": (
        ["B-PER"]
        + ["I-PER"] * 17  # "G*rard de *N*erval"
        + ["O"] * 10  # " bo*rn in "
        + ["B-***"]
        + ["I-***"] * 4  # "Paris"
        + ["O"] * 4  # " in "
        + ["B-DAT"]
        + ["I-DAT"] * 3  # "1833"
        + ["O"] * 3  # " *."
    ),
    "words": "G*rard de *N*erval bo*rn in Paris in 1833 *.",
}

# Expected result of parsing the end-of-file fixture. "labels" holds one tag per
# character of "words" (54 tags for 54 characters); each run below is annotated
# with the slice of "words" it covers.
# NOTE(review): the I-PER run after "France" looks like a PER entity resuming
# around a nested LOC — confirm against the parser's nested-entity handling.
expected_parsed_end_of_file = {
    "entity_count": {"All": 3, "LOC": 2, "PER": 1},
    "labels": (
        ["B-PER"]
        + ["I-PER"] * 33  # "Louis par la grâce de Dieu roy de "
        + ["B-LOC"]
        + ["I-LOC"] * 5  # "France"
        + ["I-PER"] * 6  # " et de"
        + ["O"]  # " "
        + ["B-LOC"]
        + ["I-LOC"] * 6  # "Navarre"
    ),
    "words": "Louis par la grâce de Dieu roy de France et de Navarre",
}


@pytest.mark.parametrize(
    "test_input, expected",
    [
        # Annotation fixture.
        (FAKE_ANNOT_BIO, expected_parsed_annot),
        # Prediction fixture.
        (FAKE_PREDICT_BIO, expected_parsed_predict),
        # This fixture is expected to yield None rather than a dict.
        (EMPTY_BIO, None),
        # Expected to parse to the same result as the annotation fixture.
        (BIOESLU_BIO, expected_parsed_annot),
        # Fixture whose last entity runs up to the final label.
        (END_OF_FILE_BIO, expected_parsed_end_of_file),
    ],
)
def test_parse_bio(test_input, expected):
    """Check that parse_bio returns the expected value for each fixture path."""
    parsed = evaluate.parse_bio(test_input)
    assert parsed == expected


def test_parse_bio_bad_input():
    """Check that parse_bio raises when given the BAD_BIO fixture."""
    bad_path = BAD_BIO
    # NOTE(review): Exception is the broadest possible match; narrow it if
    # parse_bio raises a more specific type for malformed input — confirm.
    with pytest.raises(Exception):
        evaluate.parse_bio(bad_path)

def test_parse_bio_no_input():
    """Check that parse_bio raises AssertionError for the NO_EXIST_BIO path."""
    missing_path = NO_EXIST_BIO
    with pytest.raises(AssertionError):
        evaluate.parse_bio(missing_path)