# Copyright Teklia (contact@teklia.com) & Denis Coquenet
# This code is licensed under CeCILL-C

import logging

import pytest

from dan.bio import convert
from dan.utils import EntityType

# Sample annotated with starting tokens only (Ⓐ, Ⓑ, Ⓒ), one record per line.
# Used by `test_convert_starting_tokens`.
ST_TEXT = "\n".join(
    (
        "ⒶBryan B ⒷParis ⒸJanuary 1st, 1987",
        "ⒶJoe J ⒷGrenoble ⒸAugust 24, 1995",
        "ⒶHannah H ⒷLille ⒸSeptember 15, 2002",
    )
)

# Sample annotated with paired starting/ending tokens (Ⓐ…Ⓑ, Ⓒ…Ⓓ, Ⓔ…Ⓕ).
# The second line nests an Ⓐ…Ⓑ span inside a Ⓒ…Ⓓ span.
# Used by `test_convert_starting_and_ending_tokens`.
ST_ET_TEXT = "\n".join(
    (
        "ⒶBryanⒷ and ⒶJoeⒷ will visit the ⒸEiffel TowerⒹ in ⒸParisⒹ next ⒺTuesdayⒻ.",
        "ⒶHannahⒷ will visit the ⒸPlace ⒶCharles de GaulleⒷ étoileⒹ on ⒺWednesdayⒻ.",
    )
)


def test_convert_with_error():
    """Check that `convert` raises when an entity is closed by another type's ending token.

    The input opens a Person entity (Ⓐ) but closes it with the Location
    ending token (Ⓓ); an `AssertionError` with a matching message is expected.
    """
    entity_types = {
        label: EntityType(start=opening, end=closing)
        for label, opening, closing in (
            ("Person", "Ⓐ", "Ⓑ"),
            ("Location", "Ⓒ", "Ⓓ"),
        )
    }
    expected_message = "Ending token Ⓓ doesn't match the starting token Ⓐ"

    with pytest.raises(AssertionError, match=expected_message):
        convert("ⒶFredⒹ", entity_types)


def test_convert_with_warnings(caplog):
    """Check that ending tokens without a starting token are skipped with a warning.

    The input contains two ending tokens (Ⓑ after "Bryan", Ⓓ after
    "Eiffel Tower") that were never opened: the affected words are expected
    to be tagged `O`, and one warning per orphan ending token is expected
    in the captured logs. The properly delimited "Joe" stays a Person.
    """
    entity_types = dict(
        Person=EntityType(start="Ⓐ", end="Ⓑ"),
        Location=EntityType(start="Ⓒ", end="Ⓓ"),
    )
    text = "BryanⒷ and ⒶJoeⒷ will visit the Eiffel TowerⒹ"

    expected_bio = [
        "Bryan O",
        "and O",
        "Joe B-Person",
        "will O",
        "visit O",
        "the O",
        "Eiffel O",
        "Tower O",
    ]
    bio_lines = convert(text, entity_types).split("\n")
    assert bio_lines == expected_bio

    # Only the level and the message matter here, not the logger name.
    expected_warnings = [
        (
            logging.WARNING,
            "Missing starting token for ending token Ⓑ, skipping the entity",
        ),
        (
            logging.WARNING,
            "Missing starting token for ending token Ⓓ, skipping the entity",
        ),
    ]
    captured = [
        (levelno, text_message)
        for _logger_name, levelno, text_message in caplog.record_tuples
    ]
    assert captured == expected_warnings


def test_convert_starting_tokens():
    """Check the BIO conversion of `ST_TEXT` when entities only have starting tokens.

    With no ending tokens defined, each entity is expected to run until the
    next starting token: its first word is tagged `B-<type>` and the
    following words `I-<type>`.
    """
    entity_types = {
        label: EntityType(start=opening)
        for label, opening in (
            ("Person", "Ⓐ"),
            ("Location", "Ⓑ"),
            ("Date", "Ⓒ"),
        )
    }

    # Expected tags, grouped by input line of ST_TEXT.
    first_record = [
        "Bryan B-Person",
        "B I-Person",
        "Paris B-Location",
        "January B-Date",
        "1st, I-Date",
        "1987 I-Date",
    ]
    second_record = [
        "Joe B-Person",
        "J I-Person",
        "Grenoble B-Location",
        "August B-Date",
        "24, I-Date",
        "1995 I-Date",
    ]
    third_record = [
        "Hannah B-Person",
        "H I-Person",
        "Lille B-Location",
        "September B-Date",
        "15, I-Date",
        "2002 I-Date",
    ]
    expected_bio = [*first_record, *second_record, *third_record]

    bio_lines = convert(ST_TEXT, entity_types).split("\n")
    assert bio_lines == expected_bio


def test_convert_starting_and_ending_tokens():
    """Check the BIO conversion of `ST_ET_TEXT` with paired starting/ending tokens.

    Words outside any entity are expected to be tagged `O`, and the
    punctuation following an ending token is expected on its own line.
    """
    entity_types = dict(
        Person=EntityType(start="Ⓐ", end="Ⓑ"),
        Location=EntityType(start="Ⓒ", end="Ⓓ"),
        Date=EntityType(start="Ⓔ", end="Ⓕ"),
    )

    # Expected tags for the first line of ST_ET_TEXT.
    first_sentence = [
        "Bryan B-Person",
        "and O",
        "Joe B-Person",
        "will O",
        "visit O",
        "the O",
        "Eiffel B-Location",
        "Tower I-Location",
        "in O",
        "Paris B-Location",
        "next O",
        "Tuesday B-Date",
        ". O",
    ]
    # Expected tags for the second line, where a Person is nested inside a
    # Location: the Location resumes with `I-Location` once the Person ends.
    second_sentence = [
        "Hannah B-Person",
        "will O",
        "visit O",
        "the O",
        "Place B-Location",
        "Charles B-Person",
        "de I-Person",
        "Gaulle I-Person",
        "étoile I-Location",
        "on O",
        "Wednesday B-Date",
        ". O",
    ]
    expected_bio = first_sentence + second_sentence

    bio_lines = convert(ST_ET_TEXT, entity_types).split("\n")
    assert bio_lines == expected_bio