# -*- coding: utf-8 -*-
import pytest

from nerval import evaluate

# Paths of the BIO fixtures handed to ``evaluate.parse_bio``.
NO_EXIST_BIO = "no_exist.bio"
EMPTY_BIO = "tests/test_empty.bio"
BAD_BIO = "tests/test_bad.bio"
FAKE_ANNOT_BIO = "tests/test_annot.bio"
FAKE_PREDICT_BIO = "tests/test_predict.bio"
BIOESLU_BIO = "tests/bioeslu.bio"
END_OF_FILE_BIO = "tests/end_of_file.bio"

# Expected parse results.
#
# In each dictionary below, "labels" holds as many entries as "words" has
# characters (spaces included).  The runs are written as repetitions, and the
# trailing comment on each run shows the slice of "words" it lines up with
# by position.

expected_parsed_annot = {
    "entity_count": {"All": 3, "DAT": 1, "LOC": 1, "PER": 1},
    "labels": (
        ["B-PER"] + ["I-PER"] * 15  # "Gérard de Nerval"
        + ["O"] * 13  # " was born in "
        + ["B-LOC"] + ["I-LOC"] * 4  # "Paris"
        + ["O"] * 4  # " in "
        + ["B-DAT"] + ["I-DAT"] * 3  # "1808"
        + ["O"] * 2  # " ."
    ),
    "words": "Gérard de Nerval was born in Paris in 1808 .",
}

expected_parsed_predict = {
    "entity_count": {"All": 3, "DAT": 1, "***": 1, "PER": 1},
    "labels": (
        ["B-PER"] + ["I-PER"] * 17  # "G*rard de *N*erval"
        + ["O"] * 10  # " bo*rn in "
        + ["B-***"] + ["I-***"] * 4  # "Paris"
        + ["O"] * 4  # " in "
        + ["B-DAT"] + ["I-DAT"] * 3  # "1833"
        + ["O"] * 3  # " *."
    ),
    "words": "G*rard de *N*erval bo*rn in Paris in 1833 *.",
}

expected_parsed_end_of_file = {
    "entity_count": {"All": 3, "LOC": 2, "PER": 1},
    "labels": (
        ["B-PER"] + ["I-PER"] * 33  # "Louis par la grâce de Dieu roy de "
        + ["B-LOC"] + ["I-LOC"] * 5  # "France"
        + ["I-PER"] * 6  # " et de"
        + ["O"]  # " "
        + ["B-LOC"] + ["I-LOC"] * 6  # "Navarre"
    ),
    "words": "Louis par la grâce de Dieu roy de France et de Navarre",
}


@pytest.mark.parametrize(
    "test_input, expected",
    [
        (FAKE_ANNOT_BIO, expected_parsed_annot),
        (FAKE_PREDICT_BIO, expected_parsed_predict),
        (EMPTY_BIO, None),
        (BIOESLU_BIO, expected_parsed_annot),
        (END_OF_FILE_BIO, expected_parsed_end_of_file),
    ],
)
def test_parse_bio(test_input, expected):
    """Check that ``parse_bio`` returns the expected value for each fixture.

    ``expected`` is either a dictionary with the "entity_count", "labels"
    and "words" keys, or ``None`` (the value expected for ``EMPTY_BIO``).
    """
    parsed = evaluate.parse_bio(test_input)
    assert parsed == expected


def test_parse_bio_bad_input():
    """Check that parsing ``BAD_BIO`` raises an exception of some kind."""
    with pytest.raises(Exception):
        evaluate.parse_bio(BAD_BIO)


def test_parse_bio_no_input():
    """Check that parsing ``NO_EXIST_BIO`` raises ``AssertionError``."""
    with pytest.raises(AssertionError):
        evaluate.parse_bio(NO_EXIST_BIO)