Skip to content
Snippets Groups Projects
Commit 044ba51c authored by Solene Tarride's avatar Solene Tarride
Browse files

Implement tests

parent 7b32a134
No related branches found
No related tags found
1 merge request: !8 Draft: Support nested entities
Pipeline #203241 failed
import pytest
from bio_parser.parse.document import Document, Span, Tag, Token, _make_ner_label
from bio_parser.parse.document import Document, Span, Tag, Token
from bio_parser.parse.nested_document import NestedDocument, NestedToken
from tests.parse import DATA_DIR
......@@ -33,7 +33,8 @@ def test_parse_document(nested_document: NestedDocument):
# Check word entities
assert nested_document.word_entities == [
(["child", "name"], "Charles"),
(["child"], ""), (["child"], "à"),
(["child"], ""),
(["child"], "à"),
(["child", "location"], "Beaune"),
(["child"], "en"),
(["child", "date"], "1836"),
......@@ -54,3 +55,74 @@ def test_parse_document(nested_document: NestedDocument):
"Charles né à Beaune en 1836 père Jean Bigre charpentier de cette paroisse mère Marie"
)
def test_parse_nested_token(nested_document: NestedDocument):
    """Verify the attributes exposed by three tokens of the nested document.

    The three tokens exercise different tag combinations:

    * index 0 ("Charles"): BEGINNING tag on both label levels;
    * index 3 ("Beaune"): INSIDE on the first level, BEGINNING on the second;
    * index -2 ("mère"): a single OUTSIDE tag with a ``None`` label.

    For each token the word, labels, tags, token-level IOB labels,
    character-level IOB labels and characters are compared to expected values.
    """
    # --- B- token: begins both label levels --------------------------------
    begin_token: NestedToken = nested_document.nested_tokens[0]
    # Surface form
    assert begin_token.word == "Charles"
    # One label per nesting level
    assert begin_token.labels == ["child", "name"]
    # One tag per nesting level
    assert begin_token.tags == [Tag.BEGINNING, Tag.BEGINNING]
    # Token-level IOB labels
    assert begin_token.iob_labels == ["B-child", "B-name"]
    # Character-level IOB labels: only the first character keeps the B- prefix
    assert begin_token.char_labels == [
        ["B-child"] + ["I-child"] * 6,
        ["B-name"] + ["I-name"] * 6,
    ]
    # Individual characters
    assert begin_token.chars == list("Charles")

    # --- I- token: first level continues, second level begins --------------
    inside_token: NestedToken = nested_document.nested_tokens[3]
    # Surface form
    assert inside_token.word == "Beaune"
    # One label per nesting level
    assert inside_token.labels == ["child", "location"]
    # One tag per nesting level
    assert inside_token.tags == [Tag.INSIDE, Tag.BEGINNING]
    # Token-level IOB labels
    assert inside_token.iob_labels == ["I-child", "B-location"]
    # Character-level IOB labels: the first level is I- for every character
    assert inside_token.char_labels == [
        ["I-child"] * 6,
        ["B-location"] + ["I-location"] * 5,
    ]
    # Individual characters
    assert inside_token.chars == list("Beaune")

    # --- O token: not part of any entity -----------------------------------
    outside_token: NestedToken = nested_document.nested_tokens[-2]
    # Surface form
    assert outside_token.word == "mère"
    # A single level whose label is None
    assert outside_token.labels == [None]
    # A single OUTSIDE tag
    assert outside_token.tags == [Tag.OUTSIDE]
    # Token-level IOB label
    assert outside_token.iob_labels == ["O"]
    # Character-level IOB labels: every character is O
    assert outside_token.char_labels == [["O"] * 4]
    # Individual characters, spelled out so the accented letter is one element
    assert outside_token.chars == ["m", "è", "r", "e"]
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment