From 044ba51cbcde0f7b451c6d2253e8144e6b0caaac Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sol=C3=A8ne=20Tarride?= <starride@teklia.com>
Date: Thu, 12 Dec 2024 19:06:41 +0100
Subject: [PATCH] Implement tests

---
 tests/parse/test_nested_document.py | 76 ++++++++++++++++++++++++++++-
 1 file changed, 74 insertions(+), 2 deletions(-)

diff --git a/tests/parse/test_nested_document.py b/tests/parse/test_nested_document.py
index 8bb7d21..d2474e4 100644
--- a/tests/parse/test_nested_document.py
+++ b/tests/parse/test_nested_document.py
@@ -1,5 +1,5 @@
 import pytest
-from bio_parser.parse.document import Document, Span, Tag, Token, _make_ner_label
+from bio_parser.parse.document import Document, Span, Tag, Token
 from bio_parser.parse.nested_document import NestedDocument, NestedToken
 
 from tests.parse import DATA_DIR
@@ -33,7 +33,8 @@ def test_parse_document(nested_document: NestedDocument):
     # Check word entities
     assert nested_document.word_entities == [
         (["child", "name"], "Charles"), 
-        (["child"], "né"), (["child"], "à"), 
+        (["child"], "né"), 
+        (["child"], "à"), 
         (["child", "location"], "Beaune"), 
         (["child"], "en"), 
         (["child", "date"], "1836"), 
@@ -54,3 +55,74 @@ def test_parse_document(nested_document: NestedDocument):
         "Charles né à Beaune en 1836 père Jean Bigre charpentier de cette paroisse mère Marie"
     )
 
+
+
+def test_parse_nested_token(nested_document: NestedDocument):
+    nested_token: NestedToken = nested_document.nested_tokens[0]
+
+    # B- token (tagged BEGINNING for both of its labels): check word
+    assert nested_token.word == "Charles"
+
+    # Check labels (one entry per label carried by the token)
+    assert nested_token.labels == ["child", "name"]
+
+    # Check tags (one tag per label, in the same order)
+    assert nested_token.tags == [Tag.BEGINNING, Tag.BEGINNING]
+
+    # Check IOB labels (tag prefix joined to each label)
+    assert nested_token.iob_labels == ["B-child", "B-name"]
+
+    # Check character-level IOB labels (one list per label, one entry per char)
+    assert nested_token.char_labels == [
+        ['B-child', 'I-child', 'I-child', 'I-child', 'I-child', 'I-child', 'I-child'], 
+        ['B-name', 'I-name', 'I-name', 'I-name', 'I-name', 'I-name', 'I-name']
+        ]
+
+    # Check chars
+    assert nested_token.chars == ["C", "h", "a", "r", "l", "e", "s"]
+
+    # I- token (tagged INSIDE for its first label, BEGINNING for its second)
+    nested_token: NestedToken = nested_document.nested_tokens[3]
+
+    # Check word
+    assert nested_token.word == "Beaune"
+
+    # Check labels (one entry per label carried by the token)
+    assert nested_token.labels == ["child", "location"]
+
+    # Check tags (one tag per label, in the same order)
+    assert nested_token.tags == [Tag.INSIDE, Tag.BEGINNING]
+
+    # Check IOB labels (tag prefix joined to each label)
+    assert nested_token.iob_labels == ["I-child", "B-location"]
+
+    # Check character-level IOB labels (every char of the first label is I-)
+    assert nested_token.char_labels == [
+        ['I-child', 'I-child', 'I-child', 'I-child', 'I-child', 'I-child'], 
+        ['B-location', 'I-location', 'I-location', 'I-location', 'I-location', 'I-location']
+        ]
+
+    # Check chars
+    assert nested_token.chars == ["B", "e", "a", "u", "n", "e"]
+
+    # O token (tagged OUTSIDE, carries no label); second-to-last token
+    nested_token: NestedToken = nested_document.nested_tokens[-2]
+
+    # Check word
+    assert nested_token.word == "mère"
+
+    # Check labels (a single None entry when the token has no label)
+    assert nested_token.labels == [None]
+
+    # Check tags (a single OUTSIDE tag)
+    assert nested_token.tags == [Tag.OUTSIDE]
+
+    # Check IOB labels (bare "O", no label suffix)
+    assert nested_token.iob_labels == ["O"]
+
+    # Check character-level IOB labels (one "O" per char)
+    assert nested_token.char_labels == [['O', 'O', 'O', 'O']]
+
+    # Check chars
+    assert nested_token.chars == ["m", "è", "r", "e"]
+
-- 
GitLab