From 9af2a5cb18a26f2b37e85d3201c13e56fc7b3f6b Mon Sep 17 00:00:00 2001
From: manonBlanco <blanco@teklia.com>
Date: Thu, 20 Jul 2023 11:17:20 +0200
Subject: [PATCH] Test with only start token AND separators

---
 tests/test_extract.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/tests/test_extract.py b/tests/test_extract.py
index e6f999ee..c20c4674 100644
--- a/tests/test_extract.py
+++ b/tests/test_extract.py
@@ -170,9 +170,12 @@ def test_reconstruct_text_joined_entities(entity_separators, text_before, text_a
     )
 
 
+@pytest.mark.parametrize("joined", (True, False))
 @pytest.mark.parametrize("text_before", ("", "text before "))
 @pytest.mark.parametrize("text_after", ("", " text after"))
-def test_reconstruct_text_only_start_token(text_before, text_after):
+def test_reconstruct_text_only_start_token(joined, text_before, text_after):
+    separator = " " if not joined else ""
+
     arkindex_extractor = ArkindexExtractor(entity_separators=[" ", "\n"])
     arkindex_extractor.tokens = {
         "P": EntityType(start="ⓟ"),
@@ -180,7 +183,7 @@ def test_reconstruct_text_only_start_token(text_before, text_after):
     }
     assert (
         arkindex_extractor.reconstruct_text(
-            text_before + "LouisXIV" + text_after,
+            text_before + "Louis" + separator + "XIV" + text_after,
             [
                 Entity(
                     offset=0 + len(text_before),
@@ -189,12 +192,12 @@ def test_reconstruct_text_only_start_token(text_before, text_after):
                     value="Louis",
                 ),
                 Entity(
-                    offset=5 + len(text_before),
+                    offset=5 + len(separator) + len(text_before),
                     length=3,
                     type="I",
                     value="XIV",
                 ),
             ],
         )
-        == "ⓟLouisⓘXIV"
+        == "ⓟLouis" + separator + "ⓘXIV"
     )
--
GitLab
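
A minimal sketch, not part of the patch, of the two cases the new "joined" parameter exercises (shown here without text_before/text_after). It only mirrors the input/output strings asserted in the test above and does not call the real ArkindexExtractor.reconstruct_text:

    # joined=True  -> source "LouisXIV",  second entity at offset 5 -> "ⓟLouisⓘXIV"
    # joined=False -> source "Louis XIV", second entity at offset 6 -> "ⓟLouis ⓘXIV"
    for joined in (True, False):
        separator = " " if not joined else ""
        source = "Louis" + separator + "XIV"
        # With only start tokens configured, reconstruction prepends ⓟ/ⓘ and keeps
        # the original separator (if any) between the two entities.
        expected = "ⓟLouis" + separator + "ⓘXIV"
        rebuilt = "ⓟ" + source[:5] + separator + "ⓘ" + source[5 + len(separator):]
        assert rebuilt == expected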