diff --git a/tests/test_extract.py b/tests/test_extract.py index e6f999eea6a6c3e98c14f5a04fdd73eb8694046f..c20c4674e1158b1be8ef5d84a232a38faf589c52 100644 --- a/tests/test_extract.py +++ b/tests/test_extract.py @@ -170,9 +170,12 @@ def test_reconstruct_text_joined_entities(entity_separators, text_before, text_a ) +@pytest.mark.parametrize("joined", (True, False)) @pytest.mark.parametrize("text_before", ("", "text before ")) @pytest.mark.parametrize("text_after", ("", " text after")) -def test_reconstruct_text_only_start_token(text_before, text_after): +def test_reconstruct_text_only_start_token(joined, text_before, text_after): + separator = " " if not joined else "" + arkindex_extractor = ArkindexExtractor(entity_separators=[" ", "\n"]) arkindex_extractor.tokens = { "P": EntityType(start="ⓟ"), @@ -180,7 +183,7 @@ def test_reconstruct_text_only_start_token(text_before, text_after): } assert ( arkindex_extractor.reconstruct_text( - text_before + "LouisXIV" + text_after, + text_before + "Louis" + separator + "XIV" + text_after, [ Entity( offset=0 + len(text_before), @@ -189,12 +192,12 @@ def test_reconstruct_text_only_start_token(text_before, text_after): value="Louis", ), Entity( - offset=5 + len(text_before), + offset=5 + len(separator) + len(text_before), length=3, type="I", value="XIV", ), ], ) - == "ⓟLouisⓘXIV" + == "ⓟLouis" + separator + "ⓘXIV" )