Skip to content
Snippets Groups Projects
Commit b57d5979 authored by Manon Blanco, committed by Manon Blanco
Browse files

Test with only start token AND separators

parent a0369be2
No related branches found
No related tags found
1 merge request: !214 "Filter entities by name when extracting data from Arkindex"
This commit is part of merge request !214. Comments created here will be created in the context of that merge request.
......@@ -170,9 +170,12 @@ def test_reconstruct_text_joined_entities(entity_separators, text_before, text_a
)
@pytest.mark.parametrize("joined", (True, False))
@pytest.mark.parametrize("text_before", ("", "text before "))
@pytest.mark.parametrize("text_after", ("", " text after"))
def test_reconstruct_text_only_start_token(text_before, text_after):
def test_reconstruct_text_only_start_token(joined, text_before, text_after):
separator = " " if not joined else ""
arkindex_extractor = ArkindexExtractor(entity_separators=[" ", "\n"])
arkindex_extractor.tokens = {
"P": EntityType(start=""),
......@@ -180,7 +183,7 @@ def test_reconstruct_text_only_start_token(text_before, text_after):
}
assert (
arkindex_extractor.reconstruct_text(
text_before + "LouisXIV" + text_after,
text_before + "Louis" + separator + "XIV" + text_after,
[
Entity(
offset=0 + len(text_before),
......@@ -189,12 +192,12 @@ def test_reconstruct_text_only_start_token(text_before, text_after):
value="Louis",
),
Entity(
offset=5 + len(text_before),
offset=5 + len(separator) + len(text_before),
length=3,
type="I",
value="XIV",
),
],
)
== "ⓟLouisⓘXIV"
== "ⓟLouis" + separator + "ⓘXIV"
)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment