Skip to content
Snippets Groups Projects
Commit 64af952c authored by Solene Tarride
Browse files

Update tests for data extraction

parent 02851fd3
No related branches found
No related tags found
1 merge request: !287 Support subword and word language models
......@@ -186,7 +186,6 @@ def test_normalize_spaces(text, trimmed):
("\rcarriage_return", "carriage_return"),
("\r\ncarriage_return+linebreak", "carriage_return+linebreak"),
("\n\r\r\n\ncarriage_return+linebreak", "carriage_return+linebreak"),
("no|linebreaks", "no|linebreaks"),
),
)
def test_normalize_linebreaks(text, trimmed):
......@@ -397,10 +396,9 @@ def test_extract(
VAL_DIR / "val-page_1-line_2.jpg",
VAL_DIR / "val-page_1-line_3.jpg",
output / "labels.json",
# Language resources
output / "language_model" / "corpus.txt",
output / "language_model" / "lexicon.txt",
output / "language_model" / "tokens.txt",
output / "language_corpus.txt",
output / "language_lexicon.txt",
output / "language_tokens.txt",
]
assert sorted(filter(methodcaller("is_file"), output.rglob("*"))) == expected_paths
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment