From 64af952caf19cd9cb5d6c16cea63111001d004bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sol=C3=A8ne=20Tarride?= <starride@teklia.com> Date: Mon, 18 Sep 2023 09:23:36 +0200 Subject: [PATCH] Update tests for data extraction --- tests/test_extract.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/test_extract.py b/tests/test_extract.py index 4e2672e5..2d4653ef 100644 --- a/tests/test_extract.py +++ b/tests/test_extract.py @@ -186,7 +186,6 @@ def test_normalize_spaces(text, trimmed): ("\rcarriage_return", "carriage_return"), ("\r\ncarriage_return+linebreak", "carriage_return+linebreak"), ("\n\r\r\n\ncarriage_return+linebreak", "carriage_return+linebreak"), - ("no|linebreaks", "no|linebreaks"), ), ) def test_normalize_linebreaks(text, trimmed): @@ -397,10 +396,9 @@ def test_extract( VAL_DIR / "val-page_1-line_2.jpg", VAL_DIR / "val-page_1-line_3.jpg", output / "labels.json", - # Language resources - output / "language_model" / "corpus.txt", - output / "language_model" / "lexicon.txt", - output / "language_model" / "tokens.txt", + output / "language_corpus.txt", + output / "language_lexicon.txt", + output / "language_tokens.txt", ] assert sorted(filter(methodcaller("is_file"), output.rglob("*"))) == expected_paths -- GitLab