From 98dce1d8b392c38f36c137688a0f8c6d1a397e39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sol=C3=A8ne=20Tarride?= <starride@teklia.com> Date: Mon, 18 Sep 2023 09:23:36 +0200 Subject: [PATCH] Update tests for data extraction --- tests/test_extract.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/test_extract.py b/tests/test_extract.py index cfd78846..483324b5 100644 --- a/tests/test_extract.py +++ b/tests/test_extract.py @@ -185,7 +185,6 @@ def test_normalize_spaces(text, trimmed): ("\rcarriage_return", "carriage_return"), ("\r\ncarriage_return+linebreak", "carriage_return+linebreak"), ("\n\r\r\n\ncarriage_return+linebreak", "carriage_return+linebreak"), - ("no|linebreaks", "no|linebreaks"), ), ) def test_normalize_linebreaks(text, trimmed): @@ -396,10 +395,9 @@ def test_extract( VAL_DIR / "val-page_1-line_2.jpg", VAL_DIR / "val-page_1-line_3.jpg", output / "labels.json", - # Language resources - output / "language_model" / "corpus.txt", - output / "language_model" / "lexicon.txt", - output / "language_model" / "tokens.txt", + output / "language_corpus.txt", + output / "language_lexicon.txt", + output / "language_tokens.txt", ] assert sorted(filter(methodcaller("is_file"), output.rglob("*"))) == expected_paths -- GitLab