diff --git a/tests/test_extract.py b/tests/test_extract.py index 483324b56d716e9c5fcc46d874c5009ca366ca66..fb9fb2521172da3f2a6732be60cae2e023b12a8d 100644 --- a/tests/test_extract.py +++ b/tests/test_extract.py @@ -468,8 +468,8 @@ def test_extract( ⓢ B a r e y r e ⎵ ⎵ ⓕ J e a n ⎵ ⎵ ⓑ 2 8 . 3 . 1 1 ⓢ R o u s s y ⎵ ⎵ ⓕ J e a n ⎵ ⎵ ⓑ 4 . 1 1 . 1 4 ⓢ M a r i n ⎵ ⎵ ⓕ M a r c e l ⎵ ⎵ ⓑ 1 0 . 8 . 0 6 -ⓢ A m i c a l ⎵ ⎵ ⓕ E l o i ⎵ ⎵ ⓑ 1 1 . 1 0 . 0 4 -ⓢ B i r o s ⎵ ⎵ ⓕ M a e l ⎵ ⎵ ⓑ 3 0 . 1 0 . 1 0""" +ⓢ R o q u e s ⎵ ⎵ ⓕ E l o i ⎵ ⎵ ⓑ 1 1 . 1 0 . 0 4 +ⓢ G i r o s ⎵ ⎵ ⓕ P a u l ⎵ ⎵ ⓑ 3 0 . 1 0 . 1 0""" # Transcriptions with worker version are in lowercase if transcription_entities_worker_version: @@ -486,22 +486,20 @@ def test_extract( "⎵", expected_language_corpus ) - assert ( - output / "language_model" / "corpus.txt" - ).read_text() == expected_language_corpus + assert (output / "language_corpus.txt").read_text() == expected_language_corpus # Check "language_tokens.txt" expected_language_tokens = [ t if t != " " else "⎵" for t in sorted(list(expected_charset)) ] expected_language_tokens.append("◌") - assert (output / "language_model" / "tokens.txt").read_text() == "\n".join( + assert (output / "language_tokens.txt").read_text() == "\n".join( expected_language_tokens ) # Check "language_lexicon.txt" expected_language_lexicon = [f"{t} {t}" for t in expected_language_tokens] - assert (output / "language_model" / "lexicon.txt").read_text() == "\n".join( + assert (output / "language_lexicon.txt").read_text() == "\n".join( expected_language_lexicon )