Skip to content
Snippets Groups Projects
Commit a72f3bea authored by Solene Tarride's avatar Solene Tarride
Browse files

Write tests for data extraction

parent 7cb926f1
No related branches found
No related tags found
No related merge requests found
......@@ -468,8 +468,8 @@ def test_extract(
ⓢ B a r e y r e ⎵ ⎵ ⓕ J e a n ⎵ ⎵ ⓑ 2 8 . 3 . 1 1
ⓢ R o u s s y ⎵ ⎵ ⓕ J e a n ⎵ ⎵ ⓑ 4 . 1 1 . 1 4
ⓢ M a r i n ⎵ ⎵ ⓕ M a r c e l ⎵ ⎵ ⓑ 1 0 . 8 . 0 6
A m i c a l ⎵ ⎵ ⓕ E l o i ⎵ ⎵ ⓑ 1 1 . 1 0 . 0 4
B i r o s ⎵ ⎵ ⓕ M a e l ⎵ ⎵ ⓑ 3 0 . 1 0 . 1 0"""
R o q u e s ⎵ ⎵ ⓕ E l o i ⎵ ⎵ ⓑ 1 1 . 1 0 . 0 4
G i r o s ⎵ ⎵ ⓕ P a u l ⎵ ⎵ ⓑ 3 0 . 1 0 . 1 0"""
# Transcriptions with worker version are in lowercase
if transcription_entities_worker_version:
......@@ -486,22 +486,20 @@ def test_extract(
"", expected_language_corpus
)
assert (
output / "language_model" / "corpus.txt"
).read_text() == expected_language_corpus
assert (output / "language_corpus.txt").read_text() == expected_language_corpus
# Check "language_tokens.txt"
expected_language_tokens = [
t if t != " " else "" for t in sorted(list(expected_charset))
]
expected_language_tokens.append("")
assert (output / "language_model" / "tokens.txt").read_text() == "\n".join(
assert (output / "language_tokens.txt").read_text() == "\n".join(
expected_language_tokens
)
# Check "language_lexicon.txt"
expected_language_lexicon = [f"{t} {t}" for t in expected_language_tokens]
assert (output / "language_model" / "lexicon.txt").read_text() == "\n".join(
assert (output / "language_lexicon.txt").read_text() == "\n".join(
expected_language_lexicon
)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment