Commit dd279dab authored by Solene Tarride

Use CTCHypothesis.tokens instead of CTCHypothesis.words

parent bf71ddcb
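Context for the change: the keyword arguments visible in __init__ (lm=, a tokens file read line by line) match torchaudio's ctc_decoder, whose hypotheses expose both a words field (word strings recovered through the lexicon) and a tokens field (a LongTensor of token indices). Joining words directly can drop token-level detail such as spacing, so the text is instead rebuilt from tokens, which needs the inverse index added in the first hunk. A minimal sketch of the two fields, assuming the torchaudio decoder; the file paths and emission tensor are hypothetical:

# Sketch of the torchaudio ctc_decoder output this commit consumes.
# Paths are hypothetical; the kwargs mirror the ones visible in __init__.
import torch
from torchaudio.models.decoder import ctc_decoder

decoder = ctc_decoder(
    lexicon="lexicon.txt",  # hypothetical lexicon path
    tokens="tokens.txt",    # one token per line, as read_txt() expects
    lm="lm.arpa",           # hypothetical n-gram language model
    nbest=1,
)
emissions = torch.rand(1, 50, 30)  # (batch, frames, tokens), float32 on CPU
hypotheses = decoder(emissions)    # List[List[CTCHypothesis]]
best = hypotheses[0][0]
print(best.words)   # List[str]: word-level output recovered via the lexicon
print(best.tokens)  # LongTensor of token indices, mapped back via index_to_token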
@@ -495,6 +495,7 @@ class CTCLanguageDecoder:
         self.tokens_to_index = {
             token: i for i, token in enumerate(read_txt(tokens_path).split("\n"))
         }
+        self.index_to_token = {i: token for token, i in self.tokens_to_index.items()}
         self.blank_token_id = self.tokens_to_index[self.mapping.ctc.encoded]
         self.decoder = ctc_decoder(
             lm=language_model_path,
@@ -516,7 +517,6 @@ class CTCLanguageDecoder:
         high_prob = batch_features.max()
         low_prob = batch_features.min()
         batch_size, n_frames, n_tokens = batch_features.shape
-        # Reset probabilities for the CTC token
         batch_features[:, :, -1] = (
             torch.ones((batch_size, n_frames), dtype=torch.float32) * low_prob
         )
@@ -553,10 +553,10 @@ class CTCLanguageDecoder:
         out["text"] = [
             "".join(
                 [
-                    self.mapping.display[token]
-                    if token in self.mapping.display
-                    else token
-                    for token in hypothesis[0].words
+                    self.mapping.display[self.index_to_token[token]]
+                    if self.index_to_token[token] in self.mapping.display
+                    else self.index_to_token[token]
+                    for token in hypothesis[0].tokens.tolist()
                 ]
             )
             for hypothesis in hypotheses
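For reference, a self-contained sketch of the lookup the last hunk performs; the token inventory and display mapping below are made up, and only the names tokens_to_index, index_to_token, and display follow the diff:

import torch

# Illustrative token inventory, as __init__ would build it from the tokens file.
tokens_to_index = {"a": 0, "b": 1, "<space>": 2, "<ctc>": 3}
index_to_token = {i: token for token, i in tokens_to_index.items()}

# Illustrative display mapping: special tokens rendered as visible text.
display = {"<space>": " "}

# A fake best hypothesis: the decoder emits token *indices*, not strings.
tokens = torch.tensor([0, 1, 2, 0])

text = "".join(
    display[index_to_token[token]]
    if index_to_token[token] in display
    else index_to_token[token]
    for token in tokens.tolist()
)
assert text == "ab a"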