Skip to content
Snippets Groups Projects

Catch RuntimeError when formatting LM files

Merged Manon Blanco requested to merge catch-error-when-formatting-lm-file into main
All threads resolved!
@@ -193,10 +193,12 @@ class Tokenizer:
vocab_size=self.subword_vocab_size,
model_prefix=self.prefix,
user_defined_symbols=self.special_tokens,
minloglevel=1,
)
except Exception as e:
logger.warning(
- f"Failed to train a sentencepiece model for subword tokenization: {e}"
+ f"Failed to train a sentencepiece model for subword tokenization: {e} "
+ "Try again by editing the `--subword-vocab-size` parameter."
)
self.sentencepiece_model = None
return
Loading