Skip to content
Snippets Groups Projects

Catch RuntimeError when formatting LM files

Merged Manon Blanco requested to merge catch-error-when-formatting-lm-file into main
All threads resolved!
Files
2
@@ -384,6 +384,9 @@ class ArkindexExtractor:
subword_vocab_size=self.subword_vocab_size,
)
if not tokenizer.sentencepiece_model:
return
for level, tokenize in (
("characters", tokenizer.char_tokenize),
("words", tokenizer.word_tokenize),
@@ -454,7 +457,7 @@ class ArkindexExtractor:
)
if failed_downloads:
-logger.error(f"Failed to download {len(failed_downloads)} image(s).")
+logger.warning(f"Failed to download {len(failed_downloads)} image(s).")
print(*list(map(": ".join, failed_downloads)), sep="\n")
def run(self):
Loading