From b84e25c3a831f9a98bd3de1a37651ce93c401b53 Mon Sep 17 00:00:00 2001 From: manonBlanco <blanco@teklia.com> Date: Mon, 6 Nov 2023 17:05:09 +0100 Subject: [PATCH] Update logs --- dan/datasets/extract/utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dan/datasets/extract/utils.py b/dan/datasets/extract/utils.py index dd57da22..6bd3693c 100644 --- a/dan/datasets/extract/utils.py +++ b/dan/datasets/extract/utils.py @@ -193,10 +193,12 @@ class Tokenizer: vocab_size=self.subword_vocab_size, model_prefix=self.prefix, user_defined_symbols=self.special_tokens, + minloglevel=1, ) except Exception as e: logger.warning( - f"Failed to train a sentencepiece model for subword tokenization: {e}" + f"Failed to train a sentencepiece model for subword tokenization: {e} " + "Try again by editing the `--subword-vocab-size` parameter." ) self.sentencepiece_model = None return -- GitLab