From 2cd63f5fa6c8715b0388e4ab41ea028b30b772a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sol=C3=A8ne=20Tarride?= <starride@teklia.com> Date: Thu, 12 Oct 2023 14:00:30 +0200 Subject: [PATCH] Use the same space token as sentencepiece --- dan/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dan/utils.py b/dan/utils.py index 69e7d82a..e86e08f5 100644 --- a/dan/utils.py +++ b/dan/utils.py @@ -22,7 +22,7 @@ class Token(NamedTuple): class LMTokenMapping(NamedTuple): - space: Token = Token("⎵", " ") + space: Token = Token("▁", " ") linebreak: Token = Token("↵", "\n") ctc: Token = Token("◌", "<ctc>") -- GitLab