From 2cd63f5fa6c8715b0388e4ab41ea028b30b772a1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sol=C3=A8ne=20Tarride?= <starride@teklia.com>
Date: Thu, 12 Oct 2023 14:00:30 +0200
Subject: [PATCH] Use the same space token as sentencepiece

---
 dan/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dan/utils.py b/dan/utils.py
index 69e7d82a..e86e08f5 100644
--- a/dan/utils.py
+++ b/dan/utils.py
@@ -22,7 +22,7 @@ class Token(NamedTuple):
 
 
 class LMTokenMapping(NamedTuple):
-    space: Token = Token("⎵", " ")
+    space: Token = Token("▁", " ")
     linebreak: Token = Token("↵", "\n")
     ctc: Token = Token("◌", "<ctc>")
 
-- 
GitLab