diff --git a/dan/datasets/extract/extract.py b/dan/datasets/extract/extract.py
index 0d7002b58e26d705b1578678764414d0d3176402..a7fa8d32e32c70ac8cef0c7b95523141e19bc26f 100644
--- a/dan/datasets/extract/extract.py
+++ b/dan/datasets/extract/extract.py
@@ -36,13 +36,13 @@ from dan.datasets.extract.utils import (
     normalize_linebreaks,
     normalize_spaces,
 )
+from dan.utils import EntityType, LMTokenMapping, parse_tokens
+from line_image_extractor.extractor import extract
 from line_image_extractor.image_utils import (
     BoundingBox,
     Extraction,
     polygon_to_bbox,
 )
-from dan.utils import EntityType, LMTokenMapping, parse_tokens
-from line_image_extractor.extractor import extract
 
 IMAGES_DIR = "images"  # Subpath to the images directory.
 LANGUAGE_DIR = "language_model"  # Subpath to the language model directory.
diff --git a/dan/datasets/extract/utils.py b/dan/datasets/extract/utils.py
index 5e867a39e4c9bde0b94d7989da7e2439deba889f..a2184f0777c13d5aca2b15d4005b9b2c10f0a804 100644
--- a/dan/datasets/extract/utils.py
+++ b/dan/datasets/extract/utils.py
@@ -81,6 +81,7 @@ def insert_token(text: str, entity_type: EntityType, offset: int, length: int) -
         + (entity_type.end if entity_type else "")
     )
 
+
 def normalize_linebreaks(text: str) -> str:
     """
     Remove begin/ending linebreaks
@@ -106,4 +107,3 @@ def get_bbox(polygon: List[List[int]]) -> str:
     x, y = min(all_x), min(all_y)
     width, height = max(all_x) - x, max(all_y) - y
     return ",".join(list(map(str, [int(x), int(y), int(width), int(height)])))
-
diff --git a/dan/ocr/decoder.py b/dan/ocr/decoder.py
index bd858c16a56e551157d4d497cfe8ba22a85fea91..b4da94d0464c37d6543b8b7310faa5fbed06776b 100644
--- a/dan/ocr/decoder.py
+++ b/dan/ocr/decoder.py
@@ -505,8 +505,8 @@ class CTCLanguageDecoder:
             tokens=tokens_path,
             lm_weight=self.language_model_weight,
             blank_token=self.mapping.ctc.encoded,
-            unk_word=self.mapping.unknown.encoded,
             sil_token=self.mapping.space.encoded,
+            unk_word="⁇",
             nbest=1,
         )
         # No GPU support
diff --git a/dan/utils.py b/dan/utils.py
index c65df263f1789e2799e1cc9c8cadf1b63dfea9f7..69e7d82ac2610d5fbdfedf336df69177b3fd8471 100644
--- a/dan/utils.py
+++ b/dan/utils.py
@@ -25,7 +25,6 @@ class LMTokenMapping(NamedTuple):
     space: Token = Token("⎵", " ")
     linebreak: Token = Token("↵", "\n")
     ctc: Token = Token("◌", "<ctc>")
-    unknown: Token = Token("⁇", "<unk>")
 
     @property
     def display(self):
diff --git a/tests/test_extract.py b/tests/test_extract.py
index 0a5e09559e6b81b12d5972c142642e67aa629ee6..cfd78846d19351d206e4fc689b6ac742e9c759ab 100644
--- a/tests/test_extract.py
+++ b/tests/test_extract.py
@@ -470,8 +470,8 @@ def test_extract(
 ⓢ B a r e y r e ⎵ ⎵ ⓕ J e a n ⎵ ⎵ ⓑ 2 8 . 3 . 1 1
 ⓢ R o u s s y ⎵ ⎵ ⓕ J e a n ⎵ ⎵ ⓑ 4 . 1 1 . 1 4
 ⓢ M a r i n ⎵ ⎵ ⓕ M a r c e l ⎵ ⎵ ⓑ 1 0 . 8 . 0 6
-ⓢ R o q u e s ⎵ ⎵ ⓕ E l o i ⎵ ⎵ ⓑ 1 1 . 1 0 . 0 4
-ⓢ G i r o s ⎵ ⎵ ⓕ P a u l ⎵ ⎵ ⓑ 3 0 . 1 0 . 1 0"""
+ⓢ A m i c a l ⎵ ⎵ ⓕ E l o i ⎵ ⎵ ⓑ 1 1 . 1 0 . 0 4
+ⓢ B i r o s ⎵ ⎵ ⓕ M a e l ⎵ ⎵ ⓑ 3 0 . 1 0 . 1 0"""
 
     # Transcriptions with worker version are in lowercase
     if transcription_entities_worker_version: