From 9d66f70044455459d9cd0f54d09c21586c584efd Mon Sep 17 00:00:00 2001
From: manonBlanco <blanco@teklia.com>
Date: Tue, 19 Dec 2023 09:06:14 +0100
Subject: [PATCH] Keep NER text as is

---
 dan/ocr/predict/attention.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/dan/ocr/predict/attention.py b/dan/ocr/predict/attention.py
index 7df72e6e..399f72b6 100644
--- a/dan/ocr/predict/attention.py
+++ b/dan/ocr/predict/attention.py
@@ -93,7 +93,7 @@ def compute_prob_by_ner(
     return zip(
         *[
             (
-                f"{characters[current: next_token]}".replace("\n", " "),
+                characters[current:next_token],
                 np.mean(probabilities[current:next_token]),
             )
             for current, next_token in indices
@@ -154,10 +154,7 @@ def split_text(
                 return [], []
 
             indices = build_ner_indices(text, tokens)
-            text_split = [
-                f"{text[current: next_token]}".replace("\n", " ")
-                for current, next_token in indices
-            ]
+            text_split = [text[current:next_token] for current, next_token in indices]
         case _:
             logger.error(f"Level should be either {list(map(str, Level))}")
             return [], []
-- 
GitLab