From 9d66f70044455459d9cd0f54d09c21586c584efd Mon Sep 17 00:00:00 2001 From: manonBlanco <blanco@teklia.com> Date: Tue, 19 Dec 2023 09:06:14 +0100 Subject: [PATCH] Keep NER text as is --- dan/ocr/predict/attention.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/dan/ocr/predict/attention.py b/dan/ocr/predict/attention.py index 7df72e6e..399f72b6 100644 --- a/dan/ocr/predict/attention.py +++ b/dan/ocr/predict/attention.py @@ -93,7 +93,7 @@ def compute_prob_by_ner( return zip( *[ ( - f"{characters[current: next_token]}".replace("\n", " "), + characters[current:next_token], np.mean(probabilities[current:next_token]), ) for current, next_token in indices @@ -154,10 +154,7 @@ def split_text( return [], [] indices = build_ner_indices(text, tokens) - text_split = [ - f"{text[current: next_token]}".replace("\n", " ") - for current, next_token in indices - ] + text_split = [text[current:next_token] for current, next_token in indices] case _: logger.error(f"Level should be either {list(map(str, Level))}") return [], [] -- GitLab