From 829ad8f78e3b2c028a9ac4730cfd0679306c3609 Mon Sep 17 00:00:00 2001
From: Yoann Schneider <yschneider@teklia.com>
Date: Thu, 29 Jun 2023 16:56:23 +0200
Subject: [PATCH] Fix entity splitting

---
 dan/predict/prediction.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/dan/predict/prediction.py b/dan/predict/prediction.py
index bf82b19d..ebda14f9 100644
--- a/dan/predict/prediction.py
+++ b/dan/predict/prediction.py
@@ -326,7 +326,8 @@ def process_image(
                 "text": f"{text[current: next_token]}".replace("\n", " "),
                 "confidence_ner": f"{np.around(np.mean(char_confidences[current : next_token]), 2)}",
             }
-            for current, next_token in pairwise(index + [0])
+            # Append -1 as the final boundary so the last entity extends to the end of the text (NOTE: a slice stop of -1 excludes the final character)
+            for current, next_token in pairwise(index + [-1])
         ]
         result["confidences"]["total"] = np.around(np.mean(char_confidences), 2)
 
-- 
GitLab