From 829ad8f78e3b2c028a9ac4730cfd0679306c3609 Mon Sep 17 00:00:00 2001 From: Yoann Schneider <yschneider@teklia.com> Date: Thu, 29 Jun 2023 16:56:23 +0200 Subject: [PATCH] Fix entity splitting --- dan/predict/prediction.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dan/predict/prediction.py b/dan/predict/prediction.py index bf82b19d..ebda14f9 100644 --- a/dan/predict/prediction.py +++ b/dan/predict/prediction.py @@ -326,7 +326,8 @@ def process_image( "text": f"{text[current: next_token]}".replace("\n", " "), "confidence_ner": f"{np.around(np.mean(char_confidences[current : next_token]), 2)}", } - for current, next_token in pairwise(index + [0]) + # We go up to -1 so that the last token matches until the end of the text + for current, next_token in pairwise(index + [-1]) ] result["confidences"]["total"] = np.around(np.mean(char_confidences), 2) -- GitLab