diff --git a/dan/predict/prediction.py b/dan/predict/prediction.py index 70a60b4c918e3d8f39b4242bc50202d7124316d0..a9500c0e4ba5ac305701dba2670a7870f67bee48 100644 --- a/dan/predict/prediction.py +++ b/dan/predict/prediction.py @@ -2,6 +2,7 @@ import os import pickle +from itertools import pairwise from pathlib import Path import cv2 @@ -351,14 +352,13 @@ def run( index = [pos for pos, char in enumerate(text) if char in ["ⓝ", "ⓟ", "ⓓ", "ⓡ"]] # calculates scores by token - score_by_token = [] - for rang, position in enumerate(index[:-1]): - score_by_token.append( - { - "text": f"{text[position: index[rang+1]-1]}", - "confidence_ner": f"{np.around(np.mean(char_confidences[position : index[rang+1]-1]), 2)}", - } - ) + score_by_token = [ + { + "text": f"{text[current: next_token-1]}", + "confidence_ner": f"{np.around(np.mean(char_confidences[current : next_token-1]), 2)}", + } + for current, next_token in pairwise(index) + ] result["confidences"]["total"] = np.around(np.mean(char_confidences), 2) result["confidences"]["by ner token"] = []