Skip to content
Snippets Groups Projects
Commit bf51947e authored by Solene Tarride
Browse files

Simplify compute_prob_by_separator

parent 69a55059
No related branches found
No related tags found
1 merge request: !66 "Compute confidence scores by char, word or line"
......@@ -218,14 +218,11 @@ def compute_prob_by_separator(characters, probabilities, separator):
# match anything except separators, get start and end index
pattern = re.compile(f"[^{separator.pattern}]+")
matches = [(m.start(), m.end()) for m in re.finditer(pattern, characters)]
return [
np.mean(probabilities[start:end])
for (start, end) in matches
]
# Iterate over text pieces and compute mean confidence
probs = []
for match in matches:
start = match[0]
end = match[1]
probs.append(np.mean(probabilities[start:end]))
return probs
def run(
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment