Verified Commit 7f9934ae authored by Yoann Schneider

better way of computing confidences

parent 6b221b16
@@ -1036,7 +1036,9 @@ class Manager(OCRManager):
         enhanced_features = torch.flatten(
             enhanced_features, start_dim=2, end_dim=3
         ).permute(2, 0, 1)
-        output, pred, hidden_predict, cache, weights, temperature = self.models["decoder"](
+        output, pred, hidden_predict, cache, weights, temperature = self.models[
+            "decoder"
+        ](
             features,
             enhanced_features,
             simulated_y_pred[:, :-1],
@@ -1133,7 +1135,9 @@ class Manager(OCRManager):
         ).permute(2, 0, 1)
         for i in range(0, max_chars):
-            output, pred, hidden_predict, cache, weights, temperature = self.models["decoder"](
+            output, pred, hidden_predict, cache, weights, temperature = self.models[
+                "decoder"
+            ](
                 features,
                 enhanced_features,
                 predicted_tokens,
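The two hunks above only re-wrap the over-long `self.models["decoder"](...)` call; the decoder's return signature, including the `weights` and `temperature` values that feed the confidence computation, is unchanged. If per-character confidences are derived from the decoder's output logits, the usual pattern is a softmax over the vocabulary followed by a max. A minimal sketch under that assumption (the shape and the `char_confidence` helper are illustrative, not this repository's API):

import torch

def char_confidence(pred: torch.Tensor) -> torch.Tensor:
    # pred: hypothetical logits of shape (seq_len, batch, vocab_size).
    probs = torch.softmax(pred[-1], dim=-1)  # distribution for the latest decoded position
    return probs.max(dim=-1).values  # one confidence per batch element

# e.g. collected once per iteration of the decoding loop above:
# confidences.append(char_confidence(pred))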
@@ -167,7 +167,7 @@ def get_config():
         "use_2d_pe": True,  # use 2D positional embedding
         "use_1d_pe": True,  # use 1D positional embedding
         "use_lstm": False,
-        "temperature": 1, # temperature scaling scalar parameter
+        "temperature": 1,  # temperature scaling scalar parameter
         "attention_win": 100,  # length of attention window
         # Curriculum dropout
         "dropout_scheduler": {
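The `temperature` entry above is the scalar for temperature scaling, a standard confidence-calibration technique: logits are divided by the temperature before the softmax, so `1` is a no-op, values above `1` soften over-confident predictions, and values below `1` sharpen them. A self-contained sketch of the usual definition (the function name is illustrative, not taken from this codebase):

import torch

def scaled_probs(logits: torch.Tensor, temperature: float = 1.0) -> torch.Tensor:
    # Temperature scaling: divide logits by T before normalizing.
    # T = 1 leaves probabilities unchanged; T > 1 flattens them.
    return torch.softmax(logits / temperature, dim=-1)

# e.g. scaled_probs(torch.randn(4, 10), temperature=2.0).max(dim=-1).values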
@@ -2,6 +2,7 @@
 import os
 import pickle
+from itertools import pairwise
 from pathlib import Path

 import cv2
@@ -344,21 +345,13 @@ def run(
         index = [pos for pos, char in enumerate(text) if char in ["", "", "", ""]]
         # calculates scores by token
-        score_by_token = []
-        for rang, position in enumerate(index[:-1]):
-            score_by_token.append(
-                {
-                    "text": f"{text[position: index[rang+1]-1]}",
-                    "confidence_ner": f"{np.around(np.mean(char_confidences[position : index[rang+1]-1]), 2)}",
-                }
-            )
-        score_by_token.append(
-            {
-                "text": f"{text[index[-2]: index[-1]]}",
-                "confidence_ner": f"{np.around(np.mean(char_confidences[index[-2] : index[-1]]), 2)}",
-            }
-        )
+        score_by_token = [
+            {
+                "text": f"{text[current: next_token-1]}",
+                "confidence_ner": f"{np.around(np.mean(char_confidences[current : next_token-1]), 2)}",
+            }
+            for current, next_token in pairwise(index)
+        ]
         result["confidences"]["total"] = np.around(np.mean(char_confidences), 2)
         result["confidences"]["by ner token"] = []
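This is the core of the commit: a manual loop plus a special-cased trailing `append` is replaced by a single comprehension over `itertools.pairwise` (Python >= 3.10), which yields consecutive overlapping pairs, so each NER span runs from one tag position up to the next. A standalone sketch with hypothetical data (`text`, `index`, and the confidence values below are made up for illustration):

from itertools import pairwise  # Python >= 3.10

import numpy as np

text = "Xparis Ylouvre"  # pretend X and Y are NER tag characters
char_confidences = np.random.rand(len(text))
index = [0, 7, len(text) + 1]  # tag positions plus a sentinel past the end

# pairwise([0, 7, 15]) -> (0, 7), (7, 15): one dict per tagged span.
score_by_token = [
    {
        "text": f"{text[current: next_token-1]}",
        "confidence_ner": f"{np.around(np.mean(char_confidences[current : next_token-1]), 2)}",
    }
    for current, next_token in pairwise(index)
]
print(score_by_token)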
@@ -107,7 +107,7 @@ def training_config():
         "dec_pred_dropout": 0.1,  # dropout rate before decision layer
         "dec_att_dropout": 0.1,  # dropout rate in multi head attention
         "dec_dim_feedforward": 256,  # number of dimension for feedforward layer in transformer decoder layers
-        "temperature": 1, #temperature scaling scalar parameter
+        "temperature": 1,  # temperature scaling scalar parameter
         "use_2d_pe": True,  # use 2D positional embedding
         "use_1d_pe": True,  # use 1D positional embedding
         "use_lstm": False,
@@ -79,8 +79,8 @@ def test_train_and_test(
         expected_param,
         expected_tensor,
     ) in zip(trained.items(), expected.items()):
-        print(f'trained tensor is {trained_tensor}')
-        print(f'expected tensor os {expected_tensor}')
+        print(f"trained tensor is {trained_tensor}")
+        print(f"expected tensor os {expected_tensor}")
         assert trained_param == expected_param
         assert torch.allclose(trained_tensor, expected_tensor, atol=1e-03)
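The hunk above only normalizes quote style (the pre-existing "os" typo in the second print is carried over unchanged). The substantive pattern in this test, comparing two model state dicts entry by entry under an absolute tolerance, looks like this in isolation (the tensors below are hypothetical fixtures):

import torch

expected = {"linear.weight": torch.ones(2, 2)}
trained = {"linear.weight": torch.ones(2, 2) + 5e-4}  # differs, but within atol

for (trained_param, trained_tensor), (expected_param, expected_tensor) in zip(
    trained.items(), expected.items()
):
    assert trained_param == expected_param
    assert torch.allclose(trained_tensor, expected_tensor, atol=1e-03)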