Remove metrics

Merged · Mélodie Boillet requested to merge remove-metrics into main

2 files changed: +4 −299
@@ -2,7 +2,6 @@
import re
import editdistance
import networkx as nx
import numpy as np
from dan.post_processing import PostProcessingModuleSIMARA
@@ -31,19 +30,6 @@ class MetricManager:
"cer": ["edit_chars", "nb_chars"],
"wer": ["edit_words", "nb_words"],
"wer_no_punct": ["edit_words_no_punct", "nb_words_no_punct"],
"loer": [
"edit_graph",
"nb_nodes_and_edges",
"nb_pp_op_layout",
"nb_gt_layout_token",
],
"precision": ["precision", "weights"],
"map_cer_per_class": [
"map_cer",
],
"layout_precision_per_class_per_threshold": [
"map_cer",
],
}
self.init_metrics()
@@ -84,41 +70,12 @@ class MetricManager:
for metric_name in metric_names:
value = None
if output:
if metric_name in ["nb_samples", "weights"]:
if metric_name == "nb_samples":
value = int(np.sum(self.epoch_metrics[metric_name]))
elif metric_name in [
"time",
]:
elif metric_name == "time":
value = int(np.sum(self.epoch_metrics[metric_name]))
sample_time = value / np.sum(self.epoch_metrics["nb_samples"])
display_values["sample_time"] = float(round(sample_time, 4))
elif metric_name == "loer":
display_values["pper"] = float(
round(
np.sum(self.epoch_metrics["nb_pp_op_layout"])
/ np.sum(self.epoch_metrics["nb_gt_layout_token"]),
4,
)
)
elif metric_name == "map_cer_per_class":
value = float(
compute_global_mAP_per_class(self.epoch_metrics["map_cer"])
)
for key in value.keys():
display_values["map_cer_" + key] = float(round(value[key], 4))
continue
elif metric_name == "layout_precision_per_class_per_threshold":
value = float(
compute_global_precision_per_class_per_threshold(
self.epoch_metrics["map_cer"]
)
)
for key_class in value.keys():
for threshold in value[key_class].keys():
display_values[
"map_cer_{}_{}".format(key_class, threshold)
] = float(round(value[key_class][threshold], 4))
continue
if metric_name == "cer":
value = float(
np.sum(self.epoch_metrics["edit_chars"])
@@ -156,13 +113,6 @@ class MetricManager:
weights=np.array(self.epoch_metrics["nb_samples"]),
)
)
elif metric_name == "map_cer":
value = float(compute_global_mAP(self.epoch_metrics[metric_name]))
elif metric_name == "loer":
value = float(
np.sum(self.epoch_metrics["edit_graph"])
/ np.sum(self.epoch_metrics["nb_nodes_and_edges"])
)
elif value is None:
continue
@@ -175,9 +125,8 @@ class MetricManager:
values["nb_samples"],
],
}
for v in ["weights", "time"]:
if v in values:
metrics[v] = [values[v]]
if "time" in values:
metrics["time"] = [values["time"]]
for metric_name in metric_names:
if metric_name == "cer":
metrics["edit_chars"] = [
@@ -223,35 +172,6 @@ class MetricManager:
metrics[metric_name] = [
values[metric_name],
]
elif metric_name == "map_cer":
pp_pred = list()
pp_score = list()
for pred, score in zip(values["str_x"], values["confidence_score"]):
pred_score = self.post_processing_module().post_process(pred, score)
pp_pred.append(pred_score[0])
pp_score.append(pred_score[1])
metrics[metric_name] = [
compute_layout_mAP_per_class(y, x, conf, self.matching_tokens)
for x, conf, y in zip(pp_pred, pp_score, values["str_y"])
]
elif metric_name == "loer":
pp_pred = list()
metrics["nb_pp_op_layout"] = list()
for pred in values["str_x"]:
pp_module = self.post_processing_module()
pp_pred.append(pp_module.post_process(pred))
metrics["nb_pp_op_layout"].append(pp_module.num_op)
metrics["nb_gt_layout_token"] = [
len(keep_only_ner_tokens(str_x, self.layout_tokens))
for str_x in values["str_x"]
]
edit_and_num_items = [
self.edit_and_num_edge_nodes(y, x)
for x, y in zip(pp_pred, values["str_y"])
]
metrics["edit_graph"], metrics["nb_nodes_and_edges"] = [
ei[0] for ei in edit_and_num_items
], [ei[1] for ei in edit_and_num_items]
return metrics
def get(self, name):
@@ -331,217 +251,3 @@ def edit_wer_from_formatted_split_text(gt, pred):
Compute edit distance at word level from formatted string as list
"""
return editdistance.eval(gt, pred)
def extract_by_tokens(
input_str, begin_token, end_token, associated_score=None, order_by_score=False
):
"""
Extract list of text regions by begin and end tokens
Order the list by confidence score
"""
if order_by_score:
assert associated_score is not None
res = list()
for match in re.finditer(
"{}[^{}]*{}".format(begin_token, end_token, end_token), input_str
):
begin, end = match.regs[0]
if order_by_score:
res.append(
{
"confidence": np.mean(
[associated_score[begin], associated_score[end - 1]]
),
"content": input_str[begin + 1 : end - 1],
}
)
else:
res.append(input_str[begin + 1 : end - 1])
if order_by_score:
res = sorted(res, key=lambda x: x["confidence"], reverse=True)
res = [r["content"] for r in res]
return res
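A quick sketch of what the removed extract_by_tokens helper did. The "<" and ">" tokens below are stand-ins chosen only because they are regex-safe; the real code used dataset-specific layout tokens:

# Hypothetical usage of the helper above with placeholder tokens.
text = "<john doe><1871>"
print(extract_by_tokens(text, "<", ">"))  # ['john doe', '1871']
# With per-character confidence scores, regions come back ordered by the
# mean confidence of their begin/end tokens.
scores = [0.9] * len(text)
print(extract_by_tokens(text, "<", ">", associated_score=scores, order_by_score=True))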
def compute_layout_precision_per_threshold(
gt, pred, score, begin_token, end_token, layout_tokens, return_weight=True
):
"""
Compute average precision of a given class for CER threshold from 5% to 50% with a step of 5%
"""
pred_list = extract_by_tokens(
pred, begin_token, end_token, associated_score=score, order_by_score=True
)
gt_list = extract_by_tokens(gt, begin_token, end_token)
pred_list = [keep_all_but_ner_tokens(p, layout_tokens) for p in pred_list]
gt_list = [keep_all_but_ner_tokens(gt, layout_tokens) for gt in gt_list]
precision_per_threshold = [
compute_layout_AP_for_given_threshold(gt_list, pred_list, threshold / 100)
for threshold in range(5, 51, 5)
]
if return_weight:
return precision_per_threshold, len(gt_list)
return precision_per_threshold
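The threshold grid named in the docstring, spelled out; this adds nothing beyond what range(5, 51, 5) already encodes:

# CER thresholds swept by the precision computation above.
print([t / 100 for t in range(5, 51, 5)])
# [0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5]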
def compute_layout_AP_for_given_threshold(gt_list, pred_list, threshold):
"""
Compute average precision of a given class for a given CER threshold
"""
remaining_gt_list = gt_list.copy()
num_true = len(gt_list)
correct = np.zeros(len(pred_list), dtype=bool)
for i, pred in enumerate(pred_list):
if len(remaining_gt_list) == 0:
break
cer_with_gt = [
edit_cer_from_string(gt, pred) / nb_chars_cer_from_string(gt)
for gt in remaining_gt_list
]
cer, ind = np.min(cer_with_gt), np.argmin(cer_with_gt)
if cer <= threshold:
correct[i] = True
del remaining_gt_list[ind]
precision = np.cumsum(correct, dtype=int) / np.arange(1, len(pred_list) + 1)
recall = np.cumsum(correct, dtype=int) / num_true
max_precision_from_recall = np.maximum.accumulate(precision[::-1])[::-1]
recall_diff = recall - np.concatenate([np.array([0]), recall[:-1]])
P = np.sum(recall_diff * max_precision_from_recall)
return P
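A self-contained walk-through of the AP computation above. edit_cer_from_string and nb_chars_cer_from_string are defined elsewhere in this file; the stand-ins below only assume they compute a character-level edit distance and the ground-truth length:

import editdistance
import numpy as np

def edit_cer_from_string(gt, pred):
    # assumed behaviour of the real helper
    return editdistance.eval(gt, pred)

def nb_chars_cer_from_string(gt):
    # assumed behaviour of the real helper
    return len(gt)

gt_list = ["john doe", "1871"]
pred_list = ["john doe", "1971"]  # predictions, already ordered by confidence
# "john doe" matches exactly (CER 0) and "1971" matches "1871" with
# CER 1/4 = 0.25 <= threshold, so precision and recall are both perfect.
print(compute_layout_AP_for_given_threshold(gt_list, pred_list, 0.25))  # 1.0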
def compute_layout_mAP_per_class(gt, pred, score, tokens):
"""
Compute the mAP_cer for each class for a given sample
"""
layout_tokens = "".join(list(tokens.keys()))
AP_per_class = dict()
for token in tokens.keys():
if token in gt:
AP_per_class[token] = compute_layout_precision_per_threshold(
gt, pred, score, token, tokens[token], layout_tokens=layout_tokens
)
return AP_per_class
def compute_global_mAP(list_AP_per_class):
"""
Compute the global mAP_cer for several samples
"""
weights_per_doc = list()
mAP_per_doc = list()
for doc_AP_per_class in list_AP_per_class:
APs = np.array(
[np.mean(doc_AP_per_class[key][0]) for key in doc_AP_per_class.keys()]
)
weights = np.array(
[doc_AP_per_class[key][1] for key in doc_AP_per_class.keys()]
)
if np.sum(weights) == 0:
mAP_per_doc.append(0)
else:
mAP_per_doc.append(np.average(APs, weights=weights))
weights_per_doc.append(np.sum(weights))
if np.sum(weights_per_doc) == 0:
return 0
return np.average(mAP_per_doc, weights=weights_per_doc)
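A shape reminder for the aggregation above, with made-up numbers; each per-sample dict maps a class token (placeholder glyphs here) to ([AP per CER threshold], number of GT regions):

doc1 = {"ⓘ": ([1.0] * 10, 2), "ⓓ": ([0.5] * 10, 1)}
doc2 = {"ⓘ": ([0.0] * 10, 1)}
# doc1: average(1.0, 0.5; weights 2, 1) = 0.8333 with weight 3
# doc2: 0.0 with weight 1, so global mAP = (0.8333 * 3 + 0.0 * 1) / 4
print(compute_global_mAP([doc1, doc2]))  # 0.625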
def compute_global_mAP_per_class(list_AP_per_class):
"""
Compute the mAP_cer per class for several samples
"""
mAP_per_class = dict()
for doc_AP_per_class in list_AP_per_class:
for key in doc_AP_per_class.keys():
if key not in mAP_per_class:
mAP_per_class[key] = {"AP": list(), "weights": list()}
mAP_per_class[key]["AP"].append(np.mean(doc_AP_per_class[key][0]))
mAP_per_class[key]["weights"].append(doc_AP_per_class[key][1])
for key in mAP_per_class.keys():
mAP_per_class[key] = np.average(
mAP_per_class[key]["AP"], weights=mAP_per_class[key]["weights"]
)
return mAP_per_class
def compute_global_precision_per_class_per_threshold(list_AP_per_class):
"""
Compute the mAP_cer per class and per threshold for several samples
"""
mAP_per_class = dict()
for doc_AP_per_class in list_AP_per_class:
for key in doc_AP_per_class.keys():
if key not in mAP_per_class:
mAP_per_class[key] = dict()
for threshold in range(5, 51, 5):
mAP_per_class[key][threshold] = {
"precision": list(),
"weights": list(),
}
for i, threshold in enumerate(range(5, 51, 5)):
mAP_per_class[key][threshold]["precision"].append(
np.mean(doc_AP_per_class[key][0][i])
)
mAP_per_class[key][threshold]["weights"].append(
doc_AP_per_class[key][1]
)
for key_class in mAP_per_class.keys():
for threshold in mAP_per_class[key_class]:
mAP_per_class[key_class][threshold] = np.average(
mAP_per_class[key_class][threshold]["precision"],
weights=mAP_per_class[key_class][threshold]["weights"],
)
return mAP_per_class
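The same made-up doc1/doc2 from the sketch above, showing the nested {class: {threshold: precision}} output of the per-threshold variant:

out = compute_global_precision_per_class_per_threshold([doc1, doc2])
print(out["ⓘ"][5])  # average(1.0, 0.0; weights 2, 1) = 0.6666..., same for 10..50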
def str_to_graph_simara(str):
"""
Compute graph from string of layout tokens for the SIMARA dataset at page level
"""
begin_layout_tokens = "".join(list(SIMARA_MATCHING_TOKENS.keys()))
layout_token_sequence = keep_only_ner_tokens(str, begin_layout_tokens)
g = nx.DiGraph()
g.add_node("D", type="document", level=2, page=0)
token_name_dict = {"ⓘ": "I", "ⓓ": "D", "ⓢ": "S", "ⓒ": "C", "ⓟ": "P", "ⓐ": "A"}
num = dict()
previous_node = None
for token in begin_layout_tokens:
num[token] = 0
for ind, c in enumerate(layout_token_sequence):
num[c] += 1
node_name = "{}_{}".format(token_name_dict[c], num[c])
g.add_node(node_name, type=token_name_dict[c], level=1, page=0)
g.add_edge("D", node_name)
if previous_node:
g.add_edge(previous_node, node_name)
previous_node = node_name
return g
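A sketch of the resulting graph, assuming the circled-letter tokens reconstructed above and the keep_only_ner_tokens helper defined elsewhere in this file:

g = str_to_graph_simara("ⓘJean Petit ⓓ1871")
# Nodes: the document root "D" plus one node per layout token, each linked
# to the root and chained to the previous one: D->I_1, D->D_1, I_1->D_1.
print(sorted(g.nodes), g.number_of_edges())  # ['D', 'D_1', 'I_1'] 3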
def graph_edit_distance(g1, g2):
"""
Compute graph edit distance between two graphs
"""
for v in nx.optimize_graph_edit_distance(
g1,
g2,
node_ins_cost=lambda node: 1,
node_del_cost=lambda node: 1,
node_subst_cost=lambda node1, node2: 0 if node1["type"] == node2["type"] else 1,
edge_ins_cost=lambda edge: 1,
edge_del_cost=lambda edge: 1,
edge_subst_cost=lambda edge1, edge2: 0 if edge1 == edge2 else 1,
):
new_edit = v
return new_edit
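A minimal check of the helper above; nx.optimize_graph_edit_distance yields successively better approximations, which is why the loop keeps only the last value:

import networkx as nx

g1 = nx.DiGraph()
g1.add_node("D", type="document", level=2, page=0)
g1.add_node("I_1", type="I", level=1, page=0)
g1.add_edge("D", "I_1")

g2 = g1.copy()
print(graph_edit_distance(g1, g2))  # 0: identical graphs

g2.add_node("P_1", type="P", level=1, page=0)
g2.add_edge("D", "P_1")
print(graph_edit_distance(g1, g2))  # 2: one inserted node plus one inserted edge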