Skip to content
Snippets Groups Projects
Commit ede796fc authored by Solene Tarride's avatar Solene Tarride Committed by Yoann Schneider
Browse files

Fix score computation when threshold=1.0

parent b6d65f0f
No related branches found
No related tags found
1 merge request!54Fix score computation when threshold=1.0
Pipeline #185561 passed
......@@ -23,6 +23,26 @@ PRED_COLUMN = "Prediction"
CSV_HEADER = [ANNO_COLUMN, PRED_COLUMN]
def match(annotation: str, prediction: str, threshold: float) -> bool:
    """Decide whether a predicted entity matches its ground-truth entity.

    The two entities match only when neither is an empty string and the
    character edit distance between them, divided by the length of the
    annotation, is less than or equal to the threshold.

    Args:
        annotation (str): ground-truth entity.
        prediction (str): predicted entity.
        threshold (float): highest accepted ratio of edit distance to
            annotation length.

    Returns:
        bool: True if the entities should be matched, False otherwise.
    """
    # An empty entity on either side can never be matched; returning here
    # also keeps the division below from seeing a zero-length annotation.
    if annotation == "" or prediction == "":
        return False

    # Edit distance normalized by the length of the ground-truth entity.
    error_rate = editdistance.eval(annotation, prediction) / len(annotation)
    return error_rate <= threshold
def compute_matches(
annotation: str,
prediction: str,
......@@ -158,24 +178,17 @@ def compute_matches(
# Normalize collected strings
entity_ref = "".join(current_ref)
entity_ref = entity_ref.replace("-", "")
len_entity = len(entity_ref)
entity_compar = "".join(current_compar)
entity_compar = entity_compar.replace("-", "")
# One entity is counted as recognized (score of 1) if the Levenshtein distance between the expected and predicted entities
# represents at most 30% (THRESHOLD) of the length of the expected entity.
# Precision and recall will be computed for each category by comparing the numbers of recognized entities and expected entities
score = (
1
if editdistance.eval(entity_ref, entity_compar) / len_entity
<= threshold
else 0
)
score = int(match(entity_ref, entity_compar, threshold))
entity_count[last_tag] = entity_count.get(last_tag, 0) + score
entity_count[ALL_ENTITIES] += score
current_ref = []
current_compar = []
return entity_count
......@@ -263,7 +276,6 @@ def compute_scores(
if (prec + rec == 0)
else 2 * (prec * rec) / (prec + rec)
)
scores[tag]["predicted"] = nb_predict
scores[tag]["matched"] = nb_match
scores[tag]["P"] = prec
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment