
Expose parsing/evaluation code

Merged Manon Blanco requested to merge parse-evaluate-without-files into master
All threads resolved!
3 files changed: +46 −45
@@ -4,6 +4,7 @@ import logging
 import os
 from csv import reader
 from pathlib import Path
+from typing import List

 import editdistance
 import edlib
@@ -265,22 +266,11 @@ def compute_scores(
     return scores


-def run(annotation: Path, prediction: Path, threshold: int, verbose: bool) -> dict:
-    """Compute recall and precision for each entity type found in annotation and/or prediction.
-
-    Each measure is given at document level, global score is a micro-average across entity types.
-    """
-    # Get string and list of labels per character
-    annot = parse_bio(annotation)
-    predict = parse_bio(prediction)
-
-    if not annot or not predict:
-        raise Exception("No content found in annotation or prediction files.")
-
+def evaluate(annotation: dict, prediction: dict, threshold: int) -> dict:
     # Align annotation and prediction
-    align_result = edlib.align(annot["words"], predict["words"], task="path")
+    align_result = edlib.align(annotation["words"], prediction["words"], task="path")
     nice_alignment = edlib.getNiceAlignment(
-        align_result, annot["words"], predict["words"]
+        align_result, annotation["words"], prediction["words"]
     )

     annot_aligned = nice_alignment["query_aligned"]
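
Note on the alignment step above: evaluate() relies on edlib's pairwise alignment to line up the annotation and prediction strings. A minimal standalone sketch of that call pattern (the input strings below are made up; in evaluate() they are the "words" strings produced by parse_bio):

    import edlib

    # Illustrative inputs; in evaluate() these are annotation["words"] and prediction["words"].
    annot_words = "Paris is the capital of France"
    predict_words = "Paris es the capitol of France"

    # task="path" makes edlib return the alignment path that getNiceAlignment needs.
    align_result = edlib.align(annot_words, predict_words, task="path")
    nice_alignment = edlib.getNiceAlignment(align_result, annot_words, predict_words)

    print(nice_alignment["query_aligned"])   # annotation side, gaps marked with "-"
    print(nice_alignment["target_aligned"])  # prediction side, gaps marked with "-"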
@@ -288,10 +278,10 @@ def run(annotation: Path, prediction: Path, threshold: int, verbose: bool) -> dict:

     # Align labels from string alignment
     labels_annot_aligned = get_labels_aligned(
-        annot["words"], annot_aligned, annot["labels"]
+        annotation["words"], annot_aligned, annotation["labels"]
     )
     labels_predict_aligned = get_labels_aligned(
-        predict["words"], predict_aligned, predict["labels"]
+        prediction["words"], predict_aligned, prediction["labels"]
     )

     # Get nb match
@@ -304,7 +294,33 @@ def run(annotation: Path, prediction: Path, threshold: int, verbose: bool) -> dict:
     )

     # Compute scores
-    scores = compute_scores(annot["entity_count"], predict["entity_count"], matches)
+    scores = compute_scores(
+        annotation["entity_count"], prediction["entity_count"], matches
+    )

     return scores
+
+
+def run(annotation: Path, prediction: Path, threshold: int, verbose: bool) -> dict:
+    """Compute recall and precision for each entity type found in annotation and/or prediction.
+
+    Each measure is given at document level, global score is a micro-average across entity types.
+    """
+    # Get string and list of labels per character
+    def read_file(path: Path) -> List[str]:
+        assert path.exists(), f"Error: Input file {path} does not exist"
+        return path.read_text().strip().splitlines()
+
+    logger.info(f"Parsing file @ {annotation}")
+    annot = parse_bio(read_file(annotation))
+    logger.info(f"Parsing file @ {prediction}")
+    predict = parse_bio(read_file(prediction))
+
+    if not (annot and predict):
+        raise Exception("No content found in annotation or prediction files.")
+
+    scores = evaluate(annot, predict, threshold)
+
+    # Print results
+    if verbose:
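
With this split, callers can score already-parsed content directly instead of going through run() and BIO files. A minimal usage sketch, assuming the functions are imported from the project's evaluation module (the import path, sample BIO lines and threshold value below are illustrative, not taken from this MR):

    from pathlib import Path

    from nerval.evaluate import evaluate, parse_bio  # hypothetical import path

    # As the new run() shows, parse_bio is fed a list of BIO lines rather than a
    # file path, so in-memory predictions can be scored without temporary files.
    annot = parse_bio(Path("annotation.bio").read_text().strip().splitlines())
    predict = parse_bio(
        ["Paris B-LOC", "is O", "the O", "capital O", "of O", "France B-LOC"]
    )

    # threshold value is illustrative; run() still adds file reading, logging
    # and verbose printing on top of evaluate().
    scores = evaluate(annot, predict, threshold=30)
    print(scores)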