Compare revisions: ner/nerval
Commits on Source (6)
......@@ -63,14 +63,9 @@ deploy-pypi:
only:
- tags
variables:
TWINE_USERNAME: gitlab-ci-token
TWINE_PASSWORD: ${CI_JOB_TOKEN}
TWINE_REPOSITORY_URL: ${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/packages/pypi
before_script:
- pip install twine build
script:
- python -m build
- twine upload --repository-url ${TWINE_REPOSITORY_URL} dist/*
- twine upload -r pypi dist/*
......@@ -5,3 +5,5 @@ logging.basicConfig(
format="%(asctime)s %(levelname)s/%(name)s: %(message)s",
)
logger = logging.getLogger(__name__)
ALL_ENTITIES = "ALL"
import csv
import logging
from pathlib import Path
from typing import List
import editdistance
import edlib
from nerval import ALL_ENTITIES
from nerval.parse import (
BEGINNING_POS,
NOT_ENTITY_TAG,
......@@ -23,6 +23,26 @@ PRED_COLUMN = "Prediction"
CSV_HEADER = [ANNO_COLUMN, PRED_COLUMN]
def match(annotation: str, prediction: str, threshold: float) -> bool:
"""Test if two entities match based on their character edit distance.
Entities should be matched if both entities exist (i.e. are not empty strings) and their Character Error Rate does not exceed the threshold.
Otherwise they should not be matched.
Args:
annotation (str): ground-truth entity.
prediction (str): predicted entity.
threshold (float): matching threshold.
Returns:
bool: Whether to match these two entities.
"""
return (
annotation != ""
and prediction != ""
and editdistance.eval(annotation, prediction) / len(annotation) <= threshold
)
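
For illustration, a minimal usage sketch of this new helper (assuming it is importable from nerval.evaluate, as the tests' "from nerval import evaluate" suggests; the example strings and threshold are made up):

from nerval.evaluate import match

# "Pariis" needs one edit to become "Paris": CER = 1 / 5 = 0.2 <= 0.30, so the entities match
assert match("Paris", "Pariis", threshold=0.30)
# An empty prediction can never match
assert not match("Paris", "", threshold=0.30)
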
def compute_matches(
annotation: str,
prediction: str,
......@@ -56,14 +76,14 @@ def compute_matches(
labels_annot : list of strings, example : ['B-P','I-P','I-P','I-P','I-P','I-P','I-P','I-P','I-P','O', ...]
labels_predict : list of string , example : ['B-P','I-P','I-P','I-P','I-P','I-P','I-P','I-P','I-P','O', ...]
Output : {TAG1 : nb_entity_matched, ...}, example : {'All': 1, 'OCC': 0, 'PER': 1}
Output : {TAG1 : nb_entity_matched, ...}, example : {'ALL': 1, 'OCC': 0, 'PER': 1}
"""
assert annotation, "Annotation is empty"
assert prediction, "Prediction is empty"
assert labels_annot, "Annotation labels are empty"
assert labels_predict, "Prediction labels are empty"
entity_count = {"All": 0}
entity_count = {ALL_ENTITIES: 0}
last_tag = NOT_ENTITY_TAG
# Track indexes of characters found for continuation of nested entities
......@@ -158,24 +178,17 @@ def compute_matches(
# Normalize collected strings
entity_ref = "".join(current_ref)
entity_ref = entity_ref.replace("-", "")
len_entity = len(entity_ref)
entity_compar = "".join(current_compar)
entity_compar = entity_compar.replace("-", "")
# One entity is counted as recognized (score of 1) if the Levenshtein distance between the expected and predicted entities
# represents less than 30% (THRESHOLD) of the length of the expected entity.
# Precision and recall will be computed for each category by comparing the numbers of recognized and expected entities
score = (
1
if editdistance.eval(entity_ref, entity_compar) / len_entity
<= threshold
else 0
)
score = int(match(entity_ref, entity_compar, threshold))
entity_count[last_tag] = entity_count.get(last_tag, 0) + score
entity_count["All"] += score
entity_count[ALL_ENTITIES] += score
current_ref = []
current_compar = []
return entity_count
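
As an illustration of the accumulation above (values are made up), a single matched PER entity updates both its own counter and the ALL_ENTITIES total:

from nerval import ALL_ENTITIES

entity_count = {ALL_ENTITIES: 0}
last_tag, score = "PER", 1  # the reference and predicted strings matched
entity_count[last_tag] = entity_count.get(last_tag, 0) + score
entity_count[ALL_ENTITIES] += score
# entity_count == {"ALL": 1, "PER": 1}
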
......@@ -263,7 +276,6 @@ def compute_scores(
if (prec + rec == 0)
else 2 * (prec * rec) / (prec + rec)
)
scores[tag]["predicted"] = nb_predict
scores[tag]["matched"] = nb_match
scores[tag]["P"] = prec
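
For context, a hedged sketch of how these per-tag scores relate to the counts: prec and rec presumably divide nb_match by the predicted and expected entity counts (nb_support is an assumed name; the other names appear in the hunk). The numbers reproduce the ALL_ENTITIES row of the test fixtures below:

nb_match, nb_predict, nb_support = 1, 3, 3
prec = nb_match / nb_predict  # 0.333...
rec = nb_match / nb_support   # 0.333...
f1 = 0 if (prec + rec == 0) else 2 * (prec * rec) / (prec + rec)  # 0.333...
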
......@@ -321,7 +333,7 @@ def run(annotation: Path, prediction: Path, threshold: int, verbose: bool) -> di
"""
# Get string and list of labels per character
def read_file(path: Path) -> List[str]:
def read_file(path: Path) -> list[str]:
assert path.exists(), f"Error: Input file {path} does not exist"
return path.read_text().strip().splitlines()
......@@ -382,9 +394,9 @@ def run_multiple(file_csv: Path, folder: Path, threshold: int, verbose: bool):
count += 1
scores = run(annot, predict, threshold, verbose)
precision += scores["All"]["P"]
recall += scores["All"]["R"]
f1 += scores["All"]["F1"]
precision += scores[ALL_ENTITIES]["P"]
recall += scores[ALL_ENTITIES]["R"]
f1 += scores[ALL_ENTITIES]["F1"]
if not count:
raise Exception("No file were counted")
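
After the loop, the accumulated sums presumably become a macro-average over the evaluated files; the division itself is outside this hunk, so the sketch below only illustrates that assumed step with made-up values:

# Illustrative values standing in for the sums accumulated in the loop above
precision, recall, f1, count = 2.4, 2.1, 2.2, 3
avg_precision = precision / count
avg_recall = recall / count
avg_f1 = f1 / count
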
......
import re
from typing import List
from nerval import ALL_ENTITIES
NOT_ENTITY_TAG = "O"
BEGINNING_POS = ["B", "S", "U"]
......@@ -49,7 +50,7 @@ def parse_line(index: int, line: str):
raise Exception(f"The file is not in BIO format: check line {index} ({line})")
def parse_bio(lines: List[str]) -> dict:
def parse_bio(lines: list[str]) -> dict:
"""Parse a BIO file to get text content, character-level NE labels and entity types count.
Input: lines of a valid BIO file
......@@ -57,7 +58,7 @@ def parse_bio(lines: List[str]) -> dict:
"""
words = []
labels = []
entity_count = {"All": 0}
entity_count = {ALL_ENTITIES: 0}
last_tag = None
if "§" in " ".join(lines):
......@@ -140,7 +141,7 @@ def parse_bio(lines: List[str]) -> dict:
# Count nb entity for each type
if get_position_label(label) in BEGINNING_POS:
entity_count[tag] = entity_count.get(tag, 0) + 1
entity_count["All"] += 1
entity_count[ALL_ENTITIES] += 1
last_tag = tag
......@@ -156,7 +157,7 @@ def parse_bio(lines: List[str]) -> dict:
result["labels"]
), f'Found {len(result["words"])} word(s) for {len(result["labels"])} label(s)'
for tag in result["entity_count"]:
if tag != "All":
if tag != ALL_ENTITIES:
assert (
result["labels"].count(f"B-{tag}") == result["entity_count"][tag]
), f'Found {result["entity_count"][tag]} entities for {result["labels"].count(f"B-{tag}")} label(s) for entity {tag}'
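
To make the parse_bio contract concrete, a hedged sketch on a tiny made-up input (assuming the usual one "token TAG" pair per line BIO layout):

from nerval.parse import parse_bio

result = parse_bio(["Paris B-LOC", "is O", "nice O"])
# Per the counting logic above, result["entity_count"] should be {"ALL": 1, "LOC": 1};
# result["words"] and result["labels"] hold the text content and its character-level labels.
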
......
from prettytable import MARKDOWN, PrettyTable
from nerval import ALL_ENTITIES
def print_markdown_table(header: list[str], rows: list[list]) -> None:
"""Prints a Markdown table filled with the provided header and rows."""
table = PrettyTable()
table.field_names = header
table.set_style(MARKDOWN)
# Align all columns to the right
table.align = "r"
# Keep the first column left-aligned
table.align[header[0]] = "l"
def _special_sort(row: list[str]) -> str:
if row[0] == ALL_ENTITIES:
# Place the line for all entities at the very top
return ""
return row[0]
rows.sort(key=_special_sort)
# Place ALL_ENTITIES row at the end
rows.append(rows.pop(0))
table.add_rows(rows)
print(table)
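
A small demonstration of what the sort-then-rotate above produces on made-up rows: tags end up in alphabetical order with the ALL_ENTITIES row last.

rows = [["PER", 1], ["ALL", 3], ["LOC", 2]]
rows.sort(key=lambda row: "" if row[0] == "ALL" else row[0])  # ALL sorts first
rows.append(rows.pop(0))                                      # ...then moves to the end
# rows == [["LOC", 2], ["PER", 1], ["ALL", 3]]
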
......@@ -41,13 +58,13 @@ def print_results(scores: dict) -> None:
def print_result_compact(scores: dict) -> None:
result = [
"All",
scores["All"]["predicted"],
scores["All"]["matched"],
round(scores["All"]["P"], 3),
round(scores["All"]["R"], 3),
round(scores["All"]["F1"], 3),
scores["All"]["Support"],
ALL_ENTITIES,
scores[ALL_ENTITIES]["predicted"],
scores[ALL_ENTITIES]["matched"],
round(scores[ALL_ENTITIES]["P"], 3),
round(scores[ALL_ENTITIES]["R"], 3),
round(scores[ALL_ENTITIES]["F1"], 3),
scores[ALL_ENTITIES]["Support"],
]
print_markdown_table(
["tag", "predicted", "matched", "Precision", "Recall", "F1", "Support"],
......
......@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "teklia-nerval"
version = "0.3.3-rc1"
version = "0.3.3rc2"
description = "Tool to evaluate NER on noisy text."
dynamic = ["dependencies"]
authors = [
......@@ -14,6 +14,7 @@ maintainers = [
{ name = "Teklia", email = "contact@teklia.com" },
]
readme = { file = "README.md", content-type = "text/markdown" }
requires-python = ">=3.10"
[project.scripts]
nerval = "nerval.cli:main"
......
import pytest
from nerval import evaluate
from nerval import ALL_ENTITIES, evaluate
THRESHOLD = 0.30
......@@ -370,7 +370,7 @@ fake_predict_tags_bk_boundary_2 = [
fake_predict_tags_aligned,
THRESHOLD,
),
{"All": 1, "PER": 1, "LOC": 0, "DAT": 0},
{ALL_ENTITIES: 1, "PER": 1, "LOC": 0, "DAT": 0},
),
(
(
......@@ -380,7 +380,7 @@ fake_predict_tags_bk_boundary_2 = [
fake_tags_aligned_nested_perfect,
THRESHOLD,
),
{"All": 3, "PER": 1, "LOC": 2},
{ALL_ENTITIES: 3, "PER": 1, "LOC": 2},
),
(
(
......@@ -390,7 +390,7 @@ fake_predict_tags_bk_boundary_2 = [
fake_tags_aligned_nested_false,
THRESHOLD,
),
{"All": 2, "PER": 1, "LOC": 1},
{ALL_ENTITIES: 2, "PER": 1, "LOC": 1},
),
(
(
......@@ -400,7 +400,7 @@ fake_predict_tags_bk_boundary_2 = [
fake_predict_tags_bk_boundary,
THRESHOLD,
),
{"All": 0, "PER": 0},
{ALL_ENTITIES: 0, "PER": 0},
),
(
(
......@@ -410,7 +410,7 @@ fake_predict_tags_bk_boundary_2 = [
fake_predict_tags_bk_boundary_2,
THRESHOLD,
),
{"All": 1, "PER": 1},
{ALL_ENTITIES: 1, "PER": 1},
),
],
)
......
import pytest
from nerval import evaluate
from nerval import ALL_ENTITIES, evaluate
@pytest.mark.parametrize(
("annot", "predict", "matches"),
[
(
{"All": 3, "DAT": 1, "LOC": 1, "PER": 1},
{"All": 3, "DAT": 1, "***": 1, "PER": 1},
{"All": 1, "PER": 1, "LOC": 0, "DAT": 0},
{ALL_ENTITIES: 3, "DAT": 1, "LOC": 1, "PER": 1},
{ALL_ENTITIES: 3, "DAT": 1, "***": 1, "PER": 1},
{ALL_ENTITIES: 1, "PER": 1, "LOC": 0, "DAT": 0},
),
],
)
......@@ -31,7 +31,7 @@ def test_compute_scores(annot, predict, matches):
"matched": 0,
"Support": 1,
},
"All": {
ALL_ENTITIES: {
"P": 0.3333333333333333,
"R": 0.3333333333333333,
"F1": 0.3333333333333333,
......
......@@ -2,11 +2,11 @@ import re
import pytest
from nerval import evaluate
from nerval import ALL_ENTITIES, evaluate
from nerval.parse import get_type_label, parse_line
expected_parsed_annot = {
"entity_count": {"All": 3, "DAT": 1, "LOC": 1, "PER": 1},
"entity_count": {ALL_ENTITIES: 3, "DAT": 1, "LOC": 1, "PER": 1},
"labels": [
"B-PER",
"I-PER",
......@@ -57,7 +57,7 @@ expected_parsed_annot = {
}
expected_parsed_predict = {
"entity_count": {"All": 3, "DAT": 1, "***": 1, "PER": 1},
"entity_count": {ALL_ENTITIES: 3, "DAT": 1, "***": 1, "PER": 1},
"labels": [
"B-PER",
"I-PER",
......@@ -108,7 +108,7 @@ expected_parsed_predict = {
}
expected_parsed_end_of_file = {
"entity_count": {"All": 3, "LOC": 2, "PER": 1},
"entity_count": {ALL_ENTITIES: 3, "LOC": 2, "PER": 1},
"labels": [
"B-PER",
"I-PER",
......
......@@ -3,7 +3,7 @@ from pathlib import Path
import pytest
from nerval import evaluate
from nerval import ALL_ENTITIES, evaluate
@pytest.mark.parametrize(
......@@ -29,7 +29,7 @@ from nerval import evaluate
"matched": 0,
"Support": 1,
},
"All": {
ALL_ENTITIES: {
"P": 0.3333333333333333,
"R": 0.3333333333333333,
"F1": 0.3333333333333333,
......@@ -59,7 +59,7 @@ from nerval import evaluate
pytest.lazy_fixture("nested_bio"),
pytest.lazy_fixture("nested_bio"),
{
"All": {
ALL_ENTITIES: {
"P": 1.0,
"R": 1.0,
"F1": 1.0,
......