Compare revisions: ner/nerval
Commits on Source (6)
......@@ -63,14 +63,9 @@ deploy-pypi:
only:
- tags
variables:
TWINE_USERNAME: gitlab-ci-token
TWINE_PASSWORD: ${CI_JOB_TOKEN}
TWINE_REPOSITORY_URL: ${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/packages/pypi
before_script:
- pip install twine build
script:
- python -m build
- twine upload --repository-url ${TWINE_REPOSITORY_URL} dist/*
- twine upload -r pypi dist/*
......@@ -5,3 +5,5 @@ logging.basicConfig(
format="%(asctime)s %(levelname)s/%(name)s: %(message)s",
)
logger = logging.getLogger(__name__)
ALL_ENTITIES = "ALL"
import csv
import logging
from pathlib import Path
from typing import List
import editdistance
import edlib
from nerval import ALL_ENTITIES
from nerval.parse import (
BEGINNING_POS,
NOT_ENTITY_TAG,
......@@ -23,6 +23,26 @@ PRED_COLUMN = "Prediction"
CSV_HEADER = [ANNO_COLUMN, PRED_COLUMN]
def match(annotation: str, prediction: str, threshold: float) -> bool:
"""Test if two entities match based on their character edit distance.
Entities should be matched if both entities exist (i.e. are not empty strings) and their Character Error Rate does not exceed the threshold.
Otherwise they should not be matched.
Args:
annotation (str): ground-truth entity.
prediction (str): predicted entity.
threshold (float): matching threshold.
Returns:
bool: Whether to match these two entities.
"""
return (
annotation != ""
and prediction != ""
and editdistance.eval(annotation, prediction) / len(annotation) <= threshold
)
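
For illustration, a minimal usage sketch of this new helper (assuming it is importable from nerval.evaluate, as the tests' "from nerval import evaluate" suggests; the example strings and threshold are made up):

from nerval.evaluate import match

# "Pariis" needs one edit to become "Paris": CER = 1 / 5 = 0.2 <= 0.30, so the entities match
assert match("Paris", "Pariis", threshold=0.30)
# An empty prediction can never match
assert not match("Paris", "", threshold=0.30)
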
def compute_matches(
annotation: str,
prediction: str,
......@@ -56,14 +76,14 @@ def compute_matches(
labels_annot : list of strings, example : ['B-P','I-P','I-P','I-P','I-P','I-P','I-P','I-P','I-P','O', ...]
labels_predict : list of string , example : ['B-P','I-P','I-P','I-P','I-P','I-P','I-P','I-P','I-P','O', ...]
Output : {TAG1 : nb_entity_matched, ...}, example : {'All': 1, 'OCC': 0, 'PER': 1}
Output : {TAG1 : nb_entity_matched, ...}, example : {'ALL': 1, 'OCC': 0, 'PER': 1}
"""
assert annotation, "Annotation is empty"
assert prediction, "Prediction is empty"
assert labels_annot, "Annotation labels are empty"
assert labels_predict, "Prediction labels are empty"
entity_count = {"All": 0}
entity_count = {ALL_ENTITIES: 0}
last_tag = NOT_ENTITY_TAG
# Track indexes of characters found for continuation of nested entities
......@@ -158,24 +178,17 @@ def compute_matches(
# Normalize collected strings
entity_ref = "".join(current_ref)
entity_ref = entity_ref.replace("-", "")
len_entity = len(entity_ref)
entity_compar = "".join(current_compar)
entity_compar = entity_compar.replace("-", "")
# One entity is counted as recognized (score of 1) if the Levenshtein distance between the expected and predicted entities
# represents less than 30% (THRESHOLD) of the length of the expected entity.
# Precision and recall will be computed for each category by comparing the numbers of recognized and expected entities
score = (
1
if editdistance.eval(entity_ref, entity_compar) / len_entity
<= threshold
else 0
)
score = int(match(entity_ref, entity_compar, threshold))
entity_count[last_tag] = entity_count.get(last_tag, 0) + score
entity_count["All"] += score
entity_count[ALL_ENTITIES] += score
current_ref = []
current_compar = []
return entity_count
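
As an illustration of the accumulation above (values are made up), a single matched PER entity updates both its own counter and the ALL_ENTITIES total:

from nerval import ALL_ENTITIES

entity_count = {ALL_ENTITIES: 0}
last_tag, score = "PER", 1  # the reference and predicted strings matched
entity_count[last_tag] = entity_count.get(last_tag, 0) + score
entity_count[ALL_ENTITIES] += score
# entity_count == {"ALL": 1, "PER": 1}
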
......@@ -263,7 +276,6 @@ def compute_scores(
if (prec + rec == 0)
else 2 * (prec * rec) / (prec + rec)
)
scores[tag]["predicted"] = nb_predict
scores[tag]["matched"] = nb_match
scores[tag]["P"] = prec
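
For context, a hedged sketch of how these per-tag scores relate to the counts: prec and rec presumably divide nb_match by the predicted and expected entity counts (nb_support is an assumed name; the other names appear in the hunk). The numbers reproduce the ALL_ENTITIES row of the test fixtures below:

nb_match, nb_predict, nb_support = 1, 3, 3
prec = nb_match / nb_predict  # 0.333...
rec = nb_match / nb_support   # 0.333...
f1 = 0 if (prec + rec == 0) else 2 * (prec * rec) / (prec + rec)  # 0.333...
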
......@@ -321,7 +333,7 @@ def run(annotation: Path, prediction: Path, threshold: int, verbose: bool) -> di
"""
# Get string and list of labels per character
def read_file(path: Path) -> List[str]:
def read_file(path: Path) -> list[str]:
assert path.exists(), f"Error: Input file {path} does not exist"
return path.read_text().strip().splitlines()
......@@ -382,9 +394,9 @@ def run_multiple(file_csv: Path, folder: Path, threshold: int, verbose: bool):
count += 1
scores = run(annot, predict, threshold, verbose)
precision += scores["All"]["P"]
recall += scores["All"]["R"]
f1 += scores["All"]["F1"]
precision += scores[ALL_ENTITIES]["P"]
recall += scores[ALL_ENTITIES]["R"]
f1 += scores[ALL_ENTITIES]["F1"]
if not count:
raise Exception("No file were counted")
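
After the loop, the accumulated sums presumably become a macro-average over the evaluated files; the division itself is outside this hunk, so the sketch below only illustrates that assumed step with made-up values:

# Illustrative values standing in for the sums accumulated in the loop above
precision, recall, f1, count = 2.4, 2.1, 2.2, 3
avg_precision = precision / count
avg_recall = recall / count
avg_f1 = f1 / count
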
......
import re
from typing import List
from nerval import ALL_ENTITIES
NOT_ENTITY_TAG = "O"
BEGINNING_POS = ["B", "S", "U"]
......@@ -49,7 +50,7 @@ def parse_line(index: int, line: str):
raise Exception(f"The file is not in BIO format: check line {index} ({line})")
def parse_bio(lines: List[str]) -> dict:
def parse_bio(lines: list[str]) -> dict:
"""Parse a BIO file to get text content, character-level NE labels and entity types count.
Input: lines of a valid BIO file
......@@ -57,7 +58,7 @@ def parse_bio(lines: List[str]) -> dict:
"""
words = []
labels = []
entity_count = {"All": 0}
entity_count = {ALL_ENTITIES: 0}
last_tag = None
if "§" in " ".join(lines):
......@@ -140,7 +141,7 @@ def parse_bio(lines: List[str]) -> dict:
# Count nb entity for each type
if get_position_label(label) in BEGINNING_POS:
entity_count[tag] = entity_count.get(tag, 0) + 1
entity_count["All"] += 1
entity_count[ALL_ENTITIES] += 1
last_tag = tag
......@@ -156,7 +157,7 @@ def parse_bio(lines: List[str]) -> dict:
result["labels"]
), f'Found {len(result["words"])} word(s) for {len(result["labels"])} label(s)'
for tag in result["entity_count"]:
if tag != "All":
if tag != ALL_ENTITIES:
assert (
result["labels"].count(f"B-{tag}") == result["entity_count"][tag]
), f'Found {result["entity_count"][tag]} entities for {result["labels"].count(f"B-{tag}")} label(s) for entity {tag}'
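
To make the parse_bio contract concrete, a hedged sketch on a tiny made-up input (assuming the usual one "token TAG" pair per line BIO layout):

from nerval.parse import parse_bio

result = parse_bio(["Paris B-LOC", "is O", "nice O"])
# Per the counting logic above, result["entity_count"] should be {"ALL": 1, "LOC": 1};
# result["words"] and result["labels"] hold the text content and its character-level labels.
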
......
from prettytable import MARKDOWN, PrettyTable
from nerval import ALL_ENTITIES
def print_markdown_table(header: list[str], rows: list[list]) -> None:
"""Prints a Markdown table filled with the provided header and rows."""
table = PrettyTable()
table.field_names = header
table.set_style(MARKDOWN)
# Align all columns to the right
table.align = "r"
# Keep the first column left-aligned
table.align[header[0]] = "l"
def _special_sort(row: list[str]) -> str:
if row[0] == ALL_ENTITIES:
# Place the line for all entities at the very top
return ""
return row[0]
rows.sort(key=_special_sort)
# Place ALL_ENTITIES row at the end
rows.append(rows.pop(0))
table.add_rows(rows)
print(table)
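
A small demonstration of what the sort-then-rotate above produces on made-up rows: tags end up in alphabetical order with the ALL_ENTITIES row last.

rows = [["PER", 1], ["ALL", 3], ["LOC", 2]]
rows.sort(key=lambda row: "" if row[0] == "ALL" else row[0])  # ALL sorts first
rows.append(rows.pop(0))                                      # ...then moves to the end
# rows == [["LOC", 2], ["PER", 1], ["ALL", 3]]
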
......@@ -41,13 +58,13 @@ def print_results(scores: dict) -> None:
def print_result_compact(scores: dict) -> None:
result = [
"All",
scores["All"]["predicted"],
scores["All"]["matched"],
round(scores["All"]["P"], 3),
round(scores["All"]["R"], 3),
round(scores["All"]["F1"], 3),
scores["All"]["Support"],
ALL_ENTITIES,
scores[ALL_ENTITIES]["predicted"],
scores[ALL_ENTITIES]["matched"],
round(scores[ALL_ENTITIES]["P"], 3),
round(scores[ALL_ENTITIES]["R"], 3),
round(scores[ALL_ENTITIES]["F1"], 3),
scores[ALL_ENTITIES]["Support"],
]
print_markdown_table(
["tag", "predicted", "matched", "Precision", "Recall", "F1", "Support"],
......
......@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "teklia-nerval"
version = "0.3.3-rc1"
version = "0.3.3rc2"
description = "Tool to evaluate NER on noisy text."
dynamic = ["dependencies"]
authors = [
......@@ -14,6 +14,7 @@ maintainers = [
{ name = "Teklia", email = "contact@teklia.com" },
]
readme = { file = "README.md", content-type = "text/markdown" }
requires-python = ">=3.10"
[project.scripts]
nerval = "nerval.cli:main"
......
import pytest
from nerval import evaluate
from nerval import ALL_ENTITIES, evaluate
THRESHOLD = 0.30
......@@ -370,7 +370,7 @@ fake_predict_tags_bk_boundary_2 = [
fake_predict_tags_aligned,
THRESHOLD,
),
{"All": 1, "PER": 1, "LOC": 0, "DAT": 0},
{ALL_ENTITIES: 1, "PER": 1, "LOC": 0, "DAT": 0},
),
(
(
......@@ -380,7 +380,7 @@ fake_predict_tags_bk_boundary_2 = [
fake_tags_aligned_nested_perfect,
THRESHOLD,
),
{"All": 3, "PER": 1, "LOC": 2},
{ALL_ENTITIES: 3, "PER": 1, "LOC": 2},
),
(
(
......@@ -390,7 +390,7 @@ fake_predict_tags_bk_boundary_2 = [
fake_tags_aligned_nested_false,
THRESHOLD,
),
{"All": 2, "PER": 1, "LOC": 1},
{ALL_ENTITIES: 2, "PER": 1, "LOC": 1},
),
(
(
......@@ -400,7 +400,7 @@ fake_predict_tags_bk_boundary_2 = [
fake_predict_tags_bk_boundary,
THRESHOLD,
),
{"All": 0, "PER": 0},
{ALL_ENTITIES: 0, "PER": 0},
),
(
(
......@@ -410,7 +410,7 @@ fake_predict_tags_bk_boundary_2 = [
fake_predict_tags_bk_boundary_2,
THRESHOLD,
),
{"All": 1, "PER": 1},
{ALL_ENTITIES: 1, "PER": 1},
),
],
)
......
import pytest
from nerval import evaluate
from nerval import ALL_ENTITIES, evaluate
@pytest.mark.parametrize(
("annot", "predict", "matches"),
[
(
{"All": 3, "DAT": 1, "LOC": 1, "PER": 1},
{"All": 3, "DAT": 1, "***": 1, "PER": 1},
{"All": 1, "PER": 1, "LOC": 0, "DAT": 0},
{ALL_ENTITIES: 3, "DAT": 1, "LOC": 1, "PER": 1},
{ALL_ENTITIES: 3, "DAT": 1, "***": 1, "PER": 1},
{ALL_ENTITIES: 1, "PER": 1, "LOC": 0, "DAT": 0},
),
],
)
......@@ -31,7 +31,7 @@ def test_compute_scores(annot, predict, matches):
"matched": 0,
"Support": 1,
},
"All": {
ALL_ENTITIES: {
"P": 0.3333333333333333,
"R": 0.3333333333333333,
"F1": 0.3333333333333333,
......
......@@ -2,11 +2,11 @@ import re
import pytest
from nerval import evaluate
from nerval import ALL_ENTITIES, evaluate
from nerval.parse import get_type_label, parse_line
expected_parsed_annot = {
"entity_count": {"All": 3, "DAT": 1, "LOC": 1, "PER": 1},
"entity_count": {ALL_ENTITIES: 3, "DAT": 1, "LOC": 1, "PER": 1},
"labels": [
"B-PER",
"I-PER",
......@@ -57,7 +57,7 @@ expected_parsed_annot = {
}
expected_parsed_predict = {
"entity_count": {"All": 3, "DAT": 1, "***": 1, "PER": 1},
"entity_count": {ALL_ENTITIES: 3, "DAT": 1, "***": 1, "PER": 1},
"labels": [
"B-PER",
"I-PER",
......@@ -108,7 +108,7 @@ expected_parsed_predict = {
}
expected_parsed_end_of_file = {
"entity_count": {"All": 3, "LOC": 2, "PER": 1},
"entity_count": {ALL_ENTITIES: 3, "LOC": 2, "PER": 1},
"labels": [
"B-PER",
"I-PER",
......
......@@ -3,7 +3,7 @@ from pathlib import Path
import pytest
from nerval import evaluate
from nerval import ALL_ENTITIES, evaluate
@pytest.mark.parametrize(
......@@ -29,7 +29,7 @@ from nerval import evaluate
"matched": 0,
"Support": 1,
},
"All": {
ALL_ENTITIES: {
"P": 0.3333333333333333,
"R": 0.3333333333333333,
"F1": 0.3333333333333333,
......@@ -59,7 +59,7 @@ from nerval import evaluate
pytest.lazy_fixture("nested_bio"),
pytest.lazy_fixture("nested_bio"),
{
"All": {
ALL_ENTITIES: {
"P": 1.0,
"R": 1.0,
"F1": 1.0,
......