From c8d4cf6e9d1ed9f62327e9a57d8e5b250ae8554a Mon Sep 17 00:00:00 2001
From: manonBlanco <blanco@teklia.com>
Date: Tue, 9 Jan 2024 17:10:09 +0100
Subject: [PATCH] Add a message to assertion errors

---
 nerval/evaluate.py | 14 +++++++-------
 nerval/parse.py    | 10 +++++++---
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/nerval/evaluate.py b/nerval/evaluate.py
index e05d6a1..ce27e99 100644
--- a/nerval/evaluate.py
+++ b/nerval/evaluate.py
@@ -58,10 +58,10 @@ def compute_matches(
 
     Output : {TAG1 : nb_entity_matched, ...}, example : {'All': 1, 'OCC': 0, 'PER': 1}
     """
-    assert annotation
-    assert prediction
-    assert labels_annot
-    assert labels_predict
+    assert annotation, "Annotation is empty"
+    assert prediction, "Prediction is empty"
+    assert labels_annot, "Annotation labels are empty"
+    assert labels_predict, "Prediction labels are empty"
 
     entity_count = {"All": 0}
     last_tag = NOT_ENTITY_TAG
@@ -190,9 +190,9 @@ def get_labels_aligned(original: str, aligned: str, labels_original: list) -> li
     Output format :
         list of strings
     """
-    assert original
-    assert aligned
-    assert labels_original
+    assert original, "Original is empty"
+    assert aligned, "Aligned is empty"
+    assert labels_original, "Original labels are empty"
 
     labels_aligned = []
     index_original = 0
diff --git a/nerval/parse.py b/nerval/parse.py
index 040d9fc..fda2b66 100644
--- a/nerval/parse.py
+++ b/nerval/parse.py
@@ -42,7 +42,7 @@ def parse_line(index: int, line: str):
     try:
         match_iob = REGEX_IOB_LINE.search(line)
 
-        assert match_iob
+        assert match_iob, f"Line {line} does not match IOB regex"
 
         return match_iob.group(1, 2)
     except AssertionError:
@@ -149,10 +149,14 @@ def parse_bio(lines: List[str]) -> dict:
         result["labels"] = labels
         result["entity_count"] = entity_count
 
-        assert len(result["words"]) == len(result["labels"])
+        assert len(result["words"]) == len(
+            result["labels"]
+        ), f'Found {len(result["words"])} word(s) for {len(result["labels"])} label(s)'
         for tag in result["entity_count"]:
             if tag != "All":
-                assert result["labels"].count(f"B-{tag}") == result["entity_count"][tag]
+                assert (
+                    result["labels"].count(f"B-{tag}") == result["entity_count"][tag]
+                ), f'Found {result["entity_count"][tag]} entities for {result["labels"].count(f"B-{tag}")} label(s) for entity {tag}'
 
     return result
 
-- 
GitLab