From 6f1d8643680ac9de1d0e6d4f747611a3d41fff03 Mon Sep 17 00:00:00 2001
From: manonBlanco <blanco@teklia.com>
Date: Thu, 21 Dec 2023 16:42:27 +0100
Subject: [PATCH] Raise max_char_prediction from 30 to 200 and update expected metrics

---
 configs/eval.json                    |  2 +-
 tests/data/evaluate/metrics_table.md | 10 ++++-----
 tests/test_evaluate.py               | 32 ++++++++++++++--------------
 3 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/configs/eval.json b/configs/eval.json
index d018cc3f..d93920aa 100644
--- a/configs/eval.json
+++ b/configs/eval.json
@@ -19,7 +19,7 @@
                 ["training", "test"]
             ]
         },
-        "max_char_prediction": 30,
+        "max_char_prediction": 200,
         "tokens": "tests/data/prediction/tokens.yml"
     },
     "model": {
diff --git a/tests/data/evaluate/metrics_table.md b/tests/data/evaluate/metrics_table.md
index a34b0f41..d67456d8 100644
--- a/tests/data/evaluate/metrics_table.md
+++ b/tests/data/evaluate/metrics_table.md
@@ -1,5 +1,5 @@
-| Split | CER (HTR-NER) | CER (HTR) | WER (HTR-NER) | WER (HTR) | WER (HTR no punct) |  NER  |
-|:-----:|:-------------:|:---------:|:-------------:|:---------:|:------------------:|:-----:|
-| train |     36.67     |   38.16   |      40.0     |    40.0   |        40.0        | 28.57 |
-|  val  |     20.59     |   30.77   |      37.5     |    37.5   |        37.5        |  0.0  |
-|  test |     22.22     |   26.67   |     42.86     |   42.86   |       42.86        |  0.0  |
+| Split | CER (HTR-NER) | CER (HTR) | WER (HTR-NER) | WER (HTR) | WER (HTR no punct) | NER  |
+|:-----:|:-------------:|:---------:|:-------------:|:---------:|:------------------:|:----:|
+| train |     18.89     |   21.05   |     26.67     |   26.67   |       26.67        | 7.14 |
+|  val  |      8.82     |   11.54   |      50.0     |    50.0   |        50.0        | 0.0  |
+|  test |      2.78     |    3.33   |     14.29     |   14.29   |       14.29        | 0.0  |
diff --git a/tests/test_evaluate.py b/tests/test_evaluate.py
index 56a7f8b9..98fbd938 100644
--- a/tests/test_evaluate.py
+++ b/tests/test_evaluate.py
@@ -53,45 +53,45 @@ def test_add_metrics_table_row():
         (
             {
                 "nb_chars": 90,
-                "cer": 0.3667,
+                "cer": 0.1889,
                 "nb_chars_no_token": 76,
-                "cer_no_token": 0.3816,
+                "cer_no_token": 0.2105,
                 "nb_words": 15,
-                "wer": 0.4,
+                "wer": 0.2667,
                 "nb_words_no_punct": 15,
-                "wer_no_punct": 0.4,
+                "wer_no_punct": 0.2667,
                 "nb_words_no_token": 15,
-                "wer_no_token": 0.4,
+                "wer_no_token": 0.2667,
                 "nb_tokens": 14,
-                "ner": 0.2857,
+                "ner": 0.0714,
                 "nb_samples": 2,
             },
             {
                 "nb_chars": 34,
-                "cer": 0.2059,
+                "cer": 0.0882,
                 "nb_chars_no_token": 26,
-                "cer_no_token": 0.3077,
+                "cer_no_token": 0.1154,
                 "nb_words": 8,
-                "wer": 0.375,
+                "wer": 0.5,
                 "nb_words_no_punct": 8,
-                "wer_no_punct": 0.375,
+                "wer_no_punct": 0.5,
                 "nb_words_no_token": 8,
-                "wer_no_token": 0.375,
+                "wer_no_token": 0.5,
                 "nb_tokens": 8,
                 "ner": 0.0,
                 "nb_samples": 1,
             },
             {
                 "nb_chars": 36,
-                "cer": 0.2222,
+                "cer": 0.0278,
                 "nb_chars_no_token": 30,
-                "cer_no_token": 0.2667,
+                "cer_no_token": 0.0333,
                 "nb_words": 7,
-                "wer": 0.4286,
+                "wer": 0.1429,
                 "nb_words_no_punct": 7,
-                "wer_no_punct": 0.4286,
+                "wer_no_punct": 0.1429,
                 "nb_words_no_token": 7,
-                "wer_no_token": 0.4286,
+                "wer_no_token": 0.1429,
                 "nb_tokens": 6,
                 "ner": 0.0,
                 "nb_samples": 1,
-- 
GitLab