From 6f1d8643680ac9de1d0e6d4f747611a3d41fff03 Mon Sep 17 00:00:00 2001 From: manonBlanco <blanco@teklia.com> Date: Thu, 21 Dec 2023 16:42:27 +0100 Subject: [PATCH] Update max_char_prediction --- configs/eval.json | 2 +- tests/data/evaluate/metrics_table.md | 10 ++++----- tests/test_evaluate.py | 32 ++++++++++++++-------------- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/configs/eval.json b/configs/eval.json index d018cc3f..d93920aa 100644 --- a/configs/eval.json +++ b/configs/eval.json @@ -19,7 +19,7 @@ ["training", "test"] ] }, - "max_char_prediction": 30, + "max_char_prediction": 200, "tokens": "tests/data/prediction/tokens.yml" }, "model": { diff --git a/tests/data/evaluate/metrics_table.md b/tests/data/evaluate/metrics_table.md index a34b0f41..d67456d8 100644 --- a/tests/data/evaluate/metrics_table.md +++ b/tests/data/evaluate/metrics_table.md @@ -1,5 +1,5 @@ -| Split | CER (HTR-NER) | CER (HTR) | WER (HTR-NER) | WER (HTR) | WER (HTR no punct) | NER | -|:-----:|:-------------:|:---------:|:-------------:|:---------:|:------------------:|:-----:| -| train | 36.67 | 38.16 | 40.0 | 40.0 | 40.0 | 28.57 | -| val | 20.59 | 30.77 | 37.5 | 37.5 | 37.5 | 0.0 | -| test | 22.22 | 26.67 | 42.86 | 42.86 | 42.86 | 0.0 | +| Split | CER (HTR-NER) | CER (HTR) | WER (HTR-NER) | WER (HTR) | WER (HTR no punct) | NER | +|:-----:|:-------------:|:---------:|:-------------:|:---------:|:------------------:|:----:| +| train | 18.89 | 21.05 | 26.67 | 26.67 | 26.67 | 7.14 | +| val | 8.82 | 11.54 | 50.0 | 50.0 | 50.0 | 0.0 | +| test | 2.78 | 3.33 | 14.29 | 14.29 | 14.29 | 0.0 | diff --git a/tests/test_evaluate.py b/tests/test_evaluate.py index 56a7f8b9..98fbd938 100644 --- a/tests/test_evaluate.py +++ b/tests/test_evaluate.py @@ -53,45 +53,45 @@ def test_add_metrics_table_row(): ( { "nb_chars": 90, - "cer": 0.3667, + "cer": 0.1889, "nb_chars_no_token": 76, - "cer_no_token": 0.3816, + "cer_no_token": 0.2105, "nb_words": 15, - "wer": 0.4, + "wer": 0.2667, "nb_words_no_punct": 15, - "wer_no_punct": 0.4, + "wer_no_punct": 0.2667, "nb_words_no_token": 15, - "wer_no_token": 0.4, + "wer_no_token": 0.2667, "nb_tokens": 14, - "ner": 0.2857, + "ner": 0.0714, "nb_samples": 2, }, { "nb_chars": 34, - "cer": 0.2059, + "cer": 0.0882, "nb_chars_no_token": 26, - "cer_no_token": 0.3077, + "cer_no_token": 0.1154, "nb_words": 8, - "wer": 0.375, + "wer": 0.5, "nb_words_no_punct": 8, - "wer_no_punct": 0.375, + "wer_no_punct": 0.5, "nb_words_no_token": 8, - "wer_no_token": 0.375, + "wer_no_token": 0.5, "nb_tokens": 8, "ner": 0.0, "nb_samples": 1, }, { "nb_chars": 36, - "cer": 0.2222, + "cer": 0.0278, "nb_chars_no_token": 30, - "cer_no_token": 0.2667, + "cer_no_token": 0.0333, "nb_words": 7, - "wer": 0.4286, + "wer": 0.1429, "nb_words_no_punct": 7, - "wer_no_punct": 0.4286, + "wer_no_punct": 0.1429, "nb_words_no_token": 7, - "wer_no_token": 0.4286, + "wer_no_token": 0.1429, "nb_tokens": 6, "ner": 0.0, "nb_samples": 1, -- GitLab