From e01751a4df836018bbf2eb05481433d2e36d488e Mon Sep 17 00:00:00 2001
From: Mélodie Boillet <boillet@teklia.com>
Date: Wed, 24 May 2023 13:48:27 +0200
Subject: [PATCH] Remove ctc mode

---
 dan/manager/metrics.py         |  2 --
 dan/manager/ocr.py             | 19 ++++++-------------
 dan/manager/training.py        |  7 -------
 dan/ocr/document/train.py      |  1 -
 docs/usage/train/parameters.md |  1 -
 tests/conftest.py              |  1 -
 6 files changed, 6 insertions(+), 25 deletions(-)

diff --git a/dan/manager/metrics.py b/dan/manager/metrics.py
index ecb94a86..1f78f505 100644
--- a/dan/manager/metrics.py
+++ b/dan/manager/metrics.py
@@ -150,7 +150,6 @@ class MetricManager:
             )
         elif metric_name in [
             "loss",
-            "loss_ctc",
             "loss_ce",
         ]:
             value = float(
@@ -220,7 +219,6 @@ class MetricManager:
             ]
             metrics["nb_words_no_punct"] = [len(gt) for gt in split_gt]
         elif metric_name in [
-            "loss_ctc",
             "loss_ce",
             "loss",
         ]:
diff --git a/dan/manager/ocr.py b/dan/manager/ocr.py
index bd464073..ec98755a 100644
--- a/dan/manager/ocr.py
+++ b/dan/manager/ocr.py
@@ -45,19 +45,12 @@ class OCRDatasetManager(DatasetManager):
         self.tokens = {
             "pad": params["config"]["padding_token"],
         }
-        if self.params["config"]["charset_mode"].lower() == "ctc":
-            self.tokens["blank"] = len(self.charset)
-            self.tokens["pad"] = (
-                self.tokens["pad"] if self.tokens["pad"] else len(self.charset) + 1
-            )
-            self.params["config"]["padding_token"] = self.tokens["pad"]
-        elif self.params["config"]["charset_mode"] == "seq2seq":
-            self.tokens["end"] = len(self.charset)
-            self.tokens["start"] = len(self.charset) + 1
-            self.tokens["pad"] = (
-                self.tokens["pad"] if self.tokens["pad"] else len(self.charset) + 2
-            )
-            self.params["config"]["padding_token"] = self.tokens["pad"]
+        self.tokens["end"] = len(self.charset)
+        self.tokens["start"] = len(self.charset) + 1
+        self.tokens["pad"] = (
+            self.tokens["pad"] if self.tokens["pad"] else len(self.charset) + 2
+        )
+        self.params["config"]["padding_token"] = self.tokens["pad"]
 
     def get_merged_charsets(self):
         """
diff --git a/dan/manager/training.py b/dan/manager/training.py
index 5821d2e8..1f18aace 100644
--- a/dan/manager/training.py
+++ b/dan/manager/training.py
@@ -343,12 +343,6 @@ class GenericTrainingManager:
                         if c in old_charset:
                             new_weights[i] = weights[old_charset.index(c)]
                             pretrained_chars.append(c)
-                    if (
-                        "transfered_charset_last_is_ctc_blank" in self.params["model_params"]
-                        and self.params["model_params"]["transfered_charset_last_is_ctc_blank"]
-                    ):
-                        new_weights[-1] = weights[-1]
-                        pretrained_chars.append("<blank>")
                     checkpoint["{}_state_dict".format(state_dict_name)][key] = new_weights
                     self.models[model_name].load_state_dict(
                         {key: checkpoint["{}_state_dict".format(state_dict_name)][key]},
@@ -920,7 +914,6 @@
                 "nb_samples",
                 "loss",
                 "loss_ce",
-                "loss_ctc",
                 "loss_ce_end",
             ]:
                 metrics[metric_name] = self.sum_ddp_metric(
diff --git a/dan/ocr/document/train.py b/dan/ocr/document/train.py
index 5a5ebd00..81660d56 100644
--- a/dan/ocr/document/train.py
+++ b/dan/ocr/document/train.py
@@ -114,7 +114,6 @@ def get_config():
             "height_divisor": 32,  # Image height will be divided by 32
             "padding_value": 0,  # Image padding value
             "padding_token": None,  # Label padding value
-            "charset_mode": "seq2seq",  # add end-of-transcription and start-of-transcription tokens to charset
             "constraints": [
                 "add_eot",
                 "add_sot",
diff --git a/docs/usage/train/parameters.md b/docs/usage/train/parameters.md
index dacfd9dd..4e3fa785 100644
--- a/docs/usage/train/parameters.md
+++ b/docs/usage/train/parameters.md
@@ -18,7 +18,6 @@ All hyperparameters are specified and editable in the training scripts (meaning
 | `dataset_params.config.width_divisor` | Factor to reduce the height of the feature vector before feeding the decoder. | `int` | `32` |
 | `dataset_params.config.padding_value` | Image padding value. | `int` | `0` |
 | `dataset_params.config.padding_token` | Transcription padding value. | `int` | `None` |
-| `dataset_params.config.charset_mode` | Whether to add end-of-transcription and start-of-transcription tokens to charset. | `str` | `seq2seq` |
 | `dataset_params.config.constraints` | Whether to add end-of-transcription and start-of-transcription tokens in labels. | `list` | `["add_eot", "add_sot"]` |
 | `dataset_params.config.normalize` | Normalize with mean and variance of training dataset. | `bool` | `True` |
 | `dataset_params.config.preprocessings` | List of pre-processing functions to apply to input images. | `list` | (see [dedicated section](#data-preprocessing)) |
diff --git a/tests/conftest.py b/tests/conftest.py
index 777bdda5..9d0860f0 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -72,7 +72,6 @@ def training_config():
         "height_divisor": 32,  # Image height will be divided by 32
         "padding_value": 0,  # Image padding value
         "padding_token": None,  # Label padding value
-        "charset_mode": "seq2seq",  # add end-of-transcription and start-of-transcription tokens to charset
         "constraints": [
             "add_eot",
             "add_sot",
-- 
GitLab
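
Note: after this patch, the charset tokens in dan/manager/ocr.py are always laid
out seq2seq-style. A minimal sketch of the resulting indices, using a toy
three-character charset (the names follow the diff; the charset and values are
illustrative, not taken from the repository):

    charset = ["a", "b", "c"]  # toy charset; DAN merges the real one from the datasets

    padding_token = None  # dataset_params.config.padding_token from the training config
    tokens = {"pad": padding_token}
    tokens["end"] = len(charset)        # end-of-transcription token -> index 3
    tokens["start"] = len(charset) + 1  # start-of-transcription token -> index 4
    # when no padding token is configured, fall back to the next free index
    tokens["pad"] = tokens["pad"] if tokens["pad"] else len(charset) + 2  # -> 5

    assert tokens == {"end": 3, "start": 4, "pad": 5}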
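
Likewise, the charset transfer in dan/manager/training.py now only copies
pretrained weights for characters present in both charsets; the removed branch
no longer carries over a trailing CTC <blank> row. A rough sketch of the
remaining remapping, with plain lists standing in for the torch tensors used in
the real code (toy charsets and values, purely illustrative):

    old_charset = ["a", "b", "c"]
    new_charset = ["b", "c", "d"]
    weights = [0.1, 0.2, 0.3]  # one pretrained row per old character

    new_weights = [0.0] * len(new_charset)  # fresh initialisation
    pretrained_chars = []
    for i, c in enumerate(new_charset):
        if c in old_charset:
            new_weights[i] = weights[old_charset.index(c)]
            pretrained_chars.append(c)

    # "d" was not in the old charset, so it keeps its fresh initialisation
    assert new_weights == [0.2, 0.3, 0.0]
    assert pretrained_chars == ["b", "c"]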