diff --git a/dan/manager/metrics.py b/dan/manager/metrics.py index 565f307211ab31eff18eda16abdeae4d4514bb57..abb8c166dad8afa8b68fa4ff724b1820324f71e1 100644 --- a/dan/manager/metrics.py +++ b/dan/manager/metrics.py @@ -149,7 +149,6 @@ class MetricManager: ) elif metric_name in [ "loss", - "loss_ctc", "loss_ce", ]: value = float( @@ -219,7 +218,6 @@ class MetricManager: ] metrics["nb_words_no_punct"] = [len(gt) for gt in split_gt] elif metric_name in [ - "loss_ctc", "loss_ce", "loss", ]: diff --git a/dan/manager/ocr.py b/dan/manager/ocr.py index bd4640731aafa905b05b596233e917e1971a7209..45d851c3bb558796c29db980a8d84380d61c6908 100644 --- a/dan/manager/ocr.py +++ b/dan/manager/ocr.py @@ -14,7 +14,6 @@ from dan.ocr.utils import LM_str_to_ind from dan.utils import ( pad_image, pad_image_width_random, - pad_image_width_right, pad_images, pad_sequences_1D, rand, @@ -45,19 +44,12 @@ class OCRDatasetManager(DatasetManager): self.tokens = { "pad": params["config"]["padding_token"], } - if self.params["config"]["charset_mode"].lower() == "ctc": - self.tokens["blank"] = len(self.charset) - self.tokens["pad"] = ( - self.tokens["pad"] if self.tokens["pad"] else len(self.charset) + 1 - ) - self.params["config"]["padding_token"] = self.tokens["pad"] - elif self.params["config"]["charset_mode"] == "seq2seq": - self.tokens["end"] = len(self.charset) - self.tokens["start"] = len(self.charset) + 1 - self.tokens["pad"] = ( - self.tokens["pad"] if self.tokens["pad"] else len(self.charset) + 2 - ) - self.params["config"]["padding_token"] = self.tokens["pad"] + self.tokens["end"] = len(self.charset) + self.tokens["start"] = len(self.charset) + 1 + self.tokens["pad"] = ( + self.tokens["pad"] if self.tokens["pad"] else len(self.charset) + 2 + ) + self.params["config"]["padding_token"] = self.tokens["pad"] def get_merged_charsets(self): """ @@ -161,37 +153,7 @@ class OCRDataset(GenericDataset): sample["img_shape"] / self.reduce_dims_factor ).astype(int) - # Padding to handle CTC requirements if self.set_name == "train": - max_label_len = 0 - height = 1 - ctc_padding = False - if "CTC_line" in self.params["config"]["constraints"]: - max_label_len = sample["label_len"] - ctc_padding = True - if "CTC_va" in self.params["config"]["constraints"]: - max_label_len = max(sample["line_label_len"]) - ctc_padding = True - if "CTC_pg" in self.params["config"]["constraints"]: - max_label_len = sample["label_len"] - height = max(sample["img_reduced_shape"][0], 1) - ctc_padding = True - if ( - ctc_padding - and 2 * max_label_len + 1 > sample["img_reduced_shape"][1] * height - ): - sample["img"] = pad_image_width_right( - sample["img"], - int( - np.ceil((2 * max_label_len + 1) / height) - * self.reduce_dims_factor[1] - ), - self.padding_value, - ) - sample["img_shape"] = sample["img"].shape - sample["img_reduced_shape"] = np.ceil( - sample["img_shape"] / self.reduce_dims_factor - ).astype(int) sample["img_reduced_shape"] = [ max(1, t) for t in sample["img_reduced_shape"] ] diff --git a/dan/manager/training.py b/dan/manager/training.py index 876e788c23c3ec785319535674e3af985790b4a7..53012a7c22166f5461bc4f55d840b5b191dc8078 100644 --- a/dan/manager/training.py +++ b/dan/manager/training.py @@ -341,12 +341,6 @@ class GenericTrainingManager: if c in old_charset: new_weights[i] = weights[old_charset.index(c)] pretrained_chars.append(c) - if ( - "transfered_charset_last_is_ctc_blank" in self.params["model_params"] - and self.params["model_params"]["transfered_charset_last_is_ctc_blank"] - ): - new_weights[-1] = weights[-1] - pretrained_chars.append("<blank>") checkpoint["{}_state_dict".format(state_dict_name)][key] = new_weights self.models[model_name].load_state_dict( {key: checkpoint["{}_state_dict".format(state_dict_name)][key]}, @@ -903,7 +897,6 @@ class GenericTrainingManager: "nb_samples", "loss", "loss_ce", - "loss_ctc", "loss_ce_end", ]: metrics[metric_name] = self.sum_ddp_metric( diff --git a/dan/ocr/document/train.py b/dan/ocr/document/train.py index 5a5ebd006e166f1f3c7b872bd2a68dc7b9cbe10a..81660d56e6807f1b84f79fab08f7268ae91441f4 100644 --- a/dan/ocr/document/train.py +++ b/dan/ocr/document/train.py @@ -114,7 +114,6 @@ def get_config(): "height_divisor": 32, # Image height will be divided by 32 "padding_value": 0, # Image padding value "padding_token": None, # Label padding value - "charset_mode": "seq2seq", # add end-of-transcription and start-of-transcription tokens to charset "constraints": [ "add_eot", "add_sot", diff --git a/docs/usage/train/parameters.md b/docs/usage/train/parameters.md index dacfd9dd07bc1f97666cf79b5d4df2e5340cc71f..4e3fa785c501273e6963536a5e53854fe1e045e2 100644 --- a/docs/usage/train/parameters.md +++ b/docs/usage/train/parameters.md @@ -18,7 +18,6 @@ All hyperparameters are specified and editable in the training scripts (meaning | `dataset_params.config.width_divisor` | Factor to reduce the height of the feature vector before feeding the decoder. | `int` | `32` | | `dataset_params.config.padding_value` | Image padding value. | `int` | `0` | | `dataset_params.config.padding_token` | Transcription padding value. | `int` | `None` | -| `dataset_params.config.charset_mode` | Whether to add end-of-transcription and start-of-transcription tokens to charset. | `str` | `seq2seq` | | `dataset_params.config.constraints` | Whether to add end-of-transcription and start-of-transcription tokens in labels. | `list` | `["add_eot", "add_sot"]` | | `dataset_params.config.normalize` | Normalize with mean and variance of training dataset. | `bool` | `True` | | `dataset_params.config.preprocessings` | List of pre-processing functions to apply to input images. | `list` | (see [dedicated section](#data-preprocessing)) | diff --git a/tests/conftest.py b/tests/conftest.py index 777bdda54579cf43ca1e91c7dd85200781a96d64..9d0860f0db8ef0ce617458be26d867808e3637e7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -72,7 +72,6 @@ def training_config(): "height_divisor": 32, # Image height will be divided by 32 "padding_value": 0, # Image padding value "padding_token": None, # Label padding value - "charset_mode": "seq2seq", # add end-of-transcription and start-of-transcription tokens to charset "constraints": [ "add_eot", "add_sot",