From c559d8efb26d60f687870787b868941f7282315b Mon Sep 17 00:00:00 2001 From: Manon blanco <blanco@teklia.com> Date: Tue, 13 Jun 2023 06:20:36 +0000 Subject: [PATCH] Remove normalize parameter from training configuration --- dan/manager/ocr.py | 5 ++--- dan/ocr/document/train.py | 1 - docs/usage/train/parameters.md | 1 - tests/conftest.py | 1 - 4 files changed, 2 insertions(+), 6 deletions(-) diff --git a/dan/manager/ocr.py b/dan/manager/ocr.py index d1562780..b70b9364 100644 --- a/dan/manager/ocr.py +++ b/dan/manager/ocr.py @@ -112,9 +112,8 @@ class OCRDataset(GenericDataset): ) sample["img"] = cv2.resize(sample["img"], (new_w, new_h)) - # Normalization if requested - if "normalize" in self.params["config"] and self.params["config"]["normalize"]: - sample["img"] = (sample["img"] - self.mean) / self.std + # Normalization + sample["img"] = (sample["img"] - self.mean) / self.std sample["img_reduced_shape"] = np.ceil( sample["img"].shape / self.reduce_dims_factor diff --git a/dan/ocr/document/train.py b/dan/ocr/document/train.py index 4745fb97..a3bbba55 100644 --- a/dan/ocr/document/train.py +++ b/dan/ocr/document/train.py @@ -113,7 +113,6 @@ def get_config(): "add_eot", "add_sot", ], # add end-of-transcription and start-of-transcription tokens in labels - "normalize": True, # Normalize with mean and variance of training dataset "preprocessings": [ { "type": "to_RGB", diff --git a/docs/usage/train/parameters.md b/docs/usage/train/parameters.md index 7447e082..9c82931b 100644 --- a/docs/usage/train/parameters.md +++ b/docs/usage/train/parameters.md @@ -19,7 +19,6 @@ All hyperparameters are specified and editable in the training scripts (meaning | `dataset_params.config.padding_value` | Image padding value. | `int` | `0` | | `dataset_params.config.padding_token` | Transcription padding value. | `int` | `None` | | `dataset_params.config.constraints` | Whether to add end-of-transcription and start-of-transcription tokens in labels. | `list` | `["add_eot", "add_sot"]` | -| `dataset_params.config.normalize` | Normalize with mean and variance of training dataset. | `bool` | `True` | | `dataset_params.config.preprocessings` | List of pre-processing functions to apply to input images. | `list` | (see [dedicated section](#data-preprocessing)) | | `dataset_params.config.augmentation` | Configuration for data augmentation. | `dict` | (see [dedicated section](#data-augmentation)) | diff --git a/tests/conftest.py b/tests/conftest.py index 7febae99..854de3c0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -76,7 +76,6 @@ def training_config(): "add_eot", "add_sot", ], # add end-of-transcription and start-of-transcription tokens in labels - "normalize": True, # Normalize with mean and variance of training dataset "preprocessings": [ { "type": "to_RGB", -- GitLab