From c559d8efb26d60f687870787b868941f7282315b Mon Sep 17 00:00:00 2001
From: Manon blanco <blanco@teklia.com>
Date: Tue, 13 Jun 2023 06:20:36 +0000
Subject: [PATCH] Remove normalize parameter from training configuration

---
 dan/manager/ocr.py             | 5 ++---
 dan/ocr/document/train.py      | 1 -
 docs/usage/train/parameters.md | 1 -
 tests/conftest.py              | 1 -
 4 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/dan/manager/ocr.py b/dan/manager/ocr.py
index d1562780..b70b9364 100644
--- a/dan/manager/ocr.py
+++ b/dan/manager/ocr.py
@@ -112,9 +112,8 @@ class OCRDataset(GenericDataset):
                 )
                 sample["img"] = cv2.resize(sample["img"], (new_w, new_h))
 
-        # Normalization if requested
-        if "normalize" in self.params["config"] and self.params["config"]["normalize"]:
-            sample["img"] = (sample["img"] - self.mean) / self.std
+        # Normalization
+        sample["img"] = (sample["img"] - self.mean) / self.std
 
         sample["img_reduced_shape"] = np.ceil(
             sample["img"].shape / self.reduce_dims_factor
diff --git a/dan/ocr/document/train.py b/dan/ocr/document/train.py
index 4745fb97..a3bbba55 100644
--- a/dan/ocr/document/train.py
+++ b/dan/ocr/document/train.py
@@ -113,7 +113,6 @@ def get_config():
                     "add_eot",
                     "add_sot",
                 ],  # add end-of-transcription and start-of-transcription tokens in labels
-                "normalize": True,  # Normalize with mean and variance of training dataset
                 "preprocessings": [
                     {
                         "type": "to_RGB",
diff --git a/docs/usage/train/parameters.md b/docs/usage/train/parameters.md
index 7447e082..9c82931b 100644
--- a/docs/usage/train/parameters.md
+++ b/docs/usage/train/parameters.md
@@ -19,7 +19,6 @@ All hyperparameters are specified and editable in the training scripts (meaning
 | `dataset_params.config.padding_value`   | Image padding value.                                                                   | `int`        | `0`                                            |
 | `dataset_params.config.padding_token`   | Transcription padding value.                                                           | `int`        | `None`                                         |
 | `dataset_params.config.constraints`     | Whether to add end-of-transcription and start-of-transcription tokens in labels.       | `list`       | `["add_eot", "add_sot"]`                       |
-| `dataset_params.config.normalize`       | Normalize with mean and variance of training dataset.                                  | `bool`       | `True`                                         |
 | `dataset_params.config.preprocessings`  | List of pre-processing functions to apply to input images.                             | `list`       | (see [dedicated section](#data-preprocessing)) |
 | `dataset_params.config.augmentation`    | Configuration for data augmentation.                                                   | `dict`       | (see [dedicated section](#data-augmentation))  |
 
diff --git a/tests/conftest.py b/tests/conftest.py
index 7febae99..854de3c0 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -76,7 +76,6 @@ def training_config():
                     "add_eot",
                     "add_sot",
                 ],  # add end-of-transcription and start-of-transcription tokens in labels
-                "normalize": True,  # Normalize with mean and variance of training dataset
                 "preprocessings": [
                     {
                         "type": "to_RGB",
-- 
GitLab