From 435fbdd5b6ff0214549609574e899ad428983a7b Mon Sep 17 00:00:00 2001
From: Manon blanco <blanco@teklia.com>
Date: Tue, 13 Jun 2023 10:01:22 +0000
Subject: [PATCH] Remove width_divisor and height_divisor parameters from
 training configuration

---
 dan/manager/ocr.py             | 5 ++---
 dan/ocr/document/train.py      | 2 --
 docs/usage/train/parameters.md | 2 --
 tests/conftest.py              | 2 --
 4 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/dan/manager/ocr.py b/dan/manager/ocr.py
index 533d074c..fdcd54a0 100644
--- a/dan/manager/ocr.py
+++ b/dan/manager/ocr.py
@@ -77,9 +77,8 @@ class OCRDataset(GenericDataset):
         super(OCRDataset, self).__init__(params, set_name, custom_name, paths_and_sets)
         self.charset = None
         self.tokens = None
-        self.reduce_dims_factor = np.array(
-            [params["config"]["height_divisor"], params["config"]["width_divisor"], 1]
-        )
+        # Factor to reduce the height and width of the feature vector before feeding the decoder.
+        self.reduce_dims_factor = np.array([32, 8, 1])
         self.collate_function = OCRCollateFunction
 
     def __getitem__(self, idx):
diff --git a/dan/ocr/document/train.py b/dan/ocr/document/train.py
index d0a5fc47..b1ae6bb5 100644
--- a/dan/ocr/document/train.py
+++ b/dan/ocr/document/train.py
@@ -105,8 +105,6 @@ def get_config():
             "config": {
                 "load_in_memory": True,  # Load all images in CPU memory
                 "worker_per_gpu": 4,  # Num of parallel processes per gpu for data loading
-                "width_divisor": 8,  # Image width will be divided by 8
-                "height_divisor": 32,  # Image height will be divided by 32
                 "padding_value": 0,  # Image padding value
                 "padding_token": None,  # Label padding value
                 "preprocessings": [
diff --git a/docs/usage/train/parameters.md b/docs/usage/train/parameters.md
index 8d97ae63..03d334f4 100644
--- a/docs/usage/train/parameters.md
+++ b/docs/usage/train/parameters.md
@@ -14,8 +14,6 @@ All hyperparameters are specified and editable in the training scripts (meaning
 | `dataset_params.config.datasets`       | Dataset dictionary with the dataset name as key and dataset path as value.    | `dict` |                                                |
 | `dataset_params.config.load_in_memory` | Load all images in CPU memory.                                                | `str`  | `True`                                         |
 | `dataset_params.config.worker_per_gpu` | Number of parallel processes per gpu for data loading.                        | `int`  | `4`                                            |
-| `dataset_params.config.height_divisor` | Factor to reduce the width of the feature vector before feeding the decoder.  | `int`  | `8`                                            |
-| `dataset_params.config.width_divisor`  | Factor to reduce the height of the feature vector before feeding the decoder. | `int`  | `32`                                           |
 | `dataset_params.config.padding_value`  | Image padding value.                                                          | `int`  | `0`                                            |
 | `dataset_params.config.padding_token`  | Transcription padding value.                                                  | `int`  | `None`                                         |
 | `dataset_params.config.preprocessings` | List of pre-processing functions to apply to input images.                    | `list` | (see [dedicated section](#data-preprocessing)) |
diff --git a/tests/conftest.py b/tests/conftest.py
index e660869c..4d345847 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -68,8 +68,6 @@ def training_config():
         },
         "config": {
             "load_in_memory": True,  # Load all images in CPU memory
-            "width_divisor": 8,  # Image width will be divided by 8
-            "height_divisor": 32,  # Image height will be divided by 32
            "padding_value": 0,  # Image padding value
             "padding_token": None,  # Label padding value
             "preprocessings": [
-- 
GitLab
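
Note on the hardcoded factor: after this change OCRDataset always assumes the encoder downsamples input images by 32 in height and 8 in width (the values previously set via height_divisor and width_divisor). The sketch below is illustrative only, not part of the patch; the reduced_shape helper is hypothetical and simply shows how such a factor maps an image shape to the feature-map shape seen by the decoder.

import numpy as np

# Same hardcoded reduction factor as in the patch: height / 32, width / 8,
# channels left unchanged.
reduce_dims_factor = np.array([32, 8, 1])

def reduced_shape(image_shape):
    # Hypothetical helper: estimate the feature-map shape for an image of
    # shape (height, width, channels), rounding up as a pooling stack would.
    return np.ceil(np.array(image_shape) / reduce_dims_factor).astype(int)

print(reduced_shape((1024, 800, 3)))  # -> [ 32 100   3]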