diff --git a/dan/manager/ocr.py b/dan/manager/ocr.py index 533d074cc72495b10f38a72b492a00ca4f7a1d06..fdcd54a0edec39510d7d31d823407c0317b7ac1a 100644 --- a/dan/manager/ocr.py +++ b/dan/manager/ocr.py @@ -77,9 +77,8 @@ class OCRDataset(GenericDataset): super(OCRDataset, self).__init__(params, set_name, custom_name, paths_and_sets) self.charset = None self.tokens = None - self.reduce_dims_factor = np.array( - [params["config"]["height_divisor"], params["config"]["width_divisor"], 1] - ) + # Factor to reduce the height and width of the feature vector before feeding the decoder. + self.reduce_dims_factor = np.array([32, 8, 1]) self.collate_function = OCRCollateFunction def __getitem__(self, idx): diff --git a/dan/ocr/document/train.py b/dan/ocr/document/train.py index d0a5fc479236911debe3ac5cfc3cf57d120e5d33..b1ae6bb5904024b598438419cd4487e2202de366 100644 --- a/dan/ocr/document/train.py +++ b/dan/ocr/document/train.py @@ -105,8 +105,6 @@ def get_config(): "config": { "load_in_memory": True, # Load all images in CPU memory "worker_per_gpu": 4, # Num of parallel processes per gpu for data loading - "width_divisor": 8, # Image width will be divided by 8 - "height_divisor": 32, # Image height will be divided by 32 "padding_value": 0, # Image padding value "padding_token": None, # Label padding value "preprocessings": [ diff --git a/docs/usage/train/parameters.md b/docs/usage/train/parameters.md index 8d97ae637cb1a47bd467f07f1f6b4ae743d9f714..03d334f4a0476966d70a1a9357be45dfdb6fa91e 100644 --- a/docs/usage/train/parameters.md +++ b/docs/usage/train/parameters.md @@ -14,8 +14,6 @@ All hyperparameters are specified and editable in the training scripts (meaning | `dataset_params.config.datasets` | Dataset dictionary with the dataset name as key and dataset path as value. | `dict` | | | `dataset_params.config.load_in_memory` | Load all images in CPU memory. | `str` | `True` | | `dataset_params.config.worker_per_gpu` | Number of parallel processes per gpu for data loading. | `int` | `4` | -| `dataset_params.config.height_divisor` | Factor to reduce the width of the feature vector before feeding the decoder. | `int` | `8` | -| `dataset_params.config.width_divisor` | Factor to reduce the height of the feature vector before feeding the decoder. | `int` | `32` | | `dataset_params.config.padding_value` | Image padding value. | `int` | `0` | | `dataset_params.config.padding_token` | Transcription padding value. | `int` | `None` | | `dataset_params.config.preprocessings` | List of pre-processing functions to apply to input images. | `list` | (see [dedicated section](#data-preprocessing)) | diff --git a/tests/conftest.py b/tests/conftest.py index e660869c0f5c0fb80b79a2d8f133ee246e66da52..4d34584724c5a1ef2ecb519201af77738b7c80ee 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -68,8 +68,6 @@ def training_config(): }, "config": { "load_in_memory": True, # Load all images in CPU memory - "width_divisor": 8, # Image width will be divided by 8 - "height_divisor": 32, # Image height will be divided by 32 "padding_value": 0, # Image padding value "padding_token": None, # Label padding value "preprocessings": [