Skip to content
Snippets Groups Projects
Commit a93bc055 authored by Manon Blanco
Browse files

Remove the remove_linebreaks parameter from training configuration

parent 0c2612fd
No related branches found
No related tags found
1 merge request: !165 "Remove the remove_linebreaks parameter from training configuration"
...@@ -44,11 +44,6 @@ class OCRDatasetManager(DatasetManager): ...@@ -44,11 +44,6 @@ class OCRDatasetManager(DatasetManager):
for key in datasets.keys(): for key in datasets.keys():
with open(os.path.join(datasets[key], "charset.pkl"), "rb") as f: with open(os.path.join(datasets[key], "charset.pkl"), "rb") as f:
charset = charset.union(set(pickle.load(f))) charset = charset.union(set(pickle.load(f)))
if (
"\n" in charset
and "remove_linebreaks" in self.params["config"]["constraints"]
):
charset.remove("\n")
if "" in charset: if "" in charset:
charset.remove("") charset.remove("")
return sorted(list(charset)) return sorted(list(charset))
...@@ -167,13 +162,9 @@ class OCRDataset(GenericDataset): ...@@ -167,13 +162,9 @@ class OCRDataset(GenericDataset):
def convert_sample_labels(self, sample): def convert_sample_labels(self, sample):
label = sample["label"] label = sample["label"]
if "remove_linebreaks" in self.params["config"]["constraints"]:
full_label = label.replace("\n", " ").replace(" ", " ")
else:
full_label = label
sample["label"] = full_label sample["label"] = label
sample["token_label"] = token_to_ind(self.charset, full_label) sample["token_label"] = token_to_ind(self.charset, label)
sample["token_label"].append(self.tokens["end"]) sample["token_label"].append(self.tokens["end"])
sample["label_len"] = len(sample["token_label"]) sample["label_len"] = len(sample["token_label"])
sample["token_label"].insert(0, self.tokens["start"]) sample["token_label"].insert(0, self.tokens["start"])
......
...@@ -109,7 +109,6 @@ def get_config(): ...@@ -109,7 +109,6 @@ def get_config():
"height_divisor": 32, # Image height will be divided by 32 "height_divisor": 32, # Image height will be divided by 32
"padding_value": 0, # Image padding value "padding_value": 0, # Image padding value
"padding_token": None, # Label padding value "padding_token": None, # Label padding value
"constraints": [],
"preprocessings": [ "preprocessings": [
{ {
"type": "to_RGB", "type": "to_RGB",
......
...@@ -18,7 +18,6 @@ All hyperparameters are specified and editable in the training scripts (meaning ...@@ -18,7 +18,6 @@ All hyperparameters are specified and editable in the training scripts (meaning
| `dataset_params.config.width_divisor` | Factor to reduce the height of the feature vector before feeding the decoder. | `int` | `32` | | `dataset_params.config.width_divisor` | Factor to reduce the height of the feature vector before feeding the decoder. | `int` | `32` |
| `dataset_params.config.padding_value` | Image padding value. | `int` | `0` | | `dataset_params.config.padding_value` | Image padding value. | `int` | `0` |
| `dataset_params.config.padding_token` | Transcription padding value. | `int` | `None` | | `dataset_params.config.padding_token` | Transcription padding value. | `int` | `None` |
| `dataset_params.config.constraints` | Whether to add end-of-transcription and start-of-transcription tokens in labels. | `list` | `[]` |
| `dataset_params.config.preprocessings` | List of pre-processing functions to apply to input images. | `list` | (see [dedicated section](#data-preprocessing)) | | `dataset_params.config.preprocessings` | List of pre-processing functions to apply to input images. | `list` | (see [dedicated section](#data-preprocessing)) |
| `dataset_params.config.augmentation` | Configuration for data augmentation. | `dict` | (see [dedicated section](#data-augmentation)) | | `dataset_params.config.augmentation` | Configuration for data augmentation. | `dict` | (see [dedicated section](#data-augmentation)) |
......
...@@ -72,7 +72,6 @@ def training_config(): ...@@ -72,7 +72,6 @@ def training_config():
"height_divisor": 32, # Image height will be divided by 32 "height_divisor": 32, # Image height will be divided by 32
"padding_value": 0, # Image padding value "padding_value": 0, # Image padding value
"padding_token": None, # Label padding value "padding_token": None, # Label padding value
"constraints": [],
"preprocessings": [ "preprocessings": [
{ {
"type": "to_RGB", "type": "to_RGB",
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment