Skip to content
Snippets Groups Projects
Commit 8fd4c5ba authored by Mélodie
Browse files

Apply a93bc055

parent 36ac733b
No related branches found
No related tags found
No related merge requests found
......@@ -44,11 +44,6 @@ class OCRDatasetManager(DatasetManager):
for key in datasets.keys():
with open(os.path.join(datasets[key], "charset.pkl"), "rb") as f:
charset = charset.union(set(pickle.load(f)))
if (
"\n" in charset
and "remove_linebreaks" in self.params["config"]["constraints"]
):
charset.remove("\n")
if "" in charset:
charset.remove("")
return sorted(list(charset))
......@@ -167,13 +162,9 @@ class OCRDataset(GenericDataset):
def convert_sample_labels(self, sample):
label = sample["label"]
if "remove_linebreaks" in self.params["config"]["constraints"]:
full_label = label.replace("\n", " ").replace(" ", " ")
else:
full_label = label
sample["label"] = full_label
sample["token_label"] = token_to_ind(self.charset, full_label)
sample["label"] = label
sample["token_label"] = token_to_ind(self.charset, label)
sample["token_label"].append(self.tokens["end"])
sample["label_len"] = len(sample["token_label"])
sample["token_label"].insert(0, self.tokens["start"])
......
......@@ -109,7 +109,6 @@ def get_config():
"height_divisor": 32, # Image height will be divided by 32
"padding_value": 0, # Image padding value
"padding_token": None, # Label padding value
"constraints": [],
"preprocessings": [
{
"type": "to_RGB",
......
......@@ -18,7 +18,6 @@ All hyperparameters are specified and editable in the training scripts (meaning
| `dataset_params.config.width_divisor` | Factor to reduce the width of the feature vector before feeding the decoder. | `int` | `32` |
| `dataset_params.config.padding_value` | Image padding value. | `int` | `0` |
| `dataset_params.config.padding_token` | Transcription padding value. | `int` | `None` |
| `dataset_params.config.constraints` | Whether to add end-of-transcription and start-of-transcription tokens in labels. | `list` | `[]` |
| `dataset_params.config.preprocessings` | List of pre-processing functions to apply to input images. | `list` | (see [dedicated section](#data-preprocessing)) |
| `dataset_params.config.augmentation` | Configuration for data augmentation. | `dict` | (see [dedicated section](#data-augmentation)) |
......
......@@ -72,7 +72,6 @@ def training_config():
"height_divisor": 32, # Image height will be divided by 32
"padding_value": 0, # Image padding value
"padding_token": None, # Label padding value
"constraints": [],
"preprocessings": [
{
"type": "to_RGB",
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment