diff --git a/dan/manager/ocr.py b/dan/manager/ocr.py
index 3d8ebdd489abdd26ea53cea5ddb05fc520eb4e40..30ca845ff3aa57b3a2a0b21957f4e54f2f53d3bb 100644
--- a/dan/manager/ocr.py
+++ b/dan/manager/ocr.py
@@ -174,11 +174,9 @@ class OCRDataset(GenericDataset):
         sample["label"] = full_label
         sample["token_label"] = token_to_ind(self.charset, full_label)
-        if "add_eot" in self.params["config"]["constraints"]:
-            sample["token_label"].append(self.tokens["end"])
+        sample["token_label"].append(self.tokens["end"])
         sample["label_len"] = len(sample["token_label"])
-        if "add_sot" in self.params["config"]["constraints"]:
-            sample["token_label"].insert(0, self.tokens["start"])
+        sample["token_label"].insert(0, self.tokens["start"])

         return sample
diff --git a/dan/ocr/document/train.py b/dan/ocr/document/train.py
index a3bbba555dd526e70c6e1929a3c09fc5b12f0c5b..7d755e7112ca319e81eaf87cd8da9b562477e852 100644
--- a/dan/ocr/document/train.py
+++ b/dan/ocr/document/train.py
@@ -109,10 +109,7 @@ def get_config():
                 "height_divisor": 32,  # Image height will be divided by 32
                 "padding_value": 0,  # Image padding value
                 "padding_token": None,  # Label padding value
-                "constraints": [
-                    "add_eot",
-                    "add_sot",
-                ],  # add end-of-transcription and start-of-transcription tokens in labels
+                "constraints": [],
                 "preprocessings": [
                     {
                         "type": "to_RGB",
diff --git a/tests/conftest.py b/tests/conftest.py
index 854de3c0528b2df1cdecd53fbfd3e7b9e37f5092..fb83a1867028d2f6428079549a7e9dd5f311075a 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -72,10 +72,7 @@ def training_config():
             "height_divisor": 32,  # Image height will be divided by 32
             "padding_value": 0,  # Image padding value
             "padding_token": None,  # Label padding value
-            "constraints": [
-                "add_eot",
-                "add_sot",
-            ],  # add end-of-transcription and start-of-transcription tokens in labels
+            "constraints": [],
             "preprocessings": [
                 {
                     "type": "to_RGB",
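For reference, below is a minimal, self-contained sketch of the label-building behaviour that results from this patch: the start-of-transcription and end-of-transcription tokens are now always added to token_label, so the "constraints" list in the config no longer influences labels. The standalone build_sample function, the example charset, and the token indices are hypothetical illustrations rather than code from the repository; only the order of operations mirrors the diff above.

# Hypothetical sketch of label construction after this patch: start/end tokens
# are appended unconditionally instead of being gated on the "constraints" list.

def token_to_ind(charset, label):
    # Map each character of the label to its index in the charset.
    return [charset.index(c) for c in label]

def build_sample(charset, tokens, full_label):
    sample = {"label": full_label}
    sample["token_label"] = token_to_ind(charset, full_label)
    sample["token_label"].append(tokens["end"])        # always add end-of-transcription
    sample["label_len"] = len(sample["token_label"])   # length measured before the start token
    sample["token_label"].insert(0, tokens["start"])   # always add start-of-transcription
    return sample

if __name__ == "__main__":
    charset = list("abc ")
    # Hypothetical token indices placed right after the charset entries.
    tokens = {"start": len(charset), "end": len(charset) + 1}
    print(build_sample(charset, tokens, "ab cab"))
    # {'label': 'ab cab', 'token_label': [4, 0, 1, 3, 2, 0, 1, 5], 'label_len': 7}

Note that, as in the diff, label_len is computed after the end token is appended but before the start token is inserted.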