From b73056176d5b3236a729f25957b96a7c16143d41 Mon Sep 17 00:00:00 2001
From: manonBlanco <blanco@teklia.com>
Date: Mon, 12 Jun 2023 17:00:42 +0200
Subject: [PATCH] Remove add_eot and add_sot parameters from training
 configuration

---
 dan/manager/ocr.py        | 6 ++----
 dan/ocr/document/train.py | 5 +----
 tests/conftest.py         | 5 +----
 3 files changed, 4 insertions(+), 12 deletions(-)

diff --git a/dan/manager/ocr.py b/dan/manager/ocr.py
index 3d8ebdd4..30ca845f 100644
--- a/dan/manager/ocr.py
+++ b/dan/manager/ocr.py
@@ -174,11 +174,9 @@ class OCRDataset(GenericDataset):
 
         sample["label"] = full_label
         sample["token_label"] = token_to_ind(self.charset, full_label)
-        if "add_eot" in self.params["config"]["constraints"]:
-            sample["token_label"].append(self.tokens["end"])
+        sample["token_label"].append(self.tokens["end"])
         sample["label_len"] = len(sample["token_label"])
-        if "add_sot" in self.params["config"]["constraints"]:
-            sample["token_label"].insert(0, self.tokens["start"])
+        sample["token_label"].insert(0, self.tokens["start"])
         return sample
 
 
diff --git a/dan/ocr/document/train.py b/dan/ocr/document/train.py
index a3bbba55..7d755e71 100644
--- a/dan/ocr/document/train.py
+++ b/dan/ocr/document/train.py
@@ -109,10 +109,7 @@ def get_config():
                 "height_divisor": 32,  # Image height will be divided by 32
                 "padding_value": 0,  # Image padding value
                 "padding_token": None,  # Label padding value
-                "constraints": [
-                    "add_eot",
-                    "add_sot",
-                ],  # add end-of-transcription and start-of-transcription tokens in labels
+                "constraints": [],
                 "preprocessings": [
                     {
                         "type": "to_RGB",
diff --git a/tests/conftest.py b/tests/conftest.py
index 854de3c0..fb83a186 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -72,10 +72,7 @@ def training_config():
                 "height_divisor": 32,  # Image height will be divided by 32
                 "padding_value": 0,  # Image padding value
                 "padding_token": None,  # Label padding value
-                "constraints": [
-                    "add_eot",
-                    "add_sot",
-                ],  # add end-of-transcription and start-of-transcription tokens in labels
+                "constraints": [],
                 "preprocessings": [
                     {
                         "type": "to_RGB",
-- 
GitLab