diff --git a/dan/manager/training.py b/dan/manager/training.py
index f231e372e6cf5c76c84281da03fc2051f31c6fb2..1eba86706aafc091c556eee92901271b548e7598 100644
--- a/dan/manager/training.py
+++ b/dan/manager/training.py
@@ -524,7 +524,6 @@ class GenericTrainingManager:
         self.begin_time = time()
         focus_metric_name = self.params["training_params"]["focus_metric"]
         nb_epochs = self.params["training_params"]["max_nb_epochs"]
-        interval_save_weights = self.params["training_params"]["interval_save_weights"]
         metric_names = self.params["training_params"]["train_metrics"]
 
         display_values = None
@@ -676,8 +675,6 @@ class GenericTrainingManager:
             # save model weights
             if self.is_master:
                 self.save_model(epoch=num_epoch, name="last")
-                if interval_save_weights and num_epoch % interval_save_weights == 0:
-                    self.save_model(epoch=num_epoch, name="weights", keep_weights=True)
                 self.writer.flush()
 
     def evaluate(self, set_name, mlflow_logging=False, **kwargs):
diff --git a/dan/ocr/document/train.py b/dan/ocr/document/train.py
index 97cae76166000d011095d5b50d58c38263b272b0..d4c8330acb0c0437f1f9dbfd7a8a4b902b8901f1 100644
--- a/dan/ocr/document/train.py
+++ b/dan/ocr/document/train.py
@@ -164,7 +164,6 @@ def get_config():
             * 24
             * 1.9,  # maximum time before to stop (in seconds)
             "load_epoch": "last",  # ["best", "last"]: last to continue training, best to evaluate
-            "interval_save_weights": None,  # None: keep best and last only
             "batch_size": 2,  # mini-batch size for training
             "use_ddp": False,  # Use DistributedDataParallel
             "ddp_port": "20027",
diff --git a/docs/usage/train/parameters.md b/docs/usage/train/parameters.md
index b8dbef3cec08c564dc5fc848d82ee23eb4675605..bf0632a96c1d2a510eda0dddc3e6bf9aa0cdaa1b 100644
--- a/docs/usage/train/parameters.md
+++ b/docs/usage/train/parameters.md
@@ -148,7 +148,6 @@ For a detailed description of all augmentation transforms, see the [dedicated pa
 | `training_params.max_nb_epochs`                         | Maximum number of epochs before stopping training.                          | `int`        | `800`                                       |
 | `training_params.max_training_time`                     | Maximum time (in seconds) before stopping training.                         | `int`        | `164160`                                    |
 | `training_params.load_epoch`                            | Model to load. Should be either `"best"` (evaluation) or `last` (training). | `str`        | `"last"`                                    |
-| `training_params.interval_save_weights`                 | Step to save weights. Set to `None` to keep only best and last epochs.      | `int`        | `None`                                      |
 | `training_params.batch_size`                            | Mini-batch size for the training loop.                                      | `int`        | `2`                                         |
 | `training_params.use_ddp`                               | Whether to use DistributedDataParallel.                                     | `bool`       | `False`                                     |
 | `training_params.ddp_port`                              | DDP port.                                                                   | `int`        | `20027`                                     |
diff --git a/tests/conftest.py b/tests/conftest.py
index 4a365be7b90aefcab64fcf500da664ccea4c0b20..d85d11b41ddf81049a5c02b08e3600b87237167f 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -110,7 +110,6 @@ def training_config():
             "max_nb_epochs": 4,  # maximum number of epochs before to stop
             "max_training_time": 1200,  # maximum time before to stop (in seconds)
             "load_epoch": "last",  # ["best", "last"]: last to continue training, best to evaluate
-            "interval_save_weights": None,  # None: keep best and last only
             "batch_size": 2,  # mini-batch size for training
             "use_ddp": False,  # Use DistributedDataParallel
             "nb_gpu": 0,