From dc482e24fa1798ca910336fc5219941689ff897d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?M=C3=A9lodie=20Boillet?= <boillet@teklia.com>
Date: Thu, 3 Aug 2023 08:37:22 +0200
Subject: [PATCH] Apply cb58891e

---
 dan/manager/training.py        | 3 ---
 dan/ocr/document/train.py      | 1 -
 docs/usage/train/parameters.md | 1 -
 tests/conftest.py              | 1 -
 4 files changed, 6 deletions(-)

diff --git a/dan/manager/training.py b/dan/manager/training.py
index f231e372..1eba8670 100644
--- a/dan/manager/training.py
+++ b/dan/manager/training.py
@@ -524,7 +524,6 @@ class GenericTrainingManager:
         self.begin_time = time()
         focus_metric_name = self.params["training_params"]["focus_metric"]
         nb_epochs = self.params["training_params"]["max_nb_epochs"]
-        interval_save_weights = self.params["training_params"]["interval_save_weights"]
         metric_names = self.params["training_params"]["train_metrics"]
 
         display_values = None
@@ -676,8 +675,6 @@ class GenericTrainingManager:
             # save model weights
             if self.is_master:
                 self.save_model(epoch=num_epoch, name="last")
-                if interval_save_weights and num_epoch % interval_save_weights == 0:
-                    self.save_model(epoch=num_epoch, name="weights", keep_weights=True)
                 self.writer.flush()
 
     def evaluate(self, set_name, mlflow_logging=False, **kwargs):
diff --git a/dan/ocr/document/train.py b/dan/ocr/document/train.py
index 97cae761..d4c8330a 100644
--- a/dan/ocr/document/train.py
+++ b/dan/ocr/document/train.py
@@ -164,7 +164,6 @@ def get_config():
             * 24
             * 1.9,  # maximum time before to stop (in seconds)
             "load_epoch": "last",  # ["best", "last"]: last to continue training, best to evaluate
-            "interval_save_weights": None,  # None: keep best and last only
             "batch_size": 2,  # mini-batch size for training
             "use_ddp": False,  # Use DistributedDataParallel
             "ddp_port": "20027",
diff --git a/docs/usage/train/parameters.md b/docs/usage/train/parameters.md
index 5dd48610..478d410f 100644
--- a/docs/usage/train/parameters.md
+++ b/docs/usage/train/parameters.md
@@ -157,7 +157,6 @@ For a detailed description of all augmentation transforms, see the [dedicated pa
 | `training_params.max_nb_epochs`         | Maximum number of epochs before stopping training.                          | `int`  | `800`    |
 | `training_params.max_training_time`     | Maximum time (in seconds) before stopping training.                         | `int`  | `164160` |
 | `training_params.load_epoch`            | Model to load. Should be either `"best"` (evaluation) or `last` (training). | `str`  | `"last"` |
-| `training_params.interval_save_weights` | Step to save weights. Set to `None` to keep only best and last epochs.      | `int`  | `None`   |
 | `training_params.batch_size`            | Mini-batch size for the training loop.                                      | `int`  | `2`      |
 | `training_params.use_ddp`               | Whether to use DistributedDataParallel.                                     | `bool` | `False`  |
 | `training_params.ddp_port`              | DDP port.                                                                   | `int`  | `20027`  |
diff --git a/tests/conftest.py b/tests/conftest.py
index 4a365be7..d85d11b4 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -110,7 +110,6 @@ def training_config():
             "max_nb_epochs": 4,  # maximum number of epochs before to stop
             "max_training_time": 1200,  # maximum time before to stop (in seconds)
             "load_epoch": "last",  # ["best", "last"]: last to continue training, best to evaluate
-            "interval_save_weights": None,  # None: keep best and last only
             "batch_size": 2,  # mini-batch size for training
             "use_ddp": False,  # Use DistributedDataParallel
             "nb_gpu": 0,
-- 
GitLab