Skip to content
Snippets Groups Projects
Verified Commit dc482e24 authored by Mélodie Boillet's avatar Mélodie Boillet
Browse files

Apply cb58891e

parent 20dbc910
No related branches found
No related tags found
No related merge requests found
...@@ -524,7 +524,6 @@ class GenericTrainingManager: ...@@ -524,7 +524,6 @@ class GenericTrainingManager:
self.begin_time = time() self.begin_time = time()
focus_metric_name = self.params["training_params"]["focus_metric"] focus_metric_name = self.params["training_params"]["focus_metric"]
nb_epochs = self.params["training_params"]["max_nb_epochs"] nb_epochs = self.params["training_params"]["max_nb_epochs"]
interval_save_weights = self.params["training_params"]["interval_save_weights"]
metric_names = self.params["training_params"]["train_metrics"] metric_names = self.params["training_params"]["train_metrics"]
display_values = None display_values = None
...@@ -676,8 +675,6 @@ class GenericTrainingManager: ...@@ -676,8 +675,6 @@ class GenericTrainingManager:
# save model weights # save model weights
if self.is_master: if self.is_master:
self.save_model(epoch=num_epoch, name="last") self.save_model(epoch=num_epoch, name="last")
if interval_save_weights and num_epoch % interval_save_weights == 0:
self.save_model(epoch=num_epoch, name="weights", keep_weights=True)
self.writer.flush() self.writer.flush()
def evaluate(self, set_name, mlflow_logging=False, **kwargs): def evaluate(self, set_name, mlflow_logging=False, **kwargs):
......
...@@ -164,7 +164,6 @@ def get_config(): ...@@ -164,7 +164,6 @@ def get_config():
* 24 * 24
* 1.9, # maximum time before to stop (in seconds) * 1.9, # maximum time before to stop (in seconds)
"load_epoch": "last", # ["best", "last"]: last to continue training, best to evaluate "load_epoch": "last", # ["best", "last"]: last to continue training, best to evaluate
"interval_save_weights": None, # None: keep best and last only
"batch_size": 2, # mini-batch size for training "batch_size": 2, # mini-batch size for training
"use_ddp": False, # Use DistributedDataParallel "use_ddp": False, # Use DistributedDataParallel
"ddp_port": "20027", "ddp_port": "20027",
......
...@@ -157,7 +157,6 @@ For a detailed description of all augmentation transforms, see the [dedicated pa ...@@ -157,7 +157,6 @@ For a detailed description of all augmentation transforms, see the [dedicated pa
| `training_params.max_nb_epochs` | Maximum number of epochs before stopping training. | `int` | `800` | | `training_params.max_nb_epochs` | Maximum number of epochs before stopping training. | `int` | `800` |
| `training_params.max_training_time` | Maximum time (in seconds) before stopping training. | `int` | `164160` | | `training_params.max_training_time` | Maximum time (in seconds) before stopping training. | `int` | `164160` |
| `training_params.load_epoch` | Model to load. Should be either `"best"` (evaluation) or `"last"` (training). | `str` | `"last"` | | `training_params.load_epoch` | Model to load. Should be either `"best"` (evaluation) or `"last"` (training). | `str` | `"last"` |
| `training_params.interval_save_weights` | Step to save weights. Set to `None` to keep only best and last epochs. | `int` | `None` |
| `training_params.batch_size` | Mini-batch size for the training loop. | `int` | `2` | | `training_params.batch_size` | Mini-batch size for the training loop. | `int` | `2` |
| `training_params.use_ddp` | Whether to use DistributedDataParallel. | `bool` | `False` | | `training_params.use_ddp` | Whether to use DistributedDataParallel. | `bool` | `False` |
| `training_params.ddp_port` | DDP port. | `int` | `20027` | | `training_params.ddp_port` | DDP port. | `int` | `20027` |
......
...@@ -110,7 +110,6 @@ def training_config(): ...@@ -110,7 +110,6 @@ def training_config():
"max_nb_epochs": 4, # maximum number of epochs before to stop "max_nb_epochs": 4, # maximum number of epochs before to stop
"max_training_time": 1200, # maximum time before to stop (in seconds) "max_training_time": 1200, # maximum time before to stop (in seconds)
"load_epoch": "last", # ["best", "last"]: last to continue training, best to evaluate "load_epoch": "last", # ["best", "last"]: last to continue training, best to evaluate
"interval_save_weights": None, # None: keep best and last only
"batch_size": 2, # mini-batch size for training "batch_size": 2, # mini-batch size for training
"use_ddp": False, # Use DistributedDataParallel "use_ddp": False, # Use DistributedDataParallel
"nb_gpu": 0, "nb_gpu": 0,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment