From 803610d39be7a1aaef5b7c25735509310344deed Mon Sep 17 00:00:00 2001
From: Manon blanco <blanco@teklia.com>
Date: Tue, 25 Jul 2023 08:27:36 +0000
Subject: [PATCH] Always use the same max_training_time

---
 dan/manager/training.py        | 9 ---------
 dan/ocr/document/train.py      | 3 ---
 docs/usage/train/parameters.md | 1 -
 tests/conftest.py              | 1 -
 4 files changed, 14 deletions(-)

diff --git a/dan/manager/training.py b/dan/manager/training.py
index a3e1af66..7b73a487 100644
--- a/dan/manager/training.py
+++ b/dan/manager/training.py
@@ -33,7 +33,6 @@ class GenericTrainingManager:
         self.params = params
         self.dropout_scheduler = None
         self.models = {}
-        self.begin_time = None
         self.dataset = None
         self.dataset_name = list(self.params["dataset_params"]["datasets"].values())[0]
         self.paths = None
@@ -534,7 +533,6 @@ class GenericTrainingManager:
         self.writer = SummaryWriter(self.paths["results"])
         self.save_params()
         # init variables
-        self.begin_time = time()
         nb_epochs = self.params["training_params"]["max_nb_epochs"]
         metric_names = self.params["training_params"]["train_metrics"]
 
@@ -547,13 +545,6 @@
         self.init_curriculum()
         # perform epochs
         for num_epoch in range(self.latest_epoch + 1, nb_epochs):
-            # Check maximum training time stop condition
-            if (
-                self.params["training_params"]["max_training_time"]
-                and time() - self.begin_time
-                > self.params["training_params"]["max_training_time"]
-            ):
-                break
             # set models trainable
             for model_name in self.models.keys():
                 self.models[model_name].train()
diff --git a/dan/ocr/document/train.py b/dan/ocr/document/train.py
index fb0ccf39..b6146a94 100644
--- a/dan/ocr/document/train.py
+++ b/dan/ocr/document/train.py
@@ -160,9 +160,6 @@ def get_config():
         "training_params": {
             "output_folder": "outputs/dan_esposalles_record",  # folder name for checkpoint and results
             "max_nb_epochs": 800,  # maximum number of epochs before to stop
-            "max_training_time": 3600
-            * 24
-            * 1.9,  # maximum time before to stop (in seconds)
             "load_epoch": "last",  # ["best", "last"]: last to continue training, best to evaluate
             "batch_size": 2,  # mini-batch size for training
             "use_ddp": False,  # Use DistributedDataParallel
diff --git a/docs/usage/train/parameters.md b/docs/usage/train/parameters.md
index c61cf81d..69cc44c2 100644
--- a/docs/usage/train/parameters.md
+++ b/docs/usage/train/parameters.md
@@ -145,7 +145,6 @@ For a detailed description of all augmentation transforms, see the [dedicated pa
 | ------------------------------------------------------- | --------------------------------------------------------------------------- | ------------ | ------------------------------------------- |
 | `training_params.output_folder` | Directory for checkpoint and results. | `str` | |
 | `training_params.max_nb_epochs` | Maximum number of epochs before stopping training. | `int` | `800` |
-| `training_params.max_training_time` | Maximum time (in seconds) before stopping training. | `int` | `164160` |
 | `training_params.load_epoch` | Model to load. Should be either `"best"` (evaluation) or `last` (training). | `str` | `"last"` |
 | `training_params.batch_size` | Mini-batch size for the training loop. | `int` | `2` |
 | `training_params.use_ddp` | Whether to use DistributedDataParallel. | `bool` | `False` |
diff --git a/tests/conftest.py b/tests/conftest.py
index ffc0dccb..f65fcb74 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -108,7 +108,6 @@ def training_config():
         "training_params": {
             "output_folder": "dan_trained_model",  # folder name for checkpoint and results
             "max_nb_epochs": 4,  # maximum number of epochs before to stop
-            "max_training_time": 1200,  # maximum time before to stop (in seconds)
             "load_epoch": "last",  # ["best", "last"]: last to continue training, best to evaluate
             "batch_size": 2,  # mini-batch size for training
             "use_ddp": False,  # Use DistributedDataParallel
-- 
GitLab
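
Note, not part of the patch itself: the guard deleted from GenericTrainingManager's epoch loop compared elapsed wall-clock time against training_params["max_training_time"] and broke out of the loop once that budget was exceeded. A minimal, self-contained sketch of that kind of time-based stop condition follows; the function and parameter names are illustrative placeholders, not DAN APIs.

    # Illustrative sketch only (not DAN code): a wall-clock stop condition
    # like the one removed by the patch above.
    from time import sleep, time
    from typing import Callable, Optional

    def train(
        run_one_epoch: Callable[[], None],
        max_nb_epochs: int,
        max_training_time: Optional[float] = None,
    ) -> int:
        """Run up to max_nb_epochs epochs, stopping early once max_training_time seconds elapse."""
        begin_time = time()
        epochs_done = 0
        for _ in range(max_nb_epochs):
            # Skip the remaining epochs once the time budget (in seconds) is spent.
            if max_training_time and time() - begin_time > max_training_time:
                break
            run_one_epoch()
            epochs_done += 1
        return epochs_done

    if __name__ == "__main__":
        # Each fake "epoch" sleeps 0.5 s; the 1.2 s budget cuts training short
        # well before the 800-epoch limit is reached.
        print(train(lambda: sleep(0.5), max_nb_epochs=800, max_training_time=1.2))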