Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • atr/dan
1 result
Show changes
Commits on Source (6)
0.2.0-dev2 0.2.0-dev3
...@@ -2,16 +2,7 @@ ...@@ -2,16 +2,7 @@
import torch import torch
from torch import relu, softmax from torch import relu, softmax
from torch.nn import ( from torch.nn import Conv1d, Dropout, Embedding, LayerNorm, Linear, Module, ModuleList
LSTM,
Conv1d,
Dropout,
Embedding,
LayerNorm,
Linear,
Module,
ModuleList,
)
from torch.nn.init import xavier_uniform_ from torch.nn.init import xavier_uniform_
...@@ -314,14 +305,9 @@ class FeaturesUpdater(Module): ...@@ -314,14 +305,9 @@ class FeaturesUpdater(Module):
self.pe_2d = PositionalEncoding2D( self.pe_2d = PositionalEncoding2D(
params["enc_dim"], params["h_max"], params["w_max"], params["device"] params["enc_dim"], params["h_max"], params["w_max"], params["device"]
) )
self.use_2d_positional_encoding = (
"use_2d_pe" not in params or params["use_2d_pe"]
)
def get_pos_features(self, features): def get_pos_features(self, features):
if self.use_2d_positional_encoding: return self.pe_2d(features)
return self.pe_2d(features)
return features
class GlobalHTADecoder(Module): class GlobalHTADecoder(Module):
...@@ -335,8 +321,6 @@ class GlobalHTADecoder(Module): ...@@ -335,8 +321,6 @@ class GlobalHTADecoder(Module):
self.dec_att_win = ( self.dec_att_win = (
params["attention_win"] if params["attention_win"] is not None else 1 params["attention_win"] if params["attention_win"] is not None else 1
) )
self.use_1d_pe = "use_1d_pe" not in params or params["use_1d_pe"]
self.use_lstm = params["use_lstm"]
self.features_updater = FeaturesUpdater(params) self.features_updater = FeaturesUpdater(params)
self.att_decoder = GlobalAttDecoder(params) self.att_decoder = GlobalAttDecoder(params)
...@@ -348,9 +332,6 @@ class GlobalHTADecoder(Module): ...@@ -348,9 +332,6 @@ class GlobalHTADecoder(Module):
params["enc_dim"], params["l_max"], params["device"] params["enc_dim"], params["l_max"], params["device"]
) )
if self.use_lstm:
self.lstm_predict = LSTM(params["enc_dim"], params["enc_dim"])
vocab_size = params["vocab_size"] + 1 vocab_size = params["vocab_size"] + 1
self.end_conv = Conv1d(params["enc_dim"], vocab_size, kernel_size=1) self.end_conv = Conv1d(params["enc_dim"], vocab_size, kernel_size=1)
...@@ -374,9 +355,7 @@ class GlobalHTADecoder(Module): ...@@ -374,9 +355,7 @@ class GlobalHTADecoder(Module):
pos_tokens = self.emb(tokens).permute(0, 2, 1) pos_tokens = self.emb(tokens).permute(0, 2, 1)
# Add 1D Positional Encoding # Add 1D Positional Encoding
if self.use_1d_pe: pos_tokens = self.pe_1d(pos_tokens, start=start).permute(2, 0, 1)
pos_tokens = self.pe_1d(pos_tokens, start=start)
pos_tokens = pos_tokens.permute(2, 0, 1)
if num_pred is None: if num_pred is None:
num_pred = tokens.size(1) num_pred = tokens.size(1)
...@@ -426,9 +405,6 @@ class GlobalHTADecoder(Module): ...@@ -426,9 +405,6 @@ class GlobalHTADecoder(Module):
keep_all_weights=keep_all_weights, keep_all_weights=keep_all_weights,
) )
if self.use_lstm:
output, hidden_predict = self.lstm_predict(output, hidden_predict)
dp_output = self.dropout(relu(output)) dp_output = self.dropout(relu(output))
preds = self.end_conv(dp_output.permute(1, 2, 0)) preds = self.end_conv(dp_output.permute(1, 2, 0))
......
...@@ -92,9 +92,7 @@ class FCN_Encoder(Module): ...@@ -92,9 +92,7 @@ class FCN_Encoder(Module):
self.init_blocks = ModuleList( self.init_blocks = ModuleList(
[ [
ConvBlock( ConvBlock(3, 16, stride=(1, 1), dropout=self.dropout),
params["input_channels"], 16, stride=(1, 1), dropout=self.dropout
),
ConvBlock(16, 32, stride=(2, 2), dropout=self.dropout), ConvBlock(16, 32, stride=(2, 2), dropout=self.dropout),
ConvBlock(32, 64, stride=(2, 2), dropout=self.dropout), ConvBlock(32, 64, stride=(2, 2), dropout=self.dropout),
ConvBlock(64, 128, stride=(2, 2), dropout=self.dropout), ConvBlock(64, 128, stride=(2, 2), dropout=self.dropout),
......
...@@ -522,9 +522,7 @@ class GenericTrainingManager: ...@@ -522,9 +522,7 @@ class GenericTrainingManager:
self.save_params() self.save_params()
# init variables # init variables
self.begin_time = time() self.begin_time = time()
focus_metric_name = self.params["training_params"]["focus_metric"]
nb_epochs = self.params["training_params"]["max_nb_epochs"] nb_epochs = self.params["training_params"]["max_nb_epochs"]
interval_save_weights = self.params["training_params"]["interval_save_weights"]
metric_names = self.params["training_params"]["train_metrics"] metric_names = self.params["training_params"]["train_metrics"]
display_values = None display_values = None
...@@ -643,25 +641,9 @@ class GenericTrainingManager: ...@@ -643,25 +641,9 @@ class GenericTrainingManager:
) )
if valid_set_name == self.params["training_params"][ if valid_set_name == self.params["training_params"][
"set_name_focus_metric" "set_name_focus_metric"
] and ( ] and (self.best is None or eval_values["cer"] <= self.best):
self.best is None
or (
eval_values[focus_metric_name] <= self.best
and self.params["training_params"][
"expected_metric_value"
]
== "low"
)
or (
eval_values[focus_metric_name] >= self.best
and self.params["training_params"][
"expected_metric_value"
]
== "high"
)
):
self.save_model(epoch=num_epoch, name="best") self.save_model(epoch=num_epoch, name="best")
self.best = eval_values[focus_metric_name] self.best = eval_values["cer"]
# Handle curriculum learning update # Handle curriculum learning update
if self.dataset.train_dataset.curriculum_config: if self.dataset.train_dataset.curriculum_config:
...@@ -676,8 +658,6 @@ class GenericTrainingManager: ...@@ -676,8 +658,6 @@ class GenericTrainingManager:
# save model weights # save model weights
if self.is_master: if self.is_master:
self.save_model(epoch=num_epoch, name="last") self.save_model(epoch=num_epoch, name="last")
if interval_save_weights and num_epoch % interval_save_weights == 0:
self.save_model(epoch=num_epoch, name="weights", keep_weights=True)
self.writer.flush() self.writer.flush()
def evaluate(self, set_name, mlflow_logging=False, **kwargs): def evaluate(self, set_name, mlflow_logging=False, **kwargs):
......
...@@ -138,7 +138,6 @@ def get_config(): ...@@ -138,7 +138,6 @@ def get_config():
}, },
"transfered_charset": True, # Transfer learning of the decision layer based on charset of the line HTR model "transfered_charset": True, # Transfer learning of the decision layer based on charset of the line HTR model
"additional_tokens": 1, # for decision layer = [<eot>, ], only for transferred charset "additional_tokens": 1, # for decision layer = [<eot>, ], only for transferred charset
"input_channels": 3, # number of channels of input image
"dropout": 0.5, # dropout rate for encoder "dropout": 0.5, # dropout rate for encoder
"enc_dim": 256, # dimension of extracted features "enc_dim": 256, # dimension of extracted features
"nb_layers": 5, # encoder "nb_layers": 5, # encoder
...@@ -151,9 +150,6 @@ def get_config(): ...@@ -151,9 +150,6 @@ def get_config():
"dec_pred_dropout": 0.1, # dropout rate before decision layer "dec_pred_dropout": 0.1, # dropout rate before decision layer
"dec_att_dropout": 0.1, # dropout rate in multi head attention "dec_att_dropout": 0.1, # dropout rate in multi head attention
"dec_dim_feedforward": 256, # number of dimension for feedforward layer in transformer decoder layers "dec_dim_feedforward": 256, # number of dimension for feedforward layer in transformer decoder layers
"use_2d_pe": True, # use 2D positional embedding
"use_1d_pe": True, # use 1D positional embedding
"use_lstm": False,
"attention_win": 100, # length of attention window "attention_win": 100, # length of attention window
# Curriculum dropout # Curriculum dropout
"dropout_scheduler": { "dropout_scheduler": {
...@@ -168,7 +164,6 @@ def get_config(): ...@@ -168,7 +164,6 @@ def get_config():
* 24 * 24
* 1.9, # maximum time before stopping (in seconds) * 1.9, # maximum time before stopping (in seconds)
"load_epoch": "last", # ["best", "last"]: last to continue training, best to evaluate "load_epoch": "last", # ["best", "last"]: last to continue training, best to evaluate
"interval_save_weights": None, # None: keep best and last only
"batch_size": 2, # mini-batch size for training "batch_size": 2, # mini-batch size for training
"use_ddp": False, # Use DistributedDataParallel "use_ddp": False, # Use DistributedDataParallel
"ddp_port": "20027", "ddp_port": "20027",
...@@ -186,8 +181,6 @@ def get_config(): ...@@ -186,8 +181,6 @@ def get_config():
"lr_schedulers": None, # Learning rate schedulers "lr_schedulers": None, # Learning rate schedulers
"eval_on_valid": True, # Whether to eval and logs metrics on validation set during training or not "eval_on_valid": True, # Whether to eval and logs metrics on validation set during training or not
"eval_on_valid_interval": 5, # Interval (in epochs) to evaluate during training "eval_on_valid_interval": 5, # Interval (in epochs) to evaluate during training
"focus_metric": "cer", # Metrics to focus on to determine best epoch
"expected_metric_value": "low", # ["high", "low"] What is best for the focus metric value
"set_name_focus_metric": "{}-val".format( "set_name_focus_metric": "{}-val".format(
dataset_name dataset_name
), # Which dataset to focus on to select best weights ), # Which dataset to focus on to select best weights
......
...@@ -51,15 +51,12 @@ version: 0.0.1 ...@@ -51,15 +51,12 @@ version: 0.0.1
parameters: parameters:
max_char_prediction: int max_char_prediction: int
encoder: encoder:
input_channels: int
dropout: float dropout: float
decoder: decoder:
enc_dim: int enc_dim: int
l_max: int l_max: int
dec_pred_dropout: float dec_pred_dropout: float
attention_win: int attention_win: int
use_1d_pe: bool
use_lstm: bool
vocab_size: int vocab_size: int
h_max: int h_max: int
w_max: int w_max: int
......
...@@ -123,7 +123,6 @@ For a detailed description of all augmentation transforms, see the [dedicated pa ...@@ -123,7 +123,6 @@ For a detailed description of all augmentation transforms, see the [dedicated pa
| `model_params.transfer_learning.decoder` | Model to load for the decoder [state_dict_name, checkpoint_path, learnable, strict]. | `list` | `["encoder", "pretrained_models/dan_rimes_page.pt", True, False]` | | `model_params.transfer_learning.decoder` | Model to load for the decoder [state_dict_name, checkpoint_path, learnable, strict]. | `list` | `["encoder", "pretrained_models/dan_rimes_page.pt", True, False]` |
| `model_params.transfered_charset` | Transfer learning of the decision layer based on charset of the model to transfer. | `bool` | `True` | | `model_params.transfered_charset` | Transfer learning of the decision layer based on charset of the model to transfer. | `bool` | `True` |
| `model_params.additional_tokens` | For decision layer = [<eot>, ], only for transferred charset. | `int` | `1` | | `model_params.additional_tokens` | For decision layer = [<eot>, ], only for transferred charset. | `int` | `1` |
| `model_params.input_channels` | Number of channels of input image. | `int` | `3` |
| `model_params.dropout` | Dropout probability in the encoder. | `float` | `0.5` | | `model_params.dropout` | Dropout probability in the encoder. | `float` | `0.5` |
| `model_params.enc_dim` | Dimension of features extracted by the encoder. | `int` | `256` | | `model_params.enc_dim` | Dimension of features extracted by the encoder. | `int` | `256` |
| `model_params.nb_layers` | Number of layers in the encoder. | `int` | `5` | | `model_params.nb_layers` | Number of layers in the encoder. | `int` | `5` |
...@@ -136,9 +135,6 @@ For a detailed description of all augmentation transforms, see the [dedicated pa ...@@ -136,9 +135,6 @@ For a detailed description of all augmentation transforms, see the [dedicated pa
| `model_params.dec_pred_dropout` | Dropout rate before decision layer. | `float` | `0.1` | | `model_params.dec_pred_dropout` | Dropout rate before decision layer. | `float` | `0.1` |
| `model_params.dec_att_dropout` | Dropout rate in multi head attention. | `float` | `0.1` | | `model_params.dec_att_dropout` | Dropout rate in multi head attention. | `float` | `0.1` |
| `model_params.dec_dim_feedforward` | Number of dimensions for feedforward layer in transformer decoder layers. | `int` | `256` | | `model_params.dec_dim_feedforward` | Number of dimensions for feedforward layer in transformer decoder layers. | `int` | `256` |
| `model_params.use_2d_pe` | Whether to use 2D positional embedding. | `bool` | `True` |
| `model_params.use_1d_pe` | Whether to use 1D positional embedding. | `bool` | `True` |
| `model_params.use_lstm` | Whether to use a LSTM layer in the decoder. | `bool` | `False` |
| `model_params.attention_win` | Length of attention window. | `int` | `100` | | `model_params.attention_win` | Length of attention window. | `int` | `100` |
| `model_params.dropout_scheduler.function` | Curriculum dropout scheduler. | custom class | `exponential_dropout_scheduler` | | `model_params.dropout_scheduler.function` | Curriculum dropout scheduler. | custom class | `exponential_dropout_scheduler` |
| `model_params.dropout_scheduler.T` | Exponential factor. | `float` | `5e4` | | `model_params.dropout_scheduler.T` | Exponential factor. | `float` | `5e4` |
...@@ -152,7 +148,6 @@ For a detailed description of all augmentation transforms, see the [dedicated pa ...@@ -152,7 +148,6 @@ For a detailed description of all augmentation transforms, see the [dedicated pa
| `training_params.max_nb_epochs` | Maximum number of epochs before stopping training. | `int` | `800` | | `training_params.max_nb_epochs` | Maximum number of epochs before stopping training. | `int` | `800` |
| `training_params.max_training_time` | Maximum time (in seconds) before stopping training. | `int` | `164160` | | `training_params.max_training_time` | Maximum time (in seconds) before stopping training. | `int` | `164160` |
| `training_params.load_epoch` | Model to load. Should be either `"best"` (evaluation) or `"last"` (training). | `str` | `"last"` | | `training_params.load_epoch` | Model to load. Should be either `"best"` (evaluation) or `"last"` (training). | `str` | `"last"` |
| `training_params.interval_save_weights` | Step to save weights. Set to `None` to keep only best and last epochs. | `int` | `None` |
| `training_params.batch_size` | Mini-batch size for the training loop. | `int` | `2` | | `training_params.batch_size` | Mini-batch size for the training loop. | `int` | `2` |
| `training_params.use_ddp` | Whether to use DistributedDataParallel. | `bool` | `False` | | `training_params.use_ddp` | Whether to use DistributedDataParallel. | `bool` | `False` |
| `training_params.ddp_port` | DDP port. | `int` | `20027` | | `training_params.ddp_port` | DDP port. | `int` | `20027` |
...@@ -164,8 +159,6 @@ For a detailed description of all augmentation transforms, see the [dedicated pa ...@@ -164,8 +159,6 @@ For a detailed description of all augmentation transforms, see the [dedicated pa
| `training_params.lr_schedulers` | Learning rate schedulers. | custom class | `None` | | `training_params.lr_schedulers` | Learning rate schedulers. | custom class | `None` |
| `training_params.eval_on_valid` | Whether to evaluate and log metrics on the validation set during training. | `bool` | `True` | | `training_params.eval_on_valid` | Whether to evaluate and log metrics on the validation set during training. | `bool` | `True` |
| `training_params.eval_on_valid_interval` | Interval (in epochs) to evaluate during training. | `int` | `5` | | `training_params.eval_on_valid_interval` | Interval (in epochs) to evaluate during training. | `int` | `5` |
| `training_params.focus_metric` | Metric to focus on to determine the best epoch. | `str` | `"cer"` |
| `training_params.expected_metric_value` | Best value for the focus metric. Should be either `"high"` or `"low"`. | `str` | `"low"` |
| `training_params.set_name_focus_metric` | Dataset to focus on to select best weights. | `str` | | | `training_params.set_name_focus_metric` | Dataset to focus on to select best weights. | `str` | |
| `training_params.train_metrics` | List of metrics to compute during training. | `list` | `["loss_ce", "cer", "wer", "wer_no_punct"]` | | `training_params.train_metrics` | List of metrics to compute during training. | `list` | `["loss_ce", "cer", "wer", "wer_no_punct"]` |
| `training_params.eval_metrics` | List of metrics to compute during validation. | `list` | `["cer", "wer", "wer_no_punct"]` | | `training_params.eval_metrics` | List of metrics to compute during validation. | `list` | `["cer", "wer", "wer_no_punct"]` |
......
...@@ -86,7 +86,6 @@ def training_config(): ...@@ -86,7 +86,6 @@ def training_config():
"transfer_learning": None, "transfer_learning": None,
"transfered_charset": True, # Transfer learning of the decision layer based on charset of the line HTR model "transfered_charset": True, # Transfer learning of the decision layer based on charset of the line HTR model
"additional_tokens": 1, # for decision layer = [<eot>, ], only for transferred charset "additional_tokens": 1, # for decision layer = [<eot>, ], only for transferred charset
"input_channels": 3, # number of channels of input image
"dropout": 0.5, # dropout rate for encoder "dropout": 0.5, # dropout rate for encoder
"enc_dim": 256, # dimension of extracted features "enc_dim": 256, # dimension of extracted features
"nb_layers": 5, # encoder "nb_layers": 5, # encoder
...@@ -99,9 +98,6 @@ def training_config(): ...@@ -99,9 +98,6 @@ def training_config():
"dec_pred_dropout": 0.1, # dropout rate before decision layer "dec_pred_dropout": 0.1, # dropout rate before decision layer
"dec_att_dropout": 0.1, # dropout rate in multi head attention "dec_att_dropout": 0.1, # dropout rate in multi head attention
"dec_dim_feedforward": 256, # number of dimension for feedforward layer in transformer decoder layers "dec_dim_feedforward": 256, # number of dimension for feedforward layer in transformer decoder layers
"use_2d_pe": True, # use 2D positional embedding
"use_1d_pe": True, # use 1D positional embedding
"use_lstm": False,
"attention_win": 100, # length of attention window "attention_win": 100, # length of attention window
# Curriculum dropout # Curriculum dropout
"dropout_scheduler": { "dropout_scheduler": {
...@@ -114,7 +110,6 @@ def training_config(): ...@@ -114,7 +110,6 @@ def training_config():
"max_nb_epochs": 4, # maximum number of epochs before to stop "max_nb_epochs": 4, # maximum number of epochs before to stop
"max_training_time": 1200, # maximum time before to stop (in seconds) "max_training_time": 1200, # maximum time before to stop (in seconds)
"load_epoch": "last", # ["best", "last"]: last to continue training, best to evaluate "load_epoch": "last", # ["best", "last"]: last to continue training, best to evaluate
"interval_save_weights": None, # None: keep best and last only
"batch_size": 2, # mini-batch size for training "batch_size": 2, # mini-batch size for training
"use_ddp": False, # Use DistributedDataParallel "use_ddp": False, # Use DistributedDataParallel
"nb_gpu": 0, "nb_gpu": 0,
...@@ -130,8 +125,6 @@ def training_config(): ...@@ -130,8 +125,6 @@ def training_config():
"lr_schedulers": None, # Learning rate schedulers "lr_schedulers": None, # Learning rate schedulers
"eval_on_valid": True, # Whether to eval and logs metrics on validation set during training or not "eval_on_valid": True, # Whether to eval and logs metrics on validation set during training or not
"eval_on_valid_interval": 2, # Interval (in epochs) to evaluate during training "eval_on_valid_interval": 2, # Interval (in epochs) to evaluate during training
"focus_metric": "cer", # Metrics to focus on to determine best epoch
"expected_metric_value": "low", # ["high", "low"] What is best for the focus metric value
"set_name_focus_metric": "training-val", # Which dataset to focus on to select best weights "set_name_focus_metric": "training-val", # Which dataset to focus on to select best weights
"train_metrics": [ "train_metrics": [
"loss_ce", "loss_ce",
......
...@@ -3,15 +3,12 @@ version: 0.0.1 ...@@ -3,15 +3,12 @@ version: 0.0.1
parameters: parameters:
max_char_prediction: 200 max_char_prediction: 200
encoder: encoder:
input_channels: 3
dropout: 0.5 dropout: 0.5
decoder: decoder:
enc_dim: 256 enc_dim: 256
l_max: 15000 l_max: 15000
dec_pred_dropout: 0.1 dec_pred_dropout: 0.1
attention_win: 100 attention_win: 100
use_1d_pe: True
use_lstm: False
vocab_size: 96 vocab_size: 96
h_max: 500 h_max: 500
w_max: 1000 w_max: 1000
......