Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Target project: atr/dan
Commits on Source (6): 0.2.0-dev2, 0.2.0-dev3
@@ -2,16 +2,7 @@
 import torch
 from torch import relu, softmax
-from torch.nn import (
-    LSTM,
-    Conv1d,
-    Dropout,
-    Embedding,
-    LayerNorm,
-    Linear,
-    Module,
-    ModuleList,
-)
+from torch.nn import Conv1d, Dropout, Embedding, LayerNorm, Linear, Module, ModuleList
 from torch.nn.init import xavier_uniform_
@@ -314,14 +305,9 @@ class FeaturesUpdater(Module):
         self.pe_2d = PositionalEncoding2D(
             params["enc_dim"], params["h_max"], params["w_max"], params["device"]
         )
-        self.use_2d_positional_encoding = (
-            "use_2d_pe" not in params or params["use_2d_pe"]
-        )

     def get_pos_features(self, features):
-        if self.use_2d_positional_encoding:
-            return self.pe_2d(features)
-        return features
+        return self.pe_2d(features)


 class GlobalHTADecoder(Module):
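With the `use_2d_pe` switch removed, 2D positional encoding is now always applied to the encoder features. For context, a minimal self-contained sketch of a 2D sinusoidal encoding (an illustration of the general technique, not the repository's exact PositionalEncoding2D):

import torch

def sinusoidal_pe_2d(enc_dim: int, h: int, w: int) -> torch.Tensor:
    # Hypothetical sketch: the first half of the channels encodes the row
    # index, the second half the column index, as interleaved sin/cos pairs.
    half = enc_dim // 2
    div = torch.exp(
        torch.arange(0, half, 2).float() * (-torch.log(torch.tensor(10000.0)) / half)
    )
    pe = torch.zeros(enc_dim, h, w)
    y = torch.arange(h).float().unsqueeze(1)  # (h, 1) row indices
    x = torch.arange(w).float().unsqueeze(1)  # (w, 1) column indices
    pe[0:half:2] = torch.sin(y * div).t()[:, :, None].expand(-1, -1, w)
    pe[1:half:2] = torch.cos(y * div).t()[:, :, None].expand(-1, -1, w)
    pe[half::2] = torch.sin(x * div).t()[:, None, :].expand(-1, h, -1)
    pe[half + 1::2] = torch.cos(x * div).t()[:, None, :].expand(-1, h, -1)
    return pe

features = torch.randn(2, 256, 32, 40)               # (batch, enc_dim, h, w)
features = features + sinusoidal_pe_2d(256, 32, 40)  # applied unconditionally now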
@@ -335,8 +321,6 @@ class GlobalHTADecoder(Module):
         self.dec_att_win = (
             params["attention_win"] if params["attention_win"] is not None else 1
         )
-        self.use_1d_pe = "use_1d_pe" not in params or params["use_1d_pe"]
-        self.use_lstm = params["use_lstm"]

         self.features_updater = FeaturesUpdater(params)
         self.att_decoder = GlobalAttDecoder(params)
@@ -348,9 +332,6 @@
             params["enc_dim"], params["l_max"], params["device"]
         )

-        if self.use_lstm:
-            self.lstm_predict = LSTM(params["enc_dim"], params["enc_dim"])

         vocab_size = params["vocab_size"] + 1
         self.end_conv = Conv1d(params["enc_dim"], vocab_size, kernel_size=1)
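For reference, the deleted option added an optional recurrent pass over the decoder output before the decision layer. A minimal sketch of that removed path (tensor sizes here are illustrative assumptions):

import torch
from torch.nn import LSTM

enc_dim, seq_len, batch = 256, 10, 2
lstm_predict = LSTM(enc_dim, enc_dim)          # what `use_lstm` used to construct
output = torch.randn(seq_len, batch, enc_dim)  # decoder output, (seq, batch, dim)
output, hidden_predict = lstm_predict(output)  # shapes unchanged: (10, 2, 256)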
@@ -374,9 +355,7 @@
         pos_tokens = self.emb(tokens).permute(0, 2, 1)

         # Add 1D Positional Encoding
-        if self.use_1d_pe:
-            pos_tokens = self.pe_1d(pos_tokens, start=start)
-        pos_tokens = pos_tokens.permute(2, 0, 1)
+        pos_tokens = self.pe_1d(pos_tokens, start=start).permute(2, 0, 1)

         if num_pred is None:
             num_pred = tokens.size(1)
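The fused call is behaviourally identical to the old two-step version when `use_1d_pe` was `True`. A quick shape walk-through (the sizes and the identity stand-in for `pe_1d` are illustrative assumptions):

import torch

B, T, C = 2, 10, 256                     # batch, token count, enc_dim
tokens_embedded = torch.randn(B, T, C)   # output of self.emb(tokens)
pos_tokens = tokens_embedded.permute(0, 2, 1)  # (B, C, T)
pe_1d = lambda x, start=0: x             # identity stand-in for PositionalEncoding1D
pos_tokens = pe_1d(pos_tokens, start=0).permute(2, 0, 1)
assert pos_tokens.shape == (T, B, C)     # (seq, batch, dim) for the attention decoder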
@@ -426,9 +405,6 @@
             keep_all_weights=keep_all_weights,
         )

-        if self.use_lstm:
-            output, hidden_predict = self.lstm_predict(output, hidden_predict)

         dp_output = self.dropout(relu(output))
         preds = self.end_conv(dp_output.permute(1, 2, 0))
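With the LSTM branch gone, the prediction head is just dropout, ReLU, and a kernel-size-1 `Conv1d`, which acts as a per-timestep linear projection onto the vocabulary. A sketch with assumed sizes:

import torch
from torch.nn import Conv1d

enc_dim, vocab_size, seq_len, batch = 256, 97, 10, 2
end_conv = Conv1d(enc_dim, vocab_size, kernel_size=1)
dp_output = torch.randn(seq_len, batch, enc_dim)  # after dropout(relu(output))
preds = end_conv(dp_output.permute(1, 2, 0))      # Conv1d expects (batch, dim, seq)
assert preds.shape == (batch, vocab_size, seq_len)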
@@ -92,9 +92,7 @@ class FCN_Encoder(Module):
         self.init_blocks = ModuleList(
             [
-                ConvBlock(
-                    params["input_channels"], 16, stride=(1, 1), dropout=self.dropout
-                ),
+                ConvBlock(3, 16, stride=(1, 1), dropout=self.dropout),
                 ConvBlock(16, 32, stride=(2, 2), dropout=self.dropout),
                 ConvBlock(32, 64, stride=(2, 2), dropout=self.dropout),
                 ConvBlock(64, 128, stride=(2, 2), dropout=self.dropout),
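The encoder's first block now hardcodes 3 input channels, so inputs must be RGB. A sketch of the new contract (tensor sizes are illustrative):

import torch

rgb = torch.randn(2, 3, 128, 128)   # (batch, channels, height, width): accepted
gray = torch.randn(2, 1, 128, 128)  # single-channel input no longer matches the first conv
gray_as_rgb = gray.expand(-1, 3, -1, -1)  # replicate the channel before feeding the encoder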
@@ -522,9 +522,7 @@ class GenericTrainingManager:
         self.save_params()
         # init variables
         self.begin_time = time()
-        focus_metric_name = self.params["training_params"]["focus_metric"]
         nb_epochs = self.params["training_params"]["max_nb_epochs"]
-        interval_save_weights = self.params["training_params"]["interval_save_weights"]
         metric_names = self.params["training_params"]["train_metrics"]

         display_values = None
@@ -643,25 +641,9 @@
                     )
                     if valid_set_name == self.params["training_params"][
                         "set_name_focus_metric"
-                    ] and (
-                        self.best is None
-                        or (
-                            eval_values[focus_metric_name] <= self.best
-                            and self.params["training_params"][
-                                "expected_metric_value"
-                            ]
-                            == "low"
-                        )
-                        or (
-                            eval_values[focus_metric_name] >= self.best
-                            and self.params["training_params"][
-                                "expected_metric_value"
-                            ]
-                            == "high"
-                        )
-                    ):
+                    ] and (self.best is None or eval_values["cer"] <= self.best):
                         self.save_model(epoch=num_epoch, name="best")
-                        self.best = eval_values[focus_metric_name]
+                        self.best = eval_values["cer"]

             # Handle curriculum learning update
             if self.dataset.train_dataset.curriculum_config:
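The selection rule collapses to a fixed metric: CER on the focus validation set, lower is better. A condensed, hypothetical restatement of the new condition:

def is_new_best(best, eval_values):
    # Hypothetical helper mirroring the new hardcoded rule: the first
    # evaluation always wins; afterwards a lower-or-equal CER refreshes "best".
    return best is None or eval_values["cer"] <= best

assert is_new_best(None, {"cer": 0.12})
assert is_new_best(0.12, {"cer": 0.10})
assert not is_new_best(0.10, {"cer": 0.12})

Note that the `<=` comparison means a tie also refreshes the "best" checkpoint.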
@@ -676,8 +658,6 @@
             # save model weights
             if self.is_master:
                 self.save_model(epoch=num_epoch, name="last")
-                if interval_save_weights and num_epoch % interval_save_weights == 0:
-                    self.save_model(epoch=num_epoch, name="weights", keep_weights=True)
                 self.writer.flush()

     def evaluate(self, set_name, mlflow_logging=False, **kwargs):
@@ -138,7 +138,6 @@ def get_config():
             },
             "transfered_charset": True,  # Transfer learning of the decision layer based on charset of the line HTR model
             "additional_tokens": 1,  # for decision layer = [<eot>, ], only for transferred charset
-            "input_channels": 3,  # number of channels of input image
             "dropout": 0.5,  # dropout rate for encoder
             "enc_dim": 256,  # dimension of extracted features
             "nb_layers": 5,  # encoder
@@ -151,9 +150,6 @@ def get_config():
             "dec_pred_dropout": 0.1,  # dropout rate before decision layer
             "dec_att_dropout": 0.1,  # dropout rate in multi head attention
             "dec_dim_feedforward": 256,  # number of dimension for feedforward layer in transformer decoder layers
-            "use_2d_pe": True,  # use 2D positional embedding
-            "use_1d_pe": True,  # use 1D positional embedding
-            "use_lstm": False,
             "attention_win": 100,  # length of attention window
             # Curriculum dropout
             "dropout_scheduler": {
@@ -168,7 +164,6 @@ def get_config():
             * 24
             * 1.9,  # maximum time before to stop (in seconds)
             "load_epoch": "last",  # ["best", "last"]: last to continue training, best to evaluate
-            "interval_save_weights": None,  # None: keep best and last only
             "batch_size": 2,  # mini-batch size for training
             "use_ddp": False,  # Use DistributedDataParallel
             "ddp_port": "20027",
@@ -186,8 +181,6 @@ def get_config():
             "lr_schedulers": None,  # Learning rate schedulers
             "eval_on_valid": True,  # Whether to eval and logs metrics on validation set during training or not
             "eval_on_valid_interval": 5,  # Interval (in epochs) to evaluate during training
-            "focus_metric": "cer",  # Metrics to focus on to determine best epoch
-            "expected_metric_value": "low",  # ["high", "low"] What is best for the focus metric value
             "set_name_focus_metric": "{}-val".format(
                 dataset_name
             ),  # Which dataset to focus on to select best weights
@@ -51,15 +51,12 @@ version: 0.0.1
 parameters:
   max_char_prediction: int
   encoder:
-    input_channels: int
     dropout: float
   decoder:
     enc_dim: int
     l_max: int
     dec_pred_dropout: float
     attention_win: int
-    use_1d_pe: bool
-    use_lstm: bool
     vocab_size: int
     h_max: int
     w_max: int
@@ -123,7 +123,6 @@ For a detailed description of all augmentation transforms, see the [dedicated pa
 | `model_params.transfer_learning.decoder` | Model to load for the decoder [state_dict_name, checkpoint_path, learnable, strict]. | `list` | `["encoder", "pretrained_models/dan_rimes_page.pt", True, False]` |
 | `model_params.transfered_charset` | Transfer learning of the decision layer based on charset of the model to transfer. | `bool` | `True` |
 | `model_params.additional_tokens` | For decision layer = [<eot>, ], only for transferred charset. | `int` | `1` |
-| `model_params.input_channels` | Number of channels of input image. | `int` | `3` |
 | `model_params.dropout` | Dropout probability in the encoder. | `float` | `0.5` |
 | `model_params.enc_dim` | Dimension of features extracted by the encoder. | `int` | `256` |
 | `model_params.nb_layers` | Number of layers in the encoder. | `int` | `5` |
@@ -136,9 +135,6 @@ For a detailed description of all augmentation transforms, see the [dedicated pa
 | `model_params.dec_pred_dropout` | Dropout rate before decision layer. | `float` | `0.1` |
 | `model_params.dec_att_dropout` | Dropout rate in multi head attention. | `float` | `0.1` |
 | `model_params.dec_dim_feedforward` | Number of dimensions for feedforward layer in transformer decoder layers. | `int` | `256` |
-| `model_params.use_2d_pe` | Whether to use 2D positional embedding. | `bool` | `True` |
-| `model_params.use_1d_pe` | Whether to use 1D positional embedding. | `bool` | `True` |
-| `model_params.use_lstm` | Whether to use a LSTM layer in the decoder. | `bool` | `False` |
 | `model_params.attention_win` | Length of attention window. | `int` | `100` |
 | `model_params.dropout_scheduler.function` | Curriculum dropout scheduler. | custom class | `exponential_dropout_scheduler` |
 | `model_params.dropout_scheduler.T` | Exponential factor. | `float` | `5e4` |
@@ -152,7 +148,6 @@ For a detailed description of all augmentation transforms, see the [dedicated pa
 | `training_params.max_nb_epochs` | Maximum number of epochs before stopping training. | `int` | `800` |
 | `training_params.max_training_time` | Maximum time (in seconds) before stopping training. | `int` | `164160` |
 | `training_params.load_epoch` | Model to load. Should be either `"best"` (evaluation) or `last` (training). | `str` | `"last"` |
-| `training_params.interval_save_weights` | Step to save weights. Set to `None` to keep only best and last epochs. | `int` | `None` |
 | `training_params.batch_size` | Mini-batch size for the training loop. | `int` | `2` |
 | `training_params.use_ddp` | Whether to use DistributedDataParallel. | `bool` | `False` |
 | `training_params.ddp_port` | DDP port. | `int` | `20027` |
@@ -164,8 +159,6 @@ For a detailed description of all augmentation transforms, see the [dedicated pa
 | `training_params.lr_schedulers` | Learning rate schedulers. | custom class | `None` |
 | `training_params.eval_on_valid` | Whether to evaluate and log metrics on the validation set during training. | `bool` | `True` |
 | `training_params.eval_on_valid_interval` | Interval (in epochs) to evaluate during training. | `int` | `5` |
-| `training_params.focus_metric` | Metrics to focus on to determine best epoch. | `str` | `cer` |
-| `training_params.expected_metric_value` | Best value for the focus metric. Should be either `"high"` or `"low"`. | `low` | `cer` |
 | `training_params.set_name_focus_metric` | Dataset to focus on to select best weights. | `str` | |
 | `training_params.train_metrics` | List of metrics to compute during training. | `list` | `["loss_ce", "cer", "wer", "wer_no_punct"]` |
 | `training_params.eval_metrics` | List of metrics to compute during validation. | `list` | `["cer", "wer", "wer_no_punct"]` |
@@ -86,7 +86,6 @@ def training_config():
             "transfer_learning": None,
             "transfered_charset": True,  # Transfer learning of the decision layer based on charset of the line HTR model
             "additional_tokens": 1,  # for decision layer = [<eot>, ], only for transferred charset
-            "input_channels": 3,  # number of channels of input image
             "dropout": 0.5,  # dropout rate for encoder
             "enc_dim": 256,  # dimension of extracted features
             "nb_layers": 5,  # encoder
@@ -99,9 +98,6 @@ def training_config():
             "dec_pred_dropout": 0.1,  # dropout rate before decision layer
             "dec_att_dropout": 0.1,  # dropout rate in multi head attention
             "dec_dim_feedforward": 256,  # number of dimension for feedforward layer in transformer decoder layers
-            "use_2d_pe": True,  # use 2D positional embedding
-            "use_1d_pe": True,  # use 1D positional embedding
-            "use_lstm": False,
             "attention_win": 100,  # length of attention window
             # Curriculum dropout
             "dropout_scheduler": {
@@ -114,7 +110,6 @@ def training_config():
             "max_nb_epochs": 4,  # maximum number of epochs before to stop
             "max_training_time": 1200,  # maximum time before to stop (in seconds)
             "load_epoch": "last",  # ["best", "last"]: last to continue training, best to evaluate
-            "interval_save_weights": None,  # None: keep best and last only
             "batch_size": 2,  # mini-batch size for training
             "use_ddp": False,  # Use DistributedDataParallel
             "nb_gpu": 0,
@@ -130,8 +125,6 @@ def training_config():
             "lr_schedulers": None,  # Learning rate schedulers
             "eval_on_valid": True,  # Whether to eval and logs metrics on validation set during training or not
             "eval_on_valid_interval": 2,  # Interval (in epochs) to evaluate during training
-            "focus_metric": "cer",  # Metrics to focus on to determine best epoch
-            "expected_metric_value": "low",  # ["high", "low"] What is best for the focus metric value
             "set_name_focus_metric": "training-val",  # Which dataset to focus on to select best weights
             "train_metrics": [
                 "loss_ce",
@@ -3,15 +3,12 @@ version: 0.0.1
 parameters:
   max_char_prediction: 200
   encoder:
-    input_channels: 3
     dropout: 0.5
   decoder:
     enc_dim: 256
     l_max: 15000
     dec_pred_dropout: 0.1
     attention_win: 100
-    use_1d_pe: True
-    use_lstm: False
     vocab_size: 96
     h_max: 500
     w_max: 1000