Verified commit f0a6e38c, authored by Manon Blanco, committed by Mélodie Boillet

Always use 1D and 2D positional embedding

parent 6679f473
1 merge request: !206 Always use 1D and 2D positional embedding
@@ -305,14 +305,9 @@ class FeaturesUpdater(Module):
         self.pe_2d = PositionalEncoding2D(
             params["enc_dim"], params["h_max"], params["w_max"], params["device"]
         )
-        self.use_2d_positional_encoding = (
-            "use_2d_pe" not in params or params["use_2d_pe"]
-        )
 
     def get_pos_features(self, features):
-        if self.use_2d_positional_encoding:
-            return self.pe_2d(features)
-        return features
+        return self.pe_2d(features)
 
 
 class GlobalHTADecoder(Module):
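For readers outside the codebase: the diff only shows how `PositionalEncoding2D` is used (constructed with `enc_dim`, `h_max`, `w_max` and a device, then added to the encoder feature map). Below is a minimal sketch of a module with that interface; the internals (a sinusoidal table split between the vertical and horizontal axes) are an assumption, not the repository's implementation.

```python
import math

import torch
from torch.nn import Module


class PositionalEncoding2DSketch(Module):
    """Sketch of a 2D sinusoidal positional encoding (interface assumed from the diff).

    A (dim, h_max, w_max) table is precomputed once; half of the channels
    encode the vertical position, the other half the horizontal position.
    """

    def __init__(self, dim, h_max, w_max, device):
        super().__init__()
        assert dim % 4 == 0, "dim must be divisible by 4 to split sin/cos over both axes"
        half = dim // 2
        div = torch.exp(
            -math.log(10000.0) * torch.arange(0, half, 2, device=device).float() / half
        )
        y = torch.arange(h_max, device=device).float()
        x = torch.arange(w_max, device=device).float()
        pe = torch.zeros(dim, h_max, w_max, device=device)
        pe[0:half:2] = torch.sin(y[None, :, None] * div[:, None, None])
        pe[1:half:2] = torch.cos(y[None, :, None] * div[:, None, None])
        pe[half::2] = torch.sin(x[None, None, :] * div[:, None, None])
        pe[half + 1 :: 2] = torch.cos(x[None, None, :] * div[:, None, None])
        self.register_buffer("pe", pe.unsqueeze(0))  # (1, dim, h_max, w_max)

    def forward(self, features):
        # features: (batch, dim, h, w); add the matching crop of the precomputed table
        return features + self.pe[:, :, : features.size(2), : features.size(3)]
```

With the `use_2d_pe` switch gone, `get_pos_features` becomes a thin wrapper around this addition.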
@@ -326,7 +321,6 @@ class GlobalHTADecoder(Module):
         self.dec_att_win = (
             params["attention_win"] if params["attention_win"] is not None else 1
         )
-        self.use_1d_pe = "use_1d_pe" not in params or params["use_1d_pe"]
 
         self.features_updater = FeaturesUpdater(params)
         self.att_decoder = GlobalAttDecoder(params)
@@ -361,9 +355,7 @@ class GlobalHTADecoder(Module):
         pos_tokens = self.emb(tokens).permute(0, 2, 1)
 
         # Add 1D Positional Encoding
-        if self.use_1d_pe:
-            pos_tokens = self.pe_1d(pos_tokens, start=start)
-        pos_tokens = pos_tokens.permute(2, 0, 1)
+        pos_tokens = self.pe_1d(pos_tokens, start=start).permute(2, 0, 1)
 
         if num_pred is None:
             num_pred = tokens.size(1)
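Same remark for the token path: `pe_1d` receives embeddings permuted to (batch, dim, length) plus a `start` offset, and the result is permuted to (length, batch, dim) for the attention decoder. A sketch of a 1D encoding with that calling convention, with internals assumed rather than taken from the repository:

```python
import math

import torch
from torch.nn import Module


class PositionalEncoding1DSketch(Module):
    """Sketch of a 1D sinusoidal positional encoding with a start offset (interface assumed)."""

    def __init__(self, dim, l_max, device):
        super().__init__()
        assert dim % 2 == 0, "dim must be even to interleave sin/cos"
        pos = torch.arange(l_max, device=device).float()
        div = torch.exp(
            -math.log(10000.0) * torch.arange(0, dim, 2, device=device).float() / dim
        )
        pe = torch.zeros(dim, l_max, device=device)
        pe[0::2] = torch.sin(pos[None, :] * div[:, None])
        pe[1::2] = torch.cos(pos[None, :] * div[:, None])
        self.register_buffer("pe", pe.unsqueeze(0))  # (1, dim, l_max)

    def forward(self, x, start=0):
        # x: (batch, dim, len); add the encodings for positions [start, start + len)
        return x + self.pe[:, :, start : start + x.size(2)]
```

The `start` argument matters because decoding can proceed window by window; positions must keep counting from where the previous window stopped.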
@@ -151,8 +151,6 @@ def get_config():
             "dec_pred_dropout": 0.1,  # dropout rate before decision layer
             "dec_att_dropout": 0.1,  # dropout rate in multi head attention
             "dec_dim_feedforward": 256,  # number of dimension for feedforward layer in transformer decoder layers
-            "use_2d_pe": True,  # use 2D positional embedding
-            "use_1d_pe": True,  # use 1D positional embedding
             "attention_win": 100,  # length of attention window
             # Curriculum dropout
             "dropout_scheduler": {
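Since `use_2d_pe` and `use_1d_pe` disappear from the default configuration and the model code above no longer reads them, leftover flags in an older Python config dict are simply ignored. A hypothetical clean-up helper (name and usage are mine, not part of the repository):

```python
def strip_obsolete_pe_flags(model_params: dict) -> dict:
    """Drop the removed positional-encoding switches from an older config dict."""
    for key in ("use_1d_pe", "use_2d_pe"):
        model_params.pop(key, None)  # absent keys are fine
    return model_params


# e.g. model_params = strip_obsolete_pe_flags(old_config["model_params"])
```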
@@ -58,7 +58,6 @@ parameters:
   l_max: int
   dec_pred_dropout: float
   attention_win: int
-  use_1d_pe: bool
   vocab_size: int
   h_max: int
   w_max: int
@@ -136,8 +136,6 @@ For a detailed description of all augmentation transforms, see the [dedicated pa
 | `model_params.dec_pred_dropout`           | Dropout rate before decision layer.                                       | `float`      | `0.1`                           |
 | `model_params.dec_att_dropout`            | Dropout rate in multi head attention.                                     | `float`      | `0.1`                           |
 | `model_params.dec_dim_feedforward`        | Number of dimensions for feedforward layer in transformer decoder layers. | `int`        | `256`                           |
-| `model_params.use_2d_pe`                  | Whether to use 2D positional embedding.                                   | `bool`       | `True`                          |
-| `model_params.use_1d_pe`                  | Whether to use 1D positional embedding.                                   | `bool`       | `True`                          |
 | `model_params.attention_win`              | Length of attention window.                                               | `int`        | `100`                           |
 | `model_params.dropout_scheduler.function` | Curriculum dropout scheduler.                                             | custom class | `exponential_dropout_scheduler` |
 | `model_params.dropout_scheduler.T`        | Exponential factor.                                                       | `float`      | `5e4`                           |
@@ -99,8 +99,6 @@ def training_config():
             "dec_pred_dropout": 0.1,  # dropout rate before decision layer
             "dec_att_dropout": 0.1,  # dropout rate in multi head attention
             "dec_dim_feedforward": 256,  # number of dimension for feedforward layer in transformer decoder layers
-            "use_2d_pe": True,  # use 2D positional embedding
-            "use_1d_pe": True,  # use 1D positional embedding
             "attention_win": 100,  # length of attention window
             # Curriculum dropout
             "dropout_scheduler": {
@@ -10,7 +10,6 @@ parameters:
   l_max: 15000
   dec_pred_dropout: 0.1
   attention_win: 100
-  use_1d_pe: True
   vocab_size: 96
   h_max: 500
   w_max: 1000
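After this last hunk, the inference parameters file no longer declares `use_1d_pe`. A quick sanity check of the trimmed fragment shown above (values copied from the hunk; surrounding keys are elided in the diff), assuming PyYAML is available:

```python
import yaml  # PyYAML, assumed available

fragment = """
l_max: 15000
dec_pred_dropout: 0.1
attention_win: 100
vocab_size: 96
h_max: 500
w_max: 1000
"""

params = yaml.safe_load(fragment)
assert "use_1d_pe" not in params  # the flag is gone; 1D PE is always applied
print(params["attention_win"])  # 100
```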