expand architecture
Committed by spozdn on Jan 3, 2024
Commit 49c377f (1 parent: 003e354)
Showing 3 changed files with 14 additions and 4 deletions.
default_hypers/default_hypers.yaml (1 change: 1 addition & 0 deletions)
@@ -22,6 +22,7 @@ ARCHITECTURAL_HYPERS:
   USE_BOND_ENERGIES: True
   USE_ADDITIONAL_SCALAR_ATTRIBUTES: False
   SCALAR_ATTRIBUTES_SIZE: None
+  TRANSFORMER_TYPE: PostLN # PostLN or PreLN
 
 
 FITTING_SCHEME:
src/pet.py (3 changes: 2 additions & 1 deletion)
@@ -90,7 +90,8 @@ def __init__(self, hypers, d_model, n_head,
         self.trans_layer = TransformerLayer(d_model=d_model, n_heads = n_head,
                                             dim_feedforward = dim_feedforward,
                                             dropout = dropout,
-                                            activation = get_activation(hypers))
+                                            activation = get_activation(hypers),
+                                            transformer_type = hypers.TRANSFORMER_TYPE)
         self.trans = Transformer(self.trans_layer,
                                  num_layers=n_layers)
src/transformer.py (14 changes: 11 additions & 3 deletions)
@@ -49,11 +49,15 @@ def forward(self, x, multipliers = None):
 
 class TransformerLayer(torch.nn.Module):
     def __init__(self, d_model, n_heads, dim_feedforward = 512, dropout = 0.0,
-                 activation = F.silu):
+                 activation = F.silu, transformer_type = 'PostLN'):
 
         super(TransformerLayer, self).__init__()
         self.attention = AttentionBlock(d_model, n_heads, dropout = dropout)
 
+        if transformer_type not in ['PostLN', 'PreLN']:
+            raise ValueError("unknown transformer type")
+        self.transformer_type = transformer_type
+
         self.norm_attention = nn.LayerNorm(d_model)
         self.norm_mlp = nn.LayerNorm(d_model)
         self.dropout = nn.Dropout(dropout)
@@ -68,8 +72,12 @@ def __init__(self, d_model, n_heads, dim_feedforward = 512, dropout = 0.0,
 
 
     def forward(self, x, multipliers = None):
-        x = self.norm_attention(x + self.dropout(self.attention(x, multipliers)))
-        x = self.norm_mlp(x + self.mlp(x))
+        if self.transformer_type == 'PostLN':
+            x = self.norm_attention(x + self.dropout(self.attention(x, multipliers)))
+            x = self.norm_mlp(x + self.mlp(x))
+        if self.transformer_type == 'PreLN':
+            x = x + self.dropout(self.attention(self.norm_attention(x), multipliers))
+            x = x + self.mlp(self.norm_mlp(x))
         return x
 
 class Transformer(torch.nn.Module):
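
For readers comparing the two branches above: PostLN applies LayerNorm after each residual sum (the original Transformer layout), while PreLN applies LayerNorm to the input of each sub-block and leaves the residual stream itself unnormalized, which typically makes deep stacks easier to train. The standalone sketch below illustrates only that distinction; it is not the repository's code, and torch.nn.MultiheadAttention plus a plain two-layer feed-forward block stand in for the AttentionBlock and MLP defined in src/transformer.py.

# Standalone illustration of the two orderings added in this commit.
# Not repository code: MultiheadAttention and a simple MLP are stand-ins.
import torch
import torch.nn as nn


class ToyTransformerLayer(nn.Module):
    def __init__(self, d_model, n_heads, dim_feedforward=512, dropout=0.0,
                 transformer_type='PostLN'):
        super().__init__()
        if transformer_type not in ['PostLN', 'PreLN']:
            raise ValueError("unknown transformer type")
        self.transformer_type = transformer_type
        self.attention = nn.MultiheadAttention(d_model, n_heads,
                                               dropout=dropout,
                                               batch_first=True)
        self.norm_attention = nn.LayerNorm(d_model)
        self.norm_mlp = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)
        self.mlp = nn.Sequential(nn.Linear(d_model, dim_feedforward),
                                 nn.SiLU(),
                                 nn.Linear(dim_feedforward, d_model))

    def forward(self, x):
        if self.transformer_type == 'PostLN':
            # Post-LN: LayerNorm after each residual sum.
            attn, _ = self.attention(x, x, x)
            x = self.norm_attention(x + self.dropout(attn))
            x = self.norm_mlp(x + self.mlp(x))
        if self.transformer_type == 'PreLN':
            # Pre-LN: LayerNorm on each sub-block's input; the residual
            # stream itself is never normalized.
            h = self.norm_attention(x)
            attn, _ = self.attention(h, h, h)
            x = x + self.dropout(attn)
            x = x + self.mlp(self.norm_mlp(x))
        return x


x = torch.randn(2, 7, 64)  # (batch, tokens, d_model)
for kind in ['PostLN', 'PreLN']:
    out = ToyTransformerLayer(64, 4, transformer_type=kind)(x)
    print(kind, out.shape)  # torch.Size([2, 7, 64])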
