fixed neox attention_adapters
Jesujoba Alabi committed Apr 4, 2023
1 parent f920882 commit 034dea1
Showing 1 changed file with 4 additions and 3 deletions.
src/transformers/models/gpt_neox/modeling_gpt_neox.py: 4 additions & 3 deletions
@@ -355,10 +355,11 @@ def forward(
         # pseudocode:
         # x = x + attn(ln1(x))
         # x = x + mlp(ln2(x))
-        attn_output = attn_output + hidden_states
-        mlp_output = self.mlp(self.post_attention_layernorm(attn_output))
+        hidden_states = self.attention_adapters(attn_output, hidden_states, None) #attn_output = attn_output + hidden_states
+        residual = hidden_states
+        mlp_output = self.mlp(self.post_attention_layernorm(hidden_states))
         # residual connection
-        hidden_states = self.output_adapters(mlp_output, attn_output, None)
+        hidden_states = self.output_adapters(mlp_output, residual, None)
         #hidden_states = mlp_output + attn_output

         if use_cache:
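
For readers following the change, here is a minimal, self-contained sketch of the residual flow that the new lines implement. AdapterStub and NeoXBlockSketch are hypothetical stand-ins, not the adapter-transformers classes: AdapterStub simply performs the plain residual addition that attention_adapters / output_adapters fall back to when no adapter modules are active. The point of the fix is visible in forward(): the MLP's residual is the adapter-processed post-attention state (residual), not the raw attn_output.

# Minimal sketch (not the adapter-transformers implementation) of the corrected
# residual flow in a GPT-NeoX-style block. AdapterStub is a hypothetical
# stand-in for attention_adapters / output_adapters.
import torch
import torch.nn as nn


class AdapterStub(nn.Module):
    """Hypothetical stand-in for attention_adapters / output_adapters."""

    def forward(self, hidden_states, residual_input, layer_norm=None):
        # A real adapter layer may insert bottleneck modules here; the
        # fallback behaviour is a plain residual connection.
        return hidden_states + residual_input


class NeoXBlockSketch(nn.Module):
    def __init__(self, hidden_size=64, intermediate_size=256):
        super().__init__()
        self.input_layernorm = nn.LayerNorm(hidden_size)
        self.post_attention_layernorm = nn.LayerNorm(hidden_size)
        self.attention = nn.MultiheadAttention(hidden_size, num_heads=4, batch_first=True)
        self.mlp = nn.Sequential(
            nn.Linear(hidden_size, intermediate_size),
            nn.GELU(),
            nn.Linear(intermediate_size, hidden_size),
        )
        self.attention_adapters = AdapterStub()
        self.output_adapters = AdapterStub()

    def forward(self, hidden_states):
        ln1 = self.input_layernorm(hidden_states)
        attn_output, _ = self.attention(ln1, ln1, ln1)
        # x = x + attn(ln1(x)), routed through the attention adapters
        hidden_states = self.attention_adapters(attn_output, hidden_states, None)
        residual = hidden_states
        mlp_output = self.mlp(self.post_attention_layernorm(hidden_states))
        # x = x + mlp(ln2(x)): the MLP residual is the post-attention state,
        # not the raw attn_output (this is what the commit changes)
        return self.output_adapters(mlp_output, residual, None)


x = torch.randn(2, 8, 64)
print(NeoXBlockSketch()(x).shape)  # torch.Size([2, 8, 64])

As the diff shows, the old code never invoked the attention adapters on this path; the commit routes the first residual connection through self.attention_adapters and passes its output on as the residual for the MLP block.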
