diff --git a/models.py b/models.py index 00c1f74..3fe61b4 100644 --- a/models.py +++ b/models.py @@ -896,7 +896,7 @@ class PoolFormerBlock(nn.Module): # self.token_mixer = FNetBlock() self.window_size = 8 self.attn_mask = None - self.token_mixer = WindowAttention(dim=dim, window_size=to_2tuple(self.window_size), num_heads=8) + self.token_mixer = WindowAttention(dim=dim, window_size=to_2tuple(self.window_size), num_heads=4) self.norm2 = norm_layer(dim) mlp_hidden_dim = int(dim * mlp_ratio) self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim,