diff --git a/models.py b/models.py index f0b6d18..dc314da 100644 --- a/models.py +++ b/models.py @@ -1094,7 +1094,7 @@ class PoolFormerBlock(nn.Module): self.attn_heads = 4 self.attn_mask = None # self.token_mixer = WindowAttention(dim=dim, window_size=to_2tuple(self.window_size), num_heads=4) - self.token_mixer = DSSA(dim, heads=self.attn_heads, window_size=self.window_size, dropout=0.5) + self.token_mixer = DSSA(dim, heads=self.attn_heads, window_size=self.window_size) self.norm2 = norm_layer(dim) mlp_hidden_dim = int(dim * mlp_ratio) self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim,