diff --git a/models.py b/models.py index 09ded1e..fa6ad20 100644 --- a/models.py +++ b/models.py @@ -532,9 +532,9 @@ class FouriER(torch.nn.Module): self.patch_embed = PatchEmbed(in_chans=channels, patch_size=self.p.patch_size, embed_dim=self.p.embed_dim, stride=4, padding=2) network = [] - layers = [2, 2, 6, 2] - embed_dims = [self.p.embed_dim, 320, 256, 128] - mlp_ratios = [4, 4, 8, 12] + layers = [4, 4, 12, 4] + embed_dims = [self.p.embed_dim, 128, 320, 128] + mlp_ratios = [4, 4, 4, 4] num_heads = [2, 4, 8, 16] downsamples = [True, True, True, True] pool_size=3 @@ -949,7 +949,7 @@ class PoolFormerBlock(nn.Module): self.window_size = window_size self.shift_size = shift_size self.input_resolution = input_resolution - self.token_mixer = WindowAttention(dim=dim, window_size=to_2tuple(self.window_size), num_heads=num_heads, attn_drop=0.1, proj_drop=0.2) + self.token_mixer = WindowAttention(dim=dim, window_size=to_2tuple(self.window_size), num_heads=num_heads) self.norm2 = norm_layer(dim) mlp_hidden_dim = int(dim * mlp_ratio) self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim,