diff --git a/models.py b/models.py index 2967903..b262ffd 100644 --- a/models.py +++ b/models.py @@ -818,8 +818,6 @@ class WindowAttention(nn.Module): self.window_size[0] * self.window_size[1], self.window_size[0] * self.window_size[1], -1) # Wh*Ww,Wh*Ww,nH relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww relative_position_bias = 16 * torch.sigmoid(relative_position_bias) - print(attn.shape) - print(relative_position_bias.shape) attn = attn + relative_position_bias.unsqueeze(0) if mask is not None: