diff --git a/models.py b/models.py index 9e199fb..8441dea 100644 --- a/models.py +++ b/models.py @@ -808,6 +808,8 @@ class WindowAttention(nn.Module): self.window_size[0] * self.window_size[1], self.window_size[0] * self.window_size[1], -1) # Wh*Ww,Wh*Ww,nH relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww relative_position_bias = 16 * torch.sigmoid(relative_position_bias) + print(attn.shape) + print(relative_position_bias.shape) attn = attn + relative_position_bias.unsqueeze(0) if mask is not None: