Hard Attention vs. Soft Attention
Soft attention means that although the words carry different weights, every word is still used during training, with the focus words receiving the larger weights. In other words, soft attention (Soft Attention) does not pick just 1 of the N pieces of information; it computes a weighted average over all N inputs and feeds that into the network.
Hard attention, by contrast, selects only a single item from the N stored pieces of information, for example the word vector with the highest probability.
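A minimal sketch of the difference (the tensor sizes are illustrative): soft attention takes a softmax-weighted average over all N value vectors, while hard attention keeps only the single highest-scoring vector (here via argmax; in practice hard attention is usually sampled and trained with reinforcement learning or similar tricks).

    import torch
    import torch.nn.functional as F

    values = torch.randn(5, 8)   # N = 5 value vectors of dimension 8 (toy data)
    scores = torch.randn(5)      # one attention score per vector

    # Soft attention: weighted average over all N vectors
    weights = F.softmax(scores, dim=0)                      # weights sum to 1
    soft_out = (weights.unsqueeze(1) * values).sum(dim=0)   # shape (8,)

    # Hard attention: pick only the highest-scoring vector
    hard_out = values[scores.argmax()]                      # shape (8,)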
Code:
(1) Soft attention
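The snippets below rely on a ConvBlock helper that is not shown in the post. A minimal sketch of it, assuming (as in the HA-CNN reference code) that it is a plain convolution followed by batch norm and ReLU, together with the imports the code needs:

    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    class ConvBlock(nn.Module):
        """Basic convolutional block: conv + batch norm + ReLU (assumed definition)."""
        def __init__(self, in_c, out_c, k, s=1, p=0):
            super(ConvBlock, self).__init__()
            self.conv = nn.Conv2d(in_c, out_c, k, stride=s, padding=p)
            self.bn = nn.BatchNorm2d(out_c)

        def forward(self, x):
            return F.relu(self.bn(self.conv(x)))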
class SpatialAttn(nn.Module):
    """Spatial Attention (Sec. 3.1.I.1)"""
    def __init__(self):
        super(SpatialAttn, self).__init__()
        self.conv1 = ConvBlock(1, 1, 3, s=2, p=1)
        self.conv2 = ConvBlock(1, 1, 1)

    def forward(self, x):
        # global cross-channel averaging: (N, C, H, W) -> (N, 1, H, W)
        x = x.mean(1, keepdim=True)
        # 3x3 conv with stride 2
        h = x.size(2)
        x = self.conv1(x)
        # bilinear resizing back to (h, h); assumes a square feature map
        x = F.interpolate(x, (h, h), mode='bilinear', align_corners=True)
        # scaling conv (1x1)
        x = self.conv2(x)
        return x

# Returns an H x W x 1 soft attention map (one spatial map per image).
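As a quick shape check (the sizes here are illustrative), the spatial branch collapses the channels but keeps the spatial resolution:

    x = torch.randn(2, 64, 32, 32)       # (batch, channels, H, W)
    spatial_attn = SpatialAttn()
    print(spatial_attn(x).shape)         # torch.Size([2, 1, 32, 32]) -- one soft map per image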
class ChannelAttn(nn.Module):
    """Channel Attention"""
    def __init__(self, in_channels, reduction_rate=16):
        super(ChannelAttn, self).__init__()
        assert in_channels % reduction_rate == 0
        self.conv1 = ConvBlock(in_channels, in_channels // reduction_rate, 1)
        self.conv2 = ConvBlock(in_channels // reduction_rate, in_channels, 1)

    def forward(self, x):
        # squeeze: global average pooling -> (N, C, 1, 1)
        x = F.avg_pool2d(x, x.size()[2:])
        # excitation: two 1x1 conv layers (reduce, then restore the channel count)
        x = self.conv1(x)
        x = self.conv2(x)
        return x
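As another shape check (again with illustrative sizes), the channel branch squeezes the spatial dimensions away and produces one weight per channel:

    x = torch.randn(2, 64, 32, 32)
    channel_attn = ChannelAttn(64)       # 64 is divisible by the reduction_rate of 16
    print(channel_attn(x).shape)         # torch.Size([2, 64, 1, 1]) -- one weight per channel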
'''
Fusing the spatial and channel attention:
the spatial map and the channel weights are combined by a (broadcasted)
element-wise multiplication.
'''
class SoftAttn(nn.Module):
    """Soft Attention (Sec. 3.1.I)
    Aim: Spatial Attention + Channel Attention
    Output: attention maps with shape identical to input.
    """
    def __init__(self, in_channels):
        super(SoftAttn, self).__init__()
        self.spatial_attn = SpatialAttn()
        self.channel_attn = ChannelAttn(in_channels)
        self.conv = ConvBlock(in_channels, in_channels, 1)

    def forward(self, x):
        y_spatial = self.spatial_attn(x)   # spatial attention output, (N, 1, H, W)
        y_channel = self.channel_attn(x)   # channel attention output, (N, C, 1, 1)
        y = y_spatial * y_channel          # broadcasted multiplication -> (N, C, H, W)
        y = torch.sigmoid(self.conv(y))    # 1x1 conv block, then sigmoid to (0, 1)
        return y
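In use (a hypothetical example), the soft attention map has the same shape as the input and is applied by element-wise multiplication to re-weight the features:

    x = torch.randn(2, 64, 32, 32)
    soft_attn = SoftAttn(64)
    attn = soft_attn(x)                  # (2, 64, 32, 32), values in (0, 1) after the sigmoid
    refined = x * attn                   # re-weight the input features with the attention map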
(2) Hard attention
class HardAttn(nn.Module):
    """Hard Attention (Sec. 3.1.II)"""
    def __init__(self, in_channels):
        super(HardAttn, self).__init__()
        self.fc = nn.Linear(in_channels, 4 * 2)
        self.init_params()

    def init_params(self):
        self.fc.weight.data.zero_()
        # initialize the bias away from zero;
        # if x_t = 0 the performance is very low
        self.fc.bias.data.copy_(
            torch.tensor([0.3, -0.3, 0.3, 0.3, -0.3, 0.3, -0.3, -0.3], dtype=torch.float))

    def forward(self, x):
        # squeeze operation (global average pooling)
        x = F.avg_pool2d(x, x.size()[2:]).view(x.size(0), x.size(1))
        # predict transformation parameters
        theta = torch.tanh(self.fc(x))
        theta = theta.view(-1, 4, 2)
        return theta

# Returns 2T values, where T = 4 is the number of regions. Because the scale is
# fixed, only the translation offsets need to be learned.
(I don't fully understand this part yet; just recording it here.)
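As a sketch of how the predicted translations are typically used (following the spatial-transformer formulation; the fixed scale value below is an assumption, not taken from the post), each of the T = 4 (dx, dy) pairs is placed into a 2x3 affine matrix with a fixed scale, and F.affine_grid / F.grid_sample extract the corresponding region:

    def stn_crop(x, theta_t, scale=0.5):
        # theta_t: (N, 2) translation for one region; scale is fixed (assumed value)
        n = x.size(0)
        affine = torch.zeros(n, 2, 3, device=x.device)
        affine[:, 0, 0] = scale          # fixed horizontal scale
        affine[:, 1, 1] = scale          # fixed vertical scale
        affine[:, :, 2] = theta_t        # learned translation
        grid = F.affine_grid(affine, x.size(), align_corners=False)
        return F.grid_sample(x, grid, align_corners=False)

    x = torch.randn(2, 64, 32, 32)
    hard_attn = HardAttn(64)
    theta = hard_attn(x)                 # (2, 4, 2): one (dx, dy) per region
    region0 = stn_crop(x, theta[:, 0])   # crop the first of the 4 regions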
Original post: https://blog.csdn.net/qq_40905284/article/details/130726310