SENet (CVPR18)
class se_block(nn.Module):
# 初始化, in_channel代表输入特征图的通道数, ratio代表第一个全连接下降通道的倍数
def __init__(self, in_channel, ratio=4):
super(se_block, self).__init__()
self.avg_pool = nn.AdaptiveAvgPool2d(output_size=1)
self.fc1 = nn.Linear(in_features=in_channel, out_features=in_channel//ratio, bias=False)
self.relu = nn.ReLU()
self.fc2 = nn.Linear(in_features=in_channel//ratio, out_features=in_channel, bias=False)
self.sigmoid = nn.Sigmoid()
def forward(self, inputs):
b, c, h, w = inputs.shape
# 全局平均池化 [b,c,h,w]==>[b,c,1,1]
x = self.avg_pool(inputs)
# 维度调整 [b,c,1,1]==>[b,c]
x = x.view([b,c])
x = self.fc1(x)
x = self.relu(x)
x = self.fc2(x)
x = self.sigmoid(x)
x = x.view([b,c,1,1])
# outputs = x * inputs # 这里需要注意返回的是权重还是加权后的特征
outputs = x
return outputs
CBAM (ECCV18)
class ChannelAttention(nn.Module):
def __init__(self, in_planes, ratio=16):
super(ChannelAttention, self).__init__()
self.avg_pool = nn.AdaptiveAvgPool2d(1)
self.max_pool = nn.AdaptiveMaxPool2d(1)
self.fc = nn.Sequential(nn.Conv2d(in_planes, in_planes // 16, 1, bias=False),
nn.ReLU(),
nn.Conv2d(in_planes // 16, in_planes, 1, bias=False))
self.sigmoid = nn.Sigmoid()
def forward(self, x):
avg_out = self.fc(self.avg_pool(x))
max_out = self.fc(self.max_pool(x))
out = avg_out + max_out
return self.sigmoid(out)
DANet (CVPR19)
class DANet_CAM_Module(nn.Module):
""" Channel attention module"""
def __init__(self, in_dim):
super(DANet_CAM_Module, self).__init__()
self.chanel_in = in_dim
self.gamma = nn.Parameter(torch.zeros(1))
self.softmax = nn.Softmax(dim=-1)
def forward(self,x):
m_batchsize, C, height, width = x.size()
proj_query = x.view(m_batchsize, C, -1) # BCN
proj_key = x.view(m_batchsize, C, -1).permute(0, 2, 1) # BNC
energy = torch.bmm(proj_query, proj_key) # BCC
energy_new = torch.max(energy, -1, keepdim=True)[0].expand_as(energy)-energy
attention = self.softmax(energy_new)
proj_value = x.view(m_batchsize, C, -1)
out = torch.bmm(attention, proj_value)
out = out.view(m_batchsize, C, height, width)
out = self.gamma*out + x
# out = self.gamma*out # 没有 +x 残差连接的话,AP70 效果会很差
return out
ECANet (CVPR20)
class eca_block(nn.Module):
# 初始化, in_channel代表特征图的输入通道数, b和gama代表公式中的两个系数
def __init__(self, in_channel, b=1, gama=2):
super(eca_block, self).__init__()
# 根据输入通道数自适应调整卷积核大小
kernel_size = int(abs((math.log(in_channel, 2)+b)/gama))
# 如果卷积核大小是奇数,就使用它
if kernel_size % 2:
kernel_size = kernel_size
# 如果卷积核大小是偶数,就把它变成奇数
else:
kernel_size = kernel_size + 1
# 卷积时,为例保证卷积前后的size不变,需要0填充的数量
padding = kernel_size // 2
# 全局平均池化,输出的特征图的宽高=1
self.avg_pool = nn.AdaptiveAvgPool2d(output_size=1)
# 1D卷积,输入和输出通道数都=1,卷积核大小是自适应的
self.conv = nn.Conv1d(in_channels=1, out_channels=1, kernel_size=kernel_size,
bias=False, padding=padding)
# sigmoid激活函数,权值归一化
self.sigmoid = nn.Sigmoid()
# 前向传播
def forward(self, inputs):
b, c, h, w = inputs.shape
# 全局平均池化 [b,c,h,w]==>[b,c,1,1]
x = self.avg_pool(inputs)
# 维度调整,变成序列形式 [b,c,1,1]==>[b,1,c]
x = x.view([b,1,c])
# 1D卷积 [b,1,c]==>[b,1,c]
x = self.conv(x)
# 权值归一化
x = self.sigmoid(x)
# 维度调整 [b,1,c]==>[b,c,1,1]
x = x.view([b,c,1,1])
# 将输入特征图和通道权重相乘[b,c,h,w]*[b,c,1,1]==>[b,c,h,w]
# outputs = x * inputs # 这里需要注意返回的是权重还是加权后的特征
outputs = x
return outputs
CoordAttention (CVPR21)
SENet、CBAM、CoordAttention 对比图:
import torch
import torch.nn as nn
import math
import torch.nn.functional as F
class h_sigmoid(nn.Module):
def __init__(self, inplace=True):
super(h_sigmoid, self).__init__()
self.relu = nn.ReLU6(inplace=inplace)
def forward(self, x):
return self.relu(x + 3) / 6
class h_swish(nn.Module):
def __init__(self, inplace=True):
super(h_swish, self).__init__()
self.sigmoid = h_sigmoid(inplace=inplace)
def forward(self, x):
return x * self.sigmoid(x)
class CoordAtt(nn.Module):
def __init__(self, inp, oup, reduction=32):
super(CoordAtt, self).__init__()
self.pool_h = nn.AdaptiveAvgPool2d((None, 1))
self.pool_w = nn.AdaptiveAvgPool2d((1, None))
mip = max(8, inp // reduction)
self.conv1 = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0)
self.bn1 = nn.BatchNorm2d(mip)
self.act = h_swish()
self.conv_h = nn.Conv2d(mip, oup, kernel_size=1, stride=1, padding=0)
self.conv_w = nn.Conv2d(mip, oup, kernel_size=1, stride=1, padding=0)
def forward(self, x):
identity = x
n,c,h,w = x.size()
x_h = self.pool_h(x)
x_w = self.pool_w(x).permute(0, 1, 3, 2)
y = torch.cat([x_h, x_w], dim=2)
y = self.conv1(y)
y = self.bn1(y)
y = self.act(y)
x_h, x_w = torch.split(y, [h, w], dim=2)
x_w = x_w.permute(0, 1, 3, 2)
a_h = self.conv_h(x_h).sigmoid()
a_w = self.conv_w(x_w).sigmoid()
out = identity * a_w * a_h
return out