Yesterday I studied the CBAM module and ACNet.
Today I wanted to see what CBAM actually buys, so I wrote code for a classification task on the MNIST dataset. I could not see any meaningful difference; if anything, the network without CBAM did slightly better.
My guess is that the dataset is too small and too easy for the module to show its value; I will try it on the VOC dataset another day.
For now, here are the experimental results. The learning rate, number of epochs, and batch size were kept exactly the same between the two runs.
The PyTorch code first:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.model_zoo as model_zoo
import math
import torchvision
import torch.optim as optim # PyTorch's common optimizers are all wrapped in torch.optim
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
model_urls = {
'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
}
# A 3*3 convolution with padding=1; with stride=1 it keeps the feature-map size unchanged, but it may change the number of channels
def conv3x3(in_channels,out_channels,stride=1):
return nn.Conv2d(in_channels=in_channels,out_channels=out_channels,kernel_size=3,stride=stride,padding=1,bias=False)
# The channel attention module of CBAM
class ChannelAttention(nn.Module):
    # Takes the number of input channels and the reduction ratio (default 16)
def __init__(self,in_channels,reduction = 16):
super(ChannelAttention,self).__init__()
        # Global average pooling and global max pooling, both with output_size = 1
self.avg = nn.AdaptiveAvgPool2d(output_size=1)
self.max = nn.AdaptiveMaxPool2d(output_size=1)
        ## The shared MLP
self.fc1 = nn.Conv2d(in_channels=in_channels,out_channels=in_channels//reduction,kernel_size=1,bias=False)
self.relu1 = nn.ReLU()
self.fc2 = nn.Conv2d(in_channels=in_channels//reduction,out_channels=in_channels,kernel_size=1,bias=False)
        ## Sigmoid gate that squashes the attention logits into (0, 1)
        self.sigmoid = nn.Sigmoid()
def forward(self,x):
avg_out = self.fc2(self.relu1(self.fc1(self.avg(x))))
max_out = self.fc2(self.relu1(self.fc1(self.max(x))))
        out = self.sigmoid(avg_out + max_out)
return out
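"""
## Quick shape check for ChannelAttention (an illustrative sketch, not part of the training run;
## the names ca and x below are made up): the module should return per-channel weights in (0, 1)
ca = ChannelAttention(in_channels=64)
x = torch.randn(2, 64, 28, 28)
print(ca(x).size()) # torch.Size([2, 64, 1, 1])
"""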
## The spatial attention module of CBAM
class SpatialAttention(nn.Module):
    # The spatial attention module has a single conv layer; its kernel_size is passed in (7 or 3)
def __init__(self,kernel_size = 7):
super(SpatialAttention,self).__init__()
padding = 3 if kernel_size == 7 else 1
self.conv1 = nn.Conv2d(in_channels = 2,out_channels = 1,kernel_size=kernel_size,padding = padding,bias=False)
        self.sigmoid = nn.Sigmoid()
def forward(self,x):
avg_out = torch.mean(x, dim = 1, keepdim = True)
max_out, _ = torch.max(x, dim = 1, keepdim = True)
        # Concatenate avg_out and max_out along the channel dimension
x = torch.cat([avg_out,max_out],dim=1)
x = self.conv1(x)
        return self.sigmoid(x)
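"""
## How the two modules are chained in CBAM: channel attention first, then spatial attention,
## each applied multiplicatively with broadcasting (an illustrative sketch on a random feature map):
ca = ChannelAttention(in_channels=64)
sa = SpatialAttention(kernel_size=7)
feat = torch.randn(2, 64, 28, 28)
feat = ca(feat) * feat # (2,64,1,1) channel weights broadcast over H and W
feat = sa(feat) * feat # (2,1,28,28) spatial map broadcast over the channels
print(feat.size()) # torch.Size([2, 64, 28, 28]), same shape, reweighted features
"""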
class BasicBlock(nn.Module):
expansion = 1
def __init__(self,in_channels,out_channels,stride = 1,downsample=None):
super(BasicBlock,self).__init__()
self.conv1 = conv3x3(in_channels,out_channels,stride=stride)
self.bn1 = nn.BatchNorm2d(out_channels)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(out_channels,out_channels)
self.bn2 = nn.BatchNorm2d(out_channels)
# self.ca = ChannelAttention(out_channels)
# self.sa = SpatialAttention()
self.downsample = downsample
self.stride = stride
def forward(self,x):
residual = x
out = self.relu(self.bn1(self.conv1(x)))
out = self.bn2(self.conv2(out))
# out = self.ca(out) * out
# out = self.sa(out) * out
if self.downsample is not None:
residual = self.downsample(x)
out = out + residual
out = self.relu(out)
return out
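"""
## Rather than commenting the attention lines in and out, a subclass with CBAM wired in
## would make the with/without comparison a one-word switch (an alternative sketch,
## not the code I actually ran; the name BasicBlockCBAM is made up):
class BasicBlockCBAM(BasicBlock):
    def __init__(self,in_channels,out_channels,stride = 1,downsample=None):
        super(BasicBlockCBAM,self).__init__(in_channels,out_channels,stride,downsample)
        self.ca = ChannelAttention(out_channels)
        self.sa = SpatialAttention()
    def forward(self,x):
        residual = x
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out = self.ca(out) * out # CBAM: channel attention first ...
        out = self.sa(out) * out # ... then spatial attention, before the residual add
        if self.downsample is not None:
            residual = self.downsample(x)
        return self.relu(out + residual)
## Passing BasicBlockCBAM instead of BasicBlock to ResNet(...) then enables CBAM in every block.
"""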
class ResNet(nn.Module):
def __init__(self,block,layers,num_classes=1000):
self.in_channels = 64
super(ResNet,self).__init__()
        # MNIST images are grayscale with a single channel, so in_channels = 1
self.conv1 = nn.Conv2d(in_channels=1,out_channels=64,kernel_size=7,stride=2,padding=3,bias=False)
self.bn1 = nn.BatchNorm2d(64)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3,stride = 2, padding = 1)
self.layer1 = self._make_layer(block,64,layers[0])
self.layer2 = self._make_layer(block,128,layers[1],stride=2)
self.layer3 = self._make_layer(block,256,layers[2],stride=2)
self.layer4 = self._make_layer(block,512,layers[3],stride=2)
# self.Avgpool = nn.AvgPool2d(2,stride=1)
self.fc = nn.Linear(512 * block.expansion,num_classes)
        # Weight initialization
for m in self.modules():
if isinstance(m,nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0,math.sqrt(2./n))
elif isinstance(m,nn.BatchNorm2d):
m.weight.data.fill_(1)
m.bias.data.zero_()
def _make_layer(self,block,planes,blocks,stride = 1):
downsample = None
if stride != 1 or self.in_channels != planes * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(self.in_channels,planes * block.expansion,
kernel_size=1,stride=stride,bias=False),
nn.BatchNorm2d(planes * block.expansion),
)
layers = []
layers.append(block(self.in_channels,planes,stride,downsample))
self.in_channels = planes * block.expansion
for i in range(1,blocks):
layers.append(block(self.in_channels,planes))
return nn.Sequential(*layers)
def forward(self,x):
x = self.relu(self.bn1(self.conv1(x)))
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
        # x = self.Avgpool(x) # MNIST images are only 28*28, too small for an extra pooling layer
x = x.view(x.size(0),-1)
x = self.fc(x)
return F.log_softmax(x,dim = 1)
def resnet18_cbam(pretrained = False,**kwargs):
model = ResNet(BasicBlock,[2,2,2,2],**kwargs)
if pretrained:
pretrained_state_dict = model_zoo.load_url(model_urls['resnet18'])
now_state_dict = model.state_dict()
now_state_dict.update(pretrained_state_dict)
model.load_state_dict(now_state_dict)
return model
"""
## The pretrained weights assume a 3-channel conv1, so they cannot be used when training on 1-channel MNIST
my_model = resnet18_cbam(pretrained=False,num_classes = 10)
x = torch.Tensor(10,1,28,28)
print(my_model(x).size())
"""
# Training loop
def train(epoch):
    network.train() # model.train() puts every module into training mode
    # (for testing/validation the model must be switched back with model.eval())
    for batch_idx, (data, target) in enumerate(train_loader): # enumerate pairs each batch with its index
        # batch_idx: index of the batch; data: the image batch, size [64,1,28,28]; target: the ground-truth digits, size [64]
        optimizer.zero_grad() # clear the gradients, since PyTorch accumulates them by default
        # gradients accumulate across backward() calls, so they must be zeroed manually each step
        output = network(data) # forward pass: feed the image batch through the network
        loss = F.nll_loss(output, target) # negative log-likelihood loss between output and target
        loss.backward() # backpropagate the error by calling backward() on the loss
        optimizer.step() # update the parameters from the gradients stored in each parameter's .grad
        if batch_idx % log_interval == 0:
            print("Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
            train_losses.append(loss.item()) # record this loss for the final plot
            train_counter.append( # record how many training examples have been seen so far
                (batch_idx * 64) + ((epoch - 1) * len(train_loader.dataset)))
            torch.save(network.state_dict(), './model.pth') # modules save/restore their internal state via .state_dict()
            torch.save(optimizer.state_dict(), './optimizer.pth') # optimizers expose .state_dict() as well
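"""
## The two .pth files written above can be used to resume training later
## (a minimal sketch; the paths match the torch.save calls in train(), and network/optimizer
## must already be constructed as they are further down in this script):
network.load_state_dict(torch.load('./model.pth'))
optimizer.load_state_dict(torch.load('./optimizer.pth'))
"""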
# Evaluate the network: report loss and accuracy on the test set
def test():
    network.eval() # switch the model to evaluation mode for testing/validation
    test_loss = 0 # running sum of the test loss
    correct = 0 # running count of correct predictions
    with torch.no_grad(): # disable autograd: tensors computed here get requires_grad=False and no grad_fn, so no gradients are tracked
        for data, target in test_loader: # iterate over the test batches
            # data: the test image batch; target: the ground-truth digits
            output = network(data) # forward pass; output is a [batch, 10] tensor of log-probabilities from log_softmax
            test_loss += F.nll_loss(output, target, reduction='sum').item() # sum (not average) the loss over the batch
            # classification accuracy
            pred = output.data.max(1, keepdim=True)[1] # index of the largest log-probability, i.e. the predicted digit
            correct += pred.eq(target.data.view_as(pred)).sum() # count correct predictions
    test_loss /= len(test_loader.dataset)
    test_losses.append(test_loss)
    print('\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
batch_size_train = 64
batch_size_test = 1000
# Download the data with torchvision's built-in MNIST dataset class
# Training-set loader with a fixed preprocessing pipeline
train_loader = torch.utils.data.DataLoader(
torchvision.datasets.MNIST('./data/', train=True, download=True,
transform=torchvision.transforms.Compose([
torchvision.transforms.ToTensor(),
torchvision.transforms.Normalize(
                                   (0.1307,), (0.3081,)) # Normalize standardizes the tensor; MNIST has one channel, so one mean and one std
])),
batch_size=batch_size_train, shuffle=True)
# Test-set loader with the same preprocessing
test_loader = torch.utils.data.DataLoader(
torchvision.datasets.MNIST('./data/', train=False, download=True,
transform=torchvision.transforms.Compose([
torchvision.transforms.ToTensor(),
torchvision.transforms.Normalize(
                                   (0.1307,), (0.3081,)) # same normalization: the two tuples are the per-channel mean and std
])),
batch_size=batch_size_test, shuffle=True)
# train_loader and test_loader are iterable batch generators
# torchvision.transforms.Compose chains several transform functions together
# MNIST images are grayscale with one channel; with more channels Normalize needs one value per channel, e.g. Normalize([m1,m2,m3], [n1,n2,n3]) for three channels
# download controls whether the data is fetched; if ./data already contains MNIST it can be set to False
# DataLoader yields batches lazily, which saves memory
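# For example, for 3-channel ImageNet-style inputs the standard per-channel statistics would be
# (shown only as an illustration of the multi-channel case, not used for MNIST):
# torchvision.transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))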
n_epochs = 3 # number of epochs
# one epoch = one forward pass and one backward pass of all the training examples
learning_rate = 0.01 # learning rate
momentum = 0.5 # momentum
random_seed = 1 # random seed
torch.manual_seed(random_seed) # seed the CPU RNG so the results are reproducible
# Instantiate the network
network = resnet18_cbam(pretrained=False,num_classes = 10)
# SGD is the most basic optimizer: it splits the data into batches and feeds them through the network one batch at a time.
# A single batch cannot reflect the whole dataset, but mini-batches speed up training considerably without losing much accuracy.
optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)
# With momentum, each weight update is also influenced by the previous update,
# like a rolling ball whose current state carries inertia from the last one, which speeds up convergence.
Step_LR = torch.optim.lr_scheduler.StepLR(optimizer,step_size = 1,gamma=0.4)
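# With lr=0.01, step_size=1 and gamma=0.4 the schedule is easy to verify by hand:
# epoch 1 runs at 0.01, epoch 2 at 0.01*0.4 = 0.004, epoch 3 at 0.004*0.4 = 0.0016
# (each Step_LR.step() call at the end of an epoch multiplies the learning rate by gamma).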
log_interval = 10 # print training progress every 10 mini-batches
train_losses = []
train_counter = []
test_losses = []
test_counter = [i * len(train_loader.dataset) for i in range(n_epochs + 1)]
test() # evaluate the freshly initialized network before any training
# Test set: Avg. loss: 2.3004, Accuracy: 751/10000 (8%)
# Train the network epoch by epoch
for epoch in range(1, n_epochs + 1):
    train(epoch) # train for one epoch
    test() # after each epoch, test the network and report loss and accuracy
    Step_LR.step() # advance the learning-rate decay schedule
# Output after the final epoch:
# Test set: Avg. loss: 0.0927, Accuracy: 9713/10000 (97%)
# Plot the training curves
fig = plt.figure()
plt.plot(train_counter, train_losses, color='blue') # training-loss curve: x = train_counter, y = train_losses
plt.scatter(test_counter, test_losses, color='red') # test losses as red scatter points
plt.legend(['Train Loss', 'Test Loss'], loc='upper right') # add a legend
plt.xlabel('number of training examples seen')
plt.ylabel('negative log likelihood loss')
plt.show() # display the figure
Results without the CBAM module
Accuracy: 9907/10000
Results with the CBAM module
Accuracy: 9890/10000
Summary
So the effect of adding the CBAM module is simply not visible here.
I'll try it on the VOC dataset tomorrow once a GPU is free.