PS:对前端和机器学习领域感兴趣的朋友们,可以关注下我的《技术博客》哦!如果对你有帮助,欢迎赠个⭐️,会常更新内容,敬请期待!❤️❤️
本文主要介绍深度学习中卷积神经网络的发展历史,以及典型的卷积神经网络。如下图所示:
LeNet-雏形网络
第一个真正的卷积神经网络在1998年提出,称为LeNet。模型共有7层(不计输入层),包括3个卷积层、2个下采样层、1个全连接层、1个输出层。模型结构如下:
import tensorflow as tf
import input_data
# Load MNIST with one-hot labels and open an interactive session so that the
# Tensor.eval() / Operation.run() calls below can use it as the default session.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
sess = tf.InteractiveSession()

# Flattened input images (784 values each) and their one-hot labels.
x = tf.placeholder("float", shape=[None, 784])
y_ = tf.placeholder("float", shape=[None, 10])
# Reshape to a 4-D tensor: [batch, height, width, channels]; 1 channel.
x_image = tf.reshape(x, [-1, 28, 28, 1])

# Layer 1: 5x5 convolution, 1 input channel, 32 filters, stride 1, zero padding.
conv1_weights = tf.get_variable("conv1_weights", [5, 5, 1, 32], initializer=tf.truncated_normal_initializer(stddev=0.1))
conv1_biases = tf.get_variable("conv1_biases", [32], initializer=tf.constant_initializer(0.0))
conv1 = tf.nn.conv2d(x_image, conv1_weights, strides=[1, 1, 1, 1], padding='SAME')
relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_biases))

# Layer 2: 2x2 max pooling, stride 2, zero padding.
pool1 = tf.nn.max_pool(relu1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

# Layer 3: 5x5 convolution, 32 input channels, 64 filters, stride 1, zero padding.
conv2_weights = tf.get_variable("conv2_weights", [5, 5, 32, 64], initializer=tf.truncated_normal_initializer(stddev=0.1))
conv2_biases = tf.get_variable("conv2_biases", [64], initializer=tf.constant_initializer(0.0))
conv2 = tf.nn.conv2d(pool1, conv2_weights, strides=[1, 1, 1, 1], padding='SAME')
relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_biases))

# Layer 4: 2x2 max pooling, stride 2, zero padding.
pool2 = tf.nn.max_pool(relu2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

# Layer 5: fully connected. The pooled feature map is flattened into a
# 7 * 7 * 64 = 3136 element vector per sample before the matmul.
fc1_weights = tf.get_variable("fc1_weights", [7 * 7 * 64, 1024], initializer=tf.truncated_normal_initializer(stddev=0.1))
fc1_baises = tf.get_variable("fc1_baises", [1024], initializer=tf.constant_initializer(0.1))
pool2_vector = tf.reshape(pool2, [-1, 7 * 7 * 64])
fc1 = tf.nn.relu(tf.matmul(pool2_vector, fc1_weights) + fc1_baises)

# Dropout to reduce over-fitting; keep_prob is fed at run time.
keep_prob = tf.placeholder(tf.float32)
fc1_dropout = tf.nn.dropout(fc1, keep_prob)

# Layer 6: fully connected, 1024 units -> 10 class scores (logits).
fc2_weights = tf.get_variable("fc2_weights", [1024, 10], initializer=tf.truncated_normal_initializer(stddev=0.1))
fc2_biases = tf.get_variable("fc2_biases", [10], initializer=tf.constant_initializer(0.1))
fc2 = tf.matmul(fc1_dropout, fc2_weights) + fc2_biases

# Layer 7: softmax output, used for prediction.
y_conv = tf.nn.softmax(fc2)

# Cross-entropy loss. It is computed from the logits with the fused op rather
# than as -sum(y_ * log(softmax)): the manual form produces -inf/NaN as soon
# as a predicted probability underflows to 0.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=fc2))

# Optimizer that minimises the loss (back-propagation).
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

# tf.argmax returns the index of the largest entry along an axis; a sample is
# correct when the predicted and the true index agree.
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
# Accuracy is the mean of the 0/1 correctness indicators.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Training loop.
sess.run(tf.global_variables_initializer())
for i in range(10000):
    batch = mnist.train.next_batch(100)
    if i % 100 == 0:
        # Dropout is disabled (keep_prob = 1.0) while evaluating.
        train_accuracy = accuracy.eval(feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})
        print("step %d, training accuracy %g" % (i, train_accuracy))
    # 50% dropout while training.
    train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})

# Accuracy on the test set.
print("test accuracy %g" % accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))
AlexNet
AlexNet包含5个卷积层(进行了3次最大池化)、3个全连接层。模型结构如下:
AlexNet与LeNet相比,有了很多的改进:
- 使用了ReLU激活函数,提高训练速度(ReLU是一种非饱和函数,在训练时间上比饱和函数sigmoid、tanh快,而且ReLU利用了分片线性结构实现了非线性的表达能力,梯度消失现象较弱,有助于训练更深的网络)
- 使用GPU训练(可以提供数十倍乃至于上百倍于CPU的性能)
- 使用重叠池化(传统池化的窗口互不重叠,各窗口分别独立计算;AlexNet让相邻池化窗口部分重叠,有助于缓解过拟合)
- 局部响应归一化LRN(后续研究表明其效果有限,之后的网络大多改用批量归一化BN)
- 数据扩充(图像平移和翻转)、Dropout(随机丢弃节点),减少了过拟合
第一层:卷积层
该层的输入是原始图像的像素值,以MNIST数据集为例,则是28x28x1,第一层过滤器尺寸为5x5,深度设置为6,不使用0填充,因此该层的输出尺寸是28-5+1=24,深度也为6。
第二层:池化层
接受第一层的输出作为输入,过滤器大小选为2x2,步长2.
第三层:卷积层
卷积核大小5x5,深度为16,同样不使用0填充,步长为1。
第四层:池化层
池化过滤器采用2x2,步长2
第五层:全连接
卷积核为5x5,输出节点为120
第六层:全连接层
输入节点数120,输出节点数84
第七层:全连接层
输入84,输出10
# -*- coding: utf-8 -*-
from __future__ import print_function
from __future__ import absolute_import
from __future__ import division
import argparse
import sys
import input_data
import tensorflow as tf
# Load MNIST with one-hot encoded labels.
mnist = input_data.read_data_sets("MNIST_data", one_hot=True)
# Training hyper-parameters
learning_rate = 1e-4
training_iters = 300000
batch_size = 64
display_step = 20
# Network parameters
n_input = 784 # input dimension (flattened image)
n_classes = 10 # label dimension
dropout = 0.5 # value fed to the keep_prob placeholder while training
# Placeholder inputs
x = tf.placeholder(tf.float32, [None, n_input])
y = tf.placeholder(tf.float32, [None, n_classes])
keep_prob = tf.placeholder(tf.float32)
# 卷积操作
def conv2d(name, l_input, w, b, k):
    """Convolve, add the bias and apply ReLU.

    Args:
        name: name given to the final ReLU op.
        l_input: input tensor passed to tf.nn.conv2d.
        w: convolution kernel tensor.
        b: bias tensor added with tf.nn.bias_add.
        k: stride used along both spatial dimensions.

    Returns:
        The activated feature map (convolution uses 'SAME' padding).
    """
    conv = tf.nn.conv2d(l_input, w, strides=[1, k, k, 1], padding='SAME')
    return tf.nn.relu(tf.nn.bias_add(conv, b), name=name)
# 最大下采样操作
def max_pool(name, l_input, k1, k2):
    """Max-pool `l_input` with a k1 x k1 window and stride k2 ('SAME' padding).

    Args:
        name: name given to the pooling op.
        l_input: input tensor passed to tf.nn.max_pool.
        k1: window size along both spatial dimensions.
        k2: stride along both spatial dimensions.

    Returns:
        The pooled tensor.
    """
    window = [1, k1, k1, 1]
    step = [1, k2, k2, 1]
    return tf.nn.max_pool(l_input, ksize=window, strides=step, padding='SAME', name=name)
# 归一化操作
def norm(name, l_input, lsize=4):
    """Apply local response normalisation (tf.nn.lrn) to `l_input`.

    Args:
        name: name given to the LRN op.
        l_input: input tensor.
        lsize: value passed as the depth radius of tf.nn.lrn.

    Returns:
        The normalised tensor (bias=1.0, alpha=0.001 / 9.0, beta=0.75).
    """
    return tf.nn.lrn(l_input, lsize, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name=name)
# 定义整个网络
def alex_net(_X, _weights, _biases, _dropout):
    """Build a reduced AlexNet-style graph for 28x28 single-channel images.

    Three conv stages (conv -> LRN -> [max-pool] -> dropout) are followed by
    two fully connected layers and a linear output layer.

    Args:
        _X: flattened input batch; reshaped here to [-1, 28, 28, 1].
        _weights: dict with keys 'wc1', 'wc2', 'wc3', 'wd1', 'wd2', 'out'.
        _biases: dict with keys 'bc1', 'bc2', 'bc3', 'bd1', 'bd2', 'out'.
        _dropout: keep probability passed to every tf.nn.dropout call.

    Returns:
        The un-normalised class scores (logits).
    """
    # Vector -> image tensor.
    _X = tf.reshape(_X, shape=[-1, 28, 28, 1])

    # Stage 1: convolution (stride 2), LRN, 3x3 max-pool with stride 2, dropout.
    conv1 = conv2d('conv1', _X, _weights['wc1'], _biases['bc1'], 2)
    norm1 = norm('norm1', conv1, lsize=4)
    pool1 = max_pool('pool1', norm1, k1=3, k2=2)
    drop1 = tf.nn.dropout(pool1, _dropout)

    # Stage 2: convolution (stride 1), LRN, 3x3 max-pool with stride 2, dropout.
    conv2 = conv2d('conv2', drop1, _weights['wc2'], _biases['bc2'], 1)
    norm2 = norm('norm2', conv2, lsize=4)
    pool2 = max_pool('pool2', norm2, k1=3, k2=2)
    drop2 = tf.nn.dropout(pool2, _dropout)

    # Stage 3: convolution (stride 1), LRN, dropout. No pooling in this stage.
    conv3 = conv2d('conv3', drop2, _weights['wc3'], _biases['bc3'], 1)
    norm3 = norm('norm3', conv3, lsize=4)
    drop3 = tf.nn.dropout(norm3, _dropout)

    # NOTE: the conv4 / conv5 stages were disabled in the original code and
    # are not built here.

    # Fully connected layer 1: flatten the feature map to the input width of 'wd1'.
    flat_width = _weights['wd1'].get_shape().as_list()[0]
    dense1 = tf.reshape(drop3, [-1, flat_width])
    dense1 = tf.nn.relu(tf.matmul(dense1, _weights['wd1']) + _biases['bd1'], name='fc1')
    dense1 = tf.nn.dropout(dense1, _dropout)

    # Fully connected layer 2 with ReLU activation.
    dense2 = tf.nn.relu(tf.matmul(dense1, _weights['wd2']) + _biases['bd2'], name='fc2')

    # Linear output layer.
    out = tf.matmul(dense2, _weights['out']) + _biases['out']
    return out
# 存储所有的网络参数48
# Weights and biases stored as dictionaries keyed by layer name.
# NOTE(review): tf.random_normal is used with its default standard deviation;
# a smaller initial scale may train better - confirm before changing.
weights = {
    'wc1': tf.Variable(tf.random_normal([3, 3, 1, 64])),
    'wc2': tf.Variable(tf.random_normal([3, 3, 64, 128])),
    'wc3': tf.Variable(tf.random_normal([3, 3, 128, 256])),
    # Input width of the first dense layer: 4 * 4 * 256 flattened features.
    'wd1': tf.Variable(tf.random_normal([4*4*256, 1024])),
    'wd2': tf.Variable(tf.random_normal([1024, 1024])),
    'out': tf.Variable(tf.random_normal([1024, n_classes])),
}
biases = {
    'bc1': tf.Variable(tf.random_normal([64])),
    'bc2': tf.Variable(tf.random_normal([128])),
    'bc3': tf.Variable(tf.random_normal([256])),
    'bd1': tf.Variable(tf.random_normal([1024])),
    'bd2': tf.Variable(tf.random_normal([1024])),
    'out': tf.Variable(tf.random_normal([n_classes])),
}
# 构建模型
# Build the model.
pred = alex_net(x, weights, biases, keep_prob)

# Loss and training step (uses the learning_rate hyper-parameter instead of
# repeating the literal).
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)

# Evaluation ops.
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Variable initialiser (tf.initialize_all_variables is deprecated).
init = tf.global_variables_initializer()

# Run training.
with tf.Session() as sess:
    sess.run(init)
    step = 1
    # Keep training until the number of processed samples reaches training_iters.
    while step * batch_size < training_iters:
        # Fetch the next mini-batch.
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        sess.run(optimizer, feed_dict={x: batch_xs, y: batch_ys, keep_prob: dropout})
        if step % display_step == 0:
            # Loss and accuracy on the current batch, with dropout disabled;
            # both are fetched in a single run.
            loss, acc = sess.run([cost, accuracy],
                                 feed_dict={x: batch_xs, y: batch_ys, keep_prob: 1.})
            print("Iter " + str(step*batch_size) + ", Minibatch Loss= " + "{:.6f}".format(loss) +
                  ", Training Accuracy = " + "{:.5f}".format(acc))
        step += 1
    print("Optimization Finished!")

    # Test accuracy on the first 256 test images. The first figure is measured
    # with dropout still active (keep_prob 0.5), the second with dropout
    # disabled (keep_prob 1.0).
    test_xs = mnist.test.images[:256]
    test_ys = mnist.test.labels[:256]
    print("Testing Accuracy:", sess.run(accuracy, feed_dict={x: test_xs,
                                                             y: test_ys,
                                                             keep_prob: 0.5}))
    print('**********************')
    print("Testing Accuracy:", sess.run(accuracy, feed_dict={x: test_xs,
                                                             y: test_ys,
                                                             keep_prob: 1.0}))
SPPNet-空间金字塔
空间金字塔池化网络,在最后一个卷积层和第一个全连接层之间插入了一个空间金字塔池化层,用来池化特征并产生固定长度的输出。无需对输入图像进行裁剪和变形,就可以处理输入图像大小不同的情况。
SPP有几个引人注目的特征:
- SPP对于任意输入大小都能产生一个固定长度的输出,而滑动窗口池化不能
- SPP使用多级大小空间窗口,而滑动窗口池化只使用一个窗口大小
- SPP可以在不同尺度上提取特征并进行池化
# -*- coding: utf-8 -*-
import tensorflow as tf
import numpy as np
import pandas as pd
def spp_layer(input_, levels=4, name = 'SPP_layer',pool_type = 'max_pool'):
    """Spatial pyramid pooling: pool at several scales and concatenate.

    For every level 1..levels the input is pooled with a window/stride derived
    from the static height and width, flattened per sample, and the results
    of all levels are concatenated along axis 1.

    Args:
        input_: 4-D tensor; dimensions 1 and 2 must be statically known
            because the pooling window is computed from them.
        levels: number of pyramid levels (>= 1).
        name: variable scope name.
        pool_type: 'max_pool' selects max pooling, anything else average pooling.

    Returns:
        A 2-D tensor with one row per sample holding the concatenated features.

    Raises:
        ValueError: if `levels` is smaller than 1.
    """
    if levels < 1:
        raise ValueError("levels must be >= 1, got {}".format(levels))

    shape = input_.get_shape().as_list()
    height, width = shape[1], shape[2]
    pool_fn = tf.nn.max_pool if pool_type == 'max_pool' else tf.nn.avg_pool

    x_flatten = None
    with tf.variable_scope(name):
        for level in range(1, levels + 1):
            # Pooling geometry for this level. float() keeps true division
            # regardless of the interpreter's integer-division rules.
            # NOTE(review): the "+ 1" in window and stride is kept from the
            # original; verify it yields the intended bin count for sizes
            # that are exact multiples of the level.
            ksize = [1,
                     int(np.ceil(float(height) / level + 1)),
                     int(np.ceil(float(width) / level + 1)),
                     1]
            strides = [1,
                       int(np.floor(float(height) / level + 1)),
                       int(np.floor(float(width) / level + 1)),
                       1]
            pool = pool_fn(input_, ksize=ksize, strides=strides, padding='SAME')

            # Flatten per sample. When all feature dims are static, reshape
            # with a free batch dimension so a dynamic (None) batch works;
            # otherwise fall back to the original (batch, -1) form.
            feature_dims = pool.get_shape().as_list()[1:]
            if None in feature_dims:
                flat_shape = (shape[0], -1)
            else:
                flat_shape = (-1, int(np.prod(feature_dims)))
            pool = tf.reshape(pool, flat_shape)
            print("Pool Level {:}: shape {:}".format(level, pool.get_shape().as_list()))

            # Concatenate the scales one after another.
            if x_flatten is None:
                x_flatten = pool
            else:
                x_flatten = tf.concat((x_flatten, pool), axis=1)
            print("Pool Level {:}: shape {:}".format(level, x_flatten.get_shape().as_list()))
    return x_flatten
VGGNet
有两种基本类型:VGGNet-16、VGGNet-19。VGGNet全部使用3X3的卷积核和2X2的池化核。VGG 块的组成规律是:连续使用若干个相同的填充为 1 、窗口形状为3X3的卷积层后接上一个步幅为 2 、窗口形状为2X2的最大池化层。
对于给定的感受野(与输出有关的输入图片的局部大小),采用堆积的小卷积核优于采用大的卷积核,因为可以增加网络深度来保证学习更复杂的模式,而且代价还比较小(参数更少)。
常见的VGG网络有:VGG-11、VGG-13、VGG-16、VGG-19
from datetime import datetime
import tensorflow as tf
import math
import time
# Number of images in the random input batch built by run_benchmark.
batch_size = 32
# Number of timed iterations measured by time_tensorflow_run.
num_batches = 100
# 用来创建卷积层并把本层的参数存入参数列表
# input_op:输入的tensor name:该层的名称 kh:卷积层的高 kw:卷积层的宽 n_out:输出通道数,dh:步长的高 dw:步长的宽,p是参数列表
def conv_op(input_op,name,kh,kw,n_out,dh,dw,p):
    """Create a convolution layer with ReLU and record its parameters.

    Args:
        input_op: input tensor; its last dimension is the input channel count.
        name: layer name, used as the name scope.
        kh: kernel height.
        kw: kernel width.
        n_out: number of output channels.
        dh: stride height.
        dw: stride width.
        p: parameter list; the kernel and bias of this layer are appended in place.

    Returns:
        The activated output tensor.
    """
    # Number of input channels.
    n_in = input_op.get_shape()[-1].value
    with tf.name_scope(name) as scope:
        kernel = tf.get_variable(scope + "w",
                                 shape=[kh, kw, n_in, n_out],
                                 dtype=tf.float32,
                                 initializer=tf.contrib.layers.xavier_initializer_conv2d())
        conv = tf.nn.conv2d(input_op, kernel, (1, dh, dw, 1), padding='SAME')
        bias_init_val = tf.constant(0.0, shape=[n_out], dtype=tf.float32)
        biases = tf.Variable(bias_init_val, trainable=True, name='b')
        z = tf.nn.bias_add(conv, biases)
        activation = tf.nn.relu(z, name=scope)
        # In-place extension so the caller's list sees the new parameters.
        p += [kernel, biases]
        return activation
# 定义全连接层
def fc_op(input_op,name,n_out,p):
    """Create a fully connected layer with ReLU and record its parameters.

    Args:
        input_op: 2-D input tensor; its last dimension is the input width.
        name: layer name, used as the name scope.
        n_out: number of output units.
        p: parameter list; the weight matrix and bias are appended in place.

    Returns:
        The activated output tensor.
    """
    n_in = input_op.get_shape()[-1].value
    with tf.name_scope(name) as scope:
        # Xavier initialisation for a dense weight matrix (the generic
        # initializer rather than the conv2d-named one).
        kernel = tf.get_variable(scope + 'w',
                                 shape=[n_in, n_out],
                                 dtype=tf.float32,
                                 initializer=tf.contrib.layers.xavier_initializer())
        biases = tf.Variable(tf.constant(0.1, shape=[n_out], dtype=tf.float32), name='b')
        # tf.nn.relu_layer multiplies input_op by kernel, adds the bias and applies ReLU.
        activation = tf.nn.relu_layer(input_op, kernel, biases, name=scope)
        # In-place extension so the caller's list sees the new parameters.
        p += [kernel, biases]
        return activation
# 定义最大池化层
def mpool_op(input_op,name,kh,kw,dh,dw):
    """Max-pool `input_op` with a kh x kw window and dh x dw stride ('SAME' padding).

    Args:
        input_op: input tensor.
        name: name given to the pooling op.
        kh: window height.
        kw: window width.
        dh: stride height.
        dw: stride width.

    Returns:
        The pooled tensor.
    """
    return tf.nn.max_pool(input_op,
                          ksize=[1, kh, kw, 1],
                          strides=[1, dh, dw, 1],
                          padding='SAME',
                          name=name)
#定义网络结构
def inference_op(input_op,keep_prob):
    """Build the VGG-16 style network: 13 conv layers in 5 blocks plus 3 dense layers.

    Args:
        input_op: input image tensor.
        keep_prob: keep probability for the two dropout layers.

    Returns:
        A tuple (predictions, softmax, fc8, p): the arg-max class index, the
        softmax output, the output of the last dense layer (fc8), and the
        list of all kernels and biases created by the layers.
    """
    p = []

    # Block 1: two 3x3 convolutions with 64 channels, then 2x2 max-pool.
    conv1_1 = conv_op(input_op, name='conv1_1', kh=3, kw=3, n_out=64, dh=1, dw=1, p=p)
    conv1_2 = conv_op(conv1_1, name='conv1_2', kh=3, kw=3, n_out=64, dh=1, dw=1, p=p)
    pool1 = mpool_op(conv1_2, name='pool1', kh=2, kw=2, dw=2, dh=2)

    # Block 2: two 3x3 convolutions with 128 channels, then 2x2 max-pool.
    conv2_1 = conv_op(pool1, name='conv2_1', kh=3, kw=3, n_out=128, dh=1, dw=1, p=p)
    conv2_2 = conv_op(conv2_1, name='conv2_2', kh=3, kw=3, n_out=128, dh=1, dw=1, p=p)
    pool2 = mpool_op(conv2_2, name='pool2', kh=2, kw=2, dw=2, dh=2)

    # Block 3: three 3x3 convolutions with 256 channels, then 2x2 max-pool.
    conv3_1 = conv_op(pool2, name='conv3_1', kh=3, kw=3, n_out=256, dh=1, dw=1, p=p)
    conv3_2 = conv_op(conv3_1, name='conv3_2', kh=3, kw=3, n_out=256, dh=1, dw=1, p=p)
    conv3_3 = conv_op(conv3_2, name='conv3_3', kh=3, kw=3, n_out=256, dh=1, dw=1, p=p)
    pool3 = mpool_op(conv3_3, name='pool3', kh=2, kw=2, dw=2, dh=2)

    # Block 4: three 3x3 convolutions with 512 channels, then 2x2 max-pool.
    conv4_1 = conv_op(pool3, name='conv4_1', kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    conv4_2 = conv_op(conv4_1, name='conv4_2', kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    conv4_3 = conv_op(conv4_2, name='conv4_3', kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    pool4 = mpool_op(conv4_3, name='pool4', kh=2, kw=2, dw=2, dh=2)

    # Block 5: three 3x3 convolutions with 512 channels, then 2x2 max-pool.
    conv5_1 = conv_op(pool4, name='conv5_1', kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    conv5_2 = conv_op(conv5_1, name='conv5_2', kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    conv5_3 = conv_op(conv5_2, name='conv5_3', kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    pool5 = mpool_op(conv5_3, name='pool5', kh=2, kw=2, dw=2, dh=2)

    # Flatten the last feature map into one vector per sample.
    shp = pool5.get_shape()
    flattened_shape = shp[1].value * shp[2].value * shp[3].value
    resh1 = tf.reshape(pool5, [-1, flattened_shape], name="resh1")

    # Dense layers with dropout after fc6 and fc7.
    fc6 = fc_op(resh1, name="fc6", n_out=4096, p=p)
    fc6_drop = tf.nn.dropout(fc6, keep_prob, name='fc6_drop')
    fc7 = fc_op(fc6_drop, name="fc7", n_out=4096, p=p)
    fc7_drop = tf.nn.dropout(fc7, keep_prob, name="fc7_drop")
    fc8 = fc_op(fc7_drop, name="fc8", n_out=1000, p=p)

    softmax = tf.nn.softmax(fc8)
    predictions = tf.argmax(softmax, 1)
    return predictions, softmax, fc8, p
def time_tensorflow_run(session,target,feed,info_string):
    """Time repeated `session.run(target)` calls and print mean and std-dev.

    Runs 10 warm-up iterations that are not measured, followed by
    `num_batches` (module-level constant) measured iterations.

    Args:
        session: object whose run(target, feed_dict=...) method is timed.
        target: op or tensor(s) passed to session.run.
        feed: feed dictionary passed as feed_dict.
        info_string: label printed in the summary line.
    """
    num_steps_burn_in = 10  # warm-up iterations excluded from the statistics
    total_duration = 0.0  # sum of measured durations
    total_duration_squared = 0.0  # sum of squared durations, for the variance
    for i in range(num_batches + num_steps_burn_in):
        start_time = time.time()
        _ = session.run(target, feed_dict=feed)
        duration = time.time() - start_time
        if i >= num_steps_burn_in:  # only count iterations after warm-up
            if not i % 10:
                print('%s:step %d,duration = %.3f' % (datetime.now(), i - num_steps_burn_in, duration))
            total_duration += duration
            total_duration_squared += duration * duration
    mn = total_duration / num_batches  # mean time per batch
    # E[x^2] - E[x]^2 can come out marginally negative through floating-point
    # rounding when the durations are nearly identical; clamp so sqrt is defined.
    vr = max(total_duration_squared / num_batches - mn * mn, 0.0)
    sd = math.sqrt(vr)  # standard deviation
    print('%s: %s across %d steps, %.3f +/- %.3f sec/batch' % (datetime.now(), info_string, num_batches, mn, sd))
def run_benchmark():
    """Benchmark the forward and forward-backward pass of the network on random input."""
    with tf.Graph().as_default():
        image_size = 224  # input image size
        images = tf.Variable(tf.random_normal([batch_size, image_size, image_size, 3],
                                              dtype=tf.float32, stddev=1e-1))
        keep_prob = tf.placeholder(tf.float32)
        prediction, softmax, fc8, p = inference_op(images, keep_prob)
        init = tf.global_variables_initializer()
        # The context manager closes the session when the benchmark ends.
        with tf.Session() as sess:
            sess.run(init)
            time_tensorflow_run(sess, prediction, {keep_prob: 1.0}, "Forward")
            # Simulate a training step: an L2 loss on fc8 and its gradients
            # with respect to every model parameter.
            objective = tf.nn.l2_loss(fc8)
            grad = tf.gradients(objective, p)
            time_tensorflow_run(sess, grad, {keep_prob: 0.5}, "Forward-backward")
GoogLeNet
GoogLeNet专注于如何建立更深的网络结构,通过引入新型的基本结构-Inception模块,以增加网络宽度。GoogLeNet包括V1、V2、V3、V4版本。
Inception的作用: 代替人工确定卷积层中的过滤器类型或者确定是否需要创建卷积层和池化层,即:不需要人为的决定使用哪个过滤器,是否需要池化层等,由网络自行决定这些参数,可以给网络添加所有可能值,将输出连接起来,网络自己学习它需要什么样的参数。
1. Inception V1结构
由上图可以看出,Inception 块里有4条并行的线路,它通过不同窗口形状的卷积层和最大池化层来并行抽取信息,并使用1X1卷积层减少通道数从而降低模型复杂度。
2. Inception V2结构
用2个连续的3x3卷积层(stride=1)组成的小网络来代替单个的5x5卷积层,这便是Inception V2结构,保持感受野范围的同时又减少了参数量
3. Inception V3结构
考虑了 nx1 卷积核,如下图所示的取代3x3卷积:于是,任意nxn的卷积都可以通过1xn卷积后接nx1卷积来替代。
这里实现的是Inception V3卷积网络
# -*- coding:utf-8 -*-
import tensorflow as tf
from datetime import datetime
import time
import math
slim = tf.contrib.slim


def trunc_normal(stddev):
    """Return a zero-mean truncated-normal initializer with the given stddev."""
    return tf.truncated_normal_initializer(0.0, stddev)


parameters = []  # parameter store
#why?为什么要定义这个函数?
#因为若事先定义好slim.conv2d各种默认参数,包括激活函数、标准化器,后面定义卷积层将会非常容易:
# 1.代码整体美观
# 2.网络设计的工作量会大大减轻
def inception_v3_arg_scope(weight_decay=0.00004,
                           stddev=0.1,
                           batch_norm_var_collection='moving_vars'):
    """Build the default-argument scope used by the Inception V3 layers.

    The returned scope supplies defaults for the frequently used layer
    functions: L2 weight regularisation for slim.conv2d and
    slim.fully_connected, and for slim.conv2d additionally the weight
    initializer, ReLU activation and batch normalisation.

    Args:
        weight_decay: L2 regularisation coefficient.
        stddev: standard deviation of the truncated-normal weight initializer.
        batch_norm_var_collection: collection name for the batch-norm moving
            mean and moving variance variables.

    Returns:
        The arg_scope object, to be used as `with slim.arg_scope(sc):`.
    """
    batch_norm_params = {
        'decay': 0.9997,  # decay of the moving averages
        'epsilon': 0.001,  # small constant
        'updates_collections': tf.GraphKeys.UPDATE_OPS,
        'variables_collections': {
            'beta': None,
            'gamma': None,
            'moving_mean': [batch_norm_var_collection],
            'moving_variance': [batch_norm_var_collection],
        }
    }
    # slim.arg_scope assigns default values to arguments of the listed
    # functions, so they do not have to be repeated at every call; a call site
    # only passes an argument when it wants to override the default.
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        weights_regularizer=slim.l2_regularizer(weight_decay)):
        # Defaults that apply to slim.conv2d only.
        with slim.arg_scope(
                [slim.conv2d],
                weights_initializer=tf.truncated_normal_initializer(stddev=stddev),
                activation_fn=tf.nn.relu,
                normalizer_fn=slim.batch_norm,
                normalizer_params=batch_norm_params) as sc:
            return sc
def _inception_group1_module(net, scope, pool_proj_depth):
    """Group-1 module: 1x1 | 1x1-5x5 | 1x1-3x3-3x3 | avgpool-1x1, channel-concatenated.

    Relies on the caller's arg_scope for stride=1 / padding='SAME'.
    """
    with tf.variable_scope(scope):
        with tf.variable_scope('Branch_0'):
            branch_0 = slim.conv2d(net, 64, [1, 1], scope='conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
            branch_1 = slim.conv2d(net, 48, [1, 1], scope='conv2d_1a_1x1')
            branch_1 = slim.conv2d(branch_1, 64, [5, 5], scope='conv2d_1b_5x5')
        with tf.variable_scope('Branch_2'):
            branch_2 = slim.conv2d(net, 64, [1, 1], scope='conv2d_2a_1x1')
            branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='conv2d_2b_3x3')
            branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='conv2d_2c_3x3')
        with tf.variable_scope('Branch_3'):
            branch_3 = slim.avg_pool2d(net, [3, 3], scope='avgPool_3a_3x3')
            branch_3 = slim.conv2d(branch_3, pool_proj_depth, [1, 1],
                                   scope='conv2d_3b_1x1')
        # Merge the four branches along the channel axis.
        return tf.concat([branch_0, branch_1, branch_2, branch_3], 3)


def _inception_group2_module(net, scope, mid_depth, out_depth):
    """Group-2 module built from factorised 1x7 / 7x1 convolutions.

    `mid_depth` is the width of the intermediate convolutions and `out_depth`
    the width of the last convolution in Branch_1 and Branch_2.
    Relies on the caller's arg_scope for stride=1 / padding='SAME'.
    """
    with tf.variable_scope(scope):
        with tf.variable_scope('Branch_0'):
            branch_0 = slim.conv2d(net, 192, [1, 1], scope='conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
            branch_1 = slim.conv2d(net, mid_depth, [1, 1], scope='conv2d_1a_1x1')
            branch_1 = slim.conv2d(branch_1, mid_depth, [1, 7], scope='conv2d_1b_1x7')
            branch_1 = slim.conv2d(branch_1, out_depth, [7, 1], scope='conv2d_1c_7x1')
        with tf.variable_scope('Branch_2'):
            branch_2 = slim.conv2d(net, mid_depth, [1, 1], scope='conv2d_2a_1x1')
            branch_2 = slim.conv2d(branch_2, mid_depth, [7, 1], scope='conv2d_2b_7x1')
            branch_2 = slim.conv2d(branch_2, mid_depth, [1, 7], scope='conv2d_2c_1x7')
            branch_2 = slim.conv2d(branch_2, mid_depth, [7, 1], scope='conv2d_2d_7x1')
            branch_2 = slim.conv2d(branch_2, out_depth, [1, 7], scope='conv2d_2e_1x7')
        with tf.variable_scope('Branch_3'):
            branch_3 = slim.avg_pool2d(net, [3, 3], scope='avgPool_3a_3x3')
            branch_3 = slim.conv2d(branch_3, 192, [1, 1], scope='conv2d_3b_1x1')
        # Merge the four branches along the channel axis.
        return tf.concat([branch_0, branch_1, branch_2, branch_3], 3)


def _inception_group3_module(net, scope):
    """Group-3 module whose branches end in parallel 1x3 and 3x1 convolutions.

    Relies on the caller's arg_scope for stride=1 / padding='SAME'.
    """
    with tf.variable_scope(scope):
        with tf.variable_scope('Branch_0'):
            branch_0 = slim.conv2d(net, 320, [1, 1], scope='conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
            branch_1 = slim.conv2d(net, 384, [1, 1], scope='conv2d_1a_1x1')
            # Special case: the 1x3 and 3x1 outputs are concatenated, not chained.
            branch_1 = tf.concat([
                slim.conv2d(branch_1, 384, [1, 3], scope='conv2d_1a_1x3'),
                slim.conv2d(branch_1, 384, [3, 1], scope='conv2d_1a_3x1')
            ], 3)
        with tf.variable_scope('Branch_2'):
            # 448 channels as in the Inception V3 architecture (was mistyped as 488).
            branch_2 = slim.conv2d(net, 448, [1, 1], scope='conv2d_2a_1x1')
            branch_2 = slim.conv2d(branch_2, 384, [3, 3], scope='conv2d_2b_3x3')
            branch_2 = tf.concat([
                slim.conv2d(branch_2, 384, [1, 3], scope='conv2d_1a_1x3'),
                slim.conv2d(branch_2, 384, [3, 1], scope='conv2d_1a_3x1')
            ], 3)
        with tf.variable_scope('Branch_3'):
            branch_3 = slim.avg_pool2d(net, [3, 3], scope='avgPool_3a_3x3')
            branch_3 = slim.conv2d(branch_3, 192, [1, 1], scope='conv2d_3b_1x1')
        # Merge the four branches along the channel axis.
        return tf.concat([branch_0, branch_1, branch_2, branch_3], 3)


def inception_V3_base(input,scope=None):
    """Build the convolutional part of Inception V3.

    The network consists of a stem of interleaved convolutions and poolings
    followed by three groups of Inception modules.

    Args:
        input: input image tensor.
        scope: optional variable scope; defaults to 'inception_V3'.

    Returns:
        A tuple (net, end_points): the output of the last module and a dict
        holding the output of 'inception_2_m5', which feeds the auxiliary
        classifier.
    """
    end_points = {}
    with tf.variable_scope(scope, 'inception_V3', [input]):
        # Part 1 - stem: interleaved convolution and pooling layers.
        with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                            stride=1, padding='VALID'):
            net = slim.conv2d(input, 32, [3, 3], stride=2, scope='conv2d_1a_3x3')
            net = slim.conv2d(net, 32, [3, 3], scope='conv2d_2a_3x3')
            net = slim.conv2d(net, 64, [3, 3], padding='SAME', scope='conv2d_2b_3x3')
            net = slim.max_pool2d(net, [3, 3], stride=2, scope='maxPool_3a_3x3')
            net = slim.conv2d(net, 80, [1, 1], scope='conv2d_4a_3x3')
            net = slim.conv2d(net, 192, [3, 3], padding='SAME', scope='conv2d_4b_3x3')
            net = slim.max_pool2d(net, [3, 3], stride=2, scope='maxPool_5a_3x3')

        # Part 2 - Inception module groups.
        with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                            stride=1, padding='SAME'):
            # Group 1: three modules; only the pooling-branch depth differs.
            net = _inception_group1_module(net, 'inception_1_m1', pool_proj_depth=32)
            net = _inception_group1_module(net, 'inception_1_m2', pool_proj_depth=64)
            net = _inception_group1_module(net, 'inception_1_m3', pool_proj_depth=64)

            # Group 2, module 1: reduction module with three stride-2 branches.
            with tf.variable_scope('inception_2_m1'):
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net, 384, [3, 3], stride=2,
                                           padding='VALID', scope='conv2d_0a_3x3')
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net, 64, [1, 1], scope='conv2d_1a_1x1')
                    branch_1 = slim.conv2d(branch_1, 96, [3, 3], scope='conv2d_1b_3x3')
                    branch_1 = slim.conv2d(branch_1, 96, [3, 3], stride=2,
                                           padding='VALID', scope='conv2d_1c_3x3')
                with tf.variable_scope('Branch_2'):
                    branch_2 = slim.max_pool2d(net, [3, 3], stride=2,
                                               padding='VALID', scope='maxPool_2a_3x3')
                # Merge the three branches along the channel axis.
                net = tf.concat([branch_0, branch_1, branch_2], 3)

            # Group 2, modules 2-5.
            net = _inception_group2_module(net, 'inception_2_m2', mid_depth=128, out_depth=128)
            net = _inception_group2_module(net, 'inception_2_m3', mid_depth=160, out_depth=192)
            net = _inception_group2_module(net, 'inception_2_m4', mid_depth=160, out_depth=192)
            net = _inception_group2_module(net, 'inception_2_m5', mid_depth=160, out_depth=192)
            # Stored as the input of the auxiliary classifier.
            end_points['inception_2_m5'] = net

            # Group 3, module 1: reduction module with three stride-2 branches.
            with tf.variable_scope('inception_3_m1'):
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net, 192, [1, 1], scope='conv2d_0a_1x1')
                    branch_0 = slim.conv2d(branch_0, 320, [3, 3], stride=2,
                                           padding='VALID', scope='conv2d_0b_3x3')
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net, 192, [1, 1], scope='conv2d_1a_1x1')
                    branch_1 = slim.conv2d(branch_1, 192, [1, 7], scope='conv2d_1b_1x7')
                    branch_1 = slim.conv2d(branch_1, 192, [7, 1], scope='conv2d_1c_7x1')
                    branch_1 = slim.conv2d(branch_1, 192, [3, 3], stride=2,
                                           padding='VALID', scope='conv2d_1c_3x3')
                with tf.variable_scope('Branch_2'):
                    branch_2 = slim.max_pool2d(net, [3, 3], stride=2,
                                               padding='VALID', scope='maxPool_3a_3x3')
                # Merge the three branches along the channel axis.
                net = tf.concat([branch_0, branch_1, branch_2], 3)

            # Group 3, modules 2-3.
            net = _inception_group3_module(net, 'inception_3_m2')
            net = _inception_group3_module(net, 'inception_3_m3')
            return net, end_points
############################# 卷积部分完成 ########################################
#第三部分:全局平均池化、softmax、Auxiliary Logits
def inception_v3(input,
                 num_classes=1000,
                 is_training=True,
                 dropout_keep_prob=0.8,
                 prediction_fn=slim.softmax,
                 spatial_squeeze=True,
                 reuse=None,
                 scope='inceptionV3'):
    """Build the full Inception V3 classifier on top of inception_V3_base.

    Adds the auxiliary classifier (fed by 'inception_2_m5'), the average
    pooling over the final feature map, dropout and the final logits.

    Args:
        input: input image tensor.
        num_classes: number of output classes.
        is_training: passed to slim.batch_norm and slim.dropout.
        dropout_keep_prob: keep probability of the dropout before the logits.
        prediction_fn: function turning logits into predictions.
        spatial_squeeze: if True, squeeze dimensions 1 and 2 out of the logits.
        reuse: passed to tf.variable_scope.
        scope: variable scope name.

    Returns:
        A tuple (logits, end_points); end_points holds 'inception_2_m5',
        'Auxiliary_Logits', 'PreLogits', 'Logits' and 'Predictions'.
    """
    with tf.variable_scope(scope, 'inceptionV3', [input, num_classes],
                           reuse=reuse) as scope:
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            net, end_points = inception_V3_base(input, scope=scope)

            # Auxiliary classifier.
            with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                                stride=1, padding='SAME'):
                aux_logits = end_points['inception_2_m5']
                with tf.variable_scope('Auxiliary_Logits'):
                    aux_logits = slim.avg_pool2d(
                        aux_logits, [5, 5], stride=3, padding='VALID',
                        scope='AvgPool_1a_5x5')
                    aux_logits = slim.conv2d(aux_logits, 128, [1, 1],
                                             scope='conv2d_1b_1x1')
                    aux_logits = slim.conv2d(aux_logits, 768, [5, 5],
                                             weights_initializer=trunc_normal(0.01),
                                             padding='VALID',
                                             scope='conv2d_2a_5x5')
                    # Linear layer: no activation and no normalisation.
                    aux_logits = slim.conv2d(aux_logits, num_classes, [1, 1],
                                             activation_fn=None,
                                             normalizer_fn=None,
                                             weights_initializer=trunc_normal(0.001),
                                             scope='conv2d_2b_1x1')
                    if spatial_squeeze:
                        aux_logits = tf.squeeze(aux_logits, [1, 2], name='SpatialSqueeze')
                    end_points['Auxiliary_Logits'] = aux_logits

            # Main classifier: 8x8 average pooling, dropout, 1x1 conv to class scores.
            with tf.variable_scope('Logits'):
                net = slim.avg_pool2d(net, [8, 8], padding='VALID',
                                      scope='avgPool_1a_8x8')
                net = slim.dropout(net, keep_prob=dropout_keep_prob,
                                   scope='dropout_1b')
                end_points['PreLogits'] = net
                logits = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
                                     normalizer_fn=None,
                                     scope='conv2d_1c_1x1')
                if spatial_squeeze:
                    logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze')
            end_points['Logits'] = logits
            end_points['Predictions'] = prediction_fn(logits, scope='Predictions')
    return logits, end_points
def time_compute(session, target, info_string):
    """Time repeated `session.run(target)` calls and print mean and std-dev.

    Runs 10 warm-up iterations that are not measured, followed by 100
    measured iterations; the duration is printed every 10th iteration.

    Args:
        session: object whose run(target) method is timed.
        target: op or tensor(s) passed to session.run.
        info_string: label printed in the summary line.
    """
    num_batch = 100
    # Warm-up iterations: the first runs are skipped because of one-off costs
    # such as memory loading and cache misses.
    num_step_burn_in = 10
    total_duration = 0.0  # sum of measured durations
    total_duration_squared = 0.0  # sum of squared durations, for the variance
    for i in range(num_batch + num_step_burn_in):
        start_time = time.time()
        _ = session.run(target)
        duration = time.time() - start_time
        if i >= num_step_burn_in:
            if i % 10 == 0:  # report the duration every 10 iterations
                print("%s: step %d,duration=%.5f " % (datetime.now(), i - num_step_burn_in, duration))
            total_duration += duration
            total_duration_squared += duration * duration
    time_mean = total_duration / num_batch
    # E[x^2] - E[x]^2 can come out marginally negative through floating-point
    # rounding when the durations are nearly identical; clamp so sqrt is defined.
    time_variance = max(total_duration_squared / num_batch - time_mean * time_mean, 0.0)
    time_stddev = math.sqrt(time_variance)
    # Summary after all iterations.
    print("%s: %s across %d steps,%.3f +/- %.3f sec per batch " %
          (datetime.now(), info_string, num_batch, time_mean, time_stddev))
def main():
    """Build Inception V3 on random input and benchmark its forward pass."""
    with tf.Graph().as_default():
        batch_size = 32
        height, width = 299, 299
        images = tf.random_uniform((batch_size, height, width, 3))
        with slim.arg_scope(inception_v3_arg_scope()):
            logits, end_points = inception_v3(images, is_training=False)
        init = tf.global_variables_initializer()
        # The context manager closes the session when the benchmark ends.
        with tf.Session() as sess:
            # Write the graph structure to the log directory, then close the
            # writer so the event file is flushed to disk.
            writer = tf.summary.FileWriter('logs/', sess.graph)
            writer.close()
            sess.run(init)
            time_compute(sess, logits, 'Forward')


if __name__ == '__main__':
    main()
4. Inception V4结构
它结合了残差神经网络ResNet。
ResNet-残差网络
随着网络结构的加深, 梯度消失或梯度爆炸问题会越来越严重,可能导致神经网络学习和训练变得越来越困难。通过初始化、随机丢弃、归一化等技巧可以得到一定程度的缓和,而ResNet使用了在网络中增加信息传递快速通道的方法,信息可以无障碍地跨越多层直接传递到后面的层。
残差网络引入了跨层连接,构造了残差模块。基于残差模块,深层残差网络可以具有非常深的结构,深度甚至可以达到1000层以上。
import os
import config
import random
import numpy as np
import tensorflow as tf
from config import resnet_config
from data_loader import DataLoader
from eval.evaluate import accuracy
class ResNet(object):
    """Residual network (depth 18/34/50/101) built with the TF 1.x layers API.

    The graph (placeholders, network, loss, optimizer) is built in the
    constructor; ``fit`` trains it and writes checkpoints, ``predict``
    restores the latest checkpoint and returns class indices.
    """

    def __init__(self,
                 depth=resnet_config.depth,
                 height=config.height,
                 width=config.width,
                 channel=config.channel,
                 num_classes=config.num_classes,
                 learning_rate=resnet_config.learning_rate,
                 learning_decay_rate=resnet_config.learning_decay_rate,
                 learning_decay_steps=resnet_config.learning_decay_steps,
                 epoch=resnet_config.epoch,
                 batch_size=resnet_config.batch_size,
                 model_path=resnet_config.model_path,
                 summary_path=resnet_config.summary_path):
        """Store the hyper-parameters and build the computation graph.

        :param depth: network depth; one of 18, 34, 50, 101.
        :param height: input image height.
        :param width: input image width.
        :param channel: number of input channels.
        :param num_classes: number of output classes.
        :param learning_rate: initial learning rate.
        :param learning_decay_rate: factor of the exponential decay.
        :param learning_decay_steps: steps between two decays.
        :param epoch: number of training epochs.
        :param batch_size: training batch size.
        :param model_path: directory in which checkpoints are stored.
        :param summary_path: directory in which summaries are written.
        """
        self.depth = depth
        self.height = height
        self.width = width
        self.channel = channel
        self.learning_rate = learning_rate
        self.learning_decay_rate = learning_decay_rate
        self.learning_decay_steps = learning_decay_steps
        self.epoch = epoch
        self.batch_size = batch_size
        self.num_classes = num_classes
        self.model_path = model_path
        self.summary_path = summary_path

        # Residual blocks per stack, keyed by network depth.
        self.num_block_dict = {18: [2, 2, 2, 2],
                               34: [3, 4, 6, 3],
                               50: [3, 4, 6, 3],
                               101: [3, 4, 23, 3]}
        # Whether the given depth uses three-layer bottleneck blocks.
        self.bottleneck_dict = {18: False,
                                34: False,
                                50: True,
                                101: True}
        assert self.depth in self.num_block_dict, 'depth should be in [18,34,50,101]'

        # Filters of the inner conv layers / of the last bottleneck layer,
        # one entry per stack.
        self.filter_out = [64, 128, 256, 512]
        self.filter_out_last_layer = [256, 512, 1024, 2048]
        # Channel count of the final feature map fed to the dense layer.
        self.conv_out_depth = self.filter_out[-1] if self.depth < 50 else self.filter_out_last_layer[-1]
        self.num_block = self.num_block_dict[self.depth]
        self.bottleneck = self.bottleneck_dict[self.depth]

        self.input_x = tf.placeholder(tf.float32, shape=[None, self.height, self.width, self.channel], name='input_x')
        self.input_y = tf.placeholder(tf.float32, shape=[None, self.num_classes], name='input_y')
        self.prediction = None
        self.loss = None
        self.acc = None
        self.global_step = None
        self.optimize = None
        self.data_loader = DataLoader()
        self.model()

    def model(self):
        """Build the network and set prediction, acc, loss and optimize ops."""
        x = self.conv(x=self.input_x, k_size=7, filters_out=64, strides=2, activation=True, name='First_Conv')
        x = tf.layers.max_pooling2d(x, pool_size=[3, 3], strides=2, padding='same', name='max_pool')
        x = self.stack_block(x)
        # Pool over the whole remaining spatial extent, then flatten.
        x = tf.layers.average_pooling2d(x, pool_size=x.get_shape()[1:3], strides=1, name='average_pool')
        x = tf.reshape(x, [-1, 1 * 1 * self.conv_out_depth])
        fc_W = tf.truncated_normal_initializer(stddev=0.1)
        logits = tf.layers.dense(inputs=x, units=self.num_classes, kernel_initializer=fc_W)

        # Predicted class index.
        self.prediction = tf.argmax(logits, axis=-1)
        # Accuracy.
        self.acc = accuracy(logits, self.input_y)
        # Loss.
        self.loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=self.input_y))
        # Global step counter.
        self.global_step = tf.train.get_or_create_global_step()
        # Exponentially decayed learning rate.
        learning_rate = tf.train.exponential_decay(learning_rate=self.learning_rate,
                                                   global_step=self.global_step,
                                                   decay_rate=self.learning_decay_rate,
                                                   decay_steps=self.learning_decay_steps,
                                                   staircase=True)
        # global_step must be handed to minimize(); otherwise it is never
        # incremented and the decay schedule above stays at its initial value.
        self.optimize = tf.train.AdamOptimizer(learning_rate).minimize(self.loss,
                                                                       global_step=self.global_step)

    def stack_block(self, input_x):
        """Append the four stacks of residual blocks to ``input_x``.

        :param input_x: feature map produced by the stem (conv + max pool).
        :return: output tensor of the last residual block.
        """
        for stack in range(4):
            # Every stack except the first halves the spatial size.
            stack_strides = 1 if stack == 0 else 2
            stack_name = 'stack_%s' % stack
            with tf.name_scope(stack_name):
                for block in range(self.num_block[stack]):
                    shortcut = input_x
                    # Only the first block of a stack applies the stride.
                    block_strides = stack_strides if block == 0 else 1
                    block_name = stack_name + '_block_%s' % block

                    # (filters, kernel size) of each conv layer in the block.
                    if self.bottleneck:
                        layer_specs = [(self.filter_out[stack], 1),
                                       (self.filter_out[stack], 3),
                                       (self.filter_out_last_layer[stack], 1)]
                    else:
                        layer_specs = [(self.filter_out[stack], 3),
                                       (self.filter_out[stack], 3)]
                    last_layer = len(layer_specs) - 1

                    with tf.name_scope(block_name):
                        for layer, (filters, k_size) in enumerate(layer_specs):
                            with tf.name_scope(block_name + '_layer_%s' % layer):
                                # The stride is applied by the first layer;
                                # the last layer gets no ReLU because the
                                # activation follows the shortcut addition.
                                layer_strides = block_strides if layer < 1 else 1
                                activation = layer < last_layer
                                layer_name = block_name + '_conv_%s' % layer
                                input_x = self.conv(x=input_x, filters_out=filters, k_size=k_size,
                                                    strides=layer_strides, activation=activation,
                                                    name=layer_name)

                        shortcut_depth = shortcut.get_shape()[-1]
                        input_x_depth = input_x.get_shape()[-1]
                        with tf.name_scope('shortcut_connect'):
                            if shortcut_depth != input_x_depth:
                                # Project the shortcut with a 1x1 conv so its
                                # shape matches the block output.
                                connect_filter = layer_specs[-1][0]
                                shortcut_name = block_name + '_shortcut'
                                shortcut = self.conv(x=shortcut, filters_out=connect_filter, k_size=1,
                                                     strides=block_strides, activation=False,
                                                     name=shortcut_name)
                            input_x = tf.nn.relu(shortcut + input_x)
        return input_x

    def conv(self, x, k_size, filters_out, strides, activation, name):
        """Apply conv2d ('same' padding) + batch normalization (+ ReLU).

        :param x: input tensor.
        :param k_size: convolution kernel size.
        :param filters_out: number of output filters.
        :param strides: convolution stride.
        :param activation: apply ReLU after batch normalization when truthy.
        :param name: layer name; the batch-norm layer is named ``name + '_BN'``.
        :return: output tensor.
        """
        x = tf.layers.conv2d(x, filters=filters_out, kernel_size=k_size, strides=strides, padding='same', name=name)
        # NOTE(review): `training` is left at its default here, so no training
        # flag or UPDATE_OPS dependency is wired in - confirm this is intended.
        x = tf.layers.batch_normalization(x, name=name + '_BN')
        if activation:
            x = tf.nn.relu(x)
        return x

    def fit(self, train_id_list, valid_img, valid_label):
        """Train the model, validating and checkpointing after every step.

        :param train_id_list: array of training sample ids (must support
            ``tolist()``); batches of ids are passed to
            ``self.data_loader.get_batch_data``.
        :param valid_img: validation images fed to ``input_x``.
        :param valid_label: validation labels fed to ``input_y``.
        :return: None
        """
        # Make sure the output directories exist.
        if not os.path.exists(self.model_path):
            os.makedirs(self.model_path)
        if not os.path.exists(self.summary_path):
            os.makedirs(self.summary_path)

        train_steps = 0
        best_valid_acc = 0.0
        eval_interval = 1  # validate after every training step

        # Summary setup.
        tf.summary.scalar('loss', self.loss)
        merged = tf.summary.merge_all()

        # Checkpoints are written *inside* model_path so that predict(), which
        # looks up the checkpoint state of that directory, can find them.
        checkpoint_prefix = os.path.join(self.model_path, 'model')
        valid_feed = {self.input_x: valid_img, self.input_y: valid_label}

        # The context manager / finally make sure the session and the summary
        # writer are released even when training is interrupted by an error.
        with tf.Session() as sess:
            writer = tf.summary.FileWriter(self.summary_path, sess.graph)
            try:
                saver = tf.train.Saver(max_to_keep=10)
                sess.run(tf.global_variables_initializer())
                for epoch in range(self.epoch):
                    # Reshuffle the ids and split them into batches.
                    shuffle_id_list = random.sample(train_id_list.tolist(), len(train_id_list))
                    batch_num = int(np.ceil(len(shuffle_id_list) / self.batch_size))
                    train_id_batch = np.array_split(shuffle_id_list, batch_num)
                    for this_batch in train_id_batch:
                        batch_img, batch_label = self.data_loader.get_batch_data(this_batch)
                        train_steps += 1
                        feed_dict = {self.input_x: batch_img, self.input_y: batch_label}
                        _, train_loss, train_acc = sess.run([self.optimize, self.loss, self.acc],
                                                            feed_dict=feed_dict)
                        if train_steps % eval_interval == 0:
                            # One run yields metrics and summary together
                            # instead of two passes over the validation set.
                            val_loss, val_acc, summary = sess.run([self.loss, self.acc, merged],
                                                                  feed_dict=valid_feed)
                            msg = 'epoch:%s | steps:%s | train_loss:%.4f | val_loss:%.4f | train_acc:%.4f | val_acc:%.4f' % (
                                epoch, train_steps, train_loss, val_loss, train_acc, val_acc)
                            print(msg)
                            writer.add_summary(summary, global_step=train_steps)
                            # Keep a checkpoint whenever validation accuracy
                            # is at least as good as the best seen so far.
                            if val_acc >= best_valid_acc:
                                best_valid_acc = val_acc
                                saver.save(sess, save_path=checkpoint_prefix, global_step=train_steps)
            finally:
                writer.close()

    def predict(self, x):
        """Restore the latest checkpoint and predict class indices for ``x``.

        :param x: input batch fed to ``input_x``.
        :return: result of evaluating ``self.prediction`` (argmax of logits).
        :raises IOError: if no checkpoint is found in ``self.model_path``.
        """
        ckpt = tf.train.get_checkpoint_state(self.model_path)
        if ckpt is None or not ckpt.model_checkpoint_path:
            raise IOError('no checkpoint found in %s' % self.model_path)
        saver = tf.train.Saver(tf.global_variables())
        # Close the session once the prediction has been computed.
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            saver.restore(sess, ckpt.model_checkpoint_path)
            return sess.run(self.prediction, feed_dict={self.input_x: x})
DenseNet-密连网络
残差网络在层间加入跨层连接,使得即使是成百上千层的网络,也可以得到有效的训练。不过,残差网络一般只采用2~3层的跨层连接形成残差模块,密连卷积网络(DenseNet)则通过引入密连模块代替残差模块,进一步扩展了残差网络的结构。与残差模块的区别在于,密连模块内部允许任意两个非相邻层之间进行跨层连接。
import tensorflow as tf
import tensorflow.contrib.slim as slim
def conv_layer(input, filters, kernel_size, stride=1, layer_name="conv"):
    """Apply a ``slim.conv2d`` layer inside a name scope called ``layer_name``.

    :param input: input tensor.
    :param filters: number of output filters.
    :param kernel_size: convolution kernel size.
    :param stride: convolution stride (previously accepted but ignored).
    :param layer_name: name used for both the name scope and the slim scope.
    :return: output tensor of the convolution.
    """
    with tf.name_scope(layer_name):
        # Forward `stride`; it used to be dropped silently, so any caller
        # asking for a strided convolution got a stride-1 layer instead.
        net = slim.conv2d(input, filters, kernel_size, stride=stride, scope=layer_name)
    return net
class DenseNet(object):
    """Densely connected network assembled from dense and transition blocks."""

    def __init__(self, x, nb_blocks, filters, sess):
        """Build the model graph on ``x``.

        :param x: input tensor.
        :param nb_blocks: number of (dense block, transition layer) pairs.
        :param filters: number of filters used by every convolution.
        :param sess: session object, stored on the instance.
        """
        self.nb_blocks = nb_blocks
        self.filters = filters
        self.model = self.build_model(x)
        self.sess = sess

    def bottleneck_layer(self, x, scope):
        """BN -> ReLU -> 1x1 conv -> BN -> ReLU -> 3x3 conv."""
        with tf.name_scope(scope):
            x = slim.batch_norm(x)
            x = tf.nn.relu(x)
            x = conv_layer(x, self.filters, kernel_size=(1, 1), layer_name=scope + '_conv1')
            x = slim.batch_norm(x)
            x = tf.nn.relu(x)
            x = conv_layer(x, self.filters, kernel_size=(3, 3), layer_name=scope + '_conv2')
        return x

    def transition_layer(self, x, scope):
        """BN -> 1x1 conv -> 2x2 average pooling."""
        with tf.name_scope(scope):
            x = slim.batch_norm(x)
            x = conv_layer(x, self.filters, kernel_size=(1, 1), layer_name=scope + '_conv1')
            x = slim.avg_pool2d(x, 2)
        return x

    def dense_block(self, input_x, nb_layers, layer_name):
        """Chain bottleneck layers, each fed the concat of all earlier outputs.

        One bottleneck layer is applied to ``input_x`` first, then
        ``nb_layers`` more are applied to the channel-wise concatenation of
        the block input and every previous bottleneck output.

        :param input_x: input tensor of the block.
        :param nb_layers: number of bottleneck layers after the initial one.
        :param layer_name: prefix for the name scopes in this block.
        :return: output of the last bottleneck layer.
        """
        with tf.name_scope(layer_name):
            layers_concat = [input_x]
            x = self.bottleneck_layer(input_x, layer_name + '_bottleN_' + str(0))
            layers_concat.append(x)
            # `range` instead of the Python-2-only `xrange`, matching the rest
            # of the file.
            for i in range(nb_layers):
                x = tf.concat(layers_concat, axis=3)
                x = self.bottleneck_layer(x, layer_name + '_bottleN_' + str(i + 1))
                layers_concat.append(x)
            # NOTE(review): only the last bottleneck output is returned, not
            # the concatenation of all layers - confirm this is intended.
            return x

    def build_model(self, input_x):
        """Stack the stem and ``nb_blocks`` dense/transition pairs.

        :param input_x: input tensor.
        :return: output tensor of the last transition layer.
        """
        x = conv_layer(input_x, self.filters, kernel_size=(7, 7), layer_name='conv0')
        x = slim.max_pool2d(x, (3, 3))
        for i in range(self.nb_blocks):
            print(i)
            x = self.dense_block(x, 4, 'dense_' + str(i))
            x = self.transition_layer(x, 'trans_' + str(i))
        return x
以上展示了卷积神经网络的演变历史以及每个CNN的结构和实现,读者可以手动实现一遍,加深对各类模型的理解~