Gesture Image Recognition with a DNN Softmax Classifier


1. Dataset

The dataset consists of hand-gesture images with six different labels, 0 through 5. (Dataset link)

2. Principles and Methods

  • A deep neural network (DNN) is trained on the dataset
  • Softmax handles the multi-class classification, and the position of the largest output is taken as the predicted label (see the sketch after this list)
  • One-hot encoding maps the labels 0-5 to vectors
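To make the second point concrete, here is a minimal NumPy sketch (independent of the model below) of how softmax turns a vector of logits into probabilities and how argmax picks the label:

import numpy as np

def softmax(z):
    e = np.exp(z - np.max(z))  # subtract the max for numerical stability
    return e / np.sum(e)       # normalize into a probability distribution

z = np.array([1.2, 0.3, 2.5, -0.7, 0.9, 0.1])  # one logit per class 0-5
probs = softmax(z)
print(probs.sum())       # 1.0 -- softmax outputs sum to one
print(np.argmax(probs))  # 2  -- the largest probability gives the predicted label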

3. Imports

import tensorflow as tf
import numpy as np
import tf_utils
import matplotlib.pyplot as plt
from tensorflow.python.framework import ops  # used to reset the graph so tf.get_variable can be re-run without name collisions

4. Dataset Preprocessing

(1) Inspect an image

train_x, train_y, test_x, test_y, classes = tf_utils.load_dataset()
index = 5
plt.imshow(train_x[index])
print("y =", np.squeeze(train_y[: ,index]))
print("train_x.shape:", train_x.shape)

[Figure: the training image at index 5]

(2) Flatten the images

# Flatten each image and transpose so each column is one example
train_x_flatten = train_x.reshape(train_x.shape[0], -1).T
test_x_flatten = test_x.reshape(test_x.shape[0], -1).T
print("train_x_flatten.shape:", train_x_flatten.shape)
train_x_flatten.shape: (12288, 1080)
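Why (12288, 1080)? Each 64x64x3 image flattens to 64*64*3 = 12288 values, and the transpose puts one image per column. A quick sanity check with dummy data:

import numpy as np

dummy = np.zeros((2, 64, 64, 3))            # 2 fake images
flat = dummy.reshape(dummy.shape[0], -1).T  # flatten each image, one per column
print(flat.shape)                           # (12288, 2)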

(3) Normalization

# Normalize pixel values to [0, 1]
train_x1 = train_x_flatten / 255
test_x1 = test_x_flatten / 255

(4) One-hot encode the labels

# One-hot encode the labels (6 classes)
train_y1 = tf_utils.convert_to_one_hot(train_y, 6)
test_y1 = tf_utils.convert_to_one_hot(test_y, 6)
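convert_to_one_hot comes from the course's tf_utils helper file; the following NumPy version is a plausible equivalent (an assumption about the helper, not its actual source):

import numpy as np

def convert_to_one_hot(Y, C):
    # Indexing the C x C identity matrix by the labels picks one row per
    # sample; transposing gives shape (C, m) to match the network layout.
    return np.eye(C)[Y.reshape(-1)].T

y = np.array([[0, 2, 5]])
print(convert_to_one_hot(y, 6))  # column j is the one-hot vector for label y[0, j]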

(5) Data overview

print("训练集样本数:", train_x1.shape[1])
print("测试集样本数:", test_x1.shape[1])
print("训练集图像:", train_x1.shape)
print("训练集标签:", train_y1.shape)
print("测试集图像:", test_x1.shape)
print("测试集标签", test_y1.shape)
训练集样本数: 1080
测试集样本数: 120
训练集图像: (12288, 1080)
训练集标签: (6, 1080)
测试集图像: (12288, 120)
测试集标签 (6, 120)

5. Building-Block Functions

# Create placeholders for the input features and labels
def create_placeholder(n_x, n_y):
    x = tf.placeholder(tf.float32, [n_x, None], name="x")
    y = tf.placeholder(tf.float32, [n_y, None], name="y")
    return x, y

# Initialize parameters: Xavier for the weights, zeros for the biases
def initialize_parameters():
    w1 = tf.get_variable("w1",[25,12288],initializer=tf.contrib.layers.xavier_initializer(seed=1))
    b1 = tf.get_variable("b1",[25,1],initializer=tf.zeros_initializer())
    w2 = tf.get_variable("w2", [12, 25], initializer = tf.contrib.layers.xavier_initializer(seed=1))
    b2 = tf.get_variable("b2", [12, 1], initializer = tf.zeros_initializer())
    w3 = tf.get_variable("w3", [6, 12], initializer = tf.contrib.layers.xavier_initializer(seed=1))
    b3 = tf.get_variable("b3", [6, 1], initializer = tf.zeros_initializer())
#     Alternative: plain random-normal initialization (not used)
#     w1 = tf.Variable(tf.random_normal([25, 12288]))
#     b1 = tf.Variable(tf.zeros([25, 1]))
#     w2 = tf.Variable(tf.random_normal([12, 25]))
#     b2 = tf.Variable(tf.zeros([12, 1]))
#     w3 = tf.Variable(tf.random_normal([6, 12]))
#     b3 = tf.Variable(tf.zeros([6, 1]))
    
    parameters = {
        "w1": w1,
        "b1": b1,
        "w2": w2,
        "b2": b2,
        "w3": w3,
        "b3": b3
    }
    return parameters

# Forward propagation: LINEAR -> ReLU -> LINEAR -> ReLU -> LINEAR
def forward_propagation(X, parameters):
    w1 = parameters["w1"]
    b1 = parameters["b1"]
    w2 = parameters["w2"]
    b2 = parameters["b2"]
    w3 = parameters["w3"]
    b3 = parameters["b3"]
    
    z1 = tf.matmul(w1, X) + b1
    a1 = tf.nn.relu(z1)
    z2 = tf.matmul(w2, a1) + b2
    a2 = tf.nn.relu(z2)
    z3 = tf.matmul(w3, a2) + b3
    # a3 = tf.nn.softmax(z3) -- softmax is folded into the cost function, so return the logits z3
    return z3

# Compute the softmax cross-entropy cost
def compute_cost(z3, Y):
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=tf.transpose(z3), labels=tf.transpose(Y)))
    return cost
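The transposes are needed because tf.nn.softmax_cross_entropy_with_logits_v2 expects shape (batch, num_classes), one example per row, while this network keeps one example per column. For intuition, here is the same cost re-derived in NumPy (a sketch for understanding, not used by the model):

import numpy as np

def compute_cost_np(z3, Y):
    # z3, Y have shape (6, m): column-wise softmax, then the mean
    # negative log-likelihood over the m examples.
    e = np.exp(z3 - z3.max(axis=0, keepdims=True))   # stabilized exponentials
    a3 = e / e.sum(axis=0, keepdims=True)            # softmax over the class axis
    return -np.mean(np.sum(Y * np.log(a3), axis=0))  # mean cross-entropy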

6. Building the Neural Network Model

# Build and train the DNN model
def nn_model(train_x, train_y, test_x, test_y, learning_rate=0.0001, num_epoch=2000, minibatch_size=32, print_cost=True, is_plot=True):
    ops.reset_default_graph()
    (n_x, m) = train_x.shape 
    n_y = train_y.shape[0]
    x, y = create_placeholder(n_x, n_y)
    
    parameters = initialize_parameters()
    z3 = forward_propagation(x, parameters)
    cost = compute_cost(z3, y)
    train = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
    init = tf.global_variables_initializer()
    seed = 3
    costs = []
    with tf.Session() as session:
        session.run(init)
        for epoch in range(num_epoch):
            epoch_cost = 0
            seed = seed + 1
            num_minibatch = int(m / minibatch_size)
            minibatches = tf_utils.random_mini_batches(train_x, train_y, minibatch_size, seed)
            
            for minibatch in minibatches:
                (minibatch_x, minibatch_y) = minibatch
                _, minibatch_cost = session.run([train, cost], feed_dict={x:minibatch_x, y:minibatch_y})
                epoch_cost = epoch_cost + minibatch_cost / num_minibatch
            if epoch%5 == 0:
                costs.append(epoch_cost)
                if print_cost and epoch%100 == 0:
                    print("epoch =", epoch, "epoch_cost =", epoch_cost)
        if is_plot:
            plt.plot(np.squeeze(costs))
            plt.title("learning_rate =" + str(learning_rate))
            plt.xlabel("epoch")
            plt.ylabel("cost")
            plt.show()
        parameters = session.run(parameters)
        correct_prediction = tf.equal(tf.argmax(z3), tf.argmax(y))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        print("训练集准确率:", accuracy.eval({x:train_x, y:train_y}))
        print("测试集准确率:", accuracy.eval({x:test_x, y:test_y}))
        return parameters
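tf_utils.random_mini_batches is also a course helper; a plausible implementation (an assumption shown for reference, not the helper's actual source) shuffles the columns with the given seed and slices them into batches:

import math
import numpy as np

def random_mini_batches(X, Y, mini_batch_size=32, seed=0):
    np.random.seed(seed)                      # the changing seed reshuffles each epoch
    m = X.shape[1]
    permutation = list(np.random.permutation(m))
    shuffled_X, shuffled_Y = X[:, permutation], Y[:, permutation]
    mini_batches = []
    num_complete = math.floor(m / mini_batch_size)
    for k in range(num_complete):
        mini_batches.append((shuffled_X[:, k*mini_batch_size:(k+1)*mini_batch_size],
                             shuffled_Y[:, k*mini_batch_size:(k+1)*mini_batch_size]))
    if m % mini_batch_size != 0:              # leftover examples form a smaller batch
        mini_batches.append((shuffled_X[:, num_complete*mini_batch_size:],
                             shuffled_Y[:, num_complete*mini_batch_size:]))
    return mini_batches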

7. Feeding the Data and Training the Model

# Train the model
parameters = nn_model(train_x1, train_y1, test_x1, test_y1)
epoch = 0 epoch_cost = 1.8915627219460225
epoch = 100 epoch_cost = 1.0814920374841401
epoch = 200 epoch_cost = 0.9045906644878967
epoch = 300 epoch_cost = 0.7937486514900668
epoch = 400 epoch_cost = 0.7092051903406779
epoch = 500 epoch_cost = 0.6288680759343233
epoch = 600 epoch_cost = 0.5487878710934612
epoch = 700 epoch_cost = 0.4825858437653743
epoch = 800 epoch_cost = 0.42227065834132105
epoch = 900 epoch_cost = 0.3695441674102436
epoch = 1000 epoch_cost = 0.31023081188852136
epoch = 1100 epoch_cost = 0.25832218676805496
epoch = 1200 epoch_cost = 0.23583885846715985
epoch = 1300 epoch_cost = 0.1781336689988772
epoch = 1400 epoch_cost = 0.15422452421802463
epoch = 1500 epoch_cost = 0.12832456477212184
epoch = 1600 epoch_cost = 0.10107426119573189
epoch = 1700 epoch_cost = 0.08046883931665708
epoch = 1800 epoch_cost = 0.06646379613966652
epoch = 1900 epoch_cost = 0.05467078682373871

Training accuracy: 0.99722224
Test accuracy: 0.75

[Figure: cost vs. epoch for learning_rate = 0.0001]

8. Image Prediction

(1) Predict a training-set image

index = 999

x = tf.placeholder(tf.float32, name="x")
plt.imshow(train_x[index])

image = (train_x[index]).reshape(64*64*3, 1)  # flatten the image into a column vector
print(image.shape)

with tf.Session() as session: 
    image_prediction = session.run(forward_propagation(x, parameters), feed_dict={x:image})
    prediction_label = np.squeeze(session.run(tf.argmax(image_prediction)))  # argmax picks the position of the largest output, i.e. the predicted label
    print("prediction_label:", prediction_label)
    true_label = np.squeeze(train_y[:, index])
    print("true_label:", true_label)
    if prediction_label == true_label:
        print("Prediction correct!")
    else:
        print("Prediction wrong!")
(12288, 1)
prediction_label: 2
true_label: 2
Prediction correct!

[Figure: the training image at index 999]

(2) Predict a test-set image

index = 46

x = tf.placeholder(tf.float32, name="x")
plt.imshow(test_x[index])

image = (test_x[index]).reshape(64*64*3, 1)
print(image.shape)

with tf.Session() as session:
    image_prediction = session.run(forward_propagation(x, parameters), feed_dict={x:image})
    prediction_label = np.squeeze(session.run(tf.argmax(image_prediction)))
    print("prediction_label:", prediction_label)
    true_label = np.squeeze(test_y[:, index])
    print("true_label:", true_label)
    if prediction_label == true_label:
        print("Prediction correct!")
    else:
        print("Prediction wrong!")
(12288, 1)
prediction_label: 1
true_label: 2
Prediction wrong!

[Figure: the test image at index 46]
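The two prediction snippets above repeat the same steps, so they can be wrapped in a small helper. The function below is a hypothetical convenience wrapper, not part of the original code; note that it also divides by 255 to match the training-time normalization, a step the inline snippets skip:

import numpy as np
import tensorflow as tf

def predict(image, parameters):
    # Flatten the 64x64x3 image into a column, normalize it the same way the
    # training data was normalized, run forward propagation with the trained
    # (NumPy) parameters, and take the argmax as the predicted label.
    x = tf.placeholder(tf.float32, name="x")
    column = image.reshape(64 * 64 * 3, 1) / 255
    with tf.Session() as session:
        z3 = session.run(forward_propagation(x, parameters), feed_dict={x: column})
    return int(np.argmax(z3))

# Usage: print(predict(test_x[46], parameters))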

9. Summary

  • The difference between tf.get_variable() and tf.Variable(): the former creates the variable only if it does not already exist (and can return the existing one under a reusing variable scope), while the latter always creates a new variable. Re-running model-building code therefore requires ops.reset_default_graph() first, otherwise tf.get_variable() raises an error; see the sketch after this list.
  • A Softmax classifier needs one output unit per class; this example needs six, so z3/a3 for a single example is a (6, 1) matrix, and argmax returns the position of its largest entry, which is exactly the one-hot-encoded label.
  • xavier_initializer is used to initialize the weights w: to keep information flowing well through the network, the variance of each layer's output should stay roughly equal.
  • ⚠ The loss function tf.nn.softmax_cross_entropy_with_logits is deprecated; tf.nn.softmax_cross_entropy_with_logits_v2 can be used instead.
  • In tf.nn.softmax_cross_entropy_with_logits_v2(logits=tf.transpose(z3), labels=tf.transpose(Y)), logits is the linear output of the last layer of forward propagation and labels is the one-hot-encoded label matrix.
  • parameters = session.run(parameters) is not useless: it evaluates the trained variables into concrete NumPy arrays, so the final parameters remain usable after the session closes.
  • The minibatches are re-shuffled in every epoch, because the seed passed to random_mini_batches changes from epoch to epoch.
  • Looking at the results: the model fits the training set very well, but the test accuracy is noticeably lower. For test images like the one above, where the two fingers of a "2" are held very close together, the gesture is easily misclassified as a "1".
  • _, minibatch_cost = session.run([train, cost], feed_dict={x:minibatch_x, y:minibatch_y}) runs the training op and evaluates the cost in a single call, two jobs in one line.
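A minimal demonstration of the first point (TF 1.x; w_demo is an arbitrary name):

import tensorflow as tf
from tensorflow.python.framework import ops

w = tf.get_variable("w_demo", [2, 2], initializer=tf.zeros_initializer())
# Running the line above a second time in the same graph raises
# "ValueError: Variable w_demo already exists" -- tf.get_variable refuses
# to silently overwrite. Resetting the graph frees the name again:
ops.reset_default_graph()
w = tf.get_variable("w_demo", [2, 2], initializer=tf.zeros_initializer())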