【Kaggle】手写数字识别

191 阅读2分钟

一些名词

  • 神经网络别名,Multi-Layer Perceptron,MLP,多层感知机
  • 1~4层神经网络,Shallow Neural Network,浅层神经网络
  • 卷积神经网络,convolutional neural network
  • 卷积层,convolutional layer
  • 池化层,pooling layer
  • 全连接层,fully-connected layer

1 数据导入

# Load the digit CSV files and split the labelled rows into train/validation sets
def load_data(train_path="train.csv", test_path="test.csv",
              test_size=0.2, random_state=2021):
    """Read the train/test CSV files and return arrays shaped for a 2-D CNN.

    Pixel values are passed through unchanged (no scaling is applied here).

    Args:
        train_path: CSV containing a "label" column plus the pixel columns.
        test_path: CSV containing only the pixel columns.
        test_size: Fraction of the labelled rows held out for validation.
        random_state: Seed forwarded to ``train_test_split`` so the split is
            reproducible.

    Returns:
        ``(x_train, x_valid, y_train, y_valid, test)``. The image arrays have
        shape ``(N, 28, 28, 1)``; the label arrays are one-hot encoded with
        one column per distinct label value present in the training file.
    """
    train, test = pd.read_csv(train_path), pd.read_csv(test_path)

    # One-hot encode the labels: as many columns as there are classes.
    # The dtype is pinned because the pandas default changed from uint8 to
    # bool in pandas 2.0; uint8 keeps the labels numeric on every version.
    label = np.array(pd.get_dummies(train["label"], dtype=np.uint8))

    # Every row is reshaped into a 28x28 image with a trailing channel axis
    # of size 1, which is the layout the Conv2D layers consume.
    train_data = np.array(train.drop(["label"], axis=1)).reshape(-1, 28, 28, 1)
    test = np.array(test).reshape(-1, 28, 28, 1)

    x_train, x_valid, y_train, y_valid = train_test_split(
        train_data, label, test_size=test_size, random_state=random_state)

    return x_train, x_valid, y_train, y_valid, test

# Run the loader once and inspect the shapes of the two image splits.
x_train, x_valid, y_train, y_valid, test = load_data()
# Notebook-style cell: the value of this last expression is what gets displayed.
x_train.shape, x_valid.shape
((33600, 28, 28, 1), (8400, 28, 28, 1))

第四个维度表示通道,此处通道数为1


2 设计卷积神经网络

def myCNN():
    """Build the convolutional network and print its layer summary.

    Architecture: two Conv2D + MaxPool2D stages, a flattening Reshape, a
    256-unit dense layer with dropout, and a 10-way softmax output.

    Returns:
        The built (but not yet compiled) ``tf.keras.Sequential`` model,
        expecting input of shape ``(None, 28, 28, 1)``.
    """
    model = tf.keras.Sequential([
        # Stage 1: 32 filters of 5x5; "same" padding keeps the 28x28 size.
        tf.keras.layers.Conv2D(filters=32, kernel_size=[5, 5],
                               padding="same", activation=tf.nn.relu),
        # 2x2 pooling with stride 2 halves each spatial side: 28 -> 14.
        tf.keras.layers.MaxPool2D(pool_size=[2, 2], strides=2),

        # Stage 2: 64 filters of 7x7, again with "same" padding.
        tf.keras.layers.Conv2D(filters=64, kernel_size=[7, 7],
                               padding="same", activation=tf.nn.relu),
        # Second pooling step: 14 -> 7.
        tf.keras.layers.MaxPool2D(pool_size=[2, 2], strides=2),

        # Flatten the 7x7x64 feature maps into one vector per sample.
        tf.keras.layers.Reshape(target_shape=(7 * 7 * 64,)),

        tf.keras.layers.Dense(units=256, activation=tf.nn.relu),

        # Drop half of the dense activations during training.
        tf.keras.layers.Dropout(0.5),

        # One softmax probability per digit class.
        tf.keras.layers.Dense(units=10, activation=tf.nn.softmax),
    ])

    model.build(input_shape=(None, 28, 28, 1))
    # summary() prints the table itself and returns None, so wrapping it in
    # print() would emit an extra "None" line after the table.
    model.summary()

    return model
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
conv2d (Conv2D)              (None, 28, 28, 32)        832       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 14, 14, 64)        100416    
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 7, 7, 64)          0         
_________________________________________________________________
reshape (Reshape)            (None, 3136)              0         
_________________________________________________________________
dense (Dense)                (None, 256)               803072    
_________________________________________________________________
dropout (Dropout)            (None, 256)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 10)                2570      
=================================================================
Total params: 906,890
Trainable params: 906,890
Non-trainable params: 0
_________________________________________________________________

参数个数计算

  • 卷积层1:$5\times5\times32+32=832$(输入通道数为 1)
  • 卷积层2:$32\times7\times7\times64+64=100416$
  • 全连接层1:$3136\times256+256=803072$
  • 全连接层2:$256\times10+10=2570$

3 模型训练

这里只是简单的训练,没有搞模型选择那套。

def trainMyCNN(x_train, x_valid, y_train, y_valid, model,
               *, batch_size=100, epochs=10, verbose=1):
    """Compile ``model`` and fit it on the training split.

    Args:
        x_train: Training inputs.
        x_valid: Validation inputs.
        y_train: Training targets (one-hot, to match the categorical
            cross-entropy loss).
        y_valid: Validation targets.
        model: Object exposing Keras-style ``compile`` and ``fit`` methods.
        batch_size: Samples per gradient update.
        epochs: Number of passes over the training data.
        verbose: Logging level forwarded to ``fit``.

    Returns:
        ``(model, history)`` where ``history`` is whatever ``model.fit``
        returned.
    """
    # Configure the loss, optimizer and the metric reported during training.
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam', metrics=['accuracy'])

    # The validation split is passed to fit(), so no separate evaluate()
    # pass over (x_valid, y_valid) is done afterwards.
    history = model.fit(x_train, y_train, batch_size=batch_size,
                        epochs=epochs, verbose=verbose,
                        validation_data=(x_valid, y_valid))

    return model, history
  • `verbose = 0`:不输出日志信息
  • `verbose = 1`:输出日志信息(带进度条)
  • `verbose = 2`:每个 epoch 只输出一行日志

试一下就知道三者的区别了。


4 模型预测

def predictTest(test, model):
    """Return the predicted class index for every sample in ``test``.

    ``model.predict`` is expected to yield one row of per-class scores per
    sample; the index of the largest score in each row is the prediction.
    """
    class_scores = model.predict(test)
    return np.argmax(class_scores, axis=1)

5 一键启动

# End-to-end run: load the data, build and train the CNN, predict the test
# set and write the submission file.
x_train, x_valid, y_train, y_valid, test = load_data()
model = myCNN()
model, history = trainMyCNN(x_train, x_valid, y_train, y_valid, model)
y_test = predictTest(test, model)

# ImageId is 1-based and sized from the predictions themselves rather than a
# hard-coded 28000, so the two columns always have matching lengths.
submission = pd.DataFrame({"ImageId": np.arange(1, len(y_test) + 1),
                           "Label": y_test})
submission.to_csv("submission.csv", index=False)   # omit the row index

截图庆祝

Rank