一些名词
- 神经网络别名,Multi-Layer Perceptron,MLP,多层感知机
- 1~4层神经网络,Shallow Neural Network,浅层神经网络
- 卷积神经网络,convolutional neural network
- 卷积层,convolutional layer
- 池化层,pooling layer
- 全连接层,fully-connected layer
1 数据导入
# Load the data and split it into training / validation / test tensors.
def load_data(train_path="train.csv", test_path="test.csv",
              test_size=0.2, random_state=2021):
    """Read the digit CSV files and return image tensors ready for the CNN.

    The training CSV must contain a ``label`` column; every other column is
    treated as a pixel value of a 28x28 image. The test CSV contains pixel
    columns only.

    Args:
        train_path: Path of the labelled training CSV.
        test_path: Path of the unlabelled test CSV.
        test_size: Fraction of the training rows held out for validation.
        random_state: Seed passed to ``train_test_split`` so the split is
            reproducible.

    Returns:
        A tuple ``(x_train, x_valid, y_train, y_valid, test)``. The image
        arrays have shape ``(n, 28, 28, 1)``; the label arrays are one-hot
        encoded with one column per class present in the training data.
    """
    train = pd.read_csv(train_path)
    test = pd.read_csv(test_path)

    # One column per class. The dtype is pinned because pandas >= 2.0 returns
    # bool columns by default, which is not a numeric training target.
    label = pd.get_dummies(train["label"], dtype=np.float32).to_numpy()

    # Each row is a flattened 28x28 image; the trailing axis of size 1 is the
    # channel dimension expected by Conv2D.
    train_data = train.drop(columns="label").to_numpy().reshape(-1, 28, 28, 1)
    test = test.to_numpy().reshape(-1, 28, 28, 1)

    x_train, x_valid, y_train, y_valid = train_test_split(
        train_data, label, test_size=test_size, random_state=random_state)
    return x_train, x_valid, y_train, y_valid, test
# Load all splits once, then inspect the training / validation shapes.
x_train, x_valid, y_train, y_valid, test = load_data()
# Bare expression: its value is what a notebook shows as the cell output.
x_train.shape, x_valid.shape
((33600, 28, 28, 1), (8400, 28, 28, 1))
第四个维度表示通道,此处通道数为1
2 设计卷积神经网络
def myCNN():
    """Build the convolutional network for 28x28x1 images and print its summary.

    Layout: two Conv2D (ReLU, "same" padding) + 2x2 max-pooling stages with
    32 and 64 filters, a flatten step to 7 * 7 * 64 features, a 256-unit
    ReLU dense layer with 50% dropout, and a 10-way softmax output.

    Returns:
        The built (not yet compiled) ``tf.keras.Sequential`` model.
    """
    model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(filters=32, kernel_size=[5, 5],
                               padding="same", activation=tf.nn.relu),
        tf.keras.layers.MaxPool2D(pool_size=[2, 2], strides=2),
        tf.keras.layers.Conv2D(filters=64, kernel_size=[7, 7],
                               padding="same", activation=tf.nn.relu),
        tf.keras.layers.MaxPool2D(pool_size=[2, 2], strides=2),
        # Two stride-2 poolings shrink 28x28 to 7x7; flatten the 64 maps.
        tf.keras.layers.Reshape(target_shape=(7 * 7 * 64,)),
        tf.keras.layers.Dense(units=256, activation=tf.nn.relu),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(units=10, activation=tf.nn.softmax),
    ])
    model.build(input_shape=(None, 28, 28, 1))
    # summary() prints the table itself and returns None, so wrapping it in
    # print() would emit an extra "None" line after the table.
    model.summary()
    return model
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 28, 28, 32) 832
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 14, 14, 32) 0
_________________________________________________________________
conv2d_1 (Conv2D) (None, 14, 14, 64) 100416
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 7, 7, 64) 0
_________________________________________________________________
reshape (Reshape) (None, 3136) 0
_________________________________________________________________
dense (Dense) (None, 256) 803072
_________________________________________________________________
dropout (Dropout) (None, 256) 0
_________________________________________________________________
dense_1 (Dense) (None, 10) 2570
=================================================================
Total params: 906,890
Trainable params: 906,890
Non-trainable params: 0
_________________________________________________________________
参数个数计算
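- 卷积层1:$(5 \times 5 \times 1 + 1) \times 32 = 832$(卷积核 5×5,输入通道 1,每个卷积核 1 个偏置,共 32 个卷积核)
- 卷积层2:$(7 \times 7 \times 32 + 1) \times 64 = 100416$(卷积核 7×7,输入通道 32,共 64 个卷积核)
- 全连接层1:$(7 \times 7 \times 64 + 1) \times 256 = (3136 + 1) \times 256 = 803072$
- 全连接层2:$(256 + 1) \times 10 = 2570$
- 合计:$832 + 100416 + 803072 + 2570 = 906890$,与上面 summary 中的 Total params 一致。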
3 模型训练
这里只是简单的训练,没有搞模型选择那套。
def trainMyCNN(x_train, x_valid, y_train, y_valid, model,
               *, batch_size=100, epochs=10, verbose=1):
    """Compile ``model`` and fit it on the training split.

    The model is compiled with categorical cross-entropy loss, the Adam
    optimizer and accuracy as the reported metric. The validation split is
    passed to ``fit`` so validation metrics are produced after every epoch,
    which makes a separate ``evaluate`` call unnecessary.

    Args:
        x_train: Training inputs.
        x_valid: Validation inputs.
        y_train: One-hot training targets.
        y_valid: One-hot validation targets.
        model: Object exposing Keras-style ``compile`` and ``fit`` methods.
        batch_size: Samples per gradient update (default 100).
        epochs: Number of passes over the training data (default 10).
        verbose: Logging level forwarded to ``fit`` (default 1).

    Returns:
        A tuple ``(model, history)`` where ``history`` is whatever
        ``model.fit`` returned.
    """
    # Configure the loss, optimizer and metric.
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam', metrics=['accuracy'])
    # Train, tracking validation metrics each epoch.
    history = model.fit(x_train, y_train, batch_size=batch_size,
                        epochs=epochs, verbose=verbose,
                        validation_data=(x_valid, y_valid))
    return model, history
- 输出日志信息:由 `model.fit` 的 `verbose` 参数控制,共三种取值——`0` 不输出日志,`1` 显示进度条,`2` 每个 epoch 只输出一行。
试一下就知道三者的区别了。
4 模型预测
def predictTest(test, model):
    """Return the predicted class index for every sample in ``test``.

    ``model.predict`` is expected to return one row of per-class scores per
    sample; the index of the largest score in each row is the prediction.
    """
    return np.argmax(model.predict(test), axis=1)
5 一键启动
# End-to-end run: load data, build and train the model, predict, write the CSV.
x_train, x_valid, y_train, y_valid, test = load_data()
model = myCNN()
model, history = trainMyCNN(x_train, x_valid, y_train, y_valid, model)
y_test = predictTest(test, model)
# ImageId is 1-based. Derive it from the number of predictions instead of
# hard-coding 28000, so a test set of a different size does not fail with a
# length mismatch.
submission = pd.DataFrame({"ImageId": np.arange(1, len(y_test) + 1),
                           "Label": y_test})
submission.to_csv("submission.csv", index=False)  # omit the row index
截图庆祝