多层感知机的从零开始实现
导包
import d2lzh as d2l
from mxnet import nd
from mxnet.gluon import loss as gloss
读取数据集
# Mini-batch size handed to the Fashion-MNIST loader; the loader returns the
# training iterator and the test iterator, in that order.
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
定义模型参数
每个输入样本有28*28=784个特征,类别共10类,因此分别设置num_inputs=784、num_outputs=10。
而隐藏单元个数(num_hiddens)是一个超参数,可以自行调整。
提前剧透:
下图是设置num_hiddens=256时的结果。
# Layer sizes: 784 input features, 10 output classes, 256 hidden units.
num_inputs, num_outputs, num_hiddens = 784, 10, 256

# Hidden layer: weights drawn from a normal distribution with scale 0.01,
# bias initialised to zero.
W1 = nd.random.normal(scale=0.01, shape=(num_inputs, num_hiddens))
b1 = nd.zeros(num_hiddens)

# Output layer: same initialisation scheme as the hidden layer.
W2 = nd.random.normal(scale=0.01, shape=(num_hiddens, num_outputs))
b2 = nd.zeros(num_outputs)

params = [W1, b1, W2, b2]
for param in params:
    # Attach a gradient buffer so autograd can compute gradients with
    # respect to this parameter.
    param.attach_grad()
定义激活函数
def relu(X):
    """Apply the ReLU activation to ``X``.

    Args:
        X: Input array passed to ``nd.maximum``.

    Returns:
        The result of ``nd.maximum(X, 0)``, i.e. ``X`` with negative
        entries replaced by zero.
    """
    return nd.maximum(X, 0)
定义模型
def net(X):
    """Run the forward pass of the one-hidden-layer perceptron.

    Args:
        X: Input batch; it is reshaped to ``(-1, num_inputs)`` before use,
            so each example becomes one row of ``num_inputs`` features.

    Returns:
        The output-layer result ``H @ W2 + b2``. No softmax is applied
        here; the values are returned as raw scores.
    """
    # Flatten every example into a row vector of num_inputs features.
    X = X.reshape((-1, num_inputs))
    # Hidden layer: affine transform followed by ReLU.
    H = relu(nd.dot(X, W1) + b1)
    # Output layer: affine transform only.
    return nd.dot(H, W2) + b2
损失函数
# Softmax cross-entropy loss object used for training.
# NOTE(review): `loos` is presumably a typo for `loss`; the name is kept
# because the training call below passes `loos`.
loos = gloss.SoftmaxCrossEntropyLoss()
训练模型
# Training hyperparameters: number of epochs and learning rate.
num_epochs = 5
lr = 0.5

# Train the from-scratch model; the explicit parameter list and learning
# rate are passed in because no Gluon trainer is used here.
d2l.train_ch3(
    net, train_iter, test_iter, loos,
    num_epochs, batch_size, params, lr,
)
多层感知机的简洁实现
导包
import d2lzh as d2l
from mxnet import gluon,init
from mxnet.gluon import loss as gloss,nn
定义模型
nn.Sequential()也是老朋友了,它是一个按顺序串联各层的容器。
用add向其中添加层:先加一个全连接隐藏层(256个隐藏单元,激活函数为relu),再加一个全连接输出层(10个输出)。
# Build the network one layer at a time inside a Sequential container.
net = nn.Sequential()
# Hidden layer: 256 units with ReLU activation.
net.add(nn.Dense(256, activation='relu'))
# Output layer: 10 units, no activation.
net.add(nn.Dense(10))
# Initialise parameters from a normal distribution with sigma 0.01.
net.initialize(init.Normal(sigma=0.01))
训练模型
# Load Fashion-MNIST in mini-batches of 256 examples.
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

# Softmax cross-entropy loss and an SGD trainer (learning rate 0.5) over all
# parameters collected from the network.
loss = gloss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.5})

num_epochs = 5
# The two None arguments occupy the positions used for an explicit parameter
# list and learning rate; the Gluon trainer is passed instead.
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              None, None, trainer)