First we use the simplest three-layer fully connected neural network, then add activation layers and look at the experimental results, and finally add batch normalization to verify whether it is effective.
First, the network structures (starting with simpleNet) are defined according to the existing template and saved in a file named net.py:
import torch
from torch.autograd import Variable
import numpy as np
import matplotlib.pyplot as plt
from torch import nn,optim
from torch.utils.data import DataLoader
from torchvision import datasets,transforms
#Three layer fully connected neural network is defined
class simpleNet(nn.Module):
    """Three-layer fully connected network with no activation functions.

    Args:
        in_dim: Input dimension.
        n_hidden_1: Number of neurons in the first layer.
        n_hidden_2: Number of neurons in the second layer.
        out_dim: Number of neurons in the third (output) layer.
    """

    def __init__(self, in_dim, n_hidden_1, n_hidden_2, out_dim):
        super().__init__()
        self.layer1 = nn.Linear(in_dim, n_hidden_1)
        self.layer2 = nn.Linear(n_hidden_1, n_hidden_2)
        self.layer3 = nn.Linear(n_hidden_2, out_dim)

    def forward(self, x):
        """Pass ``x`` through the three linear layers in order."""
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        return x
#Add activation function
class Activation_Net(nn.Module):
    """Three-layer fully connected network with ReLU after the hidden layers.

    Args:
        in_dim: Input dimension.
        n_hidden_1: Number of neurons in the first layer.
        n_hidden_2: Number of neurons in the second layer.
        out_dim: Number of neurons in the third (output) layer.
    """

    def __init__(self, in_dim, n_hidden_1, n_hidden_2, out_dim):
        # Zero-argument super() avoids naming the class, which the previous
        # version got wrong (it referenced a non-existent class).
        super().__init__()
        # nn.Sequential chains the linear layer and its activation.
        self.layer1 = nn.Sequential(
            nn.Linear(in_dim, n_hidden_1), nn.ReLU(True))
        self.layer2 = nn.Sequential(
            nn.Linear(n_hidden_1, n_hidden_2), nn.ReLU(True))
        # The output layer has no activation.
        self.layer3 = nn.Sequential(
            nn.Linear(n_hidden_2, out_dim))

    def forward(self, x):
        """Pass ``x`` through the three layers in order."""
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        return x
#Add a batch normalization module. Batch normalization is placed after the fully connected layer and before the nonlinearity
class Batch_Net(nn.Module):
    """Three-layer fully connected network with batch normalization.

    In each hidden layer, ``nn.BatchNorm1d`` sits after the linear layer and
    before the ReLU. The output layer is a plain linear layer.

    Args:
        in_dim: Input dimension.
        n_hidden_1: Number of neurons in the first layer.
        n_hidden_2: Number of neurons in the second layer.
        out_dim: Number of neurons in the third (output) layer.
    """

    def __init__(self, in_dim, n_hidden_1, n_hidden_2, out_dim):
        super().__init__()
        self.layer1 = nn.Sequential(
            nn.Linear(in_dim, n_hidden_1),
            nn.BatchNorm1d(n_hidden_1),
            nn.ReLU(True),
        )
        self.layer2 = nn.Sequential(
            nn.Linear(n_hidden_1, n_hidden_2),
            nn.BatchNorm1d(n_hidden_2),
            nn.ReLU(True),
        )
        self.layer3 = nn.Sequential(nn.Linear(n_hidden_2, out_dim))

    def forward(self, x):
        """Pass ``x`` through the three layers in order."""
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        return x
Training the network:
import torch
from torch.autograd import Variable
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from torch import nn,optim
from torch.utils.data import DataLoader
from torchvision import datasets,transforms
# Define some hyperparameters
import net

batch_size = 64
learning_rate = 1e-2
num_epoches = 20

# Preprocessing: convert the image to a tensor, then normalize it, i.e.
# subtract the mean (0.5) and divide by the standard deviation (0.5).
data_tf = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize([0.5], [0.5])]
)

# Read the dataset (the training split is downloaded to ./data if missing)
train_dataset = datasets.MNIST(
    root='./data', train=True, transform=data_tf, download=True)
test_dataset = datasets.MNIST(root='./data', train=False, transform=data_tf)

# Wrap the datasets in loaders; only the training data is shuffled
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Import the network, define the loss function and the optimization method
model = net.simpleNet(28 * 28, 300, 100, 10)
if torch.cuda.is_available():  # use CUDA acceleration when it is available
    model = model.cuda()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)
import net
n_epochs = 5
# Send each batch to whichever device the model's parameters already live on,
# so the loop works the same for a CPU model and a CUDA model.
device = next(model.parameters()).device
model.train()  # make sure the model is in training mode
# Running count of mini-batches across all epochs. It is kept separate from
# the ``epoch`` loop variable, which the previous version overwrote.
step = 0
for epoch in range(n_epochs):
    running_loss = 0.0
    running_correct = 0
    print("epoch {}/{}".format(epoch, n_epochs))
    print("-" * 10)
    for img, label in train_loader:
        # Flatten each image to a vector for the fully connected layers
        img = img.view(img.size(0), -1).to(device)
        label = label.to(device)
        out = model(img)  # forward propagation
        loss = criterion(out, label)  # compute the loss
        optimizer.zero_grad()  # reset the gradients to zero
        loss.backward()  # back propagation
        optimizer.step()  # update the parameters
        running_loss += loss.item()
        running_correct += (out.argmax(dim=1) == label).sum().item()
        step += 1
        if step % 50 == 0:
            print('step:{},loss:{:.4f}'.format(step, loss.item()))
    # Per-epoch summary: mean batch loss and accuracy on the training set
    print('train loss:{:.4f},acc:{:.4f}'.format(
        running_loss / max(len(train_loader), 1),
        running_correct / max(len(train_loader.dataset), 1)))
The results of the training are as follows:
Testing the network:
# Test network
model.eval()  # switch the model to evaluation mode
# Evaluate on whichever device the model's parameters already live on
device = next(model.parameters()).device
eval_loss = 0
eval_acc = 0
# The test set needs no back propagation, so gradient tracking is disabled to
# save memory during the forward pass. torch.no_grad() replaces the removed
# Variable(..., volatile=True) idiom.
with torch.no_grad():
    for img, label in test_loader:
        img = img.view(img.size(0), -1).to(device)
        label = label.to(device)
        out = model(img)
        loss = criterion(out, label)
        # Weight the batch-mean loss by batch size so that the final
        # division by the dataset size gives a per-sample average.
        eval_loss += loss.item() * label.size(0)
        _, pred = torch.max(out, 1)
        num_correct = (pred == label).sum()
        eval_acc += num_correct.item()
print('test loss:{:.6f},ac:{:.6f}'.format(
    eval_loss / (len(test_dataset)), eval_acc / (len(test_dataset))))
When training, you can also add dropout or regularization, change the number of hidden-layer neurons, or increase the number of hidden layers; you can try these yourself.
The above is everything the editor has to share about recognizing handwritten digits with a three-layer fully connected network in PyTorch. I hope it gives you a useful reference, and I hope you will continue to support developpaer.