
【PyTorch Deep Learning】5. Implementing L1/L2 Regularization and Dropout in PyTorch

Posted: 2024-01-20 00:43:24


Table of contents

1. How Dropout works
2. Implementing L1 and L2 regularization
3. Dropout
3.1 Implementing Dropout with NumPy
3.2 Implementing Dropout with PyTorch
4. Full code

1. How Dropout works

Dropout means that, during the training of a deep network, a fraction of the neurons is randomly and temporarily "dropped" with a given probability.

Concretely, Dropout acts on every mini-batch of training data. Because a random subset of neurons is dropped each time, every iteration effectively trains a network with a different structure. Similar to Bagging, Dropout can therefore be seen as a practical model-ensemble algorithm for large-scale deep neural networks.

In a concrete implementation, each neuron's activation is "dropped" with some probability p, meaning the neuron temporarily stops working. Any given neuron is thus optimized together with a different, randomly chosen set of neurons in every training step. This weakens the co-adaptation among the neurons, reduces the risk of overfitting, and improves generalization.

During training, hidden-layer neurons are randomly selected and removed; the removed neurons no longer pass any signal, and a new set of neurons to remove is drawn every time a batch of data is passed through the network. At test time all neurons pass their signals, but each neuron's output is multiplied by the fraction of neurons kept during training (the keep ratio, 1 − p) before being passed on.
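As a quick illustration (this snippet is mine, not from the original post), the following NumPy sketch contrasts the convention just described, where surviving activations are left alone during training and scaled by the keep ratio at test time, with the "inverted dropout" convention that PyTorch's nn.Dropout uses, where surviving activations are scaled by 1/(1 − p) during training and left untouched at test time; both keep the expected magnitude of the output consistent between training and testing.

import numpy as np

p = 0.5                                  # drop probability
x = np.random.randn(4, 3)                # toy activations
mask = np.random.rand(*x.shape) > p      # True for units that are kept

# Classic dropout: no scaling while training, scale by the keep ratio when testing.
train_out_classic = x * mask
test_out_classic = x * (1.0 - p)

# Inverted dropout (what torch.nn.Dropout does): scale the kept units by 1/(1 - p)
# while training, and do nothing at test time.
train_out_inverted = x * mask / (1.0 - p)
test_out_inverted = x

# In both cases the expected train-time output matches the test-time output:
# (1 - p) * x for the classic convention, x for the inverted one.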

2. Implementing L1 and L2 regularization

Reference:

/guyuealian/article/details/88426648

The regularization class:

class Regularization(torch.nn.Module):
    def __init__(self, model, weight_decay, p=2):
        '''
        :param model: the model whose weights will be regularized
        :param weight_decay: regularization coefficient
        :param p: order of the norm; p=2 gives L2 regularization, p=1 gives L1 regularization
        '''
        super(Regularization, self).__init__()
        if weight_decay <= 0:
            print("param weight_decay can not <= 0")
            exit(0)
        self.model = model
        self.weight_decay = weight_decay
        self.p = p
        self.weight_list = self.get_weight(model)
        self.weight_info(self.weight_list)

    def to(self, device):
        '''
        Move the module to the given device (cuda or cpu)
        '''
        self.device = device
        super().to(device)
        return self

    def forward(self, model):
        self.weight_list = self.get_weight(model)  # fetch the latest weights
        reg_loss = self.regularization_loss(self.weight_list, self.weight_decay, p=self.p)
        return reg_loss

    def get_weight(self, model):
        '''
        Collect the model's weight parameters (biases are skipped)
        '''
        weight_list = []
        for name, param in model.named_parameters():
            if 'weight' in name:
                weight = (name, param)
                weight_list.append(weight)
        return weight_list

    def regularization_loss(self, weight_list, weight_decay, p=2):
        '''
        Compute the p-norm penalty over all collected weights
        '''
        reg_loss = 0
        for name, w in weight_list:
            w_norm = torch.norm(w, p=p)
            reg_loss = reg_loss + w_norm
        reg_loss = weight_decay * reg_loss
        return reg_loss

    def weight_info(self, weight_list):
        '''
        Print the names of the regularized weights
        '''
        print("---------------regularization weight---------------")
        for name, w in weight_list:
            print(name)
        print("---------------------------------------------------")
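One point worth making explicit about the intended usage (a minimal sketch with placeholder model and data, not a full training script): the penalty returned by Regularization has to be added to the task loss before calling backward(), otherwise the class has no effect.

# Minimal usage sketch; `model`, `x` and `y` stand in for your own network and data.
weight_decay = 0.01
reg = Regularization(model, weight_decay, p=1)   # p=1 for L1, p=2 for L2

criterion = nn.BCEWithLogitsLoss()
out = model(x)
loss = criterion(out, y) + reg(model)            # add the penalty to the data loss
loss.backward()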

The reference article explains how to use it. I applied it to my own dataset (the one from the previous post; see that post for the data preprocessing) as follows:

class module_net(nn.Module):
    def __init__(self, num_input, num_hidden, num_output):
        super(module_net, self).__init__()
        self.layer1 = nn.Linear(num_input, num_hidden)
        self.layer2 = nn.ReLU()
        self.layer3 = nn.Linear(num_hidden, num_hidden)
        self.layer4 = nn.ReLU()
        self.layer5 = nn.Linear(num_hidden, num_hidden)
        self.layer6 = nn.ReLU()
        self.layer7 = nn.Linear(num_hidden, num_output)

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        x = self.layer6(x)
        x = self.layer7(x)
        return x

torch.backends.cudnn.benchmark = True
device = "cpu"
weight_decay = 0.01  # regularization coefficient

model = module_net(8, 10, 1).to(device)

# set up regularization
if weight_decay > 0:
    reg_loss = Regularization(model, weight_decay, p=1).to(device)  # p=1 for L1, p=2 for L2
else:
    print("no regularization")

criterion = nn.BCEWithLogitsLoss().to(device)  # sigmoid + binary cross entropy in one op
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)  # no weight_decay argument needed here

Loss_list = []      # collect loss values for plotting
Accuracy_list = []  # collect accuracy values for plotting

for e in range(15000):
    out = model(Variable(x))
    loss = criterion(out, Variable(y))
    if weight_decay > 0:
        loss = loss + reg_loss(model)  # add the L1/L2 penalty to the data loss
    Loss_list.append(loss.item())

    # -------------------- accuracy --------------------#
    out_class = (out[:] > 0).float()               # logits > 0 map to class 1, otherwise class 0
    right_num = torch.sum(y == out_class).item()   # number of correct predictions
    precision = right_num / out.shape[0]           # accuracy
    # ---------------------------------------------------#
    Accuracy_list.append(precision)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if (e + 1) % 1000 == 0:
        print('epoch: {}, loss: {}, precision: {}, right_num: {}'.format(e + 1, loss.item(), precision, right_num))

x1 = list(range(15000))
plt.plot(x1, Loss_list, c='red', label='loss')
plt.plot(x1, Accuracy_list, c='blue', label='precision')
plt.legend()

The training results are as follows:

On the test set:
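A side note that is not in the original article: if all you want is L2 regularization, PyTorch optimizers already provide it through their weight_decay argument, so the custom class is mainly useful for L1 or for regularizing only selected parameters. A minimal sketch:

# weight_decay adds weight_decay * w to every parameter's gradient, which is
# equivalent to an L2 penalty of (weight_decay / 2) * ||w||^2 on all parameters
# passed to the optimizer (including biases, unlike the class above, and using
# the squared norm rather than the plain norm that the class computes).
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, weight_decay=0.01)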

3. Dropout

3.1 Implementing Dropout with NumPy

import numpy as np

class Dropout:
    def __init__(self, dropout_ratio=0.5):
        self.dropout_ratio = dropout_ratio
        self.mask = None

    def forward(self, x, train_flg=True):
        if train_flg:
            self.mask = np.random.rand(*x.shape) > self.dropout_ratio
            return x * self.mask
        else:
            return x * (1.0 - self.dropout_ratio)

    def backward(self, dout):
        return dout * self.mask

The key point here is that on every forward pass, self.mask records which neurons are dropped, marked as False. self.mask is a randomly generated array with the same shape as x, in which the elements whose values exceed dropout_ratio are set to True. The backward pass behaves just like ReLU: neurons that passed a signal forward in the forward pass let the gradient through unchanged, while neurons that passed no signal forward stop the gradient there.
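A short usage example of this class (the toy input is my own) makes the train/test behaviour concrete:

import numpy as np

dropout = Dropout(dropout_ratio=0.5)
x = np.ones((2, 4))

train_out = dropout.forward(x, train_flg=True)    # roughly half of the entries are zeroed out
grad = dropout.backward(np.ones_like(x))          # gradient only flows through the kept units
test_out = dropout.forward(x, train_flg=False)    # every entry is scaled by the keep ratio 0.5

print(train_out)
print(grad)
print(test_out)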

3.2 Implementing Dropout with PyTorch

To implement Dropout in PyTorch you only need to add an nn.Dropout(p) layer when building the network, where p is the probability with which each neuron's output is dropped. My modified network structure is as follows:

class module_net(nn.Module):
    def __init__(self, num_input, num_hidden, num_output):
        super(module_net, self).__init__()
        self.layer1 = nn.Linear(num_input, num_hidden)
        self.layer2 = nn.ReLU()
        self.layer3 = nn.Linear(num_hidden, num_hidden)
        self.dropout3 = nn.Dropout(p=0.5)
        self.layer4 = nn.ReLU()
        self.layer5 = nn.Linear(num_hidden, num_hidden)
        self.dropout5 = nn.Dropout(p=0.5)
        self.layer6 = nn.ReLU()
        self.layer7 = nn.Linear(num_hidden, num_output)

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.dropout3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        x = self.dropout5(x)
        x = self.layer6(x)
        x = self.layer7(x)
        return x
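One caveat that the original post does not mention: nn.Dropout only drops units while the module is in training mode, so you should call model.train() before training and model.eval() before evaluating, otherwise units keep being dropped at test time. A minimal sketch (variable names follow the full code below):

model = module_net(8, 10, 1)

model.train()                        # dropout active: units are zeroed with probability p
train_out = model(x)

model.eval()                         # dropout disabled: the layer becomes an identity
with torch.no_grad():
    test_out = model(x_test_tensor.float())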

The training code that follows is the same as above; after training for 10,000 iterations the results are as follows:

Performance on the test set:

You can see that the gap between the training error and the test error is much smaller, so the overfitting has been reduced considerably.

4. Full code

The full code, including the dataset preparation, is as follows:

import torch
import numpy as np
from torch import nn
from torch.autograd import Variable
import torch.nn.functional as F
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
%matplotlib inline

#-------------------------------- regularization class --------------------------------#
class Regularization(torch.nn.Module):
    def __init__(self, model, weight_decay, p=2):
        '''
        :param model: the model whose weights will be regularized
        :param weight_decay: regularization coefficient
        :param p: order of the norm; p=2 gives L2 regularization, p=1 gives L1 regularization
        '''
        super(Regularization, self).__init__()
        if weight_decay <= 0:
            print("param weight_decay can not <= 0")
            exit(0)
        self.model = model
        self.weight_decay = weight_decay
        self.p = p
        self.weight_list = self.get_weight(model)
        self.weight_info(self.weight_list)

    def to(self, device):
        '''
        Move the module to the given device (cuda or cpu)
        '''
        self.device = device
        super().to(device)
        return self

    def forward(self, model):
        self.weight_list = self.get_weight(model)  # fetch the latest weights
        reg_loss = self.regularization_loss(self.weight_list, self.weight_decay, p=self.p)
        return reg_loss

    def get_weight(self, model):
        '''
        Collect the model's weight parameters (biases are skipped)
        '''
        weight_list = []
        for name, param in model.named_parameters():
            if 'weight' in name:
                weight = (name, param)
                weight_list.append(weight)
        return weight_list

    def regularization_loss(self, weight_list, weight_decay, p=2):
        '''
        Compute the p-norm penalty over all collected weights
        '''
        reg_loss = 0
        for name, w in weight_list:
            w_norm = torch.norm(w, p=p)
            reg_loss = reg_loss + w_norm
        reg_loss = weight_decay * reg_loss
        return reg_loss

    def weight_info(self, weight_list):
        '''
        Print the names of the regularized weights
        '''
        print("---------------regularization weight---------------")
        for name, w in weight_list:
            print(name)
        print("---------------------------------------------------")

#---------------------------- data preparation ----------------------------#
data = pd.read_csv('diabetes.csv')
data1 = data.copy()
y = data1.loc[:, ['Outcome']]  # labels
del data1['Outcome']
x = data1                      # features

# 70/30 train/test split; the original left random_state blank, any fixed value works
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=0)
ss = StandardScaler()
x_train = ss.fit_transform(x_train)  # fit the scaler on the training set
x_test = ss.transform(x_test)        # reuse the training statistics for the test set

#---------------------------- convert to tensors ----------------------------#
x_train_tensor = torch.from_numpy(x_train)
x_test_tensor = torch.from_numpy(x_test)
y_train_numpy = np.array(y_train)
y_train_tensor = torch.from_numpy(y_train_numpy)
y_test_numpy = np.array(y_test)
y_test_tensor = torch.from_numpy(y_test_numpy)
x = x_train_tensor.float()
y = y_train_tensor.float()

#---------------------------- network ----------------------------#
class module_net(nn.Module):
    def __init__(self, num_input, num_hidden, num_output):
        super(module_net, self).__init__()
        self.layer1 = nn.Linear(num_input, num_hidden)
        self.layer2 = nn.ReLU()
        self.layer3 = nn.Linear(num_hidden, num_hidden)
        self.dropout3 = nn.Dropout(p=0.5)
        self.layer4 = nn.ReLU()
        self.layer5 = nn.Linear(num_hidden, num_hidden)
        self.dropout5 = nn.Dropout(p=0.5)
        self.layer6 = nn.ReLU()
        self.layer7 = nn.Linear(num_hidden, num_output)

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.dropout3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        x = self.dropout5(x)
        x = self.layer6(x)
        x = self.layer7(x)
        return x

#---------------------------- training ----------------------------#
torch.backends.cudnn.benchmark = True
device = "cpu"
weight_decay = 0.01  # regularization coefficient

model = module_net(8, 10, 1).to(device)

# set up regularization
if weight_decay > 0:
    reg_loss = Regularization(model, weight_decay, p=2).to(device)  # p=1 for L1, p=2 for L2
else:
    print("no regularization")

criterion = nn.BCEWithLogitsLoss().to(device)  # sigmoid + binary cross entropy in one op
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)  # no weight_decay argument needed here

Loss_list = []      # collect loss values for plotting
Accuracy_list = []  # collect accuracy values for plotting

model.train()  # enable dropout during training
for e in range(10000):
    out = model(Variable(x))
    loss = criterion(out, Variable(y))
    if weight_decay > 0:
        loss = loss + reg_loss(model)  # add the regularization penalty to the data loss
    Loss_list.append(loss.item())

    # -------------------- accuracy --------------------#
    out_class = (out[:] > 0).float()               # logits > 0 map to class 1, otherwise class 0
    right_num = torch.sum(y == out_class).item()   # number of correct predictions
    precision = right_num / out.shape[0]           # accuracy
    # ---------------------------------------------------#
    Accuracy_list.append(precision)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if (e + 1) % 1000 == 0:
        print('epoch: {}, loss: {}, precision: {}, right_num: {}'.format(e + 1, loss.item(), precision, right_num))

x1 = list(range(10000))
plt.plot(x1, Loss_list, c='red', label='loss')
plt.plot(x1, Accuracy_list, c='blue', label='precision')
plt.legend()

#---------------------------- evaluation ----------------------------#
model.eval()  # disable dropout for testing
x_test_tensor = x_test_tensor.float()
y_test_tensor = y_test_tensor.float()
out_test = model(Variable(x_test_tensor))
loss_test = criterion(out_test, Variable(y_test_tensor))
out_test_class = (out_test[:] > 0).float()                           # logits > 0 map to class 1
right_num_test = torch.sum(y_test_tensor == out_test_class).item()   # correct predictions
precision_test = right_num_test / out_test.shape[0]                  # test accuracy
loss_test = loss_test.item()
print('loss_test: {}, precision_test: {}, right_num_test: {}'.format(loss_test, precision_test, right_num_test))
