机器学习与数据挖掘实验五
(编程实现误差逆传播(BP)算法)
实验目的:
掌握误差逆传播算法(BP算法)的工作流程
实验环境:
Anaconda/Jupyter notebook/Pycharm
实验内容:
编码实现标准BP算法,在西瓜数据集3.0上用这个算法训练一个单隐层网络,并进行测试。
实验步骤:
# -*- coding:UTF-8 -*-
# Module setup: imports, RNG seeding for reproducible runs, and matplotlib
# configuration.
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

# NOTE(review): the original seed value was lost when this file was flattened
# (the line read `seed = import random`). Any fixed integer restores
# reproducibility; replace 0 with the intended value if it is known.
seed = 0
np.random.seed(seed)  # Numpy module.
random.seed(seed)  # Python random module.

plt.rcParams['font.sans-serif'] = ['SimHei']  # so Chinese labels render correctly
plt.rcParams['axes.unicode_minus'] = False  # so the minus sign renders correctly
plt.close('all')
def preprocess(data):
    """Encode, standardise and split the data table into numpy arrays.

    Every object/string-typed column (the label column '好瓜' included) is
    replaced by integer codes from a fresh ``LabelEncoder``. The feature
    columns (everything except '好瓜') are then standardised with
    ``StandardScaler``.

    Args:
        data: pandas DataFrame holding the feature columns and a '好瓜'
            label column. It is not modified.

    Returns:
        Tuple ``(x, y)``: ``x`` is the standardised feature matrix as
        returned by ``StandardScaler.fit_transform``; ``y`` is the label
        column reshaped to a column vector of shape ``(n_samples, 1)``.
    """
    # Work on a copy so the caller's DataFrame is left untouched.
    data = data.copy()

    # Map non-numeric columns to integer codes. Both the classic object dtype
    # and pandas' dedicated string dtype are treated as "non-numeric".
    for title in data.columns:
        column = data[title]
        if (pd.api.types.is_object_dtype(column)
                or pd.api.types.is_string_dtype(column)):
            data[title] = LabelEncoder().fit_transform(column)

    X = data.drop('好瓜', axis=1)
    Y = data['好瓜']

    # Remove the mean and scale each feature to unit variance.
    X = StandardScaler().fit_transform(X)

    x = np.array(X)
    y = np.array(Y).reshape(-1, 1)
    return x, y
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    Args:
        x: scalar or array-like of real numbers.

    Returns:
        A float64 numpy scalar for scalar input, otherwise an ndarray with
        the same shape as ``x``.
    """
    x = np.asarray(x, dtype=float)
    # exp(-|x|) lies in (0, 1], so it cannot overflow; the naive form calls
    # exp(-x), which overflows for large negative x.
    z = np.exp(-np.abs(x))
    # For x >= 0: 1 / (1 + e^-x).  For x < 0: e^x / (1 + e^x) (same value).
    out = np.where(x >= 0, 1.0 / (1.0 + z), z / (1.0 + z))
    # np.where yields a 0-d array for scalar input; unwrap it to a scalar.
    return out[()] if out.ndim == 0 else out
def d_sigmoid(x):
    """Return the sigmoid derivative expressed in terms of the sigmoid output.

    Args:
        x: value(s) of ``sigmoid(u)`` -- the activation itself, NOT the
            pre-activation ``u``. This is how the training loop calls it
            (``d_sigmoid(out1)``, ``d_sigmoid(out2)``).

    Returns:
        ``x * (1 - x)``, element-wise.
    """
    return x * (1 - x)
def _plot_loss(losslist):
    """Plot the mean loss of each iteration against its 1-based index."""
    plt.figure()
    plt.plot(np.arange(1, len(losslist) + 1), losslist)
    plt.xlabel('iteration')
    plt.ylabel('loss')
    plt.show()


def standard_BP(x, y, dim, eta, max_iter, *, verbose=True, plot=True):
    """Train a single-hidden-layer sigmoid network with standard BP.

    "Standard" BP updates the parameters after every individual sample
    rather than after a full pass. The per-sample loss is the squared error
    ``(y - out)^2 / 2``.

    Args:
        x: feature matrix, one sample per row.
        y: targets, one row per sample (a single output unit is used).
        dim: number of hidden units.
        eta: learning rate (step size multiplier of every update).
        max_iter: number of passes over the whole data set.
        verbose: when true (default), print the loss of every sample.
        plot: when true (default), show the mean-loss-per-iteration curve
            once training has finished.

    Returns:
        Tuple ``(w1, w2, b1, b2)`` with shapes ``(n_features, dim)``,
        ``(dim, 1)``, ``(1, dim)`` and ``(1, 1)``.
    """
    # Parameters start uniformly in [0, 1). The biases are row vectors so
    # they broadcast over a whole batch when the caller predicts.
    w1 = np.random.random((x.shape[1], dim))
    w2 = np.random.random((dim, 1))
    b1 = np.random.random((1, dim))
    b2 = np.random.random((1, 1))

    losslist = []
    for ite in range(max_iter):
        loss_per_ite = []
        for sample, target in zip(x, y):
            xi = np.reshape(sample, (1, -1))
            yi = np.reshape(target, (1, -1))

            # Forward pass.
            out1 = sigmoid(np.dot(xi, w1) + b1)
            out2 = sigmoid(np.dot(out1, w2) + b2)

            # Reduce to a plain float: '%.4f' on a (1, 1) array relies on an
            # implicit array-to-scalar conversion that NumPy has deprecated.
            loss = float(np.sum(np.square(yi - out2)) / 2)
            loss_per_ite.append(loss)
            if verbose:
                print('iter:%d loss:%.4f' % (ite, loss))

            # Backward pass. g and e are the negative loss gradients with
            # respect to the output / hidden pre-activations.
            g = (yi - out2) * d_sigmoid(out2)
            e = np.dot(g, np.transpose(w2)) * d_sigmoid(out1)
            d_w2 = np.dot(np.transpose(out1), g)
            d_w1 = np.dot(np.transpose(xi), e)
            # The forward pass ADDS the biases, so their descent step has the
            # same sign as the weights' step. Using -g / -e (the rule for a
            # subtracted threshold) would move the biases uphill.
            d_b2 = g
            d_b1 = e

            # Parameter update (w2 is updated only after e has used it).
            w1 = w1 + eta * d_w1
            w2 = w2 + eta * d_w2
            b1 = b1 + eta * d_b1
            b2 = b2 + eta * d_b2

        losslist.append(np.mean(loss_per_ite))

    if plot:
        _plot_loss(losslist)
    return w1, w2, b1, b2
def main():
    """Train the BP network on the watermelon data and export predictions.

    Reads the comma-separated file 'watermelon30.txt', drops the '编号'
    column, preprocesses the table, trains the network, predicts on the same
    samples it was trained on, and writes the true and predicted labels to
    'result.xlsx' (columns '真值' and '预测').
    """
    data = pd.read_table('watermelon30.txt', delimiter=',')
    data.drop('编号', axis=1, inplace=True)
    x, y = preprocess(data)
    print(x)
    print(y)

    dim = 10        # number of hidden units
    eta = 0.8       # learning rate
    max_iter = 500  # passes over the data set
    w1, w2, b1, b2 = standard_BP(x, y, dim, eta, max_iter)

    # Predict the class of every sample in x with the trained parameters.
    # NOTE: this is the training data, so it measures fit, not generalisation.
    out1 = sigmoid(np.dot(x, w1) + b1)
    out2 = sigmoid(np.dot(out1, w2) + b2)
    y_pred = np.round(out2)

    result = pd.DataFrame(np.hstack((y, y_pred)), columns=['真值', '预测'])
    result.to_excel('result.xlsx', index=False)
# Run the experiment only when this file is executed as a script.
if __name__ == "__main__":
    main()
实验结果: