200字范文,内容丰富有趣,生活中的好帮手!
200字范文 > Python学习:利用线性回归预测双色球开奖号码(纯娱乐升级版)

Python学习:利用线性回归预测双色球开奖号码(纯娱乐升级版)

时间:2022-07-05 18:05:03

相关推荐

Python学习:利用线性回归预测双色球开奖号码(纯娱乐升级版)

在网上看了一下有相关的例子,不过没有完整源码,运行还会报错,稍微做修改和优化,修复部分已知错误。

特别提醒:仅供娱乐,请勿当真,双色球属于随机数,线性回归无法预测随机数的哦~~

编译运行环境:Python 3.7x

原版源码如下:(URL暂时无法正常使用了,改用升级版)

#!/usr/bin/python
# -*- coding:UTF-8 -*-
"""Scrape historical union-lotto draws and "predict" each ball (for fun only).

The draws are scraped page by page, stored in a CSV file, and then fed to a
linear regression, one ball column at a time.  The feature column is a
constant, so the regression slope is 0 and the "prediction" is simply the
historical mean of that ball.
"""
import os
import re
from urllib import request

import pandas as pd
from bs4 import BeautifulSoup
from sklearn import datasets, linear_model

# NOTE(review): the host part of this URL appears to be missing, so requests
# built from it cannot succeed as-is -- confirm the real site address.
LIST_URL_TEMPLATE = '/zhcw/html/ssq/list_%d.html'

# Each draw consists of 6 red balls followed by 1 blue ball.
BALLS_PER_DRAW = 7

# Number of centered <td> cells consumed per draw row on a listing page.
# NOTE(review): taken from the original "skip 5 columns" step -- verify
# against the actual page markup.
CELLS_PER_DRAW = 5

# CSV / DataFrame column order: draw date, issue number, six red balls,
# the blue ball, and the comma-joined list of all seven numbers.
COLUMN_NAMES = ['date', 'id', 'r1', 'r2', 'r3', 'r4', 'r5', 'r6', 'b1', 'note']


def get_http_content(href):
    """Fetch ``href`` and return the raw response body as bytes.

    Errors raised by ``urllib`` (for example ``HTTPError``) propagate to the
    caller unchanged.
    """
    request_headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:81.0) Gecko/0101 Firefox/81.0'}
    req = request.Request(href, headers=request_headers)
    # The context manager closes the connection even if read() fails.
    with request.urlopen(req) as res:
        return res.read()


def get_page_num(url):
    """Return the total number of listing pages advertised at ``url``.

    The count is read from the ``<td colspan="7">`` cell: its text is split
    on spaces and every digit in the second part is concatenated.  Returns 0
    when the cell, the second part, or the digits are missing.
    """
    content = get_http_content(url)
    soup = BeautifulSoup(content, features='lxml')
    pager = soup.find('td', colspan='7')
    if not pager:
        return 0
    parts = pager.get_text().split(' ')
    if len(parts) < 2:
        # Unexpected pager text: report "no pages" instead of an IndexError.
        return 0
    digits = re.findall(r'[0-9]', parts[1])
    return int(''.join(digits)) if digits else 0


def get_history_lotto(page):
    """Scrape one listing page and return its draws.

    Returns a list of rows shaped like ``COLUMN_NAMES``:
    ``[date, id, r1, ..., r6, b1, note]``, all as strings.  Ball numbers come
    from the page's ``<em>`` tags, date and issue number from the centered
    ``<td>`` cells.  A trailing group of fewer than seven balls is dropped.
    """
    href = LIST_URL_TEMPLATE % page
    print('load......', page, href)
    content = get_http_content(href)
    soup = BeautifulSoup(content, features='lxml')
    ball_tags = soup.find_all('em')
    info_cells = soup.find_all('td', {'align': 'center'})

    rows = []
    balls = []
    for tag in ball_tags:
        balls.append(str(tag.get_text()))
        if len(balls) < BALLS_PER_DRAW:
            continue
        # A full draw has been collected; its date and issue number are the
        # first two cells of the matching group of centered cells.
        base = len(rows) * CELLS_PER_DRAW
        draw_date = str(info_cells[base].get_text())
        draw_id = str(info_cells[base + 1].get_text())
        rows.append([draw_date, draw_id, *balls, ','.join(balls)])
        balls = []
    return rows


def get_history_result(out_file):
    """Scrape every listing page, save the draws to ``out_file`` and return them.

    Pages that fail to load or parse are reported on stdout and skipped, so
    one bad page does not abort the whole crawl.  The result is written as
    CSV (UTF-8 with BOM, no index column) and returned as a DataFrame.
    """
    page_count = get_page_num(LIST_URL_TEMPLATE % 1)
    results = []
    # Pages are numbered from 1, so the last page is ``page_count`` itself.
    for page in range(1, page_count + 1):
        try:
            results += get_history_lotto(page)
        except Exception as e:
            # Best-effort crawl: report the failure and keep going.
            print('error......', page, e)

    df = pd.DataFrame(columns=COLUMN_NAMES, data=results)
    print('保存结果')
    df.to_csv(out_file, index=False, encoding='utf-8-sig')
    print(df)
    return df


def get_lotto_data(file, lotto, lotto_id):
    """Load the CSV at ``file`` and build regression inputs for one column.

    Returns ``(X, Y)``: ``Y`` holds the values of column ``lotto`` as floats,
    and ``X`` holds one single-feature sample ``[float(lotto_id)]`` per row.
    Every sample carries the same feature value.
    """
    data = pd.read_csv(file)
    Y = [float(value) for value in data[lotto]]
    X = [[float(lotto_id)] for _ in Y]
    return X, Y


def linear_model_test(X, Y, predict_value):
    """Fit a linear regression on ``(X, Y)`` and predict ``predict_value``.

    Returns a dict with the fitted ``intercept``, ``coefficient`` and the
    ``predicted_value`` array.
    """
    regr = linear_model.LinearRegression()
    regr.fit(X, Y)
    return {
        'intercept': regr.intercept_,
        'coefficient': regr.coef_,
        'predicted_value': regr.predict(predict_value),
    }


def get_predicted_num(file, lotto, lotto_id, num):
    """Print the regression "prediction" for ball column ``lotto``.

    ``file`` is the history CSV path, ``lotto_id`` the constant feature value
    attached to every sample, and ``num`` a label that is only echoed in the
    printed line.
    """
    X, Y = get_lotto_data(file, lotto, lotto_id)
    predict_value = [[51]]
    result = linear_model_test(X, Y, predict_value)
    print("num", num,
          'Intercept value', result['intercept'],
          'Coefficient', result['coefficient'],
          'Predicted value', result['predicted_value'])


if __name__ == '__main__':
    force_refresh = False  # set to True to re-download the history
    file_name = './union_lotto_history_result.csv'
    if not os.path.exists(file_name) or force_refresh:
        # Download the draw history on first run (or when forced).
        get_history_result(file_name)
    # Predictions are for entertainment only -- the draws are random.
    get_predicted_num(file_name, 'r1', 1, 1)  # red ball 1
    get_predicted_num(file_name, 'r2', 2, 2)  # red ball 2
    get_predicted_num(file_name, 'r3', 3, 3)  # red ball 3
    get_predicted_num(file_name, 'r4', 4, 4)  # red ball 4
    get_predicted_num(file_name, 'r5', 5, 5)  # red ball 5
    get_predicted_num(file_name, 'r6', 6, 28)  # red ball 6
    get_predicted_num(file_name, 'b1', 7, 9)  # blue ball

升级版源码如下:

"""Fetch recent union-lotto draws from a JSON API and "predict" each ball.

For entertainment only: the feature fed to the regression is a constant, so
the slope is 0 and the "prediction" is simply the mean of the sampled draws.
"""
import json
import os

import pandas as pd
import requests
from sklearn import datasets, linear_model

# DataFrame column order: draw date, issue number, six red balls, the blue
# ball, and the comma-joined list of all seven numbers.
COLUMN_NAMES = ['date', 'id', 'r1', 'r2', 'r3', 'r4', 'r5', 'r6', 'b1', 'note']

# Seconds to wait for the API before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10


def get_history_lotto():
    """Download the latest draws and return them as a DataFrame.

    Each entry of the response's ``result`` list becomes one row with the
    columns in ``COLUMN_NAMES``; ball numbers are kept as the strings
    delivered by the API.

    Raises:
        requests.RequestException: on network failure, timeout, or a
            non-success HTTP status.
    """
    # NOTE(review): the host name in the URL, ``Host`` and ``Referer`` values
    # below appears to be truncated ("www.") -- confirm the real API host.
    url = 'http://www./cwl_admin/kjxx/findDrawNotice?name=ssq&issueCount=100'
    headers = {
        'Host': 'www.',
        'Referer': 'http://www./kjxx/ssq/kjgg/',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:81.0) Gecko/0101 Firefox/81.0',
    }
    response = requests.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Fail with a clear HTTP error rather than a confusing JSON decode error.
    response.raise_for_status()
    response_data = json.loads(response.text)

    results = []
    for draw in response_data['result']:
        row = {'id': draw['code'], 'date': draw['date']}
        # ``red`` is a comma-separated list of the red ball numbers.
        for position, ball in enumerate(str(draw['red']).split(','), start=1):
            row['r' + str(position)] = ball
        row['b1'] = draw['blue']
        row['note'] = draw['red'] + ',' + draw['blue']
        results.append(row)
    return pd.DataFrame(columns=COLUMN_NAMES, data=results)


def get_lotto_data(data, lotto, lotto_id):
    """Build regression inputs for column ``lotto`` of DataFrame ``data``.

    Returns ``(X, Y)``: ``Y`` holds the column values as floats, and ``X``
    holds one single-feature sample ``[float(lotto_id)]`` per row.  ``data``
    itself is not modified.
    """
    Y = [float(value) for value in data[lotto]]
    X = [[float(lotto_id)] for _ in Y]
    return X, Y


def linear_model_test(X, Y, predict_value):
    """Fit a linear regression on ``(X, Y)`` and predict ``predict_value``.

    Returns a dict with the fitted ``intercept``, ``coefficient`` and the
    ``predicted_value`` array.
    """
    regr = linear_model.LinearRegression()
    regr.fit(X, Y)
    return {
        'intercept': regr.intercept_,
        'coefficient': regr.coef_,
        'predicted_value': regr.predict(predict_value),
    }


def get_predicted_num(file, lotto, lotto_id, num):
    """Print the regression "prediction" for ball column ``lotto``.

    Despite its name, ``file`` is the DataFrame of draws (the name is kept
    for compatibility).  ``lotto_id`` is the constant feature value attached
    to every sample and ``num`` is a label that is only echoed in the output.
    """
    X, Y = get_lotto_data(file, lotto, lotto_id)
    predict_value = [[33]]
    result = linear_model_test(X, Y, predict_value)
    print("num", num,
          'Intercept value', result['intercept'],
          'Coefficient', result['coefficient'],
          'Predicted value', result['predicted_value'])


if __name__ == '__main__':
    file_name = './data/union_lotto_history_result.csv'
    df = get_history_lotto()
    # Make sure the output directory exists before writing the CSV.
    os.makedirs(os.path.dirname(file_name), exist_ok=True)
    df.to_csv(file_name, encoding='utf-8-sig', index=False)
    # Use at most 100 draws; sampling more rows than exist would raise.
    df = df.sample(min(100, len(df)))
    # Predictions are for entertainment only -- the draws are random.
    get_predicted_num(df, 'r1', 1, 5)  # red ball 1
    get_predicted_num(df, 'r2', 2, 10)  # red ball 2
    get_predicted_num(df, 'r3', 3, 15)  # red ball 3
    get_predicted_num(df, 'r4', 4, 20)  # red ball 4
    get_predicted_num(df, 'r5', 5, 24)  # red ball 5
    get_predicted_num(df, 'r6', 6, 29)  # red ball 6
    get_predicted_num(df, 'b1', 7, 9)  # blue ball

测试结果:

num 1 Intercept value 4.73266129032258 Coefficient [0.] Predicted value [4.73266129]

num 2 Intercept value 9.621774193548386 Coefficient [0.] Predicted value [9.62177419]

num 3 Intercept value 14.393145161290322 Coefficient [0.] Predicted value [14.39314516]

num 4 Intercept value 19.2625 Coefficient [0.] Predicted value [19.2625]

num 5 Intercept value 24.161290322580644 Coefficient [0.] Predicted value [24.16129032]

num 28 Intercept value 28.938709677419354 Coefficient [0.] Predicted value [28.93870968]

num 9 Intercept value 8.578225806451613 Coefficient [0.] Predicted value [8.57822581]

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。