查看对应的版本
# Environment check: print the interpreter version and the installed version
# of each scientific library below. A library that is not installed fails
# at its import line with ImportError.

# Python version
import sys
print('Python: {}'.format(sys.version))

# scipy
import scipy
print('scipy: {}'.format(scipy.__version__))

# numpy
import numpy
print('numpy: {}'.format(numpy.__version__))

# matplotlib
import matplotlib
print('matplotlib: {}'.format(matplotlib.__version__))

# pandas
import pandas
print('pandas: {}'.format(pandas.__version__))

# scikit-learn
import sklearn
print('sklearn: {}'.format(sklearn.__version__))
新建一个简单的 DataFrame(数据帧)
import numpy
import pandas
import matplotlib.pyplot as plt
# Scatter-plot matrix
from pandas.plotting import scatter_matrix
# Feature scaling
from sklearn.preprocessing import StandardScaler
# K-fold cross-validation
from sklearn.model_selection import KFold
# Logistic regression
from sklearn.linear_model import LogisticRegression

# Build a small labelled table from a 2x3 integer array: one row label per
# array row and one column label per array column.
myarray = numpy.array([[1, 2, 3], [4, 5, 6]])
rownames = ['a', 'b']
colnames = ['one', 'two', 'three']
mydataframe = pandas.DataFrame(myarray, index=rownames, columns=colnames)
print(mydataframe)
   one  two  three
a    1    2      3
b    4    5      6
从 CSV 加载数据,输出数据的形状(行数, 列数)
import pandas

# Full raw-GitHub URL of the dataset. A bare "/jbrownlee/..." path has no
# scheme or host, so pandas would look for a local file instead of downloading.
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
# Column names are passed explicitly via `names`, so pandas reads the first
# line of the file as data (presumably the CSV has no header row).
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
data = pandas.read_csv(url, names=names)
# Shape of the loaded table as (rows, columns)
print(data.shape)
csv数据转换 用描述性统计理解数据
# Reload the dataset and print the per-column descriptive statistics
# produced by DataFrame.describe().
data = pandas.read_csv(url, names=names)
description = data.describe()
print(description)
绘图 用可视化理解数据
# Reload the dataset and draw a scatter-plot matrix of its columns,
# then display the figure.
data = pandas.read_csv(url, names=names)
scatter_matrix(data)
plt.show()
预处理数据,为建模做准备
# Reload the dataset and work on its underlying array of values.
dataframe = pandas.read_csv(url, names=names)
array = dataframe.values
# Split the array into input and output components:
# the first eight columns are the inputs, the ninth column is the output.
X = array[:, 0:8]
Y = array[:, 8]
# Fit the scaler on the inputs, then standardize them by centering and scaling.
scaler = StandardScaler().fit(X)
rescaledX = scaler.transform(X)
# Summarize the transformed data: print the first five rows,
# with numpy's print precision set to three decimal places.
numpy.set_printoptions(precision=3)
print(rescaledX[0:5, :])
\
想获取python学习资料的小伙伴可以加QQ:728711576