200字范文,内容丰富有趣,生活中的好帮手!
200字范文 > 【python】matplotlib 绘制火山图 条形图 直方图

【python】matplotlib 绘制火山图 条形图 直方图

时间:2019-03-20 12:30:36

相关推荐

【python】matplotlib 绘制火山图 条形图 直方图

文章目录

火山图、条形图、直方图

火山图

绘制火山图:输入是两个 DataFrame,行是样本名,列是基因名(两个 DataFrame 的列相同,样本数可以不同)。对每个基因做两组独立样本的 t 检验(T-test),并结合 log2 fold change 判断基因表达是上调还是下调。

import numpy as np
from scipy import stats


def minmax_scale(data):
    """Rescale ``data`` linearly so its minimum maps to 0 and its maximum to 1.

    Args:
        data: Sequence or array of numbers.

    Returns:
        A float ``numpy`` array with the same shape as ``data``. When every
        value is identical (zero range) an array of zeros is returned instead
        of dividing by zero.

    Raises:
        ValueError: If ``data`` is empty (raised by ``np.min``).
    """
    values = np.asarray(data, dtype=float)
    min_val = np.min(values)
    max_val = np.max(values)
    span = max_val - min_val
    if span == 0:
        # Constant input: avoid 0/0 and map everything to the lower bound.
        return np.zeros_like(values)
    return (values - min_val) / span


def gene_exp(group1, group2, is_scale=False, alpha=0.05):
    """Classify one gene as up- or down-regulated between two sample groups.

    Runs Welch's t-test (``equal_var=False``) on the two groups and computes
    ``log2(mean(group2) / mean(group1))`` as the fold change.

    Args:
        group1: Expression values of the gene in the first group.
        group2: Expression values of the gene in the second group.
        is_scale: If true, each group is min-max scaled independently first.
        alpha: Significance level compared against the p-value.

    Returns:
        A tuple ``(label, fold_change, p_value)`` where ``label`` is ``'Up'``
        (fold change > 0 and p < alpha), ``'Down'`` (fold change < 0 and
        p < alpha) or ``'Unknown'`` otherwise. ``fold_change`` and ``p_value``
        may be NaN or infinite (e.g. a zero or negative group mean); callers
        are expected to filter those.
    """
    if is_scale:
        group1 = minmax_scale(group1)
        group2 = minmax_scale(group2)

    # Two-sample t-test without assuming equal variances.
    _, p_value = stats.ttest_ind(group1, group2, equal_var=False)

    # A zero/negative mean ratio legitimately yields inf/NaN here; silence the
    # floating-point warnings and let the caller decide what to do with them.
    with np.errstate(divide='ignore', invalid='ignore'):
        fold_change = np.log2(np.mean(group2) / np.mean(group1))

    if p_value < alpha:
        if fold_change > 0:
            return 'Up', fold_change, p_value
        if fold_change < 0:
            return 'Down', fold_change, p_value
    return 'Unknown', fold_change, p_value


# Scatter colours for each regulation label; anything else is drawn black.
_REGULATION_COLORS = {'Up': 'r', 'Down': 'g'}


def clst(lst):
    """Map regulation labels to matplotlib colour codes.

    ``'Up'`` becomes ``'r'``, ``'Down'`` becomes ``'g'`` and every other label
    becomes ``'k'``.
    """
    return [_REGULATION_COLORS.get(label, 'k') for label in lst]


def _gene_label(name):
    """Return a printable label for a column key.

    Plain strings are returned unchanged; other iterables (for example tuple
    keys) have their parts joined with ``':'``.
    """
    if isinstance(name, str):
        return name
    try:
        return ':'.join(str(part) for part in name)
    except TypeError:
        # Non-iterable key such as an integer column label.
        return str(name)


def data_exp(df1, df2, is_scale=False, alpha=0.05):
    """Run :func:`gene_exp` for every gene column shared by two data frames.

    Both frames have samples as rows (the row counts may differ) and genes as
    columns (the same columns in both). One tab-separated line per gene
    (label, regulation, fold change, p-value) is printed, including genes
    that are skipped.

    Args:
        df1: Data frame for the first group.
        df2: Data frame for the second group.
        is_scale: Forwarded to :func:`gene_exp`.
        alpha: Significance level forwarded to :func:`gene_exp`.

    Returns:
        Three parallel lists ``(rmk_lst, fc_lst, pv_lst)`` with the regulation
        label, log2 fold change and p-value of every gene whose fold change
        and p-value are both finite. Genes producing NaN or infinite values
        are printed but left out of the lists.
    """
    rmk_lst = []
    fc_lst = []
    pv_lst = []
    for k in df1.columns:
        rmk, fc, pv = gene_exp(
            list(df1[k]), list(df2[k]), is_scale=is_scale, alpha=alpha
        )
        print('\t'.join([_gene_label(k), rmk, str(fc), str(pv)]))
        # NaN / inf cannot be placed on the plot, so drop those genes.
        if not (np.isfinite(fc) and np.isfinite(pv)):
            continue
        rmk_lst.append(rmk)
        fc_lst.append(fc)
        pv_lst.append(pv)
    return rmk_lst, fc_lst, pv_lst


def plot_volcano(fold_change, p_value, title, pltcolor=False, regulation=None,
                 alpha=0.05):
    """Draw a volcano plot: log2 fold change against ``-log10(p-value)``.

    Args:
        fold_change: Log2 fold change per gene.
        p_value: P-value per gene, same length as ``fold_change``.
        title: Text inserted into the plot title.
        pltcolor: If true, colour each point using ``regulation``.
        regulation: Regulation label per gene (``'Up'`` / ``'Down'`` / other);
            only used when ``pltcolor`` is true.
        alpha: Significance level drawn as a horizontal dashed line.
    """
    # Imported here so the statistics helpers stay usable without matplotlib.
    import matplotlib.pyplot as plt

    fig, ax = plt.subplots()

    # Clamp p-values to machine epsilon so that p == 0 does not become inf.
    neg_log_pval = -np.log10(np.maximum(p_value, np.finfo(float).eps))

    if pltcolor:
        labels = [] if regulation is None else regulation
        ax.scatter(fold_change, neg_log_pval, c=clst(labels))
    else:
        ax.scatter(fold_change, neg_log_pval)

    ax.set_xlabel('Fold Change (log2)')
    ax.set_ylabel('-log10(p-value)')
    ax.set_title('Volcano Plot[%s]' % title)

    # Horizontal line marking the significance threshold.
    ax.axhline(-np.log10(alpha), color='gray', linestyle='--')

    plt.show()

使用方法:

# Compute the regulation label, log2 fold change and p-value of every gene.
rmklst, fclst, pvlst = data_exp(data_df1, data_df2, is_scale=True)
# Draw the volcano plot, colouring the points by regulation label.
plot_volcano(list(fclst), list(pvlst), title="mytitle", pltcolor=True, regulation=rmklst)

条形图

代码问题:左右标签没有对齐

import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter

# Mirrored ("butterfly") horizontal bar chart: DataFrame 1 grows to the left
# and is labelled on the left axis, DataFrame 2 grows to the right and is
# labelled on the right axis.
# NOTE: the tick labels are Chinese; matplotlib needs a font containing those
# glyphs to be configured, otherwise they may render as empty boxes.

# Example DataFrame 1
data1 = {'城市': ['北京', '上海', '广州', '深圳', '成都'],
         '人口': [2154, 2423, 1404, 1303, 1682]}
df1 = pd.DataFrame(data1)

# Example DataFrame 2
data2 = {'城市': ['纽约', '伦敦', '巴黎', '东京', '新加坡'],
         '人口': [8537, 9304, 2141, 13929, 5894]}
df2 = pd.DataFrame(data2)

width = 0.35  # thickness of each bar

# Create the figure and the main axes.
fig, ax = plt.subplots(figsize=(10, 6))

# Left-hand bars: negated so they extend to the left of zero.
ax.barh(df1.index, -df1['人口'], height=width, color='blue', label='DataFrame 1')

# Right-hand bars.
ax.barh(df2.index, df2['人口'], height=width, color='red', label='DataFrame 2')

# The left bars use negative x only for layout; show magnitudes on the axis.
ax.xaxis.set_major_formatter(FuncFormatter(lambda value, _pos: '%g' % abs(value)))

# Left y-axis: one tick per row of DataFrame 1.
ax.set_yticks(df1.index)
ax.set_yticklabels(df1['城市'])

# Right y-axis: one tick per row of DataFrame 2, in the same order as the
# bars. A twin axis starts with its own default limits, so copy the limits of
# the main axis; otherwise the right-hand ticks do not line up with the bars.
ax2 = ax.twinx()
ax2.set_ylim(ax.get_ylim())
ax2.set_yticks(df2.index)
ax2.set_yticklabels(df2['城市'])

# Use the same tick styling on both sides.
ax.tick_params(axis="y", direction="inout", length=6, pad=10)  # left labels
ax2.tick_params(axis="y", direction="inout", length=6, pad=10)  # right labels

# Show the legend and the figure.
ax.legend()
plt.show()

直方图

自定义划分的bin区间

import numpy as np


def _overlay_hist_grid(df0, df1, label, bins_for, nrows, ncols, figsize,
                       xmax, share_axes):
    """Draw one subplot per column of ``df0`` with both frames overlaid.

    ``bins_for(column_name)`` supplies the bin edges used for both histograms
    of that column. If there are more columns than ``nrows * ncols`` cells,
    extra rows are added and the figure height grows proportionally.
    """
    # Imported here so the numeric helpers stay usable without matplotlib.
    import matplotlib.pyplot as plt

    names = list(df0.columns)
    requested_rows = max(int(nrows), 1)
    rows = max(requested_rows, -(-len(names) // ncols))  # ceil division
    fig_width, fig_height = figsize
    # squeeze=False keeps ``axs`` two-dimensional even for a single row.
    fig, axs = plt.subplots(
        nrows=rows,
        ncols=ncols,
        figsize=(fig_width, fig_height * rows / requested_rows),
        sharex=share_axes,
        sharey=share_axes,
        squeeze=False,
    )
    for i, name in enumerate(names):
        ax = axs[i // ncols, i % ncols]
        edges = bins_for(name)
        ax.hist(df0[name], bins=edges, label=label + '-0')
        ax.hist(df1[name], bins=edges, alpha=0.5, label=label + '-1')
        ax.set_title(name, fontsize=8)
        ax.grid(axis="y")
        if xmax is not None:
            ax.set_xlim(0, xmax)
        ax.legend()
    plt.show()


def histplot(df0, df1, label, xmax=None, bins=None):
    """Plot overlaid histograms of every column using one fixed set of bins.

    The subplots are laid out five per row (at least eight rows) and share
    their x and y axes.

    Args:
        df0: First data frame; its columns decide which subplots are drawn.
        df1: Second data frame with the same columns.
        label: Legend prefix; the two series are named ``label-0`` and
            ``label-1``.
        xmax: If given, the x-axis of every subplot is limited to
            ``(0, xmax)``.
        bins: Bin edges shared by all subplots. Defaults to
            ``np.arange(0, 1.1, 0.1)``.
    """
    if bins is None:
        bins = np.arange(0, 1.1, 0.1)
    _overlay_hist_grid(
        df0, df1, label, lambda _name: bins,
        nrows=8, ncols=5, figsize=(12, 20), xmax=xmax, share_axes=True,
    )


def bins_lst(lst1, lst2, n=10):
    """Return ``n + 1`` equally spaced bin edges covering both inputs.

    The edges run from the smallest to the largest finite value found in
    ``lst1`` and ``lst2``, so every finite value falls inside a bin. If all
    values are equal, the range is widened by 0.5 on each side so the bins
    still have a positive width.

    Args:
        lst1: First collection of numbers.
        lst2: Second collection of numbers.
        n: Number of bins; must be at least 1.

    Returns:
        A ``numpy`` array of ``n + 1`` increasing bin edges.

    Raises:
        ValueError: If ``n`` is smaller than 1 or there are no finite values.
    """
    if n < 1:
        raise ValueError("n must be at least 1")
    values = np.concatenate([
        np.asarray(lst1, dtype=float).ravel(),
        np.asarray(lst2, dtype=float).ravel(),
    ])
    values = values[np.isfinite(values)]
    if values.size == 0:
        raise ValueError("bins_lst needs at least one finite value")
    low = values.min()
    high = values.max()
    if low == high:
        low -= 0.5
        high += 0.5
    # linspace hits both end points exactly and never yields a zero step,
    # unlike arange with a rounded step.
    return np.linspace(low, high, n + 1)


def histplot_bin(hl_df, pc_df, label, nrows=2, hsize=6, nbin=10, xmax=None):
    """Plot overlaid histograms with bins computed separately per column.

    For each column the bin edges come from :func:`bins_lst` applied to that
    column of both frames. Subplots are laid out five per row and do not
    share axes.

    Args:
        hl_df: First data frame; its columns decide which subplots are drawn.
        pc_df: Second data frame with the same columns.
        label: Legend prefix; the two series are named ``label-0`` and
            ``label-1``.
        nrows: Number of subplot rows; more are added if the columns do not
            fit.
        hsize: Figure height (the width is fixed at 9) for ``nrows`` rows.
        nbin: Number of bins per histogram.
        xmax: If given, the x-axis of every subplot is limited to
            ``(0, xmax)``.
    """
    def column_bins(name):
        return bins_lst(hl_df[name], pc_df[name], n=nbin)

    _overlay_hist_grid(
        hl_df, pc_df, label, column_bins,
        nrows=nrows, ncols=5, figsize=(9, hsize), xmax=xmax, share_axes=False,
    )


# Usage. The data frames are supplied by the caller and are not defined in
# this snippet, so the calls are shown as examples rather than executed:
#     histplot(data_df0, data_df1, 'label')
#     histplot_bin(df1[features], df2[features], label="label", nrows=3, hsize=8)

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。