酷不酷炫!想不想学!带统计学的PCoA完美解决打样本量多组数据不好区分的问题!!-腾讯云开发者社区-腾讯云
import numpy as np import matplotlib.pyplot as plt from matplotlib.gridspec import GridSpec from skbio import DistanceMatrix from skbio.stats.ordination import pcoa from skbio.stats.distance import permanova from scipy.stats import mannwhitneyu import seaborn as sns import pandas as pd from scipy.spatial.distance import squareform from matplotlib.colors import ListedColormap from sklearn.linear_model import LinearRegression from matplotlib.colors import Normalize from matplotlib.cm import ScalarMappable data = pd.read_csv('all.otus.abu.txt', delimiter='\t', header=0, index_col=0) group_data = pd.read_csv('分组.txt', delimiter='\t', index_col=0) array_data = data.values.T # 计算样本间的 Bray-Curtis 距离矩阵 distance_matrix_data = np.zeros((len(array_data), len(array_data))) for i in range(len(array_data)): for j in range(len(array_data)): distance_matrix_data[i, j] = np.abs(array_data[i] - array_data[j]).sum() / np.abs(array_data[i] + array_data[j]).sum() # 将距离矩阵转换为DistanceMatrix对象 distance_matrix = DistanceMatrix(distance_matrix_data) # PCoA分析 pcoa_results = pcoa(distance_matrix) pc1, pc2 = pcoa_results.samples['PC1'], pcoa_results.samples['PC2'] # 分组信息 grouping = group_data.values.T[0] groups = list(np.unique(grouping)) values =[groups.index(name) for name in grouping] dic_pc1={} for g in groups: dic_pc1[g]=[] for i in range(len(pc1)): dic_pc1[grouping[i]].append(pc1[i]) dic_pc2={} for g in groups: dic_pc2[g]=[] for i in range(len(pc2)): dic_pc2[grouping[i]].append(pc2[i]) # PERMANOVA结果 perm_results = permanova(distance_matrix, grouping) # 计算自由度 df = len(pc1) - 2 # 创建线性回归模型并进行拟合,计算 R² model = LinearRegression().fit(np.array(pc1).reshape(-1, 1), pc2) r2 = model.score(np.array(pc1).reshape(-1, 1), pc2) # 设置图形布局 fig = plt.figure(figsize=(10, 10)) gs = GridSpec(4, 4, figure=fig) # 主散点图 main_ax = fig.add_subplot(gs[1:4, 0:3]) main_ax.set_xlabel('PC1') main_ax.set_ylabel('PC2') scatter = main_ax.scatter(pc1, pc2, c=values, cmap='jet') main_ax.legend(handles=scatter.legend_elements()[0], labels=groups, title="Group", loc='lower right') result_text = f"PERMANOVA\n df={df}\nR² = {r2:.4f}\np-value = {perm_results['p-value']:.4f}" ax_res = fig.add_subplot(gs[0, 3], sharey=main_ax) ax_res.text(0.95, 0.95, result_text, transform=ax_res.transAxes, ha='right', va='top') ax_res.xaxis.set_visible(False) ax_res.yaxis.set_visible(False) cmap = plt.get_cmap('jet') colors = cmap(np.linspace(0, 1, len(groups))) color_dict = {label: color for label, color in zip(groups, colors)} # 上侧箱型图 top_ax = fig.add_subplot(gs[0, :-1], sharex=main_ax) df_pc1 = pd.DataFrame.from_dict(dic_pc1, orient='index').T sns.boxplot(data=df_pc1, ax=top_ax, orient='h', palette=color_dict) top_ax.set(xlabel='') # 右侧箱型图 right_ax = fig.add_subplot(gs[1:, -1], sharey=main_ax) df_pc2= pd.DataFrame.from_dict(dic_pc2, orient='index').T sns.boxplot(data=df_pc2, ax=right_ax, orient='v', palette=color_dict) plt.setp(right_ax.get_xticklabels(), rotation=45) right_ax.set(ylabel='') plt.tight_layout() plt.show()
还没有评论,来说两句吧...