def main(
    self,
    cluster_res_key: str,
    pca_res_key: Optional[str] = 'pca',
    use_raw: Optional[bool] = False,
    cor_method: str = 'pearson',
    linkage_method: str = 'complete',
    optimal_ordering: bool = False,
    res_key: str = 'dendrogram',
):
    """
    Computes a hierarchical clustering for the given `cluster_res_key` categories.

    .. note::
        The hierarchical clustering is computed on predefined groups, not per cell:
        the chosen representation is averaged within each category, the categories
        are correlated with each other (pearson by default), and `1 - correlation`
        is used as the distance passed to the linkage.

    :param cluster_res_key: a key or a list of keys which specify the cluster result in data.tl.result.
        When several keys are given, their labels are joined with "_" into one combined category.
    :param pca_res_key: a key which specifies the pca result in data.tl.result, if None, using exp_matrix instead.
    :param use_raw: whether to use raw exp_matrix, defaults to False.
    :param cor_method: correlation method to use, options are 'pearson', 'kendall', and 'spearman'.
    :param linkage_method: linkage method to use. See :func:`scipy.cluster.hierarchy.linkage`
        for more information.
    :param optimal_ordering: Same as the optimal_ordering argument of :func:`scipy.cluster.hierarchy.linkage`
        which reorders the linkage matrix so that the distance between successive leaves is minimal.
    :param res_key: a key to store dendrogram result in data.tl.result.
    :raises ValueError: if a cluster key is missing from data.tl.result, or if fewer than
        two clusters are left to compare.
    """
    if isinstance(cluster_res_key, str):
        # if not a list, turn into a list
        cluster_res_key = [cluster_res_key]

    cluster_res_list = []
    for key in cluster_res_key:
        if key not in self.pipeline_res:
            raise ValueError(f"there is no cluster result specified by key '{key}' in data.tl.result")
        # prefer the labels stored on the cells, fall back to the 'group' column of the stored result
        if key in self.stereo_exp_data.cells._obs:
            cluster_res_list.append(self.stereo_exp_data.cells._obs[key].astype('category'))
        else:
            cluster_res_list.append(self.pipeline_res[key]['group'].astype('category'))

    rep_df: pd.DataFrame = self._choose_representation(pca_res_key, use_raw)

    categorical: pd.Series = cluster_res_list[0]
    for group in cluster_res_list[1:]:
        # create new category by merging the given groupby categories
        categorical = (categorical.astype(str) + "_" + group.astype(str)).astype('category')
    # A column that was already categorical may still list categories that no longer
    # have any cell. Those would either produce all-NaN mean rows or a mismatch between
    # the number of leaves and labels, and scipy would fail with an unrelated-looking
    # error, so only the categories that are actually present are kept.
    categorical = categorical.cat.remove_unused_categories()
    categorical.name = "_".join(cluster_res_key)

    rep_df.set_index(categorical, inplace=True)
    categories = rep_df.index.categories
    if len(categories) < 2:
        # linkage can not be computed from a single observation; fail with a clear message
        raise ValueError(
            f"at least two clusters are required to compute a dendrogram, but got {len(categories)}"
        )

    # aggregate values within categories using 'mean'
    mean_df = rep_df.groupby(level=0).mean()

    import scipy.cluster.hierarchy as sch
    from scipy.spatial import distance

    # correlate the categories with each other (rows of mean_df become columns after .T)
    corr_matrix = mean_df.T.corr(method=cor_method)
    # convert the square `1 - correlation` matrix into the condensed form expected by linkage
    corr_condensed = distance.squareform(1 - corr_matrix)
    z_var = sch.linkage(corr_condensed, method=linkage_method, optimal_ordering=optimal_ordering)
    # no_plot=True: only the leaf ordering and coordinates are computed, nothing is drawn
    dendro_info = sch.dendrogram(z_var, labels=list(categories), no_plot=True)

    dat = dict(
        linkage=z_var,
        cluster_res_key=cluster_res_key,
        pca_res_key=pca_res_key,
        cor_method=cor_method,
        linkage_method=linkage_method,
        categories_ordered=dendro_info['ivl'],
        categories_idx_ordered=dendro_info['leaves'],
        dendrogram_info=dendro_info,
        correlation_matrix=corr_matrix.values,
    )
    self.pipeline_res[res_key] = dat
def _choose_representation(self, pca_res_key, use_raw):
    """
    Return the per-cell representation as a new DataFrame.

    :param pca_res_key: key of the PCA result in data.tl.result; if None, the expression
        matrix is used instead.
    :param use_raw: whether to read the raw expression matrix; only relevant when
        `pca_res_key` is None. Falls back to the current matrix (with a warning) when
        there is no raw data.
    :return: a copy of the stored PCA result, or the expression matrix wrapped in a DataFrame.
    :raises ValueError: if `pca_res_key` is given but not present in data.tl.result.
    """
    if pca_res_key is not None:
        if pca_res_key not in self.pipeline_res:
            raise ValueError(f"Can not get PCA result from data.tl.result by key '{pca_res_key}'.")
        # copy so that callers can re-index the frame without touching the stored result
        return self.pipeline_res[pca_res_key].copy()

    # same fallback as `_prepare_dataframe`: without raw data, use the current matrix
    if use_raw and self.stereo_exp_data.raw is None:
        logger.warning("there is no raw data, using current data instead.")
        use_raw = False
    exp_matrix = self.stereo_exp_data.raw.exp_matrix if use_raw else self.stereo_exp_data.exp_matrix
    # only sparse-like matrices offer `toarray`; a dense array is wrapped directly
    # NOTE(review): assumes a matrix without `toarray` is array-like — confirm against exp_matrix's type
    if hasattr(exp_matrix, 'toarray'):
        return pd.DataFrame(exp_matrix.toarray())
    # copy so the returned frame never aliases the stored dense matrix
    return pd.DataFrame(exp_matrix, copy=True)


def _prepare_dataframe(self, gene_names, use_raw):
    """
    Build a DataFrame holding the expression values of the selected genes.

    :param gene_names: the names of the genes to select; they become the column labels.
    :param use_raw: whether to read the raw expression matrix. Falls back to the current
        matrix (with a warning) when there is no raw data.
    :return: a DataFrame with one column per requested gene, in the requested order.
    :raises ValueError: if any requested gene is not found in the data's gene names.
    """
    all_gene_names = pd.Index(self.stereo_exp_data.genes.gene_name)
    gene_idx = all_gene_names.get_indexer(gene_names)
    # get_indexer marks unknown labels with -1, which would silently select the last
    # column of the matrix under the wrong gene name, so unknown genes are rejected
    missing = [name for name, idx in zip(gene_names, gene_idx) if idx == -1]
    if missing:
        raise ValueError(f"can not find gene(s) {missing} in the gene names of the data")

    if use_raw and self.stereo_exp_data.raw is None:
        logger.warning("there is no raw data, using current data instead.")
        use_raw = False
    source_matrix = self.stereo_exp_data.raw.exp_matrix if use_raw else self.stereo_exp_data.exp_matrix
    sub_exp_matrix = source_matrix[:, gene_idx]
    # only sparse-like matrices offer `toarray`; a dense selection is used as is
    if hasattr(sub_exp_matrix, 'toarray'):
        sub_exp_matrix = sub_exp_matrix.toarray()
    sub_exp_matrix_df = pd.DataFrame(sub_exp_matrix, columns=gene_names)
    return sub_exp_matrix_df