Skip to content

Api mofa

omicverse.single.pyMOFA

Bases: object

MOFA class.

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
class pyMOFA(object):
    r"""
    MOFA class.
    """
    def __init__(self,omics:list,omics_name:list):
        r"""
        Initialize the MOFA class.

        Arguments:
            omics: The list of omics data.
            omics_name: The list of omics name.
        """
        self.omics=omics 
        self.omics_name=omics_name
        self.M=len(omics)

    def mofa_preprocess(self):
        r"""
        Preprocess the data.
        """
        self.data_mat=[[None for g in range(1)] for m in range(len(self.omics))]
        self.feature_name=[]
        for m in range(self.M):
            if issparse(self.omics[m].X)==True:
                self.data_mat[m][0]=self.omics[m].X.toarray()
            else:
                self.data_mat[m][0]=self.omics[m].X
            self.feature_name.append([self.omics_name[m]+'_'+i for i in self.omics[m].var.index])

    def mofa_run(self,outfile:str='res.hdf5',factors:int=20,iter:int = 1000,convergence_mode:str = "fast",
                spikeslab_weights:bool = True,startELBO:int = 1, freqELBO:int = 1, dropR2:float = 0.001, gpu_mode:bool = True, 
                verbose:bool = False, seed:int = 112,scale_groups:bool = False, 
                scale_views:bool = False,center_groups:bool=True,)->None:
        r"""
        Train the MOFA model.

        Arguments:
            outfile: The path of output file.
            factors: The number of factors.
            iter: The number of iterations.
            convergence_mode: The mode of convergence.
            spikeslab_weights: Whether to use spikeslab weights.
            startELBO: The start of ELBO.
            freqELBO: The frequency of ELBO.
            dropR2: The drop of R2.
            gpu_mode: Whether to use gpu mode.
            verbose: Whether to print the information.
            seed: The seed of random number.
            scale_groups: Whether to scale groups.
            scale_views: Whether to scale views.
            center_groups: Whether to center groups.

        """
        ent1 = entry_point()
        ent1.set_data_options(
            scale_groups = scale_groups, 
            scale_views = scale_views,
            center_groups=center_groups,
        )
        ent1.set_data_matrix(self.data_mat, likelihoods = [i for i in ["gaussian"]*self.M],
            views_names=self.omics_name,
            samples_names=[self.omics[0].obs.index],
            features_names=self.feature_name)
        # set param
        ent1.set_model_options(
            factors = factors, 
            spikeslab_weights = spikeslab_weights, 
            ard_factors = True,
            ard_weights = True
        )
        ent1.set_train_options(
            iter = iter, 
            convergence_mode = convergence_mode, 
            startELBO = startELBO, 
            freqELBO = freqELBO, 
            dropR2 = dropR2, 
            gpu_mode = gpu_mode, 
            verbose = verbose, 
            seed = seed
        )
        # 
        ent1.build()
        ent1.run()
        ent1.save(outfile=outfile)

__init__(omics, omics_name)

Initialize the MOFA class.

Parameters:

Name Type Description Default
omics list

The list of omics data.

required
omics_name list

The list of omics name.

required
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def __init__(self,omics:list,omics_name:list):
    r"""
    Initialize the MOFA class.

    Arguments:
        omics: The list of omics data.
        omics_name: The list of omics name.
    """
    self.omics=omics 
    self.omics_name=omics_name
    self.M=len(omics)

mofa_preprocess()

Preprocess the data.

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def mofa_preprocess(self):
    r"""
    Preprocess the data.
    """
    self.data_mat=[[None for g in range(1)] for m in range(len(self.omics))]
    self.feature_name=[]
    for m in range(self.M):
        if issparse(self.omics[m].X)==True:
            self.data_mat[m][0]=self.omics[m].X.toarray()
        else:
            self.data_mat[m][0]=self.omics[m].X
        self.feature_name.append([self.omics_name[m]+'_'+i for i in self.omics[m].var.index])

mofa_run(outfile='res.hdf5', factors=20, iter=1000, convergence_mode='fast', spikeslab_weights=True, startELBO=1, freqELBO=1, dropR2=0.001, gpu_mode=True, verbose=False, seed=112, scale_groups=False, scale_views=False, center_groups=True)

Train the MOFA model.

Parameters:

Name Type Description Default
outfile str

The path of output file.

'res.hdf5'
factors int

The number of factors.

20
iter int

The number of iterations.

1000
convergence_mode str

The mode of convergence.

'fast'
spikeslab_weights bool

Whether to use spikeslab weights.

True
startELBO int

The start of ELBO.

1
freqELBO int

The frequency of ELBO.

1
dropR2 float

The drop of R2.

0.001
gpu_mode bool

Whether to use gpu mode.

True
verbose bool

Whether to print the information.

False
seed int

The seed of random number.

112
scale_groups bool

Whether to scale groups.

False
scale_views bool

Whether to scale views.

False
center_groups bool

Whether to center groups.

True
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def mofa_run(self,outfile:str='res.hdf5',factors:int=20,iter:int = 1000,convergence_mode:str = "fast",
            spikeslab_weights:bool = True,startELBO:int = 1, freqELBO:int = 1, dropR2:float = 0.001, gpu_mode:bool = True, 
            verbose:bool = False, seed:int = 112,scale_groups:bool = False, 
            scale_views:bool = False,center_groups:bool=True,)->None:
    r"""
    Train the MOFA model.

    Arguments:
        outfile: The path of output file.
        factors: The number of factors.
        iter: The number of iterations.
        convergence_mode: The mode of convergence.
        spikeslab_weights: Whether to use spikeslab weights.
        startELBO: The start of ELBO.
        freqELBO: The frequency of ELBO.
        dropR2: The drop of R2.
        gpu_mode: Whether to use gpu mode.
        verbose: Whether to print the information.
        seed: The seed of random number.
        scale_groups: Whether to scale groups.
        scale_views: Whether to scale views.
        center_groups: Whether to center groups.

    """
    ent1 = entry_point()
    ent1.set_data_options(
        scale_groups = scale_groups, 
        scale_views = scale_views,
        center_groups=center_groups,
    )
    ent1.set_data_matrix(self.data_mat, likelihoods = [i for i in ["gaussian"]*self.M],
        views_names=self.omics_name,
        samples_names=[self.omics[0].obs.index],
        features_names=self.feature_name)
    # set param
    ent1.set_model_options(
        factors = factors, 
        spikeslab_weights = spikeslab_weights, 
        ard_factors = True,
        ard_weights = True
    )
    ent1.set_train_options(
        iter = iter, 
        convergence_mode = convergence_mode, 
        startELBO = startELBO, 
        freqELBO = freqELBO, 
        dropR2 = dropR2, 
        gpu_mode = gpu_mode, 
        verbose = verbose, 
        seed = seed
    )
    # 
    ent1.build()
    ent1.run()
    ent1.save(outfile=outfile)

omicverse.single.pyMOFAART

Bases: object

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
class pyMOFAART(object):

    def __init__(self,model_path:str):
        """
        Initialize the MOFAART class.

        Arguments:
            model_path: The path of MOFA model.
        """
        check_mofax()
        global mofax_install
        if mofax_install==True:
            global_imports("mofax","mfx")

        self.model_path=model_path
        mfx_model=mfx.mofa_model(model_path)
        self.factors=mfx_model.get_factors()
        plot_data=pd.DataFrame()
        for i in mfx_model.get_r2()['View'].unique():
            plot_data[i]=mfx_model.get_r2().loc[mfx_model.get_r2()['View']==i,'R2'].values
        self.r2=plot_data
        mfx_model.close()


    def get_factors(self,adata:anndata.AnnData):
        """
        Get the factors of MOFA to anndata object.

        Arguments:
            adata: The anndata object.

        """
        print('......Add factors to adata and store to adata.obsm["X_mofa"]')
        adata.obsm['X_mofa']=self.factors
        adata=factor_exact(adata,hdf5_path=self.model_path)

    def get_r2(self,)->pd.DataFrame:
        """
        Get the varience of each factor

        Returns:
            r2: the varience of each factor
        """

        return self.r2

    def plot_r2(self,figsize:tuple=(2,3),cmap:str='Greens',
                ticks_fontsize:int=10,labels_fontsize:int=12,
                save:bool=False)->Tuple[matplotlib.figure.Figure,matplotlib.axes._axes.Axes]:
        """
        plot the varience of each factor.

        Arguments:
            figsize: The size of figure.
            cmap: The color map.
            ticks_fontsize: The size of ticks.
            labels_fontsize: The size of labels.
            save: Whether to save the figure.

        Returns:
            fig: The figure of varience.
            ax: The axes of varience.

        """
        fig, ax = plt.subplots(figsize=figsize)
        sns.heatmap(self.r2,cmap=cmap,ax=ax,xticklabels=True,yticklabels=True,
                    cbar_kws={'shrink':0.5})
        plt.xticks(fontsize=ticks_fontsize)
        plt.yticks(fontsize=ticks_fontsize)
        plt.ylabel('Factor',fontsize=labels_fontsize)
        plt.xlabel('View',fontsize=labels_fontsize)
        plt.title('Varience',fontsize=labels_fontsize)
        if save:
            fig.savefig("mofa_varience.png",dpi=300,bbox_inches = 'tight')
        return fig,ax

    def get_cor(self,adata:anndata.AnnData,cluster:str,factor_list=None)->pd.DataFrame:
        """
        get the correlation of each factor with cluster type in anndata object.

        Arguments:
            adata: The anndata object.
            cluster: The cluster type.
            factor_list: The list of factors.

        Returns:
            plot_data1: The correlation of each factor with cluster type.

        """

        if factor_list==None:
            factor_list=[i+1 for i in range(self.r2.shape[0])]
        plot_data1=factor_correlation(adata=adata,cluster=cluster,factor_list=factor_list)
        return plot_data1

    def plot_cor(self,adata:anndata.AnnData,cluster:str,factor_list=None,figsize:tuple=(6,3),
                 cmap:str='Purples',ticks_fontsize:int=10,labels_fontsize:int=12,title:str='Correlation',
                 save:bool=False)->Tuple[matplotlib.figure.Figure,matplotlib.axes._axes.Axes]:
        """
        Plot the correlation of each factor with cluster type in anndata object.

        Arguments:
            adata: The anndata object in MOFA pre trained.
            cluster: The cluster type in adata.obs.
            factor_list: The list of factors.
            figsize: The size of figure.
            cmap: The color map.
            ticks_fontsize: The font size of ticks.
            labels_fontsize: The font size of labels.
            title: The title of figure.
            save: Whether to save the figure.

        Returns:
            fig: The figure of correlation.
            ax: The axes of correlation.

        """

        if factor_list==None:
            factor_list=[i+1 for i in range(self.r2.shape[0])]
        plot_data1=factor_correlation(adata=adata,cluster=cluster,factor_list=factor_list)
        fig, ax = plt.subplots(figsize=figsize)
        sns.heatmap(plot_data1,cmap=cmap,ax=ax,square=True,
                    cbar_kws={'shrink':0.5})
        plt.xticks(fontsize=ticks_fontsize)
        plt.yticks(fontsize=ticks_fontsize)
        plt.xlabel('Factor',fontsize=labels_fontsize)
        plt.ylabel(cluster,fontsize=labels_fontsize)
        plt.title(title,fontsize=labels_fontsize)
        if save:
            fig.savefig("mofa_cor.png",dpi=300,bbox_inches = 'tight')
        return fig,ax

    def plot_factor(self,adata:anndata.AnnData,cluster:str,title:str,figsize:tuple=(3,3),
                    factor1:int=1,factor2:int=2,palette:list=None,
                    save:bool=False)->Tuple[matplotlib.figure.Figure,matplotlib.axes._axes.Axes]:
        """
        Plot the factor of MOFA in anndata object.

        Arguments:
            adata: The anndata object.
            cluster: The cluster type in adata.obs.
            title: The title of figure.
            figsize: The size of figure.
            factor1: The first factor.
            factor2: The second factor.
            palette: The color map.
            save: Whether to save the figure.

        Returns:
            fig: The figure of factor.
            ax: The axes of factor.

        """

        if 'X_mofa' not in adata.obsm.keys():
            self.get_factors(adata)
        if palette==None:
            palette=pyomic_palette()
        fig, ax = plt.subplots(figsize=figsize)
        #factor1,factor2=4,6
        sc.pl.embedding(
            adata=adata,
            basis='X_mofa',
            color=cluster,
            title=title,
            components="{},{}".format(factor1,factor2),
            palette=palette,
            ncols=1,
            ax=ax
        )
        if save:
            fig.savefig("figures/mofa_factor_{}_{}.png".format(factor1,factor2),dpi=300,bbox_inches = 'tight')

        return fig,ax

    def plot_weight_gene_d1(self,view:str,factor1:int,factor2:int,
                            colors_dict:dict=None,plot_gene_num:int=5,title:str='',title_fontsize:int=12,
                            ticks_fontsize:int=12,labels_fontsize:int=12,
                            weith_threshold:float=0.5,figsize:tuple=(3,3),
                            save:bool=False)->Tuple[matplotlib.figure.Figure,matplotlib.axes._axes.Axes]:
        """
        Plot the weight of gene in each factor of MOFA in anndata object in dimension 1.

        Arguments:
            view: The view of MOFA.
            factor1: The first factor.
            factor2: The second factor.
            colors_dict: The color dict of up, down and normal. default is {'normal':'#c2c2c2','up':'#a51616','down':'#0d6a3b'}
            plot_gene_num: The number of genes to plot.
            title: The title of figure.
            title_fontsize: The font size of title.
            ticks_fontsize: The font size of ticks.
            labels_fontsize: The font size of labels.
            weith_threshold: The threshold of weight.
            figsize: The size of figure.
            save: Whether to save the figure.

        Returns:
            fig: The figure of weight.
            ax: The axes of weight.

        """
        factor_w=pd.DataFrame()
        for i in range(self.factors.shape[1]):
            f1_w=get_weights(hdf5_path=self.model_path,view=view,factor=i+1)
            f1_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]
            factor_w['factor_{}'.format(i+1)]=f1_w['weights']
        factor_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]

        #factor1,factor2=6,4
        plot_data3=factor_w[['factor_{}'.format(factor1),'factor_{}'.format(factor2)]]
        plot_data3['sig']='normal'
        plot_data3.loc[(plot_data3['factor_{}'.format(factor1)]>weith_threshold),'sig']='up'
        plot_data3.loc[(plot_data3['factor_{}'.format(factor1)]<-weith_threshold),'sig']='down'

        if colors_dict==None:
            colors_dict={'normal':'#c2c2c2','up':'#a51616','down':'#0d6a3b'}
        fig, ax = plt.subplots(figsize=figsize)
        ax.scatter(plot_data3.loc[plot_data3['sig']=='normal','factor_{}'.format(factor1)],
                plot_data3.loc[plot_data3['sig']=='normal','factor_{}'.format(factor2)],
                color=colors_dict['normal'],alpha=0.5)

        ax.scatter(plot_data3.loc[plot_data3['sig']=='up','factor_{}'.format(factor1)],
                plot_data3.loc[plot_data3['sig']=='up','factor_{}'.format(factor2)],
                color=colors_dict['up'],alpha=0.5)

        ax.scatter(plot_data3.loc[plot_data3['sig']=='down','factor_{}'.format(factor1)],
                plot_data3.loc[plot_data3['sig']=='down','factor_{}'.format(factor2)],
                color=colors_dict['down'],alpha=0.5)

        plt.vlines(x=weith_threshold,ymin=-1,ymax=1,color=colors_dict['up'],linestyles='dashed')
        plt.hlines(y=weith_threshold,xmin=-1,xmax=1,color=colors_dict['up'],linestyles='dashed')

        plt.vlines(x=-weith_threshold,ymin=-1,ymax=1,color=colors_dict['down'],linestyles='dashed')
        plt.hlines(y=-weith_threshold,xmin=-1,xmax=1,color=colors_dict['down'],linestyles='dashed')

        ax.spines['top'].set_visible(True)
        ax.spines['right'].set_visible(True)
        ax.spines['bottom'].set_visible(True)
        ax.spines['left'].set_visible(True)

        plt.grid(False)

        plt.xlabel('factor_{}'.format(factor1),fontsize=labels_fontsize)
        plt.ylabel('factor_{}'.format(factor2),fontsize=labels_fontsize)

        plt.xticks(fontsize=ticks_fontsize)
        plt.yticks(fontsize=ticks_fontsize)

        from adjustText import adjust_text

        for sig,color in zip(['up','down'],
                            [colors_dict['up'],colors_dict['down']]):
            if 'up' in sig:
                hub_gene=plot_data3.loc[plot_data3['sig']==sig].sort_values('factor_{}'.format(factor1),ascending=False).index.tolist()
            else:
                hub_gene=plot_data3.loc[plot_data3['sig']==sig].sort_values('factor_{}'.format(factor1),ascending=True).index.tolist()
            if len(hub_gene)==0:
                continue
            texts=[ax.text(plot_data3.loc[i,'factor_{}'.format(factor1)],
                        plot_data3.loc[i,'factor_{}'.format(factor2)],
                        i,
                        fontdict={'size':10,'weight':'bold','color':'black'}
                        ) for i in hub_gene[:plot_gene_num]]

            adjust_text(texts,only_move={'text': 'xy'},arrowprops=dict(arrowstyle='->', color='grey'),)

        plt.title(title,fontsize=title_fontsize)
        if save:
            fig.savefig("factor_gene_{}.png".format(title),dpi=300,bbox_inches = 'tight')
        return fig,ax

    def plot_weight_gene_d2(self,view:str,factor1:int,factor2:int,
                            colors_dict:dict=None,plot_gene_num:int=5,title:str='',title_fontsize:int=12,
                            ticks_fontsize:int=12,labels_fontsize:int=12,
                            weith_threshold:float=0.5,figsize:tuple=(3,3),
                            save:bool=False)->Tuple[matplotlib.figure.Figure,matplotlib.axes._axes.Axes]:
        """
        Plot the weight of gene in each factor of MOFA in anndata object in dimension 2.

        Arguments:
            view: The view of MOFA.
            factor1: The first factor.
            factor2: The second factor.
            colors_dict: The color dict. default is {'up-up':'#a51616','up-down':'#e25d5d','down-up':'#1a6e1a','down-down':'#5de25d','normal':'#c2c2c2'}
            plot_gene_num: The number of genes to plot.
            title: The title of figure.
            title_fontsize: The font size of title.
            ticks_fontsize: The font size of ticks.
            labels_fontsize: The font size of labels.
            weith_threshold: The threshold of weight.
            figsize: The size of figure.
            save: Whether to save the figure.

        Returns:
            fig: The figure of weight.
            ax: The axes of weight.

        """

        factor_w=pd.DataFrame()
        for i in range(self.factors.shape[1]):
            f1_w=get_weights(hdf5_path=self.model_path,view=view,factor=i+1)
            f1_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]
            factor_w['factor_{}'.format(i+1)]=f1_w['weights']
        factor_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]

        #factor1,factor2=6,4
        plot_data3=factor_w[['factor_{}'.format(factor1),'factor_{}'.format(factor2)]]
        plot_data3['sig']='normal'
        plot_data3.loc[(plot_data3['factor_{}'.format(factor1)]>weith_threshold)&(plot_data3['factor_{}'.format(factor2)]>weith_threshold),'sig']='up-up'
        plot_data3.loc[(plot_data3['factor_{}'.format(factor1)]>weith_threshold)&(plot_data3['factor_{}'.format(factor2)]<-weith_threshold),'sig']='up-down'
        plot_data3.loc[(plot_data3['factor_{}'.format(factor1)]<-weith_threshold)&(plot_data3['factor_{}'.format(factor2)]>weith_threshold),'sig']='down-up'
        plot_data3.loc[(plot_data3['factor_{}'.format(factor1)]<-weith_threshold)&(plot_data3['factor_{}'.format(factor2)]<-weith_threshold),'sig']='down-down'


        if colors_dict==None:
            colors_dict={'up-up':'#a51616','up-down':'#e25d5d','down-up':'#1a6e1a','down-down':'#5de25d','normal':'#c2c2c2'}
        fig, ax = plt.subplots(figsize=figsize)
        ax.scatter(plot_data3.loc[plot_data3['sig']=='normal','factor_{}'.format(factor1)],
           plot_data3.loc[plot_data3['sig']=='normal','factor_{}'.format(factor2)],
          color=colors_dict['normal'],alpha=0.5)

        ax.scatter(plot_data3.loc[plot_data3['sig']=='up-up','factor_{}'.format(factor1)],
                plot_data3.loc[plot_data3['sig']=='up-up','factor_{}'.format(factor2)],
                color=colors_dict['up-up'],alpha=0.5)

        ax.scatter(plot_data3.loc[plot_data3['sig']=='up-down','factor_{}'.format(factor1)],
                plot_data3.loc[plot_data3['sig']=='up-down','factor_{}'.format(factor2)],
                color=colors_dict['up-down'],alpha=0.5)

        ax.scatter(plot_data3.loc[plot_data3['sig']=='down-up','factor_{}'.format(factor1)],
                plot_data3.loc[plot_data3['sig']=='down-up','factor_{}'.format(factor2)],
                color=colors_dict['down-up'],alpha=0.5)

        ax.scatter(plot_data3.loc[plot_data3['sig']=='down-down','factor_{}'.format(factor1)],
                plot_data3.loc[plot_data3['sig']=='down-down','factor_{}'.format(factor2)],
                color=colors_dict['down-down'],alpha=0.5)

        plt.vlines(x=weith_threshold,ymin=-1,ymax=1,color=colors_dict['up-up'],linestyles='dashed')
        plt.hlines(y=weith_threshold,xmin=-1,xmax=1,color=colors_dict['up-up'],linestyles='dashed')

        plt.vlines(x=-weith_threshold,ymin=-1,ymax=1,color=colors_dict['down-down'],linestyles='dashed')
        plt.hlines(y=-weith_threshold,xmin=-1,xmax=1,color=colors_dict['down-down'],linestyles='dashed')

        ax.spines['top'].set_visible(True)
        ax.spines['right'].set_visible(True)
        ax.spines['bottom'].set_visible(True)
        ax.spines['left'].set_visible(True)

        plt.grid(False)

        plt.xlabel('factor_{}'.format(factor1),fontsize=labels_fontsize)
        plt.ylabel('factor_{}'.format(factor2),fontsize=labels_fontsize)

        plt.xticks(fontsize=ticks_fontsize)
        plt.yticks(fontsize=ticks_fontsize)

        from adjustText import adjust_text

        for sig,color in zip(['up-up','up-down','down-up','down-down'],
                     [colors_dict['up-up'],colors_dict['up-down'],colors_dict['down-up'],colors_dict['down-down']]):
            hub_gene=plot_data3.loc[plot_data3['sig']==sig].index.tolist()
            if len(hub_gene)==0:
                continue
            texts=[ax.text(plot_data3.loc[i,'factor_{}'.format(factor1)],
                        plot_data3.loc[i,'factor_{}'.format(factor2)],
                        i,
                        fontdict={'size':10,'weight':'bold','color':'black'}
                        ) for i in hub_gene[:plot_gene_num]]

            adjust_text(texts,only_move={'text': 'xy'},arrowprops=dict(arrowstyle='->', color='grey'),)

        plt.title(title,fontsize=title_fontsize)
        if save:
            fig.savefig("factor_gene_{}.png".format(title),dpi=300,bbox_inches = 'tight')
        return fig,ax

    def plot_weights(self,view:str,factor:int,color:str='#a51616',figsize:tuple=(3,4),
                     plot_gene_num:int=10,ascending:bool=False,
                    labels_fontsize:int=12,ticks_fontsize:int=12,title_fontsize:int=12,
                     title=None,save:bool=False)->Tuple[matplotlib.figure.Figure,matplotlib.axes._axes.Axes]:
        """
        Plot the weights of each gene in the factor

        Arguments:
            view: str, the view of the factor
            factor: int, the factor number
            color: str, the color of the plot
            figsize: tuple, the size of the figure
            plot_gene_num: int, the number of genes to plot
            ascending: bool, whether to sort the genes by weights
            labels_fontsize: int, the fontsize of the labels
            ticks_fontsize: int, the fontsize of the ticks
            title_fontsize: int, the fontsize of the title
            title: str, the title of the plot
            save: bool, whether to save the plot

        Returns:
            fig: the figure of the plot
            ax: the axis of the plot

        """

        factor_w=pd.DataFrame()
        for i in range(self.factors.shape[1]):
            f1_w=get_weights(hdf5_path=self.model_path,view=view,factor=i+1)
            f1_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]
            factor_w['factor_{}'.format(i+1)]=f1_w['weights']
        factor_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]

        fig, ax = plt.subplots(figsize=figsize)
        plot_data4=pd.DataFrame()
        plot_data4['weight']=factor_w['factor_{}'.format(factor)].sort_values(ascending=ascending)
        plot_data4['rank']=range(len(plot_data4['weight']))
        plt.plot(plot_data4['rank'],plot_data4['weight'],color=color)

        hub_gene=plot_data4.index[:plot_gene_num]
        plt.scatter(plot_data4.loc[hub_gene,'rank'],
                plot_data4.loc[hub_gene,'weight'],color=color,
                    alpha=0.5)

        from adjustText import adjust_text
        texts=[ax.text(plot_data4.loc[i,'rank'],
                        plot_data4.loc[i,'weight'],
                        i,
                        fontdict={'size':10,'weight':'normal','color':'black'}
                        ) for i in hub_gene]

        adjust_text(texts,only_move={'text': 'xy'},
                    arrowprops=dict(arrowstyle='->', color='grey'),)

        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        ax.spines['bottom'].set_visible(True)
        ax.spines['left'].set_visible(True)
        if title is None:
            plt.title('factor_{}'.format(factor),fontsize=title_fontsize,)
        else:
            plt.title(title,fontsize=title_fontsize,)
        plt.xticks(fontsize=ticks_fontsize)
        plt.yticks(fontsize=ticks_fontsize)
        plt.xlabel('Feature rank',fontsize=labels_fontsize)
        plt.ylabel('Weight',fontsize=labels_fontsize)

        plt.grid(False)
        if save:
            fig.savefig("factor{}_gene.png".format(factor),dpi=300,bbox_inches = 'tight')
        return fig,ax

    def plot_top_feature_dotplot(self,view:str,cmap:str='bwr',n_genes:int=3)->list:
        """
        Plot the top features of each factor in dotplot

        Arguments:
            view: str, the view of the factor
            cmap: str, the color map of the plot
            n_genes: int, the number of genes to plot

        Returns:
            axes: the list of the figure

        """

        factor_w=pd.DataFrame()
        for i in range(self.factors.shape[1]):
            f1_w=get_weights(hdf5_path=self.model_path,view=view,factor=i+1)
            f1_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]
            factor_w['factor_{}'.format(i+1)]=f1_w['weights']
        factor_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]

        adata1=anndata.AnnData(pd.concat([factor_w,factor_w],axis=1).T)
        adata1.obs['Factor']=adata1.obs.index
        adata1.obs['Factor']=adata1.obs['Factor'].astype('category')
        sc.tl.rank_genes_groups(adata1, groupby='Factor', method='wilcoxon')
        ax=sc.pl.rank_genes_groups_dotplot(adata1, n_genes=n_genes, 
                                        cmap=cmap,show=False)
        return ax

    def plot_top_feature_heatmap(self,view:str,cmap:str='bwr',n_genes:int=3)->list:
        """
        Plot the top features of each factor in dotplot

        Arguments:
            view: str, the view of the factor
            cmap: str, the color map of the plot
            n_genes: int, the number of genes to plot

        Returns:
            axes: the list of the figure

        """

        factor_w=pd.DataFrame()
        for i in range(self.factors.shape[1]):
            f1_w=get_weights(hdf5_path=self.model_path,view=view,factor=i+1)
            f1_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]
            factor_w['factor_{}'.format(i+1)]=f1_w['weights']
        factor_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]

        adata1=anndata.AnnData(pd.concat([factor_w,factor_w],axis=1).T)
        adata1.obs['Factor']=adata1.obs.index
        adata1.obs['Factor']=adata1.obs['Factor'].astype('category')
        sc.tl.rank_genes_groups(adata1, groupby='Factor', method='wilcoxon')
        ax=sc.pl.rank_genes_groups_matrixplot(adata1, n_genes=n_genes, 
                                        cmap=cmap,show=False)
        return ax

    def get_top_feature(self,view:str,log2fc_min:int=3,pval_cutoff:float=0.1)->dict:
        """
        Get the top features of each factor

        Arguments:
            view: str, the view of the factor
            log2fc_min: float, the minimum log2fc of the feature
            pval_cutoff: float, the maximum pval of the feature

        Returns:
            top_feature: dict, the top features of each factor

        """


        factor_w=pd.DataFrame()
        for i in range(self.factors.shape[1]):
            f1_w=get_weights(hdf5_path=self.model_path,view=view,factor=i+1)
            f1_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]
            factor_w['factor_{}'.format(i+1)]=f1_w['weights']
        factor_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]

        adata1=anndata.AnnData(pd.concat([factor_w,factor_w],axis=1).T)
        adata1.obs['Factor']=adata1.obs.index
        adata1.obs['Factor']=adata1.obs['Factor'].astype('category')
        top_feature=get_celltype_marker(adata1,clustertype='Factor',
                            log2fc_min=log2fc_min,pval_cutoff=pval_cutoff)
        return top_feature

__init__(model_path)

Initialize the MOFAART class.

Parameters:

Name Type Description Default
model_path str

The path of MOFA model.

required
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def __init__(self,model_path:str):
    """
    Initialize the MOFAART class.

    Arguments:
        model_path: The path of MOFA model.
    """
    check_mofax()
    global mofax_install
    if mofax_install==True:
        global_imports("mofax","mfx")

    self.model_path=model_path
    mfx_model=mfx.mofa_model(model_path)
    self.factors=mfx_model.get_factors()
    plot_data=pd.DataFrame()
    for i in mfx_model.get_r2()['View'].unique():
        plot_data[i]=mfx_model.get_r2().loc[mfx_model.get_r2()['View']==i,'R2'].values
    self.r2=plot_data
    mfx_model.close()

get_factors(adata)

Get the factors of MOFA to anndata object.

Parameters:

Name Type Description Default
adata anndata.AnnData

The anndata object.

required
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def get_factors(self,adata:anndata.AnnData):
    """
    Get the factors of MOFA to anndata object.

    Arguments:
        adata: The anndata object.

    """
    print('......Add factors to adata and store to adata.obsm["X_mofa"]')
    adata.obsm['X_mofa']=self.factors
    adata=factor_exact(adata,hdf5_path=self.model_path)

get_r2()

Get the varience of each factor

Returns:

Name Type Description
r2 pd.DataFrame

the varience of each factor

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def get_r2(self,)->pd.DataFrame:
    """
    Get the varience of each factor

    Returns:
        r2: the varience of each factor
    """

    return self.r2

plot_r2(figsize=(2, 3), cmap='Greens', ticks_fontsize=10, labels_fontsize=12, save=False)

plot the varience of each factor.

Parameters:

Name Type Description Default
figsize tuple

The size of figure.

(2, 3)
cmap str

The color map.

'Greens'
ticks_fontsize int

The size of ticks.

10
labels_fontsize int

The size of labels.

12
save bool

Whether to save the figure.

False

Returns:

Name Type Description
fig matplotlib.figure.Figure

The figure of varience.

ax matplotlib.axes._axes.Axes

The axes of varience.

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def plot_r2(self,figsize:tuple=(2,3),cmap:str='Greens',
            ticks_fontsize:int=10,labels_fontsize:int=12,
            save:bool=False)->Tuple[matplotlib.figure.Figure,matplotlib.axes._axes.Axes]:
    """
    plot the varience of each factor.

    Arguments:
        figsize: The size of figure.
        cmap: The color map.
        ticks_fontsize: The size of ticks.
        labels_fontsize: The size of labels.
        save: Whether to save the figure.

    Returns:
        fig: The figure of varience.
        ax: The axes of varience.

    """
    fig, ax = plt.subplots(figsize=figsize)
    sns.heatmap(self.r2,cmap=cmap,ax=ax,xticklabels=True,yticklabels=True,
                cbar_kws={'shrink':0.5})
    plt.xticks(fontsize=ticks_fontsize)
    plt.yticks(fontsize=ticks_fontsize)
    plt.ylabel('Factor',fontsize=labels_fontsize)
    plt.xlabel('View',fontsize=labels_fontsize)
    plt.title('Varience',fontsize=labels_fontsize)
    if save:
        fig.savefig("mofa_varience.png",dpi=300,bbox_inches = 'tight')
    return fig,ax

get_cor(adata, cluster, factor_list=None)

get the correlation of each factor with cluster type in anndata object.

Parameters:

Name Type Description Default
adata anndata.AnnData

The anndata object.

required
cluster str

The cluster type.

required
factor_list

The list of factors.

None

Returns:

Name Type Description
plot_data1 pd.DataFrame

The correlation of each factor with cluster type.

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def get_cor(self,adata:anndata.AnnData,cluster:str,factor_list=None)->pd.DataFrame:
    """
    get the correlation of each factor with cluster type in anndata object.

    Arguments:
        adata: The anndata object.
        cluster: The cluster type.
        factor_list: The list of factors.

    Returns:
        plot_data1: The correlation of each factor with cluster type.

    """

    if factor_list==None:
        factor_list=[i+1 for i in range(self.r2.shape[0])]
    plot_data1=factor_correlation(adata=adata,cluster=cluster,factor_list=factor_list)
    return plot_data1

plot_cor(adata, cluster, factor_list=None, figsize=(6, 3), cmap='Purples', ticks_fontsize=10, labels_fontsize=12, title='Correlation', save=False)

Plot the correlation of each factor with cluster type in anndata object.

Parameters:

Name Type Description Default
adata anndata.AnnData

The anndata object in MOFA pre trained.

required
cluster str

The cluster type in adata.obs.

required
factor_list

The list of factors.

None
figsize tuple

The size of figure.

(6, 3)
cmap str

The color map.

'Purples'
ticks_fontsize int

The font size of ticks.

10
labels_fontsize int

The font size of labels.

12
title str

The title of figure.

'Correlation'
save bool

Whether to save the figure.

False

Returns:

Name Type Description
fig matplotlib.figure.Figure

The figure of correlation.

ax matplotlib.axes._axes.Axes

The axes of correlation.

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def plot_cor(self,adata:anndata.AnnData,cluster:str,factor_list=None,figsize:tuple=(6,3),
             cmap:str='Purples',ticks_fontsize:int=10,labels_fontsize:int=12,title:str='Correlation',
             save:bool=False)->Tuple[matplotlib.figure.Figure,matplotlib.axes._axes.Axes]:
    """
    Plot the correlation of each factor with cluster type in anndata object.

    Arguments:
        adata: The anndata object in MOFA pre trained.
        cluster: The cluster type in adata.obs.
        factor_list: The list of factors.
        figsize: The size of figure.
        cmap: The color map.
        ticks_fontsize: The font size of ticks.
        labels_fontsize: The font size of labels.
        title: The title of figure.
        save: Whether to save the figure.

    Returns:
        fig: The figure of correlation.
        ax: The axes of correlation.

    """

    if factor_list==None:
        factor_list=[i+1 for i in range(self.r2.shape[0])]
    plot_data1=factor_correlation(adata=adata,cluster=cluster,factor_list=factor_list)
    fig, ax = plt.subplots(figsize=figsize)
    sns.heatmap(plot_data1,cmap=cmap,ax=ax,square=True,
                cbar_kws={'shrink':0.5})
    plt.xticks(fontsize=ticks_fontsize)
    plt.yticks(fontsize=ticks_fontsize)
    plt.xlabel('Factor',fontsize=labels_fontsize)
    plt.ylabel(cluster,fontsize=labels_fontsize)
    plt.title(title,fontsize=labels_fontsize)
    if save:
        fig.savefig("mofa_cor.png",dpi=300,bbox_inches = 'tight')
    return fig,ax

plot_factor(adata, cluster, title, figsize=(3, 3), factor1=1, factor2=2, palette=None, save=False)

Plot the factor of MOFA in anndata object.

Parameters:

Name Type Description Default
adata anndata.AnnData

The anndata object.

required
cluster str

The cluster type in adata.obs.

required
title str

The title of figure.

required
figsize tuple

The size of figure.

(3, 3)
factor1 int

The first factor.

1
factor2 int

The second factor.

2
palette list

The color map.

None
save bool

Whether to save the figure.

False

Returns:

Name Type Description
fig matplotlib.figure.Figure

The figure of factor.

ax matplotlib.axes._axes.Axes

The axes of factor.

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def plot_factor(self,adata:anndata.AnnData,cluster:str,title:str,figsize:tuple=(3,3),
                factor1:int=1,factor2:int=2,palette:list=None,
                save:bool=False)->Tuple[matplotlib.figure.Figure,matplotlib.axes._axes.Axes]:
    """
    Plot the factor of MOFA in anndata object.

    Arguments:
        adata: The anndata object.
        cluster: The cluster type in adata.obs.
        title: The title of figure.
        figsize: The size of figure.
        factor1: The first factor.
        factor2: The second factor.
        palette: The color map.
        save: Whether to save the figure.

    Returns:
        fig: The figure of factor.
        ax: The axes of factor.

    """

    if 'X_mofa' not in adata.obsm.keys():
        self.get_factors(adata)
    if palette==None:
        palette=pyomic_palette()
    fig, ax = plt.subplots(figsize=figsize)
    #factor1,factor2=4,6
    sc.pl.embedding(
        adata=adata,
        basis='X_mofa',
        color=cluster,
        title=title,
        components="{},{}".format(factor1,factor2),
        palette=palette,
        ncols=1,
        ax=ax
    )
    if save:
        fig.savefig("figures/mofa_factor_{}_{}.png".format(factor1,factor2),dpi=300,bbox_inches = 'tight')

    return fig,ax

plot_weight_gene_d1(view, factor1, factor2, colors_dict=None, plot_gene_num=5, title='', title_fontsize=12, ticks_fontsize=12, labels_fontsize=12, weith_threshold=0.5, figsize=(3, 3), save=False)

Plot the weight of gene in each factor of MOFA in anndata object in dimension 1.

Parameters:

Name Type Description Default
view str

The view of MOFA.

required
factor1 int

The first factor.

required
factor2 int

The second factor.

required
colors_dict dict

The color dict of up, down and normal. default is {'normal':'#c2c2c2','up':'#a51616','down':'#0d6a3b'}

None
plot_gene_num int

The number of genes to plot.

5
title str

The title of figure.

''
title_fontsize int

The font size of title.

12
ticks_fontsize int

The font size of ticks.

12
labels_fontsize int

The font size of labels.

12
weith_threshold float

The threshold of weight.

0.5
figsize tuple

The size of figure.

(3, 3)
save bool

Whether to save the figure.

False

Returns:

Name Type Description
fig matplotlib.figure.Figure

The figure of weight.

ax matplotlib.axes._axes.Axes

The axes of weight.

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def plot_weight_gene_d1(self,view:str,factor1:int,factor2:int,
                        colors_dict:dict=None,plot_gene_num:int=5,title:str='',title_fontsize:int=12,
                        ticks_fontsize:int=12,labels_fontsize:int=12,
                        weith_threshold:float=0.5,figsize:tuple=(3,3),
                        save:bool=False)->Tuple[matplotlib.figure.Figure,matplotlib.axes._axes.Axes]:
    """
    Plot the weight of gene in each factor of MOFA in anndata object in dimension 1.

    Arguments:
        view: The view of MOFA.
        factor1: The first factor.
        factor2: The second factor.
        colors_dict: The color dict of up, down and normal. default is {'normal':'#c2c2c2','up':'#a51616','down':'#0d6a3b'}
        plot_gene_num: The number of genes to plot.
        title: The title of figure.
        title_fontsize: The font size of title.
        ticks_fontsize: The font size of ticks.
        labels_fontsize: The font size of labels.
        weith_threshold: The threshold of weight.
        figsize: The size of figure.
        save: Whether to save the figure.

    Returns:
        fig: The figure of weight.
        ax: The axes of weight.

    """
    factor_w=pd.DataFrame()
    for i in range(self.factors.shape[1]):
        f1_w=get_weights(hdf5_path=self.model_path,view=view,factor=i+1)
        f1_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]
        factor_w['factor_{}'.format(i+1)]=f1_w['weights']
    factor_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]

    #factor1,factor2=6,4
    plot_data3=factor_w[['factor_{}'.format(factor1),'factor_{}'.format(factor2)]]
    plot_data3['sig']='normal'
    plot_data3.loc[(plot_data3['factor_{}'.format(factor1)]>weith_threshold),'sig']='up'
    plot_data3.loc[(plot_data3['factor_{}'.format(factor1)]<-weith_threshold),'sig']='down'

    if colors_dict==None:
        colors_dict={'normal':'#c2c2c2','up':'#a51616','down':'#0d6a3b'}
    fig, ax = plt.subplots(figsize=figsize)
    ax.scatter(plot_data3.loc[plot_data3['sig']=='normal','factor_{}'.format(factor1)],
            plot_data3.loc[plot_data3['sig']=='normal','factor_{}'.format(factor2)],
            color=colors_dict['normal'],alpha=0.5)

    ax.scatter(plot_data3.loc[plot_data3['sig']=='up','factor_{}'.format(factor1)],
            plot_data3.loc[plot_data3['sig']=='up','factor_{}'.format(factor2)],
            color=colors_dict['up'],alpha=0.5)

    ax.scatter(plot_data3.loc[plot_data3['sig']=='down','factor_{}'.format(factor1)],
            plot_data3.loc[plot_data3['sig']=='down','factor_{}'.format(factor2)],
            color=colors_dict['down'],alpha=0.5)

    plt.vlines(x=weith_threshold,ymin=-1,ymax=1,color=colors_dict['up'],linestyles='dashed')
    plt.hlines(y=weith_threshold,xmin=-1,xmax=1,color=colors_dict['up'],linestyles='dashed')

    plt.vlines(x=-weith_threshold,ymin=-1,ymax=1,color=colors_dict['down'],linestyles='dashed')
    plt.hlines(y=-weith_threshold,xmin=-1,xmax=1,color=colors_dict['down'],linestyles='dashed')

    ax.spines['top'].set_visible(True)
    ax.spines['right'].set_visible(True)
    ax.spines['bottom'].set_visible(True)
    ax.spines['left'].set_visible(True)

    plt.grid(False)

    plt.xlabel('factor_{}'.format(factor1),fontsize=labels_fontsize)
    plt.ylabel('factor_{}'.format(factor2),fontsize=labels_fontsize)

    plt.xticks(fontsize=ticks_fontsize)
    plt.yticks(fontsize=ticks_fontsize)

    from adjustText import adjust_text

    for sig,color in zip(['up','down'],
                        [colors_dict['up'],colors_dict['down']]):
        if 'up' in sig:
            hub_gene=plot_data3.loc[plot_data3['sig']==sig].sort_values('factor_{}'.format(factor1),ascending=False).index.tolist()
        else:
            hub_gene=plot_data3.loc[plot_data3['sig']==sig].sort_values('factor_{}'.format(factor1),ascending=True).index.tolist()
        if len(hub_gene)==0:
            continue
        texts=[ax.text(plot_data3.loc[i,'factor_{}'.format(factor1)],
                    plot_data3.loc[i,'factor_{}'.format(factor2)],
                    i,
                    fontdict={'size':10,'weight':'bold','color':'black'}
                    ) for i in hub_gene[:plot_gene_num]]

        adjust_text(texts,only_move={'text': 'xy'},arrowprops=dict(arrowstyle='->', color='grey'),)

    plt.title(title,fontsize=title_fontsize)
    if save:
        fig.savefig("factor_gene_{}.png".format(title),dpi=300,bbox_inches = 'tight')
    return fig,ax

plot_weight_gene_d2(view, factor1, factor2, colors_dict=None, plot_gene_num=5, title='', title_fontsize=12, ticks_fontsize=12, labels_fontsize=12, weith_threshold=0.5, figsize=(3, 3), save=False)

Plot the weight of gene in each factor of MOFA in anndata object in dimension 2.

Parameters:

Name Type Description Default
view str

The view of MOFA.

required
factor1 int

The first factor.

required
factor2 int

The second factor.

required
colors_dict dict

The color dict. default is {'up-up':'#a51616','up-down':'#e25d5d','down-up':'#1a6e1a','down-down':'#5de25d','normal':'#c2c2c2'}

None
plot_gene_num int

The number of genes to plot.

5
title str

The title of figure.

''
title_fontsize int

The font size of title.

12
ticks_fontsize int

The font size of ticks.

12
labels_fontsize int

The font size of labels.

12
weith_threshold float

The threshold of weight.

0.5
figsize tuple

The size of figure.

(3, 3)
save bool

Whether to save the figure.

False

Returns:

Name Type Description
fig matplotlib.figure.Figure

The figure of weight.

ax matplotlib.axes._axes.Axes

The axes of weight.

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def plot_weight_gene_d2(self,view:str,factor1:int,factor2:int,
                        colors_dict:dict=None,plot_gene_num:int=5,title:str='',title_fontsize:int=12,
                        ticks_fontsize:int=12,labels_fontsize:int=12,
                        weith_threshold:float=0.5,figsize:tuple=(3,3),
                        save:bool=False)->Tuple[matplotlib.figure.Figure,matplotlib.axes._axes.Axes]:
    """
    Plot the weight of gene in each factor of MOFA in anndata object in dimension 2.

    Arguments:
        view: The view of MOFA.
        factor1: The first factor.
        factor2: The second factor.
        colors_dict: The color dict. default is {'up-up':'#a51616','up-down':'#e25d5d','down-up':'#1a6e1a','down-down':'#5de25d','normal':'#c2c2c2'}
        plot_gene_num: The number of genes to plot.
        title: The title of figure.
        title_fontsize: The font size of title.
        ticks_fontsize: The font size of ticks.
        labels_fontsize: The font size of labels.
        weith_threshold: The threshold of weight.
        figsize: The size of figure.
        save: Whether to save the figure.

    Returns:
        fig: The figure of weight.
        ax: The axes of weight.

    """

    factor_w=pd.DataFrame()
    for i in range(self.factors.shape[1]):
        f1_w=get_weights(hdf5_path=self.model_path,view=view,factor=i+1)
        f1_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]
        factor_w['factor_{}'.format(i+1)]=f1_w['weights']
    factor_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]

    #factor1,factor2=6,4
    plot_data3=factor_w[['factor_{}'.format(factor1),'factor_{}'.format(factor2)]]
    plot_data3['sig']='normal'
    plot_data3.loc[(plot_data3['factor_{}'.format(factor1)]>weith_threshold)&(plot_data3['factor_{}'.format(factor2)]>weith_threshold),'sig']='up-up'
    plot_data3.loc[(plot_data3['factor_{}'.format(factor1)]>weith_threshold)&(plot_data3['factor_{}'.format(factor2)]<-weith_threshold),'sig']='up-down'
    plot_data3.loc[(plot_data3['factor_{}'.format(factor1)]<-weith_threshold)&(plot_data3['factor_{}'.format(factor2)]>weith_threshold),'sig']='down-up'
    plot_data3.loc[(plot_data3['factor_{}'.format(factor1)]<-weith_threshold)&(plot_data3['factor_{}'.format(factor2)]<-weith_threshold),'sig']='down-down'


    if colors_dict==None:
        colors_dict={'up-up':'#a51616','up-down':'#e25d5d','down-up':'#1a6e1a','down-down':'#5de25d','normal':'#c2c2c2'}
    fig, ax = plt.subplots(figsize=figsize)
    ax.scatter(plot_data3.loc[plot_data3['sig']=='normal','factor_{}'.format(factor1)],
       plot_data3.loc[plot_data3['sig']=='normal','factor_{}'.format(factor2)],
      color=colors_dict['normal'],alpha=0.5)

    ax.scatter(plot_data3.loc[plot_data3['sig']=='up-up','factor_{}'.format(factor1)],
            plot_data3.loc[plot_data3['sig']=='up-up','factor_{}'.format(factor2)],
            color=colors_dict['up-up'],alpha=0.5)

    ax.scatter(plot_data3.loc[plot_data3['sig']=='up-down','factor_{}'.format(factor1)],
            plot_data3.loc[plot_data3['sig']=='up-down','factor_{}'.format(factor2)],
            color=colors_dict['up-down'],alpha=0.5)

    ax.scatter(plot_data3.loc[plot_data3['sig']=='down-up','factor_{}'.format(factor1)],
            plot_data3.loc[plot_data3['sig']=='down-up','factor_{}'.format(factor2)],
            color=colors_dict['down-up'],alpha=0.5)

    ax.scatter(plot_data3.loc[plot_data3['sig']=='down-down','factor_{}'.format(factor1)],
            plot_data3.loc[plot_data3['sig']=='down-down','factor_{}'.format(factor2)],
            color=colors_dict['down-down'],alpha=0.5)

    plt.vlines(x=weith_threshold,ymin=-1,ymax=1,color=colors_dict['up-up'],linestyles='dashed')
    plt.hlines(y=weith_threshold,xmin=-1,xmax=1,color=colors_dict['up-up'],linestyles='dashed')

    plt.vlines(x=-weith_threshold,ymin=-1,ymax=1,color=colors_dict['down-down'],linestyles='dashed')
    plt.hlines(y=-weith_threshold,xmin=-1,xmax=1,color=colors_dict['down-down'],linestyles='dashed')

    ax.spines['top'].set_visible(True)
    ax.spines['right'].set_visible(True)
    ax.spines['bottom'].set_visible(True)
    ax.spines['left'].set_visible(True)

    plt.grid(False)

    plt.xlabel('factor_{}'.format(factor1),fontsize=labels_fontsize)
    plt.ylabel('factor_{}'.format(factor2),fontsize=labels_fontsize)

    plt.xticks(fontsize=ticks_fontsize)
    plt.yticks(fontsize=ticks_fontsize)

    from adjustText import adjust_text

    for sig,color in zip(['up-up','up-down','down-up','down-down'],
                 [colors_dict['up-up'],colors_dict['up-down'],colors_dict['down-up'],colors_dict['down-down']]):
        hub_gene=plot_data3.loc[plot_data3['sig']==sig].index.tolist()
        if len(hub_gene)==0:
            continue
        texts=[ax.text(plot_data3.loc[i,'factor_{}'.format(factor1)],
                    plot_data3.loc[i,'factor_{}'.format(factor2)],
                    i,
                    fontdict={'size':10,'weight':'bold','color':'black'}
                    ) for i in hub_gene[:plot_gene_num]]

        adjust_text(texts,only_move={'text': 'xy'},arrowprops=dict(arrowstyle='->', color='grey'),)

    plt.title(title,fontsize=title_fontsize)
    if save:
        fig.savefig("factor_gene_{}.png".format(title),dpi=300,bbox_inches = 'tight')
    return fig,ax

plot_weights(view, factor, color='#a51616', figsize=(3, 4), plot_gene_num=10, ascending=False, labels_fontsize=12, ticks_fontsize=12, title_fontsize=12, title=None, save=False)

Plot the weights of each gene in the factor

Parameters:

Name Type Description Default
view str

str, the view of the factor

required
factor int

int, the factor number

required
color str

str, the color of the plot

'#a51616'
figsize tuple

tuple, the size of the figure

(3, 4)
plot_gene_num int

int, the number of genes to plot

10
ascending bool

bool, whether to sort the genes by weights

False
labels_fontsize int

int, the fontsize of the labels

12
ticks_fontsize int

int, the fontsize of the ticks

12
title_fontsize int

int, the fontsize of the title

12
title

str, the title of the plot

None
save bool

bool, whether to save the plot

False

Returns:

Name Type Description
fig matplotlib.figure.Figure

the figure of the plot

ax matplotlib.axes._axes.Axes

the axis of the plot

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def plot_weights(self,view:str,factor:int,color:str='#a51616',figsize:tuple=(3,4),
                 plot_gene_num:int=10,ascending:bool=False,
                labels_fontsize:int=12,ticks_fontsize:int=12,title_fontsize:int=12,
                 title=None,save:bool=False)->Tuple[matplotlib.figure.Figure,matplotlib.axes._axes.Axes]:
    """
    Plot the weights of each gene in the factor

    Arguments:
        view: str, the view of the factor
        factor: int, the factor number
        color: str, the color of the plot
        figsize: tuple, the size of the figure
        plot_gene_num: int, the number of genes to plot
        ascending: bool, whether to sort the genes by weights
        labels_fontsize: int, the fontsize of the labels
        ticks_fontsize: int, the fontsize of the ticks
        title_fontsize: int, the fontsize of the title
        title: str, the title of the plot
        save: bool, whether to save the plot

    Returns:
        fig: the figure of the plot
        ax: the axis of the plot

    """

    factor_w=pd.DataFrame()
    for i in range(self.factors.shape[1]):
        f1_w=get_weights(hdf5_path=self.model_path,view=view,factor=i+1)
        f1_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]
        factor_w['factor_{}'.format(i+1)]=f1_w['weights']
    factor_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]

    fig, ax = plt.subplots(figsize=figsize)
    plot_data4=pd.DataFrame()
    plot_data4['weight']=factor_w['factor_{}'.format(factor)].sort_values(ascending=ascending)
    plot_data4['rank']=range(len(plot_data4['weight']))
    plt.plot(plot_data4['rank'],plot_data4['weight'],color=color)

    hub_gene=plot_data4.index[:plot_gene_num]
    plt.scatter(plot_data4.loc[hub_gene,'rank'],
            plot_data4.loc[hub_gene,'weight'],color=color,
                alpha=0.5)

    from adjustText import adjust_text
    texts=[ax.text(plot_data4.loc[i,'rank'],
                    plot_data4.loc[i,'weight'],
                    i,
                    fontdict={'size':10,'weight':'normal','color':'black'}
                    ) for i in hub_gene]

    adjust_text(texts,only_move={'text': 'xy'},
                arrowprops=dict(arrowstyle='->', color='grey'),)

    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['bottom'].set_visible(True)
    ax.spines['left'].set_visible(True)
    if title is None:
        plt.title('factor_{}'.format(factor),fontsize=title_fontsize,)
    else:
        plt.title(title,fontsize=title_fontsize,)
    plt.xticks(fontsize=ticks_fontsize)
    plt.yticks(fontsize=ticks_fontsize)
    plt.xlabel('Feature rank',fontsize=labels_fontsize)
    plt.ylabel('Weight',fontsize=labels_fontsize)

    plt.grid(False)
    if save:
        fig.savefig("factor{}_gene.png".format(factor),dpi=300,bbox_inches = 'tight')
    return fig,ax

plot_top_feature_dotplot(view, cmap='bwr', n_genes=3)

Plot the top features of each factor in dotplot

Parameters:

Name Type Description Default
view str

str, the view of the factor

required
cmap str

str, the color map of the plot

'bwr'
n_genes int

int, the number of genes to plot

3

Returns:

Name Type Description
axes list

the list of the figure

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def plot_top_feature_dotplot(self,view:str,cmap:str='bwr',n_genes:int=3)->list:
    """
    Plot the top features of each factor in dotplot

    Arguments:
        view: str, the view of the factor
        cmap: str, the color map of the plot
        n_genes: int, the number of genes to plot

    Returns:
        axes: the list of the figure

    """

    factor_w=pd.DataFrame()
    for i in range(self.factors.shape[1]):
        f1_w=get_weights(hdf5_path=self.model_path,view=view,factor=i+1)
        f1_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]
        factor_w['factor_{}'.format(i+1)]=f1_w['weights']
    factor_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]

    adata1=anndata.AnnData(pd.concat([factor_w,factor_w],axis=1).T)
    adata1.obs['Factor']=adata1.obs.index
    adata1.obs['Factor']=adata1.obs['Factor'].astype('category')
    sc.tl.rank_genes_groups(adata1, groupby='Factor', method='wilcoxon')
    ax=sc.pl.rank_genes_groups_dotplot(adata1, n_genes=n_genes, 
                                    cmap=cmap,show=False)
    return ax

plot_top_feature_heatmap(view, cmap='bwr', n_genes=3)

Plot the top features of each factor in dotplot

Parameters:

Name Type Description Default
view str

str, the view of the factor

required
cmap str

str, the color map of the plot

'bwr'
n_genes int

int, the number of genes to plot

3

Returns:

Name Type Description
axes list

the list of the figure

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def plot_top_feature_heatmap(self,view:str,cmap:str='bwr',n_genes:int=3)->list:
    """
    Plot the top features of each factor in dotplot

    Arguments:
        view: str, the view of the factor
        cmap: str, the color map of the plot
        n_genes: int, the number of genes to plot

    Returns:
        axes: the list of the figure

    """

    factor_w=pd.DataFrame()
    for i in range(self.factors.shape[1]):
        f1_w=get_weights(hdf5_path=self.model_path,view=view,factor=i+1)
        f1_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]
        factor_w['factor_{}'.format(i+1)]=f1_w['weights']
    factor_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]

    adata1=anndata.AnnData(pd.concat([factor_w,factor_w],axis=1).T)
    adata1.obs['Factor']=adata1.obs.index
    adata1.obs['Factor']=adata1.obs['Factor'].astype('category')
    sc.tl.rank_genes_groups(adata1, groupby='Factor', method='wilcoxon')
    ax=sc.pl.rank_genes_groups_matrixplot(adata1, n_genes=n_genes, 
                                    cmap=cmap,show=False)
    return ax

get_top_feature(view, log2fc_min=3, pval_cutoff=0.1)

Get the top features of each factor

Parameters:

Name Type Description Default
view str

str, the view of the factor

required
log2fc_min int

float, the minimum log2fc of the feature

3
pval_cutoff float

float, the maximum pval of the feature

0.1

Returns:

Name Type Description
top_feature dict

dict, the top features of each factor

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def get_top_feature(self,view:str,log2fc_min:int=3,pval_cutoff:float=0.1)->dict:
    """
    Get the top features of each factor

    Arguments:
        view: str, the view of the factor
        log2fc_min: float, the minimum log2fc of the feature
        pval_cutoff: float, the maximum pval of the feature

    Returns:
        top_feature: dict, the top features of each factor

    """


    factor_w=pd.DataFrame()
    for i in range(self.factors.shape[1]):
        f1_w=get_weights(hdf5_path=self.model_path,view=view,factor=i+1)
        f1_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]
        factor_w['factor_{}'.format(i+1)]=f1_w['weights']
    factor_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]

    adata1=anndata.AnnData(pd.concat([factor_w,factor_w],axis=1).T)
    adata1.obs['Factor']=adata1.obs.index
    adata1.obs['Factor']=adata1.obs['Factor'].astype('category')
    top_feature=get_celltype_marker(adata1,clustertype='Factor',
                        log2fc_min=log2fc_min,pval_cutoff=pval_cutoff)
    return top_feature

omicverse.single.GLUE_pair

Bases: object

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
class GLUE_pair(object):

    def __init__(self,rna:anndata.AnnData,
              atac:anndata.AnnData) -> None:
        r"""
        Pair the cells between RNA and ATAC using result of GLUE.

        Arguments:
            rna: the AnnData of RNA-seq.
            atac: the AnnData of ATAC-seq.
            depth: the depth of the search for the nearest neighbor.

        """

        print('......Extract GLUE layer from obs')
        self.rna_loc=pd.DataFrame(rna.obsm['X_glue'], index=rna.obs.index)
        self.atac_loc=pd.DataFrame(atac.obsm['X_glue'], index=atac.obs.index)

    def correlation(self):
        """
        Perform Pearson Correlation analysis in the layer of GLUE

        """

        print('......Prepare for pair')
        import gc
        len1=(len(self.rna_loc)//5000)+1
        len2=(len(self.atac_loc)//5000)+1
        if len1>len2:
            len1=len2
        p_pd=pd.DataFrame(columns=['rank_'+str(i) for i in range(50)])
        n_pd=pd.DataFrame(columns=['rank_'+str(i) for i in range(50)])
        print('......Start to calculate the Pearson coef')
        for j in range(len1):
            c=pd.DataFrame()
            with trange(len1) as tt:
                for i in tt:
                    t1=self.rna_loc.iloc[5000*(i):5000*(i+1)]
                    t2=self.atac_loc.iloc[5000*(j):5000*(j+1)]
                    a=np.corrcoef(t1,t2)[len(t1):,0:len(t1)]
                    b=pd.DataFrame(a,index=t2.index,columns=t1.index)  
                    c=pd.concat([c,b],axis=1)
                    del t1
                    del t2
                    del a
                    del b
                    gc.collect()
                    tt.set_description('Now Pearson block is {}/{}'.format(i,len1))
            with trange(len(c)) as t:
                for i in t:
                    t_c=c.iloc[i]
                    p_pd.loc[t_c.name]=c.iloc[i].sort_values(ascending=False)[:50].values
                    n_pd.loc[t_c.name]=c.iloc[i].sort_values(ascending=False)[:50].index.tolist()
                    t.set_description('Now rna_index is {}/{}, all is {}'.format(i+j*5000,j*5000+len(c),len(self.atac_loc)))
            print('Now epoch is {}, {}/{}'.format(j,j*5000+len(c),len(self.atac_loc))) 
            del c
            gc.collect()
        self.rna_pd=p_pd
        self.atac_pd=n_pd

    def find_neighbor_cell(self,depth:int=10,cor:float=0.9)->pd.DataFrame:
        """
        Find the neighbor cells between two omics using pearson

        Arguments:
            depth: the depth of the search for the nearest neighbor.

        Returns:
            result: the pair result

        """


        if depth>50:
            print('......depth limited to 50')
            depth=50
        rubish_c=[]
        finish_c=[]
        p_pd=self.rna_pd.copy()
        n_pd=self.atac_pd.copy()
        with trange(depth) as dt:
            for d in dt:
                p_pd=p_pd.loc[p_pd['rank_{}'.format(d)]>cor]
                p_pd=p_pd.sort_values('rank_{}'.format(d),ascending=False)
                for i in p_pd.index:
                    name=n_pd.loc[i,'rank_{}'.format(d)]
                    if name not in rubish_c:
                        finish_c.append(i)
                        rubish_c.append(name)
                    else:
                        continue
                p_pd=p_pd.loc[~p_pd.index.isin(finish_c)]
                n_pd=n_pd.loc[~n_pd.index.isin(finish_c)]
                dt.set_description('Now depth is {}/{}'.format(d,depth))
        result=pd.DataFrame()
        result['omic_1']=rubish_c
        result['omic_2']=finish_c
        result.index=['cell_{}'.format(i) for i in range(len(result))]
        self.pair_res=result
        return result

    def pair_omic(self,omic1:anndata.AnnData,omic2:anndata.AnnData)->Tuple[anndata.AnnData,anndata.AnnData]:
        """
        Pair the omics using the result of find_neighbor_cell

        Arguments:
            omic1: the AnnData of omic1.
            omic2: the AnnData of omic2.

        Returns:
            rna1: the paired AnnData of omic1.
            atac1: the paired AnnData of omic2.

        """
        rna1=omic1[self.res_pair['omic_1']].copy()
        atac1=omic2[self.res_pair['omic_2']].copy()
        rna1.obs.index=self.res_pair.index
        atac1.obs.index=self.res_pair.index
        return rna1,atac1

__init__(rna, atac)

Pair the cells between RNA and ATAC using result of GLUE.

Parameters:

Name Type Description Default
rna anndata.AnnData

the AnnData of RNA-seq.

required
atac anndata.AnnData

the AnnData of ATAC-seq.

required
depth

the depth of the search for the nearest neighbor.

required
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def __init__(self,rna:anndata.AnnData,
          atac:anndata.AnnData) -> None:
    r"""
    Pair the cells between RNA and ATAC using result of GLUE.

    Arguments:
        rna: the AnnData of RNA-seq.
        atac: the AnnData of ATAC-seq.
        depth: the depth of the search for the nearest neighbor.

    """

    print('......Extract GLUE layer from obs')
    self.rna_loc=pd.DataFrame(rna.obsm['X_glue'], index=rna.obs.index)
    self.atac_loc=pd.DataFrame(atac.obsm['X_glue'], index=atac.obs.index)

correlation()

Perform Pearson Correlation analysis in the layer of GLUE

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def correlation(self):
    """
    Perform Pearson Correlation analysis in the layer of GLUE

    """

    print('......Prepare for pair')
    import gc
    len1=(len(self.rna_loc)//5000)+1
    len2=(len(self.atac_loc)//5000)+1
    if len1>len2:
        len1=len2
    p_pd=pd.DataFrame(columns=['rank_'+str(i) for i in range(50)])
    n_pd=pd.DataFrame(columns=['rank_'+str(i) for i in range(50)])
    print('......Start to calculate the Pearson coef')
    for j in range(len1):
        c=pd.DataFrame()
        with trange(len1) as tt:
            for i in tt:
                t1=self.rna_loc.iloc[5000*(i):5000*(i+1)]
                t2=self.atac_loc.iloc[5000*(j):5000*(j+1)]
                a=np.corrcoef(t1,t2)[len(t1):,0:len(t1)]
                b=pd.DataFrame(a,index=t2.index,columns=t1.index)  
                c=pd.concat([c,b],axis=1)
                del t1
                del t2
                del a
                del b
                gc.collect()
                tt.set_description('Now Pearson block is {}/{}'.format(i,len1))
        with trange(len(c)) as t:
            for i in t:
                t_c=c.iloc[i]
                p_pd.loc[t_c.name]=c.iloc[i].sort_values(ascending=False)[:50].values
                n_pd.loc[t_c.name]=c.iloc[i].sort_values(ascending=False)[:50].index.tolist()
                t.set_description('Now rna_index is {}/{}, all is {}'.format(i+j*5000,j*5000+len(c),len(self.atac_loc)))
        print('Now epoch is {}, {}/{}'.format(j,j*5000+len(c),len(self.atac_loc))) 
        del c
        gc.collect()
    self.rna_pd=p_pd
    self.atac_pd=n_pd

find_neighbor_cell(depth=10, cor=0.9)

Find the neighbor cells between two omics using pearson

Parameters:

Name Type Description Default
depth int

the depth of the search for the nearest neighbor.

10

Returns:

Name Type Description
result pd.DataFrame

the pair result

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def find_neighbor_cell(self,depth:int=10,cor:float=0.9)->pd.DataFrame:
    """
    Find the neighbor cells between two omics using pearson

    Arguments:
        depth: the depth of the search for the nearest neighbor.

    Returns:
        result: the pair result

    """


    if depth>50:
        print('......depth limited to 50')
        depth=50
    rubish_c=[]
    finish_c=[]
    p_pd=self.rna_pd.copy()
    n_pd=self.atac_pd.copy()
    with trange(depth) as dt:
        for d in dt:
            p_pd=p_pd.loc[p_pd['rank_{}'.format(d)]>cor]
            p_pd=p_pd.sort_values('rank_{}'.format(d),ascending=False)
            for i in p_pd.index:
                name=n_pd.loc[i,'rank_{}'.format(d)]
                if name not in rubish_c:
                    finish_c.append(i)
                    rubish_c.append(name)
                else:
                    continue
            p_pd=p_pd.loc[~p_pd.index.isin(finish_c)]
            n_pd=n_pd.loc[~n_pd.index.isin(finish_c)]
            dt.set_description('Now depth is {}/{}'.format(d,depth))
    result=pd.DataFrame()
    result['omic_1']=rubish_c
    result['omic_2']=finish_c
    result.index=['cell_{}'.format(i) for i in range(len(result))]
    self.pair_res=result
    return result

pair_omic(omic1, omic2)

Pair the omics using the result of find_neighbor_cell

Parameters:

Name Type Description Default
omic1 anndata.AnnData

the AnnData of omic1.

required
omic2 anndata.AnnData

the AnnData of omic2.

required

Returns:

Name Type Description
rna1 anndata.AnnData

the paired AnnData of omic1.

atac1 anndata.AnnData

the paired AnnData of omic2.

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def pair_omic(self,omic1:anndata.AnnData,omic2:anndata.AnnData)->Tuple[anndata.AnnData,anndata.AnnData]:
    """
    Pair the omics using the result of find_neighbor_cell

    Arguments:
        omic1: the AnnData of omic1.
        omic2: the AnnData of omic2.

    Returns:
        rna1: the paired AnnData of omic1.
        atac1: the paired AnnData of omic2.

    """
    rna1=omic1[self.res_pair['omic_1']].copy()
    atac1=omic2[self.res_pair['omic_2']].copy()
    rna1.obs.index=self.res_pair.index
    atac1.obs.index=self.res_pair.index
    return rna1,atac1

omicverse.single.factor_exact(adata, hdf5_path)

Extract the factor information from hdf5 file.

Parameters:

Name Type Description Default
adata anndata.AnnData

The AnnData object.

required
hdf5_path str

The path of hdf5 file.

required

Returns:

Name Type Description
adata anndata.AnnData

The AnnData object with factor information.

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def factor_exact(adata:anndata.AnnData,hdf5_path:str)->anndata.AnnData:
    r"""
    Extract the factor information from hdf5 file.

    Arguments:
        adata: The AnnData object.
        hdf5_path: The path of hdf5 file.

    Returns:
        adata: The AnnData object with factor information.

    """
    f_pos = h5py.File(hdf5_path,'r')  
    for i in range(f_pos['expectations']['Z']['group0'].shape[0]):
        adata.obs['factor{0}'.format(i+1)]=f_pos['expectations']['Z']['group0'][i] 
    return adata

omicverse.single.factor_correlation(adata, cluster, factor_list, p_threshold=500)

Calculate the correlation between factors and cluster.

Parameters:

Name Type Description Default
adata anndata.AnnData

The AnnData object.

required
cluster str

The name of cluster.

required
factor_list list

The list of factors.

required
p_threshold int

The threshold of p-value.

500

Returns:

Name Type Description
cell_pd pd.DataFrame

The correlation between factors and cluster.

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def factor_correlation(adata:anndata.AnnData,cluster:str,
                       factor_list:list,p_threshold:int=500)->pd.DataFrame:
    r"""
    Calculate the correlation between factors and cluster.

    Arguments:
        adata: The AnnData object.
        cluster: The name of cluster.
        factor_list: The list of factors.
        p_threshold: The threshold of p-value.

    Returns:
        cell_pd: The correlation between factors and cluster.

    """
    plot_data=adata.obs
    cell_t=list(set(plot_data[cluster]))
    cell_pd=pd.DataFrame(index=cell_t)
    for i in factor_list:
        test=[]
        for j in cell_t:
            a=plot_data[plot_data[cluster]==j]['factor'+str(i)].values
            b=plot_data[~(plot_data[cluster]==j)]['factor'+str(i)].values
            t, p = stats.ttest_ind(a,b)
            logp=-np.log(p)
            if(logp>p_threshold):
                logp=p_threshold
            test.append(logp)
        cell_pd['factor'+str(i)]=test
    return cell_pd

omicverse.single.get_weights(hdf5_path, view, factor, scale=True)

Get the weights of each feature in a specific factor.

Parameters:

Name Type Description Default
hdf5_path str

the path of hdf5 file.

required
view str

the name of view.

required
factor int

the number of factor.

required
scale bool

whether to scale the weights.

True

Returns:

Name Type Description
res pd.DataFrame

the weights of each feature in a specific factor.

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def get_weights(hdf5_path:str,view:str,
                factor:int,scale:bool=True)->pd.DataFrame:
    r"""
    Get the weights of each feature in a specific factor.

    Arguments:
        hdf5_path: the path of hdf5 file.
        view: the name of view.
        factor: the number of factor.
        scale: whether to scale the weights.

    Returns:
        res: the weights of each feature in a specific factor.

    """
    f = h5py.File(hdf5_path,'r')  
    view_names=f['views']['views'][:]
    group_names=f['groups']['groups'][:]
    feature_names=np.array([f['features'][i][:] for i in view_names])
    sample_names=np.array([f['samples'][i][:] for i in group_names])
    f_name=feature_names[np.where(view_names==str.encode(view))[0][0]]
    f_w=f['expectations']['W'][view][factor-1]
    if scale==True:
        f_w=normalization(f_w)
    res=pd.DataFrame()
    res['feature']=f_name
    res['weights']=f_w
    res['abs_weights']=abs(f_w)
    res['sig']='+'
    res.loc[(res.weights<0),'sig'] = '-'

    return res

omicverse.single.glue_pair(rna, atac, depth=20)

Pair the cells between RNA and ATAC using result of GLUE.

Parameters:

Name Type Description Default
rna anndata.AnnData

the AnnData of RNA-seq.

required
atac anndata.AnnData

the AnnData of ATAC-seq.

required
depth int

the depth of the search for the nearest neighbor.

20
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def glue_pair(rna:anndata.AnnData,
              atac:anndata.AnnData,depth:int=20)->pd.DataFrame:
    r"""
    Pair the cells between RNA and ATAC using result of GLUE.

    Arguments:
        rna: the AnnData of RNA-seq.
        atac: the AnnData of ATAC-seq.
        depth: the depth of the search for the nearest neighbor.

    """


    #提取GLUE层结果
    print('......Extract GLUE layer from obs')
    rna_loc=pd.DataFrame(rna.obsm['X_glue'], index=rna.obs.index)
    atac_loc=pd.DataFrame(atac.obsm['X_glue'], index=atac.obs.index)

    #对GLUE层进行Pearson系数分析
    print('......Prepare for pair')
    import gc
    len1=(len(rna_loc)//5000)+1
    len2=(len(atac_loc)//5000)+1
    if len1>len2:
        len1=len2
    p_pd=pd.DataFrame(columns=['rank_'+str(i) for i in range(50)])
    n_pd=pd.DataFrame(columns=['rank_'+str(i) for i in range(50)])
    print('......Start to calculate the Pearson coef')
    for j in range(len1):
        c=pd.DataFrame()
        with trange(len1) as tt:
            for i in tt:
                t1=rna_loc.iloc[5000*(i):5000*(i+1)]
                t2=atac_loc.iloc[5000*(j):5000*(j+1)]
                a=np.corrcoef(t1,t2)[len(t1):,0:len(t1)]
                b=pd.DataFrame(a,index=t2.index,columns=t1.index)  
                c=pd.concat([c,b],axis=1)
                del t1
                del t2
                del a
                del b
                gc.collect()
                tt.set_description('Now Pearson block is {}/{}'.format(i,len1))
        with trange(len(c)) as t:
            for i in t:
                t_c=c.iloc[i]
                p_pd.loc[t_c.name]=c.iloc[i].sort_values(ascending=False)[:50].values
                n_pd.loc[t_c.name]=c.iloc[i].sort_values(ascending=False)[:50].index.tolist()
                t.set_description('Now rna_index is {}/{}, all is {}'.format(i+j*5000,i+j*5000+len(c),len(atac_loc)))
        print('Now epoch is {}, {}/{}'.format(j,j*5000+len(c),len(atac_loc))) 
        del c
        gc.collect()

    #寻找最近的细胞,其中depth的灵活调整可以使得配对成功的细胞数变大,同时精度有所下降
    def find_neighbor_cell(p_pd,n_pd,depth=10):
        if depth>50:
            print('......depth limited to 50')
            depth=50
        rubish_c=[]
        finish_c=[]
        with trange(depth) as dt:
            for d in dt:
                p_pd=p_pd.loc[p_pd['rank_{}'.format(d)]>0.9]
                p_pd=p_pd.sort_values('rank_{}'.format(d),ascending=False)
                for i in p_pd.index:
                    name=n_pd.loc[i,'rank_{}'.format(d)]
                    if name not in rubish_c:
                        finish_c.append(i)
                        rubish_c.append(name)
                    else:
                        continue
                p_pd=p_pd.loc[~p_pd.index.isin(finish_c)]
                n_pd=n_pd.loc[~n_pd.index.isin(finish_c)]
                dt.set_description('Now depth is {}/{}'.format(d,depth))
        result=pd.DataFrame()
        result['omic_1']=rubish_c
        result['omic_2']=finish_c
        result.index=['cell_{}'.format(i) for i in range(len(result))]
        return result
    print('......Start to find neighbor')
    res_pair=find_neighbor_cell(p_pd,n_pd,depth=depth)
    return res_pair