Api mofa
omicverse.single.pyMOFA
¶
Bases: object
MOFA class.
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
class pyMOFA(object):
r"""
MOFA class.
"""
def __init__(self,omics:list,omics_name:list):
r"""
Initialize the MOFA class.
Arguments:
omics: The list of omics data.
omics_name: The list of omics name.
"""
self.omics=omics
self.omics_name=omics_name
self.M=len(omics)
def mofa_preprocess(self):
r"""
Preprocess the data.
"""
self.data_mat=[[None for g in range(1)] for m in range(len(self.omics))]
self.feature_name=[]
for m in range(self.M):
if issparse(self.omics[m].X)==True:
self.data_mat[m][0]=self.omics[m].X.toarray()
else:
self.data_mat[m][0]=self.omics[m].X
self.feature_name.append([self.omics_name[m]+'_'+i for i in self.omics[m].var.index])
def mofa_run(self,outfile:str='res.hdf5',factors:int=20,iter:int = 1000,convergence_mode:str = "fast",
spikeslab_weights:bool = True,startELBO:int = 1, freqELBO:int = 1, dropR2:float = 0.001, gpu_mode:bool = True,
verbose:bool = False, seed:int = 112,scale_groups:bool = False,
scale_views:bool = False,center_groups:bool=True,)->None:
r"""
Train the MOFA model.
Arguments:
outfile: The path of output file.
factors: The number of factors.
iter: The number of iterations.
convergence_mode: The mode of convergence.
spikeslab_weights: Whether to use spikeslab weights.
startELBO: The start of ELBO.
freqELBO: The frequency of ELBO.
dropR2: The drop of R2.
gpu_mode: Whether to use gpu mode.
verbose: Whether to print the information.
seed: The seed of random number.
scale_groups: Whether to scale groups.
scale_views: Whether to scale views.
center_groups: Whether to center groups.
"""
ent1 = entry_point()
ent1.set_data_options(
scale_groups = scale_groups,
scale_views = scale_views,
center_groups=center_groups,
)
ent1.set_data_matrix(self.data_mat, likelihoods = [i for i in ["gaussian"]*self.M],
views_names=self.omics_name,
samples_names=[self.omics[0].obs.index],
features_names=self.feature_name)
# set param
ent1.set_model_options(
factors = factors,
spikeslab_weights = spikeslab_weights,
ard_factors = True,
ard_weights = True
)
ent1.set_train_options(
iter = iter,
convergence_mode = convergence_mode,
startELBO = startELBO,
freqELBO = freqELBO,
dropR2 = dropR2,
gpu_mode = gpu_mode,
verbose = verbose,
seed = seed
)
#
ent1.build()
ent1.run()
ent1.save(outfile=outfile)
__init__(omics, omics_name)
¶
Initialize the MOFA class.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
omics |
list
|
The list of omics data. |
required |
omics_name |
list
|
The list of omics name. |
required |
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def __init__(self,omics:list,omics_name:list):
r"""
Initialize the MOFA class.
Arguments:
omics: The list of omics data.
omics_name: The list of omics name.
"""
self.omics=omics
self.omics_name=omics_name
self.M=len(omics)
mofa_preprocess()
¶
Preprocess the data.
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def mofa_preprocess(self):
r"""
Preprocess the data.
"""
self.data_mat=[[None for g in range(1)] for m in range(len(self.omics))]
self.feature_name=[]
for m in range(self.M):
if issparse(self.omics[m].X)==True:
self.data_mat[m][0]=self.omics[m].X.toarray()
else:
self.data_mat[m][0]=self.omics[m].X
self.feature_name.append([self.omics_name[m]+'_'+i for i in self.omics[m].var.index])
mofa_run(outfile='res.hdf5', factors=20, iter=1000, convergence_mode='fast', spikeslab_weights=True, startELBO=1, freqELBO=1, dropR2=0.001, gpu_mode=True, verbose=False, seed=112, scale_groups=False, scale_views=False, center_groups=True)
¶
Train the MOFA model.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
outfile |
str
|
The path of output file. |
'res.hdf5'
|
factors |
int
|
The number of factors. |
20
|
iter |
int
|
The number of iterations. |
1000
|
convergence_mode |
str
|
The mode of convergence. |
'fast'
|
spikeslab_weights |
bool
|
Whether to use spikeslab weights. |
True
|
startELBO |
int
|
The start of ELBO. |
1
|
freqELBO |
int
|
The frequency of ELBO. |
1
|
dropR2 |
float
|
The drop of R2. |
0.001
|
gpu_mode |
bool
|
Whether to use gpu mode. |
True
|
verbose |
bool
|
Whether to print the information. |
False
|
seed |
int
|
The seed of random number. |
112
|
scale_groups |
bool
|
Whether to scale groups. |
False
|
scale_views |
bool
|
Whether to scale views. |
False
|
center_groups |
bool
|
Whether to center groups. |
True
|
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def mofa_run(self,outfile:str='res.hdf5',factors:int=20,iter:int = 1000,convergence_mode:str = "fast",
spikeslab_weights:bool = True,startELBO:int = 1, freqELBO:int = 1, dropR2:float = 0.001, gpu_mode:bool = True,
verbose:bool = False, seed:int = 112,scale_groups:bool = False,
scale_views:bool = False,center_groups:bool=True,)->None:
r"""
Train the MOFA model.
Arguments:
outfile: The path of output file.
factors: The number of factors.
iter: The number of iterations.
convergence_mode: The mode of convergence.
spikeslab_weights: Whether to use spikeslab weights.
startELBO: The start of ELBO.
freqELBO: The frequency of ELBO.
dropR2: The drop of R2.
gpu_mode: Whether to use gpu mode.
verbose: Whether to print the information.
seed: The seed of random number.
scale_groups: Whether to scale groups.
scale_views: Whether to scale views.
center_groups: Whether to center groups.
"""
ent1 = entry_point()
ent1.set_data_options(
scale_groups = scale_groups,
scale_views = scale_views,
center_groups=center_groups,
)
ent1.set_data_matrix(self.data_mat, likelihoods = [i for i in ["gaussian"]*self.M],
views_names=self.omics_name,
samples_names=[self.omics[0].obs.index],
features_names=self.feature_name)
# set param
ent1.set_model_options(
factors = factors,
spikeslab_weights = spikeslab_weights,
ard_factors = True,
ard_weights = True
)
ent1.set_train_options(
iter = iter,
convergence_mode = convergence_mode,
startELBO = startELBO,
freqELBO = freqELBO,
dropR2 = dropR2,
gpu_mode = gpu_mode,
verbose = verbose,
seed = seed
)
#
ent1.build()
ent1.run()
ent1.save(outfile=outfile)
omicverse.single.pyMOFAART
¶
Bases: object
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
class pyMOFAART(object):
def __init__(self,model_path:str):
"""
Initialize the MOFAART class.
Arguments:
model_path: The path of MOFA model.
"""
check_mofax()
global mofax_install
if mofax_install==True:
global_imports("mofax","mfx")
self.model_path=model_path
mfx_model=mfx.mofa_model(model_path)
self.factors=mfx_model.get_factors()
plot_data=pd.DataFrame()
for i in mfx_model.get_r2()['View'].unique():
plot_data[i]=mfx_model.get_r2().loc[mfx_model.get_r2()['View']==i,'R2'].values
self.r2=plot_data
mfx_model.close()
def get_factors(self,adata:anndata.AnnData):
"""
Get the factors of MOFA to anndata object.
Arguments:
adata: The anndata object.
"""
print('......Add factors to adata and store to adata.obsm["X_mofa"]')
adata.obsm['X_mofa']=self.factors
adata=factor_exact(adata,hdf5_path=self.model_path)
def get_r2(self,)->pd.DataFrame:
"""
Get the varience of each factor
Returns:
r2: the varience of each factor
"""
return self.r2
def plot_r2(self,figsize:tuple=(2,3),cmap:str='Greens',
ticks_fontsize:int=10,labels_fontsize:int=12,
save:bool=False)->Tuple[matplotlib.figure.Figure,matplotlib.axes._axes.Axes]:
"""
plot the varience of each factor.
Arguments:
figsize: The size of figure.
cmap: The color map.
ticks_fontsize: The size of ticks.
labels_fontsize: The size of labels.
save: Whether to save the figure.
Returns:
fig: The figure of varience.
ax: The axes of varience.
"""
fig, ax = plt.subplots(figsize=figsize)
sns.heatmap(self.r2,cmap=cmap,ax=ax,xticklabels=True,yticklabels=True,
cbar_kws={'shrink':0.5})
plt.xticks(fontsize=ticks_fontsize)
plt.yticks(fontsize=ticks_fontsize)
plt.ylabel('Factor',fontsize=labels_fontsize)
plt.xlabel('View',fontsize=labels_fontsize)
plt.title('Varience',fontsize=labels_fontsize)
if save:
fig.savefig("mofa_varience.png",dpi=300,bbox_inches = 'tight')
return fig,ax
def get_cor(self,adata:anndata.AnnData,cluster:str,factor_list=None)->pd.DataFrame:
"""
get the correlation of each factor with cluster type in anndata object.
Arguments:
adata: The anndata object.
cluster: The cluster type.
factor_list: The list of factors.
Returns:
plot_data1: The correlation of each factor with cluster type.
"""
if factor_list==None:
factor_list=[i+1 for i in range(self.r2.shape[0])]
plot_data1=factor_correlation(adata=adata,cluster=cluster,factor_list=factor_list)
return plot_data1
def plot_cor(self,adata:anndata.AnnData,cluster:str,factor_list=None,figsize:tuple=(6,3),
cmap:str='Purples',ticks_fontsize:int=10,labels_fontsize:int=12,title:str='Correlation',
save:bool=False)->Tuple[matplotlib.figure.Figure,matplotlib.axes._axes.Axes]:
"""
Plot the correlation of each factor with cluster type in anndata object.
Arguments:
adata: The anndata object in MOFA pre trained.
cluster: The cluster type in adata.obs.
factor_list: The list of factors.
figsize: The size of figure.
cmap: The color map.
ticks_fontsize: The font size of ticks.
labels_fontsize: The font size of labels.
title: The title of figure.
save: Whether to save the figure.
Returns:
fig: The figure of correlation.
ax: The axes of correlation.
"""
if factor_list==None:
factor_list=[i+1 for i in range(self.r2.shape[0])]
plot_data1=factor_correlation(adata=adata,cluster=cluster,factor_list=factor_list)
fig, ax = plt.subplots(figsize=figsize)
sns.heatmap(plot_data1,cmap=cmap,ax=ax,square=True,
cbar_kws={'shrink':0.5})
plt.xticks(fontsize=ticks_fontsize)
plt.yticks(fontsize=ticks_fontsize)
plt.xlabel('Factor',fontsize=labels_fontsize)
plt.ylabel(cluster,fontsize=labels_fontsize)
plt.title(title,fontsize=labels_fontsize)
if save:
fig.savefig("mofa_cor.png",dpi=300,bbox_inches = 'tight')
return fig,ax
def plot_factor(self,adata:anndata.AnnData,cluster:str,title:str,figsize:tuple=(3,3),
factor1:int=1,factor2:int=2,palette:list=None,
save:bool=False)->Tuple[matplotlib.figure.Figure,matplotlib.axes._axes.Axes]:
"""
Plot the factor of MOFA in anndata object.
Arguments:
adata: The anndata object.
cluster: The cluster type in adata.obs.
title: The title of figure.
figsize: The size of figure.
factor1: The first factor.
factor2: The second factor.
palette: The color map.
save: Whether to save the figure.
Returns:
fig: The figure of factor.
ax: The axes of factor.
"""
if 'X_mofa' not in adata.obsm.keys():
self.get_factors(adata)
if palette==None:
palette=pyomic_palette()
fig, ax = plt.subplots(figsize=figsize)
#factor1,factor2=4,6
sc.pl.embedding(
adata=adata,
basis='X_mofa',
color=cluster,
title=title,
components="{},{}".format(factor1,factor2),
palette=palette,
ncols=1,
ax=ax
)
if save:
fig.savefig("figures/mofa_factor_{}_{}.png".format(factor1,factor2),dpi=300,bbox_inches = 'tight')
return fig,ax
def plot_weight_gene_d1(self,view:str,factor1:int,factor2:int,
colors_dict:dict=None,plot_gene_num:int=5,title:str='',title_fontsize:int=12,
ticks_fontsize:int=12,labels_fontsize:int=12,
weith_threshold:float=0.5,figsize:tuple=(3,3),
save:bool=False)->Tuple[matplotlib.figure.Figure,matplotlib.axes._axes.Axes]:
"""
Plot the weight of gene in each factor of MOFA in anndata object in dimension 1.
Arguments:
view: The view of MOFA.
factor1: The first factor.
factor2: The second factor.
colors_dict: The color dict of up, down and normal. default is {'normal':'#c2c2c2','up':'#a51616','down':'#0d6a3b'}
plot_gene_num: The number of genes to plot.
title: The title of figure.
title_fontsize: The font size of title.
ticks_fontsize: The font size of ticks.
labels_fontsize: The font size of labels.
weith_threshold: The threshold of weight.
figsize: The size of figure.
save: Whether to save the figure.
Returns:
fig: The figure of weight.
ax: The axes of weight.
"""
factor_w=pd.DataFrame()
for i in range(self.factors.shape[1]):
f1_w=get_weights(hdf5_path=self.model_path,view=view,factor=i+1)
f1_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]
factor_w['factor_{}'.format(i+1)]=f1_w['weights']
factor_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]
#factor1,factor2=6,4
plot_data3=factor_w[['factor_{}'.format(factor1),'factor_{}'.format(factor2)]]
plot_data3['sig']='normal'
plot_data3.loc[(plot_data3['factor_{}'.format(factor1)]>weith_threshold),'sig']='up'
plot_data3.loc[(plot_data3['factor_{}'.format(factor1)]<-weith_threshold),'sig']='down'
if colors_dict==None:
colors_dict={'normal':'#c2c2c2','up':'#a51616','down':'#0d6a3b'}
fig, ax = plt.subplots(figsize=figsize)
ax.scatter(plot_data3.loc[plot_data3['sig']=='normal','factor_{}'.format(factor1)],
plot_data3.loc[plot_data3['sig']=='normal','factor_{}'.format(factor2)],
color=colors_dict['normal'],alpha=0.5)
ax.scatter(plot_data3.loc[plot_data3['sig']=='up','factor_{}'.format(factor1)],
plot_data3.loc[plot_data3['sig']=='up','factor_{}'.format(factor2)],
color=colors_dict['up'],alpha=0.5)
ax.scatter(plot_data3.loc[plot_data3['sig']=='down','factor_{}'.format(factor1)],
plot_data3.loc[plot_data3['sig']=='down','factor_{}'.format(factor2)],
color=colors_dict['down'],alpha=0.5)
plt.vlines(x=weith_threshold,ymin=-1,ymax=1,color=colors_dict['up'],linestyles='dashed')
plt.hlines(y=weith_threshold,xmin=-1,xmax=1,color=colors_dict['up'],linestyles='dashed')
plt.vlines(x=-weith_threshold,ymin=-1,ymax=1,color=colors_dict['down'],linestyles='dashed')
plt.hlines(y=-weith_threshold,xmin=-1,xmax=1,color=colors_dict['down'],linestyles='dashed')
ax.spines['top'].set_visible(True)
ax.spines['right'].set_visible(True)
ax.spines['bottom'].set_visible(True)
ax.spines['left'].set_visible(True)
plt.grid(False)
plt.xlabel('factor_{}'.format(factor1),fontsize=labels_fontsize)
plt.ylabel('factor_{}'.format(factor2),fontsize=labels_fontsize)
plt.xticks(fontsize=ticks_fontsize)
plt.yticks(fontsize=ticks_fontsize)
from adjustText import adjust_text
for sig,color in zip(['up','down'],
[colors_dict['up'],colors_dict['down']]):
if 'up' in sig:
hub_gene=plot_data3.loc[plot_data3['sig']==sig].sort_values('factor_{}'.format(factor1),ascending=False).index.tolist()
else:
hub_gene=plot_data3.loc[plot_data3['sig']==sig].sort_values('factor_{}'.format(factor1),ascending=True).index.tolist()
if len(hub_gene)==0:
continue
texts=[ax.text(plot_data3.loc[i,'factor_{}'.format(factor1)],
plot_data3.loc[i,'factor_{}'.format(factor2)],
i,
fontdict={'size':10,'weight':'bold','color':'black'}
) for i in hub_gene[:plot_gene_num]]
adjust_text(texts,only_move={'text': 'xy'},arrowprops=dict(arrowstyle='->', color='grey'),)
plt.title(title,fontsize=title_fontsize)
if save:
fig.savefig("factor_gene_{}.png".format(title),dpi=300,bbox_inches = 'tight')
return fig,ax
def plot_weight_gene_d2(self,view:str,factor1:int,factor2:int,
colors_dict:dict=None,plot_gene_num:int=5,title:str='',title_fontsize:int=12,
ticks_fontsize:int=12,labels_fontsize:int=12,
weith_threshold:float=0.5,figsize:tuple=(3,3),
save:bool=False)->Tuple[matplotlib.figure.Figure,matplotlib.axes._axes.Axes]:
"""
Plot the weight of gene in each factor of MOFA in anndata object in dimension 2.
Arguments:
view: The view of MOFA.
factor1: The first factor.
factor2: The second factor.
colors_dict: The color dict. default is {'up-up':'#a51616','up-down':'#e25d5d','down-up':'#1a6e1a','down-down':'#5de25d','normal':'#c2c2c2'}
plot_gene_num: The number of genes to plot.
title: The title of figure.
title_fontsize: The font size of title.
ticks_fontsize: The font size of ticks.
labels_fontsize: The font size of labels.
weith_threshold: The threshold of weight.
figsize: The size of figure.
save: Whether to save the figure.
Returns:
fig: The figure of weight.
ax: The axes of weight.
"""
factor_w=pd.DataFrame()
for i in range(self.factors.shape[1]):
f1_w=get_weights(hdf5_path=self.model_path,view=view,factor=i+1)
f1_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]
factor_w['factor_{}'.format(i+1)]=f1_w['weights']
factor_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]
#factor1,factor2=6,4
plot_data3=factor_w[['factor_{}'.format(factor1),'factor_{}'.format(factor2)]]
plot_data3['sig']='normal'
plot_data3.loc[(plot_data3['factor_{}'.format(factor1)]>weith_threshold)&(plot_data3['factor_{}'.format(factor2)]>weith_threshold),'sig']='up-up'
plot_data3.loc[(plot_data3['factor_{}'.format(factor1)]>weith_threshold)&(plot_data3['factor_{}'.format(factor2)]<-weith_threshold),'sig']='up-down'
plot_data3.loc[(plot_data3['factor_{}'.format(factor1)]<-weith_threshold)&(plot_data3['factor_{}'.format(factor2)]>weith_threshold),'sig']='down-up'
plot_data3.loc[(plot_data3['factor_{}'.format(factor1)]<-weith_threshold)&(plot_data3['factor_{}'.format(factor2)]<-weith_threshold),'sig']='down-down'
if colors_dict==None:
colors_dict={'up-up':'#a51616','up-down':'#e25d5d','down-up':'#1a6e1a','down-down':'#5de25d','normal':'#c2c2c2'}
fig, ax = plt.subplots(figsize=figsize)
ax.scatter(plot_data3.loc[plot_data3['sig']=='normal','factor_{}'.format(factor1)],
plot_data3.loc[plot_data3['sig']=='normal','factor_{}'.format(factor2)],
color=colors_dict['normal'],alpha=0.5)
ax.scatter(plot_data3.loc[plot_data3['sig']=='up-up','factor_{}'.format(factor1)],
plot_data3.loc[plot_data3['sig']=='up-up','factor_{}'.format(factor2)],
color=colors_dict['up-up'],alpha=0.5)
ax.scatter(plot_data3.loc[plot_data3['sig']=='up-down','factor_{}'.format(factor1)],
plot_data3.loc[plot_data3['sig']=='up-down','factor_{}'.format(factor2)],
color=colors_dict['up-down'],alpha=0.5)
ax.scatter(plot_data3.loc[plot_data3['sig']=='down-up','factor_{}'.format(factor1)],
plot_data3.loc[plot_data3['sig']=='down-up','factor_{}'.format(factor2)],
color=colors_dict['down-up'],alpha=0.5)
ax.scatter(plot_data3.loc[plot_data3['sig']=='down-down','factor_{}'.format(factor1)],
plot_data3.loc[plot_data3['sig']=='down-down','factor_{}'.format(factor2)],
color=colors_dict['down-down'],alpha=0.5)
plt.vlines(x=weith_threshold,ymin=-1,ymax=1,color=colors_dict['up-up'],linestyles='dashed')
plt.hlines(y=weith_threshold,xmin=-1,xmax=1,color=colors_dict['up-up'],linestyles='dashed')
plt.vlines(x=-weith_threshold,ymin=-1,ymax=1,color=colors_dict['down-down'],linestyles='dashed')
plt.hlines(y=-weith_threshold,xmin=-1,xmax=1,color=colors_dict['down-down'],linestyles='dashed')
ax.spines['top'].set_visible(True)
ax.spines['right'].set_visible(True)
ax.spines['bottom'].set_visible(True)
ax.spines['left'].set_visible(True)
plt.grid(False)
plt.xlabel('factor_{}'.format(factor1),fontsize=labels_fontsize)
plt.ylabel('factor_{}'.format(factor2),fontsize=labels_fontsize)
plt.xticks(fontsize=ticks_fontsize)
plt.yticks(fontsize=ticks_fontsize)
from adjustText import adjust_text
for sig,color in zip(['up-up','up-down','down-up','down-down'],
[colors_dict['up-up'],colors_dict['up-down'],colors_dict['down-up'],colors_dict['down-down']]):
hub_gene=plot_data3.loc[plot_data3['sig']==sig].index.tolist()
if len(hub_gene)==0:
continue
texts=[ax.text(plot_data3.loc[i,'factor_{}'.format(factor1)],
plot_data3.loc[i,'factor_{}'.format(factor2)],
i,
fontdict={'size':10,'weight':'bold','color':'black'}
) for i in hub_gene[:plot_gene_num]]
adjust_text(texts,only_move={'text': 'xy'},arrowprops=dict(arrowstyle='->', color='grey'),)
plt.title(title,fontsize=title_fontsize)
if save:
fig.savefig("factor_gene_{}.png".format(title),dpi=300,bbox_inches = 'tight')
return fig,ax
def plot_weights(self,view:str,factor:int,color:str='#a51616',figsize:tuple=(3,4),
plot_gene_num:int=10,ascending:bool=False,
labels_fontsize:int=12,ticks_fontsize:int=12,title_fontsize:int=12,
title=None,save:bool=False)->Tuple[matplotlib.figure.Figure,matplotlib.axes._axes.Axes]:
"""
Plot the weights of each gene in the factor
Arguments:
view: str, the view of the factor
factor: int, the factor number
color: str, the color of the plot
figsize: tuple, the size of the figure
plot_gene_num: int, the number of genes to plot
ascending: bool, whether to sort the genes by weights
labels_fontsize: int, the fontsize of the labels
ticks_fontsize: int, the fontsize of the ticks
title_fontsize: int, the fontsize of the title
title: str, the title of the plot
save: bool, whether to save the plot
Returns:
fig: the figure of the plot
ax: the axis of the plot
"""
factor_w=pd.DataFrame()
for i in range(self.factors.shape[1]):
f1_w=get_weights(hdf5_path=self.model_path,view=view,factor=i+1)
f1_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]
factor_w['factor_{}'.format(i+1)]=f1_w['weights']
factor_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]
fig, ax = plt.subplots(figsize=figsize)
plot_data4=pd.DataFrame()
plot_data4['weight']=factor_w['factor_{}'.format(factor)].sort_values(ascending=ascending)
plot_data4['rank']=range(len(plot_data4['weight']))
plt.plot(plot_data4['rank'],plot_data4['weight'],color=color)
hub_gene=plot_data4.index[:plot_gene_num]
plt.scatter(plot_data4.loc[hub_gene,'rank'],
plot_data4.loc[hub_gene,'weight'],color=color,
alpha=0.5)
from adjustText import adjust_text
texts=[ax.text(plot_data4.loc[i,'rank'],
plot_data4.loc[i,'weight'],
i,
fontdict={'size':10,'weight':'normal','color':'black'}
) for i in hub_gene]
adjust_text(texts,only_move={'text': 'xy'},
arrowprops=dict(arrowstyle='->', color='grey'),)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_visible(True)
ax.spines['left'].set_visible(True)
if title is None:
plt.title('factor_{}'.format(factor),fontsize=title_fontsize,)
else:
plt.title(title,fontsize=title_fontsize,)
plt.xticks(fontsize=ticks_fontsize)
plt.yticks(fontsize=ticks_fontsize)
plt.xlabel('Feature rank',fontsize=labels_fontsize)
plt.ylabel('Weight',fontsize=labels_fontsize)
plt.grid(False)
if save:
fig.savefig("factor{}_gene.png".format(factor),dpi=300,bbox_inches = 'tight')
return fig,ax
def plot_top_feature_dotplot(self,view:str,cmap:str='bwr',n_genes:int=3)->list:
"""
Plot the top features of each factor in dotplot
Arguments:
view: str, the view of the factor
cmap: str, the color map of the plot
n_genes: int, the number of genes to plot
Returns:
axes: the list of the figure
"""
factor_w=pd.DataFrame()
for i in range(self.factors.shape[1]):
f1_w=get_weights(hdf5_path=self.model_path,view=view,factor=i+1)
f1_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]
factor_w['factor_{}'.format(i+1)]=f1_w['weights']
factor_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]
adata1=anndata.AnnData(pd.concat([factor_w,factor_w],axis=1).T)
adata1.obs['Factor']=adata1.obs.index
adata1.obs['Factor']=adata1.obs['Factor'].astype('category')
sc.tl.rank_genes_groups(adata1, groupby='Factor', method='wilcoxon')
ax=sc.pl.rank_genes_groups_dotplot(adata1, n_genes=n_genes,
cmap=cmap,show=False)
return ax
def plot_top_feature_heatmap(self,view:str,cmap:str='bwr',n_genes:int=3)->list:
"""
Plot the top features of each factor in dotplot
Arguments:
view: str, the view of the factor
cmap: str, the color map of the plot
n_genes: int, the number of genes to plot
Returns:
axes: the list of the figure
"""
factor_w=pd.DataFrame()
for i in range(self.factors.shape[1]):
f1_w=get_weights(hdf5_path=self.model_path,view=view,factor=i+1)
f1_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]
factor_w['factor_{}'.format(i+1)]=f1_w['weights']
factor_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]
adata1=anndata.AnnData(pd.concat([factor_w,factor_w],axis=1).T)
adata1.obs['Factor']=adata1.obs.index
adata1.obs['Factor']=adata1.obs['Factor'].astype('category')
sc.tl.rank_genes_groups(adata1, groupby='Factor', method='wilcoxon')
ax=sc.pl.rank_genes_groups_matrixplot(adata1, n_genes=n_genes,
cmap=cmap,show=False)
return ax
def get_top_feature(self,view:str,log2fc_min:int=3,pval_cutoff:float=0.1)->dict:
"""
Get the top features of each factor
Arguments:
view: str, the view of the factor
log2fc_min: float, the minimum log2fc of the feature
pval_cutoff: float, the maximum pval of the feature
Returns:
top_feature: dict, the top features of each factor
"""
factor_w=pd.DataFrame()
for i in range(self.factors.shape[1]):
f1_w=get_weights(hdf5_path=self.model_path,view=view,factor=i+1)
f1_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]
factor_w['factor_{}'.format(i+1)]=f1_w['weights']
factor_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]
adata1=anndata.AnnData(pd.concat([factor_w,factor_w],axis=1).T)
adata1.obs['Factor']=adata1.obs.index
adata1.obs['Factor']=adata1.obs['Factor'].astype('category')
top_feature=get_celltype_marker(adata1,clustertype='Factor',
log2fc_min=log2fc_min,pval_cutoff=pval_cutoff)
return top_feature
__init__(model_path)
¶
Initialize the MOFAART class.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
model_path |
str
|
The path of MOFA model. |
required |
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def __init__(self,model_path:str):
"""
Initialize the MOFAART class.
Arguments:
model_path: The path of MOFA model.
"""
check_mofax()
global mofax_install
if mofax_install==True:
global_imports("mofax","mfx")
self.model_path=model_path
mfx_model=mfx.mofa_model(model_path)
self.factors=mfx_model.get_factors()
plot_data=pd.DataFrame()
for i in mfx_model.get_r2()['View'].unique():
plot_data[i]=mfx_model.get_r2().loc[mfx_model.get_r2()['View']==i,'R2'].values
self.r2=plot_data
mfx_model.close()
get_factors(adata)
¶
Get the factors of MOFA to anndata object.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
adata |
anndata.AnnData
|
The anndata object. |
required |
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def get_factors(self,adata:anndata.AnnData):
"""
Get the factors of MOFA to anndata object.
Arguments:
adata: The anndata object.
"""
print('......Add factors to adata and store to adata.obsm["X_mofa"]')
adata.obsm['X_mofa']=self.factors
adata=factor_exact(adata,hdf5_path=self.model_path)
get_r2()
¶
Get the varience of each factor
Returns:
Name | Type | Description |
---|---|---|
r2 |
pd.DataFrame
|
the varience of each factor |
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def get_r2(self,)->pd.DataFrame:
"""
Get the varience of each factor
Returns:
r2: the varience of each factor
"""
return self.r2
plot_r2(figsize=(2, 3), cmap='Greens', ticks_fontsize=10, labels_fontsize=12, save=False)
¶
plot the varience of each factor.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
figsize |
tuple
|
The size of figure. |
(2, 3)
|
cmap |
str
|
The color map. |
'Greens'
|
ticks_fontsize |
int
|
The size of ticks. |
10
|
labels_fontsize |
int
|
The size of labels. |
12
|
save |
bool
|
Whether to save the figure. |
False
|
Returns:
Name | Type | Description |
---|---|---|
fig |
matplotlib.figure.Figure
|
The figure of varience. |
ax |
matplotlib.axes._axes.Axes
|
The axes of varience. |
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def plot_r2(self,figsize:tuple=(2,3),cmap:str='Greens',
ticks_fontsize:int=10,labels_fontsize:int=12,
save:bool=False)->Tuple[matplotlib.figure.Figure,matplotlib.axes._axes.Axes]:
"""
plot the varience of each factor.
Arguments:
figsize: The size of figure.
cmap: The color map.
ticks_fontsize: The size of ticks.
labels_fontsize: The size of labels.
save: Whether to save the figure.
Returns:
fig: The figure of varience.
ax: The axes of varience.
"""
fig, ax = plt.subplots(figsize=figsize)
sns.heatmap(self.r2,cmap=cmap,ax=ax,xticklabels=True,yticklabels=True,
cbar_kws={'shrink':0.5})
plt.xticks(fontsize=ticks_fontsize)
plt.yticks(fontsize=ticks_fontsize)
plt.ylabel('Factor',fontsize=labels_fontsize)
plt.xlabel('View',fontsize=labels_fontsize)
plt.title('Varience',fontsize=labels_fontsize)
if save:
fig.savefig("mofa_varience.png",dpi=300,bbox_inches = 'tight')
return fig,ax
get_cor(adata, cluster, factor_list=None)
¶
get the correlation of each factor with cluster type in anndata object.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
adata |
anndata.AnnData
|
The anndata object. |
required |
cluster |
str
|
The cluster type. |
required |
factor_list |
The list of factors. |
None
|
Returns:
Name | Type | Description |
---|---|---|
plot_data1 |
pd.DataFrame
|
The correlation of each factor with cluster type. |
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def get_cor(self,adata:anndata.AnnData,cluster:str,factor_list=None)->pd.DataFrame:
"""
get the correlation of each factor with cluster type in anndata object.
Arguments:
adata: The anndata object.
cluster: The cluster type.
factor_list: The list of factors.
Returns:
plot_data1: The correlation of each factor with cluster type.
"""
if factor_list==None:
factor_list=[i+1 for i in range(self.r2.shape[0])]
plot_data1=factor_correlation(adata=adata,cluster=cluster,factor_list=factor_list)
return plot_data1
plot_cor(adata, cluster, factor_list=None, figsize=(6, 3), cmap='Purples', ticks_fontsize=10, labels_fontsize=12, title='Correlation', save=False)
¶
Plot the correlation of each factor with cluster type in anndata object.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
adata |
anndata.AnnData
|
The anndata object in MOFA pre trained. |
required |
cluster |
str
|
The cluster type in adata.obs. |
required |
factor_list |
The list of factors. |
None
|
|
figsize |
tuple
|
The size of figure. |
(6, 3)
|
cmap |
str
|
The color map. |
'Purples'
|
ticks_fontsize |
int
|
The font size of ticks. |
10
|
labels_fontsize |
int
|
The font size of labels. |
12
|
title |
str
|
The title of figure. |
'Correlation'
|
save |
bool
|
Whether to save the figure. |
False
|
Returns:
Name | Type | Description |
---|---|---|
fig |
matplotlib.figure.Figure
|
The figure of correlation. |
ax |
matplotlib.axes._axes.Axes
|
The axes of correlation. |
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def plot_cor(self,adata:anndata.AnnData,cluster:str,factor_list=None,figsize:tuple=(6,3),
cmap:str='Purples',ticks_fontsize:int=10,labels_fontsize:int=12,title:str='Correlation',
save:bool=False)->Tuple[matplotlib.figure.Figure,matplotlib.axes._axes.Axes]:
"""
Plot the correlation of each factor with cluster type in anndata object.
Arguments:
adata: The anndata object in MOFA pre trained.
cluster: The cluster type in adata.obs.
factor_list: The list of factors.
figsize: The size of figure.
cmap: The color map.
ticks_fontsize: The font size of ticks.
labels_fontsize: The font size of labels.
title: The title of figure.
save: Whether to save the figure.
Returns:
fig: The figure of correlation.
ax: The axes of correlation.
"""
if factor_list==None:
factor_list=[i+1 for i in range(self.r2.shape[0])]
plot_data1=factor_correlation(adata=adata,cluster=cluster,factor_list=factor_list)
fig, ax = plt.subplots(figsize=figsize)
sns.heatmap(plot_data1,cmap=cmap,ax=ax,square=True,
cbar_kws={'shrink':0.5})
plt.xticks(fontsize=ticks_fontsize)
plt.yticks(fontsize=ticks_fontsize)
plt.xlabel('Factor',fontsize=labels_fontsize)
plt.ylabel(cluster,fontsize=labels_fontsize)
plt.title(title,fontsize=labels_fontsize)
if save:
fig.savefig("mofa_cor.png",dpi=300,bbox_inches = 'tight')
return fig,ax
plot_factor(adata, cluster, title, figsize=(3, 3), factor1=1, factor2=2, palette=None, save=False)
¶
Plot the factor of MOFA in anndata object.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
adata |
anndata.AnnData
|
The anndata object. |
required |
cluster |
str
|
The cluster type in adata.obs. |
required |
title |
str
|
The title of figure. |
required |
figsize |
tuple
|
The size of figure. |
(3, 3)
|
factor1 |
int
|
The first factor. |
1
|
factor2 |
int
|
The second factor. |
2
|
palette |
list
|
The color map. |
None
|
save |
bool
|
Whether to save the figure. |
False
|
Returns:
Name | Type | Description |
---|---|---|
fig |
matplotlib.figure.Figure
|
The figure of factor. |
ax |
matplotlib.axes._axes.Axes
|
The axes of factor. |
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def plot_factor(self,adata:anndata.AnnData,cluster:str,title:str,figsize:tuple=(3,3),
factor1:int=1,factor2:int=2,palette:list=None,
save:bool=False)->Tuple[matplotlib.figure.Figure,matplotlib.axes._axes.Axes]:
"""
Plot the factor of MOFA in anndata object.
Arguments:
adata: The anndata object.
cluster: The cluster type in adata.obs.
title: The title of figure.
figsize: The size of figure.
factor1: The first factor.
factor2: The second factor.
palette: The color map.
save: Whether to save the figure.
Returns:
fig: The figure of factor.
ax: The axes of factor.
"""
if 'X_mofa' not in adata.obsm.keys():
self.get_factors(adata)
if palette==None:
palette=pyomic_palette()
fig, ax = plt.subplots(figsize=figsize)
#factor1,factor2=4,6
sc.pl.embedding(
adata=adata,
basis='X_mofa',
color=cluster,
title=title,
components="{},{}".format(factor1,factor2),
palette=palette,
ncols=1,
ax=ax
)
if save:
fig.savefig("figures/mofa_factor_{}_{}.png".format(factor1,factor2),dpi=300,bbox_inches = 'tight')
return fig,ax
plot_weight_gene_d1(view, factor1, factor2, colors_dict=None, plot_gene_num=5, title='', title_fontsize=12, ticks_fontsize=12, labels_fontsize=12, weith_threshold=0.5, figsize=(3, 3), save=False)
¶
Plot the weight of gene in each factor of MOFA in anndata object in dimension 1.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
view |
str
|
The view of MOFA. |
required |
factor1 |
int
|
The first factor. |
required |
factor2 |
int
|
The second factor. |
required |
colors_dict |
dict
|
The color dict of up, down and normal. default is {'normal':'#c2c2c2','up':'#a51616','down':'#0d6a3b'} |
None
|
plot_gene_num |
int
|
The number of genes to plot. |
5
|
title |
str
|
The title of figure. |
''
|
title_fontsize |
int
|
The font size of title. |
12
|
ticks_fontsize |
int
|
The font size of ticks. |
12
|
labels_fontsize |
int
|
The font size of labels. |
12
|
weith_threshold |
float
|
The threshold of weight. |
0.5
|
figsize |
tuple
|
The size of figure. |
(3, 3)
|
save |
bool
|
Whether to save the figure. |
False
|
Returns:
Name | Type | Description |
---|---|---|
fig |
matplotlib.figure.Figure
|
The figure of weight. |
ax |
matplotlib.axes._axes.Axes
|
The axes of weight. |
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def plot_weight_gene_d1(self,view:str,factor1:int,factor2:int,
colors_dict:dict=None,plot_gene_num:int=5,title:str='',title_fontsize:int=12,
ticks_fontsize:int=12,labels_fontsize:int=12,
weith_threshold:float=0.5,figsize:tuple=(3,3),
save:bool=False)->Tuple[matplotlib.figure.Figure,matplotlib.axes._axes.Axes]:
"""
Plot the weight of gene in each factor of MOFA in anndata object in dimension 1.
Arguments:
view: The view of MOFA.
factor1: The first factor.
factor2: The second factor.
colors_dict: The color dict of up, down and normal. default is {'normal':'#c2c2c2','up':'#a51616','down':'#0d6a3b'}
plot_gene_num: The number of genes to plot.
title: The title of figure.
title_fontsize: The font size of title.
ticks_fontsize: The font size of ticks.
labels_fontsize: The font size of labels.
weith_threshold: The threshold of weight.
figsize: The size of figure.
save: Whether to save the figure.
Returns:
fig: The figure of weight.
ax: The axes of weight.
"""
factor_w=pd.DataFrame()
for i in range(self.factors.shape[1]):
f1_w=get_weights(hdf5_path=self.model_path,view=view,factor=i+1)
f1_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]
factor_w['factor_{}'.format(i+1)]=f1_w['weights']
factor_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]
#factor1,factor2=6,4
plot_data3=factor_w[['factor_{}'.format(factor1),'factor_{}'.format(factor2)]]
plot_data3['sig']='normal'
plot_data3.loc[(plot_data3['factor_{}'.format(factor1)]>weith_threshold),'sig']='up'
plot_data3.loc[(plot_data3['factor_{}'.format(factor1)]<-weith_threshold),'sig']='down'
if colors_dict==None:
colors_dict={'normal':'#c2c2c2','up':'#a51616','down':'#0d6a3b'}
fig, ax = plt.subplots(figsize=figsize)
ax.scatter(plot_data3.loc[plot_data3['sig']=='normal','factor_{}'.format(factor1)],
plot_data3.loc[plot_data3['sig']=='normal','factor_{}'.format(factor2)],
color=colors_dict['normal'],alpha=0.5)
ax.scatter(plot_data3.loc[plot_data3['sig']=='up','factor_{}'.format(factor1)],
plot_data3.loc[plot_data3['sig']=='up','factor_{}'.format(factor2)],
color=colors_dict['up'],alpha=0.5)
ax.scatter(plot_data3.loc[plot_data3['sig']=='down','factor_{}'.format(factor1)],
plot_data3.loc[plot_data3['sig']=='down','factor_{}'.format(factor2)],
color=colors_dict['down'],alpha=0.5)
plt.vlines(x=weith_threshold,ymin=-1,ymax=1,color=colors_dict['up'],linestyles='dashed')
plt.hlines(y=weith_threshold,xmin=-1,xmax=1,color=colors_dict['up'],linestyles='dashed')
plt.vlines(x=-weith_threshold,ymin=-1,ymax=1,color=colors_dict['down'],linestyles='dashed')
plt.hlines(y=-weith_threshold,xmin=-1,xmax=1,color=colors_dict['down'],linestyles='dashed')
ax.spines['top'].set_visible(True)
ax.spines['right'].set_visible(True)
ax.spines['bottom'].set_visible(True)
ax.spines['left'].set_visible(True)
plt.grid(False)
plt.xlabel('factor_{}'.format(factor1),fontsize=labels_fontsize)
plt.ylabel('factor_{}'.format(factor2),fontsize=labels_fontsize)
plt.xticks(fontsize=ticks_fontsize)
plt.yticks(fontsize=ticks_fontsize)
from adjustText import adjust_text
for sig,color in zip(['up','down'],
[colors_dict['up'],colors_dict['down']]):
if 'up' in sig:
hub_gene=plot_data3.loc[plot_data3['sig']==sig].sort_values('factor_{}'.format(factor1),ascending=False).index.tolist()
else:
hub_gene=plot_data3.loc[plot_data3['sig']==sig].sort_values('factor_{}'.format(factor1),ascending=True).index.tolist()
if len(hub_gene)==0:
continue
texts=[ax.text(plot_data3.loc[i,'factor_{}'.format(factor1)],
plot_data3.loc[i,'factor_{}'.format(factor2)],
i,
fontdict={'size':10,'weight':'bold','color':'black'}
) for i in hub_gene[:plot_gene_num]]
adjust_text(texts,only_move={'text': 'xy'},arrowprops=dict(arrowstyle='->', color='grey'),)
plt.title(title,fontsize=title_fontsize)
if save:
fig.savefig("factor_gene_{}.png".format(title),dpi=300,bbox_inches = 'tight')
return fig,ax
plot_weight_gene_d2(view, factor1, factor2, colors_dict=None, plot_gene_num=5, title='', title_fontsize=12, ticks_fontsize=12, labels_fontsize=12, weith_threshold=0.5, figsize=(3, 3), save=False)
¶
Plot the weight of gene in each factor of MOFA in anndata object in dimension 2.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
view |
str
|
The view of MOFA. |
required |
factor1 |
int
|
The first factor. |
required |
factor2 |
int
|
The second factor. |
required |
colors_dict |
dict
|
The color dict. default is {'up-up':'#a51616','up-down':'#e25d5d','down-up':'#1a6e1a','down-down':'#5de25d','normal':'#c2c2c2'} |
None
|
plot_gene_num |
int
|
The number of genes to plot. |
5
|
title |
str
|
The title of figure. |
''
|
title_fontsize |
int
|
The font size of title. |
12
|
ticks_fontsize |
int
|
The font size of ticks. |
12
|
labels_fontsize |
int
|
The font size of labels. |
12
|
weith_threshold |
float
|
The threshold of weight. |
0.5
|
figsize |
tuple
|
The size of figure. |
(3, 3)
|
save |
bool
|
Whether to save the figure. |
False
|
Returns:
Name | Type | Description |
---|---|---|
fig |
matplotlib.figure.Figure
|
The figure of weight. |
ax |
matplotlib.axes._axes.Axes
|
The axes of weight. |
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def plot_weight_gene_d2(self,view:str,factor1:int,factor2:int,
colors_dict:dict=None,plot_gene_num:int=5,title:str='',title_fontsize:int=12,
ticks_fontsize:int=12,labels_fontsize:int=12,
weith_threshold:float=0.5,figsize:tuple=(3,3),
save:bool=False)->Tuple[matplotlib.figure.Figure,matplotlib.axes._axes.Axes]:
"""
Plot the weight of gene in each factor of MOFA in anndata object in dimension 2.
Arguments:
view: The view of MOFA.
factor1: The first factor.
factor2: The second factor.
colors_dict: The color dict. default is {'up-up':'#a51616','up-down':'#e25d5d','down-up':'#1a6e1a','down-down':'#5de25d','normal':'#c2c2c2'}
plot_gene_num: The number of genes to plot.
title: The title of figure.
title_fontsize: The font size of title.
ticks_fontsize: The font size of ticks.
labels_fontsize: The font size of labels.
weith_threshold: The threshold of weight.
figsize: The size of figure.
save: Whether to save the figure.
Returns:
fig: The figure of weight.
ax: The axes of weight.
"""
factor_w=pd.DataFrame()
for i in range(self.factors.shape[1]):
f1_w=get_weights(hdf5_path=self.model_path,view=view,factor=i+1)
f1_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]
factor_w['factor_{}'.format(i+1)]=f1_w['weights']
factor_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]
#factor1,factor2=6,4
plot_data3=factor_w[['factor_{}'.format(factor1),'factor_{}'.format(factor2)]]
plot_data3['sig']='normal'
plot_data3.loc[(plot_data3['factor_{}'.format(factor1)]>weith_threshold)&(plot_data3['factor_{}'.format(factor2)]>weith_threshold),'sig']='up-up'
plot_data3.loc[(plot_data3['factor_{}'.format(factor1)]>weith_threshold)&(plot_data3['factor_{}'.format(factor2)]<-weith_threshold),'sig']='up-down'
plot_data3.loc[(plot_data3['factor_{}'.format(factor1)]<-weith_threshold)&(plot_data3['factor_{}'.format(factor2)]>weith_threshold),'sig']='down-up'
plot_data3.loc[(plot_data3['factor_{}'.format(factor1)]<-weith_threshold)&(plot_data3['factor_{}'.format(factor2)]<-weith_threshold),'sig']='down-down'
if colors_dict==None:
colors_dict={'up-up':'#a51616','up-down':'#e25d5d','down-up':'#1a6e1a','down-down':'#5de25d','normal':'#c2c2c2'}
fig, ax = plt.subplots(figsize=figsize)
ax.scatter(plot_data3.loc[plot_data3['sig']=='normal','factor_{}'.format(factor1)],
plot_data3.loc[plot_data3['sig']=='normal','factor_{}'.format(factor2)],
color=colors_dict['normal'],alpha=0.5)
ax.scatter(plot_data3.loc[plot_data3['sig']=='up-up','factor_{}'.format(factor1)],
plot_data3.loc[plot_data3['sig']=='up-up','factor_{}'.format(factor2)],
color=colors_dict['up-up'],alpha=0.5)
ax.scatter(plot_data3.loc[plot_data3['sig']=='up-down','factor_{}'.format(factor1)],
plot_data3.loc[plot_data3['sig']=='up-down','factor_{}'.format(factor2)],
color=colors_dict['up-down'],alpha=0.5)
ax.scatter(plot_data3.loc[plot_data3['sig']=='down-up','factor_{}'.format(factor1)],
plot_data3.loc[plot_data3['sig']=='down-up','factor_{}'.format(factor2)],
color=colors_dict['down-up'],alpha=0.5)
ax.scatter(plot_data3.loc[plot_data3['sig']=='down-down','factor_{}'.format(factor1)],
plot_data3.loc[plot_data3['sig']=='down-down','factor_{}'.format(factor2)],
color=colors_dict['down-down'],alpha=0.5)
plt.vlines(x=weith_threshold,ymin=-1,ymax=1,color=colors_dict['up-up'],linestyles='dashed')
plt.hlines(y=weith_threshold,xmin=-1,xmax=1,color=colors_dict['up-up'],linestyles='dashed')
plt.vlines(x=-weith_threshold,ymin=-1,ymax=1,color=colors_dict['down-down'],linestyles='dashed')
plt.hlines(y=-weith_threshold,xmin=-1,xmax=1,color=colors_dict['down-down'],linestyles='dashed')
ax.spines['top'].set_visible(True)
ax.spines['right'].set_visible(True)
ax.spines['bottom'].set_visible(True)
ax.spines['left'].set_visible(True)
plt.grid(False)
plt.xlabel('factor_{}'.format(factor1),fontsize=labels_fontsize)
plt.ylabel('factor_{}'.format(factor2),fontsize=labels_fontsize)
plt.xticks(fontsize=ticks_fontsize)
plt.yticks(fontsize=ticks_fontsize)
from adjustText import adjust_text
for sig,color in zip(['up-up','up-down','down-up','down-down'],
[colors_dict['up-up'],colors_dict['up-down'],colors_dict['down-up'],colors_dict['down-down']]):
hub_gene=plot_data3.loc[plot_data3['sig']==sig].index.tolist()
if len(hub_gene)==0:
continue
texts=[ax.text(plot_data3.loc[i,'factor_{}'.format(factor1)],
plot_data3.loc[i,'factor_{}'.format(factor2)],
i,
fontdict={'size':10,'weight':'bold','color':'black'}
) for i in hub_gene[:plot_gene_num]]
adjust_text(texts,only_move={'text': 'xy'},arrowprops=dict(arrowstyle='->', color='grey'),)
plt.title(title,fontsize=title_fontsize)
if save:
fig.savefig("factor_gene_{}.png".format(title),dpi=300,bbox_inches = 'tight')
return fig,ax
plot_weights(view, factor, color='#a51616', figsize=(3, 4), plot_gene_num=10, ascending=False, labels_fontsize=12, ticks_fontsize=12, title_fontsize=12, title=None, save=False)
¶
Plot the weights of each gene in the factor
Parameters:
Name | Type | Description | Default |
---|---|---|---|
view |
str
|
str, the view of the factor |
required |
factor |
int
|
int, the factor number |
required |
color |
str
|
str, the color of the plot |
'#a51616'
|
figsize |
tuple
|
tuple, the size of the figure |
(3, 4)
|
plot_gene_num |
int
|
int, the number of genes to plot |
10
|
ascending |
bool
|
bool, whether to sort the genes by weights |
False
|
labels_fontsize |
int
|
int, the fontsize of the labels |
12
|
ticks_fontsize |
int
|
int, the fontsize of the ticks |
12
|
title_fontsize |
int
|
int, the fontsize of the title |
12
|
title |
str, the title of the plot |
None
|
|
save |
bool
|
bool, whether to save the plot |
False
|
Returns:
Name | Type | Description |
---|---|---|
fig |
matplotlib.figure.Figure
|
the figure of the plot |
ax |
matplotlib.axes._axes.Axes
|
the axis of the plot |
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def plot_weights(self,view:str,factor:int,color:str='#a51616',figsize:tuple=(3,4),
plot_gene_num:int=10,ascending:bool=False,
labels_fontsize:int=12,ticks_fontsize:int=12,title_fontsize:int=12,
title=None,save:bool=False)->Tuple[matplotlib.figure.Figure,matplotlib.axes._axes.Axes]:
"""
Plot the weights of each gene in the factor
Arguments:
view: str, the view of the factor
factor: int, the factor number
color: str, the color of the plot
figsize: tuple, the size of the figure
plot_gene_num: int, the number of genes to plot
ascending: bool, whether to sort the genes by weights
labels_fontsize: int, the fontsize of the labels
ticks_fontsize: int, the fontsize of the ticks
title_fontsize: int, the fontsize of the title
title: str, the title of the plot
save: bool, whether to save the plot
Returns:
fig: the figure of the plot
ax: the axis of the plot
"""
factor_w=pd.DataFrame()
for i in range(self.factors.shape[1]):
f1_w=get_weights(hdf5_path=self.model_path,view=view,factor=i+1)
f1_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]
factor_w['factor_{}'.format(i+1)]=f1_w['weights']
factor_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]
fig, ax = plt.subplots(figsize=figsize)
plot_data4=pd.DataFrame()
plot_data4['weight']=factor_w['factor_{}'.format(factor)].sort_values(ascending=ascending)
plot_data4['rank']=range(len(plot_data4['weight']))
plt.plot(plot_data4['rank'],plot_data4['weight'],color=color)
hub_gene=plot_data4.index[:plot_gene_num]
plt.scatter(plot_data4.loc[hub_gene,'rank'],
plot_data4.loc[hub_gene,'weight'],color=color,
alpha=0.5)
from adjustText import adjust_text
texts=[ax.text(plot_data4.loc[i,'rank'],
plot_data4.loc[i,'weight'],
i,
fontdict={'size':10,'weight':'normal','color':'black'}
) for i in hub_gene]
adjust_text(texts,only_move={'text': 'xy'},
arrowprops=dict(arrowstyle='->', color='grey'),)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_visible(True)
ax.spines['left'].set_visible(True)
if title is None:
plt.title('factor_{}'.format(factor),fontsize=title_fontsize,)
else:
plt.title(title,fontsize=title_fontsize,)
plt.xticks(fontsize=ticks_fontsize)
plt.yticks(fontsize=ticks_fontsize)
plt.xlabel('Feature rank',fontsize=labels_fontsize)
plt.ylabel('Weight',fontsize=labels_fontsize)
plt.grid(False)
if save:
fig.savefig("factor{}_gene.png".format(factor),dpi=300,bbox_inches = 'tight')
return fig,ax
plot_top_feature_dotplot(view, cmap='bwr', n_genes=3)
¶
Plot the top features of each factor in dotplot
Parameters:
Name | Type | Description | Default |
---|---|---|---|
view |
str
|
str, the view of the factor |
required |
cmap |
str
|
str, the color map of the plot |
'bwr'
|
n_genes |
int
|
int, the number of genes to plot |
3
|
Returns:
Name | Type | Description |
---|---|---|
axes |
list
|
the list of the figure |
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def plot_top_feature_dotplot(self,view:str,cmap:str='bwr',n_genes:int=3)->list:
"""
Plot the top features of each factor in dotplot
Arguments:
view: str, the view of the factor
cmap: str, the color map of the plot
n_genes: int, the number of genes to plot
Returns:
axes: the list of the figure
"""
factor_w=pd.DataFrame()
for i in range(self.factors.shape[1]):
f1_w=get_weights(hdf5_path=self.model_path,view=view,factor=i+1)
f1_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]
factor_w['factor_{}'.format(i+1)]=f1_w['weights']
factor_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]
adata1=anndata.AnnData(pd.concat([factor_w,factor_w],axis=1).T)
adata1.obs['Factor']=adata1.obs.index
adata1.obs['Factor']=adata1.obs['Factor'].astype('category')
sc.tl.rank_genes_groups(adata1, groupby='Factor', method='wilcoxon')
ax=sc.pl.rank_genes_groups_dotplot(adata1, n_genes=n_genes,
cmap=cmap,show=False)
return ax
plot_top_feature_heatmap(view, cmap='bwr', n_genes=3)
¶
Plot the top features of each factor in dotplot
Parameters:
Name | Type | Description | Default |
---|---|---|---|
view |
str
|
str, the view of the factor |
required |
cmap |
str
|
str, the color map of the plot |
'bwr'
|
n_genes |
int
|
int, the number of genes to plot |
3
|
Returns:
Name | Type | Description |
---|---|---|
axes |
list
|
the list of the figure |
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def plot_top_feature_heatmap(self,view:str,cmap:str='bwr',n_genes:int=3)->list:
"""
Plot the top features of each factor in dotplot
Arguments:
view: str, the view of the factor
cmap: str, the color map of the plot
n_genes: int, the number of genes to plot
Returns:
axes: the list of the figure
"""
factor_w=pd.DataFrame()
for i in range(self.factors.shape[1]):
f1_w=get_weights(hdf5_path=self.model_path,view=view,factor=i+1)
f1_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]
factor_w['factor_{}'.format(i+1)]=f1_w['weights']
factor_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]
adata1=anndata.AnnData(pd.concat([factor_w,factor_w],axis=1).T)
adata1.obs['Factor']=adata1.obs.index
adata1.obs['Factor']=adata1.obs['Factor'].astype('category')
sc.tl.rank_genes_groups(adata1, groupby='Factor', method='wilcoxon')
ax=sc.pl.rank_genes_groups_matrixplot(adata1, n_genes=n_genes,
cmap=cmap,show=False)
return ax
get_top_feature(view, log2fc_min=3, pval_cutoff=0.1)
¶
Get the top features of each factor
Parameters:
Name | Type | Description | Default |
---|---|---|---|
view |
str
|
str, the view of the factor |
required |
log2fc_min |
int
|
float, the minimum log2fc of the feature |
3
|
pval_cutoff |
float
|
float, the maximum pval of the feature |
0.1
|
Returns:
Name | Type | Description |
---|---|---|
top_feature |
dict
|
dict, the top features of each factor |
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def get_top_feature(self,view:str,log2fc_min:int=3,pval_cutoff:float=0.1)->dict:
"""
Get the top features of each factor
Arguments:
view: str, the view of the factor
log2fc_min: float, the minimum log2fc of the feature
pval_cutoff: float, the maximum pval of the feature
Returns:
top_feature: dict, the top features of each factor
"""
factor_w=pd.DataFrame()
for i in range(self.factors.shape[1]):
f1_w=get_weights(hdf5_path=self.model_path,view=view,factor=i+1)
f1_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]
factor_w['factor_{}'.format(i+1)]=f1_w['weights']
factor_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]
adata1=anndata.AnnData(pd.concat([factor_w,factor_w],axis=1).T)
adata1.obs['Factor']=adata1.obs.index
adata1.obs['Factor']=adata1.obs['Factor'].astype('category')
top_feature=get_celltype_marker(adata1,clustertype='Factor',
log2fc_min=log2fc_min,pval_cutoff=pval_cutoff)
return top_feature
omicverse.single.GLUE_pair
¶
Bases: object
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
class GLUE_pair(object):
def __init__(self,rna:anndata.AnnData,
atac:anndata.AnnData) -> None:
r"""
Pair the cells between RNA and ATAC using result of GLUE.
Arguments:
rna: the AnnData of RNA-seq.
atac: the AnnData of ATAC-seq.
depth: the depth of the search for the nearest neighbor.
"""
print('......Extract GLUE layer from obs')
self.rna_loc=pd.DataFrame(rna.obsm['X_glue'], index=rna.obs.index)
self.atac_loc=pd.DataFrame(atac.obsm['X_glue'], index=atac.obs.index)
def correlation(self):
"""
Perform Pearson Correlation analysis in the layer of GLUE
"""
print('......Prepare for pair')
import gc
len1=(len(self.rna_loc)//5000)+1
len2=(len(self.atac_loc)//5000)+1
if len1>len2:
len1=len2
p_pd=pd.DataFrame(columns=['rank_'+str(i) for i in range(50)])
n_pd=pd.DataFrame(columns=['rank_'+str(i) for i in range(50)])
print('......Start to calculate the Pearson coef')
for j in range(len1):
c=pd.DataFrame()
with trange(len1) as tt:
for i in tt:
t1=self.rna_loc.iloc[5000*(i):5000*(i+1)]
t2=self.atac_loc.iloc[5000*(j):5000*(j+1)]
a=np.corrcoef(t1,t2)[len(t1):,0:len(t1)]
b=pd.DataFrame(a,index=t2.index,columns=t1.index)
c=pd.concat([c,b],axis=1)
del t1
del t2
del a
del b
gc.collect()
tt.set_description('Now Pearson block is {}/{}'.format(i,len1))
with trange(len(c)) as t:
for i in t:
t_c=c.iloc[i]
p_pd.loc[t_c.name]=c.iloc[i].sort_values(ascending=False)[:50].values
n_pd.loc[t_c.name]=c.iloc[i].sort_values(ascending=False)[:50].index.tolist()
t.set_description('Now rna_index is {}/{}, all is {}'.format(i+j*5000,j*5000+len(c),len(self.atac_loc)))
print('Now epoch is {}, {}/{}'.format(j,j*5000+len(c),len(self.atac_loc)))
del c
gc.collect()
self.rna_pd=p_pd
self.atac_pd=n_pd
def find_neighbor_cell(self,depth:int=10,cor:float=0.9)->pd.DataFrame:
"""
Find the neighbor cells between two omics using pearson
Arguments:
depth: the depth of the search for the nearest neighbor.
Returns:
result: the pair result
"""
if depth>50:
print('......depth limited to 50')
depth=50
rubish_c=[]
finish_c=[]
p_pd=self.rna_pd.copy()
n_pd=self.atac_pd.copy()
with trange(depth) as dt:
for d in dt:
p_pd=p_pd.loc[p_pd['rank_{}'.format(d)]>cor]
p_pd=p_pd.sort_values('rank_{}'.format(d),ascending=False)
for i in p_pd.index:
name=n_pd.loc[i,'rank_{}'.format(d)]
if name not in rubish_c:
finish_c.append(i)
rubish_c.append(name)
else:
continue
p_pd=p_pd.loc[~p_pd.index.isin(finish_c)]
n_pd=n_pd.loc[~n_pd.index.isin(finish_c)]
dt.set_description('Now depth is {}/{}'.format(d,depth))
result=pd.DataFrame()
result['omic_1']=rubish_c
result['omic_2']=finish_c
result.index=['cell_{}'.format(i) for i in range(len(result))]
self.pair_res=result
return result
def pair_omic(self,omic1:anndata.AnnData,omic2:anndata.AnnData)->Tuple[anndata.AnnData,anndata.AnnData]:
"""
Pair the omics using the result of find_neighbor_cell
Arguments:
omic1: the AnnData of omic1.
omic2: the AnnData of omic2.
Returns:
rna1: the paired AnnData of omic1.
atac1: the paired AnnData of omic2.
"""
rna1=omic1[self.res_pair['omic_1']].copy()
atac1=omic2[self.res_pair['omic_2']].copy()
rna1.obs.index=self.res_pair.index
atac1.obs.index=self.res_pair.index
return rna1,atac1
__init__(rna, atac)
¶
Pair the cells between RNA and ATAC using result of GLUE.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
rna |
anndata.AnnData
|
the AnnData of RNA-seq. |
required |
atac |
anndata.AnnData
|
the AnnData of ATAC-seq. |
required |
depth |
the depth of the search for the nearest neighbor. |
required |
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def __init__(self,rna:anndata.AnnData,
atac:anndata.AnnData) -> None:
r"""
Pair the cells between RNA and ATAC using result of GLUE.
Arguments:
rna: the AnnData of RNA-seq.
atac: the AnnData of ATAC-seq.
depth: the depth of the search for the nearest neighbor.
"""
print('......Extract GLUE layer from obs')
self.rna_loc=pd.DataFrame(rna.obsm['X_glue'], index=rna.obs.index)
self.atac_loc=pd.DataFrame(atac.obsm['X_glue'], index=atac.obs.index)
correlation()
¶
Perform Pearson Correlation analysis in the layer of GLUE
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def correlation(self):
"""
Perform Pearson Correlation analysis in the layer of GLUE
"""
print('......Prepare for pair')
import gc
len1=(len(self.rna_loc)//5000)+1
len2=(len(self.atac_loc)//5000)+1
if len1>len2:
len1=len2
p_pd=pd.DataFrame(columns=['rank_'+str(i) for i in range(50)])
n_pd=pd.DataFrame(columns=['rank_'+str(i) for i in range(50)])
print('......Start to calculate the Pearson coef')
for j in range(len1):
c=pd.DataFrame()
with trange(len1) as tt:
for i in tt:
t1=self.rna_loc.iloc[5000*(i):5000*(i+1)]
t2=self.atac_loc.iloc[5000*(j):5000*(j+1)]
a=np.corrcoef(t1,t2)[len(t1):,0:len(t1)]
b=pd.DataFrame(a,index=t2.index,columns=t1.index)
c=pd.concat([c,b],axis=1)
del t1
del t2
del a
del b
gc.collect()
tt.set_description('Now Pearson block is {}/{}'.format(i,len1))
with trange(len(c)) as t:
for i in t:
t_c=c.iloc[i]
p_pd.loc[t_c.name]=c.iloc[i].sort_values(ascending=False)[:50].values
n_pd.loc[t_c.name]=c.iloc[i].sort_values(ascending=False)[:50].index.tolist()
t.set_description('Now rna_index is {}/{}, all is {}'.format(i+j*5000,j*5000+len(c),len(self.atac_loc)))
print('Now epoch is {}, {}/{}'.format(j,j*5000+len(c),len(self.atac_loc)))
del c
gc.collect()
self.rna_pd=p_pd
self.atac_pd=n_pd
find_neighbor_cell(depth=10, cor=0.9)
¶
Find the neighbor cells between two omics using pearson
Parameters:
Name | Type | Description | Default |
---|---|---|---|
depth |
int
|
the depth of the search for the nearest neighbor. |
10
|
Returns:
Name | Type | Description |
---|---|---|
result |
pd.DataFrame
|
the pair result |
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def find_neighbor_cell(self,depth:int=10,cor:float=0.9)->pd.DataFrame:
"""
Find the neighbor cells between two omics using pearson
Arguments:
depth: the depth of the search for the nearest neighbor.
Returns:
result: the pair result
"""
if depth>50:
print('......depth limited to 50')
depth=50
rubish_c=[]
finish_c=[]
p_pd=self.rna_pd.copy()
n_pd=self.atac_pd.copy()
with trange(depth) as dt:
for d in dt:
p_pd=p_pd.loc[p_pd['rank_{}'.format(d)]>cor]
p_pd=p_pd.sort_values('rank_{}'.format(d),ascending=False)
for i in p_pd.index:
name=n_pd.loc[i,'rank_{}'.format(d)]
if name not in rubish_c:
finish_c.append(i)
rubish_c.append(name)
else:
continue
p_pd=p_pd.loc[~p_pd.index.isin(finish_c)]
n_pd=n_pd.loc[~n_pd.index.isin(finish_c)]
dt.set_description('Now depth is {}/{}'.format(d,depth))
result=pd.DataFrame()
result['omic_1']=rubish_c
result['omic_2']=finish_c
result.index=['cell_{}'.format(i) for i in range(len(result))]
self.pair_res=result
return result
pair_omic(omic1, omic2)
¶
Pair the omics using the result of find_neighbor_cell
Parameters:
Name | Type | Description | Default |
---|---|---|---|
omic1 |
anndata.AnnData
|
the AnnData of omic1. |
required |
omic2 |
anndata.AnnData
|
the AnnData of omic2. |
required |
Returns:
Name | Type | Description |
---|---|---|
rna1 |
anndata.AnnData
|
the paired AnnData of omic1. |
atac1 |
anndata.AnnData
|
the paired AnnData of omic2. |
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def pair_omic(self,omic1:anndata.AnnData,omic2:anndata.AnnData)->Tuple[anndata.AnnData,anndata.AnnData]:
"""
Pair the omics using the result of find_neighbor_cell
Arguments:
omic1: the AnnData of omic1.
omic2: the AnnData of omic2.
Returns:
rna1: the paired AnnData of omic1.
atac1: the paired AnnData of omic2.
"""
rna1=omic1[self.res_pair['omic_1']].copy()
atac1=omic2[self.res_pair['omic_2']].copy()
rna1.obs.index=self.res_pair.index
atac1.obs.index=self.res_pair.index
return rna1,atac1
omicverse.single.factor_exact(adata, hdf5_path)
¶
Extract the factor information from hdf5 file.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
adata |
anndata.AnnData
|
The AnnData object. |
required |
hdf5_path |
str
|
The path of hdf5 file. |
required |
Returns:
Name | Type | Description |
---|---|---|
adata |
anndata.AnnData
|
The AnnData object with factor information. |
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def factor_exact(adata:anndata.AnnData,hdf5_path:str)->anndata.AnnData:
r"""
Extract the factor information from hdf5 file.
Arguments:
adata: The AnnData object.
hdf5_path: The path of hdf5 file.
Returns:
adata: The AnnData object with factor information.
"""
f_pos = h5py.File(hdf5_path,'r')
for i in range(f_pos['expectations']['Z']['group0'].shape[0]):
adata.obs['factor{0}'.format(i+1)]=f_pos['expectations']['Z']['group0'][i]
return adata
omicverse.single.factor_correlation(adata, cluster, factor_list, p_threshold=500)
¶
Calculate the correlation between factors and cluster.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
adata |
anndata.AnnData
|
The AnnData object. |
required |
cluster |
str
|
The name of cluster. |
required |
factor_list |
list
|
The list of factors. |
required |
p_threshold |
int
|
The threshold of p-value. |
500
|
Returns:
Name | Type | Description |
---|---|---|
cell_pd |
pd.DataFrame
|
The correlation between factors and cluster. |
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def factor_correlation(adata:anndata.AnnData,cluster:str,
factor_list:list,p_threshold:int=500)->pd.DataFrame:
r"""
Calculate the correlation between factors and cluster.
Arguments:
adata: The AnnData object.
cluster: The name of cluster.
factor_list: The list of factors.
p_threshold: The threshold of p-value.
Returns:
cell_pd: The correlation between factors and cluster.
"""
plot_data=adata.obs
cell_t=list(set(plot_data[cluster]))
cell_pd=pd.DataFrame(index=cell_t)
for i in factor_list:
test=[]
for j in cell_t:
a=plot_data[plot_data[cluster]==j]['factor'+str(i)].values
b=plot_data[~(plot_data[cluster]==j)]['factor'+str(i)].values
t, p = stats.ttest_ind(a,b)
logp=-np.log(p)
if(logp>p_threshold):
logp=p_threshold
test.append(logp)
cell_pd['factor'+str(i)]=test
return cell_pd
omicverse.single.get_weights(hdf5_path, view, factor, scale=True)
¶
Get the weights of each feature in a specific factor.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
hdf5_path |
str
|
the path of hdf5 file. |
required |
view |
str
|
the name of view. |
required |
factor |
int
|
the number of factor. |
required |
scale |
bool
|
whether to scale the weights. |
True
|
Returns:
Name | Type | Description |
---|---|---|
res |
pd.DataFrame
|
the weights of each feature in a specific factor. |
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def get_weights(hdf5_path:str,view:str,
factor:int,scale:bool=True)->pd.DataFrame:
r"""
Get the weights of each feature in a specific factor.
Arguments:
hdf5_path: the path of hdf5 file.
view: the name of view.
factor: the number of factor.
scale: whether to scale the weights.
Returns:
res: the weights of each feature in a specific factor.
"""
f = h5py.File(hdf5_path,'r')
view_names=f['views']['views'][:]
group_names=f['groups']['groups'][:]
feature_names=np.array([f['features'][i][:] for i in view_names])
sample_names=np.array([f['samples'][i][:] for i in group_names])
f_name=feature_names[np.where(view_names==str.encode(view))[0][0]]
f_w=f['expectations']['W'][view][factor-1]
if scale==True:
f_w=normalization(f_w)
res=pd.DataFrame()
res['feature']=f_name
res['weights']=f_w
res['abs_weights']=abs(f_w)
res['sig']='+'
res.loc[(res.weights<0),'sig'] = '-'
return res
omicverse.single.glue_pair(rna, atac, depth=20)
¶
Pair the cells between RNA and ATAC using result of GLUE.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
rna |
anndata.AnnData
|
the AnnData of RNA-seq. |
required |
atac |
anndata.AnnData
|
the AnnData of ATAC-seq. |
required |
depth |
int
|
the depth of the search for the nearest neighbor. |
20
|
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_mofa.py
def glue_pair(rna:anndata.AnnData,
atac:anndata.AnnData,depth:int=20)->pd.DataFrame:
r"""
Pair the cells between RNA and ATAC using result of GLUE.
Arguments:
rna: the AnnData of RNA-seq.
atac: the AnnData of ATAC-seq.
depth: the depth of the search for the nearest neighbor.
"""
#提取GLUE层结果
print('......Extract GLUE layer from obs')
rna_loc=pd.DataFrame(rna.obsm['X_glue'], index=rna.obs.index)
atac_loc=pd.DataFrame(atac.obsm['X_glue'], index=atac.obs.index)
#对GLUE层进行Pearson系数分析
print('......Prepare for pair')
import gc
len1=(len(rna_loc)//5000)+1
len2=(len(atac_loc)//5000)+1
if len1>len2:
len1=len2
p_pd=pd.DataFrame(columns=['rank_'+str(i) for i in range(50)])
n_pd=pd.DataFrame(columns=['rank_'+str(i) for i in range(50)])
print('......Start to calculate the Pearson coef')
for j in range(len1):
c=pd.DataFrame()
with trange(len1) as tt:
for i in tt:
t1=rna_loc.iloc[5000*(i):5000*(i+1)]
t2=atac_loc.iloc[5000*(j):5000*(j+1)]
a=np.corrcoef(t1,t2)[len(t1):,0:len(t1)]
b=pd.DataFrame(a,index=t2.index,columns=t1.index)
c=pd.concat([c,b],axis=1)
del t1
del t2
del a
del b
gc.collect()
tt.set_description('Now Pearson block is {}/{}'.format(i,len1))
with trange(len(c)) as t:
for i in t:
t_c=c.iloc[i]
p_pd.loc[t_c.name]=c.iloc[i].sort_values(ascending=False)[:50].values
n_pd.loc[t_c.name]=c.iloc[i].sort_values(ascending=False)[:50].index.tolist()
t.set_description('Now rna_index is {}/{}, all is {}'.format(i+j*5000,i+j*5000+len(c),len(atac_loc)))
print('Now epoch is {}, {}/{}'.format(j,j*5000+len(c),len(atac_loc)))
del c
gc.collect()
#寻找最近的细胞,其中depth的灵活调整可以使得配对成功的细胞数变大,同时精度有所下降
def find_neighbor_cell(p_pd,n_pd,depth=10):
if depth>50:
print('......depth limited to 50')
depth=50
rubish_c=[]
finish_c=[]
with trange(depth) as dt:
for d in dt:
p_pd=p_pd.loc[p_pd['rank_{}'.format(d)]>0.9]
p_pd=p_pd.sort_values('rank_{}'.format(d),ascending=False)
for i in p_pd.index:
name=n_pd.loc[i,'rank_{}'.format(d)]
if name not in rubish_c:
finish_c.append(i)
rubish_c.append(name)
else:
continue
p_pd=p_pd.loc[~p_pd.index.isin(finish_c)]
n_pd=n_pd.loc[~n_pd.index.isin(finish_c)]
dt.set_description('Now depth is {}/{}'.format(d,depth))
result=pd.DataFrame()
result['omic_1']=rubish_c
result['omic_2']=finish_c
result.index=['cell_{}'.format(i) for i in range(len(result))]
return result
print('......Start to find neighbor')
res_pair=find_neighbor_cell(p_pd,n_pd,depth=depth)
return res_pair