Api mofa
omicverse.single.pyMOFA
¶
Bases: object
MOFA class.
Source code in /Users/fernandozeng/miniforge3/envs/space/lib/python3.10/site-packages/omicverse/single/_mofa.py
class pyMOFA(object):
    r"""
    MOFA class.

    Collects one data matrix per omics layer, trains a MOFA model on them
    through the ``entry_point`` workflow and writes the result to disk.
    """

    def __init__(self, omics: list, omics_name: list):
        r"""
        Initialize the MOFA class.

        Arguments:
            omics: The list of omics data. Every element is accessed through
                `.X`, `.var.index` and `.obs.index` (AnnData-like objects).
            omics_name: The list of omics name, one entry per element of `omics`.

        """
        self.omics = omics
        self.omics_name = omics_name
        # Number of views (omics layers) passed to the model.
        self.M = len(omics)

    def mofa_preprocess(self):
        r"""
        Preprocess the data.

        Fills `self.data_mat` with one `[matrix]` entry per view (sparse
        matrices are densified) and `self.feature_name` with the feature
        names of each view prefixed by `<omics_name>_`.

        """
        self.data_mat = []
        self.feature_name = []
        for m, adata in enumerate(self.omics):
            matrix = adata.X
            # One inner list per view: a single sample group is used.
            self.data_mat.append([matrix.toarray() if issparse(matrix) else matrix])
            view_name = self.omics_name[m]
            self.feature_name.append(
                [f"{view_name}_{feature}" for feature in adata.var.index]
            )

    def mofa_run(self, outfile: str = 'res.hdf5', factors: int = 20, iter: int = 1000,
                 convergence_mode: str = "fast", spikeslab_weights: bool = True,
                 startELBO: int = 1, freqELBO: int = 1, dropR2: float = 0.001,
                 gpu_mode: bool = True, verbose: bool = False, seed: int = 112,
                 scale_groups: bool = False, scale_views: bool = False,
                 center_groups: bool = True,) -> None:
        r"""
        Train the MOFA model.

        If `mofa_preprocess` has not been called yet it is run automatically.

        Arguments:
            outfile: The path of output file.
            factors: The number of factors.
            iter: The number of iterations.
            convergence_mode: The mode of convergence.
            spikeslab_weights: Whether to use spikeslab weights.
            startELBO: The start of ELBO.
            freqELBO: The frequency of ELBO.
            dropR2: The drop of R2.
            gpu_mode: Whether to use gpu mode.
            verbose: Whether to print the information.
            seed: The seed of random number.
            scale_groups: Whether to scale groups.
            scale_views: Whether to scale views.
            center_groups: Whether to center groups.

        """
        # Training needs the matrices built by mofa_preprocess(); build them
        # on demand instead of failing with an AttributeError.
        if not hasattr(self, 'data_mat') or not hasattr(self, 'feature_name'):
            self.mofa_preprocess()

        ent1 = entry_point()
        ent1.set_data_options(
            scale_groups=scale_groups,
            scale_views=scale_views,
            center_groups=center_groups,
        )
        ent1.set_data_matrix(
            self.data_mat,
            likelihoods=["gaussian"] * self.M,
            views_names=self.omics_name,
            # Sample names are taken from the first view only.
            samples_names=[self.omics[0].obs.index],
            features_names=self.feature_name,
        )
        # set param
        ent1.set_model_options(
            factors=factors,
            spikeslab_weights=spikeslab_weights,
            ard_factors=True,
            ard_weights=True,
        )
        ent1.set_train_options(
            iter=iter,
            convergence_mode=convergence_mode,
            startELBO=startELBO,
            freqELBO=freqELBO,
            dropR2=dropR2,
            gpu_mode=gpu_mode,
            verbose=verbose,
            seed=seed,
        )
        ent1.build()
        ent1.run()
        ent1.save(outfile=outfile)
        # The class never defines `self.adata`, so the reference is registered
        # on every input object instead.
        # NOTE(review): assumes add_reference takes one AnnData-like object as
        # its first argument - confirm against its definition.
        for adata in self.omics:
            add_reference(adata, 'MOFA', 'Multi-omics factor analysis with MOFA')
__init__(omics, omics_name)
¶
Initialize the MOFA class.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
omics |
list
|
The list of omics data. |
required |
omics_name |
list
|
The list of omics name. |
required |
Source code in /Users/fernandozeng/miniforge3/envs/space/lib/python3.10/site-packages/omicverse/single/_mofa.py
def __init__(self, omics: list, omics_name: list):
    r"""
    Set up the MOFA wrapper around a collection of omics layers.

    Arguments:
        omics: The list of omics data.
        omics_name: The list of omics name.

    """
    self.omics, self.omics_name = omics, omics_name
    # Number of views handed to the model.
    self.M = len(omics)
mofa_preprocess()
¶
Preprocess the data.
Source code in /Users/fernandozeng/miniforge3/envs/space/lib/python3.10/site-packages/omicverse/single/_mofa.py
def mofa_preprocess(self):
    r"""
    Build the per-view inputs used for training.

    After the call `self.data_mat[m][0]` holds the matrix of view `m`
    (densified when it is sparse) and `self.feature_name[m]` holds the
    feature names of that view prefixed with `<omics_name>_`.

    """
    views = self.omics
    # One single-slot inner list per view.
    self.data_mat = [[None] for _ in views]
    self.feature_name = []
    for idx in range(self.M):
        matrix = views[idx].X
        self.data_mat[idx][0] = matrix.toarray() if issparse(matrix) else matrix
        self.feature_name.append(
            [self.omics_name[idx] + '_' + feat for feat in views[idx].var.index]
        )
mofa_run(outfile='res.hdf5', factors=20, iter=1000, convergence_mode='fast', spikeslab_weights=True, startELBO=1, freqELBO=1, dropR2=0.001, gpu_mode=True, verbose=False, seed=112, scale_groups=False, scale_views=False, center_groups=True)
¶
Train the MOFA model.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
outfile |
str
|
The path of output file. |
'res.hdf5'
|
factors |
int
|
The number of factors. |
20
|
iter |
int
|
The number of iterations. |
1000
|
convergence_mode |
str
|
The mode of convergence. |
'fast'
|
spikeslab_weights |
bool
|
Whether to use spikeslab weights. |
True
|
startELBO |
int
|
The start of ELBO. |
1
|
freqELBO |
int
|
The frequency of ELBO. |
1
|
dropR2 |
float
|
The drop of R2. |
0.001
|
gpu_mode |
bool
|
Whether to use gpu mode. |
True
|
verbose |
bool
|
Whether to print the information. |
False
|
seed |
int
|
The seed of random number. |
112
|
scale_groups |
bool
|
Whether to scale groups. |
False
|
scale_views |
bool
|
Whether to scale views. |
False
|
center_groups |
bool
|
Whether to center groups. |
True
|
Source code in /Users/fernandozeng/miniforge3/envs/space/lib/python3.10/site-packages/omicverse/single/_mofa.py
def mofa_run(self, outfile: str = 'res.hdf5', factors: int = 20, iter: int = 1000,
             convergence_mode: str = "fast", spikeslab_weights: bool = True,
             startELBO: int = 1, freqELBO: int = 1, dropR2: float = 0.001,
             gpu_mode: bool = True, verbose: bool = False, seed: int = 112,
             scale_groups: bool = False, scale_views: bool = False,
             center_groups: bool = True,) -> None:
    r"""
    Train the MOFA model.

    If `mofa_preprocess` has not been called yet it is run automatically.

    Arguments:
        outfile: The path of output file.
        factors: The number of factors.
        iter: The number of iterations.
        convergence_mode: The mode of convergence.
        spikeslab_weights: Whether to use spikeslab weights.
        startELBO: The start of ELBO.
        freqELBO: The frequency of ELBO.
        dropR2: The drop of R2.
        gpu_mode: Whether to use gpu mode.
        verbose: Whether to print the information.
        seed: The seed of random number.
        scale_groups: Whether to scale groups.
        scale_views: Whether to scale views.
        center_groups: Whether to center groups.

    """
    # Training needs the matrices built by mofa_preprocess(); build them on
    # demand instead of failing with an AttributeError.
    if not hasattr(self, 'data_mat') or not hasattr(self, 'feature_name'):
        self.mofa_preprocess()

    ent1 = entry_point()
    ent1.set_data_options(
        scale_groups=scale_groups,
        scale_views=scale_views,
        center_groups=center_groups,
    )
    ent1.set_data_matrix(
        self.data_mat,
        likelihoods=["gaussian"] * self.M,
        views_names=self.omics_name,
        # Sample names are taken from the first view only.
        samples_names=[self.omics[0].obs.index],
        features_names=self.feature_name,
    )
    # set param
    ent1.set_model_options(
        factors=factors,
        spikeslab_weights=spikeslab_weights,
        ard_factors=True,
        ard_weights=True,
    )
    ent1.set_train_options(
        iter=iter,
        convergence_mode=convergence_mode,
        startELBO=startELBO,
        freqELBO=freqELBO,
        dropR2=dropR2,
        gpu_mode=gpu_mode,
        verbose=verbose,
        seed=seed,
    )
    ent1.build()
    ent1.run()
    ent1.save(outfile=outfile)
    # `self.adata` is never assigned by this class, so the reference is
    # registered on every input object instead.
    # NOTE(review): assumes add_reference takes one AnnData-like object as its
    # first argument - confirm against its definition.
    for adata in self.omics:
        add_reference(adata, 'MOFA', 'Multi-omics factor analysis with MOFA')
omicverse.single.pyMOFAART
¶
Bases: object
Source code in /Users/fernandozeng/miniforge3/envs/space/lib/python3.10/site-packages/omicverse/single/_mofa.py
class pyMOFAART(object):
    """
    Downstream analysis and plotting helpers for a trained MOFA model file.

    On construction the factor matrix and the per-view R2 table are read from
    the model; the plotting methods read feature weights from the same file.
    """

    def __init__(self, model_path: str):
        """
        Initialize the MOFAART class.

        Arguments:
            model_path: The path of MOFA model.

        """
        check_mofax()
        global mofax_install
        if mofax_install:
            global_imports("mofax", "mfx")

        self.model_path = model_path
        mfx_model = mfx.mofa_model(model_path)
        try:
            self.factors = mfx_model.get_factors()
            # Query the R2 table once instead of three times per view.
            r2_long = mfx_model.get_r2()
            plot_data = pd.DataFrame()
            for view_name in r2_long['View'].unique():
                plot_data[view_name] = r2_long.loc[r2_long['View'] == view_name, 'R2'].values
            self.r2 = plot_data
        finally:
            # Release the model handle even if reading it failed.
            mfx_model.close()

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _clean_feature_name(raw, prefix: str) -> str:
        """Decode a stored feature name and strip the leading `<view>_` prefix."""
        name = raw.decode("utf8") if isinstance(raw, bytes) else str(raw)
        # Only the leading prefix is removed; a blanket replace() would also
        # eat matches inside the feature name itself.
        return name[len(prefix):] if name.startswith(prefix) else name

    def _default_factor_list(self) -> list:
        """Return the 1-based indices of all factors (one per row of `self.r2`)."""
        return list(range(1, self.r2.shape[0] + 1))

    def _get_factor_weights(self, view: str) -> "pd.DataFrame":
        """Return a features x factors table with columns `factor_1..factor_K` for `view`."""
        prefix = '{}_'.format(view)
        columns = {}
        feature_index = None
        for k in range(1, self.factors.shape[1] + 1):
            f_w = get_weights(hdf5_path=self.model_path, view=view, factor=k)
            if feature_index is None:
                feature_index = [self._clean_feature_name(f, prefix) for f in f_w['feature']]
            # NOTE(review): assumes get_weights returns features in the same
            # order for every factor of a view - confirm.
            columns['factor_{}'.format(k)] = f_w['weights'].to_numpy()
        return pd.DataFrame(columns, index=feature_index)

    def _build_factor_anndata(self, view: str):
        """Wrap the weight table of `view` in an AnnData with one `Factor` group per factor."""
        factor_w = self._get_factor_weights(view)
        # Each factor is duplicated so that every group has two observations.
        adata1 = anndata.AnnData(pd.concat([factor_w, factor_w], axis=1).T)
        adata1.obs['Factor'] = adata1.obs.index
        adata1.obs['Factor'] = adata1.obs['Factor'].astype('category')
        return adata1

    @staticmethod
    def _scatter_weight_groups(plot_data, x_col, y_col, groups, colors_dict,
                               up_color, down_color, weith_threshold,
                               figsize, ticks_fontsize, labels_fontsize):
        """Draw the weight scatter, one colour per `sig` group, plus the threshold guides."""
        fig, ax = plt.subplots(figsize=figsize)
        for group in groups:
            members = plot_data.loc[plot_data['sig'] == group]
            ax.scatter(members[x_col], members[y_col],
                       color=colors_dict[group], alpha=0.5)

        plt.vlines(x=weith_threshold, ymin=-1, ymax=1, color=up_color, linestyles='dashed')
        plt.hlines(y=weith_threshold, xmin=-1, xmax=1, color=up_color, linestyles='dashed')
        plt.vlines(x=-weith_threshold, ymin=-1, ymax=1, color=down_color, linestyles='dashed')
        plt.hlines(y=-weith_threshold, xmin=-1, xmax=1, color=down_color, linestyles='dashed')

        for side in ('top', 'right', 'bottom', 'left'):
            ax.spines[side].set_visible(True)
        plt.grid(False)

        plt.xlabel(x_col, fontsize=labels_fontsize)
        plt.ylabel(y_col, fontsize=labels_fontsize)
        plt.xticks(fontsize=ticks_fontsize)
        plt.yticks(fontsize=ticks_fontsize)
        return fig, ax

    @staticmethod
    def _label_genes(ax, rows, x_col, y_col):
        """Write the index label of every row of `rows` next to its point and de-overlap them."""
        if len(rows) == 0:
            return
        from adjustText import adjust_text
        texts = [
            ax.text(x, y, name,
                    fontdict={'size': 10, 'weight': 'bold', 'color': 'black'})
            for name, x, y in zip(rows.index, rows[x_col], rows[y_col])
        ]
        adjust_text(texts, only_move={'text': 'xy'},
                    arrowprops=dict(arrowstyle='->', color='grey'),)

    # ------------------------------------------------------------------
    # public API
    # ------------------------------------------------------------------
    def get_factors(self, adata: "anndata.AnnData"):
        """
        Get the factors of MOFA to anndata object.

        Arguments:
            adata: The anndata object.

        """
        print('......Add factors to adata and store to adata.obsm["X_mofa"]')
        adata.obsm['X_mofa'] = self.factors
        # The result is not returned to the caller (unchanged from the
        # original); presumably factor_exact updates `adata` in place.
        factor_exact(adata, hdf5_path=self.model_path)

    def get_r2(self,) -> "pd.DataFrame":
        """
        Get the variance of each factor

        Returns:
            r2: the variance of each factor

        """
        return self.r2

    def plot_r2(self, figsize: tuple = (2, 3), cmap: str = 'Greens',
                ticks_fontsize: int = 10, labels_fontsize: int = 12,
                save: bool = False) -> "Tuple[matplotlib.figure.Figure, matplotlib.axes._axes.Axes]":
        """
        Plot the variance of each factor.

        Arguments:
            figsize: The size of figure.
            cmap: The color map.
            ticks_fontsize: The size of ticks.
            labels_fontsize: The size of labels.
            save: Whether to save the figure (written to `mofa_varience.png`).

        Returns:
            fig: The figure of variance.
            ax: The axes of variance.

        """
        fig, ax = plt.subplots(figsize=figsize)
        sns.heatmap(self.r2, cmap=cmap, ax=ax, xticklabels=True, yticklabels=True,
                    cbar_kws={'shrink': 0.5})
        plt.xticks(fontsize=ticks_fontsize)
        plt.yticks(fontsize=ticks_fontsize)
        plt.ylabel('Factor', fontsize=labels_fontsize)
        plt.xlabel('View', fontsize=labels_fontsize)
        plt.title('Varience', fontsize=labels_fontsize)
        if save:
            fig.savefig("mofa_varience.png", dpi=300, bbox_inches='tight')
        return fig, ax

    def get_cor(self, adata: "anndata.AnnData", cluster: str, factor_list=None) -> "pd.DataFrame":
        """
        Get the correlation of each factor with cluster type in anndata object.

        Arguments:
            adata: The anndata object.
            cluster: The cluster type.
            factor_list: The list of factors (all factors when None).

        Returns:
            plot_data1: The correlation of each factor with cluster type.

        """
        # `is None`: an array-like factor_list makes `== None` elementwise.
        if factor_list is None:
            factor_list = self._default_factor_list()
        return factor_correlation(adata=adata, cluster=cluster, factor_list=factor_list)

    def plot_cor(self, adata: "anndata.AnnData", cluster: str, factor_list=None, figsize: tuple = (6, 3),
                 cmap: str = 'Purples', ticks_fontsize: int = 10, labels_fontsize: int = 12,
                 title: str = 'Correlation',
                 save: bool = False) -> "Tuple[matplotlib.figure.Figure, matplotlib.axes._axes.Axes]":
        """
        Plot the correlation of each factor with cluster type in anndata object.

        Arguments:
            adata: The anndata object in MOFA pre trained.
            cluster: The cluster type in adata.obs.
            factor_list: The list of factors (all factors when None).
            figsize: The size of figure.
            cmap: The color map.
            ticks_fontsize: The font size of ticks.
            labels_fontsize: The font size of labels.
            title: The title of figure.
            save: Whether to save the figure (written to `mofa_cor.png`).

        Returns:
            fig: The figure of correlation.
            ax: The axes of correlation.

        """
        if factor_list is None:
            factor_list = self._default_factor_list()
        plot_data1 = factor_correlation(adata=adata, cluster=cluster, factor_list=factor_list)

        fig, ax = plt.subplots(figsize=figsize)
        sns.heatmap(plot_data1, cmap=cmap, ax=ax, square=True,
                    cbar_kws={'shrink': 0.5})
        plt.xticks(fontsize=ticks_fontsize)
        plt.yticks(fontsize=ticks_fontsize)
        plt.xlabel('Factor', fontsize=labels_fontsize)
        plt.ylabel(cluster, fontsize=labels_fontsize)
        plt.title(title, fontsize=labels_fontsize)
        if save:
            fig.savefig("mofa_cor.png", dpi=300, bbox_inches='tight')
        return fig, ax

    def plot_factor(self, adata: "anndata.AnnData", cluster: str, title: str, figsize: tuple = (3, 3),
                    factor1: int = 1, factor2: int = 2, palette: list = None,
                    save: bool = False) -> "Tuple[matplotlib.figure.Figure, matplotlib.axes._axes.Axes]":
        """
        Plot the factor of MOFA in anndata object.

        Arguments:
            adata: The anndata object.
            cluster: The cluster type in adata.obs.
            title: The title of figure.
            figsize: The size of figure.
            factor1: The first factor.
            factor2: The second factor.
            palette: The color map (the default palette when None).
            save: Whether to save the figure (written below `figures/`).

        Returns:
            fig: The figure of factor.
            ax: The axes of factor.

        """
        if 'X_mofa' not in adata.obsm.keys():
            self.get_factors(adata)
        if palette is None:
            palette = pyomic_palette()

        fig, ax = plt.subplots(figsize=figsize)
        sc.pl.embedding(
            adata=adata,
            basis='X_mofa',
            color=cluster,
            title=title,
            components="{},{}".format(factor1, factor2),
            palette=palette,
            ncols=1,
            ax=ax
        )
        if save:
            import os
            # savefig does not create missing directories.
            os.makedirs("figures", exist_ok=True)
            fig.savefig("figures/mofa_factor_{}_{}.png".format(factor1, factor2),
                        dpi=300, bbox_inches='tight')
        return fig, ax

    def plot_weight_gene_d1(self, view: str, factor1: int, factor2: int,
                            colors_dict: dict = None, plot_gene_num: int = 5, title: str = '',
                            title_fontsize: int = 12,
                            ticks_fontsize: int = 12, labels_fontsize: int = 12,
                            weith_threshold: float = 0.5, figsize: tuple = (3, 3),
                            save: bool = False) -> "Tuple[matplotlib.figure.Figure, matplotlib.axes._axes.Axes]":
        """
        Plot the weight of gene in each factor of MOFA in anndata object in dimension 1.

        Genes are classified on `factor1` only: weight above the threshold is
        'up', below the negative threshold is 'down'.

        Arguments:
            view: The view of MOFA.
            factor1: The first factor.
            factor2: The second factor.
            colors_dict: The color dict of up, down and normal. default is {'normal':'#c2c2c2','up':'#a51616','down':'#0d6a3b'}
            plot_gene_num: The number of genes to plot.
            title: The title of figure.
            title_fontsize: The font size of title.
            ticks_fontsize: The font size of ticks.
            labels_fontsize: The font size of labels.
            weith_threshold: The threshold of weight.
            figsize: The size of figure.
            save: Whether to save the figure.

        Returns:
            fig: The figure of weight.
            ax: The axes of weight.

        """
        x_col = 'factor_{}'.format(factor1)
        y_col = 'factor_{}'.format(factor2)
        # De-duplicate so factor1 == factor2 does not create twin columns;
        # copy so the 'sig' column is not written into a slice.
        wanted = list(dict.fromkeys([x_col, y_col]))
        plot_data3 = self._get_factor_weights(view)[wanted].copy()
        plot_data3['sig'] = 'normal'
        plot_data3.loc[plot_data3[x_col] > weith_threshold, 'sig'] = 'up'
        plot_data3.loc[plot_data3[x_col] < -weith_threshold, 'sig'] = 'down'

        if colors_dict is None:
            colors_dict = {'normal': '#c2c2c2', 'up': '#a51616', 'down': '#0d6a3b'}

        fig, ax = self._scatter_weight_groups(
            plot_data3, x_col, y_col, ['normal', 'up', 'down'], colors_dict,
            colors_dict['up'], colors_dict['down'], weith_threshold,
            figsize, ticks_fontsize, labels_fontsize,
        )

        for sig in ('up', 'down'):
            # Most extreme genes first: descending for 'up', ascending for 'down'.
            ranked = plot_data3.loc[plot_data3['sig'] == sig].sort_values(
                x_col, ascending=(sig == 'down'))
            self._label_genes(ax, ranked.iloc[:plot_gene_num], x_col, y_col)

        plt.title(title, fontsize=title_fontsize)
        if save:
            fig.savefig("factor_gene_{}.png".format(title), dpi=300, bbox_inches='tight')
        return fig, ax

    def plot_weight_gene_d2(self, view: str, factor1: int, factor2: int,
                            colors_dict: dict = None, plot_gene_num: int = 5, title: str = '',
                            title_fontsize: int = 12,
                            ticks_fontsize: int = 12, labels_fontsize: int = 12,
                            weith_threshold: float = 0.5, figsize: tuple = (3, 3),
                            save: bool = False) -> "Tuple[matplotlib.figure.Figure, matplotlib.axes._axes.Axes]":
        """
        Plot the weight of gene in each factor of MOFA in anndata object in dimension 2.

        Genes are classified on both factors: each factor contributes 'up'
        (above the threshold) or 'down' (below the negative threshold).

        Arguments:
            view: The view of MOFA.
            factor1: The first factor.
            factor2: The second factor.
            colors_dict: The color dict. default is {'up-up':'#a51616','up-down':'#e25d5d','down-up':'#1a6e1a','down-down':'#5de25d','normal':'#c2c2c2'}
            plot_gene_num: The number of genes to plot.
            title: The title of figure.
            title_fontsize: The font size of title.
            ticks_fontsize: The font size of ticks.
            labels_fontsize: The font size of labels.
            weith_threshold: The threshold of weight.
            figsize: The size of figure.
            save: Whether to save the figure.

        Returns:
            fig: The figure of weight.
            ax: The axes of weight.

        """
        x_col = 'factor_{}'.format(factor1)
        y_col = 'factor_{}'.format(factor2)
        wanted = list(dict.fromkeys([x_col, y_col]))
        plot_data3 = self._get_factor_weights(view)[wanted].copy()

        x_up = plot_data3[x_col] > weith_threshold
        x_down = plot_data3[x_col] < -weith_threshold
        y_up = plot_data3[y_col] > weith_threshold
        y_down = plot_data3[y_col] < -weith_threshold
        plot_data3['sig'] = 'normal'
        plot_data3.loc[x_up & y_up, 'sig'] = 'up-up'
        plot_data3.loc[x_up & y_down, 'sig'] = 'up-down'
        plot_data3.loc[x_down & y_up, 'sig'] = 'down-up'
        plot_data3.loc[x_down & y_down, 'sig'] = 'down-down'

        if colors_dict is None:
            colors_dict = {'up-up': '#a51616', 'up-down': '#e25d5d', 'down-up': '#1a6e1a',
                           'down-down': '#5de25d', 'normal': '#c2c2c2'}

        quadrants = ['up-up', 'up-down', 'down-up', 'down-down']
        fig, ax = self._scatter_weight_groups(
            plot_data3, x_col, y_col, ['normal'] + quadrants, colors_dict,
            colors_dict['up-up'], colors_dict['down-down'], weith_threshold,
            figsize, ticks_fontsize, labels_fontsize,
        )

        for sig in quadrants:
            members = plot_data3.loc[plot_data3['sig'] == sig]
            self._label_genes(ax, members.iloc[:plot_gene_num], x_col, y_col)

        plt.title(title, fontsize=title_fontsize)
        if save:
            fig.savefig("factor_gene_{}.png".format(title), dpi=300, bbox_inches='tight')
        return fig, ax

    def plot_weights(self, view: str, factors=None, n_features: int = 5,
                     w_scaled: bool = False,
                     w_abs: bool = False,
                     size: float = 2,
                     color: str = "black",
                     label_size: float = 5,
                     x_offset: float = 0.01,
                     y_offset: float = 0.15,
                     jitter: float = 0.01,
                     line_width: float = 0.5,
                     line_color: str = "black",
                     line_alpha: float = 0.2,
                     zero_line: bool = True,
                     zero_line_width: float = 1,
                     ncols: int = 4,
                     sharex: bool = True,
                     sharey: bool = False,
                     feature_label: str = None,
                     figsize: tuple = (10, 6),
                     **kwargs) -> "Tuple[matplotlib.figure.Figure, matplotlib.axes._axes.Axes]":
        r"""
        Plot weights for MOFA factors.

        Arguments:
            view: Name of the modality/view.
            factors: Factors to plot (all by default). Integers are 1-based
                factor indices; strings must look like `Factor3`.
            n_features: Number of features with largest weights to label.
            w_scaled: Whether to divide the weights of each factor by their
                largest absolute value.
            w_abs: Whether to plot absolute weight values.
            size: Dot size.
            color: Color for labeled dots.
            label_size: Font size of feature labels.
            x_offset: Offset for feature labels from left/right side.
            y_offset: Parameter to repel feature labels along y axis.
            jitter: Amount of jitter applied to the dots of each factor.
            line_width: Width of lines connecting labels with dots.
            line_color: Color of lines connecting labels with dots.
            line_alpha: Alpha level for lines connecting labels with dots.
            zero_line: Whether to plot dotted line at zero.
            zero_line_width: Width of zero line.
            ncols: Accepted for API compatibility; currently unused.
            sharex: Accepted for API compatibility; currently unused.
            sharey: Accepted for API compatibility; currently unused.
            feature_label: Accepted for API compatibility; currently unused.
            figsize: The size of figure.
            **kwargs: Additional arguments passed to `seaborn.stripplot`.

        Returns:
            fig: The figure object.
            ax: The axis object.

        Raises:
            ValueError: If a requested factor is not part of the model.

        """
        # Weights are read per factor, exactly as the other plotting methods
        # do; the view is validated by get_weights, not by the file path.
        weights = self._get_factor_weights(view)
        weights.columns = ['Factor{}'.format(k + 1) for k in range(weights.shape[1])]

        if factors is not None:
            if isinstance(factors, (int, str)):
                factors = [factors]
            factor_names = ['Factor{}'.format(f) if isinstance(f, int) else f for f in factors]
            factor_names = list(dict.fromkeys(factor_names))
            missing = [name for name in factor_names if name not in weights.columns]
            if missing:
                raise ValueError(f"Factors {missing} not found in MOFA model")
            weights = weights[factor_names]

        if w_scaled:
            # An all-zero factor keeps its zeros instead of turning into NaN.
            weights = weights / weights.abs().max().replace(0, 1)
        if w_abs:
            weights = weights.abs()

        # Long format: one row per (feature, factor).
        wm = weights.rename_axis(index='feature').reset_index().melt(
            id_vars='feature',
            var_name='factor',
            value_name='value'
        )
        wm['abs_value'] = wm['value'].abs()

        # The numeric factor order defines the rows of the strip plot; the
        # same list is used for labels so both always agree.
        factor_order = sorted(weights.columns, key=lambda name: int(name.split('Factor')[1]))
        wm['factor'] = pd.Categorical(wm['factor'], categories=factor_order)

        features_to_label = []
        for factor in factor_order:
            ranked = wm[wm['factor'] == factor].sort_values('abs_value', ascending=False)
            features_to_label.extend(ranked['feature'].head(n_features))
        wm['to_label'] = wm['feature'].isin(features_to_label)

        fig, ax = plt.subplots(figsize=figsize)
        sns.stripplot(
            data=wm,
            x='value',
            y='factor',
            jitter=jitter,
            size=size,
            hue='to_label',
            # Explicit order keeps grey/colour assigned correctly even when
            # only one of the two levels is present.
            hue_order=[False, True],
            palette=['lightgrey', color],
            ax=ax,
            **kwargs
        )
        legend = ax.get_legend()
        if legend is not None:
            legend.remove()

        for fi, factor in enumerate(factor_order):
            labelled = wm[(wm['factor'] == factor) & wm['to_label']]
            for sign_i in (1, -1):
                to_label = labelled[labelled['value'] * sign_i > 0].sort_values(
                    'abs_value', ascending=False)
                if len(to_label) == 0:
                    continue

                x_start_pos = sign_i * (to_label['abs_value'].max() + x_offset)
                # Centre the stack of labels vertically on the factor row.
                y_start_pos = fi - ((len(to_label) - 1) // 2) * y_offset

                for rank, point in enumerate(to_label.itertuples(index=False)):
                    ax.annotate(
                        point.feature,
                        xy=(point.value, fi),
                        xytext=(x_start_pos, y_start_pos + rank * y_offset),
                        arrowprops=dict(
                            arrowstyle='-',
                            connectionstyle='arc3',
                            color=line_color,
                            alpha=line_alpha,
                            linewidth=line_width
                        ),
                        horizontalalignment='left' if sign_i > 0 else 'right',
                        size=label_size,
                        color='black',
                        weight='regular',
                        alpha=0.9
                    )

        if zero_line:
            ax.axvline(0, ls='--', color='lightgrey', linewidth=zero_line_width, zorder=0)

        sns.despine(offset=10, trim=True)
        ax.set_xlabel('Feature weight')
        ax.set_ylabel('')
        ax.set_title(view)
        return fig, ax

    def plot_top_feature_dotplot(self, view: str, cmap: str = 'bwr', n_genes: int = 3) -> list:
        """
        Plot the top features of each factor in dotplot

        Arguments:
            view: str, the view of the factor
            cmap: str, the color map of the plot
            n_genes: int, the number of genes to plot

        Returns:
            axes: the list of the figure

        """
        adata1 = self._build_factor_anndata(view)
        sc.tl.rank_genes_groups(adata1, groupby='Factor', method='wilcoxon')
        ax = sc.pl.rank_genes_groups_dotplot(adata1, n_genes=n_genes,
                                             cmap=cmap, show=False)
        return ax

    def plot_top_feature_heatmap(self, view: str, cmap: str = 'bwr', n_genes: int = 3) -> list:
        """
        Plot the top features of each factor in a matrix plot (heatmap)

        Arguments:
            view: str, the view of the factor
            cmap: str, the color map of the plot
            n_genes: int, the number of genes to plot

        Returns:
            axes: the list of the figure

        """
        adata1 = self._build_factor_anndata(view)
        sc.tl.rank_genes_groups(adata1, groupby='Factor', method='wilcoxon')
        ax = sc.pl.rank_genes_groups_matrixplot(adata1, n_genes=n_genes,
                                                cmap=cmap, show=False)
        return ax

    def get_top_feature(self, view: str, log2fc_min: float = 3, pval_cutoff: float = 0.1) -> dict:
        """
        Get the top features of each factor

        Arguments:
            view: str, the view of the factor
            log2fc_min: float, the minimum log2fc of the feature
            pval_cutoff: float, the maximum pval of the feature

        Returns:
            top_feature: dict, the top features of each factor

        """
        adata1 = self._build_factor_anndata(view)
        top_feature = get_celltype_marker(adata1, clustertype='Factor',
                                          log2fc_min=log2fc_min, pval_cutoff=pval_cutoff)
        return top_feature
__init__(model_path)
¶
Initialize the MOFAART class.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
model_path |
str
|
The path of MOFA model. |
required |
Source code in /Users/fernandozeng/miniforge3/envs/space/lib/python3.10/site-packages/omicverse/single/_mofa.py
def __init__(self, model_path: str):
    """
    Initialize the MOFAART class.

    Reads the factor matrix into `self.factors` and builds `self.r2`, a
    table with one column of R2 values per view.

    Arguments:
        model_path: The path of MOFA model.

    """
    check_mofax()
    global mofax_install
    if mofax_install:
        global_imports("mofax", "mfx")

    self.model_path = model_path
    mfx_model = mfx.mofa_model(model_path)
    try:
        self.factors = mfx_model.get_factors()
        # Query the R2 table once instead of three times per view.
        r2_long = mfx_model.get_r2()
        plot_data = pd.DataFrame()
        for view_name in r2_long['View'].unique():
            plot_data[view_name] = r2_long.loc[r2_long['View'] == view_name, 'R2'].values
        self.r2 = plot_data
    finally:
        # Release the model handle even if reading it failed.
        mfx_model.close()
get_factors(adata)
¶
Get the factors of MOFA to anndata object.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
adata |
anndata.AnnData
|
The anndata object. |
required |
Source code in /Users/fernandozeng/miniforge3/envs/space/lib/python3.10/site-packages/omicverse/single/_mofa.py
def get_factors(self, adata: anndata.AnnData):
    """
    Attach the MOFA factors to an anndata object.

    The factor matrix is stored in `adata.obsm["X_mofa"]`, then
    `factor_exact` is called on `adata` with the model path.

    Arguments:
        adata: The anndata object.

    """
    print('......Add factors to adata and store to adata.obsm["X_mofa"]')
    adata.obsm['X_mofa'] = self.factors
    # The return value is not handed back to the caller; presumably
    # factor_exact updates `adata` in place.
    factor_exact(adata, hdf5_path=self.model_path)
get_r2()
¶
Get the variance of each factor
Returns:
Name | Type | Description |
---|---|---|
r2 |
pd.DataFrame
|
the variance of each factor |
Source code in /Users/fernandozeng/miniforge3/envs/space/lib/python3.10/site-packages/omicverse/single/_mofa.py
def get_r2(self,) -> pd.DataFrame:
    """
    Return the variance (R2) table stored on the instance.

    Returns:
        r2: the variance of each factor

    """
    r2_table = self.r2
    return r2_table
plot_r2(figsize=(2, 3), cmap='Greens', ticks_fontsize=10, labels_fontsize=12, save=False)
¶
Plot the variance of each factor.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
figsize |
tuple
|
The size of figure. |
(2, 3)
|
cmap |
str
|
The color map. |
'Greens'
|
ticks_fontsize |
int
|
The size of ticks. |
10
|
labels_fontsize |
int
|
The size of labels. |
12
|
save |
bool
|
Whether to save the figure. |
False
|
Returns:
Name | Type | Description |
---|---|---|
fig |
matplotlib.figure.Figure
|
The figure of variance. |
ax |
matplotlib.axes._axes.Axes
|
The axes of variance. |
Source code in /Users/fernandozeng/miniforge3/envs/space/lib/python3.10/site-packages/omicverse/single/_mofa.py
def plot_r2(self, figsize: tuple = (2, 3), cmap: str = 'Greens',
            ticks_fontsize: int = 10, labels_fontsize: int = 12,
            save: bool = False) -> Tuple[matplotlib.figure.Figure, matplotlib.axes._axes.Axes]:
    """
    Draw the variance table (`self.r2`) as a heatmap.

    Arguments:
        figsize: The size of figure.
        cmap: The color map.
        ticks_fontsize: The size of ticks.
        labels_fontsize: The size of labels.
        save: Whether to save the figure (written to `mofa_varience.png`).

    Returns:
        fig: The figure of variance.
        ax: The axes of variance.

    """
    figure, axis = plt.subplots(figsize=figsize)
    heatmap_options = dict(
        cmap=cmap,
        ax=axis,
        xticklabels=True,
        yticklabels=True,
        cbar_kws={'shrink': 0.5},
    )
    sns.heatmap(self.r2, **heatmap_options)

    for set_ticks in (plt.xticks, plt.yticks):
        set_ticks(fontsize=ticks_fontsize)
    plt.ylabel('Factor', fontsize=labels_fontsize)
    plt.xlabel('View', fontsize=labels_fontsize)
    plt.title('Varience', fontsize=labels_fontsize)

    if not save:
        return figure, axis
    figure.savefig("mofa_varience.png", dpi=300, bbox_inches='tight')
    return figure, axis
get_cor(adata, cluster, factor_list=None)
¶
get the correlation of each factor with cluster type in anndata object.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
adata |
anndata.AnnData
|
The anndata object. |
required |
cluster |
str
|
The cluster type. |
required |
factor_list |
The list of factors. |
None
|
Returns:
Name | Type | Description |
---|---|---|
plot_data1 |
pd.DataFrame
|
The correlation of each factor with cluster type. |
Source code in /Users/fernandozeng/miniforge3/envs/space/lib/python3.10/site-packages/omicverse/single/_mofa.py
def get_cor(self, adata: anndata.AnnData, cluster: str, factor_list=None) -> pd.DataFrame:
    """
    Get the correlation of each factor with the cluster type in the AnnData object.

    Arguments:
        adata: The anndata object.
        cluster: The cluster type.
        factor_list: The list of factors. When omitted, factors
            ``1 .. self.r2.shape[0]`` are used.

    Returns:
        plot_data1: The correlation of each factor with cluster type.
    """
    # Identity test: `== None` is evaluated elementwise for array-like input,
    # which makes the truth value ambiguous.
    if factor_list is None:
        factor_list = [i + 1 for i in range(self.r2.shape[0])]
    plot_data1 = factor_correlation(adata=adata, cluster=cluster, factor_list=factor_list)
    return plot_data1
plot_cor(adata, cluster, factor_list=None, figsize=(6, 3), cmap='Purples', ticks_fontsize=10, labels_fontsize=12, title='Correlation', save=False)
¶
Plot the correlation of each factor with cluster type in anndata object.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
adata |
anndata.AnnData
|
The anndata object in MOFA pre trained. |
required |
cluster |
str
|
The cluster type in adata.obs. |
required |
factor_list |
The list of factors. |
None
|
|
figsize |
tuple
|
The size of figure. |
(6, 3)
|
cmap |
str
|
The color map. |
'Purples'
|
ticks_fontsize |
int
|
The font size of ticks. |
10
|
labels_fontsize |
int
|
The font size of labels. |
12
|
title |
str
|
The title of figure. |
'Correlation'
|
save |
bool
|
Whether to save the figure. |
False
|
Returns:
Name | Type | Description |
---|---|---|
fig |
matplotlib.figure.Figure
|
The figure of correlation. |
ax |
matplotlib.axes._axes.Axes
|
The axes of correlation. |
Source code in /Users/fernandozeng/miniforge3/envs/space/lib/python3.10/site-packages/omicverse/single/_mofa.py
def plot_cor(self, adata: anndata.AnnData, cluster: str, factor_list=None, figsize: tuple = (6, 3),
             cmap: str = 'Purples', ticks_fontsize: int = 10, labels_fontsize: int = 12, title: str = 'Correlation',
             save: bool = False) -> Tuple[matplotlib.figure.Figure, matplotlib.axes._axes.Axes]:
    """
    Plot the correlation of each factor with the cluster type in the AnnData object.

    Arguments:
        adata: The anndata object in MOFA pre trained.
        cluster: The cluster type in adata.obs.
        factor_list: The list of factors. When omitted, factors
            ``1 .. self.r2.shape[0]`` are used.
        figsize: The size of figure.
        cmap: The color map.
        ticks_fontsize: The font size of ticks.
        labels_fontsize: The font size of labels.
        title: The title of figure.
        save: Whether to save the figure (written to ``mofa_cor.png``).

    Returns:
        fig: The figure of correlation.
        ax: The axes of correlation.
    """
    # Identity test: `== None` is evaluated elementwise for array-like input,
    # which makes the truth value ambiguous.
    if factor_list is None:
        factor_list = [i + 1 for i in range(self.r2.shape[0])]
    plot_data1 = factor_correlation(adata=adata, cluster=cluster, factor_list=factor_list)

    fig, ax = plt.subplots(figsize=figsize)
    sns.heatmap(plot_data1, cmap=cmap, ax=ax, square=True,
                cbar_kws={'shrink': 0.5})
    plt.xticks(fontsize=ticks_fontsize)
    plt.yticks(fontsize=ticks_fontsize)
    plt.xlabel('Factor', fontsize=labels_fontsize)
    plt.ylabel(cluster, fontsize=labels_fontsize)
    plt.title(title, fontsize=labels_fontsize)
    if save:
        fig.savefig("mofa_cor.png", dpi=300, bbox_inches='tight')
    return fig, ax
plot_factor(adata, cluster, title, figsize=(3, 3), factor1=1, factor2=2, palette=None, save=False)
¶
Plot the factor of MOFA in anndata object.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
adata |
anndata.AnnData
|
The anndata object. |
required |
cluster |
str
|
The cluster type in adata.obs. |
required |
title |
str
|
The title of figure. |
required |
figsize |
tuple
|
The size of figure. |
(3, 3)
|
factor1 |
int
|
The first factor. |
1
|
factor2 |
int
|
The second factor. |
2
|
palette |
list
|
The color map. |
None
|
save |
bool
|
Whether to save the figure. |
False
|
Returns:
Name | Type | Description |
---|---|---|
fig |
matplotlib.figure.Figure
|
The figure of factor. |
ax |
matplotlib.axes._axes.Axes
|
The axes of factor. |
Source code in /Users/fernandozeng/miniforge3/envs/space/lib/python3.10/site-packages/omicverse/single/_mofa.py
def plot_factor(self, adata: anndata.AnnData, cluster: str, title: str, figsize: tuple = (3, 3),
                factor1: int = 1, factor2: int = 2, palette: list = None,
                save: bool = False) -> Tuple[matplotlib.figure.Figure, matplotlib.axes._axes.Axes]:
    """
    Plot two MOFA factors against each other, coloured by a cluster annotation.

    Arguments:
        adata: The anndata object.
        cluster: The cluster type in adata.obs.
        title: The title of figure.
        figsize: The size of figure.
        factor1: The first factor.
        factor2: The second factor.
        palette: The color map. When omitted, ``pyomic_palette()`` is used.
        save: Whether to save the figure
            (written to ``figures/mofa_factor_<factor1>_<factor2>.png``).

    Returns:
        fig: The figure of factor.
        ax: The axes of factor.
    """
    # The embedding is read from adata.obsm['X_mofa']; populate it on demand.
    if 'X_mofa' not in adata.obsm.keys():
        self.get_factors(adata)
    # Identity test: `== None` is evaluated elementwise for array-like palettes.
    if palette is None:
        palette = pyomic_palette()

    fig, ax = plt.subplots(figsize=figsize)
    sc.pl.embedding(
        adata=adata,
        basis='X_mofa',
        color=cluster,
        title=title,
        components="{},{}".format(factor1, factor2),
        palette=palette,
        ncols=1,
        ax=ax
    )
    if save:
        # savefig does not create missing parent directories.
        import os
        os.makedirs("figures", exist_ok=True)
        fig.savefig("figures/mofa_factor_{}_{}.png".format(factor1, factor2), dpi=300, bbox_inches='tight')
    return fig, ax
plot_weight_gene_d1(view, factor1, factor2, colors_dict=None, plot_gene_num=5, title='', title_fontsize=12, ticks_fontsize=12, labels_fontsize=12, weith_threshold=0.5, figsize=(3, 3), save=False)
¶
Plot the weight of gene in each factor of MOFA in anndata object in dimension 1.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
view |
str
|
The view of MOFA. |
required |
factor1 |
int
|
The first factor. |
required |
factor2 |
int
|
The second factor. |
required |
colors_dict |
dict
|
The color dict of up, down and normal. default is {'normal':'#c2c2c2','up':'#a51616','down':'#0d6a3b'} |
None
|
plot_gene_num |
int
|
The number of genes to plot. |
5
|
title |
str
|
The title of figure. |
''
|
title_fontsize |
int
|
The font size of title. |
12
|
ticks_fontsize |
int
|
The font size of ticks. |
12
|
labels_fontsize |
int
|
The font size of labels. |
12
|
weith_threshold |
float
|
The threshold of weight. |
0.5
|
figsize |
tuple
|
The size of figure. |
(3, 3)
|
save |
bool
|
Whether to save the figure. |
False
|
Returns:
Name | Type | Description |
---|---|---|
fig |
matplotlib.figure.Figure
|
The figure of weight. |
ax |
matplotlib.axes._axes.Axes
|
The axes of weight. |
Source code in /Users/fernandozeng/miniforge3/envs/space/lib/python3.10/site-packages/omicverse/single/_mofa.py
def plot_weight_gene_d1(self, view: str, factor1: int, factor2: int,
                        colors_dict: dict = None, plot_gene_num: int = 5, title: str = '', title_fontsize: int = 12,
                        ticks_fontsize: int = 12, labels_fontsize: int = 12,
                        weith_threshold: float = 0.5, figsize: tuple = (3, 3),
                        save: bool = False) -> Tuple[matplotlib.figure.Figure, matplotlib.axes._axes.Axes]:
    """
    Scatter the feature weights of two factors, highlighting features whose
    weight on ``factor1`` exceeds the threshold (one-dimensional selection).

    Arguments:
        view: The view of MOFA.
        factor1: The first factor.
        factor2: The second factor.
        colors_dict: The color dict of up, down and normal. default is {'normal':'#c2c2c2','up':'#a51616','down':'#0d6a3b'}
        plot_gene_num: The number of genes to plot.
        title: The title of figure.
        title_fontsize: The font size of title.
        ticks_fontsize: The font size of ticks.
        labels_fontsize: The font size of labels.
        weith_threshold: The threshold of weight.
        figsize: The size of figure.
        save: Whether to save the figure (written to ``factor_gene_<title>.png``).

    Returns:
        fig: The figure of weight.
        ax: The axes of weight.
    """
    view_prefix = '{}_'.format(view)

    # Collect one weight column per factor, indexed by feature name with the
    # '<view>_' prefix removed.
    factor_w = pd.DataFrame()
    for i in range(self.factors.shape[1]):
        f1_w = get_weights(hdf5_path=self.model_path, view=view, factor=i + 1)
        f1_w.index = [str(name, "utf8").replace(view_prefix, '') for name in f1_w['feature']]
        factor_w['factor_{}'.format(i + 1)] = f1_w['weights']
    factor_w.index = [str(name, "utf8").replace(view_prefix, '') for name in f1_w['feature']]

    x_col = 'factor_{}'.format(factor1)
    y_col = 'factor_{}'.format(factor2)

    # Explicit copy: the 'sig' column is added below, and assigning into a
    # column subset of another frame triggers pandas' chained-assignment warning.
    plot_data3 = factor_w[[x_col, y_col]].copy()
    plot_data3['sig'] = 'normal'
    plot_data3.loc[plot_data3[x_col] > weith_threshold, 'sig'] = 'up'
    plot_data3.loc[plot_data3[x_col] < -weith_threshold, 'sig'] = 'down'

    if colors_dict is None:
        colors_dict = {'normal': '#c2c2c2', 'up': '#a51616', 'down': '#0d6a3b'}

    fig, ax = plt.subplots(figsize=figsize)
    # Draw the background points first so highlighted groups sit on top.
    for group in ('normal', 'up', 'down'):
        in_group = plot_data3['sig'] == group
        ax.scatter(plot_data3.loc[in_group, x_col],
                   plot_data3.loc[in_group, y_col],
                   color=colors_dict[group], alpha=0.5)

    # Dashed guide lines at +/- threshold; their extent is fixed to [-1, 1].
    plt.vlines(x=weith_threshold, ymin=-1, ymax=1, color=colors_dict['up'], linestyles='dashed')
    plt.hlines(y=weith_threshold, xmin=-1, xmax=1, color=colors_dict['up'], linestyles='dashed')
    plt.vlines(x=-weith_threshold, ymin=-1, ymax=1, color=colors_dict['down'], linestyles='dashed')
    plt.hlines(y=-weith_threshold, xmin=-1, xmax=1, color=colors_dict['down'], linestyles='dashed')

    for side in ('top', 'right', 'bottom', 'left'):
        ax.spines[side].set_visible(True)
    plt.grid(False)

    plt.xlabel(x_col, fontsize=labels_fontsize)
    plt.ylabel(y_col, fontsize=labels_fontsize)
    plt.xticks(fontsize=ticks_fontsize)
    plt.yticks(fontsize=ticks_fontsize)

    from adjustText import adjust_text
    # Label the most extreme features on each side: largest weights first for
    # 'up', smallest first for 'down'.
    for sig, ascending in (('up', False), ('down', True)):
        hub_gene = (plot_data3.loc[plot_data3['sig'] == sig]
                    .sort_values(x_col, ascending=ascending)
                    .index.tolist())
        if not hub_gene:
            continue
        texts = [ax.text(plot_data3.loc[gene, x_col],
                         plot_data3.loc[gene, y_col],
                         gene,
                         fontdict={'size': 10, 'weight': 'bold', 'color': 'black'}
                         ) for gene in hub_gene[:plot_gene_num]]
        adjust_text(texts, only_move={'text': 'xy'}, arrowprops=dict(arrowstyle='->', color='grey'),)

    plt.title(title, fontsize=title_fontsize)
    if save:
        fig.savefig("factor_gene_{}.png".format(title), dpi=300, bbox_inches='tight')
    return fig, ax
plot_weight_gene_d2(view, factor1, factor2, colors_dict=None, plot_gene_num=5, title='', title_fontsize=12, ticks_fontsize=12, labels_fontsize=12, weith_threshold=0.5, figsize=(3, 3), save=False)
¶
Plot the weight of gene in each factor of MOFA in anndata object in dimension 2.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
view |
str
|
The view of MOFA. |
required |
factor1 |
int
|
The first factor. |
required |
factor2 |
int
|
The second factor. |
required |
colors_dict |
dict
|
The color dict. default is {'up-up':'#a51616','up-down':'#e25d5d','down-up':'#1a6e1a','down-down':'#5de25d','normal':'#c2c2c2'} |
None
|
plot_gene_num |
int
|
The number of genes to plot. |
5
|
title |
str
|
The title of figure. |
''
|
title_fontsize |
int
|
The font size of title. |
12
|
ticks_fontsize |
int
|
The font size of ticks. |
12
|
labels_fontsize |
int
|
The font size of labels. |
12
|
weith_threshold |
float
|
The threshold of weight. |
0.5
|
figsize |
tuple
|
The size of figure. |
(3, 3)
|
save |
bool
|
Whether to save the figure. |
False
|
Returns:
Name | Type | Description |
---|---|---|
fig |
matplotlib.figure.Figure
|
The figure of weight. |
ax |
matplotlib.axes._axes.Axes
|
The axes of weight. |
Source code in /Users/fernandozeng/miniforge3/envs/space/lib/python3.10/site-packages/omicverse/single/_mofa.py
def plot_weight_gene_d2(self, view: str, factor1: int, factor2: int,
                        colors_dict: dict = None, plot_gene_num: int = 5, title: str = '', title_fontsize: int = 12,
                        ticks_fontsize: int = 12, labels_fontsize: int = 12,
                        weith_threshold: float = 0.5, figsize: tuple = (3, 3),
                        save: bool = False) -> Tuple[matplotlib.figure.Figure, matplotlib.axes._axes.Axes]:
    """
    Scatter the feature weights of two factors, highlighting features whose
    weights exceed the threshold on both factors (two-dimensional selection).

    Arguments:
        view: The view of MOFA.
        factor1: The first factor.
        factor2: The second factor.
        colors_dict: The color dict. default is {'up-up':'#a51616','up-down':'#e25d5d','down-up':'#1a6e1a','down-down':'#5de25d','normal':'#c2c2c2'}
        plot_gene_num: The number of genes to plot.
        title: The title of figure.
        title_fontsize: The font size of title.
        ticks_fontsize: The font size of ticks.
        labels_fontsize: The font size of labels.
        weith_threshold: The threshold of weight.
        figsize: The size of figure.
        save: Whether to save the figure (written to ``factor_gene_<title>.png``).

    Returns:
        fig: The figure of weight.
        ax: The axes of weight.
    """
    view_prefix = '{}_'.format(view)

    # Collect one weight column per factor, indexed by feature name with the
    # '<view>_' prefix removed.
    factor_w = pd.DataFrame()
    for i in range(self.factors.shape[1]):
        f1_w = get_weights(hdf5_path=self.model_path, view=view, factor=i + 1)
        f1_w.index = [str(name, "utf8").replace(view_prefix, '') for name in f1_w['feature']]
        factor_w['factor_{}'.format(i + 1)] = f1_w['weights']
    factor_w.index = [str(name, "utf8").replace(view_prefix, '') for name in f1_w['feature']]

    x_col = 'factor_{}'.format(factor1)
    y_col = 'factor_{}'.format(factor2)

    # Explicit copy: the 'sig' column is added below, and assigning into a
    # column subset of another frame triggers pandas' chained-assignment warning.
    plot_data3 = factor_w[[x_col, y_col]].copy()

    x_up = plot_data3[x_col] > weith_threshold
    x_down = plot_data3[x_col] < -weith_threshold
    y_up = plot_data3[y_col] > weith_threshold
    y_down = plot_data3[y_col] < -weith_threshold

    # A feature is highlighted only when it passes the threshold on BOTH axes.
    plot_data3['sig'] = 'normal'
    plot_data3.loc[x_up & y_up, 'sig'] = 'up-up'
    plot_data3.loc[x_up & y_down, 'sig'] = 'up-down'
    plot_data3.loc[x_down & y_up, 'sig'] = 'down-up'
    plot_data3.loc[x_down & y_down, 'sig'] = 'down-down'

    if colors_dict is None:
        colors_dict = {'up-up': '#a51616', 'up-down': '#e25d5d', 'down-up': '#1a6e1a',
                       'down-down': '#5de25d', 'normal': '#c2c2c2'}

    highlighted = ('up-up', 'up-down', 'down-up', 'down-down')

    fig, ax = plt.subplots(figsize=figsize)
    # Draw the background points first so highlighted groups sit on top.
    for group in ('normal',) + highlighted:
        in_group = plot_data3['sig'] == group
        ax.scatter(plot_data3.loc[in_group, x_col],
                   plot_data3.loc[in_group, y_col],
                   color=colors_dict[group], alpha=0.5)

    # Dashed guide lines at +/- threshold; their extent is fixed to [-1, 1].
    plt.vlines(x=weith_threshold, ymin=-1, ymax=1, color=colors_dict['up-up'], linestyles='dashed')
    plt.hlines(y=weith_threshold, xmin=-1, xmax=1, color=colors_dict['up-up'], linestyles='dashed')
    plt.vlines(x=-weith_threshold, ymin=-1, ymax=1, color=colors_dict['down-down'], linestyles='dashed')
    plt.hlines(y=-weith_threshold, xmin=-1, xmax=1, color=colors_dict['down-down'], linestyles='dashed')

    for side in ('top', 'right', 'bottom', 'left'):
        ax.spines[side].set_visible(True)
    plt.grid(False)

    plt.xlabel(x_col, fontsize=labels_fontsize)
    plt.ylabel(y_col, fontsize=labels_fontsize)
    plt.xticks(fontsize=ticks_fontsize)
    plt.yticks(fontsize=ticks_fontsize)

    from adjustText import adjust_text
    # Label up to plot_gene_num features per quadrant, taken in table order
    # (no sorting by weight is applied here).
    for sig in highlighted:
        hub_gene = plot_data3.loc[plot_data3['sig'] == sig].index.tolist()
        if not hub_gene:
            continue
        texts = [ax.text(plot_data3.loc[gene, x_col],
                         plot_data3.loc[gene, y_col],
                         gene,
                         fontdict={'size': 10, 'weight': 'bold', 'color': 'black'}
                         ) for gene in hub_gene[:plot_gene_num]]
        adjust_text(texts, only_move={'text': 'xy'}, arrowprops=dict(arrowstyle='->', color='grey'),)

    plt.title(title, fontsize=title_fontsize)
    if save:
        fig.savefig("factor_gene_{}.png".format(title), dpi=300, bbox_inches='tight')
    return fig, ax
plot_weights(view, factors=None, n_features=5, w_scaled=False, w_abs=False, size=2, color='black', label_size=5, x_offset=0.01, y_offset=0.15, jitter=0.01, line_width=0.5, line_color='black', line_alpha=0.2, zero_line=True, zero_line_width=1, ncols=4, sharex=True, sharey=False, feature_label=None, **kwargs)
¶
Plot weights for MOFA factors.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
view |
str
|
Name of the modality/view. |
required |
factors |
List of factors to plot (all by default). |
None
|
|
n_features |
int
|
Number of features with largest weights to label. |
5
|
w_scaled |
bool
|
Whether to scale weights to unit variance. |
False
|
w_abs |
bool
|
Whether to plot absolute weight values. |
False
|
size |
float
|
Dot size. |
2
|
color |
str
|
Color for labeled dots. |
'black'
|
label_size |
float
|
Font size of feature labels. |
5
|
x_offset |
float
|
Offset for feature labels from left/right side. |
0.01
|
y_offset |
float
|
Parameter to repel feature labels along y axis. |
0.15
|
jitter |
float
|
Amount of jitter applied to the dots of each factor. |
0.01
|
line_width |
float
|
Width of lines connecting labels with dots. |
0.5
|
line_color |
str
|
Color of lines connecting labels with dots. |
'black'
|
line_alpha |
float
|
Alpha level for lines connecting labels with dots. |
0.2
|
zero_line |
bool
|
Whether to plot dotted line at zero. |
True
|
zero_line_width |
float
|
Width of zero line. |
1
|
ncols |
int
|
Number of columns in grid of multiple plots. |
4
|
sharex |
bool
|
Whether to use same X axis across panels. |
True
|
sharey |
bool
|
Whether to use same Y axis across panels. |
False
|
feature_label |
str
|
Column name in var containing feature labels. |
None
|
**kwargs |
Additional arguments passed to seaborn plotting functions. |
{}
|
Returns:
Name | Type | Description |
---|---|---|
fig |
matplotlib.figure.Figure
|
The figure object. |
ax |
matplotlib.axes._axes.Axes
|
The axis object. |
Source code in /Users/fernandozeng/miniforge3/envs/space/lib/python3.10/site-packages/omicverse/single/_mofa.py
def plot_weights(self,view:str,factors=None,n_features: int = 5,
                 w_scaled: bool = False,
                 w_abs: bool = False,
                 size: float = 2,
                 color: str = "black",
                 label_size: float = 5,
                 x_offset: float = 0.01,
                 y_offset: float = 0.15,
                 jitter: float = 0.01,
                 line_width: float = 0.5,
                 line_color: str = "black",
                 line_alpha: float = 0.2,
                 zero_line: bool = True,
                 zero_line_width: float = 1,
                 ncols: int = 4,
                 sharex: bool = True,
                 sharey: bool = False,
                 feature_label: str = None,
                 **kwargs) -> Tuple[matplotlib.figure.Figure, matplotlib.axes._axes.Axes]:
    r"""
    Plot weights for MOFA factors as a strip plot with labelled top features.

    One strip of dots is drawn per factor on a single axes; the
    ``n_features`` features with the largest absolute weight in each factor
    are highlighted and annotated.

    Arguments:
        view: Name of the modality/view.
        factors: List of factors to plot (all by default). Integer entries
            are translated to column names of the form ``Factor<i>``.
        n_features: Number of features with largest weights to label.
        w_scaled: Whether to divide each weight column by its maximum
            absolute value.
        w_abs: Whether to plot absolute weight values.
        size: Dot size.
        color: Color for labeled dots.
        label_size: Font size of feature labels.
        x_offset: Offset for feature labels from left/right side.
        y_offset: Parameter to repel feature labels along y axis.
        jitter: Jitter value passed to the strip plot.
        line_width: Width of lines connecting labels with dots.
        line_color: Color of lines connecting labels with dots.
        line_alpha: Alpha level for lines connecting labels with dots.
        zero_line: Whether to plot dotted line at zero.
        zero_line_width: Width of zero line.
        ncols: Number of columns in grid of multiple plots.
            (Accepted but not referenced in this body.)
        sharex: Whether to use same X axis across panels.
            (Accepted but not referenced in this body.)
        sharey: Whether to use same Y axis across panels.
            (Accepted but not referenced in this body.)
        feature_label: Column name in var containing feature labels.
        **kwargs: Additional arguments passed to seaborn plotting functions.
            (Accepted but not referenced in this body.)

    Returns:
        fig: The figure object.
        ax: The axis object.

    Raises:
        ValueError: If either membership test against ``self.model_path``
            below fails.
    """
    # NOTE(review): self.model_path is passed to get_weights() as `hdf5_path`
    # below. If it is a plain path string, both `in` tests here are substring
    # checks on the path rather than look-ups inside the model -- confirm the
    # intended container before relying on these guards.
    if view not in self.model_path:
        raise ValueError(f"View {view} not found in MOFA model")
    if 'mofa_weights' not in self.model_path:
        raise ValueError(f"Weights not found in MOFA model")
    # Get weights
    weights = get_weights(hdf5_path=self.model_path,view=view,factor=factors)
    # Get feature labels
    # NOTE(review): both branches are identical, so `feature_label` currently
    # has no effect on which names are used.
    if feature_label is not None and feature_label in self.model_path:
        feature_names = get_weights(hdf5_path=self.model_path,view=view,factor=factors)['feature']
    else:
        feature_names = get_weights(hdf5_path=self.model_path,view=view,factor=factors)['feature']
    # Filter factors if specified
    if factors is not None:
        # Integer factor ids become 'Factor<i>' column names; other entries are used as given.
        factor_names = [f'Factor{i}' if isinstance(i, int) else i for i in factors]
        weights = weights[factor_names]
    # Scale weights if requested (each column divided by its largest absolute value)
    if w_scaled:
        weights = weights / weights.abs().max()
    # Convert to absolute values if requested
    if w_abs:
        weights = weights.abs()
    # Melt the DataFrame for plotting: one row per (feature index, factor) pair
    wm = weights.reset_index().melt(
        id_vars='index',
        var_name='factor',
        value_name='value'
    )
    # Translate each melted index value to its feature name.
    wm['feature'] = wm['index'].map(lambda x: feature_names[x])
    wm['abs_value'] = abs(wm['value'])
    # Sort factors numerically by the integer that follows 'Factor' in the name
    wm['factor'] = wm['factor'].astype('category')
    wm['factor'] = wm['factor'].cat.reorder_categories(
        sorted(wm['factor'].cat.categories, key=lambda x: int(x.split('Factor')[1]))
    )
    # Get features to label: the top n_features by absolute weight within each factor
    features_to_label = []
    for factor in wm['factor'].unique():
        factor_data = wm[wm['factor'] == factor].sort_values('abs_value', ascending=False)
        features_to_label.extend(factor_data['feature'].head(n_features))
    # A feature selected for any factor is flagged in every factor's rows.
    wm['to_label'] = wm['feature'].isin(features_to_label)
    # Create plot (figure size is fixed at 10x6)
    fig, ax = plt.subplots(figsize=(10, 6))
    # Create stripplot; hue separates flagged dots (drawn in `color`) from the rest
    g = sns.stripplot(
        data=wm,
        x='value',
        y='factor',
        jitter=jitter,
        size=size,
        hue='to_label',
        palette=['lightgrey', color],
        ax=ax
    )
    # Remove legend
    g.legend().remove()
    # Add feature labels, handling positive and negative weights separately
    for fi, factor in enumerate(wm['factor'].unique()):
        for sign_i in [1, -1]:
            to_label = wm[(wm['factor'] == factor) &
                          (wm['to_label']) &
                          (wm['value'] * sign_i > 0)].sort_values('abs_value', ascending=False)
            if len(to_label) == 0:
                continue
            # Labels start just beyond the largest labelled weight on this side.
            x_start_pos = sign_i * (to_label['abs_value'].max() + x_offset)
            # Start position for the label column; each later label steps by y_offset.
            y_start_pos = fi - ((len(to_label) - 1) // 2) * y_offset
            y_prev = y_start_pos
            for i, (_, point) in enumerate(to_label.iterrows()):
                y_loc = y_prev + y_offset if i != 0 else y_start_pos
                g.annotate(
                    point['feature'],
                    xy=(point['value'], fi),
                    xytext=(x_start_pos, y_loc),
                    arrowprops=dict(
                        arrowstyle='-',
                        connectionstyle='arc3',
                        color=line_color,
                        alpha=line_alpha,
                        linewidth=line_width
                    ),
                    horizontalalignment='left' if sign_i > 0 else 'right',
                    size=label_size,
                    color='black',
                    weight='regular',
                    alpha=0.9
                )
                y_prev = y_loc
    # Add zero line
    if zero_line:
        ax.axvline(0, ls='--', color='lightgrey', linewidth=zero_line_width, zorder=0)
    # Customize plot
    sns.despine(offset=10, trim=True)
    ax.set_xlabel('Feature weight')
    ax.set_ylabel('')
    ax.set_title(view)
    return fig, ax
plot_top_feature_dotplot(view, cmap='bwr', n_genes=3)
¶
Plot the top features of each factor in dotplot
Parameters:
Name | Type | Description | Default |
---|---|---|---|
view |
str
|
str, the view of the factor |
required |
cmap |
str
|
str, the color map of the plot |
'bwr'
|
n_genes |
int
|
int, the number of genes to plot |
3
|
Returns:
Name | Type | Description |
---|---|---|
axes |
list
|
the list of the figure |
Source code in /Users/fernandozeng/miniforge3/envs/space/lib/python3.10/site-packages/omicverse/single/_mofa.py
def plot_top_feature_dotplot(self,view:str,cmap:str='bwr',n_genes:int=3)->list:
    """
    Draw a dot plot of the top-ranked features of every factor for one view.

    Arguments:
        view: str, the view of the factor
        cmap: str, the color map of the plot
        n_genes: int, the number of genes to plot

    Returns:
        axes: the list of the figure
    """
    view_prefix = f'{view}_'
    n_factors = self.factors.shape[1]

    # One weight column per factor; rows are feature names without the
    # '<view>_' prefix.
    weight_table = pd.DataFrame()
    for factor_id in range(1, n_factors + 1):
        factor_weights = get_weights(hdf5_path=self.model_path, view=view, factor=factor_id)
        feature_ids = [str(raw, "utf8").replace(view_prefix, '') for raw in factor_weights['feature']]
        factor_weights.index = feature_ids
        weight_table[f'factor_{factor_id}'] = factor_weights['weights']
    weight_table.index = feature_ids

    # The table is concatenated with itself, so every factor name occurs twice
    # among the observations before ranking (presumably so each group has more
    # than one observation -- TODO confirm).
    doubled = pd.concat([weight_table, weight_table], axis=1).T
    factor_adata = anndata.AnnData(doubled)
    factor_adata.obs['Factor'] = factor_adata.obs.index
    factor_adata.obs['Factor'] = factor_adata.obs['Factor'].astype('category')

    sc.tl.rank_genes_groups(factor_adata, groupby='Factor', method='wilcoxon')
    axes = sc.pl.rank_genes_groups_dotplot(
        factor_adata,
        n_genes=n_genes,
        cmap=cmap,
        show=False,
    )
    return axes
plot_top_feature_heatmap(view, cmap='bwr', n_genes=3)
¶
Plot the top features of each factor in a matrix plot (heatmap).
Parameters:
Name | Type | Description | Default |
---|---|---|---|
view |
str
|
str, the view of the factor |
required |
cmap |
str
|
str, the color map of the plot |
'bwr'
|
n_genes |
int
|
int, the number of genes to plot |
3
|
Returns:
Name | Type | Description |
---|---|---|
axes |
list
|
the list of the figure |
Source code in /Users/fernandozeng/miniforge3/envs/space/lib/python3.10/site-packages/omicverse/single/_mofa.py
def plot_top_feature_heatmap(self,view:str,cmap:str='bwr',n_genes:int=3)->list:
    """
    Draw a matrix plot (heatmap) of the top-ranked features of every factor
    for one view.

    Arguments:
        view: str, the view of the factor
        cmap: str, the color map of the plot
        n_genes: int, the number of genes to plot

    Returns:
        axes: the list of the figure
    """
    view_prefix = f'{view}_'
    n_factors = self.factors.shape[1]

    # One weight column per factor; rows are feature names without the
    # '<view>_' prefix.
    weight_table = pd.DataFrame()
    for factor_id in range(1, n_factors + 1):
        factor_weights = get_weights(hdf5_path=self.model_path, view=view, factor=factor_id)
        feature_ids = [str(raw, "utf8").replace(view_prefix, '') for raw in factor_weights['feature']]
        factor_weights.index = feature_ids
        weight_table[f'factor_{factor_id}'] = factor_weights['weights']
    weight_table.index = feature_ids

    # The table is concatenated with itself, so every factor name occurs twice
    # among the observations before ranking (presumably so each group has more
    # than one observation -- TODO confirm).
    doubled = pd.concat([weight_table, weight_table], axis=1).T
    factor_adata = anndata.AnnData(doubled)
    factor_adata.obs['Factor'] = factor_adata.obs.index
    factor_adata.obs['Factor'] = factor_adata.obs['Factor'].astype('category')

    sc.tl.rank_genes_groups(factor_adata, groupby='Factor', method='wilcoxon')
    axes = sc.pl.rank_genes_groups_matrixplot(
        factor_adata,
        n_genes=n_genes,
        cmap=cmap,
        show=False,
    )
    return axes
get_top_feature(view, log2fc_min=3, pval_cutoff=0.1)
¶
Get the top features of each factor
Parameters:
Name | Type | Description | Default |
---|---|---|---|
view |
str
|
str, the view of the factor |
required |
log2fc_min |
int
|
float, the minimum log2fc of the feature |
3
|
pval_cutoff |
float
|
float, the maximum pval of the feature |
0.1
|
Returns:
Name | Type | Description |
---|---|---|
top_feature |
dict
|
dict, the top features of each factor |
Source code in /Users/fernandozeng/miniforge3/envs/space/lib/python3.10/site-packages/omicverse/single/_mofa.py
def get_top_feature(self,view:str,log2fc_min:int=3,pval_cutoff:float=0.1)->dict:
    """
    Collect the marker-like top features of every factor for one view.

    Arguments:
        view: str, the view of the factor
        log2fc_min: float, the minimum log2fc of the feature
        pval_cutoff: float, the maximum pval of the feature

    Returns:
        top_feature: dict, the top features of each factor
    """
    view_prefix = f'{view}_'
    n_factors = self.factors.shape[1]

    # One weight column per factor; rows are feature names without the
    # '<view>_' prefix.
    weight_table = pd.DataFrame()
    for factor_id in range(1, n_factors + 1):
        factor_weights = get_weights(hdf5_path=self.model_path, view=view, factor=factor_id)
        feature_ids = [str(raw, "utf8").replace(view_prefix, '') for raw in factor_weights['feature']]
        factor_weights.index = feature_ids
        weight_table[f'factor_{factor_id}'] = factor_weights['weights']
    weight_table.index = feature_ids

    # The table is concatenated with itself, so every factor name occurs twice
    # among the observations handed to the marker search.
    doubled = pd.concat([weight_table, weight_table], axis=1).T
    factor_adata = anndata.AnnData(doubled)
    factor_adata.obs['Factor'] = factor_adata.obs.index
    factor_adata.obs['Factor'] = factor_adata.obs['Factor'].astype('category')

    return get_celltype_marker(
        factor_adata,
        clustertype='Factor',
        log2fc_min=log2fc_min,
        pval_cutoff=pval_cutoff,
    )
omicverse.single.GLUE_pair
¶
Bases: object
Source code in /Users/fernandozeng/miniforge3/envs/space/lib/python3.10/site-packages/omicverse/single/_mofa.py
class GLUE_pair(object):
    r"""Pair cells between two omics layers using their GLUE embeddings.

    Intended call order: ``correlation()`` -> ``find_neighbor_cell()`` ->
    ``pair_omic()``; each step reads attributes written by the previous one.
    """

    def __init__(self,rna:anndata.AnnData,
                 atac:anndata.AnnData) -> None:
        r"""Pair the cells between RNA and ATAC using result of GLUE.

        Arguments:
            rna: The AnnData of RNA-seq.
            atac: The AnnData of ATAC-seq.

        Returns:
            None
        """
        print('......Extract GLUE layer from obs')
        # Embeddings are read from .obsm['X_glue'] and indexed by cell name.
        self.rna_loc=pd.DataFrame(rna.obsm['X_glue'], index=rna.obs.index)
        self.atac_loc=pd.DataFrame(atac.obsm['X_glue'], index=atac.obs.index)

    def correlation(self):
        r"""Perform Pearson Correlation analysis in the layer of GLUE.

        For every processed ATAC cell, the 50 most correlated RNA cells are
        kept: correlation values go to ``self.rna_pd`` and the matching RNA
        cell names to ``self.atac_pd`` (both indexed by ATAC cell name, with
        columns ``rank_0 .. rank_49``).

        Returns:
            None: Updates self.rna_pd and self.atac_pd attributes
        """
        print('......Prepare for pair')
        import gc
        len1=(len(self.rna_loc)//5000)+1
        len2=(len(self.atac_loc)//5000)+1
        # NOTE(review): the block count is capped to the smaller of the two
        # and then used for BOTH loops, so cells beyond that many 5000-cell
        # blocks are never visited in either omic. Behaviour kept as-is;
        # confirm whether this truncation is intended.
        if len1>len2:
            len1=len2
        rank_cols=['rank_'+str(i) for i in range(50)]
        p_pd=pd.DataFrame(columns=rank_cols)
        n_pd=pd.DataFrame(columns=rank_cols)
        print('......Start to calculate the Pearson coef')
        for j in range(len1):
            # The ATAC block depends only on j, so slice it once per outer step.
            t2=self.atac_loc.iloc[5000*(j):5000*(j+1)]
            c=pd.DataFrame()
            with trange(len1) as tt:
                for i in tt:
                    t1=self.rna_loc.iloc[5000*(i):5000*(i+1)]
                    # corrcoef stacks t1 over t2; keep the ATAC-rows x RNA-columns block.
                    a=np.corrcoef(t1,t2)[len(t1):,0:len(t1)]
                    b=pd.DataFrame(a,index=t2.index,columns=t1.index)
                    c=pd.concat([c,b],axis=1)
                    del t1
                    del a
                    del b
                    gc.collect()
                    tt.set_description('Now Pearson block is {}/{}'.format(i,len1))
            with trange(len(c)) as t:
                for i in t:
                    t_c=c.iloc[i]
                    # Sort once and reuse the result for both values and names.
                    top_hits=t_c.sort_values(ascending=False)[:50]
                    p_pd.loc[t_c.name]=top_hits.values
                    n_pd.loc[t_c.name]=top_hits.index.tolist()
                    t.set_description('Now rna_index is {}/{}, all is {}'.format(i+j*5000,i+j*5000+len(c),len(self.atac_loc)))
            print('Now epoch is {}, {}/{}'.format(j,j*5000+len(c),len(self.atac_loc)))
            del c
            del t2
            gc.collect()
        self.rna_pd=p_pd
        self.atac_pd=n_pd

    def find_neighbor_cell(self,depth:int=10,cor:float=0.9)->pd.DataFrame:
        r"""Find the neighbor cells between two omics using pearson correlation.

        Greedy matching over ranks ``0 .. depth-1``: at each rank, remaining
        rows whose correlation exceeds ``cor`` are visited from highest to
        lowest and paired with their candidate unless that candidate is
        already taken.

        Arguments:
            depth: The depth of the search for the nearest neighbor. (10)
            cor: Correlation threshold for pairing. (0.9)

        Returns:
            result: The pair result as DataFrame
        """
        if depth>50:
            print('......depth limited to 50')
            depth=50
        rubish_c=[]
        finish_c=[]
        # Set mirror of rubish_c for O(1) "already taken" checks; the list
        # keeps the pairing order for the result table.
        taken=set()
        p_pd=self.rna_pd.copy()
        n_pd=self.atac_pd.copy()
        with trange(depth) as dt:
            for d in dt:
                rank_col='rank_{}'.format(d)
                p_pd=p_pd.loc[p_pd[rank_col]>cor]
                p_pd=p_pd.sort_values(rank_col,ascending=False)
                for i in p_pd.index:
                    name=n_pd.loc[i,rank_col]
                    if name in taken:
                        continue
                    taken.add(name)
                    finish_c.append(i)
                    rubish_c.append(name)
                # Drop rows that have been paired before moving to the next rank.
                p_pd=p_pd.loc[~p_pd.index.isin(finish_c)]
                n_pd=n_pd.loc[~n_pd.index.isin(finish_c)]
                dt.set_description('Now depth is {}/{}'.format(d,depth))
        result=pd.DataFrame()
        result['omic_1']=rubish_c
        result['omic_2']=finish_c
        result.index=['cell_{}'.format(i) for i in range(len(result))]
        self.pair_res=result
        return result

    def pair_omic(self,omic1:anndata.AnnData,omic2:anndata.AnnData)->Tuple[anndata.AnnData,anndata.AnnData]:
        r"""Pair the omics using the result of find_neighbor_cell.

        Arguments:
            omic1: The AnnData of first omic.
            omic2: The AnnData of second omic.

        Returns:
            rna1: The paired AnnData of first omic.
            atac1: The paired AnnData of second omic.

        Raises:
            AttributeError: If no pairing table is available yet.
        """
        # find_neighbor_cell() stores its table as `pair_res`; the previous
        # code read `res_pair`, which is never assigned in this class. The old
        # name is still honoured in case a caller set it by hand.
        pairs=getattr(self,'pair_res',None)
        if pairs is None:
            pairs=getattr(self,'res_pair',None)
        if pairs is None:
            raise AttributeError('No pairing result found; run find_neighbor_cell() first')
        rna1=omic1[pairs['omic_1']].copy()
        atac1=omic2[pairs['omic_2']].copy()
        rna1.obs.index=pairs.index
        atac1.obs.index=pairs.index
        return rna1,atac1
__init__(rna, atac)
¶
Pair the cells between RNA and ATAC using result of GLUE.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
rna |
anndata.AnnData
|
The AnnData of RNA-seq. |
required |
atac |
anndata.AnnData
|
The AnnData of ATAC-seq. |
required |
Returns:
Type | Description |
---|---|
None
|
None |
Source code in /Users/fernandozeng/miniforge3/envs/space/lib/python3.10/site-packages/omicverse/single/_mofa.py
def __init__(self, rna: anndata.AnnData,
             atac: anndata.AnnData) -> None:
    r"""Store the GLUE embeddings of both omics as cell-indexed tables.

    Arguments:
        rna: The AnnData of RNA-seq.
        atac: The AnnData of ATAC-seq.

    Returns:
        None
    """
    print('......Extract GLUE layer from obs')

    # Embeddings are read from .obsm['X_glue'] and indexed by cell name.
    rna_embedding = rna.obsm['X_glue']
    rna_cells = rna.obs.index
    self.rna_loc = pd.DataFrame(rna_embedding, index=rna_cells)

    atac_embedding = atac.obsm['X_glue']
    atac_cells = atac.obs.index
    self.atac_loc = pd.DataFrame(atac_embedding, index=atac_cells)
correlation()
¶
Perform Pearson Correlation analysis in the layer of GLUE.
Returns:
Name | Type | Description |
---|---|---|
None | Updates self.rna_pd and self.atac_pd attributes |
Source code in /Users/fernandozeng/miniforge3/envs/space/lib/python3.10/site-packages/omicverse/single/_mofa.py
def correlation(self):
    r"""Perform Pearson Correlation analysis in the layer of GLUE.

    For every processed ATAC cell, the 50 most correlated RNA cells are kept:
    correlation values go to ``self.rna_pd`` and the matching RNA cell names
    to ``self.atac_pd`` (both indexed by ATAC cell name, with columns
    ``rank_0 .. rank_49``).

    Returns:
        None: Updates self.rna_pd and self.atac_pd attributes
    """
    print('......Prepare for pair')
    import gc
    len1=(len(self.rna_loc)//5000)+1
    len2=(len(self.atac_loc)//5000)+1
    # NOTE(review): the block count is capped to the smaller of the two and
    # then used for BOTH loops, so cells beyond that many 5000-cell blocks are
    # never visited in either omic. Behaviour kept as-is; confirm whether this
    # truncation is intended.
    if len1>len2:
        len1=len2
    rank_cols=['rank_'+str(i) for i in range(50)]
    p_pd=pd.DataFrame(columns=rank_cols)
    n_pd=pd.DataFrame(columns=rank_cols)
    print('......Start to calculate the Pearson coef')
    for j in range(len1):
        # The ATAC block depends only on j, so slice it once per outer step.
        t2=self.atac_loc.iloc[5000*(j):5000*(j+1)]
        c=pd.DataFrame()
        with trange(len1) as tt:
            for i in tt:
                t1=self.rna_loc.iloc[5000*(i):5000*(i+1)]
                # corrcoef stacks t1 over t2; keep the ATAC-rows x RNA-columns block.
                a=np.corrcoef(t1,t2)[len(t1):,0:len(t1)]
                b=pd.DataFrame(a,index=t2.index,columns=t1.index)
                c=pd.concat([c,b],axis=1)
                del t1
                del a
                del b
                gc.collect()
                tt.set_description('Now Pearson block is {}/{}'.format(i,len1))
        with trange(len(c)) as t:
            for i in t:
                t_c=c.iloc[i]
                # Sort once and reuse the result for both values and names.
                top_hits=t_c.sort_values(ascending=False)[:50]
                p_pd.loc[t_c.name]=top_hits.values
                n_pd.loc[t_c.name]=top_hits.index.tolist()
                t.set_description('Now rna_index is {}/{}, all is {}'.format(i+j*5000,i+j*5000+len(c),len(self.atac_loc)))
        print('Now epoch is {}, {}/{}'.format(j,j*5000+len(c),len(self.atac_loc)))
        del c
        del t2
        gc.collect()
    self.rna_pd=p_pd
    self.atac_pd=n_pd
find_neighbor_cell(depth=10, cor=0.9)
¶
Find the neighbor cells between two omics using pearson correlation.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
depth |
int
|
The depth of the search for the nearest neighbor. (10) |
10
|
cor |
float
|
Correlation threshold for pairing. (0.9) |
0.9
|
Returns:
Name | Type | Description |
---|---|---|
result |
pd.DataFrame
|
The pair result as DataFrame |
Source code in /Users/fernandozeng/miniforge3/envs/space/lib/python3.10/site-packages/omicverse/single/_mofa.py
def find_neighbor_cell(self,depth:int=10,cor:float=0.9)->pd.DataFrame:
    r"""Find the neighbor cells between two omics using pearson correlation.

    Rank columns are visited from ``rank_0`` upwards. At each rank the
    remaining rows are processed from the highest correlation down, and a
    row is paired with its candidate unless that candidate was already
    taken by an earlier row.

    Arguments:
        depth: The depth of the search for the nearest neighbor. (10)
        cor: Correlation threshold for pairing. (0.9)

    Returns:
        result: The pair result as DataFrame, indexed ``cell_<n>``, with
            column ``omic_1`` (candidate labels read from ``self.atac_pd``)
            and column ``omic_2`` (row labels of ``self.rna_pd``). The same
            table is stored in ``self.pair_res``.
    """
    if depth>50:
        print('......depth limited to 50')
        depth=50
    rubish_c=[]
    finish_c=[]
    # Set mirror of rubish_c: constant-time "already taken" checks instead of
    # scanning the growing list for every candidate.
    taken=set()
    p_pd=self.rna_pd.copy()
    n_pd=self.atac_pd.copy()
    with trange(depth) as dt:
        for d in dt:
            rank='rank_{}'.format(d)
            # Only rows whose candidate at this rank is above the threshold.
            p_pd=p_pd.loc[p_pd[rank]>cor]
            p_pd=p_pd.sort_values(rank,ascending=False)
            for i in p_pd.index:
                name=n_pd.loc[i,rank]
                if name not in taken:
                    taken.add(name)
                    finish_c.append(i)
                    rubish_c.append(name)
            # Paired rows take no part in the deeper ranks.
            p_pd=p_pd.loc[~p_pd.index.isin(finish_c)]
            n_pd=n_pd.loc[~n_pd.index.isin(finish_c)]
            dt.set_description('Now depth is {}/{}'.format(d,depth))
    result=pd.DataFrame()
    result['omic_1']=rubish_c
    result['omic_2']=finish_c
    result.index=['cell_{}'.format(i) for i in range(len(result))]
    self.pair_res=result
    return result
pair_omic(omic1, omic2)
¶
Pair the omics using the result of find_neighbor_cell.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
omic1 |
anndata.AnnData
|
The AnnData of first omic. |
required |
omic2 |
anndata.AnnData
|
The AnnData of second omic. |
required |
Returns:
Name | Type | Description |
---|---|---|
rna1 |
anndata.AnnData
|
The paired AnnData of first omic. |
atac1 |
anndata.AnnData
|
The paired AnnData of second omic. |
Source code in /Users/fernandozeng/miniforge3/envs/space/lib/python3.10/site-packages/omicverse/single/_mofa.py
def pair_omic(self,omic1:anndata.AnnData,omic2:anndata.AnnData)->Tuple[anndata.AnnData,anndata.AnnData]:
    r"""Pair the omics using the result of find_neighbor_cell.

    Arguments:
        omic1: The AnnData of first omic.
        omic2: The AnnData of second omic.

    Returns:
        rna1: The paired AnnData of first omic.
        atac1: The paired AnnData of second omic.

    Raises:
        AttributeError: If no pairing table has been stored on the object yet.
    """
    # find_neighbor_cell stores its table as `pair_res`, while this method
    # historically read `res_pair`. A manually assigned `res_pair` keeps
    # priority so callers relying on that workaround see no change.
    pairs=getattr(self,'res_pair',None)
    if pairs is None:
        pairs=getattr(self,'pair_res',None)
    if pairs is None:
        raise AttributeError('pair_res is not set; run find_neighbor_cell() first')
    # Select the matched cells in pairing order, then give both objects the
    # same `cell_<n>` labels so they line up row by row.
    rna1=omic1[pairs['omic_1']].copy()
    atac1=omic2[pairs['omic_2']].copy()
    rna1.obs.index=pairs.index
    atac1.obs.index=pairs.index
    return rna1,atac1
omicverse.single.factor_exact(adata, hdf5_path)
¶
Extract the factor information from hdf5 file.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
adata |
anndata.AnnData
|
The AnnData object. |
required |
hdf5_path |
str
|
The path of hdf5 file. |
required |
Returns:
Name | Type | Description |
---|---|---|
adata |
anndata.AnnData
|
The AnnData object with factor information. |
Source code in /Users/fernandozeng/miniforge3/envs/space/lib/python3.10/site-packages/omicverse/single/_mofa.py
def factor_exact(adata:anndata.AnnData,hdf5_path:str)->anndata.AnnData:
    r"""
    Extract the factor information from hdf5 file.

    The factor values of the first group listed in the file are written to
    ``adata.obs`` as columns ``factor1``, ``factor2``, ...

    Arguments:
        adata: The AnnData object.
        hdf5_path: The path of hdf5 file.

    Returns:
        adata: The AnnData object with factor information.
    """
    # The context manager releases the file handle even if a key is missing.
    with h5py.File(hdf5_path,'r') as f_pos:
        g_name=f_pos['groups']['groups'][:][0]
        # Read the whole dataset once rather than once per factor.
        factors=f_pos['expectations']['Z'][g_name][:]
    for i in range(factors.shape[0]):
        adata.obs['factor{0}'.format(i+1)]=factors[i]
    return adata
omicverse.single.factor_correlation(adata, cluster, factor_list, p_threshold=500)
¶
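Score the association between factors and clusters: for every factor, each cluster is compared against all remaining cells with a t-test and the result is reported as -log(p).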
Parameters:
Name | Type | Description | Default |
---|---|---|---|
adata |
anndata.AnnData
|
The AnnData object. |
required |
cluster |
str
|
The name of cluster. |
required |
factor_list |
list
|
The list of factors. |
required |
p_threshold |
int
|
Upper limit for the reported -log(p) score; larger scores are clipped to this value. |
500
|
Returns:
Name | Type | Description |
---|---|---|
cell_pd |
pd.DataFrame
|
The correlation between factors and cluster. |
Source code in /Users/fernandozeng/miniforge3/envs/space/lib/python3.10/site-packages/omicverse/single/_mofa.py
def factor_correlation(adata:anndata.AnnData,cluster:str,
                       factor_list:list,p_threshold:int=500)->pd.DataFrame:
    r"""
    Score how strongly each factor separates every cluster from the rest.

    For each factor and each cluster, the factor values of the cells inside
    the cluster are compared with those of all other cells by a t-test, and
    ``-log(p)`` is stored.

    Arguments:
        adata: The AnnData object; ``adata.obs`` must hold the cluster column
            and one ``factor<i>`` column per requested factor.
        cluster: The name of cluster.
        factor_list: The list of factors.
        p_threshold: Upper limit for the stored ``-log(p)`` score.

    Returns:
        cell_pd: Table with one row per cluster and one ``factor<i>`` column
            per requested factor.
    """
    obs=adata.obs
    groups=list(set(obs[cluster]))
    cell_pd=pd.DataFrame(index=groups)
    for factor in factor_list:
        column='factor'+str(factor)
        scores=[]
        for group in groups:
            inside=obs[obs[cluster]==group][column].values
            outside=obs[~(obs[cluster]==group)][column].values
            _, p_value = stats.ttest_ind(inside,outside)
            score=-np.log(p_value)
            # Clip scores above the limit so they stay on a bounded scale.
            scores.append(p_threshold if score>p_threshold else score)
        cell_pd[column]=scores
    return cell_pd
omicverse.single.get_weights(hdf5_path, view, factor, scale=True)
¶
Get the weights of each feature in a specific factor.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
hdf5_path |
str
|
the path of hdf5 file. |
required |
view |
str
|
the name of view. |
required |
factor |
int
|
the number of factor. |
required |
scale |
bool
|
whether to scale the weights. |
True
|
Returns:
Name | Type | Description |
---|---|---|
res |
pd.DataFrame
|
the weights of each feature in a specific factor. |
Source code in /Users/fernandozeng/miniforge3/envs/space/lib/python3.10/site-packages/omicverse/single/_mofa.py
def get_weights(hdf5_path:str,view:str,
                factor:int,scale:bool=True)->pd.DataFrame:
    r"""
    Get the weights of each feature in a specific factor.

    Arguments:
        hdf5_path: the path of hdf5 file.
        view: the name of view.
        factor: the number of factor (1-based).
        scale: whether to scale the weights.

    Returns:
        res: the weights of each feature in a specific factor, with columns
            ``feature``, ``weights``, ``abs_weights`` and ``sig`` (``'+'``,
            or ``'-'`` for negative weights).

    Raises:
        KeyError: If the view is not present in the file.
    """
    # Only the requested view is read, and the handle is closed on exit.
    with h5py.File(hdf5_path,'r') as f:
        f_name=f['features'][view][:]
        f_w=f['expectations']['W'][view][factor-1]
    if scale:
        f_w=normalization(f_w)
    res=pd.DataFrame()
    res['feature']=f_name
    res['weights']=f_w
    res['abs_weights']=abs(f_w)
    res['sig']='+'
    res.loc[(res.weights<0),'sig'] = '-'
    return res
omicverse.single.glue_pair(rna, atac, depth=20)
¶
Pair the cells between RNA and ATAC using result of GLUE.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
rna |
anndata.AnnData
|
the AnnData of RNA-seq. |
required |
atac |
anndata.AnnData
|
the AnnData of ATAC-seq. |
required |
depth |
int
|
the depth of the search for the nearest neighbor. |
20
|
Source code in /Users/fernandozeng/miniforge3/envs/space/lib/python3.10/site-packages/omicverse/single/_mofa.py
def glue_pair(rna:anndata.AnnData,
              atac:anndata.AnnData,depth:int=20)->pd.DataFrame:
    r"""
    Pair the cells between RNA and ATAC using result of GLUE.

    Arguments:
        rna: the AnnData of RNA-seq.
        atac: the AnnData of ATAC-seq.
        depth: the depth of the search for the nearest neighbor.

    Returns:
        res_pair: DataFrame indexed ``cell_<n>`` with column ``omic_1``
            (RNA cell labels) and column ``omic_2`` (ATAC cell labels).
    """
    block=5000
    n_rank=50
    # Extract the GLUE embedding of both modalities.
    print('......Extract GLUE layer from obs')
    rna_loc=pd.DataFrame(rna.obsm['X_glue'], index=rna.obs.index)
    atac_loc=pd.DataFrame(atac.obsm['X_glue'], index=atac.obs.index)
    # Pearson correlation between the two embeddings, block by block.
    print('......Prepare for pair')
    import gc
    n_rna=len(rna_loc)
    n_atac=len(atac_loc)
    # Ceiling division: every cell of each modality is covered and no empty
    # trailing block is produced when a size is an exact multiple of `block`.
    rna_blocks=(n_rna+block-1)//block
    # Without RNA cells there is nothing to correlate against, so no ATAC
    # block is scanned and the pairing result is empty.
    atac_blocks=(n_atac+block-1)//block if n_rna else 0
    rank_cols=['rank_'+str(i) for i in range(n_rank)]
    p_pd=pd.DataFrame(columns=rank_cols)
    n_pd=pd.DataFrame(columns=rank_cols)
    print('......Start to calculate the Pearson coef')
    for j in range(atac_blocks):
        t2=atac_loc.iloc[block*j:block*(j+1)]
        parts=[]
        with trange(rna_blocks) as tt:
            for i in tt:
                t1=rna_loc.iloc[block*i:block*(i+1)]
                # corrcoef stacks t1 on top of t2; the lower-left corner is
                # the (ATAC cells x RNA cells) cross-correlation.
                a=np.corrcoef(t1,t2)[len(t1):,0:len(t1)]
                parts.append(pd.DataFrame(a,index=t2.index,columns=t1.index))
                del t1
                del a
                tt.set_description('Now Pearson block is {}/{}'.format(i,rna_blocks))
        # Rows: ATAC cells of this block; columns: all RNA cells.
        c=pd.concat(parts,axis=1)
        del parts
        gc.collect()
        with trange(len(c)) as t:
            for i in t:
                # Sort once and reuse it for both the values and the labels.
                top=c.iloc[i].sort_values(ascending=False)[:n_rank]
                p_pd.loc[top.name]=top.values
                n_pd.loc[top.name]=top.index.tolist()
                t.set_description('Now rna_index is {}/{}, all is {}'.format(i+j*block,i+j*block+len(c),n_atac))
        print('Now epoch is {}, {}/{}'.format(j,j*block+len(c),n_atac))
        # Free the block matrix before the next one is allocated.
        del c
        gc.collect()
    # Find the nearest cells. A larger depth pairs more cells successfully,
    # at the cost of some precision.
    def find_neighbor_cell(p_pd,n_pd,depth=10):
        if depth>50:
            print('......depth limited to 50')
            depth=50
        rubish_c=[]
        finish_c=[]
        # Set mirror of rubish_c for constant-time "already taken" checks.
        taken=set()
        with trange(depth) as dt:
            for d in dt:
                rank='rank_{}'.format(d)
                p_pd=p_pd.loc[p_pd[rank]>0.9]
                p_pd=p_pd.sort_values(rank,ascending=False)
                for i in p_pd.index:
                    name=n_pd.loc[i,rank]
                    if name not in taken:
                        taken.add(name)
                        finish_c.append(i)
                        rubish_c.append(name)
                # Paired rows take no part in the deeper ranks.
                p_pd=p_pd.loc[~p_pd.index.isin(finish_c)]
                n_pd=n_pd.loc[~n_pd.index.isin(finish_c)]
                dt.set_description('Now depth is {}/{}'.format(d,depth))
        result=pd.DataFrame()
        result['omic_1']=rubish_c
        result['omic_2']=finish_c
        result.index=['cell_{}'.format(i) for i in range(len(result))]
        return result
    print('......Start to find neighbor')
    res_pair=find_neighbor_cell(p_pd,n_pd,depth=depth)
    return res_pair