API enrichment
omicverse.bulk.pyGSE
¶
Bases: object
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/bulk/_Enrichment.py
class pyGSE(object):
    """Object wrapper around ``geneset_enrichment`` and ``geneset_plot``.

    The constructor only stores the analysis settings; ``enrichment`` runs the
    analysis and caches the result on ``self.enrich_res`` so that
    ``plot_enrichment`` can draw it afterwards.
    """

    def __init__(self,gene_list:list,pathways_dict:dict,pvalue_threshold:float=0.05,pvalue_type:str='auto',
                 background=None,organism:str='Human',description:str='None',outdir:str='./enrichr',cutoff:float=0.5) -> None:
        """Initialize the pyGSE class.

        Arguments:
            gene_list: A list of genes.
            pathways_dict: A dictionary of pathways.
            pvalue_threshold: The p-value threshold for enrichment. Default is 0.05.
            pvalue_type: The p-value type. Default is 'auto'.
            background: The background passed on to the enrichment call. Default is None,
                in which case 'mmusculus_gene_ensembl' is used for organism 'Mouse'/'mouse'/'mm'
                and 'hsapiens_gene_ensembl' for 'Human'/'human'/'hs'; any other organism
                leaves the background as None.
            organism: The organism. Default is 'Human'.
            description: The description. Default is 'None'.
            outdir: The output directory. Default is './enrichr'.
            cutoff: The cutoff for enrichment. Default is 0.5.

        """
        self.gene_list=gene_list
        self.pathways_dict=pathways_dict
        self.pvalue_threshold=pvalue_threshold
        self.pvalue_type=pvalue_type
        self.organism=organism
        self.description=description
        self.outdir=outdir
        self.cutoff=cutoff

        # Only derive a background from the organism when the caller gave none.
        if background is None:
            if organism in ('Mouse','mouse','mm'):
                background='mmusculus_gene_ensembl'
            elif organism in ('Human','human','hs'):
                background='hsapiens_gene_ensembl'
        self.background=background

    def enrichment(self):
        """Run the gene set enrichment analysis with the stored settings.

        The result is also stored on ``self.enrich_res``.

        Returns:
            A pandas.DataFrame object containing the enrichment results.

        """
        enrich_res=geneset_enrichment(self.gene_list,self.pathways_dict,self.pvalue_threshold,self.pvalue_type,
                                      self.organism,self.description,self.background,self.outdir,self.cutoff)
        self.enrich_res=enrich_res
        return enrich_res

    def plot_enrichment(self,num:int=10,node_size:list=[5,10,15],
                        cax_loc:int=2,cax_fontsize:int=12,
                        fig_title:str='',fig_xlabel:str='Fractions of genes',
                        figsize:tuple=(2,4),cmap:str='YlGnBu',text_knock:int=2,text_maxsize:int=20)->matplotlib.axes._axes.Axes:
        """Plot the gene set enrichment result stored by ``enrichment``.

        ``enrichment`` must have been called first, because the plot reads
        ``self.enrich_res``.

        Arguments:
            num: The number of enriched terms to plot. Default is 10.
            node_size: A list of integers defining the size of nodes in the plot. Default is [5,10,15].
            cax_loc: The location of the colorbar on the plot. Either a single number (the left
                position; the remaining rect entries are filled with 0.55, 0.5, 0.02, matching
                the default of geneset_plot) or a full 4-element rect. Default is 2.
            cax_fontsize: The fontsize of the colorbar label. Default is 12.
            fig_title: The title of the plot. Default is an empty string.
            fig_xlabel: The label of the x-axis. Default is 'Fractions of genes'.
            figsize: The size of the plot. Default is (2,4).
            cmap: The colormap to use for the plot. Default is 'YlGnBu'.
            text_knock: Forwarded unchanged to geneset_plot. Default is 2.
            text_maxsize: Forwarded unchanged to geneset_plot. Default is 20.

        Returns:
            A matplotlib.axes.Axes object.

        """
        # geneset_plot hands cax_loc directly to fig.add_axes, which needs a
        # 4-element rect; expand a bare number so the default value works.
        if isinstance(cax_loc,(int,float)):
            cax_loc=[cax_loc,0.55,0.5,0.02]
        return geneset_plot(self.enrich_res,num,node_size,cax_loc,cax_fontsize,
                            fig_title,fig_xlabel,figsize,cmap,text_knock,text_maxsize)
__init__(gene_list, pathways_dict, pvalue_threshold=0.05, pvalue_type='auto', background=None, organism='Human', description='None', outdir='./enrichr', cutoff=0.5)
¶
Initialize the pyGSE class.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
gene_list |
list
|
A list of genes. |
required |
pathways_dict |
dict
|
A dictionary of pathways. |
required |
pvalue_threshold |
float
|
The p-value threshold for enrichment. Default is 0.05. |
0.05
|
pvalue_type |
str
|
The p-value type. Default is 'auto'. |
'auto'
|
organism |
str
|
The organism. Default is 'Human'. |
'Human'
|
description |
str
|
The description. Default is 'None'. |
'None'
|
outdir |
str
|
The output directory. Default is './enrichr'. |
'./enrichr'
|
cutoff |
float
|
The cutoff for enrichment. Default is 0.5. |
0.5
|
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/bulk/_Enrichment.py
def __init__(self,gene_list:list,pathways_dict:dict,pvalue_threshold:float=0.05,pvalue_type:str='auto',
background=None,organism:str='Human',description:str='None',outdir:str='./enrichr',cutoff:float=0.5) -> None:
"""Initialize the pyGSE class.
Arguments:
gene_list: A list of genes.
pathways_dict: A dictionary of pathways.
pvalue_threshold: The p-value threshold for enrichment. Default is 0.05.
pvalue_type: The p-value type. Default is 'auto'.
organism: The organism. Default is 'Human'.
description: The description. Default is 'None'.
outdir: The output directory. Default is './enrichr'.
cutoff: The cutoff for enrichment. Default is 0.5.
"""
self.gene_list=gene_list
self.pathways_dict=pathways_dict
self.pvalue_threshold=pvalue_threshold
self.pvalue_type=pvalue_type
self.organism=organism
self.description=description
self.outdir=outdir
self.cutoff=cutoff
if background is None:
if (organism == 'Mouse') or (organism == 'mouse') or (organism == 'mm'):
background='mmusculus_gene_ensembl'
elif (organism == 'Human') or (organism == 'human') or (organism == 'hs'):
background='hsapiens_gene_ensembl'
self.background=background
else:
self.background=background
enrichment()
¶
gene set enrichment analysis.
Returns:
Type | Description |
---|---|
A pandas.DataFrame object containing the enrichment results. |
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/bulk/_Enrichment.py
def enrichment(self):
    """Run the gene set enrichment analysis with the settings stored on the instance.

    The result is cached on ``self.enrich_res`` before being returned.

    Returns:
        A pandas.DataFrame object containing the enrichment results.

    """
    self.enrich_res=geneset_enrichment(
        gene_list=self.gene_list,
        pathways_dict=self.pathways_dict,
        pvalue_threshold=self.pvalue_threshold,
        pvalue_type=self.pvalue_type,
        organism=self.organism,
        description=self.description,
        background=self.background,
        outdir=self.outdir,
        cutoff=self.cutoff,
    )
    return self.enrich_res
plot_enrichment(num=10, node_size=[5, 10, 15], cax_loc=2, cax_fontsize=12, fig_title='', fig_xlabel='Fractions of genes', figsize=(2, 4), cmap='YlGnBu', text_knock=2, text_maxsize=20)
¶
Plot the gene set enrichment result.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
num |
int
|
The number of enriched terms to plot. Default is 10. |
10
|
node_size |
list
|
A list of integers defining the size of nodes in the plot. Default is [5,10,15]. |
[5, 10, 15]
|
cax_loc |
int
|
The location of the colorbar on the plot. Default is 2. |
2
|
cax_fontsize |
int
|
The fontsize of the colorbar label. Default is 12. |
12
|
fig_title |
str
|
The title of the plot. Default is an empty string. |
''
|
fig_xlabel |
str
|
The label of the x-axis. Default is 'Fractions of genes'. |
'Fractions of genes'
|
figsize |
tuple
|
The size of the plot. Default is (2,4). |
(2, 4)
|
cmap |
str
|
The colormap to use for the plot. Default is 'YlGnBu'. |
'YlGnBu'
|
Returns:
Type | Description |
---|---|
matplotlib.axes._axes.Axes
|
A matplotlib.axes.Axes object. |
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/bulk/_Enrichment.py
def plot_enrichment(self,num:int=10,node_size:list=[5,10,15],
                    cax_loc:int=2,cax_fontsize:int=12,
                    fig_title:str='',fig_xlabel:str='Fractions of genes',
                    figsize:tuple=(2,4),cmap:str='YlGnBu',text_knock:int=2,text_maxsize:int=20)->matplotlib.axes._axes.Axes:
    """Plot the gene set enrichment result stored by ``enrichment``.

    ``enrichment`` must have been called first, because the plot reads
    ``self.enrich_res``.

    Arguments:
        num: The number of enriched terms to plot. Default is 10.
        node_size: A list of integers defining the size of nodes in the plot. Default is [5,10,15].
        cax_loc: The location of the colorbar on the plot. Either a single number (the left
            position; the remaining rect entries are filled with 0.55, 0.5, 0.02, matching
            the default of geneset_plot) or a full 4-element rect. Default is 2.
        cax_fontsize: The fontsize of the colorbar label. Default is 12.
        fig_title: The title of the plot. Default is an empty string.
        fig_xlabel: The label of the x-axis. Default is 'Fractions of genes'.
        figsize: The size of the plot. Default is (2,4).
        cmap: The colormap to use for the plot. Default is 'YlGnBu'.
        text_knock: Forwarded unchanged to geneset_plot. Default is 2.
        text_maxsize: Forwarded unchanged to geneset_plot. Default is 20.

    Returns:
        A matplotlib.axes.Axes object.

    """
    # geneset_plot hands cax_loc directly to fig.add_axes, which needs a
    # 4-element rect; expand a bare number so the default value works.
    if isinstance(cax_loc,(int,float)):
        cax_loc=[cax_loc,0.55,0.5,0.02]
    return geneset_plot(self.enrich_res,num,node_size,cax_loc,cax_fontsize,
                        fig_title,fig_xlabel,figsize,cmap,text_knock,text_maxsize)
omicverse.bulk.pyGSEA
¶
Bases: object
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/bulk/_Enrichment.py
class pyGSEA(object):
    """Object wrapper around ``geneset_enrichment_GSEA`` and the plotting helpers.

    The constructor only stores the analysis settings; ``enrichment`` runs the
    pre-ranked analysis and caches both the raw result (``self.pre_res``) and
    the filtered table (``self.enrich_res``) used by the plotting methods.
    """

    def __init__(self,gene_rnk:pd.DataFrame,pathways_dict:dict,
                 processes:int=8,permutation_num:int=100,
                 outdir:str='./enrichr_gsea',cutoff:float=0.5) -> None:
        """Initialize the pyGSEA class.

        Arguments:
            gene_rnk: pre-ranked correlation table or pandas DataFrame. Same input as a ``GSEA`` .rnk file.
            pathways_dict: Dictionary of pathway library names and corresponding Enrichr API URLs.
            processes: Number of Processes you are going to use. Default: 8.
            permutation_num: Number of permutations for significance computation. Default: 100.
            outdir: Output directory for Enrichr results. Default is './enrichr_gsea'.
            cutoff: The cutoff for enrichment. Default is 0.5.

        """
        self.gene_rnk=gene_rnk
        self.pathways_dict=pathways_dict
        self.processes=processes
        self.permutation_num=permutation_num
        self.outdir=outdir
        self.cutoff=cutoff

    def enrichment(self,format:str='png', pval=0.05,seed:int=112)->pd.DataFrame:
        """Run the pre-ranked gene set enrichment analysis.

        The raw result is stored on ``self.pre_res``. Terms whose 'fdr' is below
        ``pval`` are kept and extended with the columns the plotting code reads:
        'logp', 'logc', 'num', 'fraction', 'Term' and 'P-value'. The table is
        stored on ``self.enrich_res``.

        Arguments:
            format: Matplotlib figure format. Default: 'png'.
            pval: Terms are kept only when their 'fdr' is strictly below this value. Default: 0.05.
            seed: Random seed. Default: 112.

        Returns:
            enrich_res:A pandas.DataFrame object containing the enrichment results.

        """
        pre_res=geneset_enrichment_GSEA(self.gene_rnk,self.pathways_dict,
                                        self.processes,self.permutation_num,
                                        self.outdir,format,seed)
        self.pre_res=pre_res

        # Copy the filtered rows: adding columns to a slice of res2d triggers
        # pandas' chained-assignment warning and may not write where intended.
        enrich_res=pre_res.res2d[pre_res.res2d['fdr']<pval].copy()
        # The small offset keeps the logarithm finite when fdr is exactly 0.
        enrich_res['logp']=-np.log(enrich_res['fdr']+0.0001)
        enrich_res['logc']=enrich_res['nes']
        enrich_res['num']=enrich_res['matched_size']
        enrich_res['fraction']=enrich_res['matched_size']/enrich_res['geneset_size']
        enrich_res['Term']=enrich_res.index.tolist()
        enrich_res['P-value']=enrich_res['fdr']
        self.enrich_res=enrich_res
        return enrich_res

    def plot_gsea(self,term_num:int=0,
                  gene_set_title:str='',
                  figsize:tuple=(3,4),
                  cmap:str='RdBu_r',
                  title_fontsize:int=12,
                  title_y:float=0.95)->matplotlib.figure.Figure:
        """Draw the GSEA plot for one enriched term.

        ``enrichment`` must have been called first, because the plot reads
        ``self.enrich_res`` and ``self.pre_res``.

        Arguments:
            term_num: Position of the term to plot within the index of ``self.enrich_res``. Default is 0.
            gene_set_title: The title of the plot. When empty, the term name is used. Default is an empty string.
            figsize: The size of the plot. Default is (3,4).
            cmap: The colormap to use for the plot. Default is 'RdBu_r'.
            title_fontsize: The fontsize of the title. Default is 12.
            title_y: The y coordinate of the title. Default is 0.95.

        Returns:
            fig: A matplotlib.figure.Figure object.

        """
        from gseapy.plot import GSEAPlot
        terms = self.enrich_res.index
        g = GSEAPlot(
            rank_metric=self.pre_res.ranking, term=terms[term_num],figsize=figsize,cmap=cmap,
            **self.pre_res.results[terms[term_num]]
        )
        if gene_set_title=='':
            g.fig.suptitle(terms[term_num],fontsize=title_fontsize,y=title_y)
        else:
            g.fig.suptitle(gene_set_title,fontsize=title_fontsize,y=title_y)
        g.add_axes()
        return g.fig

    def plot_enrichment(self,num:int=10,node_size:list=[5,10,15],
                        cax_loc:int=2,cax_fontsize:int=12,
                        fig_title:str='',fig_xlabel:str='Fractions of genes',
                        figsize:tuple=(2,4),cmap:str='YlGnBu',
                        text_knock:int=2,text_maxsize:int=20)->matplotlib.axes._axes.Axes:
        """Plot the gene set enrichment result stored by ``enrichment``.

        Arguments:
            num: The number of enriched terms to plot. Default is 10.
            node_size: A list of integers defining the size of nodes in the plot. Default is [5,10,15].
            cax_loc: The location of the colorbar on the plot. Either a single number (the left
                position; the remaining rect entries are filled with 0.55, 0.5, 0.02, matching
                the default of geneset_plot) or a full 4-element rect. Default is 2.
            cax_fontsize: The fontsize of the colorbar label. Default is 12.
            fig_title: The title of the plot. Default is an empty string.
            fig_xlabel: The label of the x-axis. Default is 'Fractions of genes'.
            figsize: The size of the plot. Default is (2,4).
            cmap: The colormap to use for the plot. Default is 'YlGnBu'.
            text_knock: Forwarded unchanged to geneset_plot. Default is 2.
            text_maxsize: Forwarded unchanged to geneset_plot. Default is 20.

        Returns:
            ax: A matplotlib.axes.Axes object.

        """
        # geneset_plot hands cax_loc directly to fig.add_axes, which needs a
        # 4-element rect; expand a bare number so the default value works.
        if isinstance(cax_loc,(int,float)):
            cax_loc=[cax_loc,0.55,0.5,0.02]
        return geneset_plot(self.enrich_res,num,node_size,cax_loc,cax_fontsize,
                            fig_title,fig_xlabel,figsize,cmap,text_knock,text_maxsize)
__init__(gene_rnk, pathways_dict, processes=8, permutation_num=100, outdir='./enrichr_gsea', cutoff=0.5)
¶
Initialize the pyGSEA class.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
gene_rnk |
pd.DataFrame
|
Pre-ranked correlation table or pandas DataFrame. Same input as a GSEA .rnk file. |
required |
pathways_dict |
dict
|
Dictionary of pathway library names and corresponding Enrichr API URLs. |
required |
processes |
int
|
Number of Processes you are going to use. Default: 8. |
8
|
permutation_num |
int
|
Number of permutations for significance computation. Default: 100. |
100
|
outdir |
str
|
Output directory for Enrichr results. Default is './enrichr_gsea'. |
'./enrichr_gsea'
|
cutoff |
float
|
The cutoff for enrichment. Default is 0.5. |
0.5
|
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/bulk/_Enrichment.py
def __init__(self,gene_rnk:pd.DataFrame,pathways_dict:dict,
processes:int=8,permutation_num:int=100,
outdir:str='./enrichr_gsea',cutoff:float=0.5) -> None:
"""Initialize the pyGSEA class.
Arguments:
gene_rnk: pre-ranked correlation table or pandas DataFrame. Same input with ``GSEA`` .rnk file.
pathways_dict: Dictionary of pathway library names and corresponding Enrichr API URLs.
processes: Number of Processes you are going to use. Default: 8.
permutation_num: Number of permutations for significance computation. Default: 100.
outdir: Output directory for Enrichr results. Default is './enrichr_gsea'.
cutoff: The cutoff for enrichment. Default is 0.5.
"""
self.gene_rnk=gene_rnk
self.pathways_dict=pathways_dict
self.processes=processes
self.permutation_num=permutation_num
self.outdir=outdir
self.cutoff=cutoff
enrichment(format='png', pval=0.05, seed=112)
¶
gene set enrichment analysis.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
format |
str
|
Matplotlib figure format. Default: 'png'. |
'png'
|
seed |
int
|
Random seed. Default: 112. |
112
|
Returns:
Name | Type | Description |
---|---|---|
enrich_res |
pd.DataFrame
|
A pandas.DataFrame object containing the enrichment results. |
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/bulk/_Enrichment.py
def enrichment(self,format:str='png', pval=0.05,seed:int=112)->pd.DataFrame:
    """Run the pre-ranked gene set enrichment analysis.

    The raw result is stored on ``self.pre_res``. Terms whose 'fdr' is below
    ``pval`` are kept and extended with the columns the plotting code reads:
    'logp', 'logc', 'num', 'fraction', 'Term' and 'P-value'. The table is
    stored on ``self.enrich_res``.

    Arguments:
        format: Matplotlib figure format. Default: 'png'.
        pval: Terms are kept only when their 'fdr' is strictly below this value. Default: 0.05.
        seed: Random seed. Default: 112.

    Returns:
        enrich_res:A pandas.DataFrame object containing the enrichment results.

    """
    pre_res=geneset_enrichment_GSEA(self.gene_rnk,self.pathways_dict,
                                    self.processes,self.permutation_num,
                                    self.outdir,format,seed)
    self.pre_res=pre_res

    # Copy the filtered rows: adding columns to a slice of res2d triggers
    # pandas' chained-assignment warning and may not write where intended.
    enrich_res=pre_res.res2d[pre_res.res2d['fdr']<pval].copy()
    # The small offset keeps the logarithm finite when fdr is exactly 0.
    enrich_res['logp']=-np.log(enrich_res['fdr']+0.0001)
    enrich_res['logc']=enrich_res['nes']
    enrich_res['num']=enrich_res['matched_size']
    enrich_res['fraction']=enrich_res['matched_size']/enrich_res['geneset_size']
    enrich_res['Term']=enrich_res.index.tolist()
    enrich_res['P-value']=enrich_res['fdr']
    self.enrich_res=enrich_res
    return enrich_res
plot_enrichment(num=10, node_size=[5, 10, 15], cax_loc=2, cax_fontsize=12, fig_title='', fig_xlabel='Fractions of genes', figsize=(2, 4), cmap='YlGnBu', text_knock=2, text_maxsize=20)
¶
Plot the gene set enrichment result.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
num |
int
|
The number of enriched terms to plot. Default is 10. |
10
|
node_size |
list
|
A list of integers defining the size of nodes in the plot. Default is [5,10,15]. |
[5, 10, 15]
|
cax_loc |
int
|
The location of the colorbar on the plot. Default is 2. |
2
|
cax_fontsize |
int
|
The fontsize of the colorbar label. Default is 12. |
12
|
fig_title |
str
|
The title of the plot. Default is an empty string. |
''
|
fig_xlabel |
str
|
The label of the x-axis. Default is 'Fractions of genes'. |
'Fractions of genes'
|
figsize |
tuple
|
The size of the plot. Default is (2,4). |
(2, 4)
|
cmap |
str
|
The colormap to use for the plot. Default is 'YlGnBu'. |
'YlGnBu'
|
text_knock |
int
|
The number of terms to knock out for text labels. Default is 2. |
2
|
text_maxsize |
int
|
The maximum fontsize of text labels. Default is 20. |
20
|
Returns:
Name | Type | Description |
---|---|---|
ax |
matplotlib.axes._axes.Axes
|
A matplotlib.axes.Axes object. |
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/bulk/_Enrichment.py
def plot_enrichment(self,num:int=10,node_size:list=[5,10,15],
                    cax_loc:int=2,cax_fontsize:int=12,
                    fig_title:str='',fig_xlabel:str='Fractions of genes',
                    figsize:tuple=(2,4),cmap:str='YlGnBu',
                    text_knock:int=2,text_maxsize:int=20)->matplotlib.axes._axes.Axes:
    """Plot the gene set enrichment result stored by ``enrichment``.

    ``enrichment`` must have been called first, because the plot reads
    ``self.enrich_res``.

    Arguments:
        num: The number of enriched terms to plot. Default is 10.
        node_size: A list of integers defining the size of nodes in the plot. Default is [5,10,15].
        cax_loc: The location of the colorbar on the plot. Either a single number (the left
            position; the remaining rect entries are filled with 0.55, 0.5, 0.02, matching
            the default of geneset_plot) or a full 4-element rect. Default is 2.
        cax_fontsize: The fontsize of the colorbar label. Default is 12.
        fig_title: The title of the plot. Default is an empty string.
        fig_xlabel: The label of the x-axis. Default is 'Fractions of genes'.
        figsize: The size of the plot. Default is (2,4).
        cmap: The colormap to use for the plot. Default is 'YlGnBu'.
        text_knock: Forwarded unchanged to geneset_plot. Default is 2.
        text_maxsize: Forwarded unchanged to geneset_plot. Default is 20.

    Returns:
        ax: A matplotlib.axes.Axes object.

    """
    # geneset_plot hands cax_loc directly to fig.add_axes, which needs a
    # 4-element rect; expand a bare number so the default value works.
    if isinstance(cax_loc,(int,float)):
        cax_loc=[cax_loc,0.55,0.5,0.02]
    return geneset_plot(self.enrich_res,num,node_size,cax_loc,cax_fontsize,
                        fig_title,fig_xlabel,figsize,cmap,text_knock,text_maxsize)
plot_gsea(term_num=0, gene_set_title='', figsize=(3, 4), cmap='RdBu_r', title_fontsize=12, title_y=0.95)
¶
Plot the gene set enrichment result.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
term_num |
int
|
The index of the enriched term to plot. Default is 0. |
0
|
gene_set_title |
str
|
The title of the plot. Default is an empty string. |
''
|
figsize |
tuple
|
The size of the plot. Default is (3,4). |
(3, 4)
|
cmap |
str
|
The colormap to use for the plot. Default is 'RdBu_r'. |
'RdBu_r'
|
title_fontsize |
int
|
The fontsize of the title. Default is 12. |
12
|
title_y |
float
|
The y coordinate of the title. Default is 0.95. |
0.95
|
Returns:
Name | Type | Description |
---|---|---|
fig |
matplotlib.figure.Figure
|
A matplotlib.figure.Figure object. |
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/bulk/_Enrichment.py
def plot_gsea(self,term_num:int=0,
              gene_set_title:str='',
              figsize:tuple=(3,4),
              cmap:str='RdBu_r',
              title_fontsize:int=12,
              title_y:float=0.95)->matplotlib.figure.Figure:
    """Draw the GSEA plot for a single enriched term.

    Reads ``self.enrich_res`` and ``self.pre_res``, so ``enrichment`` has to be
    called beforehand.

    Arguments:
        term_num: Position of the term to plot within the index of ``self.enrich_res``. Default is 0.
        gene_set_title: The title of the plot. When empty, the term name is used. Default is an empty string.
        figsize: The size of the plot. Default is (3,4).
        cmap: The colormap to use for the plot. Default is 'RdBu_r'.
        title_fontsize: The fontsize of the title. Default is 12.
        title_y: The y coordinate of the title. Default is 0.95.

    Returns:
        fig: A matplotlib.figure.Figure object.

    """
    from gseapy.plot import GSEAPlot

    selected_term = self.enrich_res.index[term_num]
    gsea_plot = GSEAPlot(
        rank_metric=self.pre_res.ranking,
        term=selected_term,
        figsize=figsize,
        cmap=cmap,
        **self.pre_res.results[selected_term]
    )

    # Use the term name as the title unless the caller supplied one.
    heading = selected_term if gene_set_title=='' else gene_set_title
    gsea_plot.fig.suptitle(heading,fontsize=title_fontsize,y=title_y)
    gsea_plot.add_axes()
    return gsea_plot.fig
omicverse.bulk.geneset_enrichment(gene_list, pathways_dict, pvalue_threshold=0.05, pvalue_type='auto', organism='Human', description='None', background=None, outdir='./enrichr', cutoff=0.5)
¶
Performs gene set enrichment analysis using Enrichr API.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
gene_list |
list
|
List of gene symbols to be tested for enrichment. |
required |
pathways_dict |
dict
|
Dictionary of pathway library names and corresponding Enrichr API URLs. |
required |
pvalue_threshold |
float
|
P-value threshold for significant pathways. Default is 0.05. |
0.05
|
pvalue_type |
str
|
Which p-value column is compared against pvalue_threshold. 'auto' uses the adjusted p-value when more than 100 terms are returned and the raw p-value otherwise; 'adjust' always uses the adjusted p-value; any other value uses the raw p-value. Default is 'auto'. |
'auto'
|
organism |
str
|
Organism of the input gene list. Default is 'Human'. |
'Human'
|
description |
str
|
Description of the input gene list. Default is 'None'. |
'None'
|
background |
list
|
Background gene list to use for enrichment analysis. Default is None. If None, the background gene list is automatically set to the organism-specific gene list. |
None
|
outdir |
str
|
Output directory for Enrichr results. Default is './enrichr'. |
'./enrichr'
|
cutoff |
float
|
Show enriched terms which Adjusted P-value < cutoff. Default is 0.5. |
0.5
|
Returns:
Name | Type | Description |
---|---|---|
enrich_res |
pd.DataFrame
|
A pandas DataFrame containing the enrichment results. |
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/bulk/_Enrichment.py
def geneset_enrichment(gene_list:list,pathways_dict:dict,
                       pvalue_threshold:float=0.05,pvalue_type:str='auto',
                       organism:str='Human',description:str='None',
                       background:list=None,
                       outdir:str='./enrichr',cutoff:float=0.5)->pd.DataFrame:
    """
    Performs gene set enrichment analysis using Enrichr API.

    Arguments:
        gene_list: List of gene symbols to be tested for enrichment.
        pathways_dict: Dictionary of pathway library names and corresponding Enrichr API URLs.
        pvalue_threshold: P-value threshold for significant pathways. Default is 0.05.
        pvalue_type: Which p-value column is compared against pvalue_threshold. 'auto' uses
            'Adjusted P-value' when more than 100 terms are returned and 'P-value' otherwise;
            'adjust' always uses 'Adjusted P-value'; any other value uses 'P-value'.
            Default is 'auto'.
        organism: Organism of the input gene list. Default is 'Human'.
        description: Description of the input gene list. Default is 'None'.
        background: Background gene list to use for enrichment analysis. Default is None.
            If None, 'mmusculus_gene_ensembl' is used for organism 'Mouse'/'mouse'/'mm' and
            'hsapiens_gene_ensembl' for 'Human'/'human'/'hs'; any other organism leaves it as None.
        outdir: Output directory for Enrichr results. Default is './enrichr'.
        cutoff: Show enriched terms which Adjusted P-value < cutoff. Default is 0.5.

    Returns:
        enrich_res: A pandas DataFrame containing the enrichment results, restricted to the
            significant terms and extended with the columns 'logp' (negative natural log of
            the selected p-value), 'logc' (natural log of 'Odds Ratio'), 'num' (numerator of
            'Overlap') and 'fraction' (numerator divided by denominator of 'Overlap').

    """
    from ..externel.gseapy import enrichr

    if background is None:
        if organism in ('Mouse','mouse','mm'):
            background='mmusculus_gene_ensembl'
        elif organism in ('Human','human','hs'):
            background='hsapiens_gene_ensembl'

    enr = enrichr(gene_list=gene_list,
                  gene_sets=pathways_dict,
                  organism=organism, # don't forget to set organism to the one you desired! e.g. Yeast
                  description=description,
                  background=background,
                  outdir=outdir,
                  cutoff=cutoff # test dataset, use lower value from range(0,1)
                  )
    results=enr.res2d

    # Decide once which p-value column drives both the filter and 'logp'.
    if pvalue_type=='auto':
        use_adjusted=results.shape[0]>100
    else:
        use_adjusted=(pvalue_type=='adjust')
    pvalue_col='Adjusted P-value' if use_adjusted else 'P-value'

    # Copy the filtered rows: adding columns to a slice of res2d triggers
    # pandas' chained-assignment warning and may not write where intended.
    enrich_res=results[results[pvalue_col]<pvalue_threshold].copy()
    enrich_res['logp']=-np.log(enrich_res[pvalue_col])
    enrich_res['logc']=np.log(enrich_res['Odds Ratio'])

    # 'Overlap' is parsed as "<hits>/<set size>".
    overlap_parts=[i.split('/') for i in enrich_res['Overlap']]
    enrich_res['num']=[int(parts[0]) for parts in overlap_parts]
    enrich_res['fraction']=[int(parts[0])/int(parts[1]) for parts in overlap_parts]
    return enrich_res
omicverse.bulk.geneset_enrichment_GSEA(gene_rnk, pathways_dict, processes=8, permutation_num=100, outdir='./enrichr_gsea', format='png', seed=112)
¶
Enrichment analysis using GSEA
Parameters:
Name | Type | Description | Default |
---|---|---|---|
gene_rnk |
pd.DataFrame
|
Pre-ranked correlation table or pandas DataFrame. Same input as a GSEA .rnk file. |
required |
pathways_dict |
dict
|
Dictionary of pathway library names and corresponding Enrichr API URLs. |
required |
processes |
int
|
Number of Processes you are going to use. Default: 8. |
8
|
permutation_num |
int
|
Number of permutations for significance computation. Default: 100. |
100
|
outdir |
str
|
Output directory for Enrichr results. Default is './enrichr_gsea'. |
'./enrichr_gsea'
|
format |
str
|
Matplotlib figure format. Default: 'png'. |
'png'
|
seed |
int
|
Random seed. Default: 112. |
112
|
Returns:
Name | Type | Description |
---|---|---|
pre_res |
dict
|
A prerank object containing the enrichment results. |
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/bulk/_Enrichment.py
def geneset_enrichment_GSEA(gene_rnk:pd.DataFrame,pathways_dict:dict,
                            processes:int=8,
                            permutation_num:int=100, # reduce number to speed up testing
                            outdir:str='./enrichr_gsea', format:str='png', seed:int=112)->dict:
    """
    Enrichment analysis using GSEA

    This function only runs ``prerank`` and returns its result object unchanged;
    filtering by FDR and adding plot columns is left to the caller.

    Arguments:
        gene_rnk: pre-ranked correlation table or pandas DataFrame. Same input as a ``GSEA`` .rnk file.
        pathways_dict: Dictionary of pathway library names and corresponding Enrichr API URLs.
        processes: Number of Processes you are going to use. Default: 8.
        permutation_num: Number of permutations for significance computation. Default: 100.
        outdir: Output directory for Enrichr results. Default is './enrichr_gsea'.
        format: Matplotlib figure format. Default: 'png'.
        seed: Random seed. Default: 112.

    Returns:
        pre_res: A prerank object containing the enrichment results.

    """
    from ..externel.gseapy import prerank

    pre_res = prerank(rnk=gene_rnk, gene_sets=pathways_dict,
                      processes=processes,
                      permutation_num=permutation_num, # reduce number to speed up testing
                      outdir=outdir, format=format, seed=seed)
    return pre_res
omicverse.bulk.geneset_plot(enrich_res, num=10, node_size=[5, 10, 15], cax_loc=[2, 0.55, 0.5, 0.02], cax_fontsize=12, fig_title='', fig_xlabel='Fractions of genes', figsize=(2, 4), cmap='YlGnBu', text_knock=5, text_maxsize=20, bbox_to_anchor_used=(-0.45, -13), node_diameter=10, custom_ticks=[5, 10], ax=None)
¶
Plot the gene set enrichment result.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
num |
int
|
The number of enriched terms to plot. Default is 10. |
10
|
node_size |
list
|
A list of integers defining the size of nodes in the plot. Default is [5,10,15]. |
[5, 10, 15]
|
cax_loc |
list
|
The location, width and height of the colorbar on the plot. Default is [2, 0.55, 0.5, 0.02]. |
[2, 0.55, 0.5, 0.02]
|
cax_fontsize |
int
|
The fontsize of the colorbar label. Default is 12. |
12
|
fig_title |
str
|
The title of the plot. Default is an empty string. |
''
|
fig_xlabel |
str
|
The label of the x-axis. Default is 'Fractions of genes'. |
'Fractions of genes'
|
figsize |
tuple
|
The size of the plot. Default is (2,4). |
(2, 4)
|
cmap |
str
|
The colormap to use for the plot. Default is 'YlGnBu'. |
'YlGnBu'
|
text_knock |
int
|
The number of characters to knock off the end of the term name. Default is 5. |
5
|
text_maxsize |
int
|
The maximum fontsize of the term names. Default is 20. |
20
|
bbox_to_anchor_used |
tuple
|
The anchor point for placing the legend. Default is (-0.45, -13). |
(-0.45, -13)
|
node_diameter |
int
|
The base size for nodes in the plot. Default is 10. |
10
|
Returns:
Type | Description |
---|---|
matplotlib.axes._axes.Axes
|
A matplotlib.axes.Axes object. |
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/bulk/_Enrichment.py
def geneset_plot(enrich_res,num:int=10,node_size:list=[5,10,15],
                 cax_loc:list=[2, 0.55, 0.5, 0.02],cax_fontsize:int=12,
                 fig_title:str='',fig_xlabel:str='Fractions of genes',
                 figsize:tuple=(2,4),cmap:str='YlGnBu',
                 text_knock:int=5,text_maxsize:int=20,
                 bbox_to_anchor_used:tuple=(-0.45, -13),node_diameter:int=10,
                 custom_ticks:list=[5,10],ax=None)->matplotlib.axes._axes.Axes:
    """
    Plot the gene set enrichment result.

    The ``num`` terms with the smallest 'P-value' are drawn as a bubble plot:
    x is 'fraction', bubble area scales with 'num', colour encodes 'logp', and
    the rows are ordered by 'logc'.

    Arguments:
        enrich_res: Enrichment table providing the columns 'P-value', 'logc', 'fraction', 'num', 'logp' and 'Term'.
        num: The number of enriched terms to plot. Default is 10.
        node_size: A list of integers defining the size of nodes in the plot. Default is [5,10,15].
        cax_loc: The location, width and height of the colorbar on the plot. Default is [2, 0.55, 0.5, 0.02].
        cax_fontsize: The fontsize of the colorbar label. Default is 12.
        fig_title: The title of the plot. Default is an empty string.
        fig_xlabel: The label of the x-axis. Default is 'Fractions of genes'.
        figsize: The size of the plot. Only used when ``ax`` is None. Default is (2,4).
        cmap: The colormap to use for the plot. Default is 'YlGnBu'.
        text_knock: Forwarded to plot_text_set when formatting the term names. Default is 5.
        text_maxsize: Forwarded to plot_text_set when formatting the term names. Default is 20.
        bbox_to_anchor_used (tuple): The anchor point for placing the legend. Default is (-0.45, -13).
        node_diameter (int): The base size for nodes in the plot. Default is 10.
        custom_ticks: Tick positions set on the colorbar. Default is [5,10].
        ax: Existing axes to draw on. When None, a new figure and axes are created. Default is None.

    Returns:
        A matplotlib.axes.Axes object.

    """
    if ax is None:
        fig, ax = plt.subplots(figsize=figsize)
    else:
        # Work on the figure that owns the supplied axes instead of whatever
        # pyplot currently considers the active figure.
        fig = ax.figure

    plot_data2=enrich_res.sort_values('P-value')[:num].sort_values('logc')
    y_positions=list(range(len(plot_data2)))

    st=ax.scatter(plot_data2['fraction'],y_positions,
                  s=plot_data2['num']*node_diameter,linewidths=1,edgecolors='black',c=plot_data2['logp'],cmap=cmap)

    # Configure the target axes explicitly so that a caller-supplied ``ax`` is
    # honoured even when it is not pyplot's current axes.
    ax.yaxis.tick_right()
    ax.set_yticks(y_positions)
    ax.set_yticklabels([plot_text_set(i.split('(')[0],text_knock=text_knock,text_maxsize=text_maxsize) for i in plot_data2['Term']],
                       fontsize=10)
    ax.tick_params(axis='x',labelsize=12)
    ax.set_title(fig_title,fontsize=12)
    ax.set_xlabel(fig_xlabel,fontsize=12)

    cax = fig.add_axes(cax_loc)
    cb=fig.colorbar(st,shrink=0.25,cax=cax,orientation='horizontal')
    # NOTE(review): the label says Log10, but geneset_enrichment in this file
    # computes 'logp' with np.log (natural log) - confirm which is intended.
    cb.set_label(r'$−Log_{10}(P_{adjusted})$',fontdict={'size':cax_fontsize})
    # new code to add custom ticks
    cb.set_ticks(custom_ticks)

    # Empty scatters serve purely as legend handles for the bubble sizes.
    gl_li=[ax.scatter([],[], s=i*node_diameter, marker='o', color='white',edgecolors='black')
           for i in node_size]

    # The original plt.legend call ran after fig.add_axes, which makes the new
    # colorbar axes current, so the legend (and bbox_to_anchor_used) is placed
    # relative to the colorbar axes - presumably intentional given the default
    # anchor; kept explicit here. TODO confirm against a rendered figure.
    cax.legend(gl_li,
               [str(i) for i in node_size],
               loc='lower left',
               ncol=3,bbox_to_anchor=bbox_to_anchor_used,
               fontsize=cax_fontsize)
    return ax