API enrichment

`omicverse.bulk.pyGSE` ¶

Bases: object

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/bulk/_Enrichment.py

class pyGSE(object):
    """Gene-list enrichment against a pathway dictionary.

    The constructor only stores the analysis settings; ``enrichment`` forwards
    them to ``geneset_enrichment`` and caches the result, and
    ``plot_enrichment`` forwards that cached result to ``geneset_plot``.
    """

    def __init__(self,gene_list:list,pathways_dict:dict,pvalue_threshold:float=0.05,pvalue_type:str='auto',
                 background=None,organism:str='Human',description:str='None',outdir:str='./enrichr',cutoff:float=0.5) -> None:
        """Initialize the pyGSE class.

        Arguments:
            gene_list: A list of genes.
            pathways_dict: A dictionary of pathways.
            pvalue_threshold: The p-value threshold for enrichment. Default is 0.05.
            pvalue_type: The p-value type. Default is 'auto'.
            background: Background for the enrichment. When None it is derived
                from ``organism``: 'mmusculus_gene_ensembl' for 'Mouse'/'mouse'/'mm',
                'hsapiens_gene_ensembl' for 'Human'/'human'/'hs', and left as
                None for any other organism.
            organism: The organism. Default is 'Human'.
            description: The description. Default is 'None'.
            outdir: The output directory. Default is './enrichr'.
            cutoff: The cutoff for enrichment. Default is 0.5.

        """
        # Only fill in a background when the caller did not supply one.
        if background is None:
            if organism in ('Mouse','mouse','mm'):
                background='mmusculus_gene_ensembl'
            elif organism in ('Human','human','hs'):
                background='hsapiens_gene_ensembl'

        self.gene_list,self.pathways_dict=gene_list,pathways_dict
        self.pvalue_threshold,self.pvalue_type=pvalue_threshold,pvalue_type
        self.organism,self.description=organism,description
        self.outdir,self.cutoff=outdir,cutoff
        self.background=background

    def enrichment(self):
        """Run the gene set enrichment analysis with the stored settings.

        The result is also cached on ``self.enrich_res`` so that
        ``plot_enrichment`` can use it afterwards.

        Returns:
            The object returned by ``geneset_enrichment`` (annotated there as
            a pandas.DataFrame).
        """
        self.enrich_res=geneset_enrichment(
            gene_list=self.gene_list,
            pathways_dict=self.pathways_dict,
            pvalue_threshold=self.pvalue_threshold,
            pvalue_type=self.pvalue_type,
            organism=self.organism,
            description=self.description,
            background=self.background,
            outdir=self.outdir,
            cutoff=self.cutoff,
        )
        return self.enrich_res


    def plot_enrichment(self,num:int=10,node_size:list=[5,10,15],
                        cax_loc:int=2,cax_fontsize:int=12,
                        fig_title:str='',fig_xlabel:str='Fractions of genes',
                        figsize:tuple=(2,4),cmap:str='YlGnBu',text_knock:int=2,text_maxsize:int=20)->matplotlib.axes._axes.Axes:

        """Plot the cached enrichment result via ``geneset_plot``.

        ``enrichment`` must have been called first, since this method reads
        ``self.enrich_res``.

        Arguments:
            num: The number of enriched terms to plot. Default is 10.
            node_size: A list of integers defining the size of nodes in the plot. Default is [5,10,15].
            cax_loc: The location of the colorbar on the plot. Default is 2.
            cax_fontsize: The fontsize of the colorbar label. Default is 12.
            fig_title: The title of the plot. Default is an empty string.
            fig_xlabel: The label of the x-axis. Default is 'Fractions of genes'.
            figsize: The size of the plot. Default is (2,4).
            cmap: The colormap to use for the plot. Default is 'YlGnBu'.
            text_knock: Forwarded to ``geneset_plot`` as ``text_knock``. Default is 2.
            text_maxsize: Forwarded to ``geneset_plot`` as ``text_maxsize``. Default is 20.

        Returns:
            Whatever ``geneset_plot`` returns (annotated as a matplotlib Axes).
        """
        # NOTE(review): cax_loc is an int here, while geneset_plot annotates
        # cax_loc as a list with default [2, 0.55, 0.5, 0.02] -- confirm that
        # the scalar default is still accepted downstream.
        return geneset_plot(
            self.enrich_res,
            num=num,node_size=node_size,
            cax_loc=cax_loc,cax_fontsize=cax_fontsize,
            fig_title=fig_title,fig_xlabel=fig_xlabel,
            figsize=figsize,cmap=cmap,
            text_knock=text_knock,text_maxsize=text_maxsize,
        )

`__init__(gene_list, pathways_dict, pvalue_threshold=0.05, pvalue_type='auto', background=None, organism='Human', description='None', outdir='./enrichr', cutoff=0.5)` ¶

Initialize the pyGSE class.

Parameters:

Name	Type	Description	Default
`gene_list`	`list`	A list of genes.	required
`pathways_dict`	`dict`	A dictionary of pathways.	required
`pvalue_threshold`	`float`	The p-value threshold for enrichment. Default is 0.05.	`0.05`
`pvalue_type`	`str`	The p-value type. Default is 'auto'.	`'auto'`
`organism`	`str`	The organism. Default is 'Human'.	`'Human'`
`description`	`str`	The description. Default is 'None'.	`'None'`
`outdir`	`str`	The output directory. Default is './enrichr'.	`'./enrichr'`
`cutoff`	`float`	The cutoff for enrichment. Default is 0.5.	`0.5`

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/bulk/_Enrichment.py

def __init__(self,gene_list:list,pathways_dict:dict,pvalue_threshold:float=0.05,pvalue_type:str='auto',
             background=None,organism:str='Human',description:str='None',outdir:str='./enrichr',cutoff:float=0.5) -> None:
    """Initialize the pyGSE class.

    Arguments:
        gene_list: A list of genes.
        pathways_dict: A dictionary of pathways.
        pvalue_threshold: The p-value threshold for enrichment. Default is 0.05.
        pvalue_type: The p-value type. Default is 'auto'.
        background: Background for the enrichment. When None it is derived
            from ``organism``: 'mmusculus_gene_ensembl' for 'Mouse'/'mouse'/'mm',
            'hsapiens_gene_ensembl' for 'Human'/'human'/'hs', and left as
            None for any other organism.
        organism: The organism. Default is 'Human'.
        description: The description. Default is 'None'.
        outdir: The output directory. Default is './enrichr'.
        cutoff: The cutoff for enrichment. Default is 0.5.

    """
    # Only fill in a background when the caller did not supply one.
    if background is None:
        if organism in ('Mouse','mouse','mm'):
            background='mmusculus_gene_ensembl'
        elif organism in ('Human','human','hs'):
            background='hsapiens_gene_ensembl'

    self.gene_list,self.pathways_dict=gene_list,pathways_dict
    self.pvalue_threshold,self.pvalue_type=pvalue_threshold,pvalue_type
    self.organism,self.description=organism,description
    self.outdir,self.cutoff=outdir,cutoff
    self.background=background

`enrichment()` ¶

gene set enrichment analysis.

Returns:

Type	Description
	A pandas.DataFrame object containing the enrichment results.

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/bulk/_Enrichment.py

def enrichment(self):
    """Run the gene set enrichment analysis with the stored settings.

    The result is also cached on ``self.enrich_res`` so that
    ``plot_enrichment`` can use it afterwards.

    Returns:
        The object returned by ``geneset_enrichment`` (annotated there as
        a pandas.DataFrame).
    """
    self.enrich_res=geneset_enrichment(
        gene_list=self.gene_list,
        pathways_dict=self.pathways_dict,
        pvalue_threshold=self.pvalue_threshold,
        pvalue_type=self.pvalue_type,
        organism=self.organism,
        description=self.description,
        background=self.background,
        outdir=self.outdir,
        cutoff=self.cutoff,
    )
    return self.enrich_res

`plot_enrichment(num=10, node_size=[5, 10, 15], cax_loc=2, cax_fontsize=12, fig_title='', fig_xlabel='Fractions of genes', figsize=(2, 4), cmap='YlGnBu', text_knock=2, text_maxsize=20)` ¶

Plot the gene set enrichment result.

Parameters:

Name	Type	Description	Default
`num`	`int`	The number of enriched terms to plot. Default is 10.	`10`
`node_size`	`list`	A list of integers defining the size of nodes in the plot. Default is [5,10,15].	`[5, 10, 15]`
`cax_loc`	`int`	The location of the colorbar on the plot. Default is 2.	`2`
`cax_fontsize`	`int`	The fontsize of the colorbar label. Default is 12.	`12`
`fig_title`	`str`	The title of the plot. Default is an empty string.	`''`
`fig_xlabel`	`str`	The label of the x-axis. Default is 'Fractions of genes'.	`'Fractions of genes'`
`figsize`	`tuple`	The size of the plot. Default is (2,4).	`(2, 4)`
`cmap`	`str`	The colormap to use for the plot. Default is 'YlGnBu'.	`'YlGnBu'`

Returns:

Type	Description
`matplotlib.axes._axes.Axes`	A matplotlib.axes.Axes object.

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/bulk/_Enrichment.py

def plot_enrichment(self,num:int=10,node_size:list=[5,10,15],
                    cax_loc:int=2,cax_fontsize:int=12,
                    fig_title:str='',fig_xlabel:str='Fractions of genes',
                    figsize:tuple=(2,4),cmap:str='YlGnBu',text_knock:int=2,text_maxsize:int=20)->matplotlib.axes._axes.Axes:

    """Plot the cached enrichment result via ``geneset_plot``.

    ``enrichment`` must have been called first, since this method reads
    ``self.enrich_res``.

    Arguments:
        num: The number of enriched terms to plot. Default is 10.
        node_size: A list of integers defining the size of nodes in the plot. Default is [5,10,15].
        cax_loc: The location of the colorbar on the plot. Default is 2.
        cax_fontsize: The fontsize of the colorbar label. Default is 12.
        fig_title: The title of the plot. Default is an empty string.
        fig_xlabel: The label of the x-axis. Default is 'Fractions of genes'.
        figsize: The size of the plot. Default is (2,4).
        cmap: The colormap to use for the plot. Default is 'YlGnBu'.
        text_knock: Forwarded to ``geneset_plot`` as ``text_knock``. Default is 2.
        text_maxsize: Forwarded to ``geneset_plot`` as ``text_maxsize``. Default is 20.

    Returns:
        Whatever ``geneset_plot`` returns (annotated as a matplotlib Axes).
    """
    # NOTE(review): cax_loc is an int here, while geneset_plot annotates
    # cax_loc as a list with default [2, 0.55, 0.5, 0.02] -- confirm that
    # the scalar default is still accepted downstream.
    return geneset_plot(
        self.enrich_res,
        num=num,node_size=node_size,
        cax_loc=cax_loc,cax_fontsize=cax_fontsize,
        fig_title=fig_title,fig_xlabel=fig_xlabel,
        figsize=figsize,cmap=cmap,
        text_knock=text_knock,text_maxsize=text_maxsize,
    )

`omicverse.bulk.pyGSEA` ¶

Bases: object

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/bulk/_Enrichment.py

class pyGSEA(object):
    """Pre-ranked GSEA of a ranked gene table against a pathway dictionary.

    The constructor only stores the settings. ``enrichment`` runs
    ``geneset_enrichment_GSEA`` and caches both the raw result
    (``self.pre_res``) and the FDR-filtered table (``self.enrich_res``);
    the two plotting methods read those cached attributes.
    """

    def __init__(self,gene_rnk:pd.DataFrame,pathways_dict:dict,
                 processes:int=8,permutation_num:int=100,
                 outdir:str='./enrichr_gsea',cutoff:float=0.5) -> None:
        """Initialize the pyGSEA class.

        Arguments:
            gene_rnk: pre-ranked correlation table or pandas DataFrame. Same input with ``GSEA`` .rnk file.
            pathways_dict: Pathway / gene-set dictionary forwarded to ``geneset_enrichment_GSEA``.
            processes: Number of Processes you are going to use. Default: 8.
            permutation_num: Number of permutations for significance computation. Default: 100.
            outdir: Output directory for the GSEA results. Default is './enrichr_gsea'.
            cutoff: Stored on the instance; not read by any method of this class. Default is 0.5.
        """

        self.gene_rnk=gene_rnk
        self.pathways_dict=pathways_dict
        self.processes=processes
        self.permutation_num=permutation_num
        self.outdir=outdir
        self.cutoff=cutoff


    def enrichment(self,format:str='png', pval=0.05,seed:int=112)->pd.DataFrame:
        """Run pre-ranked GSEA and keep the terms whose FDR is below ``pval``.

        The raw result of ``geneset_enrichment_GSEA`` is cached on
        ``self.pre_res`` and the filtered table on ``self.enrich_res``.

        Arguments:
            format: Matplotlib figure format forwarded to the GSEA run. Default: 'png'.
            pval: Exclusive upper bound on the 'fdr' column; only rows with
                ``fdr < pval`` are kept. Default: 0.05.
            seed: Random seed forwarded to the GSEA run. Default: 112.

        Returns:
            enrich_res: A pandas.DataFrame holding the filtered rows of
                ``pre_res.res2d`` plus the derived columns 'logp', 'logc',
                'num', 'fraction', 'Term' and 'P-value'.
        """
        pre_res=geneset_enrichment_GSEA(self.gene_rnk,self.pathways_dict,
                                        self.processes,self.permutation_num,
                                        self.outdir,format,seed)
        self.pre_res=pre_res

        res2d=pre_res.res2d
        # Work on an independent copy: adding columns to a boolean-mask slice
        # is chained assignment (SettingWithCopyWarning) and must never leak
        # into pre_res.res2d.
        enrich_res=res2d[res2d['fdr']<pval].copy()
        # The 0.0001 offset keeps the log finite when fdr is exactly 0.
        enrich_res['logp']=-np.log(enrich_res['fdr']+0.0001)
        enrich_res['logc']=enrich_res['nes']
        enrich_res['num']=enrich_res['matched_size']
        enrich_res['fraction']=enrich_res['matched_size']/enrich_res['geneset_size']
        enrich_res['Term']=enrich_res.index.tolist()
        enrich_res['P-value']=enrich_res['fdr']
        self.enrich_res=enrich_res
        return enrich_res

    def plot_gsea(self,term_num:int=0,
                  gene_set_title:str='',
                  figsize:tuple=(3,4),
                  cmap:str='RdBu_r',
                  title_fontsize:int=12,
                  title_y:float=0.95)->matplotlib.figure.Figure:
        """Draw the GSEA plot of a single enriched term.

        Requires ``enrichment`` to have been called, since it reads
        ``self.enrich_res`` and ``self.pre_res``.

        Arguments:
            term_num: Position of the term in ``self.enrich_res.index`` to plot. Default is 0.
            gene_set_title: The title of the plot. When it is an empty string
                the term name is used instead. Default is an empty string.
            figsize: The size of the plot. Default is (3,4).
            cmap: The colormap to use for the plot. Default is 'RdBu_r'.
            title_fontsize: The fontsize of the title. Default is 12.
            title_y: The y coordinate of the title. Default is 0.95.

        Returns:
            fig: The ``fig`` attribute of the GSEAPlot object.
        """
        from gseapy.plot import GSEAPlot
        term=self.enrich_res.index[term_num]
        g = GSEAPlot(
            rank_metric=self.pre_res.ranking, term=term,figsize=figsize,cmap=cmap,
            **self.pre_res.results[term]
            )
        title=term if gene_set_title=='' else gene_set_title
        g.fig.suptitle(title,fontsize=title_fontsize,y=title_y)
        g.add_axes()
        return g.fig


    def plot_enrichment(self,num:int=10,node_size:list=[5,10,15],
                        cax_loc:int=2,cax_fontsize:int=12,
                        fig_title:str='',fig_xlabel:str='Fractions of genes',
                        figsize:tuple=(2,4),cmap:str='YlGnBu',
                        text_knock:int=2,text_maxsize:int=20)->matplotlib.axes._axes.Axes:

        """Plot the cached enrichment result via ``geneset_plot``.

        Requires ``enrichment`` to have been called, since it reads
        ``self.enrich_res``.

        Arguments:
            num: The number of enriched terms to plot. Default is 10.
            node_size: A list of integers defining the size of nodes in the plot. Default is [5,10,15].
            cax_loc: The location of the colorbar on the plot. Default is 2.
            cax_fontsize: The fontsize of the colorbar label. Default is 12.
            fig_title: The title of the plot. Default is an empty string.
            fig_xlabel: The label of the x-axis. Default is 'Fractions of genes'.
            figsize: The size of the plot. Default is (2,4).
            cmap: The colormap to use for the plot. Default is 'YlGnBu'.
            text_knock: Forwarded to ``geneset_plot`` as ``text_knock``. Default is 2.
            text_maxsize: Forwarded to ``geneset_plot`` as ``text_maxsize``. Default is 20.

        Returns:
            ax: Whatever ``geneset_plot`` returns (annotated as a matplotlib Axes).
        """
        # NOTE(review): cax_loc is an int here, while geneset_plot annotates
        # cax_loc as a list with default [2, 0.55, 0.5, 0.02] -- confirm that
        # the scalar default is still accepted downstream.
        return geneset_plot(self.enrich_res,num,node_size,cax_loc,cax_fontsize,
                            fig_title,fig_xlabel,figsize,cmap,text_knock,text_maxsize)

`__init__(gene_rnk, pathways_dict, processes=8, permutation_num=100, outdir='./enrichr_gsea', cutoff=0.5)` ¶

Initialize the pyGSEA class.

Parameters:

Name	Type	Description	Default
`gene_rnk`	`pd.DataFrame`	pre-ranked correlation table or pandas DataFrame. Same input with `GSEA` .rnk file.	required
`pathways_dict`	`dict`	Dictionary of pathway library names and corresponding Enrichr API URLs.	required
`processes`	`int`	Number of Processes you are going to use. Default: 8.	`8`
`permutation_num`	`int`	Number of permutations for significance computation. Default: 100.	`100`
`outdir`	`str`	Output directory for Enrichr results. Default is './enrichr_gsea'.	`'./enrichr_gsea'`
`cutoff`	`float`	The cutoff for enrichment. Default is 0.5.	`0.5`

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/bulk/_Enrichment.py

def __init__(self,gene_rnk:pd.DataFrame,pathways_dict:dict,
             processes:int=8,permutation_num:int=100,
             outdir:str='./enrichr_gsea',cutoff:float=0.5) -> None:
    """Store the inputs and settings of a pre-ranked GSEA run.

    Nothing is computed here; every argument is kept verbatim as an
    attribute of the same name.

    Arguments:
        gene_rnk: pre-ranked correlation table or pandas DataFrame. Same input with ``GSEA`` .rnk file.
        pathways_dict: Pathway / gene-set dictionary used for the GSEA run.
        processes: Number of Processes you are going to use. Default: 8.
        permutation_num: Number of permutations for significance computation. Default: 100.
        outdir: Output directory for the GSEA results. Default is './enrichr_gsea'.
        cutoff: The cutoff for enrichment. Default is 0.5.
    """
    # Inputs of the analysis.
    self.gene_rnk,self.pathways_dict=gene_rnk,pathways_dict
    # Execution settings.
    self.processes,self.permutation_num=processes,permutation_num
    # Output settings.
    self.outdir,self.cutoff=outdir,cutoff

`enrichment(format='png', pval=0.05, seed=112)` ¶

gene set enrichment analysis.

Parameters:

Name	Type	Description	Default
`format`	`str`	Matplotlib figure format. Default: 'png'.	`'png'`
`seed`	`int`	Random seed. Default: 112.	`112`

Returns:

Name	Type	Description
`enrich_res`	`pd.DataFrame`	A pandas.DataFrame object containing the enrichment results.

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/bulk/_Enrichment.py

def enrichment(self,format:str='png', pval=0.05,seed:int=112)->pd.DataFrame:
    """Run pre-ranked GSEA and keep the terms whose FDR is below ``pval``.

    The raw result of ``geneset_enrichment_GSEA`` is cached on
    ``self.pre_res`` and the filtered table on ``self.enrich_res``.

    Arguments:
        format: Matplotlib figure format forwarded to the GSEA run. Default: 'png'.
        pval: Exclusive upper bound on the 'fdr' column; only rows with
            ``fdr < pval`` are kept. Default: 0.05.
        seed: Random seed forwarded to the GSEA run. Default: 112.

    Returns:
        enrich_res: A pandas.DataFrame holding the filtered rows of
            ``pre_res.res2d`` plus the derived columns 'logp', 'logc',
            'num', 'fraction', 'Term' and 'P-value'.
    """
    pre_res=geneset_enrichment_GSEA(self.gene_rnk,self.pathways_dict,
                                    self.processes,self.permutation_num,
                                    self.outdir,format,seed)
    self.pre_res=pre_res

    res2d=pre_res.res2d
    # Work on an independent copy: adding columns to a boolean-mask slice
    # is chained assignment (SettingWithCopyWarning) and must never leak
    # into pre_res.res2d.
    enrich_res=res2d[res2d['fdr']<pval].copy()
    # The 0.0001 offset keeps the log finite when fdr is exactly 0.
    enrich_res['logp']=-np.log(enrich_res['fdr']+0.0001)
    enrich_res['logc']=enrich_res['nes']
    enrich_res['num']=enrich_res['matched_size']
    enrich_res['fraction']=enrich_res['matched_size']/enrich_res['geneset_size']
    enrich_res['Term']=enrich_res.index.tolist()
    enrich_res['P-value']=enrich_res['fdr']
    self.enrich_res=enrich_res
    return enrich_res

`plot_enrichment(num=10, node_size=[5, 10, 15], cax_loc=2, cax_fontsize=12, fig_title='', fig_xlabel='Fractions of genes', figsize=(2, 4), cmap='YlGnBu', text_knock=2, text_maxsize=20)` ¶

Plot the gene set enrichment result.

Parameters:

Name	Type	Description	Default
`num`	`int`	The number of enriched terms to plot. Default is 10.	`10`
`node_size`	`list`	A list of integers defining the size of nodes in the plot. Default is [5,10,15].	`[5, 10, 15]`
`cax_loc`	`int`	The location of the colorbar on the plot. Default is 2.	`2`
`cax_fontsize`	`int`	The fontsize of the colorbar label. Default is 12.	`12`
`fig_title`	`str`	The title of the plot. Default is an empty string.	`''`
`fig_xlabel`	`str`	The label of the x-axis. Default is 'Fractions of genes'.	`'Fractions of genes'`
`figsize`	`tuple`	The size of the plot. Default is (2,4).	`(2, 4)`
`cmap`	`str`	The colormap to use for the plot. Default is 'YlGnBu'.	`'YlGnBu'`
`text_knock`	`int`	The number of terms to knock out for text labels. Default is 2.	`2`
`text_maxsize`	`int`	The maximum fontsize of text labels. Default is 20.	`20`

Returns:

Name	Type	Description
`ax`	`matplotlib.axes._axes.Axes`	A matplotlib.axes.Axes object.

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/bulk/_Enrichment.py

def plot_enrichment(self,num:int=10,node_size:list=[5,10,15],
                    cax_loc:int=2,cax_fontsize:int=12,
                    fig_title:str='',fig_xlabel:str='Fractions of genes',
                    figsize:tuple=(2,4),cmap:str='YlGnBu',
                    text_knock:int=2,text_maxsize:int=20)->matplotlib.axes._axes.Axes:

    """Plot the cached GSEA enrichment table via ``geneset_plot``.

    Requires ``enrichment`` to have been called, since it reads
    ``self.enrich_res``.

    Arguments:
        num: The number of enriched terms to plot. Default is 10.
        node_size: A list of integers defining the size of nodes in the plot. Default is [5,10,15].
        cax_loc: The location of the colorbar on the plot. Default is 2.
        cax_fontsize: The fontsize of the colorbar label. Default is 12.
        fig_title: The title of the plot. Default is an empty string.
        fig_xlabel: The label of the x-axis. Default is 'Fractions of genes'.
        figsize: The size of the plot. Default is (2,4).
        cmap: The colormap to use for the plot. Default is 'YlGnBu'.
        text_knock: Forwarded to ``geneset_plot`` as ``text_knock``. Default is 2.
        text_maxsize: Forwarded to ``geneset_plot`` as ``text_maxsize``. Default is 20.

    Returns:
        ax: Whatever ``geneset_plot`` returns (annotated as a matplotlib Axes).
    """
    # NOTE(review): cax_loc is an int here, while geneset_plot annotates
    # cax_loc as a list with default [2, 0.55, 0.5, 0.02] -- confirm that
    # the scalar default is still accepted downstream.
    return geneset_plot(
        self.enrich_res,
        num=num,node_size=node_size,
        cax_loc=cax_loc,cax_fontsize=cax_fontsize,
        fig_title=fig_title,fig_xlabel=fig_xlabel,
        figsize=figsize,cmap=cmap,
        text_knock=text_knock,text_maxsize=text_maxsize,
    )

`plot_gsea(term_num=0, gene_set_title='', figsize=(3, 4), cmap='RdBu_r', title_fontsize=12, title_y=0.95)` ¶

Plot the gene set enrichment result.

Parameters:

Name	Type	Description	Default
`term_num`	`int`	Index of the enriched term to plot (its position in the filtered enrichment results). Default is 0.	`0`
`gene_set_title`	`str`	The title of the plot. Default is an empty string.	`''`
`figsize`	`tuple`	The size of the plot. Default is (3,4).	`(3, 4)`
`cmap`	`str`	The colormap to use for the plot. Default is 'RdBu_r'.	`'RdBu_r'`
`title_fontsize`	`int`	The fontsize of the title. Default is 12.	`12`
`title_y`	`float`	The y coordinate of the title. Default is 0.95.	`0.95`

Returns:

Name	Type	Description
`fig`	`matplotlib.figure.Figure`	A matplotlib.figure.Figure object.

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/bulk/_Enrichment.py

def plot_gsea(self,term_num:int=0,
              gene_set_title:str='',
              figsize:tuple=(3,4),
              cmap:str='RdBu_r',
              title_fontsize:int=12,
              title_y:float=0.95)->matplotlib.figure.Figure:
    """Draw the GSEA plot of a single enriched term.

    Requires ``enrichment`` to have been called, since it reads
    ``self.enrich_res`` and ``self.pre_res``.

    Arguments:
        term_num: Position of the term in ``self.enrich_res.index`` to plot. Default is 0.
        gene_set_title: The title of the plot. When it is an empty string
            the term name is used instead. Default is an empty string.
        figsize: The size of the plot. Default is (3,4).
        cmap: The colormap to use for the plot. Default is 'RdBu_r'.
        title_fontsize: The fontsize of the title. Default is 12.
        title_y: The y coordinate of the title. Default is 0.95.

    Returns:
        fig: The ``fig`` attribute of the GSEAPlot object.
    """
    from gseapy.plot import GSEAPlot
    selected_term=self.enrich_res.index[term_num]
    gsea_plot=GSEAPlot(
        rank_metric=self.pre_res.ranking,
        term=selected_term,
        figsize=figsize,
        cmap=cmap,
        **self.pre_res.results[selected_term]
    )
    # An empty title means "use the term name".
    title=selected_term if gene_set_title=='' else gene_set_title
    gsea_plot.fig.suptitle(title,fontsize=title_fontsize,y=title_y)
    gsea_plot.add_axes()
    return gsea_plot.fig

`omicverse.bulk.geneset_enrichment(gene_list, pathways_dict, pvalue_threshold=0.05, pvalue_type='auto', organism='Human', description='None', background=None, outdir='./enrichr', cutoff=0.5)` ¶

Performs gene set enrichment analysis using Enrichr API.

Parameters:

Name	Type	Description	Default
`gene_list`	`list`	List of gene symbols to be tested for enrichment.	required
`pathways_dict`	`dict`	Dictionary of pathway library names and corresponding Enrichr API URLs.	required
`pvalue_threshold`	`float`	P-value threshold for significant pathways. Default is 0.05.	`0.05`
`pvalue_type`	`str`	Which p-value column is compared against `pvalue_threshold`. 'auto' uses the adjusted p-value when more than 100 terms are returned and the raw p-value otherwise; 'adjust' always uses the adjusted p-value; any other value uses the raw p-value. Default is 'auto'.	`'auto'`
`organism`	`str`	Organism of the input gene list. Default is 'Human'.	`'Human'`
`description`	`str`	Description of the input gene list. Default is 'None'.	`'None'`
`background`	`list`	Background gene list to use for enrichment analysis. Default is None. If None, the background gene list is automatically set to the organism-specific gene list.	`None`
`outdir`	`str`	Output directory for Enrichr results. Default is './enrichr'.	`'./enrichr'`
`cutoff`	`float`	Show enriched terms which Adjusted P-value < cutoff. Default is 0.5.	`0.5`

Returns:

Name	Type	Description
`enrich_res`	`pd.DataFrame`	A pandas DataFrame containing the enrichment results.

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/bulk/_Enrichment.py

def geneset_enrichment(gene_list:list,pathways_dict:dict,
                       pvalue_threshold:float=0.05,pvalue_type:str='auto',
                       organism:str='Human',description:str='None',
                       background:list=None,
                       outdir:str='./enrichr',cutoff:float=0.5)->pd.DataFrame:
    """
    Performs gene set enrichment analysis with the bundled gseapy ``enrichr``.

    Arguments:
        gene_list: List of gene symbols to be tested for enrichment.
        pathways_dict: Pathway / gene-set dictionary passed to ``enrichr`` as ``gene_sets``.
        pvalue_threshold: Exclusive upper bound on the selected p-value column. Default is 0.05.
        pvalue_type: Which column is filtered on. 'auto' uses 'Adjusted P-value'
            when the result table has more than 100 rows and 'P-value' otherwise;
            'adjust' always uses 'Adjusted P-value'; any other value uses
            'P-value'. Default is 'auto'.
        organism: Organism of the input gene list. Default is 'Human'.
        description: Description of the input gene list. Default is 'None'.
        background: Background passed to ``enrichr``. When None it is set to
            'mmusculus_gene_ensembl' for 'Mouse'/'mouse'/'mm', to
            'hsapiens_gene_ensembl' for 'Human'/'human'/'hs', and stays None
            for any other organism.
        outdir: Output directory for Enrichr results. Default is './enrichr'.
        cutoff: Passed to ``enrichr`` as ``cutoff``. Default is 0.5.

    Returns:
        enrich_res: A pandas DataFrame with the rows of ``enr.res2d`` that pass
            the filter, plus the derived columns 'logp' (negative natural log
            of the selected p-value), 'logc' (natural log of 'Odds Ratio'),
            'num' (numerator of 'Overlap') and 'fraction' (numerator divided
            by denominator of 'Overlap').

    """
    from ..externel.gseapy import enrichr

    if background is None:
        if organism in ('Mouse','mouse','mm'):
            background='mmusculus_gene_ensembl'
        elif organism in ('Human','human','hs'):
            background='hsapiens_gene_ensembl'

    enr = enrichr(gene_list=gene_list,
                 gene_sets=pathways_dict,
                 organism=organism, # don't forget to set organism to the one you desired! e.g. Yeast
                 description=description,
                 background=background,
                 outdir=outdir,
                 cutoff=cutoff # test dataset, use lower value from range(0,1)
                )
    res2d=enr.res2d

    # Pick the p-value column once; filtering and 'logp' both use it.
    if pvalue_type=='auto':
        pvalue_col='Adjusted P-value' if res2d.shape[0]>100 else 'P-value'
    elif pvalue_type=='adjust':
        pvalue_col='Adjusted P-value'
    else:
        pvalue_col='P-value'

    # Copy the filtered slice before adding columns: assigning into a
    # boolean-mask slice is chained assignment (SettingWithCopyWarning) and
    # must never write through to enr.res2d.
    enrich_res=res2d[res2d[pvalue_col]<pvalue_threshold].copy()
    enrich_res['logp']=-np.log(enrich_res[pvalue_col])
    enrich_res['logc']=np.log(enrich_res['Odds Ratio'])

    # 'Overlap' entries are read as '<hits>/<set size>' strings.
    overlap_parts=[entry.split('/') for entry in enrich_res['Overlap']]
    enrich_res['num']=[int(parts[0]) for parts in overlap_parts]
    enrich_res['fraction']=[int(parts[0])/int(parts[1]) for parts in overlap_parts]
    return enrich_res

`omicverse.bulk.geneset_enrichment_GSEA(gene_rnk, pathways_dict, processes=8, permutation_num=100, outdir='./enrichr_gsea', format='png', seed=112)` ¶

Enrichment analysis using GSEA

Parameters:

Name	Type	Description	Default
`gene_rnk`	`pd.DataFrame`	pre-ranked correlation table or pandas DataFrame. Same input with `GSEA` .rnk file.	required
`pathways_dict`	`dict`	Dictionary of pathway library names and corresponding Enrichr API URLs.	required
`processes`	`int`	Number of Processes you are going to use. Default: 8.	`8`
`permutation_num`	`int`	Number of permutations for significance computation. Default: 100.	`100`
`outdir`	`str`	Output directory for Enrichr results. Default is './enrichr_gsea'.	`'./enrichr_gsea'`
`format`	`str`	Matplotlib figure format. Default: 'png'.	`'png'`
`seed`	`int`	Random seed. Default: 112.	`112`

Returns:

Name	Type	Description
`pre_res`	`dict`	A prerank object containing the enrichment results.

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/bulk/_Enrichment.py

def geneset_enrichment_GSEA(gene_rnk:pd.DataFrame,pathways_dict:dict,
                            processes:int=8,
                     permutation_num:int=100, # reduce number to speed up testing
                     outdir:str='./enrichr_gsea', format:str='png', seed:int=112)->dict:
    """
    Enrichment analysis using GSEA

    Thin wrapper around the bundled gseapy ``prerank``: every argument is
    forwarded unchanged and the prerank result is returned as-is, without any
    filtering or derived columns.

    Arguments:
        gene_rnk: pre-ranked correlation table or pandas DataFrame. Same input with ``GSEA`` .rnk file.
        pathways_dict: Pathway / gene-set dictionary passed to ``prerank`` as ``gene_sets``.
        processes: Number of Processes you are going to use. Default: 8.
        permutation_num: Number of permutations for significance computation. Default: 100.
        outdir: Output directory for the GSEA results. Default is './enrichr_gsea'.
        format: Matplotlib figure format. Default: 'png'.
        seed: Random seed. Default: 112.

    Returns:
        pre_res: The object returned by ``prerank``. The return annotation
            says ``dict``, but the value is whatever ``prerank`` produces;
            ``pyGSEA`` reads its ``res2d``, ``ranking`` and ``results`` attributes.

    """
    from ..externel.gseapy import prerank
    return prerank(rnk=gene_rnk, gene_sets=pathways_dict,
                   processes=processes,
                   permutation_num=permutation_num, # reduce number to speed up testing
                   outdir=outdir, format=format, seed=seed)

`omicverse.bulk.geneset_plot(enrich_res, num=10, node_size=[5, 10, 15], cax_loc=[2, 0.55, 0.5, 0.02], cax_fontsize=12, fig_title='', fig_xlabel='Fractions of genes', figsize=(2, 4), cmap='YlGnBu', text_knock=5, text_maxsize=20, bbox_to_anchor_used=(-0.45, -13), node_diameter=10, custom_ticks=[5, 10])` ¶

Plot the gene set enrichment result.

Parameters:

Name	Type	Description	Default
`num`	`int`	The number of enriched terms to plot. Default is 10.	`10`
`node_size`	`list`	A list of integers defining the size of nodes in the plot. Default is [5,10,15].	`[5, 10, 15]`
`cax_loc`	`list`	The location, width and height of the colorbar on the plot. Default is [2, 0.55, 0.5, 0.02].	`[2, 0.55, 0.5, 0.02]`
`cax_fontsize`	`int`	The fontsize of the colorbar label. Default is 12.	`12`
`fig_title`	`str`	The title of the plot. Default is an empty string.	`''`
`fig_xlabel`	`str`	The label of the x-axis. Default is 'Fractions of genes'.	`'Fractions of genes'`
`figsize`	`tuple`	The size of the plot. Default is (2,4).	`(2, 4)`
`cmap`	`str`	The colormap to use for the plot. Default is 'YlGnBu'.	`'YlGnBu'`
`text_knock`	`int`	The number of characters to knock off the end of the term name. Default is 5.	`5`
`text_maxsize`	`int`	The maximum fontsize of the term names. Default is 20.	`20`
`bbox_to_anchor_used`	`tuple`	The anchor point for placing the legend. Default is (-0.45, -13).	`(-0.45, -13)`
`node_diameter`	`int`	The base size for nodes in the plot. Default is 10.	`10`

Returns:

Type	Description
`matplotlib.axes._axes.Axes`	A matplotlib.axes.Axes object.

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/bulk/_Enrichment.py

def geneset_plot(enrich_res,num:int=10,node_size:list=[5,10,15],
                        cax_loc:list=[2, 0.55, 0.5, 0.02],cax_fontsize:int=12,
                        fig_title:str='',fig_xlabel:str='Fractions of genes',
                        figsize:tuple=(2,4),cmap:str='YlGnBu',
                        text_knock:int=5,text_maxsize:int=20,
                        bbox_to_anchor_used:tuple=(-0.45, -13),node_diameter:int=10,
                        custom_ticks:list=[5,10])->matplotlib.axes._axes.Axes:
    """
    Draw a bubble plot of the top enriched gene sets.

    The `num` rows with the smallest 'P-value' are selected and then ordered
    by 'logc'. Each selected row becomes one bubble: x is 'fraction', y is the
    row's rank in that ordering, the area is 'num' * `node_diameter` and the
    colour is 'logp'. A horizontal colorbar and a bubble-size legend are added.

    Arguments:
        enrich_res: Enrichment result table. The columns 'P-value', 'logc',
            'fraction', 'num', 'logp' and 'Term' are read.
        num: The number of enriched terms to plot. Default is 10.
        node_size: Values shown in the size legend; each is multiplied by
            `node_diameter` to size its legend marker. Default is [5,10,15].
        cax_loc: Rectangle passed to `fig.add_axes` for the colorbar axes.
            Default is [2, 0.55, 0.5, 0.02].
        cax_fontsize: Font size of the colorbar label and of the legend text.
            Default is 12.
        fig_title: The title of the plot. Default is an empty string.
        fig_xlabel: The label of the x-axis. Default is 'Fractions of genes'.
        figsize: The size of the figure. Default is (2,4).
        cmap: The colormap used for the bubbles. Default is 'YlGnBu'.
        text_knock: Forwarded to `plot_text_set` when formatting the term
            labels. Default is 5.
        text_maxsize: Forwarded to `plot_text_set` when formatting the term
            labels. Default is 20.
        bbox_to_anchor_used: The `bbox_to_anchor` value for the size legend.
            Default is (-0.45, -13).
        node_diameter: Multiplier applied to 'num' and to each `node_size`
            entry to obtain marker sizes. Default is 10.
        custom_ticks: Tick positions set on the colorbar. Default is [5,10].

    Returns:
        The matplotlib Axes holding the bubble plot.

    """
    fig, ax = plt.subplots(figsize=figsize)

    # Keep the most significant terms, then order them by 'logc' for display.
    top_terms = enrich_res.sort_values('P-value')[:num]
    top_terms = top_terms.sort_values('logc')
    row_positions = range(len(top_terms))

    bubbles = ax.scatter(
        top_terms['fraction'],
        row_positions,
        s=top_terms['num'] * node_diameter,
        linewidths=1,
        edgecolors='black',
        c=top_terms['logp'],
        cmap=cmap,
    )
    ax.yaxis.tick_right()

    # Only the text before the first '(' of each term is used as its label.
    term_labels = [
        plot_text_set(term.split('(')[0], text_knock=text_knock, text_maxsize=text_maxsize)
        for term in top_terms['Term']
    ]
    plt.yticks(row_positions, term_labels, fontsize=10)
    plt.xticks(fontsize=12)
    plt.title(fig_title, fontsize=12)
    plt.xlabel(fig_xlabel, fontsize=12)

    # Colorbar lives on its own axes so that its position is fully user-controlled.
    colorbar_axes = fig.add_axes(cax_loc)
    colorbar = fig.colorbar(bubbles, shrink=0.25, cax=colorbar_axes, orientation='horizontal')
    colorbar.set_label(r'$−Log_{10}(P_{adjusted})$', fontdict={'size': cax_fontsize})
    colorbar.set_ticks(custom_ticks)

    # Empty scatters act as legend handles that show the bubble-size scale.
    size_handles = [
        ax.scatter([], [], s=size * node_diameter, marker='o', color='white', edgecolors='black')
        for size in node_size
    ]
    size_labels = [str(size) for size in node_size]

    # NOTE(review): plt.legend targets pyplot's current axes, which at this
    # point is presumably the colorbar axes added above; that would explain the
    # large default offset in bbox_to_anchor_used -- confirm before switching
    # this to ax.legend.
    plt.legend(
        size_handles,
        size_labels,
        loc='lower left',
        ncol=3,
        bbox_to_anchor=bbox_to_anchor_used,
        fontsize=cax_fontsize,
    )
    return ax

Api enrichment

omicverse.bulk.pyGSE ¶

__init__(gene_list, pathways_dict, pvalue_threshold=0.05, pvalue_type='auto', background=None, organism='Human', description='None', outdir='./enrichr', cutoff=0.5) ¶

enrichment() ¶

plot_enrichment(num=10, node_size=[5, 10, 15], cax_loc=2, cax_fontsize=12, fig_title='', fig_xlabel='Fractions of genes', figsize=(2, 4), cmap='YlGnBu', text_knock=2, text_maxsize=20) ¶

omicverse.bulk.pyGSEA ¶

__init__(gene_rnk, pathways_dict, processes=8, permutation_num=100, outdir='./enrichr_gsea', cutoff=0.5) ¶

enrichment(format='png', pval=0.05, seed=112) ¶

plot_enrichment(num=10, node_size=[5, 10, 15], cax_loc=2, cax_fontsize=12, fig_title='', fig_xlabel='Fractions of genes', figsize=(2, 4), cmap='YlGnBu', text_knock=2, text_maxsize=20) ¶

plot_gsea(term_num=0, gene_set_title='', figsize=(3, 4), cmap='RdBu_r', title_fontsize=12, title_y=0.95) ¶

omicverse.bulk.geneset_enrichment(gene_list, pathways_dict, pvalue_threshold=0.05, pvalue_type='auto', organism='Human', description='None', background=None, outdir='./enrichr', cutoff=0.5) ¶

omicverse.bulk.geneset_enrichment_GSEA(gene_rnk, pathways_dict, processes=8, permutation_num=100, outdir='./enrichr_gsea', format='png', seed=112) ¶

`omicverse.bulk.pyGSE` ¶

`__init__(gene_list, pathways_dict, pvalue_threshold=0.05, pvalue_type='auto', background=None, organism='Human', description='None', outdir='./enrichr', cutoff=0.5)` ¶

`enrichment()` ¶

`plot_enrichment(num=10, node_size=[5, 10, 15], cax_loc=2, cax_fontsize=12, fig_title='', fig_xlabel='Fractions of genes', figsize=(2, 4), cmap='YlGnBu', text_knock=2, text_maxsize=20)` ¶

`omicverse.bulk.pyGSEA` ¶

`__init__(gene_rnk, pathways_dict, processes=8, permutation_num=100, outdir='./enrichr_gsea', cutoff=0.5)` ¶

`enrichment(format='png', pval=0.05, seed=112)` ¶

`plot_enrichment(num=10, node_size=[5, 10, 15], cax_loc=2, cax_fontsize=12, fig_title='', fig_xlabel='Fractions of genes', figsize=(2, 4), cmap='YlGnBu', text_knock=2, text_maxsize=20)` ¶

`plot_gsea(term_num=0, gene_set_title='', figsize=(3, 4), cmap='RdBu_r', title_fontsize=12, title_y=0.95)` ¶

`omicverse.bulk.geneset_enrichment(gene_list, pathways_dict, pvalue_threshold=0.05, pvalue_type='auto', organism='Human', description='None', background=None, outdir='./enrichr', cutoff=0.5)` ¶

`omicverse.bulk.geneset_enrichment_GSEA(gene_rnk, pathways_dict, processes=8, permutation_num=100, outdir='./enrichr_gsea', format='png', seed=112)` ¶