Skip to content

Api via

omicverse.single.pyVIA

Bases: object

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_via.py
class pyVIA(object):

    def __init__(self,adata:anndata.AnnData,adata_key:str='X_pca',adata_ncomps:int=80,basis:str='X_umap',
                 clusters:str='',dist_std_local:float=2, jac_std_global=0.15, labels:np.ndarray=None,
                 keep_all_local_dist='auto', too_big_factor:float=0.4, resolution_parameter:float=1.0, partition_type:str="ModularityVP", small_pop:int=10,
                 jac_weighted_edges:bool=True, knn:int=30, n_iter_leiden:int=5, random_seed:int=42,
                 num_threads=-1, distance='l2', time_smallpop=15,
                 super_cluster_labels:bool=False, super_node_degree_list:bool=False, super_terminal_cells:bool=False, x_lazy:float=0.95, alpha_teleport:float=0.99,
                 root_user=None, preserve_disconnected:bool=True, dataset:str='', super_terminal_clusters:list=None,
                 is_coarse=True, csr_full_graph:np.ndarray='', csr_array_locally_pruned='', ig_full_graph='',
                 full_neighbor_array='', full_distance_array='',  df_annot=None,
                 preserve_disconnected_after_pruning:bool=False,
                 secondary_annotations:list=None, pseudotime_threshold_TS:int=30, cluster_graph_pruning_std:float=0.15,
                 visual_cluster_graph_pruning:float=0.15, neighboring_terminal_states_threshold=3, num_mcmc_simulations=1300,
                 piegraph_arrow_head_width=0.1,
                 piegraph_edgeweight_scalingfactor=1.5, max_visual_outgoing_edges:int=2, via_coarse=None, velocity_matrix=None,
                 gene_matrix=None, velo_weight=0.5, edgebundle_pruning=None, A_velo = None, CSM = None, edgebundle_pruning_twice=False, pca_loadings = None, time_series=False,
                 time_series_labels:list=None, knn_sequential:int = 10, knn_sequential_reverse:int = 0,t_diff_step:int = 1,single_cell_transition_matrix = None,
                 embedding_type:str='via-mds',do_compute_embedding:bool=False, color_dict:dict=None,user_defined_terminal_cell:list=None, user_defined_terminal_group:list=None,
                 do_gaussian_kernel_edgeweights:bool=False,RW2_mode:bool=False,working_dir_fp:str ='/home/shobi/Trajectory/Datasets/') -> None:
        r"""
        Initialize a pyVIA object.

        The constructor reads the input matrix, the plotting embedding and the
        annotation column from `adata`, stores `adata`, `clusters` and `basis`
        on the instance, and builds the underlying `VIA` model as `self.model`.
        Every argument that is not consumed here is forwarded unchanged to `VIA`.

        Arguments:
            adata: An AnnData object containing the scRNA-seq.
            adata_key: the key of the AnnData in obsm to perform VIA on. default: 'X_pca'
            adata_ncomps: the number of components to use from the AnnData in obsm to perform VIA on (the first `adata_ncomps` columns are taken). default: 80
            basis: the key of the AnnData in obsm to use as the basis for the embedding. default: 'X_umap'. The array `adata.obsm[basis]` is passed to VIA as its `embedding` (e.g. precomputed tsne, umap, phate, via-umap; size n_cells x 2).
            clusters: the column of `adata.obs` passed to VIA as `true_label`. default: ''. The column is read unconditionally, so it must name an existing column of `adata.obs`.
            dist_std_local: local level of pruning for PARC graph clustering stage. Range (0.1,3) higher numbers mean more edge retention
            jac_std_global: (optional, default = 0.15, can also set as 'median') global level graph pruning for PARC clustering stage. Number of standard deviations below the network's mean-jaccard-weighted edges. 0.1-1 provide reasonable pruning. Higher value means less pruning (more edges retained). e.g. a value of 0.15 means all edges that are above mean(edgeweight)-0.15*std(edge-weights) are retained. We find both 0.15 and 'median' to yield good results/starting point and resulting in pruning away ~ 50-60% edges
            labels: default is None, and PARC clusters are used for the viagraph. Alternatively provide a list of cluster memberships that are integer values (not strings) to construct the viagraph using another clustering method or available annotations
            keep_all_local_dist: default value of 'auto' means that for smaller datasets local-pruning is done prior to clustering, but for large datasets local pruning is set to False for speed.
            too_big_factor: (optional, default=0.4). Forces clusters > 0.4*n_cells to be re-clustered
            resolution_parameter: (float) the resolution parameter for the Louvain algorithm.
            partition_type: (str, default: "ModularityVP") the partitioning algorithm to use.
            small_pop: (int, default: 10) the number of cells to be considered in a small population.
            jac_weighted_edges: (bool, default: True) whether to use weighted edges in the PARC clustering step.
            knn: (int, optional, default: 30) the number of K-Nearest Neighbors for HNSWlib KNN graph. Larger knn means more graph connectivity. Lower knn means more loosely connected clusters/cells.
            n_iter_leiden: (int) the number of iterations for the Leiden algorithm.
            random_seed: (int) the random seed to pass to the clustering algorithm.
            num_threads: (int) the number of threads to use for the clustering algorithm.
            distance: (str, default: 'l2') the distance metric to use for graph construction and similarity. Options are 'l2', 'ip', and 'cosine'.
            time_smallpop: the maximum time allowed to handle singletons.
            super_cluster_labels: forwarded unchanged to VIA (default False).
            super_node_degree_list: forwarded unchanged to VIA (default False).
            super_terminal_cells: forwarded unchanged to VIA (default False).
            x_lazy: (float, default: 0.95) 1-x = probability of staying in the same node (lazy). Values between 0.9-0.99 are reasonable.
            alpha_teleport: (float, default: 0.99) 1-alpha is probability of jumping. Values between 0.95-0.99 are reasonable unless prior knowledge of teleportation.
            root_user: (list, None) the root user list. Can be a list of strings, a list of int, or None. The default is None. When the root_user is set as None and an RNA velocity matrix is available, a root will be automatically computed. If the root_user is None and no velocity matrix is provided, then an arbitrary root is selected. If the root_user is ['celltype_earlystage'] where the str corresponds to an item in true_label, then a suitable starting point will be selected corresponding to this group.
            preserve_disconnected: bool (default = True) If you believe there may be disconnected trajectories then set this to False
            dataset: str Can be set to 'group' or '' (default). This refers to the type of root label (group level root or single cell index) you are going to provide. NOTE: whenever `root_user` is not None this wrapper overrides the value with 'group' before calling VIA.
            super_terminal_clusters: list (default None, treated as an empty list) forwarded to VIA.
            is_coarse: bool (default = True) If running VIA in two iterations where you wish to link the second fine-grained iteration with the initial iteration, then you set to False
            csr_full_graph: forwarded unchanged to VIA (default '').
            csr_array_locally_pruned: forwarded unchanged to VIA (default '').
            ig_full_graph: forwarded unchanged to VIA (default '').
            full_neighbor_array: forwarded unchanged to VIA (default '').
            full_distance_array: forwarded unchanged to VIA (default '').
            df_annot: DataFrame (default None) used for the Mouse Organ data
            preserve_disconnected_after_pruning: bool (default = False) If you believe there are disconnected trajectories then set this to False and test your hypothesis
            secondary_annotations: None (default None)
            pseudotime_threshold_TS: int (default = 30) corresponds to the criteria for a state to be considered a candidate terminal cell fate to be 30% or later of the computed pseudotime range
            cluster_graph_pruning_std: (float, default: 0.15) the pruning level of the cluster graph. Often set to the same value as the PARC clustering level of jac_std_global. Reasonable range is [0.1, 1]. To retain more connectivity in the clustergraph underlying the trajectory computations, increase the value.
            visual_cluster_graph_pruning: (float, default: 0.15) the pruning level for the cluster graph. This only comes into play if the user deliberately chooses not to use the default edge-bundling method of visualizing edges (draw_piechart_graph()) and instead calls draw_piechart_graph_nobundle(). It controls the number of edges plotted for visual effect. This does not impact computation of terminal states, pseudotime, or lineage likelihoods.
            neighboring_terminal_states_threshold: int (default = 3). Candidates for terminal states that are neighbors of each other may be removed from the list if they have this number or more of terminal states as neighbors
            num_mcmc_simulations: int (default = 1300) number of random walk simulations conducted
            piegraph_arrow_head_width: float (default = 0.1) size of arrow heads in via cluster graph
            piegraph_edgeweight_scalingfactor: (default = 1.5) scaling factor for edge thickness in via cluster graph
            max_visual_outgoing_edges: int (default = 2) Rarely comes into play. Only used if the user later chooses to plot the via-graph without edgebundling using draw_piechart_graph_nobundle(). Only allows max_visual_outgoing_edges to come out of any given node.
            via_coarse: VIA (default = None) If instantiating a second iteration of VIA that needs to be linked to a previous iteration (e.g. via0), then set via_coarse to the previous via0 object
            velocity_matrix: matrix (default None) matrix of size [n_samples x n_genes]. This is the velocity matrix computed by scVelo (or similar package) and stored in adata.layers['velocity']. The genes used for computing velocity should correspond to those used in gene_matrix. Requires gene_matrix to be provided too.
            gene_matrix: matrix (default None) Only used if velocity_matrix is available. matrix of size [n_samples x n_genes]. We recommend using a subset like HVGs rather than full set of genes. (need to densify input if taking from adata = adata.X.todense())
            velo_weight: float (optional, default = 0.5) float between [0,1]. The weight assigned to directionality and connectivity derived from scRNA-velocity
            edgebundle_pruning: float (default=None) will by default be set to the same as the cluster_graph_pruning_std and influences the visualized level of pruning of edges. Typical values can be between [0,1] with higher numbers retaining more edges
            A_velo: ndarray Cluster Graph Transition matrix based on rna velocity [n_clus x n_clus]
            CSM: forwarded unchanged to VIA (default None).
            edgebundle_pruning_twice: bool default: False. When True, the edgebundling is applied to a further visually pruned (visual_cluster_graph_pruning) graph and can sometimes simplify the visualization. It does not impact the pseudotime and lineage computations
            pca_loadings: array (default None) the loadings of the pcs used to project the cells (to projected euclidean location based on velocity). n_cells x n_pcs
            time_series: if the data has time-series labels then set to True
            time_series_labels: list (default None) list of integer values of temporal annotations corresponding to e.g. hours (post fert), days, or sequential ordering
            knn_sequential: int (default = 10) number of knn in the adjacent time-point for time-series data (t_i and t_i+1)
            knn_sequential_reverse: int (default = 0) number of knn enforced from current to previous time point
            t_diff_step: int (default = 1) Number of permitted temporal intervals between connected nodes. If time data is labeled as [0,25,50,75,100,..] then t_diff_step=1 corresponds to '25' and only edges within t_diff_steps are retained
            single_cell_transition_matrix: forwarded unchanged to VIA (default None).
            embedding_type: str (default = 'via-mds', other options are 'via-umap' and 'via-force')
            do_compute_embedding: bool (default = False) If you want an embedding (n_samples x 2) to be computed on the basis of the via sc graph then set this to True
            color_dict: dict (default None) forwarded unchanged to VIA.
            user_defined_terminal_cell: list (default None, treated as an empty list) forwarded to VIA.
            user_defined_terminal_group: list (default None, treated as an empty list) forwarded to VIA.
            do_gaussian_kernel_edgeweights: bool (default = False) Type of edgeweighting on the graph edges
            RW2_mode: bool (default = False) forwarded unchanged to VIA.
            working_dir_fp: str forwarded unchanged to VIA. The default is the hard-coded path '/home/shobi/Trajectory/Datasets/'.

        """

        # ``None`` replaces the former ``[]`` defaults: a literal list default is
        # created once and would be shared by every instance (and by VIA, which
        # receives the very same object). Build a fresh list per call instead.
        if super_terminal_clusters is None:
            super_terminal_clusters = []
        if user_defined_terminal_cell is None:
            user_defined_terminal_cell = []
        if user_defined_terminal_group is None:
            user_defined_terminal_group = []

        self.adata = adata
        #self.adata_key = adata_key
        # Input matrix for VIA: the first `adata_ncomps` columns of the chosen obsm entry.
        data = adata.obsm[adata_key][:, 0:adata_ncomps]
        embedding=self.adata.obsm[basis]
        true_label=adata.obs[clusters]
        # Remembered so the plotting helpers can fall back to them.
        self.clusters=clusters
        self.basis=basis

        # A user-supplied root is always treated as a group-level label.
        if root_user is not None:
             dataset='group'



        self.model=VIA(data=data,true_label=true_label,
                 dist_std_local=dist_std_local,jac_std_global=jac_std_global,labels=labels,
                 keep_all_local_dist=keep_all_local_dist,too_big_factor=too_big_factor,resolution_parameter=resolution_parameter,partition_type=partition_type,small_pop=small_pop,
                 jac_weighted_edges=jac_weighted_edges,knn=knn,n_iter_leiden=n_iter_leiden,random_seed=random_seed,
                 num_threads=num_threads,distance=distance,time_smallpop=time_smallpop,
                 super_cluster_labels=super_cluster_labels,super_node_degree_list=super_node_degree_list,super_terminal_cells=super_terminal_cells,x_lazy=x_lazy,alpha_teleport=alpha_teleport,
                 root_user=root_user,preserve_disconnected=preserve_disconnected,dataset=dataset,super_terminal_clusters=super_terminal_clusters,
                 is_coarse=is_coarse,csr_full_graph=csr_full_graph,csr_array_locally_pruned=csr_array_locally_pruned,ig_full_graph=ig_full_graph,
                 full_neighbor_array=full_neighbor_array,full_distance_array=full_distance_array,embedding=embedding,df_annot=df_annot,
                 preserve_disconnected_after_pruning=preserve_disconnected_after_pruning,
                 secondary_annotations=secondary_annotations,pseudotime_threshold_TS=pseudotime_threshold_TS,cluster_graph_pruning_std=cluster_graph_pruning_std,
                 visual_cluster_graph_pruning=visual_cluster_graph_pruning,neighboring_terminal_states_threshold=neighboring_terminal_states_threshold,num_mcmc_simulations=num_mcmc_simulations,
                 piegraph_arrow_head_width=piegraph_arrow_head_width,
                 piegraph_edgeweight_scalingfactor=piegraph_edgeweight_scalingfactor,max_visual_outgoing_edges=max_visual_outgoing_edges,via_coarse=via_coarse,velocity_matrix=velocity_matrix,
                 gene_matrix=gene_matrix,velo_weight=velo_weight,edgebundle_pruning=edgebundle_pruning,A_velo=A_velo,CSM=CSM,edgebundle_pruning_twice=edgebundle_pruning_twice,pca_loadings=pca_loadings,time_series=time_series,
                 time_series_labels=time_series_labels,knn_sequential=knn_sequential,knn_sequential_reverse=knn_sequential_reverse,t_diff_step=t_diff_step,single_cell_transition_matrix=single_cell_transition_matrix,
                 embedding_type=embedding_type,do_compute_embedding=do_compute_embedding,color_dict=color_dict,user_defined_terminal_cell=user_defined_terminal_cell,user_defined_terminal_group=user_defined_terminal_group,
                 do_gaussian_kernel_edgeweights=do_gaussian_kernel_edgeweights,RW2_mode=RW2_mode,working_dir_fp=working_dir_fp
                 )
    def run(self):
        """calculate the via graph and pseudotime

        """
        self.model.run_VIA()

    def get_piechart_dict(self,label:int=0,clusters:str='')->dict:
        """
        Cluster composition graph

        Arguments:
            label: int (default=0) cluster label of pie chart
            clusters: the celltype you want interested

        Returns:
            res_dict: cluster composition graph
        """
        if clusters=='':
            clusters=self.clusters
        self.adata.obs[clusters]=self.adata.obs[clusters].astype('category')
        cluster_i_loc=np.where(np.asarray(self.model.labels) == label)[0]
        res_dict=dict(self.adata.obs.iloc[cluster_i_loc].value_counts(clusters))
        return res_dict

    def get_pseudotime(self,adata=None):
        """
        Extract the pseudotime of VIA

        Arguments:
            adata: an adata object of you interested,if None, it will be added to `self.adata.obs['pt_via']`

        """

        print('...the pseudotime of VIA added to AnnData obs named `pt_via`')
        if adata is None:
            self.adata.obs['pt_via']=self.model.single_cell_pt_markov
        else:
            adata.obs['pt_via']=self.model.single_cell_pt_markov

    def plot_piechart_graph(self,clusters:str='', type_data='pt',
                                gene_exp:list=None, title='',
                                cmap:str=None, ax_text=True, figsize:tuple=(8,4),
                                dpi=150,headwidth_arrow = 0.1,
                                alpha_edge=0.4, linewidth_edge=2,
                                edge_color='darkblue',reference=None,
                                show_legend:bool=True, pie_size_scale:float=0.8, fontsize:float=8)->Tuple[matplotlib.figure.Figure,
                                                                                                          matplotlib.axes._axes.Axes,
                                                                                                          matplotlib.axes._axes.Axes]:
        """plot two subplots with a clustergraph level representation of the viagraph showing true-label composition (lhs) and pseudotime/gene expression (rhs)

        Arguments:
            clusters : column name of the adata.obs dataframe that contains the cluster labels; '' (default) falls back to the column given at construction
            type_data : string  default 'pt' for pseudotime colored nodes. or 'gene'
            gene_exp : list of values (column of dataframe) corresponding to feature or gene expression to be used to color nodes at CLUSTER level. default None, treated as an empty list
            title : string
            cmap : default None. automatically chooses coolwarm for gene expression or viridis_r for pseudotime
            ax_text : Bool default= True. Annotates each node with cluster number and population of membership
            figsize : tuple default=(8,4). size of the figure
            dpi : int default = 150
            headwidth_arrow : default = 0.1. width of arrowhead used to directed edges
            alpha_edge : default = 0.4. forwarded to the drawing routine (presumably the edge transparency)
            linewidth_edge : default = 2. forwarded to the drawing routine (presumably the edge line width)
            edge_color : default = 'darkblue'. forwarded to the drawing routine (presumably the edge color)
            reference : None or list. list of categorical (str) labels for cluster composition of the piecharts (LHS subplot) length = n_samples.
            show_legend : bool default=True. forwarded to the drawing routine
            pie_size_scale : float default=0.8 scaling factor of the piechart nodes
            fontsize : float default=8. fontsize of the text in the piecharts

        Returns:
            fig: Returns matplotlib figure with two axes that plot the clustergraph using edge bundling
            ax: left axis shows the clustergraph with each node colored by annotated ground truth membership.
            ax1: right axis shows the same clustergraph with each node colored by the pseudotime or gene expression
        """

        # A fresh list per call: a literal ``[]`` default would be one object
        # shared by every invocation and by the drawing routine.
        if gene_exp is None:
            gene_exp = []

        if clusters=='':
            clusters=self.clusters
        # The annotation column is converted to a categorical dtype in place.
        self.adata.obs[clusters]=self.adata.obs[clusters].astype('category')
        fig, ax, ax1 = draw_piechart_graph_pyomic(clusters=clusters,adata=self.adata,
                                   via_object=self.model, type_data=type_data,
                                gene_exp=gene_exp, title=title,
                                cmap=cmap, ax_text=ax_text,figsize=figsize,
                                dpi=dpi,headwidth_arrow = headwidth_arrow,
                                alpha_edge=alpha_edge, linewidth_edge=linewidth_edge,
                                edge_color=edge_color,reference=reference,
                                show_legend=show_legend, pie_size_scale=pie_size_scale, fontsize=fontsize)
        return fig, ax, ax1

    def plot_stream(self,clusters:str='',basis:str='',
                   density_grid:float=0.5, arrow_size:float=0.7, arrow_color:str = 'k',
                   arrow_style="-|>",  max_length:int=4, linewidth:float=1,min_mass = 1, cutoff_perc:int = 5,
                   scatter_size:int=500, scatter_alpha:float=0.5,marker_edgewidth:float=0.1,
                   density_stream:int = 2, smooth_transition:int=1, smooth_grid:float=0.5,
                   color_scheme:str = 'annotation', add_outline_clusters:bool=False,
                   cluster_outline_edgewidth = 0.001,gp_color = 'white', bg_color='black' ,
                   dpi=80 , title='Streamplot', b_bias=20, n_neighbors_velocity_grid=None,
                   other_labels:list = None,use_sequentially_augmented:bool=False, cmap_str:str='rainbow')->Tuple[matplotlib.figure.Figure,
                                                                                                          matplotlib.axes._axes.Axes]:
        """Draw a vector streamplot on the embedding, giving a fine-grained view of the inferred trajectory directions

        Arguments:
            clusters : column name of the adata.obs dataframe that contains the cluster labels; '' (default) falls back to the column given at construction
            basis : key of adata.obsm holding the embedding; '' (default) falls back to the basis given at construction
            density_grid : float, default = 0.5, density of the grid on which to project the directionality of cells
            arrow_size : float, default = 0.7, size of the arrows in the streamplot
            arrow_color : str, default = 'k', color of the arrows in the streamplot
            arrow_style : str, default = "-|>", style of the arrows in the streamplot
            max_length : int, default = 4, maximum length of the arrows in the streamplot
            linewidth : float, default = 1, width of lines in streamplot
            min_mass : float, default = 1, minimum mass of the arrows in the streamplot
            cutoff_perc : int, default = 5, cutoff percentage of the arrows in the streamplot
            scatter_size : int, default = 500, size of scatter points
            scatter_alpha : float, default = 0.5, transparency of scatter points
            marker_edgewidth : float, default = 0.1, width of outline around each scatter point
            density_stream : int, default = 2, density of the streamplot
            smooth_transition : int, default = 1, smoothness of the transition between the streamplot and the scatter points
            smooth_grid : float, default = 0.5, smoothness of the grid on which to project the directionality of cells
            color_scheme : str, default = 'annotation' corresponds to self.true_labels. Other options are 'time' (uses single-cell pseudotime) or 'clusters' (uses self.clusters)
            add_outline_clusters : bool, default = False, whether to add an outline to the clusters
            cluster_outline_edgewidth : float, default = 0.001, width of the outline around the clusters
            gp_color : str, default = 'white', color of the grid points
            bg_color : str, default = 'black', color of the background
            dpi : int, default = 80, dpi of the figure
            title : str, default = 'Streamplot', title of the figure
            b_bias : int, default = 20, higher value makes the forward bias of pseudotime stronger
            n_neighbors_velocity_grid : int, default = None, number of neighbors to use for the velocity grid
            other_labels : list, default = None, list of other labels to plot in the streamplot
            use_sequentially_augmented : bool, default = False, whether to use the sequentially augmented data
            cmap_str : str, default = 'rainbow', color map to use for the streamplot

        Returns:
            fig : matplotlib figure
            ax : matplotlib axis
        """

        # Empty strings mean "use what was configured on the instance".
        cluster_key = self.clusters if clusters=='' else clusters
        basis_key = self.basis if basis=='' else basis

        # The annotation column is converted to a categorical dtype in place.
        self.adata.obs[cluster_key] = self.adata.obs[cluster_key].astype('category')
        coords = self.adata.obsm[basis_key]

        # Everything below is passed through to the streamplot routine untouched.
        stream_options = dict(
            density_grid=density_grid,
            arrow_size=arrow_size,
            arrow_color=arrow_color,
            arrow_style=arrow_style,
            max_length=max_length,
            linewidth=linewidth,
            min_mass=min_mass,
            cutoff_perc=cutoff_perc,
            scatter_size=scatter_size,
            scatter_alpha=scatter_alpha,
            marker_edgewidth=marker_edgewidth,
            density_stream=density_stream,
            smooth_transition=smooth_transition,
            smooth_grid=smooth_grid,
            color_scheme=color_scheme,
            add_outline_clusters=add_outline_clusters,
            cluster_outline_edgewidth=cluster_outline_edgewidth,
            gp_color=gp_color,
            bg_color=bg_color,
            dpi=dpi,
            title=title,
            b_bias=b_bias,
            n_neighbors_velocity_grid=n_neighbors_velocity_grid,
            other_labels=other_labels,
            use_sequentially_augmented=use_sequentially_augmented,
            cmap_str=cmap_str,
        )
        figure, axis = via_streamplot_pyomic(adata=self.adata, clusters=cluster_key,
                                             via_object=self.model, embedding=coords,
                                             **stream_options)
        return figure, axis

    def plot_trajectory_gams(self,clusters:str='',basis:str='',via_fine=None, idx=None,
                         title_str:str= "Pseudotime", draw_all_curves:bool=True, arrow_width_scale_factor:float=15.0,
                         scatter_size:float=50, scatter_alpha:float=0.5,figsize:tuple=(8,4),
                         linewidth:float=1.5, marker_edgewidth:float=1, cmap_pseudotime:str='viridis_r',dpi:int=80,
                         highlight_terminal_states:bool=True, use_maxout_edgelist:bool =False)->Tuple[matplotlib.figure.Figure,
                                                                                                 matplotlib.axes._axes.Axes,
                                                                                                 matplotlib.axes._axes.Axes]:
        """Project the graph-based coarse trajectory onto a umap/tsne embedding

        Arguments:
            clusters : column name of the adata.obs dataframe that contains the cluster labels; '' (default) falls back to the column given at construction
            basis : key of adata.obsm holding the embedding; '' (default) falls back to the basis given at construction
            via_fine : via object; suggest to use via_object only unless you found that running via_fine gave better pathways
            idx : default: None. Or List. if you had previously computed a umap/tsne (embedding) only on a subset of the total n_samples (subsampled as per idx), then the via objects and results will be indexed according to idx too
            title_str : title of figure
            draw_all_curves : if the clustergraph has too many edges to project in a visually interpretable way, set this to False to get a simplified view of the graph pathways
            arrow_width_scale_factor : the width of the arrows is proportional to the edge weight. This factor scales the width of the arrows
            scatter_size : size of the scatter points
            scatter_alpha : transparency of the scatter points
            figsize : size of the figure
            linewidth : width of the lines
            marker_edgewidth : width of the outline around each scatter point
            cmap_pseudotime : color map to use for the pseudotime
            dpi : dpi of the figure
            highlight_terminal_states : whether or not to highlight/distinguish the clusters which are detected as the terminal states by via
            use_maxout_edgelist : default False. forwarded unchanged to the drawing routine

        Returns:
            fig : matplotlib figure
            ax1 : matplotlib axis
            ax2 : matplotlib axis

        """

        # Empty strings mean "use what was configured on the instance".
        cluster_key = self.clusters if clusters=='' else clusters
        basis_key = self.basis if basis=='' else basis

        # The annotation column is converted to a categorical dtype in place.
        self.adata.obs[cluster_key] = self.adata.obs[cluster_key].astype('category')
        coords = self.adata.obsm[basis_key]

        # Everything below is passed through to the drawing routine untouched.
        draw_options = dict(
            via_fine=via_fine,
            idx=idx,
            title_str=title_str,
            draw_all_curves=draw_all_curves,
            arrow_width_scale_factor=arrow_width_scale_factor,
            scatter_size=scatter_size,
            scatter_alpha=scatter_alpha,
            figsize=figsize,
            linewidth=linewidth,
            marker_edgewidth=marker_edgewidth,
            cmap_pseudotime=cmap_pseudotime,
            dpi=dpi,
            highlight_terminal_states=highlight_terminal_states,
            use_maxout_edgelist=use_maxout_edgelist,
        )
        figure, left_axis, right_axis = draw_trajectory_gams_pyomic(
            adata=self.adata, clusters=cluster_key, via_object=self.model,
            embedding=coords, **draw_options)
        return figure, left_axis, right_axis

    def plot_lineage_probability(self,clusters:str='',basis:str='',via_fine=None,
                                idx=None, figsize:tuple=(8,4),
                                cmap:str='plasma', dpi:int=80, scatter_size =None,
                                marker_lineages:list = None, fontsize:int=12)->Tuple[matplotlib.figure.Figure,
                                                                                   matplotlib.axes._axes.Axes]:
        """Plot the single-cell lineage probabilities on the embedding.

        Background (from the underlying routine): G is the igraph knn (low K) used for shortest path in high dim space. no idx needed as it's made on full sample, knn_hnsw is the knn made in the embedded space used for query to find the nearest point in the downsampled embedding that corresponds to the single cells in the full graph

        Arguments:
            clusters : column name of the adata.obs dataframe that contains the cluster labels; '' (default) falls back to the column given at construction. The column is only converted to a categorical dtype here; it is not passed to the drawing routine
            basis : key of adata.obsm holding the embedding; '' (default) falls back to the basis given at construction
            via_fine : usually just set to same as via_coarse unless you ran a refined run and want to link it to initial via_coarse's terminal clusters
            idx : if one uses a downsampled embedding of the original data, then idx is the selected indices of the downsampled samples used in the visualization
            figsize : size of the figure
            cmap : color map to use for the lineage probability
            dpi : dpi of the figure
            scatter_size : size of the scatter points
            marker_lineages : Default (None, treated as an empty list) is to use all lineage pathways. Otherwise provide a list of lineage numbers (terminal cluster numbers).
            fontsize : fontsize of the title

        Returns:
            fig : matplotlib figure
            axs : matplotlib axis
        """

        # A fresh list per call: a literal ``[]`` default would be one object
        # shared by every invocation and by the drawing routine.
        if marker_lineages is None:
            marker_lineages = []

        if clusters=='':
            clusters=self.clusters
        if basis=='':
            basis=self.basis
        # The annotation column is converted to a categorical dtype in place.
        self.adata.obs[clusters]=self.adata.obs[clusters].astype('category')
        embedding=self.adata.obsm[basis]
        fig, axs = draw_sc_lineage_probability(via_object=self.model,via_fine=via_fine, embedding=embedding,figsize=figsize,
                                               idx=idx, cmap_name=cmap, dpi=dpi, scatter_size =scatter_size,
                                            marker_lineages = marker_lineages, fontsize=fontsize)
        fig.tight_layout()
        return fig, axs

    def plot_gene_trend(self,gene_list:list=None,figsize:tuple=(8,4),
                        magic_steps:int=3, spline_order:int=5, dpi:int=80,cmap:str='jet',
                        marker_genes:list = None, linewidth:float = 2.0,
                        n_splines:int=10,  fontsize_:int=12, marker_lineages=None)->Tuple[matplotlib.figure.Figure,
                                                                                        matplotlib.axes._axes.Axes]:
        """plots the gene expression trend along the pseudotime

        Arguments:
            gene_list : list of genes to plot. It is used to subset `self.adata` by variable, so although the default is None a list of gene names is expected
            figsize : size of the figure
            magic_steps : number of magic steps to use for imputation
            spline_order : order of the spline to use for smoothing
            dpi : dpi of the figure
            cmap : color map to use for the gene expression
            marker_genes : Default (None, treated as an empty list) is to use all genes in gene_exp. Otherwise provide a list of marker genes that will be used from gene_exp.
            linewidth : width of the lines
            n_splines : number of splines to use for smoothing
            fontsize_ : fontsize of the title
            marker_lineages : Default (None, treated as an empty list) is to use all lineage pathways. Otherwise provide a list of lineage numbers (terminal cluster numbers).

        Returns:
            fig : matplotlib figure
            axs : matplotlib axis

        """

        # Fresh lists per call: literal ``[]`` defaults would be single objects
        # shared by every invocation and by the plotting routine.
        if marker_genes is None:
            marker_genes = []
        if marker_lineages is None:
            marker_lineages = []

        # Impute the expression of the requested genes before plotting the trends.
        df_magic = self.model.do_impute(self.adata[:,gene_list].to_df(), magic_steps=magic_steps, gene_list=gene_list)
        fig, axs=get_gene_expression_pyomic(self.model,df_magic,spline_order=spline_order,dpi=dpi,
                                   cmap=cmap, marker_genes=marker_genes, linewidth=linewidth,figsize=figsize,
                                   n_splines=n_splines,  fontsize_=fontsize_, marker_lineages=marker_lineages)
        fig.tight_layout()
        return fig, axs

    def plot_clustergraph(self, gene_list: list, arrow_head: float = 0.1, figsize: tuple = (8, 4), dpi=80, magic_steps=3,
                          edgeweight_scale: float = 1.5, cmap=None, label_=True, ) -> Tuple[matplotlib.figure.Figure,
                                                                                          matplotlib.axes._axes.Axes]:
        """Plot the cluster graph colored by cluster-averaged imputed gene expression.

        Arguments:
            gene_list : list of genes to plot
            arrow_head : size of the arrow head
            figsize : size of the figure
            dpi : dpi of the figure
            magic_steps : number of magic steps passed to the imputation
            edgeweight_scale : scale of the edge weight
            cmap : color map to use for the gene expression
            label_ : whether to label the nodes

        Returns:
            fig : matplotlib figure
            axs : matplotlib axis
        """
        expression = self.adata[:, gene_list].to_df()
        imputed = self.model.do_impute(expression, magic_steps=magic_steps, gene_list=gene_list)

        # Attach `self.model.labels` as a 'parc' column and average the imputed values per label.
        imputed['parc'] = self.model.labels
        cluster_means = imputed.groupby('parc', as_index=True).mean()

        graph_options = dict(
            type_data='gene',
            gene_exp=cluster_means,
            gene_list=gene_list,
            arrow_head=arrow_head,
            figsize=figsize,
            edgeweight_scale=edgeweight_scale,
            cmap=cmap,
            label_=label_,
            dpi=dpi,
        )
        fig, axs = draw_clustergraph_pyomic(via_object=self.model, **graph_options)
        fig.tight_layout()
        return fig, axs

    def plot_gene_trend_heatmap(self,gene_list:list,marker_lineages:list = [], 
                             fontsize:int=8,cmap:str='viridis', normalize:bool=True, ytick_labelrotation:int = 0, 
                             figsize:tuple=(2,4), magic_steps:int=3)->Tuple[matplotlib.figure.Figure,
                                                                    list]:
        """Plot the gene trends on heatmap: a heatmap is generated for each lineage (identified by terminal cluster number). Default selects all lineages

        Arguments:
            gene_list : list of genes to plot
            marker_lineages : list, default = [] which plots all detected lineages. Optionally provide a list of integers corresponding to the cluster number of terminal cell fates
            fontsize : int default = 8
            cmap : str default = 'viridis'
            normalize : bool = True
            ytick_labelrotation : int default = 0
            figsize : size of the figure
            magic_steps : int default = 3. number of magic steps passed to the imputation (same meaning as in `plot_gene_trend` and `plot_clustergraph`)

        Returns:
            fig : matplotlib figure
            axs : list of matplotlib axis       
        """

        df_magic = self.model.do_impute(self.adata[:,gene_list].to_df(), magic_steps=magic_steps, gene_list=gene_list)
        # NOTE(review): the 'parc' label column is kept because the frame passed below has always
        # carried it; confirm whether the heatmap helper needs it or treats it as an extra gene.
        df_magic['parc'] = self.model.labels
        # The per-cluster mean that used to be computed here was never used, so it is no longer built.
        fig,axs=plot_gene_trend_heatmaps_pyomic(via_object=self.model, df_gene_exp=df_magic, 
                                                cmap=cmap,fontsize=fontsize,normalize=normalize,
                                                ytick_labelrotation=ytick_labelrotation,figsize=figsize,
                                                marker_lineages=marker_lineages)
        fig.tight_layout()
        return fig,axs

__init__(adata, adata_key='X_pca', adata_ncomps=80, basis='X_umap', clusters='', dist_std_local=2, jac_std_global=0.15, labels=None, keep_all_local_dist='auto', too_big_factor=0.4, resolution_parameter=1.0, partition_type='ModularityVP', small_pop=10, jac_weighted_edges=True, knn=30, n_iter_leiden=5, random_seed=42, num_threads=-1, distance='l2', time_smallpop=15, super_cluster_labels=False, super_node_degree_list=False, super_terminal_cells=False, x_lazy=0.95, alpha_teleport=0.99, root_user=None, preserve_disconnected=True, dataset='', super_terminal_clusters=[], is_coarse=True, csr_full_graph='', csr_array_locally_pruned='', ig_full_graph='', full_neighbor_array='', full_distance_array='', df_annot=None, preserve_disconnected_after_pruning=False, secondary_annotations=None, pseudotime_threshold_TS=30, cluster_graph_pruning_std=0.15, visual_cluster_graph_pruning=0.15, neighboring_terminal_states_threshold=3, num_mcmc_simulations=1300, piegraph_arrow_head_width=0.1, piegraph_edgeweight_scalingfactor=1.5, max_visual_outgoing_edges=2, via_coarse=None, velocity_matrix=None, gene_matrix=None, velo_weight=0.5, edgebundle_pruning=None, A_velo=None, CSM=None, edgebundle_pruning_twice=False, pca_loadings=None, time_series=False, time_series_labels=None, knn_sequential=10, knn_sequential_reverse=0, t_diff_step=1, single_cell_transition_matrix=None, embedding_type='via-mds', do_compute_embedding=False, color_dict=None, user_defined_terminal_cell=[], user_defined_terminal_group=[], do_gaussian_kernel_edgeweights=False, RW2_mode=False, working_dir_fp='/home/shobi/Trajectory/Datasets/')

Initialize a pyVIA object.

Parameters:

Name Type Description Default
adata anndata.AnnData

An AnnData object containing the scRNA-seq.

required
adata_key str

the key of the AnnData in obsm to perform VIA on. default: 'X_pca'

'X_pca'
adata_ncomps int

the number of components to use from the AnnData in obsm to perform VIA on. default: 80

80
basis str

the key of the AnnData in obsm to use as the basis for the embedding. default: 'X_umap'

'X_umap'
clusters str

the clusters to use for the VIA analysis. default: ''

''
dist_std_local float

local level of pruning for PARC graph clustering stage. Range (0.1,3) higher numbers mean more edge retention

2
jac_std_global

(optional, default = 0.15, can also set as 'median') global level graph pruning for PARC clustering stage. Number of standard deviations below the network’s mean-jaccard-weighted edges. 0.1-1 provide reasonable pruning.higher value means less pruning (more edges retained). e.g. a value of 0.15 means all edges that are above mean(edgeweight)-0.15*std(edge-weights) are retained. We find both 0.15 and ‘median’ to yield good results/starting point and resulting in pruning away ~ 50-60% edges

0.15
labels np.ndarray

default is None. and PARC clusters are used for the viagraph. alternatively provide a list of clustermemberships that are integer values (not strings) to construct the viagraph using another clustering method or available annotations

None
keep_all_local_dist

default value of 'auto' means that for smaller datasets local-pruning is done prior to clustering, but for large datasets local pruning is set to False for speed.

'auto'
too_big_factor float

(optional, default=0.4). Forces clusters > 0.4*n_cells to be re-clustered

0.4
resolution_parameter float

(float) the resolution parameter for the Louvain algorithm.

1.0
partition_type str

(str, default: "ModularityVP") the partitioning algorithm to use.

'ModularityVP'
small_pop int

(int, default: 10) the number of cells to be considered in a small population.

10
jac_weighted_edges bool

(bool, default: True) whether to use weighted edges in the PARC clustering step.

True
knn int

(int, optional, default: 30) the number of K-Nearest Neighbors for HNSWlib KNN graph. Larger knn means more graph connectivity. Lower knn means more loosely connected clusters/cells.

30
n_iter_leiden int

(int) the number of iterations for the Leiden algorithm.

5
random_seed int

(int) the random seed to pass to the clustering algorithm.

42
num_threads

(int) the number of threads to use for the clustering algorithm.

-1
distance

(str, default: 'l2') the distance metric to use for graph construction and similarity. Options are 'l2', 'ip', and 'cosine'.

'l2'
visual_cluster_graph_pruning float

(float, default: 0.15) the pruning level for the cluster graph. This only comes into play if the user deliberately chooses not to use the default edge-bundling method of visualizing edges (draw_piechart_graph()) and instead calls draw_piechart_graph_nobundle(). It controls the number of edges plotted for visual effect. This does not impact computation of terminal states, pseudotime, or lineage likelihoods.

0.15
cluster_graph_pruning_std float

(float, default: 0.15) the pruning level of the cluster graph. Often set to the same value as the PARC clustering level of jac_std_global. Reasonable range is [0.1, 1]. To retain more connectivity in the clustergraph underlying the trajectory computations, increase the value.

0.15
time_smallpop

(max time to be allowed handling singletons) the maximum time allowed to handle singletons.

15
super_cluster_labels bool False
super_node_degree_list bool False
super_terminal_cells bool False
x_lazy float

(float, default: 0.95) 1-x = probability of staying in the same node (lazy). Values between 0.9-0.99 are reasonable.

0.95
alpha_teleport float

(float, default: 0.99) 1-alpha is probability of jumping. Values between 0.95-0.99 are reasonable unless prior knowledge of teleportation.

0.99
root_user

(list, None) the root user list. Can be a list of strings, a list of int, or None. The default is None. When the root_user is set as None and an RNA velocity matrix is available, a root will be automatically computed. If the root_user is None and no velocity matrix is provided, then an arbitrary root is selected. If the root_user is ['celltype_earlystage'] where the str corresponds to an item in true_label, then a suitable starting point will be selected corresponding to this group.

None
preserve_disconnected bool

bool (default = True) If you believe there may be disconnected trajectories then set this to False

True
dataset str

str Can be set to 'group' or '' (default). this refers to the type of root label (group level root or single cell index) you are going to provide. if your true_label has a sensible group of cells for a root then you can set dataset to 'group' and make the root parameter ['labelname_root_cell_type'] if your root corresponds to one particular cell then set dataset = '' (default)

''
embedding

ndarray (optional, default = None) embedding (e.g. precomputed tsne, umap, phate, via-umap) for plotting data. Size n_cells x 2 If an embedding is provided when running VIA, then a scatterplot colored by pseudotime, highlighting terminal fates

required
velo_weight

float (optional, default = 0.5) #float between [0,1]. the weight assigned to directionality and connectivity derived from scRNA-velocity

0.5
neighboring_terminal_states_threshold

int (default = 3). Candidates for terminal states that are neighbors of each other may be removed from the list if they have this number or more of terminal states as neighbors

3
knn_sequential int

int (default =10) number of knn in the adjacent time-point for time-series data (t_i and t_i+1)

10
knn_sequential_reverse int

int (default = 0) number of knn enforced from current to previous time point

0
t_diff_step int

int (default =1) Number of permitted temporal intervals between connected nodes. If time data is labeled as [0,25,50,75,100,..] then t_diff_step=1 corresponds to '25' and only edges within t_diff_steps are retained

1
is_coarse

bool (default = True) If running VIA in two iterations where you wish to link the second fine-grained iteration with the initial iteration, then you set to False

True
via_coarse

VIA (default = None) If instantiating a second iteration of VIA that needs to be linked to a previous iteration (e.g. via0), then set via_coarse to the previous via0 object

None
df_annot

DataFrame (default None) used for the Mouse Organ data

None
preserve_disconnected_after_pruning bool

bool (default = False) If you believe there are disconnected trajectories then set this to False and test your hypothesis

False
A_velo

ndarray Cluster Graph Transition matrix based on rna velocity [n_clus x n_clus]

None
velocity_matrix

matrix (default None) matrix of size [n_samples x n_genes]. This is the velocity matrix computed by scVelo (or a similar package) and stored in adata.layers['velocity']. The genes used for computing velocity should correspond to those used in gene_matrix. Requires gene_matrix to be provided too.

None
gene_matrix

matrix (default None) Only used if Velocity_matrix is available. matrix of size [n_samples x n_genes]. We recommend using a subset like HVGs rather than full set of genes. (need to densify input if taking from adata = adata.X.todense())

None
time_series

if the data has time-series labels then set to True

False
time_series_labels list

list (default None) list of integer values of temporal annotations corresponding to e.g. hours (post fert), days, or sequential ordering

None
pca_loadings

array (default None) the loadings of the pcs used to project the cells (to projected euclidean location based on velocity). n_cells x n_pcs

None
secondary_annotations list

None (default None)

None
edgebundle_pruning

float (default=None) will by default be set to the same as the cluster_graph_pruning_std and influences the visualized level of pruning of edges. Typical values can be between [0,1] with higher numbers retaining more edges

None
edgebundle_pruning_twice

bool default: False. When True, the edgebundling is applied to a further visually pruned (visual_cluster_graph_pruning) and can sometimes simplify the visualization. it does not impact the pseudotime and lineage computations

False
piegraph_arrow_head_width

float (default = 0.1) size of arrow heads in via cluster graph

0.1
piegraph_edgeweight_scalingfactor

(default = 1.5) scaling factor for edge thickness in via cluster graph

1.5
max_visual_outgoing_edges int

int (default = 2) Rarely comes into play. Only used if the user later chooses to plot the via-graph without edge bundling using draw_piechart_graph_nobundle(). Only allows max_visual_outgoing_edges to come out of any given node.

2
edgebundle_pruning

float (default=None) will by default be set to the same as the cluster_graph_pruning_std and influences the visualized level of pruning of edges. Typical values can be between [0,1] with higher numbers retaining more edges

None
edgebundle_pruning_twice

bool default: False. When True, the edgebundling is applied to a further visually pruned (visual_cluster_graph_pruning) and can sometimes simplify the visualization. it does not impact the pseudotime and lineage computations

False
pseudotime_threshold_TS int

int (default = 30) corresponds to the criteria for a state to be considered a candidate terminal cell fate to be 30% or later of the computed pseudotime range

30
num_mcmc_simulations

int (default = 1300) number of random walk simulations conducted

1300
embedding_type str

str (default = 'via-mds'; other options are 'via-umap' and 'via-force')

'via-mds'
do_compute_embedding bool

bool (default = False) If you want an embedding (n_samples x2) to be computed on the basis of the via sc graph then set this to True

False
do_gaussian_kernel_edgeweights bool

bool (default = False) Type of edgeweighting on the graph edges

False
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_via.py
def __init__(self,adata:anndata.AnnData,adata_key:str='X_pca',adata_ncomps:int=80,basis:str='X_umap',
             clusters:str='',dist_std_local:float=2, jac_std_global=0.15, labels:np.ndarray=None,
             keep_all_local_dist='auto', too_big_factor:float=0.4, resolution_parameter:float=1.0, partition_type:str="ModularityVP", small_pop:int=10,
             jac_weighted_edges:bool=True, knn:int=30, n_iter_leiden:int=5, random_seed:int=42,
             num_threads=-1, distance='l2', time_smallpop=15,
             super_cluster_labels:bool=False,                 super_node_degree_list:bool=False, super_terminal_cells:bool=False, x_lazy:float=0.95, alpha_teleport:float=0.99,
             root_user=None, preserve_disconnected:bool=True, dataset:str='', super_terminal_clusters:list=[],
             is_coarse=True, csr_full_graph:np.ndarray='', csr_array_locally_pruned='', ig_full_graph='',
             full_neighbor_array='', full_distance_array='',  df_annot=None,
             preserve_disconnected_after_pruning:bool=False,
             secondary_annotations:list=None, pseudotime_threshold_TS:int=30, cluster_graph_pruning_std:float=0.15,
             visual_cluster_graph_pruning:float=0.15, neighboring_terminal_states_threshold=3, num_mcmc_simulations=1300,
             piegraph_arrow_head_width=0.1,
             piegraph_edgeweight_scalingfactor=1.5, max_visual_outgoing_edges:int=2, via_coarse=None, velocity_matrix=None,
             gene_matrix=None, velo_weight=0.5, edgebundle_pruning=None, A_velo = None, CSM = None, edgebundle_pruning_twice=False, pca_loadings = None, time_series=False,
             time_series_labels:list=None, knn_sequential:int = 10, knn_sequential_reverse:int = 0,t_diff_step:int = 1,single_cell_transition_matrix = None,
             embedding_type:str='via-mds',do_compute_embedding:bool=False, color_dict:dict=None,user_defined_terminal_cell:list=[], user_defined_terminal_group:list=[],
             do_gaussian_kernel_edgeweights:bool=False,RW2_mode:bool=False,working_dir_fp:str ='/home/shobi/Trajectory/Datasets/') -> None:
    r"""
    Initialize a pyVIA object.

    Reads the input representation (`adata.obsm[adata_key]`), the plotting embedding
    (`adata.obsm[basis]`) and the reference labels (`adata.obs[clusters]`) from `adata`, then
    builds the underlying `VIA` model and stores it as `self.model`. Apart from `adata`,
    `adata_key`, `adata_ncomps`, `basis` and `clusters` (consumed here) and `dataset` (which may
    be overridden, see below), every argument is forwarded unchanged to `VIA`.

    Arguments:
        adata: An AnnData object containing the scRNA-seq.
        adata_key: the key of the AnnData in obsm to perform VIA on. default: 'X_pca'
        adata_ncomps: the number of components to use from the AnnData in obsm to perform VIA on. default: 80
        basis: the key of the AnnData in obsm to use as the basis for the embedding. default: 'X_umap'. The array `adata.obsm[basis]` is what gets passed to VIA as its `embedding`; there is no separate `embedding` argument.
        clusters: the column of adata.obs holding the clusters to use for the VIA analysis (passed to VIA as `true_label`). default: ''
        dist_std_local: local level of pruning for PARC graph clustering stage. Range (0.1,3) higher numbers mean more edge retention
        jac_std_global: (optional, default = 0.15, can also set as 'median') global level graph pruning for PARC clustering stage. Number of standard deviations below the network's mean-jaccard-weighted edges. 0.1-1 provide reasonable pruning. Higher value means less pruning (more edges retained). e.g. a value of 0.15 means all edges that are above mean(edgeweight)-0.15*std(edge-weights) are retained. We find both 0.15 and 'median' to yield good results/starting point and resulting in pruning away ~ 50-60% edges
        labels: default is None. and PARC clusters are used for the viagraph. alternatively provide a list of clustermemberships that are integer values (not strings) to construct the viagraph using another clustering method or available annotations
        keep_all_local_dist: default value of 'auto' means that for smaller datasets local-pruning is done prior to clustering, but for large datasets local pruning is set to False for speed.

        too_big_factor: (optional, default=0.4). Forces clusters > 0.4*n_cells to be re-clustered
        resolution_parameter: (float) the resolution parameter for the Louvain algorithm.
        partition_type: (str, default: "ModularityVP") the partitioning algorithm to use.
        small_pop: (int, default: 10) the number of cells to be considered in a small population.
        jac_weighted_edges: (bool, default: True) whether to use weighted edges in the PARC clustering step.
        knn: (int, optional, default: 30) the number of K-Nearest Neighbors for HNSWlib KNN graph. Larger knn means more graph connectivity. Lower knn means more loosely connected clusters/cells.
        n_iter_leiden: (int) the number of iterations for the Leiden algorithm.
        random_seed: (int) the random seed to pass to the clustering algorithm.
        num_threads: (int) the number of threads to use for the clustering algorithm.
        distance: (str, default: 'l2') the distance metric to use for graph construction and similarity. Options are 'l2', 'ip', and 'cosine'.
        visual_cluster_graph_pruning: (float, default: 0.15) the pruning level for the cluster graph. This only comes into play if the user deliberately chooses not to use the default edge-bundling method of visualizing edges (draw_piechart_graph()) and instead calls draw_piechart_graph_nobundle(). It controls the number of edges plotted for visual effect. This does not impact computation of terminal states, pseudotime, or lineage likelihoods.
        cluster_graph_pruning_std: (float, default: 0.15) the pruning level of the cluster graph. Often set to the same value as the PARC clustering level of jac_std_global. Reasonable range is [0.1, 1]. To retain more connectivity in the clustergraph underlying the trajectory computations, increase the value.
        time_smallpop: (max time to be allowed handling singletons) the maximum time allowed to handle singletons.
        super_cluster_labels: (default False) forwarded unchanged to VIA.
        super_node_degree_list: (default False) forwarded unchanged to VIA.
        super_terminal_cells: (default False) forwarded unchanged to VIA.
        super_terminal_clusters: (default []) forwarded unchanged to VIA.
        x_lazy: (float, default: 0.95) 1-x = probability of staying in the same node (lazy). Values between 0.9-0.99 are reasonable.
        alpha_teleport: (float, default: 0.99) 1-alpha is probability of jumping. Values between 0.95-0.99 are reasonable unless prior knowledge of teleportation.
        root_user: (list, None) the root user list. Can be a list of strings, a list of int, or None. The default is None. When the root_user is set as None and an RNA velocity matrix is available, a root will be automatically computed. If the root_user is None and no velocity matrix is provided, then an arbitrary root is selected. If the root_user is ['celltype_earlystage'] where the str corresponds to an item in true_label, then a suitable starting point will be selected corresponding to this group. Whenever root_user is not None, this constructor sets dataset to 'group'.
        preserve_disconnected: bool (default = True) If you believe there may be disconnected trajectories then set this to False
        dataset: str Can be set to 'group' or '' (default). this refers to the type of root label (group level root or single cell index) you are going to provide. if your true_label has a sensible group of cells for a root then you can set dataset to 'group' and make the root parameter ['labelname_root_cell_type'] if your root corresponds to one particular cell then set dataset = '' (default). Note: the value passed here is replaced by 'group' whenever root_user is not None.
        velo_weight: float (optional, default = 0.5) #float between [0,1]. the weight assigned to directionality and connectivity derived from scRNA-velocity
        neighboring_terminal_states_threshold:int (default = 3). Candidates for terminal states that are neighbors of each other may be removed from the list if they have this number or more of terminal states as neighbors
        knn_sequential:int (default =10) number of knn in the adjacent time-point for time-series data (t_i and t_i+1)
        knn_sequential_reverse: int (default = 0) number of knn enforced from current to previous time point
        t_diff_step: int (default =1) Number of permitted temporal intervals between connected nodes. If time data is labeled as [0,25,50,75,100,..] then t_diff_step=1 corresponds to '25' and only edges within t_diff_steps are retained
        is_coarse:bool (default = True) If running VIA in two iterations where you wish to link the second fine-grained iteration with the initial iteration, then you set to False
        via_coarse: VIA (default = None) If instantiating a second iteration of VIA that needs to be linked to a previous iteration (e.g. via0), then set via_coarse to the previous via0 object
        csr_full_graph: (default '') forwarded unchanged to VIA.
        csr_array_locally_pruned: (default '') forwarded unchanged to VIA.
        ig_full_graph: (default '') forwarded unchanged to VIA.
        full_neighbor_array: (default '') forwarded unchanged to VIA.
        full_distance_array: (default '') forwarded unchanged to VIA.
        df_annot: DataFrame (default None) used for the Mouse Organ data
        preserve_disconnected_after_pruning:bool (default = False) If you believe there are disconnected trajectories then set this to False and test your hypothesis
        A_velo: ndarray Cluster Graph Transition matrix based on rna velocity [n_clus x n_clus]
        CSM: (default None) forwarded unchanged to VIA.
        velocity_matrix: matrix (default None) matrix of size [n_samples x n_genes]. this is the velocity matrix computed by scVelo (or similar package) and stored in adata.layers['velocity']. The genes used for computing velocity should correspond to those used in gene_matrix. Requires gene_matrix to be provided too.
        gene_matrix: matrix (default None) Only used if velocity_matrix is available. matrix of size [n_samples x n_genes]. We recommend using a subset like HVGs rather than full set of genes. (need to densify input if taking from adata = adata.X.todense())
        time_series: if the data has time-series labels then set to True
        time_series_labels:list (default None) list of integer values of temporal annotations corresponding to e.g. hours (post fert), days, or sequential ordering
        single_cell_transition_matrix: (default None) forwarded unchanged to VIA.
        pca_loadings: array (default None) the loadings of the pcs used to project the cells (to projected euclidean location based on velocity). n_cells x n_pcs
        secondary_annotations: None (default None)
        edgebundle_pruning:float (default=None) will by default be set to the same as the cluster_graph_pruning_std and influences the visualized level of pruning of edges. Typical values can be between [0,1] with higher numbers retaining more edges
        edgebundle_pruning_twice:bool default: False. When True, the edgebundling is applied to a further visually pruned (visual_cluster_graph_pruning) and can sometimes simplify the visualization. it does not impact the pseudotime and lineage computations
        piegraph_arrow_head_width: float (default = 0.1) size of arrow heads in via cluster graph
        piegraph_edgeweight_scalingfactor: (default = 1.5) scaling factor for edge thickness in via cluster graph
        max_visual_outgoing_edges: int (default =2) Rarely comes into play. Only used if the user later chooses to plot the via-graph without edge bundling using draw_piechart_graph_nobundle(). Only allows max_visual_outgoing_edges to come out of any given node.
        pseudotime_threshold_TS: int (default = 30) corresponds to the criteria for a state to be considered a candidate terminal cell fate to be 30% or later of the computed pseudotime range
        num_mcmc_simulations:int (default = 1300) number of random walk simulations conducted
        embedding_type: str (default = 'via-mds'; other options are 'via-umap' and 'via-force')
        do_compute_embedding: bool (default = False) If you want an embedding (n_samples x2) to be computed on the basis of the via sc graph then set this to True
        color_dict: (default None) forwarded unchanged to VIA.
        user_defined_terminal_cell: (default []) forwarded unchanged to VIA.
        user_defined_terminal_group: (default []) forwarded unchanged to VIA.
        do_gaussian_kernel_edgeweights: bool (default = False) Type of edgeweighting on the graph edges
        RW2_mode: (default False) forwarded unchanged to VIA.
        working_dir_fp: (default is the hard-coded path '/home/shobi/Trajectory/Datasets/') forwarded unchanged to VIA.


    """


    self.adata = adata
    #self.adata_key = adata_key
    # Input representation for VIA: the first `adata_ncomps` columns of adata.obsm[adata_key].
    data = adata.obsm[adata_key][:, 0:adata_ncomps]
    # The embedding handed to VIA is always read from adata.obsm[basis].
    embedding=self.adata.obsm[basis]
    # NOTE(review): `clusters` defaults to '' and is used directly as an obs column name here,
    # so the default presumably fails unless adata.obs has a column named '' -- confirm.
    true_label=adata.obs[clusters]
    # Remembered so the plotting/query methods can fall back to them when called with ''.
    self.clusters=clusters
    self.basis=basis

    # Supplying a root forces group-level root handling, overriding any `dataset` value passed in.
    if root_user is not None:
         dataset='group'



    # Build the wrapped VIA model; everything below is a keyword-for-keyword pass-through.
    self.model=VIA(data=data,true_label=true_label,
             dist_std_local=dist_std_local,jac_std_global=jac_std_global,labels=labels,
             keep_all_local_dist=keep_all_local_dist,too_big_factor=too_big_factor,resolution_parameter=resolution_parameter,partition_type=partition_type,small_pop=small_pop,
             jac_weighted_edges=jac_weighted_edges,knn=knn,n_iter_leiden=n_iter_leiden,random_seed=random_seed,
             num_threads=num_threads,distance=distance,time_smallpop=time_smallpop,
             super_cluster_labels=super_cluster_labels,super_node_degree_list=super_node_degree_list,super_terminal_cells=super_terminal_cells,x_lazy=x_lazy,alpha_teleport=alpha_teleport,
             root_user=root_user,preserve_disconnected=preserve_disconnected,dataset=dataset,super_terminal_clusters=super_terminal_clusters,
             is_coarse=is_coarse,csr_full_graph=csr_full_graph,csr_array_locally_pruned=csr_array_locally_pruned,ig_full_graph=ig_full_graph,
             full_neighbor_array=full_neighbor_array,full_distance_array=full_distance_array,embedding=embedding,df_annot=df_annot,
             preserve_disconnected_after_pruning=preserve_disconnected_after_pruning,
             secondary_annotations=secondary_annotations,pseudotime_threshold_TS=pseudotime_threshold_TS,cluster_graph_pruning_std=cluster_graph_pruning_std,
             visual_cluster_graph_pruning=visual_cluster_graph_pruning,neighboring_terminal_states_threshold=neighboring_terminal_states_threshold,num_mcmc_simulations=num_mcmc_simulations,
             piegraph_arrow_head_width=piegraph_arrow_head_width,
             piegraph_edgeweight_scalingfactor=piegraph_edgeweight_scalingfactor,max_visual_outgoing_edges=max_visual_outgoing_edges,via_coarse=via_coarse,velocity_matrix=velocity_matrix,
             gene_matrix=gene_matrix,velo_weight=velo_weight,edgebundle_pruning=edgebundle_pruning,A_velo=A_velo,CSM=CSM,edgebundle_pruning_twice=edgebundle_pruning_twice,pca_loadings=pca_loadings,time_series=time_series,
             time_series_labels=time_series_labels,knn_sequential=knn_sequential,knn_sequential_reverse=knn_sequential_reverse,t_diff_step=t_diff_step,single_cell_transition_matrix=single_cell_transition_matrix,
             embedding_type=embedding_type,do_compute_embedding=do_compute_embedding,color_dict=color_dict,user_defined_terminal_cell=user_defined_terminal_cell,user_defined_terminal_group=user_defined_terminal_group,
             do_gaussian_kernel_edgeweights=do_gaussian_kernel_edgeweights,RW2_mode=RW2_mode,working_dir_fp=working_dir_fp
             )

run()

calculate the via graph and pseudotime

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_via.py
def run(self):
    """Run the wrapped VIA model to calculate the via graph and pseudotime.

    Delegates to `self.model.run_VIA()` and returns nothing.
    """
    via_model = self.model
    via_model.run_VIA()

get_piechart_dict(label=0, clusters='')

Cluster composition graph

Parameters:

Name Type Description Default
label int

int (default=0) cluster label of pie chart

0
clusters str

the adata.obs column holding the cell-type annotation to count; '' (default) uses the clusters column given at initialization

''

Returns:

Name Type Description
res_dict dict

cluster composition graph

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_via.py
def get_piechart_dict(self,label:int=0,clusters:str='')->dict:
    """
    Cluster composition: count the annotation categories among the cells of one cluster.

    Arguments:
        label: int (default=0) cluster label to select, matched against `self.model.labels`
        clusters: column of `self.adata.obs` holding the annotation to count; '' falls back to `self.clusters`

    Returns:
        res_dict: value counts of the `clusters` column restricted to the selected cluster, as a dict
    """
    column = self.clusters if clusters == '' else clusters

    # Side effect: the annotation column is written back to obs as a categorical.
    self.adata.obs[column] = self.adata.obs[column].astype('category')

    # Positional indices of the cells whose model label equals `label`.
    in_cluster = np.asarray(self.model.labels) == label
    member_rows = np.nonzero(in_cluster)[0]

    members = self.adata.obs.iloc[member_rows]
    return dict(members.value_counts(column))

get_pseudotime(adata=None)

Extract the pseudotime of VIA

Parameters:

Name Type Description Default
adata

the AnnData object whose obs['pt_via'] should receive the pseudotime; if None, the pseudotime is added to self.adata.obs['pt_via']

None
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_via.py
def get_pseudotime(self,adata=None):
    """
    Extract the pseudotime of VIA

    Arguments:
        adata: object whose `obs['pt_via']` receives `self.model.single_cell_pt_markov`; when None, `self.adata` is used instead

    """

    print('...the pseudotime of VIA added to AnnData obs named `pt_via`')
    # Write into the caller-supplied object when given, otherwise into our own.
    target = self.adata if adata is None else adata
    target.obs['pt_via']=self.model.single_cell_pt_markov

plot_piechart_graph(clusters='', type_data='pt', gene_exp=[], title='', cmap=None, ax_text=True, figsize=(8, 4), dpi=150, headwidth_arrow=0.1, alpha_edge=0.4, linewidth_edge=2, edge_color='darkblue', reference=None, show_legend=True, pie_size_scale=0.8, fontsize=8)

plot two subplots with a clustergraph level representation of the viagraph showing true-label composition (lhs) and pseudotime/gene expression (rhs)

Parameters:

Name Type Description Default
clusters

column name of the adata.obs dataframe that contains the cluster labels

''
type_data

string default 'pt' for pseudotime colored nodes. or 'gene'

'pt'
gene_exp

list of values (column of dataframe) corresponding to feature or gene expression to be used to color nodes at CLUSTER level

[]
title

string

''
cmap

default None. automatically chooses coolwarm for gene expression or viridis_r for pseudotime

None
ax_text

Bool default= True. Annotates each node with cluster number and population of membership

True
dpi

int default = 150

150
headwidth_arrow

default = 0.1. width of the arrowhead used for directed edges

0.1
reference

None or list. list of categorical (str) labels for cluster composition of the piecharts (LHS subplot) length = n_samples.

None
pie_size_scale

float default=0.8 scaling factor of the piechart nodes

0.8
fontsize

float default=8. fontsize of the text in the piecharts

8
figsize

tuple default=(8,4). size of the figure

(8, 4)

Returns:

Name Type Description
fig matplotlib.figure.Figure

Returns matplotlib figure with two axes that plot the clustergraph using edge bundling

ax matplotlib.axes._axes.Axes

left axis shows the clustergraph with each node colored by annotated ground truth membership.

ax1 matplotlib.axes._axes.Axes

right axis shows the same clustergraph with each node colored by the pseudotime or gene expression

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_via.py
def plot_piechart_graph(self,clusters:str='', type_data='pt',
                            gene_exp:list=[], title='', 
                            cmap:str=None, ax_text=True, figsize:tuple=(8,4),
                            dpi=150,headwidth_arrow = 0.1, 
                            alpha_edge=0.4, linewidth_edge=2, 
                            edge_color='darkblue',reference=None, 
                            show_legend:bool=True, pie_size_scale:float=0.8, fontsize:float=8)->Tuple[matplotlib.figure.Figure,
                                                                                                      matplotlib.axes._axes.Axes,
                                                                                                      matplotlib.axes._axes.Axes]:
    """Draw the cluster-level viagraph twice: true-label composition pies (lhs) and pseudotime/gene expression colored nodes (rhs)

    Arguments:
        clusters : column name of the adata.obs dataframe that contains the cluster labels; '' falls back to `self.clusters`
        type_data : string default 'pt' for pseudotime colored nodes. or 'gene'
        gene_exp : list of values (column of dataframe) corresponding to feature or gene expression to be used to color nodes at CLUSTER level
        title : string
        cmap : default None. automatically chooses coolwarm for gene expression or viridis_r for pseudotime
        ax_text : Bool default= True. Annotates each node with cluster number and population of membership
        figsize : tuple default=(8,4). size of the figure
        dpi : int default = 150
        headwidth_arrow : default = 0.1. width of arrowhead used for directed edges
        alpha_edge : default = 0.4. forwarded unchanged to `draw_piechart_graph_pyomic`
        linewidth_edge : default = 2. forwarded unchanged to `draw_piechart_graph_pyomic`
        edge_color : default = 'darkblue'. forwarded unchanged to `draw_piechart_graph_pyomic`
        reference : None or list. list of categorical (str) labels for cluster composition of the piecharts (LHS subplot) length = n_samples.
        show_legend : bool default = True. forwarded unchanged to `draw_piechart_graph_pyomic`
        pie_size_scale : float default=0.8 scaling factor of the piechart nodes
        fontsize : float default=8. fontsize of the text in the piecharts

    Returns:
        fig: Returns matplotlib figure with two axes that plot the clustergraph using edge bundling
        ax: left axis shows the clustergraph with each node colored by annotated ground truth membership.
        ax1: right axis shows the same clustergraph with each node colored by the pseudotime or gene expression
    """
    cluster_key = self.clusters if clusters=='' else clusters
    # The label column is converted to categorical in place before plotting.
    self.adata.obs[cluster_key]=self.adata.obs[cluster_key].astype('category')

    draw_options = dict(
        clusters=cluster_key,
        adata=self.adata,
        via_object=self.model,
        type_data=type_data,
        gene_exp=gene_exp,
        title=title,
        cmap=cmap,
        ax_text=ax_text,
        figsize=figsize,
        dpi=dpi,
        headwidth_arrow=headwidth_arrow,
        alpha_edge=alpha_edge,
        linewidth_edge=linewidth_edge,
        edge_color=edge_color,
        reference=reference,
        show_legend=show_legend,
        pie_size_scale=pie_size_scale,
        fontsize=fontsize,
    )
    figure, composition_ax, value_ax = draw_piechart_graph_pyomic(**draw_options)
    return figure, composition_ax, value_ax

plot_stream(clusters='', basis='', density_grid=0.5, arrow_size=0.7, arrow_color='k', arrow_style='-|>', max_length=4, linewidth=1, min_mass=1, cutoff_perc=5, scatter_size=500, scatter_alpha=0.5, marker_edgewidth=0.1, density_stream=2, smooth_transition=1, smooth_grid=0.5, color_scheme='annotation', add_outline_clusters=False, cluster_outline_edgewidth=0.001, gp_color='white', bg_color='black', dpi=80, title='Streamplot', b_bias=20, n_neighbors_velocity_grid=None, other_labels=None, use_sequentially_augmented=False, cmap_str='rainbow')

Construct vector streamplot on the embedding to show a fine-grained view of inferred directions in the trajectory

Parameters:

Name Type Description Default
clusters

column name of the adata.obs dataframe that contains the cluster labels

''
basis

str, default = 'X_umap', which to use for the embedding

''
density_grid

float, default = 0.5, density of the grid on which to project the directionality of cells

0.5
arrow_size

float, default = 0.7, size of the arrows in the streamplot

0.7
arrow_color

str, default = 'k', color of the arrows in the streamplot

'k'
arrow_style

str, default = "-|>", style of the arrows in the streamplot

'-|>'
max_length

int, default = 4, maximum length of the arrows in the streamplot

4
linewidth

float, default = 1, width of lines in streamplot

1
min_mass

float, default = 1, minimum mass of the arrows in the streamplot

1
cutoff_perc

int, default = 5, cutoff percentage of the arrows in the streamplot

5
scatter_size

int, default = 500, size of scatter points

500
scatter_alpha

float, default = 0.5, transparency of scatter points

0.5
marker_edgewidth

float, default = 0.1, width of outline around each scatter point

0.1
density_stream

int, default = 2, density of the streamplot

2
smooth_transition

int, default = 1, smoothness of the transition between the streamplot and the scatter points

1
smooth_grid

float, default = 0.5, smoothness of the grid on which to project the directionality of cells

0.5
color_scheme

str, default = 'annotation' corresponds to self.true_labels. Other options are 'time' (uses single-cell pseudotime) or 'clusters' (uses self.clusters)

'annotation'
add_outline_clusters

bool, default = False, whether to add an outline to the clusters

False
cluster_outline_edgewidth

float, default = 0.001, width of the outline around the clusters

0.001
gp_color

str, default = 'white', color of the grid points

'white'
bg_color

str, default = 'black', color of the background

'black'
dpi

int, default = 80, dpi of the figure

80
title

str, default = 'Streamplot', title of the figure

'Streamplot'
b_bias

int, default = 20, higher value makes the forward bias of pseudotime stronger

20
n_neighbors_velocity_grid

int, default = None, number of neighbors to use for the velocity grid

None
other_labels

list, default = None, list of other labels to plot in the streamplot

None
use_sequentially_augmented

bool, default = False, whether to use the sequentially augmented data

False
cmap_str

str, default = 'rainbow', color map to use for the streamplot

'rainbow'

Returns:

Name Type Description
fig matplotlib.figure.Figure

matplotlib figure

ax matplotlib.axes._axes.Axes

matplotlib axis

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_via.py
def plot_stream(self,clusters:str='',basis:str='',
               density_grid:float=0.5, arrow_size:float=0.7, arrow_color:str = 'k',
               arrow_style="-|>",  max_length:int=4, linewidth:float=1,min_mass = 1, cutoff_perc:int = 5,
               scatter_size:int=500, scatter_alpha:float=0.5,marker_edgewidth:float=0.1,
               density_stream:int = 2, smooth_transition:int=1, smooth_grid:float=0.5,
               color_scheme:str = 'annotation', add_outline_clusters:bool=False,
               cluster_outline_edgewidth = 0.001,gp_color = 'white', bg_color='black' ,
               dpi=80 , title='Streamplot', b_bias=20, n_neighbors_velocity_grid=None,
               other_labels:list = None,use_sequentially_augmented:bool=False, cmap_str:str='rainbow')->Tuple[matplotlib.figure.Figure,
                                                                                                      matplotlib.axes._axes.Axes]:
    """Draw a vector streamplot on the embedding, giving a fine-grained view of the inferred directions in the trajectory

    Arguments:
        clusters : column name of the adata.obs dataframe that contains the cluster labels; '' falls back to `self.clusters`
        basis : str, key of `self.adata.obsm` holding the embedding; '' falls back to `self.basis`
        density_grid : float, default = 0.5, density of the grid on which to project the directionality of cells
        arrow_size : float, default = 0.7, size of the arrows in the streamplot
        arrow_color : str, default = 'k', color of the arrows in the streamplot
        arrow_style : str, default = "-|>", style of the arrows in the streamplot
        max_length : int, default = 4, maximum length of the arrows in the streamplot
        linewidth : float, default = 1, width of lines in streamplot
        min_mass : float, default = 1, minimum mass of the arrows in the streamplot
        cutoff_perc : int, default = 5, cutoff percentage of the arrows in the streamplot
        scatter_size : int, default = 500, size of scatter points
        scatter_alpha : float, default = 0.5, transparency of scatter points
        marker_edgewidth : float, default = 0.1, width of outline around each scatter point
        density_stream : int, default = 2, density of the streamplot
        smooth_transition : int, default = 1, smoothness of the transition between the streamplot and the scatter points
        smooth_grid : float, default = 0.5, smoothness of the grid on which to project the directionality of cells
        color_scheme : str, default = 'annotation' corresponds to self.true_labels. Other options are 'time' (uses single-cell pseudotime) or 'clusters' (uses self.clusters)
        add_outline_clusters : bool, default = False, whether to add an outline to the clusters
        cluster_outline_edgewidth : float, default = 0.001, width of the outline around the clusters
        gp_color : str, default = 'white', color of the grid points
        bg_color : str, default = 'black', color of the background
        dpi : int, default = 80, dpi of the figure
        title : str, default = 'Streamplot', title of the figure
        b_bias : int, default = 20, higher value makes the forward bias of pseudotime stronger
        n_neighbors_velocity_grid : int, default = None, number of neighbors to use for the velocity grid
        other_labels : list, default = None, list of other labels to plot in the streamplot
        use_sequentially_augmented : bool, default = False, whether to use the sequentially augmented data
        cmap_str : str, default = 'rainbow', color map to use for the streamplot

    Returns:
        fig : matplotlib figure
        ax : matplotlib axis
    """
    cluster_key = self.clusters if clusters=='' else clusters
    basis_key = self.basis if basis=='' else basis
    # The label column is converted to categorical in place before plotting.
    self.adata.obs[cluster_key]=self.adata.obs[cluster_key].astype('category')
    coordinates = self.adata.obsm[basis_key]

    stream_options = dict(
        adata=self.adata,
        clusters=cluster_key,
        via_object=self.model,
        embedding=coordinates,
        density_grid=density_grid,
        arrow_size=arrow_size,
        arrow_color=arrow_color,
        arrow_style=arrow_style,
        max_length=max_length,
        linewidth=linewidth,
        min_mass=min_mass,
        cutoff_perc=cutoff_perc,
        scatter_size=scatter_size,
        scatter_alpha=scatter_alpha,
        marker_edgewidth=marker_edgewidth,
        density_stream=density_stream,
        smooth_transition=smooth_transition,
        smooth_grid=smooth_grid,
        color_scheme=color_scheme,
        add_outline_clusters=add_outline_clusters,
        cluster_outline_edgewidth=cluster_outline_edgewidth,
        gp_color=gp_color,
        bg_color=bg_color,
        dpi=dpi,
        title=title,
        b_bias=b_bias,
        n_neighbors_velocity_grid=n_neighbors_velocity_grid,
        other_labels=other_labels,
        use_sequentially_augmented=use_sequentially_augmented,
        cmap_str=cmap_str,
    )
    figure, axis = via_streamplot_pyomic(**stream_options)
    return figure, axis

plot_trajectory_gams(clusters='', basis='', via_fine=None, idx=None, title_str='Pseudotime', draw_all_curves=True, arrow_width_scale_factor=15.0, scatter_size=50, scatter_alpha=0.5, figsize=(8, 4), linewidth=1.5, marker_edgewidth=1, cmap_pseudotime='viridis_r', dpi=80, highlight_terminal_states=True, use_maxout_edgelist=False)

projects the graph based coarse trajectory onto a umap/tsne embedding

Parameters:

Name Type Description Default
clusters

column name of the adata.obs dataframe that contains the cluster labels

''
basis

str, default = 'X_umap', which to use for the embedding

''
via_fine

via object suggest to use via_object only unless you found that running via_fine gave better pathways

None
idx

default: None. Or List. if you had previously computed a umap/tsne (embedding) only on a subset of the total n_samples (subsampled as per idx), then the via objects and results will be indexed according to idx too

None
title_str

title of figure

'Pseudotime'
draw_all_curves

if the clustergraph has too many edges to project in a visually interpretable way, set this to False to get a simplified view of the graph pathways

True
arrow_width_scale_factor

the width of the arrows is proportional to the edge weight. This factor scales the width of the arrows

15.0
scatter_size

size of the scatter points

50
scatter_alpha

transparency of the scatter points

0.5
linewidth

width of the lines

1.5
marker_edgewidth

width of the outline around each scatter point

1
cmap_pseudotime

color map to use for the pseudotime

'viridis_r'
dpi

dpi of the figure

80
highlight_terminal_states

whether or not to highlight/distinguish the clusters which are detected as the terminal states by via

True

Returns:

Name Type Description
fig matplotlib.figure.Figure

matplotlib figure

ax1 matplotlib.axes._axes.Axes

matplotlib axis

ax2 matplotlib.axes._axes.Axes

matplotlib axis

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_via.py
def plot_trajectory_gams(self,clusters:str='',basis:str='',via_fine=None, idx=None,
                     title_str:str= "Pseudotime", draw_all_curves:bool=True, arrow_width_scale_factor:float=15.0,
                     scatter_size:float=50, scatter_alpha:float=0.5,figsize:tuple=(8,4),
                     linewidth:float=1.5, marker_edgewidth:float=1, cmap_pseudotime:str='viridis_r',dpi:int=80,
                     highlight_terminal_states:bool=True, use_maxout_edgelist:bool =False)->Tuple[matplotlib.figure.Figure,
                                                                                             matplotlib.axes._axes.Axes,
                                                                                             matplotlib.axes._axes.Axes]:
    """Project the graph-based coarse trajectory onto a umap/tsne embedding

    Arguments:
        clusters : column name of the adata.obs dataframe that contains the cluster labels; '' falls back to `self.clusters`
        basis : str, key of `self.adata.obsm` holding the embedding; '' falls back to `self.basis`
        via_fine : via object suggest to use via_object only unless you found that running via_fine gave better pathways
        idx : default: None. Or List. if you had previously computed a umap/tsne (embedding) only on a subset of the total n_samples (subsampled as per idx), then the via objects and results will be indexed according to idx too
        title_str : title of figure
        draw_all_curves : if the clustergraph has too many edges to project in a visually interpretable way, set this to False to get a simplified view of the graph pathways
        arrow_width_scale_factor : the width of the arrows is proportional to the edge weight. This factor scales the width of the arrows
        scatter_size : size of the scatter points
        scatter_alpha : transparency of the scatter points
        figsize : size of the figure
        linewidth : width of the lines
        marker_edgewidth : width of the outline around each scatter point
        cmap_pseudotime : color map to use for the pseudotime
        dpi : dpi of the figure
        highlight_terminal_states : whether or not to highlight/distinguish the clusters which are detected as the terminal states by via
        use_maxout_edgelist : bool default = False. forwarded unchanged to `draw_trajectory_gams_pyomic`

    Returns:
        fig : matplotlib figure
        ax1 : matplotlib axis
        ax2 : matplotlib axis

    """
    cluster_key = self.clusters if clusters=='' else clusters
    basis_key = self.basis if basis=='' else basis
    # The label column is converted to categorical in place before plotting.
    self.adata.obs[cluster_key]=self.adata.obs[cluster_key].astype('category')
    coordinates = self.adata.obsm[basis_key]

    trajectory_options = dict(
        adata=self.adata,
        clusters=cluster_key,
        via_object=self.model,
        via_fine=via_fine,
        embedding=coordinates,
        idx=idx,
        title_str=title_str,
        draw_all_curves=draw_all_curves,
        arrow_width_scale_factor=arrow_width_scale_factor,
        scatter_size=scatter_size,
        scatter_alpha=scatter_alpha,
        figsize=figsize,
        linewidth=linewidth,
        marker_edgewidth=marker_edgewidth,
        cmap_pseudotime=cmap_pseudotime,
        dpi=dpi,
        highlight_terminal_states=highlight_terminal_states,
        use_maxout_edgelist=use_maxout_edgelist,
    )
    figure, first_ax, second_ax = draw_trajectory_gams_pyomic(**trajectory_options)
    return figure, first_ax, second_ax

plot_lineage_probability(clusters='', basis='', via_fine=None, idx=None, figsize=(8, 4), cmap='plasma', dpi=80, scatter_size=None, marker_lineages=[], fontsize=12)

G is the igraph knn (low K) used for shortest path in high dim space. no idx needed as it's made on full sample, knn_hnsw is the knn made in the embedded space used for query to find the nearest point in the downsampled embedding that corresponds to the single cells in the full graph

Parameters:

Name Type Description Default
clusters

column name of the adata.obs dataframe that contains the cluster labels

''
basis

str, default = 'X_umap', which to use for the embedding

''
via_fine

usually just set to same as via_coarse unless you ran a refined run and want to link it to initial via_coarse's terminal clusters

None
idx

if one uses a downsampled embedding of the original data, then idx is the selected indices of the downsampled samples used in the visualization

None
figsize

size of the figure

(8, 4)
cmap

color map to use for the lineage probability

'plasma'
dpi

dpi of the figure

80
scatter_size

size of the scatter points

None
marker_lineages

Default is to use all lineage pathways. Otherwise provide a list of lineage numbers (terminal cluster numbers).

[]
fontsize

fontsize of the title

12

Returns:

Name Type Description
fig matplotlib.figure.Figure

matplotlib figure

axs matplotlib.axes._axes.Axes

matplotlib axis

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_via.py
def plot_lineage_probability(self,clusters:str='',basis:str='',via_fine=None, 
                            idx=None, figsize:tuple=(8,4),
                            cmap:str='plasma', dpi:int=80, scatter_size =None,
                            marker_lineages:list = [], fontsize:int=12)->Tuple[matplotlib.figure.Figure,
                                                                               matplotlib.axes._axes.Axes]:
    """Plot the single-cell lineage probabilities on the embedding

    G is the igraph knn (low K) used for shortest path in high dim space. no idx needed as it's made on full sample, knn_hnsw is the knn made in the embedded space used for query to find the nearest point in the downsampled embedding that corresponds to the single cells in the full graph

    Arguments:
        clusters : column name of the adata.obs dataframe that contains the cluster labels; '' falls back to `self.clusters`
        basis : str, key of `self.adata.obsm` holding the embedding; '' falls back to `self.basis`
        via_fine : usually just set to same as via_coarse unless you ran a refined run and want to link it to initial via_coarse's terminal clusters
        idx : if one uses a downsampled embedding of the original data, then idx is the selected indices of the downsampled samples used in the visualization
        figsize : size of the figure
        cmap : color map to use for the lineage probability (passed on as `cmap_name`)
        dpi : dpi of the figure
        scatter_size : size of the scatter points
        marker_lineages : Default is to use all lineage pathways. Otherwise provide a list of lineage numbers (terminal cluster numbers).
        fontsize : fontsize of the title

    Returns:
        fig : matplotlib figure
        axs : matplotlib axis
    """
    cluster_key = self.clusters if clusters=='' else clusters
    basis_key = self.basis if basis=='' else basis
    # The label column is converted to categorical in place; it is not
    # itself handed to the drawing routine.
    self.adata.obs[cluster_key]=self.adata.obs[cluster_key].astype('category')
    coordinates = self.adata.obsm[basis_key]

    lineage_options = dict(
        via_object=self.model,
        via_fine=via_fine,
        embedding=coordinates,
        figsize=figsize,
        idx=idx,
        cmap_name=cmap,
        dpi=dpi,
        scatter_size=scatter_size,
        marker_lineages=marker_lineages,
        fontsize=fontsize,
    )
    figure, axes = draw_sc_lineage_probability(**lineage_options)
    figure.tight_layout()
    return figure, axes

plot_gene_trend(gene_list=None, figsize=(8, 4), magic_steps=3, spline_order=5, dpi=80, cmap='jet', marker_genes=[], linewidth=2.0, n_splines=10, fontsize_=12, marker_lineages=[])

plots the gene expression trend along the pseudotime

Parameters:

Name Type Description Default
gene_list

list of genes to plot

None
figsize

size of the figure

(8, 4)
magic_steps

number of magic steps to use for imputation

3
spline_order

order of the spline to use for smoothing

5
dpi

dpi of the figure

80
cmap

color map to use for the gene expression

'jet'
marker_genes

Default is to use all genes in gene_exp. Otherwise provide a list of marker genes that will be used from gene_exp.

[]
linewidth

width of the lines

2.0
n_splines

number of splines to use for smoothing

10
fontsize_

fontsize of the title

12
marker_lineages

Default is to use all lineage pathways. Otherwise provide a list of lineage numbers (terminal cluster numbers).

[]

Returns:

Name Type Description
fig matplotlib.figure.Figure

matplotlib figure

axs matplotlib.axes._axes.Axes

matplotlib axis

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_via.py
def plot_gene_trend(self,gene_list:list=None,figsize:tuple=(8,4),
                    magic_steps:int=3, spline_order:int=5, dpi:int=80,cmap:str='jet', 
                    marker_genes:list = [], linewidth:float = 2.0,
                    n_splines:int=10,  fontsize_:int=12, marker_lineages=[])->Tuple[matplotlib.figure.Figure,
                                                                                    matplotlib.axes._axes.Axes]:
    """Plot gene expression trends along the pseudotime

    The expression of `gene_list` is imputed with `self.model.do_impute`
    and the result is handed to `get_gene_expression_pyomic`.

    Arguments:
        gene_list : list of genes to plot
        figsize : size of the figure
        magic_steps : number of magic steps to use for imputation
        spline_order : order of the spline to use for smoothing
        dpi : dpi of the figure
        cmap : color map to use for the gene expression
        marker_genes : Default is to use all genes in gene_exp. Otherwise provide a list of marker genes that will be used from gene_exp.
        linewidth : width of the lines
        n_splines : number of splines to use for smoothing
        fontsize_ : fontsize of the title
        marker_lineages : Default is to use all lineage pathways. Otherwise provide a list of lineage numbers (terminal cluster numbers).

    Returns:
        fig : matplotlib figure
        axs : matplotlib axis

    """
    impute = self.model.do_impute
    raw_expression = self.adata[:,gene_list].to_df()
    imputed = impute(raw_expression, magic_steps=magic_steps, gene_list=gene_list)

    trend_options = dict(
        spline_order=spline_order,
        dpi=dpi,
        cmap=cmap,
        marker_genes=marker_genes,
        linewidth=linewidth,
        figsize=figsize,
        n_splines=n_splines,
        fontsize_=fontsize_,
        marker_lineages=marker_lineages,
    )
    figure, axes = get_gene_expression_pyomic(self.model, imputed, **trend_options)
    figure.tight_layout()
    return figure, axes

plot_gene_trend_heatmap(gene_list, marker_lineages=[], fontsize=8, cmap='viridis', normalize=True, ytick_labelrotation=0, figsize=(2, 4))

Plot the gene trends on heatmap: a heatmap is generated for each lineage (identified by terminal cluster number). Default selects all lineages

Parameters:

Name Type Description Default
gene_list

list of genes to plot

required
marker_lineages

list, default = [] (empty), which plots all detected lineages. Optionally provide a list of integers corresponding to the cluster number of terminal cell fates

[]
fontsize

int default = 8

8
cmap

str default = 'viridis'

'viridis'
normalize

bool = True

True
ytick_labelrotation

int default = 0

0
figsize

size of the figure

(2, 4)

Returns:

Name Type Description
fig matplotlib.figure.Figure

matplotlib figure

axs list

list of matplotlib axis

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_via.py
def plot_gene_trend_heatmap(self,gene_list:list,marker_lineages:list = [], 
                         fontsize:int=8,cmap:str='viridis', normalize:bool=True, ytick_labelrotation:int = 0, 
                         figsize:tuple=(2,4), magic_steps:int=3)->Tuple[matplotlib.figure.Figure,
                                                                list]:
    """Plot the gene trends on heatmap: a heatmap is generated for each lineage (identified by terminal cluster number). Default selects all lineages

    The expression of `gene_list` is imputed with `self.model.do_impute`
    and the result is handed to `plot_gene_trend_heatmaps_pyomic`.

    Arguments:
        gene_list : list of genes to plot
        marker_lineages : list, default = [] which plots all detected lineages. Optionally provide a list of integers corresponding to the cluster number of terminal cell fates
        fontsize : int default = 8
        cmap : str default = 'viridis'
        normalize : bool = True
        ytick_labelrotation : int default = 0
        figsize : size of the figure
        magic_steps : int default = 3. number of magic steps to use for imputation (previously hard-coded to 3)

    Returns:
        fig : matplotlib figure
        axs : list of matplotlib axis
    """

    df_magic = self.model.do_impute(self.adata[:,gene_list].to_df(), magic_steps=magic_steps, gene_list=gene_list)
    # The cluster labels are still attached as a 'parc' column because the
    # frame handed downstream has always carried it.
    # NOTE(review): confirm whether plot_gene_trend_heatmaps_pyomic needs it.
    df_magic['parc'] = self.model.labels
    # The per-cluster mean that used to be computed here was never used,
    # so it has been dropped.
    fig,axs=plot_gene_trend_heatmaps_pyomic(via_object=self.model, df_gene_exp=df_magic, 
                                            cmap=cmap,fontsize=fontsize,normalize=normalize,
                                            ytick_labelrotation=ytick_labelrotation,figsize=figsize,
                                            marker_lineages=marker_lineages)
    fig.tight_layout()
    return fig,axs

plot_clustergraph(gene_list, arrow_head=0.1, figsize=(8, 4), dpi=80, magic_steps=3, edgeweight_scale=1.5, cmap=None, label_=True)

plot the gene in pie chart for each cluster

Parameters:

Name Type Description Default
gene_list

list of genes to plot

required
arrow_head

size of the arrow head

0.1
figsize

size of the figure

(8, 4)
edgeweight_scale

scale of the edge weight

1.5
cmap

color map to use for the gene expression

None
label_

whether to label the nodes

True

Returns:

Name Type Description
fig matplotlib.figure.Figure

matplotlib figure

axs matplotlib.axes._axes.Axes

matplotlib axis

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_via.py
def plot_clustergraph(self,gene_list:list,arrow_head:float=0.1,figsize:tuple=(8,4),dpi=80,magic_steps=3,
                      edgeweight_scale:float=1.5, cmap=None, label_=True,)->Tuple[matplotlib.figure.Figure,
                                                                                    matplotlib.axes._axes.Axes]:
    """Plot the cluster graph colored by the per-cluster mean of imputed gene expression

    The expression of `gene_list` is imputed with `self.model.do_impute`,
    averaged per label in `self.model.labels`, and handed to
    `draw_clustergraph_pyomic` with `type_data='gene'`.

    Arguments:
        gene_list : list of genes to plot
        arrow_head : size of the arrow head
        figsize : size of the figure
        dpi : dpi of the figure
        magic_steps : number of magic steps to use for imputation
        edgeweight_scale : scale of the edge weight
        cmap : color map to use for the gene expression
        label_ : whether to label the nodes

    Returns:
        fig : matplotlib figure
        axs : matplotlib axis
    """
    impute = self.model.do_impute
    raw_expression = self.adata[:,gene_list].to_df()
    imputed = impute(raw_expression, magic_steps=magic_steps, gene_list=gene_list)
    # Attach the cluster labels so expression can be averaged per cluster.
    imputed['parc'] = self.model.labels
    cluster_means = imputed.groupby('parc', as_index=True).mean()

    graph_options = dict(
        via_object=self.model,
        type_data='gene',
        gene_exp=cluster_means,
        gene_list=gene_list,
        arrow_head=arrow_head,
        figsize=figsize,
        edgeweight_scale=edgeweight_scale,
        cmap=cmap,
        label_=label_,
        dpi=dpi,
    )
    figure, axes = draw_clustergraph_pyomic(**graph_options)
    figure.tight_layout()
    return figure, axes