Api via
omicverse.single.pyVIA
¶
        Bases: object
Source code in /Users/fernandozeng/miniforge3/envs/space/lib/python3.10/site-packages/omicverse/single/_via.py
class pyVIA(object):
    def __init__(self,adata:anndata.AnnData,adata_key:str='X_pca',adata_ncomps:int=80,basis:str='X_umap',
                 clusters:str='',dist_std_local:float=2, jac_std_global=0.15, labels:np.ndarray=None,
                 keep_all_local_dist='auto', too_big_factor:float=0.4, resolution_parameter:float=1.0, partition_type:str="ModularityVP", small_pop:int=10,
                 jac_weighted_edges:bool=True, knn:int=30, n_iter_leiden:int=5, random_seed:int=42,
                 num_threads=-1, distance='l2', time_smallpop=15,
                 super_cluster_labels:bool=False,                 super_node_degree_list:bool=False, super_terminal_cells:bool=False, x_lazy:float=0.95, alpha_teleport:float=0.99,
                 root_user=None, preserve_disconnected:bool=True, dataset:str='', super_terminal_clusters:list=[],
                 is_coarse=True, csr_full_graph:np.ndarray='', csr_array_locally_pruned='', ig_full_graph='',
                 full_neighbor_array='', full_distance_array='',  df_annot=None,
                 preserve_disconnected_after_pruning:bool=False,
                 secondary_annotations:list=None, pseudotime_threshold_TS:int=30, cluster_graph_pruning_std:float=0.15,
                 visual_cluster_graph_pruning:float=0.15, neighboring_terminal_states_threshold=3, num_mcmc_simulations=1300,
                 piegraph_arrow_head_width=0.1,
                 piegraph_edgeweight_scalingfactor=1.5, max_visual_outgoing_edges:int=2, via_coarse=None, velocity_matrix=None,
                 gene_matrix=None, velo_weight=0.5, edgebundle_pruning=None, A_velo = None, CSM = None, edgebundle_pruning_twice=False, pca_loadings = None, time_series=False,
                 time_series_labels:list=None, knn_sequential:int = 10, knn_sequential_reverse:int = 0,t_diff_step:int = 1,single_cell_transition_matrix = None,
                 embedding_type:str='via-mds',do_compute_embedding:bool=False, color_dict:dict=None,user_defined_terminal_cell:list=[], user_defined_terminal_group:list=[],
                 do_gaussian_kernel_edgeweights:bool=False,RW2_mode:bool=False,working_dir_fp:str ='/home/shobi/Trajectory/Datasets/') -> None:
        r"""Initialize a pyVIA object for trajectory inference.
        Arguments:
            adata: An AnnData object containing the single-cell RNA-seq data
            adata_key (str): The key of the AnnData in obsm to perform VIA on (default: 'X_pca')
            adata_ncomps (int): The number of components to use from the AnnData in obsm (default: 80)
            basis (str): The key of the AnnData in obsm to use as the basis for embedding (default: 'X_umap')
            clusters (str): The clusters to use for the VIA analysis (default: '')
            dist_std_local: local level of pruning for PARC graph clustering stage. Range (0.1,3) higher numbers mean more edge retention
            jac_std_global: (optional, default = 0.15, can also set as 'median') global level graph pruning for PARC clustering stage. Number of standard deviations below the network’s mean-jaccard-weighted edges. 0.1-1 provide reasonable pruning.higher value means less pruning (more edges retained). e.g. a value of 0.15 means all edges that are above mean(edgeweight)-0.15*std(edge-weights) are retained. We find both 0.15 and ‘median’ to yield good results/starting point and resulting in pruning away ~ 50-60% edges
            labels: default is None. and PARC clusters are used for the viagraph. alternatively provide a list of clustermemberships that are integer values (not strings) to construct the viagraph using another clustering method or available annotations
            keep_all_local_dist: default value of 'auto' means that for smaller datasets local-pruning is done prior to clustering, but for large datasets local pruning is set to False for speed.
            too_big_factor: (optional, default=0.4). Forces clusters > 0.4*n_cells to be re-clustered
            resolution_parameter: (float) the resolution parameter for the Louvain algorithm.
            partition_type: (str, default: "ModularityVP") the partitioning algorithm to use.
            small_pop: (int, default: 10) the number of cells to be considered in a small population.
            jac_weighted_edges: (bool, default: True) whether to use weighted edges in the PARC clustering step.
            knn: (int, optional, default: 30) the number of K-Nearest Neighbors for HNSWlib KNN graph. Larger knn means more graph connectivity. Lower knn means more loosely connected clusters/cells.
            n_iter_leiden: (int) the number of iterations for the Leiden algorithm.
            random_seed: (int) the random seed to pass to the clustering algorithm.
            num_threads: (int) the number of threads to use for the clustering algorithm.
            distance: (str, default: 'l2') the distance metric to use for graph construction and similarity. Options are 'l2', 'ip', and 'cosine'.
            visual_cluster_graph_pruning: (float, default: 0.15) the pruning level for the cluster graph. This only comes into play if the user deliberately chooses not to use the default edge-bundling method of visualizating edges (draw_piechart_graph()) and instead calls draw_piechart_graph_nobundle(). It controls the number of edges plotted for visual effect. This does not impact computation of terminal states, pseudotime, or lineage likelihoods.
            cluster_graph_pruning_std: (float, default: 0.15) the pruning level of the cluster graph. Often set to the same value as the PARC clustering level of jac_std_global. Reasonable range is [0.1, 1]. To retain more connectivity in the clustergraph underlying the trajectory computations, increase the value.
            time_smallpop: (max time to be allowed handling singletons) the maximum time allowed to handle singletons.
            super_cluster_labels:
            super_node_degree_list:
            super_terminal_cells:
            x_lazy: (float, default: 0.95) 1-x = probability of staying in the same node (lazy). Values between 0.9-0.99 are reasonable.
            alpha_teleport: (float, default: 0.99) 1-alpha is probability of jumping. Values between 0.95-0.99 are reasonable unless prior knowledge of teleportation.
            root_user: (list, None) the root user list. Can be a list of strings, a list of int, or None. The default is None. When the root_user is set as None and an RNA velocity matrix is available, a root will be automatically computed. If the root_user is None and no velocity matrix is provided, then an arbitrary root is selected. If the root_user is ['celltype_earlystage'] where the str corresponds to an item in true_label, then a suitable starting point will be selected corresponding to this group.
            preserve_disconnected: bool (default = True) If you believe there may be disconnected trajectories then set this to False
            dataset: str Can be set to 'group' or '' (default). this refers to the type of root label (group level root or single cell index) you are going to provide. if your true_label has a sensible group of cells for a root then you can set dataset to 'group' and make the root parameter ['labelname_root_cell_type'] if your root corresponds to one particular cell then set dataset = '' (default)
            embedding: ndarray (optional, default = None) embedding (e.g. precomputed tsne, umap, phate, via-umap) for plotting data. Size n_cells x 2 If an embedding is provided when running VIA, then a scatterplot colored by pseudotime, highlighting terminal fates
            velo_weight: float (optional, default = 0.5) #float between [0,1]. the weight assigned to directionality and connectivity derived from scRNA-velocity
            neighboring_terminal_states_threshold:int (default = 3). Candidates for terminal states that are neighbors of each other may be removed from the list if they have this number of more of terminal states as neighbors
            knn_sequential:int (default =10) number of knn in the adjacent time-point for time-series data (t_i and t_i+1)
            knn_sequential_reverse: int (default = 0) number of knn enforced from current to previous time point
            t_diff_step: int (default =1) Number of permitted temporal intervals between connected nodes. If time data is labeled as [0,25,50,75,100,..] then t_diff_step=1 corresponds to '25' and only edges within t_diff_steps are retained
            is_coarse:bool (default = True) If running VIA in two iterations where you wish to link the second fine-grained iteration with the initial iteration, then you set to False
            via_coarse: VIA (default = None) If instantiating a second iteration of VIA that needs to be linked to a previous iteration (e.g. via0), then set via_coarse to the previous via0 object
            df_annot: DataFrame (default None) used for the Mouse Organ data
            preserve_disconnected_after_pruning:bool (default = True) If you believe there are disconnected trajectories then set this to False and test your hypothesis
            A_velo: ndarray Cluster Graph Transition matrix based on rna velocity [n_clus x n_clus]
            velocity_matrix: matrix (default None) matrix of size [n_samples x n_genes]. this is the velocity matrix computed by scVelo (or similar package) and stored in adata.layers['velocity']. The genes used for computing velocity should correspond to those useing in gene_matrix Requires gene_matrix to be provided too.
            gene_matrix: matrix (default None) Only used if Velocity_matrix is available. matrix of size [n_samples x n_genes]. We recommend using a subset like HVGs rather than full set of genes. (need to densify input if taking from adata = adata.X.todense())
            time_series: if the data has time-series labels then set to True
            time_series_labels:list (default None) list of integer values of temporal annotations corresponding to e.g. hours (post fert), days, or sequential ordering
            pca_loadings: array (default None) the loadings of the pcs used to project the cells (to projected euclidean location based on velocity). n_cells x n_pcs
            secondary_annotations: None (default None)
            edgebundle_pruning:float (default=None) will by default be set to the same as the cluster_graph_pruning_std and influences the visualized level of pruning of edges. Typical values can be between [0,1] with higher numbers retaining more edges
            edgebundle_pruning_twice:bool default: False. When True, the edgebundling is applied to a further visually pruned (visual_cluster_graph_pruning) and can sometimes simplify the visualization. it does not impact the pseudotime and lineage computations
            piegraph_arrow_head_width: float (default = 0.1) size of arrow heads in via cluster graph
            piegraph_edgeweight_scalingfactor: (defaulf = 1.5) scaling factor for edge thickness in via cluster graph
            max_visual_outgoing_edges: int (default =2) Rarely comes into play. Only used if the user later chooses to plot the via-graph without edgebunding using draw_piechart_graph_nobundle() Only allows max_visual_outgoing_edges to come out of any given node.
            edgebundle_pruning:float (default=None) will by default be set to the same as the cluster_graph_pruning_std and influences the visualized level of pruning of edges. Typical values can be between [0,1] with higher numbers retaining more edges
            edgebundle_pruning_twice:bool default: False. When True, the edgebundling is applied to a further visually pruned (visual_cluster_graph_pruning) and can sometimes simplify the visualization. it does not impact the pseudotime and lineage computations
            pseudotime_threshold_TS: int (default = 30) corresponds to the criteria for a state to be considered a candidate terminal cell fate to be 30% or later of the computed pseudotime range
            num_mcmc_simulations:int (default = 1300) number of random walk simulations conducted
            embedding_type: str (default = 'via-mds', other options are 'via-umap' and 'via-force'
            do_compute_embedding: bool (default = False) If you want an embedding (n_samples x2) to be computed on the basis of the via sc graph then set this to True
            do_gaussian_kernel_edgeweights: bool (default = False) Type of edgeweighting on the graph edges
        """
        self.adata = adata
        #self.adata_key = adata_key
        data = adata.obsm[adata_key][:, 0:adata_ncomps]
        embedding=self.adata.obsm[basis]
        true_label=adata.obs[clusters]
        self.clusters=clusters
        self.basis=basis
        if root_user is not None:
             dataset='group'
        self.model=VIA(data=data,true_label=true_label,
                 dist_std_local=dist_std_local,jac_std_global=jac_std_global,labels=labels,
                 keep_all_local_dist=keep_all_local_dist,too_big_factor=too_big_factor,resolution_parameter=resolution_parameter,partition_type=partition_type,small_pop=small_pop,
                 jac_weighted_edges=jac_weighted_edges,knn=knn,n_iter_leiden=n_iter_leiden,random_seed=random_seed,
                 num_threads=num_threads,distance=distance,time_smallpop=time_smallpop,
                 super_cluster_labels=super_cluster_labels,super_node_degree_list=super_node_degree_list,super_terminal_cells=super_terminal_cells,x_lazy=x_lazy,alpha_teleport=alpha_teleport,
                 root_user=root_user,preserve_disconnected=preserve_disconnected,dataset=dataset,super_terminal_clusters=super_terminal_clusters,
                 is_coarse=is_coarse,csr_full_graph=csr_full_graph,csr_array_locally_pruned=csr_array_locally_pruned,ig_full_graph=ig_full_graph,
                 full_neighbor_array=full_neighbor_array,full_distance_array=full_distance_array,embedding=embedding,df_annot=df_annot,
                 preserve_disconnected_after_pruning=preserve_disconnected_after_pruning,
                 secondary_annotations=secondary_annotations,pseudotime_threshold_TS=pseudotime_threshold_TS,cluster_graph_pruning_std=cluster_graph_pruning_std,
                 visual_cluster_graph_pruning=visual_cluster_graph_pruning,neighboring_terminal_states_threshold=neighboring_terminal_states_threshold,num_mcmc_simulations=num_mcmc_simulations,
                 piegraph_arrow_head_width=piegraph_arrow_head_width,
                 piegraph_edgeweight_scalingfactor=piegraph_edgeweight_scalingfactor,max_visual_outgoing_edges=max_visual_outgoing_edges,via_coarse=via_coarse,velocity_matrix=velocity_matrix,
                 gene_matrix=gene_matrix,velo_weight=velo_weight,edgebundle_pruning=edgebundle_pruning,A_velo=A_velo,CSM=CSM,edgebundle_pruning_twice=edgebundle_pruning_twice,pca_loadings=pca_loadings,time_series=time_series,
                 time_series_labels=time_series_labels,knn_sequential=knn_sequential,knn_sequential_reverse=knn_sequential_reverse,t_diff_step=t_diff_step,single_cell_transition_matrix=single_cell_transition_matrix,
                 embedding_type=embedding_type,do_compute_embedding=do_compute_embedding,color_dict=color_dict,user_defined_terminal_cell=user_defined_terminal_cell,user_defined_terminal_group=user_defined_terminal_group,
                 do_gaussian_kernel_edgeweights=do_gaussian_kernel_edgeweights,RW2_mode=RW2_mode,working_dir_fp=working_dir_fp
                 )
    def run(self):
        r"""Calculate the VIA graph and pseudotime.
        This method runs the main VIA algorithm to construct the trajectory graph
        and compute pseudotime for each cell.
        """
        self.model.run_VIA()
        add_reference(self.adata,'VIA','trajectory inference with VIA')
    def get_piechart_dict(self,label:int=0,clusters:str='')->dict:
        r"""Get cluster composition dictionary for pie chart visualization.
        Arguments:
            label (int): Cluster label of pie chart (default: 0)
            clusters (str): The celltype column name of interest (default: '')
        Returns:
            dict: Cluster composition dictionary for visualization
        """
        if clusters=='':
            clusters=self.clusters
        self.adata.obs[clusters]=self.adata.obs[clusters].astype('category')
        cluster_i_loc=np.where(np.asarray(self.model.labels) == label)[0]
        res_dict=dict(self.adata.obs.iloc[cluster_i_loc].value_counts(clusters))
        return res_dict
    def get_pseudotime(self,adata=None):
        r"""Extract the pseudotime values computed by VIA.
        Arguments:
            adata: An AnnData object to add pseudotime to (default: None)
                  If None, pseudotime will be added to self.adata.obs['pt_via']
        """
        print('...the pseudotime of VIA added to AnnData obs named `pt_via`')
        if adata is None:
            self.adata.obs['pt_via']=self.model.single_cell_pt_markov
        else:
            adata.obs['pt_via']=self.model.single_cell_pt_markov
    def plot_piechart_graph(self,clusters:str='', type_data='pt',
                                gene_exp:list=[], title='', 
                                cmap:str=None, ax_text=True, figsize:tuple=(8,4),
                                dpi=150,headwidth_arrow = 0.1, 
                                alpha_edge=0.4, linewidth_edge=2, 
                                edge_color='darkblue',reference=None, 
                                show_legend:bool=True, pie_size_scale:float=0.8, fontsize:float=8)->Tuple[matplotlib.figure.Figure,
                                                                                                          matplotlib.axes._axes.Axes,
                                                                                                          matplotlib.axes._axes.Axes]:
        r"""Plot two subplots with clustergraph representation showing cluster composition and pseudotime/gene expression.
        Arguments:
            clusters (str): Column name of the adata.obs dataframe containing cluster labels (default: '')
            type_data (str): Type of data for coloring nodes - 'pt' for pseudotime or 'gene' (default: 'pt')
            gene_exp (list): List of values for gene expression to color nodes at cluster level (default: [])
            title (str): Title for the plot (default: '')
            cmap (str): Colormap - automatically chooses coolwarm for gene or viridis_r for pseudotime (default: None)
            ax_text (bool): Whether to annotate nodes with cluster number and population (default: True)
            figsize (tuple): Figure size (default: (8,4))
            dpi (int): DPI for the figure (default: 150)
            headwidth_arrow (float): Width of arrowhead for directed edges (default: 0.1)
            alpha_edge (float): Transparency of edges (default: 0.4)
            linewidth_edge (float): Width of edges (default: 2)
            edge_color (str): Color of edges (default: 'darkblue')
            reference: List of categorical labels for cluster composition (default: None)
            show_legend (bool): Whether to show legend (default: True)
            pie_size_scale (float): Scaling factor of the piechart nodes (default: 0.8)
            fontsize (float): Font size of text in piecharts (default: 8)
        Returns:
            tuple: (fig, ax, ax1) where fig is matplotlib figure, ax is left axis with cluster composition, ax1 is right axis with pseudotime/gene expression
        """
        if clusters=='':
            clusters=self.clusters
        self.adata.obs[clusters]=self.adata.obs[clusters].astype('category')
        fig, ax, ax1 = draw_piechart_graph_pyomic(clusters=clusters,adata=self.adata,
                                   via_object=self.model, type_data=type_data,
                                gene_exp=gene_exp, title=title, 
                                cmap=cmap, ax_text=ax_text,figsize=figsize,
                                dpi=dpi,headwidth_arrow = headwidth_arrow,
                                alpha_edge=alpha_edge, linewidth_edge=linewidth_edge,
                                edge_color=edge_color,reference=reference,
                                show_legend=show_legend, pie_size_scale=pie_size_scale, fontsize=fontsize)
        return fig, ax, ax1
    def plot_stream(self,clusters:str='',basis:str='',
                   density_grid:float=0.5, arrow_size:float=0.7, arrow_color:str = 'k',
                   arrow_style="-|>",  max_length:int=4, linewidth:float=1,min_mass = 1, cutoff_perc:int = 5,
                   scatter_size:int=500, scatter_alpha:float=0.5,marker_edgewidth:float=0.1,
                   density_stream:int = 2, smooth_transition:int=1, smooth_grid:float=0.5,
                   color_scheme:str = 'annotation', add_outline_clusters:bool=False,
                   cluster_outline_edgewidth = 0.001,gp_color = 'white', bg_color='black' ,
                   dpi=80 , title='Streamplot', b_bias=20, n_neighbors_velocity_grid=None,
                   other_labels:list = None,use_sequentially_augmented:bool=False, cmap_str:str='rainbow')->Tuple[matplotlib.figure.Figure,
                                                                                                          matplotlib.axes._axes.Axes]:
        """Construct vector streamplot on the embedding to show a fine-grained view of inferred directions in the trajectory
        Arguments:
            clusters : column name of the adata.obs dataframe that contains the cluster labels
            basis : str, default = 'X_umap', which to use for the embedding
            density_grid : float, default = 0.5, density of the grid on which to project the directionality of cells
            arrow_size : float, default = 0.7, size of the arrows in the streamplot
            arrow_color : str, default = 'k', color of the arrows in the streamplot
            arrow_style : str, default = "-|>", style of the arrows in the streamplot
            max_length : int, default = 4, maximum length of the arrows in the streamplot
            linewidth : float, default = 1, width of  lines in streamplot
            min_mass : float, default = 1, minimum mass of the arrows in the streamplot
            cutoff_perc : int, default = 5, cutoff percentage of the arrows in the streamplot
            scatter_size : int, default = 500, size of scatter points
            scatter_alpha : float, default = 0.5, transpsarency of scatter points
            marker_edgewidth : float, default = 0.1, width of outline arround each scatter point
            density_stream : int, default = 2, density of the streamplot
            smooth_transition : int, default = 1, smoothness of the transition between the streamplot and the scatter points
            smooth_grid : float, default = 0.5, smoothness of the grid on which to project the directionality of cells
            color_scheme : str, default = 'annotation' corresponds to self.true_labels. Other options are 'time' (uses single-cell pseudotime) or 'clusters' (uses self.clusters)
            add_outline_clusters : bool, default = False, whether to add an outline to the clusters
            cluster_outline_edgewidth : float, default = 0.001, width of the outline around the clusters
            gp_color : str, default = 'white', color of the grid points
            bg_color : str, default = 'black', color of the background
            dpi : int, default = 80, dpi of the figure
            title : str, default = 'Streamplot', title of the figure
            b_bias : int, default = 20, higher value makes the forward bias of pseudotime stronger
            n_neighbors_velocity_grid : int, default = None, number of neighbors to use for the velocity grid
            other_labels : list, default = None, list of other labels to plot in the streamplot
            use_sequentially_augmented : bool, default = False, whether to use the sequentially augmented data
            cmap_str : str, default = 'rainbow', color map to use for the streamplot
        Returns:
            fig : matplotlib figure
            ax : matplotlib axis
        """
        if clusters=='':
            clusters=self.clusters
        if basis=='':
            basis=self.basis
        self.adata.obs[clusters]=self.adata.obs[clusters].astype('category')
        embedding=self.adata.obsm[basis]
        fig,ax = via_streamplot_pyomic(adata=self.adata,clusters=clusters,via_object=self.model, 
                                 embedding=embedding,density_grid=density_grid, arrow_size=arrow_size,
                                 arrow_color=arrow_color,arrow_style=arrow_style,  max_length=max_length,
                                 linewidth=linewidth,min_mass = min_mass, cutoff_perc=cutoff_perc,
                                 scatter_size=scatter_size, scatter_alpha=scatter_alpha,marker_edgewidth=marker_edgewidth,
                                 density_stream=density_stream, smooth_transition=smooth_transition, smooth_grid=smooth_grid,
                                 color_scheme=color_scheme, add_outline_clusters=add_outline_clusters,
                                 cluster_outline_edgewidth = cluster_outline_edgewidth,gp_color = gp_color, bg_color=bg_color,
                                 dpi=dpi , title=title, b_bias=b_bias, n_neighbors_velocity_grid=n_neighbors_velocity_grid,
                                 other_labels=other_labels,use_sequentially_augmented=use_sequentially_augmented, cmap_str=cmap_str)
        return fig,ax
    def plot_trajectory_gams(self,clusters:str='',basis:str='',via_fine=None, idx=None,
                         title_str:str= "Pseudotime", draw_all_curves:bool=True, arrow_width_scale_factor:float=15.0,
                         scatter_size:float=50, scatter_alpha:float=0.5,figsize:tuple=(8,4),
                         linewidth:float=1.5, marker_edgewidth:float=1, cmap_pseudotime:str='viridis_r',dpi:int=80,
                         highlight_terminal_states:bool=True, use_maxout_edgelist:bool =False)->Tuple[matplotlib.figure.Figure,
                                                                                                 matplotlib.axes._axes.Axes,
                                                                                                 matplotlib.axes._axes.Axes]:
        """projects the graph based coarse trajectory onto a umap/tsne embedding
        Arguments:
            clusters : column name of the adata.obs dataframe that contains the cluster labels
            basis : str, default = 'X_umap', which to use for the embedding
            via_fine : via object suggest to use via_object only unless you found that running via_fine gave better pathways
            idx : default: None. Or List. if you had previously computed a umap/tsne (embedding) only on a subset of the total n_samples (subsampled as per idx), then the via objects and results will be indexed according to idx too
            title_str : title of figure
            draw_all_curves : if the clustergraph has too many edges to project in a visually interpretable way, set this to False to get a simplified view of the graph pathways
            arrow_width_scale_factor : the width of the arrows is proportional to the edge weight. This factor scales the width of the arrows
            scatter_size : size of the scatter points
            scatter_alpha : transparency of the scatter points
            linewidth : width of the lines
            marker_edgewidth : width of the outline around each scatter point
            cmap_pseudotime : color map to use for the pseudotime
            dpi : dpi of the figure
            highlight_terminal_states :  whether or not to highlight/distinguish the clusters which are detected as the terminal states by via
        Returns:
            fig : matplotlib figure
            ax1 : matplotlib axis
            ax2 : matplotlib axis
        """
        if clusters=='':
            clusters=self.clusters
        if basis=='':
            basis=self.basis
        self.adata.obs[clusters]=self.adata.obs[clusters].astype('category')
        embedding=self.adata.obsm[basis]
        fig,ax1,ax2 = draw_trajectory_gams_pyomic(adata=self.adata,clusters=clusters,via_object=self.model, 
                                            via_fine=via_fine, embedding=embedding, idx=idx,
                                            title_str=title_str, draw_all_curves=draw_all_curves, arrow_width_scale_factor=arrow_width_scale_factor,
                                            scatter_size=scatter_size, scatter_alpha=scatter_alpha,figsize=figsize,
                                            linewidth=linewidth, marker_edgewidth=marker_edgewidth, cmap_pseudotime=cmap_pseudotime,dpi=dpi,
                                            highlight_terminal_states=highlight_terminal_states, use_maxout_edgelist=use_maxout_edgelist)
        return fig,ax1,ax2
    def plot_lineage_probability(self,clusters:str='',basis:str='',via_fine=None, 
                                idx=None, figsize:tuple=(8,4),
                                cmap:str='plasma', dpi:int=80, scatter_size =None,
                                marker_lineages:list = [], fontsize:int=12)->Tuple[matplotlib.figure.Figure,
                                                                                   matplotlib.axes._axes.Axes]:
        """G is the igraph knn (low K) used for shortest path in high dim space. no idx needed as it's made on full sample, knn_hnsw is the knn made in the embedded space used for query to find the nearest point in the downsampled embedding that corresponds to the single cells in the full graph
        Arguments:
            clusters : column name of the adata.obs dataframe that contains the cluster labels
            basis : str, default = 'X_umap', which to use for the embedding
            via_fine : usually just set to same as via_coarse unless you ran a refined run and want to link it to initial via_coarse's terminal clusters
            idx : if one uses a downsampled embedding of the original data, then idx is the selected indices of the downsampled samples used in the visualization
            figsize : size of the figure
            cmap : color map to use for the lineage probability
            dpi : dpi of the figure
            scatter_size : size of the scatter points
            marker_lineages : Default is to use all lineage pathways. other provide a list of lineage number (terminal cluster number).
            fontsize : fontsize of the title
        Returns:
            fig : matplotlib figure
            axs : matplotlib axis
        """
        if clusters=='':
            clusters=self.clusters
        if basis=='':
            basis=self.basis
        self.adata.obs[clusters]=self.adata.obs[clusters].astype('category')
        embedding=self.adata.obsm[basis]
        fig, axs = draw_sc_lineage_probability(via_object=self.model,via_fine=via_fine, embedding=embedding,figsize=figsize,
                                               idx=idx, cmap_name=cmap, dpi=dpi, scatter_size =scatter_size,
                                            marker_lineages = marker_lineages, fontsize=fontsize)
        fig.tight_layout()
        return fig, axs
    def plot_gene_trend(self,gene_list:list=None,figsize:tuple=(8,4),
                        magic_steps:int=3, spline_order:int=5, dpi:int=80,cmap:str='jet', 
                        marker_genes:list = [], linewidth:float = 2.0,
                        n_splines:int=10,  fontsize_:int=12, marker_lineages=[])->Tuple[matplotlib.figure.Figure,
                                                                                        matplotlib.axes._axes.Axes]:
        """plots the gene expression trend along the pseudotime
        Arguments:
            gene_list : list of genes to plot
            figsize : size of the figure
            magic_steps : number of magic steps to use for imputation
            spline_order : order of the spline to use for smoothing
            dpi : dpi of the figure
            cmap : color map to use for the gene expression
            marker_genes : Default is to use all genes in gene_exp. other provide a list of marker genes that will be used from gene_exp.
            linewidth : width of the lines
            n_splines : number of splines to use for smoothing
            fontsize_ : fontsize of the title
            marker_lineages : Default is to use all lineage pathways. other provide a list of lineage number (terminal cluster number).
        Returns:
            fig : matplotlib figure
            axs : matplotlib axis
        """
        df_magic = self.model.do_impute(self.adata[:,gene_list].to_df(), magic_steps=magic_steps, gene_list=gene_list)
        fig, axs=get_gene_expression_pyomic(self.model,df_magic,spline_order=spline_order,dpi=dpi,
                                   cmap=cmap, marker_genes=marker_genes, linewidth=linewidth,figsize=figsize,
                                   n_splines=n_splines,  fontsize_=fontsize_, marker_lineages=marker_lineages)
        fig.tight_layout()
        return fig, axs
    def plot_clustergraph(self,gene_list:list,arrow_head:float=0.1,figsize:tuple=(8,4),dpi=80,magic_steps=3,
                          edgeweight_scale:float=1.5, cmap=None, label_=True,)->Tuple[matplotlib.figure.Figure,
                                                                                        matplotlib.axes._axes.Axes]:
        """plot the gene in pie chart for each cluster
        Arguments:
            gene_list : list of genes to plot
            arrow_head : size of the arrow head
            figsize : size of the figure
            edgeweight_scale : scale of the edge weight
            cmap : color map to use for the gene expression
            label_ : whether to label the nodes
        Returns:
            fig : matplotlib figure
            axs : matplotlib axis
        """
        df_magic = self.model.do_impute(self.adata[:,gene_list].to_df(), magic_steps=magic_steps, gene_list=gene_list)
        df_magic['parc'] = self.model.labels
        df_magic_cluster = df_magic.groupby('parc', as_index=True).mean()
        fig, axs = draw_clustergraph_pyomic(via_object=self.model, type_data='gene', gene_exp=df_magic_cluster, 
                                    gene_list=gene_list, arrow_head=arrow_head,figsize=figsize,
                                    edgeweight_scale=edgeweight_scale, cmap=cmap, label_=label_,dpi=dpi)
        fig.tight_layout()
        return fig,axs
    def plot_gene_trend_heatmap(self,gene_list:list,marker_lineages:list = [], 
                             fontsize:int=8,cmap:str='viridis', normalize:bool=True, ytick_labelrotation:int = 0, 
                             figsize:tuple=(2,4))->Tuple[matplotlib.figure.Figure,
                                                                    list]:
        """Plot the gene trends on heatmap: a heatmap is generated for each lineage (identified by terminal cluster number). Default selects all lineages
        Arguments:
            gene_list : list of genes to plot
            marker_lineages : list default = None and plots all detected all lineages. Optionally provide a list of integers corresponding to the cluster number of terminal cell fates
            fontsize : int default = 8
            cmap : str default = 'viridis'
            normalize : bool = True
            ytick_labelrotation : int default = 0
            figsize : size of the figure
        Returns:
            fig : matplotlib figure
            axs : list of matplotlib axis       
        """
        df_magic = self.model.do_impute(self.adata[:,gene_list].to_df(), magic_steps=3, gene_list=gene_list)
        df_magic['parc'] = self.model.labels
        df_magic_cluster = df_magic.groupby('parc', as_index=True).mean()
        fig,axs=plot_gene_trend_heatmaps_pyomic(via_object=self.model, df_gene_exp=df_magic, 
                                                cmap=cmap,fontsize=fontsize,normalize=normalize,
                                                ytick_labelrotation=ytick_labelrotation,figsize=figsize,
                                                marker_lineages=marker_lineages)
        fig.tight_layout()
        return fig,axs
__init__(adata, adata_key='X_pca', adata_ncomps=80, basis='X_umap', clusters='', dist_std_local=2, jac_std_global=0.15, labels=None, keep_all_local_dist='auto', too_big_factor=0.4, resolution_parameter=1.0, partition_type='ModularityVP', small_pop=10, jac_weighted_edges=True, knn=30, n_iter_leiden=5, random_seed=42, num_threads=-1, distance='l2', time_smallpop=15, super_cluster_labels=False, super_node_degree_list=False, super_terminal_cells=False, x_lazy=0.95, alpha_teleport=0.99, root_user=None, preserve_disconnected=True, dataset='', super_terminal_clusters=[], is_coarse=True, csr_full_graph='', csr_array_locally_pruned='', ig_full_graph='', full_neighbor_array='', full_distance_array='', df_annot=None, preserve_disconnected_after_pruning=False, secondary_annotations=None, pseudotime_threshold_TS=30, cluster_graph_pruning_std=0.15, visual_cluster_graph_pruning=0.15, neighboring_terminal_states_threshold=3, num_mcmc_simulations=1300, piegraph_arrow_head_width=0.1, piegraph_edgeweight_scalingfactor=1.5, max_visual_outgoing_edges=2, via_coarse=None, velocity_matrix=None, gene_matrix=None, velo_weight=0.5, edgebundle_pruning=None, A_velo=None, CSM=None, edgebundle_pruning_twice=False, pca_loadings=None, time_series=False, time_series_labels=None, knn_sequential=10, knn_sequential_reverse=0, t_diff_step=1, single_cell_transition_matrix=None, embedding_type='via-mds', do_compute_embedding=False, color_dict=None, user_defined_terminal_cell=[], user_defined_terminal_group=[], do_gaussian_kernel_edgeweights=False, RW2_mode=False, working_dir_fp='/home/shobi/Trajectory/Datasets/')
¶
Initialize a pyVIA object for trajectory inference.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| adata | anndata.AnnData | An AnnData object containing the single-cell RNA-seq data | required | 
| adata_key | str | The key of the AnnData in obsm to perform VIA on (default: 'X_pca') | 'X_pca' | 
| adata_ncomps | int | The number of components to use from the AnnData in obsm (default: 80) | 80 | 
| basis | str | The key of the AnnData in obsm to use as the basis for embedding (default: 'X_umap') | 'X_umap' | 
| clusters | str | The clusters to use for the VIA analysis (default: '') | '' | 
| dist_std_local | float | local level of pruning for PARC graph clustering stage. Range (0.1,3) higher numbers mean more edge retention | 2 | 
| jac_std_global | (optional, default = 0.15, can also set as 'median') global level graph pruning for PARC clustering stage. Number of standard deviations below the network’s mean-jaccard-weighted edges. 0.1-1 provide reasonable pruning.higher value means less pruning (more edges retained). e.g. a value of 0.15 means all edges that are above mean(edgeweight)-0.15*std(edge-weights) are retained. We find both 0.15 and ‘median’ to yield good results/starting point and resulting in pruning away ~ 50-60% edges | 0.15 | |
| labels | np.ndarray | default is None. and PARC clusters are used for the viagraph. alternatively provide a list of clustermemberships that are integer values (not strings) to construct the viagraph using another clustering method or available annotations | None | 
| keep_all_local_dist | default value of 'auto' means that for smaller datasets local-pruning is done prior to clustering, but for large datasets local pruning is set to False for speed. | 'auto' | |
| too_big_factor | float | (optional, default=0.4). Forces clusters > 0.4*n_cells to be re-clustered | 0.4 | 
| resolution_parameter | float | (float) the resolution parameter for the Louvain algorithm. | 1.0 | 
| partition_type | str | (str, default: "ModularityVP") the partitioning algorithm to use. | 'ModularityVP' | 
| small_pop | int | (int, default: 10) the number of cells to be considered in a small population. | 10 | 
| jac_weighted_edges | bool | (bool, default: True) whether to use weighted edges in the PARC clustering step. | True | 
| knn | int | (int, optional, default: 30) the number of K-Nearest Neighbors for HNSWlib KNN graph. Larger knn means more graph connectivity. Lower knn means more loosely connected clusters/cells. | 30 | 
| n_iter_leiden | int | (int) the number of iterations for the Leiden algorithm. | 5 | 
| random_seed | int | (int) the random seed to pass to the clustering algorithm. | 42 | 
| num_threads | (int) the number of threads to use for the clustering algorithm. | -1 | |
| distance | (str, default: 'l2') the distance metric to use for graph construction and similarity. Options are 'l2', 'ip', and 'cosine'. | 'l2' | |
| visual_cluster_graph_pruning | float | (float, default: 0.15) the pruning level for the cluster graph. This only comes into play if the user deliberately chooses not to use the default edge-bundling method of visualizating edges (draw_piechart_graph()) and instead calls draw_piechart_graph_nobundle(). It controls the number of edges plotted for visual effect. This does not impact computation of terminal states, pseudotime, or lineage likelihoods. | 0.15 | 
| cluster_graph_pruning_std | float | (float, default: 0.15) the pruning level of the cluster graph. Often set to the same value as the PARC clustering level of jac_std_global. Reasonable range is [0.1, 1]. To retain more connectivity in the clustergraph underlying the trajectory computations, increase the value. | 0.15 | 
| time_smallpop | (max time to be allowed handling singletons) the maximum time allowed to handle singletons. | 15 | |
| super_cluster_labels | bool | False | |
| super_node_degree_list | bool | False | |
| super_terminal_cells | bool | False | |
| x_lazy | float | (float, default: 0.95) 1-x = probability of staying in the same node (lazy). Values between 0.9-0.99 are reasonable. | 0.95 | 
| alpha_teleport | float | (float, default: 0.99) 1-alpha is probability of jumping. Values between 0.95-0.99 are reasonable unless prior knowledge of teleportation. | 0.99 | 
| root_user | (list, None) the root user list. Can be a list of strings, a list of int, or None. The default is None. When the root_user is set as None and an RNA velocity matrix is available, a root will be automatically computed. If the root_user is None and no velocity matrix is provided, then an arbitrary root is selected. If the root_user is ['celltype_earlystage'] where the str corresponds to an item in true_label, then a suitable starting point will be selected corresponding to this group. | None | |
| preserve_disconnected | bool | bool (default = True) If you believe there may be disconnected trajectories then set this to False | True | 
| dataset | str | str Can be set to 'group' or '' (default). this refers to the type of root label (group level root or single cell index) you are going to provide. if your true_label has a sensible group of cells for a root then you can set dataset to 'group' and make the root parameter ['labelname_root_cell_type'] if your root corresponds to one particular cell then set dataset = '' (default) | '' | 
| embedding | ndarray (optional, default = None) embedding (e.g. precomputed tsne, umap, phate, via-umap) for plotting data. Size n_cells x 2 If an embedding is provided when running VIA, then a scatterplot colored by pseudotime, highlighting terminal fates | required | |
| velo_weight | float (optional, default = 0.5) #float between [0,1]. the weight assigned to directionality and connectivity derived from scRNA-velocity | 0.5 | |
| neighboring_terminal_states_threshold | int (default = 3). Candidates for terminal states that are neighbors of each other may be removed from the list if they have this number of more of terminal states as neighbors | 3 | |
| knn_sequential | int | int (default =10) number of knn in the adjacent time-point for time-series data (t_i and t_i+1) | 10 | 
| knn_sequential_reverse | int | int (default = 0) number of knn enforced from current to previous time point | 0 | 
| t_diff_step | int | int (default =1) Number of permitted temporal intervals between connected nodes. If time data is labeled as [0,25,50,75,100,..] then t_diff_step=1 corresponds to '25' and only edges within t_diff_steps are retained | 1 | 
| is_coarse | bool (default = True) If running VIA in two iterations where you wish to link the second fine-grained iteration with the initial iteration, then you set to False | True | |
| via_coarse | VIA (default = None) If instantiating a second iteration of VIA that needs to be linked to a previous iteration (e.g. via0), then set via_coarse to the previous via0 object | None | |
| df_annot | DataFrame (default None) used for the Mouse Organ data | None | |
| preserve_disconnected_after_pruning | bool | bool (default = True) If you believe there are disconnected trajectories then set this to False and test your hypothesis | False | 
| A_velo | ndarray Cluster Graph Transition matrix based on rna velocity [n_clus x n_clus] | None | |
| velocity_matrix | matrix (default None) matrix of size [n_samples x n_genes]. this is the velocity matrix computed by scVelo (or similar package) and stored in adata.layers['velocity']. The genes used for computing velocity should correspond to those useing in gene_matrix Requires gene_matrix to be provided too. | None | |
| gene_matrix | matrix (default None) Only used if Velocity_matrix is available. matrix of size [n_samples x n_genes]. We recommend using a subset like HVGs rather than full set of genes. (need to densify input if taking from adata = adata.X.todense()) | None | |
| time_series | if the data has time-series labels then set to True | False | |
| time_series_labels | list | list (default None) list of integer values of temporal annotations corresponding to e.g. hours (post fert), days, or sequential ordering | None | 
| pca_loadings | array (default None) the loadings of the pcs used to project the cells (to projected euclidean location based on velocity). n_cells x n_pcs | None | |
| secondary_annotations | list | None (default None) | None | 
| edgebundle_pruning | float (default=None) will by default be set to the same as the cluster_graph_pruning_std and influences the visualized level of pruning of edges. Typical values can be between [0,1] with higher numbers retaining more edges | None | |
| edgebundle_pruning_twice | bool default: False. When True, the edgebundling is applied to a further visually pruned (visual_cluster_graph_pruning) and can sometimes simplify the visualization. it does not impact the pseudotime and lineage computations | False | |
| piegraph_arrow_head_width | float (default = 0.1) size of arrow heads in via cluster graph | 0.1 | |
| piegraph_edgeweight_scalingfactor | (defaulf = 1.5) scaling factor for edge thickness in via cluster graph | 1.5 | |
| max_visual_outgoing_edges | int | int (default =2) Rarely comes into play. Only used if the user later chooses to plot the via-graph without edgebunding using draw_piechart_graph_nobundle() Only allows max_visual_outgoing_edges to come out of any given node. | 2 | 
| edgebundle_pruning | float (default=None) will by default be set to the same as the cluster_graph_pruning_std and influences the visualized level of pruning of edges. Typical values can be between [0,1] with higher numbers retaining more edges | None | |
| edgebundle_pruning_twice | bool default: False. When True, the edgebundling is applied to a further visually pruned (visual_cluster_graph_pruning) and can sometimes simplify the visualization. it does not impact the pseudotime and lineage computations | False | |
| pseudotime_threshold_TS | int | int (default = 30) corresponds to the criteria for a state to be considered a candidate terminal cell fate to be 30% or later of the computed pseudotime range | 30 | 
| num_mcmc_simulations | int (default = 1300) number of random walk simulations conducted | 1300 | |
| embedding_type | str | str (default = 'via-mds', other options are 'via-umap' and 'via-force' | 'via-mds' | 
| do_compute_embedding | bool | bool (default = False) If you want an embedding (n_samples x2) to be computed on the basis of the via sc graph then set this to True | False | 
| do_gaussian_kernel_edgeweights | bool | bool (default = False) Type of edgeweighting on the graph edges | False | 
Source code in /Users/fernandozeng/miniforge3/envs/space/lib/python3.10/site-packages/omicverse/single/_via.py
def __init__(self,adata:anndata.AnnData,adata_key:str='X_pca',adata_ncomps:int=80,basis:str='X_umap',
             clusters:str='',dist_std_local:float=2, jac_std_global=0.15, labels:np.ndarray=None,
             keep_all_local_dist='auto', too_big_factor:float=0.4, resolution_parameter:float=1.0, partition_type:str="ModularityVP", small_pop:int=10,
             jac_weighted_edges:bool=True, knn:int=30, n_iter_leiden:int=5, random_seed:int=42,
             num_threads=-1, distance='l2', time_smallpop=15,
             super_cluster_labels:bool=False,                 super_node_degree_list:bool=False, super_terminal_cells:bool=False, x_lazy:float=0.95, alpha_teleport:float=0.99,
             root_user=None, preserve_disconnected:bool=True, dataset:str='', super_terminal_clusters:list=[],
             is_coarse=True, csr_full_graph:np.ndarray='', csr_array_locally_pruned='', ig_full_graph='',
             full_neighbor_array='', full_distance_array='',  df_annot=None,
             preserve_disconnected_after_pruning:bool=False,
             secondary_annotations:list=None, pseudotime_threshold_TS:int=30, cluster_graph_pruning_std:float=0.15,
             visual_cluster_graph_pruning:float=0.15, neighboring_terminal_states_threshold=3, num_mcmc_simulations=1300,
             piegraph_arrow_head_width=0.1,
             piegraph_edgeweight_scalingfactor=1.5, max_visual_outgoing_edges:int=2, via_coarse=None, velocity_matrix=None,
             gene_matrix=None, velo_weight=0.5, edgebundle_pruning=None, A_velo = None, CSM = None, edgebundle_pruning_twice=False, pca_loadings = None, time_series=False,
             time_series_labels:list=None, knn_sequential:int = 10, knn_sequential_reverse:int = 0,t_diff_step:int = 1,single_cell_transition_matrix = None,
             embedding_type:str='via-mds',do_compute_embedding:bool=False, color_dict:dict=None,user_defined_terminal_cell:list=[], user_defined_terminal_group:list=[],
             do_gaussian_kernel_edgeweights:bool=False,RW2_mode:bool=False,working_dir_fp:str ='/home/shobi/Trajectory/Datasets/') -> None:
    r"""Initialize a pyVIA object for trajectory inference.
    Arguments:
        adata: An AnnData object containing the single-cell RNA-seq data
        adata_key (str): The key of the AnnData in obsm to perform VIA on (default: 'X_pca')
        adata_ncomps (int): The number of components to use from the AnnData in obsm (default: 80)
        basis (str): The key of the AnnData in obsm to use as the basis for embedding (default: 'X_umap')
        clusters (str): The clusters to use for the VIA analysis (default: '')
        dist_std_local: local level of pruning for PARC graph clustering stage. Range (0.1,3) higher numbers mean more edge retention
        jac_std_global: (optional, default = 0.15, can also set as 'median') global level graph pruning for PARC clustering stage. Number of standard deviations below the network’s mean-jaccard-weighted edges. 0.1-1 provide reasonable pruning.higher value means less pruning (more edges retained). e.g. a value of 0.15 means all edges that are above mean(edgeweight)-0.15*std(edge-weights) are retained. We find both 0.15 and ‘median’ to yield good results/starting point and resulting in pruning away ~ 50-60% edges
        labels: default is None. and PARC clusters are used for the viagraph. alternatively provide a list of clustermemberships that are integer values (not strings) to construct the viagraph using another clustering method or available annotations
        keep_all_local_dist: default value of 'auto' means that for smaller datasets local-pruning is done prior to clustering, but for large datasets local pruning is set to False for speed.
        too_big_factor: (optional, default=0.4). Forces clusters > 0.4*n_cells to be re-clustered
        resolution_parameter: (float) the resolution parameter for the Louvain algorithm.
        partition_type: (str, default: "ModularityVP") the partitioning algorithm to use.
        small_pop: (int, default: 10) the number of cells to be considered in a small population.
        jac_weighted_edges: (bool, default: True) whether to use weighted edges in the PARC clustering step.
        knn: (int, optional, default: 30) the number of K-Nearest Neighbors for HNSWlib KNN graph. Larger knn means more graph connectivity. Lower knn means more loosely connected clusters/cells.
        n_iter_leiden: (int) the number of iterations for the Leiden algorithm.
        random_seed: (int) the random seed to pass to the clustering algorithm.
        num_threads: (int) the number of threads to use for the clustering algorithm.
        distance: (str, default: 'l2') the distance metric to use for graph construction and similarity. Options are 'l2', 'ip', and 'cosine'.
        visual_cluster_graph_pruning: (float, default: 0.15) the pruning level for the cluster graph. This only comes into play if the user deliberately chooses not to use the default edge-bundling method of visualizating edges (draw_piechart_graph()) and instead calls draw_piechart_graph_nobundle(). It controls the number of edges plotted for visual effect. This does not impact computation of terminal states, pseudotime, or lineage likelihoods.
        cluster_graph_pruning_std: (float, default: 0.15) the pruning level of the cluster graph. Often set to the same value as the PARC clustering level of jac_std_global. Reasonable range is [0.1, 1]. To retain more connectivity in the clustergraph underlying the trajectory computations, increase the value.
        time_smallpop: (max time to be allowed handling singletons) the maximum time allowed to handle singletons.
        super_cluster_labels:
        super_node_degree_list:
        super_terminal_cells:
        x_lazy: (float, default: 0.95) 1-x = probability of staying in the same node (lazy). Values between 0.9-0.99 are reasonable.
        alpha_teleport: (float, default: 0.99) 1-alpha is probability of jumping. Values between 0.95-0.99 are reasonable unless prior knowledge of teleportation.
        root_user: (list, None) the root user list. Can be a list of strings, a list of int, or None. The default is None. When the root_user is set as None and an RNA velocity matrix is available, a root will be automatically computed. If the root_user is None and no velocity matrix is provided, then an arbitrary root is selected. If the root_user is ['celltype_earlystage'] where the str corresponds to an item in true_label, then a suitable starting point will be selected corresponding to this group.
        preserve_disconnected: bool (default = True) If you believe there may be disconnected trajectories then set this to False
        dataset: str Can be set to 'group' or '' (default). this refers to the type of root label (group level root or single cell index) you are going to provide. if your true_label has a sensible group of cells for a root then you can set dataset to 'group' and make the root parameter ['labelname_root_cell_type'] if your root corresponds to one particular cell then set dataset = '' (default)
        embedding: ndarray (optional, default = None) embedding (e.g. precomputed tsne, umap, phate, via-umap) for plotting data. Size n_cells x 2 If an embedding is provided when running VIA, then a scatterplot colored by pseudotime, highlighting terminal fates
        velo_weight: float (optional, default = 0.5) #float between [0,1]. the weight assigned to directionality and connectivity derived from scRNA-velocity
        neighboring_terminal_states_threshold:int (default = 3). Candidates for terminal states that are neighbors of each other may be removed from the list if they have this number of more of terminal states as neighbors
        knn_sequential:int (default =10) number of knn in the adjacent time-point for time-series data (t_i and t_i+1)
        knn_sequential_reverse: int (default = 0) number of knn enforced from current to previous time point
        t_diff_step: int (default =1) Number of permitted temporal intervals between connected nodes. If time data is labeled as [0,25,50,75,100,..] then t_diff_step=1 corresponds to '25' and only edges within t_diff_steps are retained
        is_coarse:bool (default = True) If running VIA in two iterations where you wish to link the second fine-grained iteration with the initial iteration, then you set to False
        via_coarse: VIA (default = None) If instantiating a second iteration of VIA that needs to be linked to a previous iteration (e.g. via0), then set via_coarse to the previous via0 object
        df_annot: DataFrame (default None) used for the Mouse Organ data
        preserve_disconnected_after_pruning:bool (default = True) If you believe there are disconnected trajectories then set this to False and test your hypothesis
        A_velo: ndarray Cluster Graph Transition matrix based on rna velocity [n_clus x n_clus]
        velocity_matrix: matrix (default None) matrix of size [n_samples x n_genes]. this is the velocity matrix computed by scVelo (or similar package) and stored in adata.layers['velocity']. The genes used for computing velocity should correspond to those useing in gene_matrix Requires gene_matrix to be provided too.
        gene_matrix: matrix (default None) Only used if Velocity_matrix is available. matrix of size [n_samples x n_genes]. We recommend using a subset like HVGs rather than full set of genes. (need to densify input if taking from adata = adata.X.todense())
        time_series: if the data has time-series labels then set to True
        time_series_labels:list (default None) list of integer values of temporal annotations corresponding to e.g. hours (post fert), days, or sequential ordering
        pca_loadings: array (default None) the loadings of the pcs used to project the cells (to projected euclidean location based on velocity). n_cells x n_pcs
        secondary_annotations: None (default None)
        edgebundle_pruning:float (default=None) will by default be set to the same as the cluster_graph_pruning_std and influences the visualized level of pruning of edges. Typical values can be between [0,1] with higher numbers retaining more edges
        edgebundle_pruning_twice:bool default: False. When True, the edgebundling is applied to a further visually pruned (visual_cluster_graph_pruning) and can sometimes simplify the visualization. it does not impact the pseudotime and lineage computations
        piegraph_arrow_head_width: float (default = 0.1) size of arrow heads in via cluster graph
        piegraph_edgeweight_scalingfactor: (defaulf = 1.5) scaling factor for edge thickness in via cluster graph
        max_visual_outgoing_edges: int (default =2) Rarely comes into play. Only used if the user later chooses to plot the via-graph without edgebunding using draw_piechart_graph_nobundle() Only allows max_visual_outgoing_edges to come out of any given node.
        edgebundle_pruning:float (default=None) will by default be set to the same as the cluster_graph_pruning_std and influences the visualized level of pruning of edges. Typical values can be between [0,1] with higher numbers retaining more edges
        edgebundle_pruning_twice:bool default: False. When True, the edgebundling is applied to a further visually pruned (visual_cluster_graph_pruning) and can sometimes simplify the visualization. it does not impact the pseudotime and lineage computations
        pseudotime_threshold_TS: int (default = 30) corresponds to the criteria for a state to be considered a candidate terminal cell fate to be 30% or later of the computed pseudotime range
        num_mcmc_simulations:int (default = 1300) number of random walk simulations conducted
        embedding_type: str (default = 'via-mds', other options are 'via-umap' and 'via-force'
        do_compute_embedding: bool (default = False) If you want an embedding (n_samples x2) to be computed on the basis of the via sc graph then set this to True
        do_gaussian_kernel_edgeweights: bool (default = False) Type of edgeweighting on the graph edges
    """
    self.adata = adata
    #self.adata_key = adata_key
    data = adata.obsm[adata_key][:, 0:adata_ncomps]
    embedding=self.adata.obsm[basis]
    true_label=adata.obs[clusters]
    self.clusters=clusters
    self.basis=basis
    if root_user is not None:
         dataset='group'
    self.model=VIA(data=data,true_label=true_label,
             dist_std_local=dist_std_local,jac_std_global=jac_std_global,labels=labels,
             keep_all_local_dist=keep_all_local_dist,too_big_factor=too_big_factor,resolution_parameter=resolution_parameter,partition_type=partition_type,small_pop=small_pop,
             jac_weighted_edges=jac_weighted_edges,knn=knn,n_iter_leiden=n_iter_leiden,random_seed=random_seed,
             num_threads=num_threads,distance=distance,time_smallpop=time_smallpop,
             super_cluster_labels=super_cluster_labels,super_node_degree_list=super_node_degree_list,super_terminal_cells=super_terminal_cells,x_lazy=x_lazy,alpha_teleport=alpha_teleport,
             root_user=root_user,preserve_disconnected=preserve_disconnected,dataset=dataset,super_terminal_clusters=super_terminal_clusters,
             is_coarse=is_coarse,csr_full_graph=csr_full_graph,csr_array_locally_pruned=csr_array_locally_pruned,ig_full_graph=ig_full_graph,
             full_neighbor_array=full_neighbor_array,full_distance_array=full_distance_array,embedding=embedding,df_annot=df_annot,
             preserve_disconnected_after_pruning=preserve_disconnected_after_pruning,
             secondary_annotations=secondary_annotations,pseudotime_threshold_TS=pseudotime_threshold_TS,cluster_graph_pruning_std=cluster_graph_pruning_std,
             visual_cluster_graph_pruning=visual_cluster_graph_pruning,neighboring_terminal_states_threshold=neighboring_terminal_states_threshold,num_mcmc_simulations=num_mcmc_simulations,
             piegraph_arrow_head_width=piegraph_arrow_head_width,
             piegraph_edgeweight_scalingfactor=piegraph_edgeweight_scalingfactor,max_visual_outgoing_edges=max_visual_outgoing_edges,via_coarse=via_coarse,velocity_matrix=velocity_matrix,
             gene_matrix=gene_matrix,velo_weight=velo_weight,edgebundle_pruning=edgebundle_pruning,A_velo=A_velo,CSM=CSM,edgebundle_pruning_twice=edgebundle_pruning_twice,pca_loadings=pca_loadings,time_series=time_series,
             time_series_labels=time_series_labels,knn_sequential=knn_sequential,knn_sequential_reverse=knn_sequential_reverse,t_diff_step=t_diff_step,single_cell_transition_matrix=single_cell_transition_matrix,
             embedding_type=embedding_type,do_compute_embedding=do_compute_embedding,color_dict=color_dict,user_defined_terminal_cell=user_defined_terminal_cell,user_defined_terminal_group=user_defined_terminal_group,
             do_gaussian_kernel_edgeweights=do_gaussian_kernel_edgeweights,RW2_mode=RW2_mode,working_dir_fp=working_dir_fp
             )
run()
¶
Calculate the VIA graph and pseudotime.
This method runs the main VIA algorithm to construct the trajectory graph and compute pseudotime for each cell.
Source code in /Users/fernandozeng/miniforge3/envs/space/lib/python3.10/site-packages/omicverse/single/_via.py
def run(self):
    r"""Calculate the VIA graph and pseudotime.
    This method runs the main VIA algorithm to construct the trajectory graph
    and compute pseudotime for each cell.
    """
    self.model.run_VIA()
    add_reference(self.adata,'VIA','trajectory inference with VIA')
get_piechart_dict(label=0, clusters='')
¶
Get cluster composition dictionary for pie chart visualization.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| label | int | Cluster label of pie chart (default: 0) | 0 | 
| clusters | str | The celltype column name of interest (default: '') | '' | 
Returns:
| Name | Type | Description | 
|---|---|---|
| dict | dict | Cluster composition dictionary for visualization | 
Source code in /Users/fernandozeng/miniforge3/envs/space/lib/python3.10/site-packages/omicverse/single/_via.py
def get_piechart_dict(self,label:int=0,clusters:str='')->dict:
    r"""Get cluster composition dictionary for pie chart visualization.
    Arguments:
        label (int): Cluster label of pie chart (default: 0)
        clusters (str): The celltype column name of interest (default: '')
    Returns:
        dict: Cluster composition dictionary for visualization
    """
    if clusters=='':
        clusters=self.clusters
    self.adata.obs[clusters]=self.adata.obs[clusters].astype('category')
    cluster_i_loc=np.where(np.asarray(self.model.labels) == label)[0]
    res_dict=dict(self.adata.obs.iloc[cluster_i_loc].value_counts(clusters))
    return res_dict
get_pseudotime(adata=None)
¶
Extract the pseudotime values computed by VIA.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| adata | An AnnData object to add pseudotime to (default: None) If None, pseudotime will be added to self.adata.obs['pt_via'] | None | 
Source code in /Users/fernandozeng/miniforge3/envs/space/lib/python3.10/site-packages/omicverse/single/_via.py
def get_pseudotime(self,adata=None):
    r"""Extract the pseudotime values computed by VIA.
    Arguments:
        adata: An AnnData object to add pseudotime to (default: None)
              If None, pseudotime will be added to self.adata.obs['pt_via']
    """
    print('...the pseudotime of VIA added to AnnData obs named `pt_via`')
    if adata is None:
        self.adata.obs['pt_via']=self.model.single_cell_pt_markov
    else:
        adata.obs['pt_via']=self.model.single_cell_pt_markov
plot_piechart_graph(clusters='', type_data='pt', gene_exp=[], title='', cmap=None, ax_text=True, figsize=(8, 4), dpi=150, headwidth_arrow=0.1, alpha_edge=0.4, linewidth_edge=2, edge_color='darkblue', reference=None, show_legend=True, pie_size_scale=0.8, fontsize=8)
¶
Plot two subplots with clustergraph representation showing cluster composition and pseudotime/gene expression.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| clusters | str | Column name of the adata.obs dataframe containing cluster labels (default: '') | '' | 
| type_data | str | Type of data for coloring nodes - 'pt' for pseudotime or 'gene' (default: 'pt') | 'pt' | 
| gene_exp | list | List of values for gene expression to color nodes at cluster level (default: []) | [] | 
| title | str | Title for the plot (default: '') | '' | 
| cmap | str | Colormap - automatically chooses coolwarm for gene or viridis_r for pseudotime (default: None) | None | 
| ax_text | bool | Whether to annotate nodes with cluster number and population (default: True) | True | 
| figsize | tuple | Figure size (default: (8,4)) | (8, 4) | 
| dpi | int | DPI for the figure (default: 150) | 150 | 
| headwidth_arrow | float | Width of arrowhead for directed edges (default: 0.1) | 0.1 | 
| alpha_edge | float | Transparency of edges (default: 0.4) | 0.4 | 
| linewidth_edge | float | Width of edges (default: 2) | 2 | 
| edge_color | str | Color of edges (default: 'darkblue') | 'darkblue' | 
| reference | List of categorical labels for cluster composition (default: None) | None | |
| show_legend | bool | Whether to show legend (default: True) | True | 
| pie_size_scale | float | Scaling factor of the piechart nodes (default: 0.8) | 0.8 | 
| fontsize | float | Font size of text in piecharts (default: 8) | 8 | 
Returns:
| Name | Type | Description | 
|---|---|---|
| tuple | Tuple[matplotlib.figure.Figure, matplotlib.axes._axes.Axes, matplotlib.axes._axes.Axes] | (fig, ax, ax1) where fig is matplotlib figure, ax is left axis with cluster composition, ax1 is right axis with pseudotime/gene expression | 
Source code in /Users/fernandozeng/miniforge3/envs/space/lib/python3.10/site-packages/omicverse/single/_via.py
def plot_piechart_graph(self,clusters:str='', type_data='pt',
                            gene_exp:list=[], title='', 
                            cmap:str=None, ax_text=True, figsize:tuple=(8,4),
                            dpi=150,headwidth_arrow = 0.1, 
                            alpha_edge=0.4, linewidth_edge=2, 
                            edge_color='darkblue',reference=None, 
                            show_legend:bool=True, pie_size_scale:float=0.8, fontsize:float=8)->Tuple[matplotlib.figure.Figure,
                                                                                                      matplotlib.axes._axes.Axes,
                                                                                                      matplotlib.axes._axes.Axes]:
    r"""Plot two subplots with clustergraph representation showing cluster composition and pseudotime/gene expression.
    Arguments:
        clusters (str): Column name of the adata.obs dataframe containing cluster labels (default: '')
        type_data (str): Type of data for coloring nodes - 'pt' for pseudotime or 'gene' (default: 'pt')
        gene_exp (list): List of values for gene expression to color nodes at cluster level (default: [])
        title (str): Title for the plot (default: '')
        cmap (str): Colormap - automatically chooses coolwarm for gene or viridis_r for pseudotime (default: None)
        ax_text (bool): Whether to annotate nodes with cluster number and population (default: True)
        figsize (tuple): Figure size (default: (8,4))
        dpi (int): DPI for the figure (default: 150)
        headwidth_arrow (float): Width of arrowhead for directed edges (default: 0.1)
        alpha_edge (float): Transparency of edges (default: 0.4)
        linewidth_edge (float): Width of edges (default: 2)
        edge_color (str): Color of edges (default: 'darkblue')
        reference: List of categorical labels for cluster composition (default: None)
        show_legend (bool): Whether to show legend (default: True)
        pie_size_scale (float): Scaling factor of the piechart nodes (default: 0.8)
        fontsize (float): Font size of text in piecharts (default: 8)
    Returns:
        tuple: (fig, ax, ax1) where fig is matplotlib figure, ax is left axis with cluster composition, ax1 is right axis with pseudotime/gene expression
    """
    if clusters=='':
        clusters=self.clusters
    self.adata.obs[clusters]=self.adata.obs[clusters].astype('category')
    fig, ax, ax1 = draw_piechart_graph_pyomic(clusters=clusters,adata=self.adata,
                               via_object=self.model, type_data=type_data,
                            gene_exp=gene_exp, title=title, 
                            cmap=cmap, ax_text=ax_text,figsize=figsize,
                            dpi=dpi,headwidth_arrow = headwidth_arrow,
                            alpha_edge=alpha_edge, linewidth_edge=linewidth_edge,
                            edge_color=edge_color,reference=reference,
                            show_legend=show_legend, pie_size_scale=pie_size_scale, fontsize=fontsize)
    return fig, ax, ax1
plot_stream(clusters='', basis='', density_grid=0.5, arrow_size=0.7, arrow_color='k', arrow_style='-|>', max_length=4, linewidth=1, min_mass=1, cutoff_perc=5, scatter_size=500, scatter_alpha=0.5, marker_edgewidth=0.1, density_stream=2, smooth_transition=1, smooth_grid=0.5, color_scheme='annotation', add_outline_clusters=False, cluster_outline_edgewidth=0.001, gp_color='white', bg_color='black', dpi=80, title='Streamplot', b_bias=20, n_neighbors_velocity_grid=None, other_labels=None, use_sequentially_augmented=False, cmap_str='rainbow')
¶
Construct vector streamplot on the embedding to show a fine-grained view of inferred directions in the trajectory
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| clusters | column name of the adata.obs dataframe that contains the cluster labels | '' | |
| basis | str, default = 'X_umap', which to use for the embedding | '' | |
| density_grid | float, default = 0.5, density of the grid on which to project the directionality of cells | 0.5 | |
| arrow_size | float, default = 0.7, size of the arrows in the streamplot | 0.7 | |
| arrow_color | str, default = 'k', color of the arrows in the streamplot | 'k' | |
| arrow_style | str, default = "-|>", style of the arrows in the streamplot | '-|>' | |
| max_length | int, default = 4, maximum length of the arrows in the streamplot | 4 | |
| linewidth | float, default = 1, width of lines in streamplot | 1 | |
| min_mass | float, default = 1, minimum mass of the arrows in the streamplot | 1 | |
| cutoff_perc | int, default = 5, cutoff percentage of the arrows in the streamplot | 5 | |
| scatter_size | int, default = 500, size of scatter points | 500 | |
| scatter_alpha | float, default = 0.5, transpsarency of scatter points | 0.5 | |
| marker_edgewidth | float, default = 0.1, width of outline arround each scatter point | 0.1 | |
| density_stream | int, default = 2, density of the streamplot | 2 | |
| smooth_transition | int, default = 1, smoothness of the transition between the streamplot and the scatter points | 1 | |
| smooth_grid | float, default = 0.5, smoothness of the grid on which to project the directionality of cells | 0.5 | |
| color_scheme | str, default = 'annotation' corresponds to self.true_labels. Other options are 'time' (uses single-cell pseudotime) or 'clusters' (uses self.clusters) | 'annotation' | |
| add_outline_clusters | bool, default = False, whether to add an outline to the clusters | False | |
| cluster_outline_edgewidth | float, default = 0.001, width of the outline around the clusters | 0.001 | |
| gp_color | str, default = 'white', color of the grid points | 'white' | |
| bg_color | str, default = 'black', color of the background | 'black' | |
| dpi | int, default = 80, dpi of the figure | 80 | |
| title | str, default = 'Streamplot', title of the figure | 'Streamplot' | |
| b_bias | int, default = 20, higher value makes the forward bias of pseudotime stronger | 20 | |
| n_neighbors_velocity_grid | int, default = None, number of neighbors to use for the velocity grid | None | |
| other_labels | list, default = None, list of other labels to plot in the streamplot | None | |
| use_sequentially_augmented | bool, default = False, whether to use the sequentially augmented data | False | |
| cmap_str | str, default = 'rainbow', color map to use for the streamplot | 'rainbow' | 
Returns:
| Name | Type | Description | 
|---|---|---|
| fig | matplotlib.figure.Figure | matplotlib figure | 
| ax | matplotlib.axes._axes.Axes | matplotlib axis | 
Source code in /Users/fernandozeng/miniforge3/envs/space/lib/python3.10/site-packages/omicverse/single/_via.py
def plot_stream(self,clusters:str='',basis:str='',
               density_grid:float=0.5, arrow_size:float=0.7, arrow_color:str = 'k',
               arrow_style="-|>",  max_length:int=4, linewidth:float=1,min_mass = 1, cutoff_perc:int = 5,
               scatter_size:int=500, scatter_alpha:float=0.5,marker_edgewidth:float=0.1,
               density_stream:int = 2, smooth_transition:int=1, smooth_grid:float=0.5,
               color_scheme:str = 'annotation', add_outline_clusters:bool=False,
               cluster_outline_edgewidth = 0.001,gp_color = 'white', bg_color='black' ,
               dpi=80 , title='Streamplot', b_bias=20, n_neighbors_velocity_grid=None,
               other_labels:list = None,use_sequentially_augmented:bool=False, cmap_str:str='rainbow')->Tuple[matplotlib.figure.Figure,
                                                                                                      matplotlib.axes._axes.Axes]:
    """Construct vector streamplot on the embedding to show a fine-grained view of inferred directions in the trajectory
    Arguments:
        clusters : column name of the adata.obs dataframe that contains the cluster labels
        basis : str, default = 'X_umap', which to use for the embedding
        density_grid : float, default = 0.5, density of the grid on which to project the directionality of cells
        arrow_size : float, default = 0.7, size of the arrows in the streamplot
        arrow_color : str, default = 'k', color of the arrows in the streamplot
        arrow_style : str, default = "-|>", style of the arrows in the streamplot
        max_length : int, default = 4, maximum length of the arrows in the streamplot
        linewidth : float, default = 1, width of  lines in streamplot
        min_mass : float, default = 1, minimum mass of the arrows in the streamplot
        cutoff_perc : int, default = 5, cutoff percentage of the arrows in the streamplot
        scatter_size : int, default = 500, size of scatter points
        scatter_alpha : float, default = 0.5, transpsarency of scatter points
        marker_edgewidth : float, default = 0.1, width of outline arround each scatter point
        density_stream : int, default = 2, density of the streamplot
        smooth_transition : int, default = 1, smoothness of the transition between the streamplot and the scatter points
        smooth_grid : float, default = 0.5, smoothness of the grid on which to project the directionality of cells
        color_scheme : str, default = 'annotation' corresponds to self.true_labels. Other options are 'time' (uses single-cell pseudotime) or 'clusters' (uses self.clusters)
        add_outline_clusters : bool, default = False, whether to add an outline to the clusters
        cluster_outline_edgewidth : float, default = 0.001, width of the outline around the clusters
        gp_color : str, default = 'white', color of the grid points
        bg_color : str, default = 'black', color of the background
        dpi : int, default = 80, dpi of the figure
        title : str, default = 'Streamplot', title of the figure
        b_bias : int, default = 20, higher value makes the forward bias of pseudotime stronger
        n_neighbors_velocity_grid : int, default = None, number of neighbors to use for the velocity grid
        other_labels : list, default = None, list of other labels to plot in the streamplot
        use_sequentially_augmented : bool, default = False, whether to use the sequentially augmented data
        cmap_str : str, default = 'rainbow', color map to use for the streamplot
    Returns:
        fig : matplotlib figure
        ax : matplotlib axis
    """
    if clusters=='':
        clusters=self.clusters
    if basis=='':
        basis=self.basis
    self.adata.obs[clusters]=self.adata.obs[clusters].astype('category')
    embedding=self.adata.obsm[basis]
    fig,ax = via_streamplot_pyomic(adata=self.adata,clusters=clusters,via_object=self.model, 
                             embedding=embedding,density_grid=density_grid, arrow_size=arrow_size,
                             arrow_color=arrow_color,arrow_style=arrow_style,  max_length=max_length,
                             linewidth=linewidth,min_mass = min_mass, cutoff_perc=cutoff_perc,
                             scatter_size=scatter_size, scatter_alpha=scatter_alpha,marker_edgewidth=marker_edgewidth,
                             density_stream=density_stream, smooth_transition=smooth_transition, smooth_grid=smooth_grid,
                             color_scheme=color_scheme, add_outline_clusters=add_outline_clusters,
                             cluster_outline_edgewidth = cluster_outline_edgewidth,gp_color = gp_color, bg_color=bg_color,
                             dpi=dpi , title=title, b_bias=b_bias, n_neighbors_velocity_grid=n_neighbors_velocity_grid,
                             other_labels=other_labels,use_sequentially_augmented=use_sequentially_augmented, cmap_str=cmap_str)
    return fig,ax
plot_trajectory_gams(clusters='', basis='', via_fine=None, idx=None, title_str='Pseudotime', draw_all_curves=True, arrow_width_scale_factor=15.0, scatter_size=50, scatter_alpha=0.5, figsize=(8, 4), linewidth=1.5, marker_edgewidth=1, cmap_pseudotime='viridis_r', dpi=80, highlight_terminal_states=True, use_maxout_edgelist=False)
¶
projects the graph based coarse trajectory onto a umap/tsne embedding
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| clusters | column name of the adata.obs dataframe that contains the cluster labels | '' | |
| basis | str, default = 'X_umap', which to use for the embedding | '' | |
| via_fine | via object suggest to use via_object only unless you found that running via_fine gave better pathways | None | |
| idx | default: None. Or List. if you had previously computed a umap/tsne (embedding) only on a subset of the total n_samples (subsampled as per idx), then the via objects and results will be indexed according to idx too | None | |
| title_str | title of figure | 'Pseudotime' | |
| draw_all_curves | if the clustergraph has too many edges to project in a visually interpretable way, set this to False to get a simplified view of the graph pathways | True | |
| arrow_width_scale_factor | the width of the arrows is proportional to the edge weight. This factor scales the width of the arrows | 15.0 | |
| scatter_size | size of the scatter points | 50 | |
| scatter_alpha | transparency of the scatter points | 0.5 | |
| linewidth | width of the lines | 1.5 | |
| marker_edgewidth | width of the outline around each scatter point | 1 | |
| cmap_pseudotime | color map to use for the pseudotime | 'viridis_r' | |
| dpi | dpi of the figure | 80 | |
| highlight_terminal_states | whether or not to highlight/distinguish the clusters which are detected as the terminal states by via | True | 
Returns:
| Name | Type | Description | 
|---|---|---|
| fig | matplotlib.figure.Figure | matplotlib figure | 
| ax1 | matplotlib.axes._axes.Axes | matplotlib axis | 
| ax2 | matplotlib.axes._axes.Axes | matplotlib axis | 
Source code in /Users/fernandozeng/miniforge3/envs/space/lib/python3.10/site-packages/omicverse/single/_via.py
def plot_trajectory_gams(self,clusters:str='',basis:str='',via_fine=None, idx=None,
                     title_str:str= "Pseudotime", draw_all_curves:bool=True, arrow_width_scale_factor:float=15.0,
                     scatter_size:float=50, scatter_alpha:float=0.5,figsize:tuple=(8,4),
                     linewidth:float=1.5, marker_edgewidth:float=1, cmap_pseudotime:str='viridis_r',dpi:int=80,
                     highlight_terminal_states:bool=True, use_maxout_edgelist:bool =False)->Tuple[matplotlib.figure.Figure,
                                                                                             matplotlib.axes._axes.Axes,
                                                                                             matplotlib.axes._axes.Axes]:
    """projects the graph based coarse trajectory onto a umap/tsne embedding
    Arguments:
        clusters : column name of the adata.obs dataframe that contains the cluster labels
        basis : str, default = 'X_umap', which to use for the embedding
        via_fine : via object suggest to use via_object only unless you found that running via_fine gave better pathways
        idx : default: None. Or List. if you had previously computed a umap/tsne (embedding) only on a subset of the total n_samples (subsampled as per idx), then the via objects and results will be indexed according to idx too
        title_str : title of figure
        draw_all_curves : if the clustergraph has too many edges to project in a visually interpretable way, set this to False to get a simplified view of the graph pathways
        arrow_width_scale_factor : the width of the arrows is proportional to the edge weight. This factor scales the width of the arrows
        scatter_size : size of the scatter points
        scatter_alpha : transparency of the scatter points
        linewidth : width of the lines
        marker_edgewidth : width of the outline around each scatter point
        cmap_pseudotime : color map to use for the pseudotime
        dpi : dpi of the figure
        highlight_terminal_states :  whether or not to highlight/distinguish the clusters which are detected as the terminal states by via
    Returns:
        fig : matplotlib figure
        ax1 : matplotlib axis
        ax2 : matplotlib axis
    """
    if clusters=='':
        clusters=self.clusters
    if basis=='':
        basis=self.basis
    self.adata.obs[clusters]=self.adata.obs[clusters].astype('category')
    embedding=self.adata.obsm[basis]
    fig,ax1,ax2 = draw_trajectory_gams_pyomic(adata=self.adata,clusters=clusters,via_object=self.model, 
                                        via_fine=via_fine, embedding=embedding, idx=idx,
                                        title_str=title_str, draw_all_curves=draw_all_curves, arrow_width_scale_factor=arrow_width_scale_factor,
                                        scatter_size=scatter_size, scatter_alpha=scatter_alpha,figsize=figsize,
                                        linewidth=linewidth, marker_edgewidth=marker_edgewidth, cmap_pseudotime=cmap_pseudotime,dpi=dpi,
                                        highlight_terminal_states=highlight_terminal_states, use_maxout_edgelist=use_maxout_edgelist)
    return fig,ax1,ax2
plot_lineage_probability(clusters='', basis='', via_fine=None, idx=None, figsize=(8, 4), cmap='plasma', dpi=80, scatter_size=None, marker_lineages=[], fontsize=12)
¶
G is the igraph knn (low K) used for shortest path in high dim space. no idx needed as it's made on full sample, knn_hnsw is the knn made in the embedded space used for query to find the nearest point in the downsampled embedding that corresponds to the single cells in the full graph
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| clusters | column name of the adata.obs dataframe that contains the cluster labels | '' | |
| basis | str, default = 'X_umap', which to use for the embedding | '' | |
| via_fine | usually just set to same as via_coarse unless you ran a refined run and want to link it to initial via_coarse's terminal clusters | None | |
| idx | if one uses a downsampled embedding of the original data, then idx is the selected indices of the downsampled samples used in the visualization | None | |
| figsize | size of the figure | (8, 4) | |
| cmap | color map to use for the lineage probability | 'plasma' | |
| dpi | dpi of the figure | 80 | |
| scatter_size | size of the scatter points | None | |
| marker_lineages | Default is to use all lineage pathways. other provide a list of lineage number (terminal cluster number). | [] | |
| fontsize | fontsize of the title | 12 | 
Returns:
| Name | Type | Description | 
|---|---|---|
| fig | matplotlib.figure.Figure | matplotlib figure | 
| axs | matplotlib.axes._axes.Axes | matplotlib axis | 
Source code in /Users/fernandozeng/miniforge3/envs/space/lib/python3.10/site-packages/omicverse/single/_via.py
def plot_lineage_probability(self,clusters:str='',basis:str='',via_fine=None, 
                            idx=None, figsize:tuple=(8,4),
                            cmap:str='plasma', dpi:int=80, scatter_size =None,
                            marker_lineages:list = [], fontsize:int=12)->Tuple[matplotlib.figure.Figure,
                                                                               matplotlib.axes._axes.Axes]:
    """G is the igraph knn (low K) used for shortest path in high dim space. no idx needed as it's made on full sample, knn_hnsw is the knn made in the embedded space used for query to find the nearest point in the downsampled embedding that corresponds to the single cells in the full graph
    Arguments:
        clusters : column name of the adata.obs dataframe that contains the cluster labels
        basis : str, default = 'X_umap', which to use for the embedding
        via_fine : usually just set to same as via_coarse unless you ran a refined run and want to link it to initial via_coarse's terminal clusters
        idx : if one uses a downsampled embedding of the original data, then idx is the selected indices of the downsampled samples used in the visualization
        figsize : size of the figure
        cmap : color map to use for the lineage probability
        dpi : dpi of the figure
        scatter_size : size of the scatter points
        marker_lineages : Default is to use all lineage pathways. other provide a list of lineage number (terminal cluster number).
        fontsize : fontsize of the title
    Returns:
        fig : matplotlib figure
        axs : matplotlib axis
    """
    if clusters=='':
        clusters=self.clusters
    if basis=='':
        basis=self.basis
    self.adata.obs[clusters]=self.adata.obs[clusters].astype('category')
    embedding=self.adata.obsm[basis]
    fig, axs = draw_sc_lineage_probability(via_object=self.model,via_fine=via_fine, embedding=embedding,figsize=figsize,
                                           idx=idx, cmap_name=cmap, dpi=dpi, scatter_size =scatter_size,
                                        marker_lineages = marker_lineages, fontsize=fontsize)
    fig.tight_layout()
    return fig, axs
plot_gene_trend(gene_list=None, figsize=(8, 4), magic_steps=3, spline_order=5, dpi=80, cmap='jet', marker_genes=[], linewidth=2.0, n_splines=10, fontsize_=12, marker_lineages=[])
¶
plots the gene expression trend along the pseudotime
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| gene_list | list of genes to plot | None | |
| figsize | size of the figure | (8, 4) | |
| magic_steps | number of magic steps to use for imputation | 3 | |
| spline_order | order of the spline to use for smoothing | 5 | |
| dpi | dpi of the figure | 80 | |
| cmap | color map to use for the gene expression | 'jet' | |
| marker_genes | Default is to use all genes in gene_exp. other provide a list of marker genes that will be used from gene_exp. | [] | |
| linewidth | width of the lines | 2.0 | |
| n_splines | number of splines to use for smoothing | 10 | |
| fontsize_ | fontsize of the title | 12 | |
| marker_lineages | Default is to use all lineage pathways. other provide a list of lineage number (terminal cluster number). | [] | 
Returns:
| Name | Type | Description | 
|---|---|---|
| fig | matplotlib.figure.Figure | matplotlib figure | 
| axs | matplotlib.axes._axes.Axes | matplotlib axis | 
Source code in /Users/fernandozeng/miniforge3/envs/space/lib/python3.10/site-packages/omicverse/single/_via.py
def plot_gene_trend(self,gene_list:list=None,figsize:tuple=(8,4),
                    magic_steps:int=3, spline_order:int=5, dpi:int=80,cmap:str='jet', 
                    marker_genes:list = [], linewidth:float = 2.0,
                    n_splines:int=10,  fontsize_:int=12, marker_lineages=[])->Tuple[matplotlib.figure.Figure,
                                                                                    matplotlib.axes._axes.Axes]:
    """plots the gene expression trend along the pseudotime
    Arguments:
        gene_list : list of genes to plot
        figsize : size of the figure
        magic_steps : number of magic steps to use for imputation
        spline_order : order of the spline to use for smoothing
        dpi : dpi of the figure
        cmap : color map to use for the gene expression
        marker_genes : Default is to use all genes in gene_exp. other provide a list of marker genes that will be used from gene_exp.
        linewidth : width of the lines
        n_splines : number of splines to use for smoothing
        fontsize_ : fontsize of the title
        marker_lineages : Default is to use all lineage pathways. other provide a list of lineage number (terminal cluster number).
    Returns:
        fig : matplotlib figure
        axs : matplotlib axis
    """
    df_magic = self.model.do_impute(self.adata[:,gene_list].to_df(), magic_steps=magic_steps, gene_list=gene_list)
    fig, axs=get_gene_expression_pyomic(self.model,df_magic,spline_order=spline_order,dpi=dpi,
                               cmap=cmap, marker_genes=marker_genes, linewidth=linewidth,figsize=figsize,
                               n_splines=n_splines,  fontsize_=fontsize_, marker_lineages=marker_lineages)
    fig.tight_layout()
    return fig, axs
plot_gene_trend_heatmap(gene_list, marker_lineages=[], fontsize=8, cmap='viridis', normalize=True, ytick_labelrotation=0, figsize=(2, 4))
¶
Plot the gene trends on heatmap: a heatmap is generated for each lineage (identified by terminal cluster number). Default selects all lineages
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| gene_list | list of genes to plot | required | |
| marker_lineages | list default = None and plots all detected all lineages. Optionally provide a list of integers corresponding to the cluster number of terminal cell fates | [] | |
| fontsize | int default = 8 | 8 | |
| cmap | str default = 'viridis' | 'viridis' | |
| normalize | bool = True | True | |
| ytick_labelrotation | int default = 0 | 0 | |
| figsize | size of the figure | (2, 4) | 
Returns:
| Name | Type | Description | 
|---|---|---|
| fig | matplotlib.figure.Figure | matplotlib figure | 
| axs | list | list of matplotlib axis | 
Source code in /Users/fernandozeng/miniforge3/envs/space/lib/python3.10/site-packages/omicverse/single/_via.py
def plot_gene_trend_heatmap(self,gene_list:list,marker_lineages:list = [], 
                         fontsize:int=8,cmap:str='viridis', normalize:bool=True, ytick_labelrotation:int = 0, 
                         figsize:tuple=(2,4))->Tuple[matplotlib.figure.Figure,
                                                                list]:
    """Plot the gene trends on heatmap: a heatmap is generated for each lineage (identified by terminal cluster number). Default selects all lineages
    Arguments:
        gene_list : list of genes to plot
        marker_lineages : list default = None and plots all detected all lineages. Optionally provide a list of integers corresponding to the cluster number of terminal cell fates
        fontsize : int default = 8
        cmap : str default = 'viridis'
        normalize : bool = True
        ytick_labelrotation : int default = 0
        figsize : size of the figure
    Returns:
        fig : matplotlib figure
        axs : list of matplotlib axis       
    """
    df_magic = self.model.do_impute(self.adata[:,gene_list].to_df(), magic_steps=3, gene_list=gene_list)
    df_magic['parc'] = self.model.labels
    df_magic_cluster = df_magic.groupby('parc', as_index=True).mean()
    fig,axs=plot_gene_trend_heatmaps_pyomic(via_object=self.model, df_gene_exp=df_magic, 
                                            cmap=cmap,fontsize=fontsize,normalize=normalize,
                                            ytick_labelrotation=ytick_labelrotation,figsize=figsize,
                                            marker_lineages=marker_lineages)
    fig.tight_layout()
    return fig,axs
plot_clustergraph(gene_list, arrow_head=0.1, figsize=(8, 4), dpi=80, magic_steps=3, edgeweight_scale=1.5, cmap=None, label_=True)
¶
plot the gene in pie chart for each cluster
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| gene_list | list of genes to plot | required | |
| arrow_head | size of the arrow head | 0.1 | |
| figsize | size of the figure | (8, 4) | |
| edgeweight_scale | scale of the edge weight | 1.5 | |
| cmap | color map to use for the gene expression | None | |
| label_ | whether to label the nodes | True | 
Returns:
| Name | Type | Description | 
|---|---|---|
| fig | matplotlib.figure.Figure | matplotlib figure | 
| axs | matplotlib.axes._axes.Axes | matplotlib axis | 
Source code in /Users/fernandozeng/miniforge3/envs/space/lib/python3.10/site-packages/omicverse/single/_via.py
def plot_clustergraph(self,gene_list:list,arrow_head:float=0.1,figsize:tuple=(8,4),dpi=80,magic_steps=3,
                      edgeweight_scale:float=1.5, cmap=None, label_=True,)->Tuple[matplotlib.figure.Figure,
                                                                                    matplotlib.axes._axes.Axes]:
    """plot the gene in pie chart for each cluster
    Arguments:
        gene_list : list of genes to plot
        arrow_head : size of the arrow head
        figsize : size of the figure
        edgeweight_scale : scale of the edge weight
        cmap : color map to use for the gene expression
        label_ : whether to label the nodes
    Returns:
        fig : matplotlib figure
        axs : matplotlib axis
    """
    df_magic = self.model.do_impute(self.adata[:,gene_list].to_df(), magic_steps=magic_steps, gene_list=gene_list)
    df_magic['parc'] = self.model.labels
    df_magic_cluster = df_magic.groupby('parc', as_index=True).mean()
    fig, axs = draw_clustergraph_pyomic(via_object=self.model, type_data='gene', gene_exp=df_magic_cluster, 
                                gene_list=gene_list, arrow_head=arrow_head,figsize=figsize,
                                edgeweight_scale=edgeweight_scale, cmap=cmap, label_=label_,dpi=dpi)
    fig.tight_layout()
    return fig,axs