CRC-Epi
In [5]:
Copied!
import matplotlib.pyplot as plt
import omicverse as ov
import scanpy as sc
import scvelo as scv
from omicverse.utils import mde
# Apply omicverse's plotting setup once, before any figure is created.
ov.utils.ov_plot_set()
In [2]:
Copied!
# Load the annotated AnnData stored in crc_can.h5ad; the bare name on the
# last line displays its summary.
adata_can = ov.read('../data/crc_can.h5ad', compression='gzip')
adata_can
Out[2]:
AnnData object with n_obs × n_vars = 11410 × 1458 obs: 'n_genes', 'doublet_score', 'predicted_doublet', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'leiden', 'scsa_celltype', 'major_celltype', 'scsa_true_celltype', 'scsa_celltype_cancer', 'pt_via', 'minor_celltype' var: 'gene_ids', 'n_cells', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std' uns: 'dendrogram_leiden', 'hvg', 'leiden', 'leiden_colors', 'leiden_sizes', 'log1p', 'major_celltype_colors', 'major_celltype_sizes', 'minor_celltype_colors', 'minor_celltype_sizes', 'neighbors', 'paga', 'paga_graph', 'pca', 'rank_genes_groups', 'scrublet', 'scsa_celltype_cancer_colors', 'scsa_celltype_cancer_sizes', 'scsa_celltype_colors', 'scsa_true_celltype_colors', 'umap' obsm: 'X_pca', 'X_umap' varm: 'PCs' layers: 'counts' obsp: 'connectivities', 'distances'
In [33]:
Copied!
# UMAP of `adata_can`, coloured by the `minor_celltype` annotation.
fig, ax = plt.subplots(figsize=(4, 4))
ov.utils.embedding(
    adata_can,
    basis="X_umap",
    color=['minor_celltype'],
    frameon='small',
    legend_fontsize=14,
    legend_fontoutline=2,
    size=10,
    ax=ax,  # draw into the axes created above
    alpha=0.5,
    add_outline=False,
    outline_color='black',
    outline_width=1,
    show=False,
    palette=ov.utils.blue_color[:],
)
In [3]:
Copied!
# Load the AnnData stored in crc_50000.h5ad; the bare name on the last line
# displays its summary.
adata_sea = ov.read('../data/crc_50000.h5ad', compression='gzip')
adata_sea
Out[3]:
AnnData object with n_obs × n_vars = 50000 × 25121 obs: 'n_genes', 'doublet_score', 'predicted_doublet', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'leiden', 'scsa_celltype', 'major_celltype', 'scsa_true_celltype' var: 'gene_ids', 'n_cells', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm' uns: 'dendrogram_leiden', 'hvg', 'leiden', 'leiden_colors', 'leiden_sizes', 'log1p', 'major_celltype_colors', 'neighbors', 'paga', 'pca', 'rank_genes_groups', 'scrublet', 'scsa_celltype_colors', 'umap' obsm: 'X_pca', 'X_umap' obsp: 'connectivities', 'distances'
In [4]:
Copied!
## User defined parameters

## Core parameters
# Value passed as `n_SEACells` when the SEACells model is constructed.
n_SEACells = 200
# key in ad.obsm to use for computing metacells
# (this would be replaced by 'X_svd' for ATAC data)
build_kernel_on = 'X_pca'

## Additional parameters
# Number of eigenvalues to consider when initializing metacells
n_waypoint_eigs = 10
In [5]:
Copied!
# Cell names present in both objects.
# sorted() pins the order: iterating a set of strings is not stable between
# interpreter sessions, which would make the row order of anything indexed
# with `ret_cellidx` change from run to run.
ret_cellidx = sorted(set(adata_sea.obs.index) & set(adata_can.obs.index))
len(ret_cellidx)
Out[5]:
4303
In [7]:
Copied!
# Subset to the shared cells and rebuild the object from `.raw`.
adata_can1 = adata_can[ret_cellidx].raw.to_adata()
adata_can1
Out[7]:
AnnData object with n_obs × n_vars = 4303 × 25121 obs: 'n_genes', 'doublet_score', 'predicted_doublet', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'leiden', 'scsa_celltype', 'major_celltype', 'scsa_true_celltype', 'scsa_celltype_cancer', 'pt_via', 'minor_celltype' var: 'gene_ids', 'n_cells', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm' uns: 'dendrogram_leiden', 'hvg', 'leiden', 'leiden_sizes', 'log1p', 'major_celltype_colors', 'major_celltype_sizes', 'minor_celltype_colors', 'minor_celltype_sizes', 'neighbors', 'paga', 'paga_graph', 'pca', 'rank_genes_groups', 'scrublet', 'scsa_celltype_cancer_colors', 'scsa_celltype_cancer_sizes', 'scsa_celltype_colors', 'scsa_true_celltype_colors', 'umap' obsm: 'X_pca', 'X_umap' obsp: 'connectivities', 'distances'
In [8]:
Copied!
import SEACells

# Configure the SEACells model on the shared cells using the parameters
# defined in the "User defined parameters" cell.
model = SEACells.core.SEACells(
    adata_can1,
    build_kernel_on=build_kernel_on,
    n_SEACells=n_SEACells,
    n_waypoint_eigs=n_waypoint_eigs,
    convergence_epsilon=1e-5,
)
findfont: Font family ['Raleway'] not found. Falling back to DejaVu Sans.
Welcome to SEACells!
In [9]:
Copied!
# Build the kernel matrix and keep a handle on it for inspection.
model.construct_kernel_matrix()
M = model.kernel_matrix
Computing kNN graph using scanpy NN ... computing neighbors finished: added to `.uns['neighbors']` `.obsp['distances']`, distances for each pair of neighbors `.obsp['connectivities']`, weighted adjacency matrix (0:00:02) Computing radius for adaptive bandwidth kernel...
0%| | 0/4303 [00:00<?, ?it/s]
Making graph symmetric... Parameter graph_construction = union being used to build KNN graph... Computing RBF kernel...
0%| | 0/4303 [00:00<?, ?it/s]
Building similarity LIL matrix...
0%| | 0/4303 [00:00<?, ?it/s]
Constructing CSR matrix...
In [10]:
Copied!
%matplotlib inline
# Plot the initilization to ensure they are spread across phenotypic space
ax=SEACells.plot.plot_initialization(adata_can1, model,plot_basis='X_umap',
#save_as='../figures/scrna/sea_meta1.png',
show=True)
#plt.savefig("../figures/scrna/sea_meta1.png",dpi=300,bbox_inches = 'tight')
#plt.savefig("../pdf/scrna/sea_meta1.pdf",dpi=300,bbox_inches = 'tight')
%matplotlib inline
# Plot the initilization to ensure they are spread across phenotypic space
ax=SEACells.plot.plot_initialization(adata_can1, model,plot_basis='X_umap',
#save_as='../figures/scrna/sea_meta1.png',
show=True)
#plt.savefig("../figures/scrna/sea_meta1.png",dpi=300,bbox_inches = 'tight')
#plt.savefig("../pdf/scrna/sea_meta1.pdf",dpi=300,bbox_inches = 'tight')
In [11]:
Copied!
# Fit the model: at least 10 and at most 100 iterations.
model.fit(min_iter=10, max_iter=100)
Building kernel on X_pca Computing diffusion components from X_pca for waypoint initialization ... Determing nearest neighbor graph... computing neighbors finished: added to `.uns['neighbors']` `.obsp['distances']`, distances for each pair of neighbors `.obsp['connectivities']`, weighted adjacency matrix (0:00:00) Done. Sampling waypoints ... Done. Selecting 187 cells from waypoint initialization. Initializing residual matrix using greedy column selection Initializing f and g...
100%|██████████| 23/23 [00:00<00:00, 190.54it/s]
Selecting 13 cells from greedy initialization.
Randomly initialized A matrix. Setting convergence threshold at 0.00108 Starting iteration 1. Completed iteration 1. Starting iteration 10. Completed iteration 10. Starting iteration 20. Completed iteration 20. Starting iteration 30. Completed iteration 30. Starting iteration 40. Completed iteration 40. Starting iteration 50. Completed iteration 50. Starting iteration 60. Completed iteration 60. Converged after 60 iterations.
In [17]:
Copied!
# Check for convergence: plot the per-iteration values stored in
# `model.RSS_iters`.
fig, ax = plt.subplots(figsize=(4, 4))
plt.plot(model.RSS_iters, color=ov.utils.blue_color[6])
plt.title("CRC Epi Reconstruction Error\nover Iterations")
plt.xlabel("Iterations")
plt.ylabel("Squared Error")

# Push the left/bottom spines 10 points outward from the plotting area.
ax.spines['left'].set_position(('outward', 10))
ax.spines['bottom'].set_position(('outward', 10))
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.grid(False)

# Spine visibility: hide top/right, keep bottom/left.
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_visible(True)
ax.spines['left'].set_visible(True)

plt.savefig("../figures/scrna/sea_epi_loss.png", dpi=300, bbox_inches='tight')
plt.savefig("../pdf/scrna/sea_epi_loss.pdf", dpi=300, bbox_inches='tight')
In [12]:
Copied!
# Checkpoint the cell-level object to disk.
adata_can1.write_h5ad('../data/crc_epi_SEACell.h5ad', compression='gzip')
In [4]:
Copied!
# Reload the checkpoint written above (lets a later session resume here).
adata_can1 = sc.read('../data/crc_epi_SEACell.h5ad', compression='gzip')
In [13]:
Copied!
import pickle

# Persist the fitted model next to the notebook.
with open('seamodel_epi.pkl', 'wb') as f:
    pickle.dump(model, f)
In [3]:
Copied!
import pickle

# Restore the model saved by the previous cell.
# NOTE: unpickling executes code embedded in the file -- only load a pickle
# that this notebook (or another trusted source) produced.
with open('seamodel_epi.pkl', 'rb') as f:
    model = pickle.load(f)
findfont: Font family ['Raleway'] not found. Falling back to DejaVu Sans.
In [10]:
Copied!
# Per-SEACell purity with respect to the `minor_celltype` labels.
SEACell_purity = SEACells.evaluate.compute_celltype_purity(adata_can1, 'minor_celltype')
SEACell_purity.head()
Out[10]:
minor_celltype | minor_celltype_purity | |
---|---|---|
SEACell | ||
SEACell-0 | Metastasis-1 | 0.469388 |
SEACell-1 | Quiescence | 0.864865 |
SEACell-2 | EMT | 0.750000 |
SEACell-3 | Quiescence | 1.000000 |
SEACell-4 | Cell Cycle | 0.904762 |
In [11]:
Copied!
import matplotlib.pyplot as plt
import seaborn as sns

# Box plot of the `minor_celltype_purity` column.
fig, ax = plt.subplots(figsize=(1, 4))
sns.boxplot(
    data=SEACell_purity,
    y='minor_celltype_purity',
    ax=ax,
    color=ov.utils.blue_color[3],
)
plt.title('Celltype Purity')
sns.despine()

# Push the left/bottom spines 10 points outward from the plotting area.
ax.spines['left'].set_position(('outward', 10))
ax.spines['bottom'].set_position(('outward', 10))
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.grid(False)

# Spine visibility: hide top/right, keep bottom/left.
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_visible(True)
ax.spines['left'].set_visible(True)

plt.savefig("../figures/scrna/sea_epi_Purity.png", dpi=300, bbox_inches='tight')
plt.savefig("../pdf/scrna/sea_epi_Purity.pdf", dpi=300, bbox_inches='tight')
In [7]:
Copied!
import SEACells

# Separation metric computed on the X_pca representation (nth_nbr=1).
separation = SEACells.evaluate.separation(adata_can1, 'X_pca', nth_nbr=1)
separation.head()
Determing nearest neighbor graph... computing neighbors finished: added to `.uns['neighbors']` `.obsp['distances']`, distances for each pair of neighbors `.obsp['connectivities']`, weighted adjacency matrix (0:00:01)
Out[7]:
separation | |
---|---|
SEACell | |
SEACell-0 | 0.016507 |
SEACell-1 | 0.071745 |
SEACell-2 | 0.567868 |
SEACell-3 | 0.034363 |
SEACell-4 | 0.005667 |
In [12]:
Copied!
# Box plot of the `separation` column.
fig, ax = plt.subplots(figsize=(1, 4))
sns.boxplot(
    data=separation,
    y='separation',
    ax=ax,
    color=ov.utils.blue_color[4],
)
plt.title('Separation')
sns.despine()

# Push the left/bottom spines 10 points outward from the plotting area.
ax.spines['left'].set_position(('outward', 10))
ax.spines['bottom'].set_position(('outward', 10))
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.grid(False)

# Spine visibility: hide top/right, keep bottom/left.
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_visible(True)
ax.spines['left'].set_visible(True)

plt.savefig("../figures/scrna/sea_epi_Separation.png", dpi=300, bbox_inches='tight')
plt.savefig("../pdf/scrna/sea_epi_Separation.pdf", dpi=300, bbox_inches='tight')
In [14]:
Copied!
# Compactness metric computed on the X_pca representation.
compactness = SEACells.evaluate.compactness(adata_can1, 'X_pca')
compactness.head()
Determing nearest neighbor graph... computing neighbors finished: added to `.uns['neighbors']` `.obsp['distances']`, distances for each pair of neighbors `.obsp['connectivities']`, weighted adjacency matrix (0:00:00)
Out[14]:
compactness | |
---|---|
SEACell | |
SEACell-0 | 0.001148 |
SEACell-1 | 0.010558 |
SEACell-2 | 0.021555 |
SEACell-3 | 0.000377 |
SEACell-4 | 0.000110 |
In [15]:
Copied!
# Box plot of the `compactness` column.
fig, ax = plt.subplots(figsize=(1, 4))
sns.boxplot(
    data=compactness,
    y='compactness',
    ax=ax,
    color=ov.utils.blue_color[4],
)
plt.title('Compactness')
sns.despine()

# Push the left/bottom spines 10 points outward from the plotting area.
ax.spines['left'].set_position(('outward', 10))
ax.spines['bottom'].set_position(('outward', 10))
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.grid(False)

# Spine visibility: hide top/right, keep bottom/left.
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_visible(True)
ax.spines['left'].set_visible(True)

plt.savefig("../figures/scrna/sea_epi_Compactness.png", dpi=300, bbox_inches='tight')
plt.savefig("../pdf/scrna/sea_epi_Compactness.pdf", dpi=300, bbox_inches='tight')
In [22]:
Copied!
import pandas as pd

# UMAP coordinates (columns 0 and 1) indexed by cell name, joined with each
# cell's SEACell label.
umap = (
    pd.DataFrame(adata_can1.obsm['X_umap'])
    .set_index(adata_can1.obs_names)
    .join(adata_can1.obs["SEACell"])
)
umap["SEACell"] = umap["SEACell"].astype("category")

# Mean UMAP position per SEACell. observed=True restricts the result to labels
# that actually occur (no all-NaN rows for unused categories) and avoids the
# pandas FutureWarning about the changing default for categorical groupers.
mcs = umap.groupby("SEACell", observed=True).mean().reset_index()
In [44]:
Copied!
# Single-cell UMAP (faint, coloured by `minor_celltype`) with the per-SEACell
# mean positions from `mcs` drawn on top.
fig, ax = plt.subplots(figsize=(4, 4))
ov.utils.embedding(
    adata_can1,
    basis="X_umap",
    color=['minor_celltype'],
    frameon='small',
    title="Meta cells CRC Epi",
    legend_fontsize=14,
    legend_fontoutline=2,
    ax=ax,
    alpha=0.2,
    add_outline=False,
    outline_color='black',
    outline_width=1,
    show=False,
)
# Columns 0 and 1 of `mcs` are the averaged UMAP coordinates.
ax.scatter(
    mcs[0],
    mcs[1],
    s=15,
    c=ov.utils.red_color[2],
    edgecolors='b',
    linewidths=0.6,
    alpha=1,
)
fig.savefig("../figures/scrna/umap_celltype_meta_epi_meta.png", dpi=300, bbox_inches='tight')
fig.savefig("../pdf/scrna/umap_celltype_meta_epi_meta.pdf", dpi=300, bbox_inches='tight')
In [25]:
Copied!
# Store a copy of the current object as `.raw`; the summarisation step that
# follows is called with summarize_layer='raw'.
adata_can1.raw = adata_can1.copy()
In [26]:
Copied!
# Aggregate cells into metacells using the model's assignment matrix `A_`.
SEACell_soft_ad = SEACells.core.summarize_by_soft_SEACell(
    adata_can1,
    model.A_,
    celltype_label='minor_celltype',
    summarize_layer='raw',
    minimum_weight=0.05,
)
SEACell_soft_ad
100%|██████████| 200/200 [01:31<00:00, 2.20it/s]
Out[26]:
AnnData object with n_obs × n_vars = 200 × 25121 obs: 'Pseudo-sizes', 'celltype', 'celltype_purity'
In [27]:
Copied!
# Checkpoint the metacell-level object to disk.
SEACell_soft_ad.write_h5ad('../data/crc_epi_SEACell_soft_ad.h5ad', compression='gzip')
In [28]:
Copied!
# Embed the metacells: HVG selection -> PCA on those genes -> kNN graph -> UMAP.
sc.pp.highly_variable_genes(SEACell_soft_ad, n_top_genes=2500, inplace=True)
sc.tl.pca(SEACell_soft_ad, use_highly_variable=True)
sc.pp.neighbors(SEACell_soft_ad, use_rep='X_pca')
sc.tl.umap(SEACell_soft_ad)
If you pass `n_top_genes`, all cutoffs are ignored. extracting highly variable genes finished (0:00:00) --> added 'highly_variable', boolean vector (adata.var) 'means', float vector (adata.var) 'dispersions', float vector (adata.var) 'dispersions_norm', float vector (adata.var) computing PCA on highly variable genes with n_comps=50 finished (0:00:00) computing neighbors finished: added to `.uns['neighbors']` `.obsp['distances']`, distances for each pair of neighbors `.obsp['connectivities']`, weighted adjacency matrix (0:00:00) computing UMAP finished: added 'X_umap', UMAP coordinates (adata.obsm) (0:00:00)
In [32]:
Copied!
# Display the cell-level object's summary.
adata_can1
Out[32]:
AnnData object with n_obs × n_vars = 4303 × 25121 obs: 'n_genes', 'doublet_score', 'predicted_doublet', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'leiden', 'scsa_celltype', 'major_celltype', 'scsa_true_celltype', 'scsa_celltype_cancer', 'pt_via', 'minor_celltype', 'SEACell' var: 'gene_ids', 'n_cells', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm' uns: 'dendrogram_leiden', 'hvg', 'leiden', 'leiden_sizes', 'log1p', 'major_celltype_colors', 'major_celltype_sizes', 'minor_celltype_colors', 'minor_celltype_sizes', 'neighbors', 'paga', 'paga_graph', 'pca', 'rank_genes_groups', 'scrublet', 'scsa_celltype_cancer_colors', 'scsa_celltype_cancer_sizes', 'scsa_celltype_colors', 'scsa_true_celltype_colors', 'umap' obsm: 'X_pca', 'X_umap' obsp: 'connectivities', 'distances'
In [34]:
Copied!
import matplotlib.pyplot as plt
from matplotlib import patheffects

# Metacell UMAP coloured by `celltype`; the legend is switched off and the
# group labels are placed by gen_mpl_labels instead.
fig, ax = plt.subplots(figsize=(4, 4))
ov.utils.embedding(
    SEACell_soft_ad,
    basis="X_umap",
    color=['celltype'],
    frameon='small',
    title="CRC Epi metacells celltypes",
    legend_fontsize=14,
    legend_fontoutline=2,
    ax=ax,
    legend_loc=None,
    add_outline=False,
    outline_color='black',
    outline_width=1,
    show=False,
    palette=ov.utils.blue_color[:],
)
ov.utils.gen_mpl_labels(
    SEACell_soft_ad,
    'celltype',
    exclude=("None",),
    basis='X_umap',
    ax=ax,
    adjust_kwargs=dict(arrowprops=dict(arrowstyle='-', color='black')),
    text_kwargs=dict(
        fontsize=12,
        weight='bold',
        # white stroke keeps the label text readable on top of the points
        path_effects=[patheffects.withStroke(linewidth=2, foreground='w')],
    ),
)
fig.savefig("../figures/scrna/umap_celltype_meta_epi_scsa.png", dpi=300, bbox_inches='tight')
fig.savefig("../pdf/scrna/umap_celltype_meta_epi_scsa.pdf", dpi=300, bbox_inches='tight')
In [35]:
Copied!
# Run VIA on the metacells' X_pca representation, grouped by `celltype`,
# with 'Stemness' given as the root and a fixed random seed.
v1 = ov.single.pyVIA(
    adata=SEACell_soft_ad,
    adata_key='X_pca',
    adata_ncomps=50,
    basis='X_umap',
    clusters='celltype',
    knn=10,
    random_seed=4,
    root_user=['Stemness'],
)
v1.run()
2023-10-20 21:01:51.634623 Running VIA over input data of 200 (samples) x 50 (features) 2023-10-20 21:01:51.634663 Knngraph has 10 neighbors 2023-10-20 21:01:51.888010 Finished global pruning of 10-knn graph used for clustering at level of 0.15. Kept 53.2 % of edges. 2023-10-20 21:01:51.893711 Number of connected components used for clustergraph is 1 2023-10-20 21:01:51.895771 Commencing community detection 2023-10-20 21:01:51.933592 Finished running Leiden algorithm. Found 26 clusters. 2023-10-20 21:01:51.934038 Merging 16 very small clusters (<10) 2023-10-20 21:01:51.934478 Finished detecting communities. Found 10 communities 2023-10-20 21:01:51.934518 Making cluster graph. Global cluster graph pruning level: 0.15 2023-10-20 21:01:51.936309 Graph has 1 connected components before pruning 2023-10-20 21:01:51.937992 Graph has 3 connected components after pruning 2023-10-20 21:01:51.940116 Graph has 1 connected components after reconnecting 2023-10-20 21:01:51.940746 0.0% links trimmed from local pruning relative to start 2023-10-20 21:01:51.940767 56.5% links trimmed from global pruning relative to start 2023-10-20 21:01:51.943217 Starting make edgebundle viagraph... 2023-10-20 21:01:51.943239 Make via clustergraph edgebundle 2023-10-20 21:01:53.255455 Hammer dims: Nodes shape: (10, 2) Edges shape: (20, 3) 2023-10-20 21:01:53.257311 component number 0 out of [0] 2023-10-20 21:01:53.259101\group root method 2023-10-20 21:01:53.259113or component 0, the root is Stemness and ri Stemness 2023-10-20 21:01:53.259525 New root is 3 and majority Stemness 2023-10-20 21:01:53.259627 Computing lazy-teleporting expected hitting times 2023-10-20 21:01:53.640971 Identifying terminal clusters corresponding to unique lineages... 
2023-10-20 21:01:53.641047 Closeness:[1, 4] 2023-10-20 21:01:53.641058 Betweenness:[0, 1, 3, 5, 7, 9] 2023-10-20 21:01:53.641063 Out Degree:[0, 1, 2, 4, 7, 9] 2023-10-20 21:01:53.641480 We removed cluster 0 from the shortlist of terminal states remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing 2023-10-20 21:01:53.641631 Terminal clusters corresponding to unique lineages in this component are [1, 4, 7, 9] 2023-10-20 21:01:53.852100 From root 3, the Terminal state 1 is reached 5 times. 2023-10-20 21:01:54.092109 From root 3, the Terminal state 4 is reached 5 times. 2023-10-20 21:01:54.281488 From root 3, the Terminal state 7 is reached 623 times. 2023-10-20 21:01:54.508227 From root 3, the Terminal state 9 is reached 200 times. 2023-10-20 21:01:54.535980 Terminal clusters corresponding to unique lineages are {1: 'Cell Cycle', 4: 'Differentiation', 7: 'EMT', 9: 'Differentiation'} 2023-10-20 21:01:54.536010 Begin projection of pseudotime and lineage likelihood 2023-10-20 21:01:54.546326 Graph has 1 connected components before pruning 2023-10-20 21:01:54.547488 Graph has 3 connected components after pruning 2023-10-20 21:01:54.548953 Graph has 1 connected components after reconnecting 2023-10-20 21:01:54.549378 15.0% links trimmed from local pruning relative to start 2023-10-20 21:01:54.549396 40.0% links trimmed from global pruning relative to start 2023-10-20 21:01:54.550742 Start making edgebundle milestone... 
2023-10-20 21:01:54.550767 Start finding milestones 2023-10-20 21:01:54.971375 End milestones 2023-10-20 21:01:54.971420 Will use via-pseudotime for edges, otherwise consider providing a list of numeric labels (single cell level) or via_object 2023-10-20 21:01:54.971956 Recompute weights 2023-10-20 21:01:54.977970 pruning milestone graph based on recomputed weights 2023-10-20 21:01:54.978669 Graph has 1 connected components before pruning 2023-10-20 21:01:54.979199 Graph has 17 connected components after pruning 2023-10-20 21:01:54.991655 Graph has 1 connected components after reconnecting 2023-10-20 21:01:54.992286 56.0% links trimmed from global pruning relative to start 2023-10-20 21:01:54.992309 regenerate igraph on pruned edges 2023-10-20 21:01:54.995511 Setting numeric label as single cell pseudotime for coloring edges 2023-10-20 21:01:54.999293 Making smooth edges 2023-10-20 21:01:55.334156 Time elapsed 3.7 seconds
In [41]:
Copied!
# Stream plot of the VIA result on the metacell UMAP.
fig, ax = v1.plot_stream(
    basis='X_umap',
    clusters='celltype',
    density_grid=0.8,
    scatter_alpha=0.3,
    linewidth=0.5,
)
# Title text corrected: "Metacells" (was misspelled in the saved figure).
plt.title('Epithelial Metacells Streamplot', fontsize=13)
fig.savefig("../figures/scrna/epi_meta_streamplot.png", dpi=300, bbox_inches='tight')
In [38]:
Copied!
# Write VIA's pseudotime into SEACell_soft_ad (the run log names the obs
# column `pt_via`), then compute PAGA over `celltype` using it as time prior.
v1.get_pseudotime(SEACell_soft_ad)
ov.utils.cal_paga(
    SEACell_soft_ad,
    use_time_prior='pt_via',
    vkey='paga',
    groups='celltype',
)
...the pseudotime of VIA added to AnnData obs named `pt_via` running PAGA using priors: ['pt_via'] finished added 'paga/connectivities', connectivities adjacency (adata.uns) 'paga/connectivities_tree', connectivities subtree (adata.uns) 'paga/transitions_confidence', velocity transitions (adata.uns)
In [40]:
Copied!
# Metacell UMAP coloured by the `pt_via` pseudotime values.
fig, ax = plt.subplots(figsize=(4, 4))
ov.utils.embedding(
    SEACell_soft_ad,
    basis="X_umap",
    color=['pt_via'],
    frameon='small',
    title="Epithelial Metacells pseudotime",
    legend_fontsize=14,
    legend_fontoutline=2,
    ax=ax,
    legend_loc=None,
    add_outline=False,
    outline_color='black',
    outline_width=1,
    show=False,
    palette=ov.utils.blue_color[:],
    cmap='Greens',
)
plt.savefig("../figures/scrna/epi_meta_time.png", dpi=300, bbox_inches='tight')
plt.savefig("../pdf/scrna/epi_meta_time.pdf", dpi=300, bbox_inches='tight')
In [43]:
Copied!
# PAGA graph drawn on the metacell UMAP.
ov.utils.plot_paga(
    SEACell_soft_ad,
    basis='umap',
    alpha=.1,
    title='PAGA LTNN-graph',
    min_edge_width=2,
    node_size_scale=1.5,
    show=False,
    legend_loc=False,
)
# NOTE(review): this presumably replaces the `title=` passed above on the
# current axes -- confirm the two are meant to differ.
plt.title('Epithelial Meta PAGA-Graph', fontsize=13)
plt.savefig("../figures/scrna/epi_meta_paga.png", dpi=300, bbox_inches='tight')
plt.savefig("../pdf/scrna/epi_meta_paga.pdf", dpi=300, bbox_inches='tight')
In [ ]:
Copied!