BulkTrajBlend
In [1]:
import omicverse as ov
from omicverse.utils import mde
import scanpy as sc
import scvelo as scv
ov.utils.ov_plot_set()
In [2]:
adata_hpc=sc.read('data/liver_hpc_anno.h5ad',compression='gzip')
In [3]:
adata_hpc.X.max()
Out[3]:
8615.0
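A maximum above 8,000 indicates that `adata_hpc.X` still holds raw counts rather than log-normalised values; BulkTrajBlend normalises and log-transforms the single-cell data internally (see the `vae_configure` log below). A minimal sanity-check sketch, where the threshold of 50 is an arbitrary assumption:

```python
# Sketch: raw counts have a large maximum; log1p-scaled data rarely exceeds ~10.
x_max = adata_hpc.X.max()
assert x_max > 50, f"adata_hpc.X looks log-transformed (max={x_max}); raw counts are expected here"
```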
In [4]:
import numpy as np
import pandas as pd
bulk=pd.read_csv('data/GSE58827_FPKM.txt.gz',index_col=0,sep='\t',header=1)
#bulk=ov.bulk.Matrix_ID_mapping(bulk,'genesets/pair_GRCm39.tsv')
bulk.head()
Out[4]:
| Gene Symbol | D-2 | D-2.1 | D-2.2 | D0 | D0 | D0.1 | D1 | D1.1 | D1.2 | D3 | ... | D25.2 | D30 | D30.1 | D30.2 | D45 | D45.1 | D45.2 | D60 | D60.1 | D60.2 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0610007C21Rik | 18.48160 | 28.29490 | 24.61790 | 42.37870 | 33.34830 | 41.60700 | 57.6597 | 63.41470 | 64.04740 | 37.80720 | ... | 51.30120 | 63.78840 | 45.19360 | 54.98550 | 45.92580 | 48.11880 | 56.38370 | 50.75280 | 48.55550 | 54.92510 |
| 0610007L01Rik | 14.19070 | 12.70240 | 11.80150 | 12.56880 | 8.31761 | 9.62451 | 16.7531 | 13.15530 | 13.00190 | 12.49530 | ... | 7.70820 | 6.89976 | 10.68430 | 11.19820 | 7.71271 | 7.78097 | 6.92665 | 8.95075 | 8.89625 | 7.78528 |
| 0610007P08Rik | 1.71090 | 0.83838 | 0.76108 | 0.36203 | 0.50162 | 0.39988 | 1.0015 | 0.64103 | 0.59142 | 1.03540 | ... | 0.37039 | 0.17313 | 0.62976 | 0.34409 | 0.34740 | 0.27636 | 0.23581 | 0.56421 | 0.71718 | 0.46741 |
| 0610007P14Rik | 41.85590 | 48.17630 | 45.65490 | 15.94520 | 41.84570 | 49.94160 | 32.0791 | 41.87910 | 30.60340 | 51.52930 | ... | 49.23110 | 28.13490 | 35.72540 | 44.27430 | 32.64790 | 43.05270 | 31.75820 | 33.15400 | 42.72720 | 49.06030 |
| 0610007P22Rik | 6.69505 | 9.71821 | 8.49785 | 9.61188 | 6.70561 | 7.89604 | 8.6380 | 10.05780 | 11.05510 | 6.81731 | ... | 6.45745 | 8.29360 | 8.04557 | 7.57137 | 5.29742 | 6.44480 | 7.82238 | 5.25578 | 6.53128 | 7.03206 |
5 rows × 36 columns
In [5]:
Copied!
#raw_columns=bulk.columns.tolist()
#new_columns=[i+'-'+j for i, j in zip(bulk.iloc[0],raw_columns)]
#raw_columns=bulk.columns.tolist()
#new_columns=[i+'-'+j for i, j in zip(bulk.iloc[0],raw_columns)]
In [5]:
bulktb=ov.bulk2single.BulkTrajBlend(bulk_seq=bulk,single_seq=adata_hpc,
                                    bulk_group=bulk.columns.tolist(),
                                    celltype_key='celltype',)
#bulktb.bulk_preprocess_lazy()
#bulktb.single_preprocess_lazy()
In [6]:
bulktb.vae_configure(cell_target_num=200)
......random select 5000 single cells ......drop duplicates index in bulk data ......deseq2 normalize the bulk data ......log10 the bulk data ......calculate the mean of each group ......normalize the single data normalizing counts per cell finished (0:00:00) ......log1p the single data WARNING: adata.X seems to be already log-transformed. ......prepare the input of bulk2single ...loading data
In [25]:
bulktb.vae_train(batch_size=256,
                 learning_rate=1e-4,
                 hidden_size=256,
                 epoch_num=3500,
                 vae_save_dir='model',
                 vae_save_name='hpc_vae',
                 generate_save_dir='output',
                 generate_save_name='hpc')
#bulktb.vae_load('model/dgd_vae.pth')
...begin vae training
Train Epoch: 2551: 73%|███████▎ | 2552/3500 [20:43<07:41, 2.05it/s, loss=1.1536, min_loss=1.1536]
Early stopping at epoch 2553... min loss = 1.1536177322268486 ...vae training done!
...save trained vae in model/hpc_vae.pth.
In [8]:
bulktb.vae_load('model/hpc_vae.pth')
loading model from model/hpc_vae.pth
In [9]:
test_adata=bulktb.vae_generate(leiden_size=25)
...generating
generating: 100%|██████████| 3600/3600 [00:00<00:00, 6438.35it/s]
generated done! extracting highly variable genes
finished (0:00:00) --> added 'highly_variable', boolean vector (adata.var) 'means', float vector (adata.var) 'dispersions', float vector (adata.var) 'dispersions_norm', float vector (adata.var) computing PCA Note that scikit-learn's randomized PCA might not be exactly reproducible across different computational platforms. For exact reproducibility, choose `svd_solver='arpack'.` on highly variable genes with n_comps=100 finished (0:00:01) computing neighbors finished: added to `.uns['neighbors']` `.obsp['distances']`, distances for each pair of neighbors `.obsp['connectivities']`, weighted adjacency matrix (0:00:02) running Leiden clustering finished: found 37 clusters and added 'leiden', the cluster labels (adata.obs, categorical) (0:00:00) The filter leiden is ['27', '28', '29', '30', '31', '32', '33', '34', '35', '36']
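Before plotting the cell-type proportions with `bulk2single_plot_cellprop` below, it can help to look at the composition of the generated cells directly; a small sketch using the `celltype` column that `vae_generate` writes to `test_adata.obs`:

```python
# Sketch: number of generated cells per annotated cell type
print(test_adata.obs['celltype'].value_counts())
```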
In [26]:
import matplotlib.pyplot as plt
fig,ax=bulktb.vae_model.plot_loss(figsize=(3,3))
ax.spines['left'].set_position(('outward', 10))
ax.spines['bottom'].set_position(('outward', 10))
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.grid(False)
# configure which spines are visible
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_visible(True)
ax.spines['left'].set_visible(True)
plt.title('Beta-VAE Loss:\nHPC',fontsize=13)
plt.xlabel('Epoch',fontsize=13)
plt.ylabel('Loss',fontsize=13)
plt.savefig('figures/loss_vae_hpc.png',dpi=300,bbox_inches='tight')
plt.savefig('pdf/loss_vae_hpc.pdf',dpi=300,bbox_inches='tight')
In [11]:
ov.bulk2single.bulk2single_plot_cellprop(test_adata,
                                         celltype_key='celltype',
                                         )
In [12]:
# scanpy's rank_genes_groups expects a 'base' entry in adata.uns['log1p']; set it explicitly to avoid a KeyError
adata_hpc.uns['log1p']['base']=None
In [13]:
cor_pd=ov.bulk2single.bulk2single_plot_correlation(adata_hpc,test_adata,celltype_key='celltype',
                                                   return_table=True)
ranking genes WARNING: It seems you use rank_genes_groups on the raw count data. Please logarithmize your data before calling rank_genes_groups. finished: added to `.uns['rank_genes_groups']` 'names', sorted np.recarray to be indexed by group ids 'scores', sorted np.recarray to be indexed by group ids 'logfoldchanges', sorted np.recarray to be indexed by group ids 'pvals', sorted np.recarray to be indexed by group ids 'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:32)
In [14]:
#sc.tl.rank_genes_groups(single_data, celltype_key, method='wilcoxon')
marker_df = pd.DataFrame(adata_hpc.uns['rank_genes_groups']['names']).head(200)
#marker = list(set(np.unique(np.ravel(np.array(marker_df))))&set(generate_adata.var.index.tolist()))
marker = list(set(np.unique(np.ravel(np.array(marker_df))))&set(test_adata.var.index.tolist()))
# the mean expression of 200 marker genes of input sc data
sc_marker = adata_hpc[:,marker].to_df()
sc_marker['celltype'] = adata_hpc.obs['celltype']
sc_marker_mean = sc_marker.groupby('celltype')[marker].mean()
In [15]:
sc_marker_mean=sc_marker_mean.T
In [16]:
rf_ct = list(sc_marker_mean.columns)
In [17]:
cor_pd=pd.DataFrame(cor_pd,
                    index=rf_ct,
                    columns=rf_ct)
In [19]:
import seaborn as sns
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(5,5))
sns.heatmap(cor_pd,cmap='RdBu_r',cbar_kws={'shrink':0.5},
            square=True,xticklabels=True,yticklabels=True,)
plt.xlabel("scRNA-seq reference")
plt.ylabel("Deconvoluted bulk-seq")
plt.setp(ax.get_xticklabels(), rotation=90, ha="right", rotation_mode="anchor")
#plt.colorbar(im)
ax.set_title("Expression correlation\n(BulkTrajBlend):HPC")
plt.savefig('figures/heatmap_expcor_btb_hpc.png',dpi=300,bbox_inches='tight')
plt.savefig('pdf/heatmap_expcor_btb_hpc.pdf',dpi=300,bbox_inches='tight')
In [12]:
cmk1=ov.single.get_celltype_marker(test_adata,clustertype='celltype',
                                   scores_type='logfoldchanges')
...get cell type marker ranking genes finished: added to `.uns['rank_genes_groups']` 'names', sorted np.recarray to be indexed by group ids 'scores', sorted np.recarray to be indexed by group ids 'logfoldchanges', sorted np.recarray to be indexed by group ids 'pvals', sorted np.recarray to be indexed by group ids 'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:03)
In [13]:
adata_hpc1=ov.pp.preprocess(adata_hpc,mode='shiftlog|pearson',n_HVGs=3000,
                            target_sum=1e4)
Begin robust gene identification After filtration, 18578/18648 genes are kept. Among 18578 genes, 17425 genes are robust. End of robust gene identification. Begin size normalization: shiftlog and HVGs selection pearson normalizing counts per cell The following highly-expressed genes are not considered during normalization factor computation: ['Hba-a1', 'Igha', 'Gm26917', 'Malat1', 'S100a8', 'S100a9', 'Igkc', 'Hbb-bs', 'Camp'] finished (0:00:00) WARNING: adata.X seems to be already log-transformed. extracting highly variable genes --> added 'highly_variable', boolean vector (adata.var) 'highly_variable_rank', float vector (adata.var) 'highly_variable_nbatches', int vector (adata.var) 'highly_variable_intersection', boolean vector (adata.var) 'means', float vector (adata.var) 'variances', float vector (adata.var) 'residual_variances', float vector (adata.var) End of size normalization: shiftlog and HVGs selection pearson
In [14]:
cmk2=ov.single.get_celltype_marker(adata_hpc1,clustertype='celltype',
                                   scores_type='logfoldchanges')
...get cell type marker ranking genes finished: added to `.uns['rank_genes_groups']` 'names', sorted np.recarray to be indexed by group ids 'scores', sorted np.recarray to be indexed by group ids 'logfoldchanges', sorted np.recarray to be indexed by group ids 'pvals', sorted np.recarray to be indexed by group ids 'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:32)
In [15]:
cmk1={}
for clt in test_adata.obs['celltype'].cat.categories:
    degs = sc.get.rank_genes_groups_df(test_adata, group=clt,
                                       key='rank_genes_groups', log2fc_min=2,
                                       pval_cutoff=0.05)
    cmk1[clt]=degs['names'][:100].tolist()
In [16]:
cmk2={}
for clt in adata_hpc1.obs['celltype'].cat.categories:
    degs = sc.get.rank_genes_groups_df(adata_hpc1, group=clt,
                                       key='rank_genes_groups', log2fc_min=2,
                                       pval_cutoff=0.05)
    cmk2[clt]=degs['names'][:100].tolist()
In [17]:
all_gene=[]
for clt in cmk1.keys():
    all_gene+=cmk1[clt]
for clt in cmk2.keys():
    all_gene+=cmk2[clt]
all_gene=list(set(all_gene))
In [18]:
cmk1_pd=pd.DataFrame(index=all_gene)
for clt in cmk1.keys():
    cmk1_pd[clt]=0
    cmk1_pd.loc[cmk1[clt],clt]=1

cmk2_pd=pd.DataFrame(index=all_gene)
for clt in cmk2.keys():
    cmk2_pd[clt]=0
    cmk2_pd.loc[cmk2[clt],clt]=1
In [19]:
cmk1_pd['B'].sum()
Out[19]:
62
In [20]:
cmk2_pd['B'].sum()
Out[20]:
100
In [21]:
from scipy import spatial
plot_data=pd.DataFrame(index=test_adata.obs['celltype'].cat.categories,
                       columns=test_adata.obs['celltype'].cat.categories)
for clt1 in cmk1.keys():
    for clt2 in cmk1.keys():
        #print(clt,1 - spatial.distance.cosine(cmk1_pd['B'], cmk2_pd[clt]))
        plot_data.loc[clt1,clt2]=1 - spatial.distance.cosine(cmk1_pd[clt1], cmk2_pd[clt2])
In [22]:
plot_data=plot_data.astype(float)
In [24]:
fig, ax = plt.subplots(figsize=(5,5))
import seaborn as sns
sns.heatmap(plot_data,cmap='RdBu_r',cbar_kws={'shrink':0.5},
            square=True,xticklabels=True,yticklabels=True,vmax=0.3)
plt.xlabel("scRNA-seq reference")
plt.ylabel("Deconvoluted bulk-seq")
plt.setp(ax.get_xticklabels(), rotation=90, ha="right", rotation_mode="anchor")
#plt.colorbar(im)
ax.set_title("Cosine similarity\n(BulkTrajBlend):HPC")
plt.savefig('figures/heatmap_cossim_btb_hpc.png',dpi=300,bbox_inches='tight')
plt.savefig('pdf/heatmap_cossim_btb_hpc.pdf',dpi=300,bbox_inches='tight')
In [43]:
# mean of the diagonal (matched cell types)
diagonal_mean = np.trace(cor_pd.values) / len(cor_pd)
# mean of the off-diagonal entries (mismatched cell types)
non_diagonal_mean = (np.sum(cor_pd.values) - np.trace(cor_pd.values)) / (len(cor_pd)**2 - len(cor_pd))
print("Diagonal mean:", diagonal_mean)
print("Off-diagonal mean:", non_diagonal_mean)
Diagonal mean: 0.5631987742145932 Off-diagonal mean: -0.023593777334333103
In [45]:
# mean of the diagonal (matched cell types)
diagonal_mean = np.trace(plot_data.values) / len(plot_data)
# mean of the off-diagonal entries (mismatched cell types)
non_diagonal_mean = (np.sum(plot_data.values) - np.trace(plot_data.values)) / (len(plot_data)**2 - len(plot_data))
print("Diagonal mean:", diagonal_mean)
print("Off-diagonal mean:", non_diagonal_mean)
Diagonal mean: 0.6440443313050281 Off-diagonal mean: 0.04967595132995179
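The diagonal/off-diagonal means are computed the same way for the correlation matrix, the cosine-similarity matrix, and again for `res_dict` further down, so a small helper keeps the logic in one place; a sketch, with `diag_offdiag_mean` being a hypothetical name rather than an omicverse function:

```python
import numpy as np

def diag_offdiag_mean(df):
    """Return (mean of diagonal, mean of off-diagonal) for a square DataFrame."""
    vals = df.values
    n = len(df)
    diag = np.trace(vals) / n
    offdiag = (vals.sum() - np.trace(vals)) / (n**2 - n)
    return diag, offdiag

# e.g. diag_offdiag_mean(cor_pd) or diag_offdiag_mean(plot_data)
```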
In [53]:
adata3=test_adata[test_adata.obs['celltype']=='Basophil']
adata3
Out[53]:
View of AnnData object with n_obs × n_vars = 67 × 1714 obs: 'celltype', 'leiden' var: 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std' uns: 'hvg', 'pca', 'neighbors', 'leiden', 'rank_genes_groups' obsm: 'X_pca' varm: 'PCs' obsp: 'distances', 'connectivities'
In [54]:
import anndata
adata1=anndata.concat([bulktb.vae_model.single_data,adata3],
                      merge='same')
adata1
Out[54]:
AnnData object with n_obs × n_vars = 5067 × 1714 obs: 'celltype'
In [48]:
adata1.raw = adata1
sc.pp.highly_variable_genes(adata1, min_mean=0.0125, max_mean=3, min_disp=0.5)
adata1 = adata1[:, adata1.var.highly_variable]
extracting highly variable genes finished (0:00:00) --> added 'highly_variable', boolean vector (adata.var) 'means', float vector (adata.var) 'dispersions', float vector (adata.var) 'dispersions_norm', float vector (adata.var)
In [55]:
ov.pp.scale(adata1)
ov.pp.pca(adata1,layer='scaled',n_pcs=50)
... as `zero_center=True`, sparse input is densified and may lead to large memory consumption
In [56]:
adata1.obsm["X_mde"] = ov.utils.mde(adata1.obsm["scaled|original|X_pca"])
adata1
Out[56]:
AnnData object with n_obs × n_vars = 5067 × 1714 obs: 'celltype' uns: 'scaled|original|pca_var_ratios', 'scaled|original|cum_sum_eigenvalues' obsm: 'scaled|original|X_pca', 'X_mde' varm: 'scaled|original|pca_loadings' layers: 'scaled', 'lognorm'
In [57]:
ov.utils.embedding(adata1,
                   basis='X_mde',
                   color=['celltype'],title='HPC Cell Type (BulkTrajBlend)',
                   frameon='small',show=False,
                   wspace=0.4,palette=sc.pl.palettes.default_102)
plt.savefig('figures/umap_hpc_btb.png',dpi=300,bbox_inches='tight')
plt.savefig('pdf/umap_hpc_btb.pdf',dpi=300,bbox_inches='tight')
In [7]:
adata2=bulktb.vae_model.single_data.copy()
In [8]:
adata2.raw = adata2
sc.pp.highly_variable_genes(adata2, min_mean=0.0125, max_mean=3, min_disp=0.5)
adata2 = adata2[:, adata2.var.highly_variable]
extracting highly variable genes finished (0:00:00) --> added 'highly_variable', boolean vector (adata.var) 'means', float vector (adata.var) 'dispersions', float vector (adata.var) 'dispersions_norm', float vector (adata.var)
In [9]:
ov.pp.scale(adata2)
ov.pp.pca(adata2,layer='scaled',n_pcs=50)
... as `zero_center=True`, sparse input is densified and may lead to large memory consumption
In [10]:
adata2.obsm["X_mde"] = ov.utils.mde(adata2.obsm["scaled|original|X_pca"])
adata2
Out[10]:
AnnData object with n_obs × n_vars = 5000 × 3575 obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'lib_ID', 'lib', 'percent.mt', 'timepoint', 'celltype' var: 'gene_ids', 'feature_types', 'genome', 'n_cells', 'percent_cells', 'robust', 'mean', 'var', 'residual_variances', 'highly_variable_rank', 'highly_variable_features', 'highly_variable', 'means', 'dispersions', 'dispersions_norm' uns: 'celltype_colors', 'hvg', 'log1p', 'neighbors', 'timepoint_colors', 'scaled|original|pca_var_ratios', 'scaled|original|cum_sum_eigenvalues' obsm: 'X_mde', 'X_mde_hpc', 'scaled|original|X_pca' varm: 'scaled|original|pca_loadings' layers: 'counts', 'scaled', 'lognorm' obsp: 'connectivities', 'distances'
In [14]:
adata2.uns['scaled|original|pca_var_ratios']
Out[14]:
array([0.03828995, 0.02203958, 0.02010177, 0.00981365, 0.00765554, 0.00663802, 0.0060489 , 0.00546947, 0.00491379, 0.00448196, 0.00392125, 0.00360923, 0.00308134, 0.00280451, 0.00270297, 0.00258297, 0.00222222, 0.00196814, 0.00181953, 0.00180166, 0.0016758 , 0.00156872, 0.00145701, 0.00140349, 0.00137118, 0.00133003, 0.00132554, 0.00129611, 0.00127141, 0.0012159 , 0.00115621, 0.00115 , 0.00111434, 0.00109806, 0.00107792, 0.00107288, 0.00105312, 0.00104424, 0.0010417 , 0.00102835, 0.0010232 , 0.00101685, 0.0010093 , 0.00100412, 0.00099931, 0.00099478, 0.00098051, 0.00097637, 0.0009724 , 0.00096164], dtype=float32)
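`scaled|original|pca_var_ratios` holds the per-component explained-variance ratios, so their cumulative sum shows how much variance the 50 PCs passed to VIA below actually capture; a quick sketch:

```python
import numpy as np

# Sketch: cumulative variance explained by the stored principal components
var_ratio = adata2.uns['scaled|original|pca_var_ratios']
print(f"Variance explained by {len(var_ratio)} PCs: {np.cumsum(var_ratio)[-1]:.3f}")
```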
In [12]:
adata2.obsm['X_pca']=adata2.obsm['scaled|original|X_pca']
In [65]:
v0 = ov.single.pyVIA(adata=adata2,adata_key='scaled|original|X_pca',adata_ncomps=100, basis='X_mde',
                     clusters='celltype',knn=15,random_seed=4,root_user=['HPC'],
                     dataset='group')
v0.run()
2023-10-04 00:51:52.266483 Running VIA over input data of 5000 (samples) x 50 (features) 2023-10-04 00:51:52.266528 Knngraph has 15 neighbors 2023-10-04 00:51:53.799258 Finished global pruning of 15-knn graph used for clustering at level of 0.15. Kept 48.1 % of edges. 2023-10-04 00:51:53.813071 Number of connected components used for clustergraph is 1 2023-10-04 00:51:53.882640 Commencing community detection 2023-10-04 00:51:54.113333 Finished running Leiden algorithm. Found 269 clusters. 2023-10-04 00:51:54.114420 Merging 226 very small clusters (<10) 2023-10-04 00:51:54.116425 Finished detecting communities. Found 43 communities 2023-10-04 00:51:54.116650 Making cluster graph. Global cluster graph pruning level: 0.15 2023-10-04 00:51:54.122134 Graph has 1 connected components before pruning 2023-10-04 00:51:54.123925 Graph has 3 connected components after pruning 2023-10-04 00:51:54.125250 Graph has 1 connected components after reconnecting 2023-10-04 00:51:54.125655 0.0% links trimmed from local pruning relative to start 2023-10-04 00:51:54.125669 69.8% links trimmed from global pruning relative to start 2023-10-04 00:51:54.128966 Starting make edgebundle viagraph... 2023-10-04 00:51:54.128979 Make via clustergraph edgebundle 2023-10-04 00:51:55.383536 Hammer dims: Nodes shape: (43, 2) Edges shape: (156, 3) 2023-10-04 00:51:55.385450 component number 0 out of [0] 2023-10-04 00:51:55.404793\group root method 2023-10-04 00:51:55.404805or component 0, the root is HPC and ri HPC 2023-10-04 00:51:55.414741 New root is 11 and majority HPC 2023-10-04 00:51:55.419315 Computing lazy-teleporting expected hitting times 2023-10-04 00:51:56.405670 Identifying terminal clusters corresponding to unique lineages... 2023-10-04 00:51:56.405748 Closeness:[2, 3, 6, 7, 8, 17, 18, 20, 21, 23, 25, 26, 29, 31, 35, 36, 37, 38, 39] 2023-10-04 00:51:56.405759 Betweenness:[3, 4, 5, 7, 8, 10, 11, 12, 13, 14, 16, 18, 19, 20, 21, 23, 25, 26, 28, 29, 31, 35, 36, 37, 38, 39, 41] 2023-10-04 00:51:56.405766 Out Degree:[1, 2, 3, 4, 5, 8, 9, 13, 14, 16, 17, 18, 20, 21, 23, 25, 26, 28, 29, 31, 35, 36, 37, 38, 39, 41] 2023-10-04 00:51:56.406220 Cluster 4 had 3 or more neighboring terminal states [5, 18, 26] and so we removed cluster 5 2023-10-04 00:51:56.406257 Cluster 8 had 3 or more neighboring terminal states [29, 35, 36, 37] and so we removed cluster 35 2023-10-04 00:51:56.406321 Cluster 26 had 3 or more neighboring terminal states [4, 18, 28] and so we removed cluster 28 remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing 2023-10-04 00:51:56.406512 Terminal clusters corresponding to unique lineages in this component are [2, 3, 4, 8, 13, 14, 16, 17, 18, 20, 21, 23, 25, 26, 29, 
36, 37, 38, 39] 2023-10-04 00:51:56.796941 From root 11, the Terminal state 2 is reached 150 times. 2023-10-04 00:51:57.187928 From root 11, the Terminal state 3 is reached 268 times. 2023-10-04 00:51:57.575305 From root 11, the Terminal state 4 is reached 235 times. 2023-10-04 00:51:58.034134 From root 11, the Terminal state 8 is reached 40 times. 2023-10-04 00:51:58.444399 From root 11, the Terminal state 13 is reached 151 times. 2023-10-04 00:51:58.843621 From root 11, the Terminal state 14 is reached 203 times. 2023-10-04 00:51:59.252916 From root 11, the Terminal state 16 is reached 173 times. 2023-10-04 00:51:59.632808 From root 11, the Terminal state 17 is reached 295 times. 2023-10-04 00:52:00.105628 From root 11, the Terminal state 18 is reached 19 times. 2023-10-04 00:52:00.548167 From root 11, the Terminal state 20 is reached 71 times. 2023-10-04 00:52:00.969732 From root 11, the Terminal state 21 is reached 164 times. 2023-10-04 00:52:01.440416 From root 11, the Terminal state 23 is reached 27 times. 2023-10-04 00:52:01.885821 From root 11, the Terminal state 25 is reached 59 times. 2023-10-04 00:52:02.295557 From root 11, the Terminal state 26 is reached 204 times. 2023-10-04 00:52:02.750794 From root 11, the Terminal state 29 is reached 42 times. 2023-10-04 00:52:03.207702 From root 11, the Terminal state 36 is reached 30 times. 2023-10-04 00:52:03.587912 From root 11, the Terminal state 37 is reached 281 times. 2023-10-04 00:52:04.054367 From root 11, the Terminal state 38 is reached 5 times. 2023-10-04 00:52:04.520652 From root 11, the Terminal state 39 is reached 21 times. 2023-10-04 00:52:04.553839 Terminal clusters corresponding to unique lineages are {2: 'Small Pre-B', 3: 'B', 4: 'Erythroblast', 8: 'Kupffer', 13: 'cDC1', 14: 'imNP', 16: 'imNP', 17: 'B', 18: 'Erythroblast', 20: 'Pro-erythroblast', 21: 'B', 23: 'pDC', 25: 'Pro-B', 26: 'Erythroblast', 29: 'Kupffer', 36: 'Kupffer', 37: 'Kupffer', 38: 'Basophil', 39: 'Kupffer'} 2023-10-04 00:52:04.553877 Begin projection of pseudotime and lineage likelihood 2023-10-04 00:52:05.183074 Graph has 1 connected components before pruning 2023-10-04 00:52:05.185348 Graph has 16 connected components after pruning 2023-10-04 00:52:05.194040 Graph has 1 connected components after reconnecting 2023-10-04 00:52:05.194459 46.8% links trimmed from local pruning relative to start 2023-10-04 00:52:05.194479 60.9% links trimmed from global pruning relative to start 2023-10-04 00:52:05.196378 Start making edgebundle milestone... 2023-10-04 00:52:05.196401 Start finding milestones 2023-10-04 00:52:05.877314 End milestones 2023-10-04 00:52:05.877371 Will use via-pseudotime for edges, otherwise consider providing a list of numeric labels (single cell level) or via_object 2023-10-04 00:52:05.884240 Recompute weights 2023-10-04 00:52:05.892485 pruning milestone graph based on recomputed weights 2023-10-04 00:52:05.893164 Graph has 1 connected components before pruning 2023-10-04 00:52:05.893649 Graph has 3 connected components after pruning 2023-10-04 00:52:05.895332 Graph has 1 connected components after reconnecting 2023-10-04 00:52:05.895912 74.4% links trimmed from global pruning relative to start 2023-10-04 00:52:05.895933 regenerate igraph on pruned edges 2023-10-04 00:52:05.902186 Setting numeric label as single cell pseudotime for coloring edges 2023-10-04 00:52:05.914148 Making smooth edges 2023-10-04 00:52:06.161079 Time elapsed 13.2 seconds
In [66]:
import matplotlib.pyplot as plt
fig,ax=v0.plot_stream(basis='X_mde',clusters='celltype',
                      density_grid=0.8, scatter_size=30, scatter_alpha=0.3, linewidth=0.5)
plt.title('Raw HPC',fontsize=12)
In [68]:
v0.get_pseudotime(adata2)
sc.pp.neighbors(adata2,n_neighbors=15,use_rep='scaled|original|X_pca')
ov.utils.cal_paga(adata2,use_time_prior='pt_via',vkey='paga',
                  groups='celltype')
...the pseudotime of VIA added to AnnData obs named `pt_via` computing neighbors finished: added to `.uns['neighbors']` `.obsp['distances']`, distances for each pair of neighbors `.obsp['connectivities']`, weighted adjacency matrix (0:00:00) running PAGA using priors: ['pt_via'] finished added 'paga/connectivities', connectivities adjacency (adata.uns) 'paga/connectivities_tree', connectivities subtree (adata.uns) 'paga/transitions_confidence', velocity transitions (adata.uns)
In [69]:
raw_transitions=pd.DataFrame(adata2.uns['paga']['transitions_confidence'].toarray(),
                             index=adata2.obs['celltype'].cat.categories,
                             columns=adata2.obs['celltype'].cat.categories)
In [70]:
raw_transitions.loc['Basophil','HPC']
Out[70]:
0.0
In [71]:
v1 = ov.single.pyVIA(adata=adata1,adata_key='scaled|original|X_pca',adata_ncomps=100, basis='X_mde',
                     clusters='celltype',knn=15,random_seed=4,root_user=['HPC'],
                     #jac_std_global=0.01,
                     dataset='group')
v1.run()
2023-10-04 00:53:09.628408 Running VIA over input data of 5067 (samples) x 50 (features) 2023-10-04 00:53:09.628449 Knngraph has 15 neighbors 2023-10-04 00:53:11.227187 Finished global pruning of 15-knn graph used for clustering at level of 0.15. Kept 48.0 % of edges. 2023-10-04 00:53:11.241659 Number of connected components used for clustergraph is 1 2023-10-04 00:53:11.312945 Commencing community detection 2023-10-04 00:53:11.547295 Finished running Leiden algorithm. Found 276 clusters. 2023-10-04 00:53:11.548402 Merging 236 very small clusters (<10) 2023-10-04 00:53:11.550459 Finished detecting communities. Found 40 communities 2023-10-04 00:53:11.550685 Making cluster graph. Global cluster graph pruning level: 0.15 2023-10-04 00:53:11.555895 Graph has 1 connected components before pruning 2023-10-04 00:53:11.557532 Graph has 4 connected components after pruning 2023-10-04 00:53:11.559369 Graph has 1 connected components after reconnecting 2023-10-04 00:53:11.559761 0.0% links trimmed from local pruning relative to start 2023-10-04 00:53:11.559775 70.3% links trimmed from global pruning relative to start 2023-10-04 00:53:11.562797 Starting make edgebundle viagraph... 2023-10-04 00:53:11.562810 Make via clustergraph edgebundle 2023-10-04 00:53:11.689086 Hammer dims: Nodes shape: (40, 2) Edges shape: (130, 3) 2023-10-04 00:53:11.690624 component number 0 out of [0] 2023-10-04 00:53:11.710853\group root method 2023-10-04 00:53:11.710865or component 0, the root is HPC and ri HPC 2023-10-04 00:53:11.723451 New root is 23 and majority HPC 2023-10-04 00:53:11.725809 Computing lazy-teleporting expected hitting times 2023-10-04 00:53:12.620869 Identifying terminal clusters corresponding to unique lineages... 2023-10-04 00:53:12.620956 Closeness:[0, 4, 5, 6, 8, 10, 14, 15, 20, 24, 25, 27, 29, 34, 38] 2023-10-04 00:53:12.620968 Betweenness:[0, 1, 2, 4, 5, 10, 13, 14, 15, 16, 18, 19, 20, 22, 24, 25, 27, 30, 31, 32, 34, 35, 38] 2023-10-04 00:53:12.620974 Out Degree:[2, 4, 5, 6, 8, 9, 12, 13, 14, 15, 18, 19, 20, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 37, 38] 2023-10-04 00:53:12.621454 Cluster 6 had 3 or more neighboring terminal states [4, 13, 25] and so we removed cluster 13 2023-10-04 00:53:12.621487 Cluster 10 had 3 or more neighboring terminal states [27, 29, 34, 38] and so we removed cluster 10 remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing 2023-10-04 00:53:12.621718 Terminal clusters corresponding to unique lineages in this component are [0, 2, 4, 5, 6, 8, 14, 15, 22, 25, 27, 29, 30, 31, 32, 34, 38] 2023-10-04 00:53:13.037699 From root 23, the Terminal state 0 is reached 15 times. 
2023-10-04 00:53:13.406985 From root 23, the Terminal state 2 is reached 251 times. 2023-10-04 00:53:13.847531 From root 23, the Terminal state 4 is reached 14 times. 2023-10-04 00:53:14.297608 From root 23, the Terminal state 5 is reached 14 times. 2023-10-04 00:53:14.737494 From root 23, the Terminal state 6 is reached 13 times. 2023-10-04 00:53:15.059342 From root 23, the Terminal state 8 is reached 334 times. 2023-10-04 00:53:15.500413 From root 23, the Terminal state 14 is reached 23 times. 2023-10-04 00:53:15.846519 From root 23, the Terminal state 15 is reached 306 times. 2023-10-04 00:53:16.289389 From root 23, the Terminal state 22 is reached 21 times. 2023-10-04 00:53:16.732543 From root 23, the Terminal state 25 is reached 7 times. 2023-10-04 00:53:17.179419 From root 23, the Terminal state 27 is reached 5 times. 2023-10-04 00:53:17.627761 From root 23, the Terminal state 29 is reached 5 times. 2023-10-04 00:53:18.072584 From root 23, the Terminal state 30 is reached 16 times. 2023-10-04 00:53:18.482375 From root 23, the Terminal state 31 is reached 128 times. 2023-10-04 00:53:18.831369 From root 23, the Terminal state 32 is reached 307 times. 2023-10-04 00:53:19.280994 From root 23, the Terminal state 34 is reached 5 times. 2023-10-04 00:53:19.634525 From root 23, the Terminal state 38 is reached 295 times. 2023-10-04 00:53:19.667705 Terminal clusters corresponding to unique lineages are {0: 'Erythroblast', 2: 'imNP', 4: 'Small Pre-B', 5: 'Erythroblast', 6: 'B', 8: 'Kupffer', 14: 'Pro-erythroblast', 15: 'imNP', 22: 'Pro-B', 25: 'B', 27: 'Kupffer', 29: 'Kupffer', 30: 'Pro-B', 31: 'imNP', 32: 'mNP', 34: 'Kupffer', 38: 'Kupffer'} 2023-10-04 00:53:19.667740 Begin projection of pseudotime and lineage likelihood 2023-10-04 00:53:20.317285 Graph has 1 connected components before pruning 2023-10-04 00:53:20.319340 Graph has 14 connected components after pruning 2023-10-04 00:53:20.327021 Graph has 1 connected components after reconnecting 2023-10-04 00:53:20.327426 40.8% links trimmed from local pruning relative to start 2023-10-04 00:53:20.327445 56.9% links trimmed from global pruning relative to start 2023-10-04 00:53:20.329203 Start making edgebundle milestone... 2023-10-04 00:53:20.329226 Start finding milestones 2023-10-04 00:53:20.977078 End milestones 2023-10-04 00:53:20.977118 Will use via-pseudotime for edges, otherwise consider providing a list of numeric labels (single cell level) or via_object 2023-10-04 00:53:20.982429 Recompute weights 2023-10-04 00:53:20.989686 pruning milestone graph based on recomputed weights 2023-10-04 00:53:20.990359 Graph has 1 connected components before pruning 2023-10-04 00:53:20.990848 Graph has 2 connected components after pruning 2023-10-04 00:53:20.991807 Graph has 1 connected components after reconnecting 2023-10-04 00:53:20.992391 74.1% links trimmed from global pruning relative to start 2023-10-04 00:53:20.992413 regenerate igraph on pruned edges 2023-10-04 00:53:20.998343 Setting numeric label as single cell pseudotime for coloring edges 2023-10-04 00:53:21.010215 Making smooth edges 2023-10-04 00:53:21.267450 Time elapsed 10.9 seconds
In [72]:
import matplotlib.pyplot as plt
fig,ax=v1.plot_stream(basis='X_mde',clusters='celltype',
                      density_grid=0.8, scatter_size=30, scatter_alpha=0.3, linewidth=0.5)
plt.title('HPC (BulkTrajBlend)',fontsize=12)
plt.savefig('figures/via_hpc_btb.png',dpi=300,bbox_inches='tight')
In [73]:
ov.utils.plot_paga(adata2,basis='mde', size=50, alpha=.1,title='PAGA LTNN-graph',
                   min_edge_width=2, node_size_scale=1.5,show=False,legend_loc=False)
In [74]:
v1.get_pseudotime(adata1)
sc.pp.neighbors(adata1,n_neighbors=15,use_rep='scaled|original|X_pca')
ov.utils.cal_paga(adata1,use_time_prior='pt_via',vkey='paga',
                  groups='celltype')
...the pseudotime of VIA added to AnnData obs named `pt_via` computing neighbors finished: added to `.uns['neighbors']` `.obsp['distances']`, distances for each pair of neighbors `.obsp['connectivities']`, weighted adjacency matrix (0:00:00) running PAGA using priors: ['pt_via'] finished added 'paga/connectivities', connectivities adjacency (adata.uns) 'paga/connectivities_tree', connectivities subtree (adata.uns) 'paga/transitions_confidence', velocity transitions (adata.uns)
In [75]:
after_transitions=pd.DataFrame(adata1.uns['paga']['transitions_confidence'].toarray(),
                               index=adata1.obs['celltype'].cat.categories,
                               columns=adata1.obs['celltype'].cat.categories)
In [76]:
ov.utils.plot_paga(adata1,basis='mde', size=50, alpha=.1,title='PAGA LTNN-graph',
                   min_edge_width=2, node_size_scale=1.5,show=False,legend_loc=False)
plt.title('PAGA HPC (BulkTrajBlend)',fontsize=13)
plt.savefig('figures/paga_hpc_btb.png',dpi=300,bbox_inches='tight')
plt.savefig('pdf/paga_hpc_btb.pdf',dpi=300,bbox_inches='tight')
In [78]:
np.var(adata1.obs.loc[adata1.obs['celltype']=='Basophil','pt_via'])
Out[78]:
0.012282284923191812
In [81]:
np.var(adata2.obs.loc[adata2.obs['celltype']=='Basophil','pt_via'])
Out[81]:
0.007093842603434545
In [82]:
res_dict={}
# Correlation of mean marker expression: mean of the diagonal and of the off-diagonal entries
diagonal_mean = np.trace(cor_pd.values) / len(cor_pd)
non_diagonal_mean = (np.sum(cor_pd.values) - np.trace(cor_pd.values)) / (len(cor_pd)**2 - len(cor_pd))
res_dict['Cor_mean']=diagonal_mean
res_dict['non_Cor_mean']=non_diagonal_mean
# Cosine similarity of marker-gene sets: mean of the diagonal and of the off-diagonal entries
diagonal_mean = np.trace(plot_data.values) / len(plot_data)
non_diagonal_mean = (np.sum(plot_data.values) - np.trace(plot_data.values)) / (len(plot_data)**2 - len(plot_data))
res_dict['Cos_mean']=diagonal_mean
res_dict['non_Cos_mean']=non_diagonal_mean
# PAGA transition confidence of the Basophil lineage, before and after interpolation
res_dict['Trans_raw']=raw_transitions.loc['Basophil'].max()
res_dict['Trans_after']=after_transitions.loc['Basophil'].max()
# Variance of Basophil pseudotime, before and after interpolation
res_dict['Var_raw']=np.var(adata2.obs.loc[adata2.obs['celltype']=='Basophil','pt_via'])
res_dict['Var_after']=np.var(adata1.obs.loc[adata1.obs['celltype']=='Basophil','pt_via'])
In [83]:
res_dict
Out[83]:
{'Cor_mean': 0.5631987742145932, 'non_Cor_mean': -0.023593777334333103, 'Cos_mean': 0.6440443313050281, 'non_Cos_mean': 0.04967595132995179, 'Trans_raw': 0.0, 'Trans_after': 0.021442250696755896, 'Var_raw': 0.007093842603434545, 'Var_after': 0.012282284923191812}
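For later comparison with other runs, the metric dictionary can also be collected into a one-row DataFrame before pickling; a sketch, where the row label `BulkTrajBlend_HPC` is only an illustrative name:

```python
import pandas as pd

# Sketch: one row per method/dataset makes it easy to concatenate results later
metric_df = pd.DataFrame([res_dict], index=['BulkTrajBlend_HPC'])
metric_df
```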
In [84]:
import pickle
with open('result/metric_btb_hpc.pkl','wb') as f:
    pickle.dump(res_dict,f)
In [85]:
with open('result/metric_btb_hpc.pkl','rb') as f:
    res_dict=pickle.load(f)
res_dict
Out[85]:
{'Cor_mean': 0.5631987742145932, 'non_Cor_mean': -0.023593777334333103, 'Cos_mean': 0.6440443313050281, 'non_Cos_mean': 0.04967595132995179, 'Trans_raw': 0.0, 'Trans_after': 0.021442250696755896, 'Var_raw': 0.007093842603434545, 'Var_after': 0.012282284923191812}