metric
In [1]:
import omicverse as ov
from omicverse.utils import mde
import scanpy as sc
import scvelo as scv
ov.utils.ov_plot_set()
In [2]:
adata=scv.datasets.dentategyrus()
adata
Out[2]:
AnnData object with n_obs × n_vars = 2930 × 13913
    obs: 'clusters', 'age(days)', 'clusters_enlarged'
    uns: 'clusters_colors'
    obsm: 'X_umap'
    layers: 'ambiguous', 'spliced', 'unspliced'
In [3]:
adata.obs['celltype']=adata.obs['clusters'].copy()
In [4]:
import numpy as np
import pandas as pd
bulk=pd.read_csv('data/GSE74985_mergedCount.txt.gz',index_col=0,sep='\t')
bulk=ov.bulk.Matrix_ID_mapping(bulk,'genesets/pair_GRCm39.tsv')
bulk.head()
Out[4]:
 | dg_d_1 | dg_d_2 | dg_d_3 | dg_v_1 | dg_v_2 | dg_v_3 | ca4_1 | ca4_2 | ca4_3 | ca3_d_1 | ... | ca3_v_3 | ca2_1 | ca2_2 | ca2_3 | ca1_d_1 | ca1_d_2 | ca1_d_3 | ca1_v_1 | ca1_v_2 | ca1_v_3 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Crtam | 0 | 0 | 0 | 0 | 1 | 11 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
Rbp1 | 0 | 0 | 0 | 0 | 0 | 1 | 2 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 |
Tmem248 | 422 | 231 | 236 | 648 | 584 | 412 | 760 | 571 | 629 | 786 | ... | 894 | 652 | 653 | 574 | 973 | 1146 | 1106 | 300 | 201 | 288 |
Nme9 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
Rfesd | 248 | 31 | 76 | 328 | 266 | 303 | 275 | 540 | 296 | 273 | ... | 639 | 592 | 404 | 366 | 434 | 370 | 223 | 64 | 61 | 101 |
5 rows × 24 columns
In [5]:
adata.obs['celltype'].value_counts().loc['OPC']
Out[5]:
53
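The OPC count above (53 cells) serves as the base unit for generation: every `cell_target_num` used later is this count multiplied by a scale factor. A minimal sketch of the target sizes explored below (the loop itself is illustrative, not omicverse API):

# Target number of OPC cells to generate at each scale factor,
# mirroring the cell_target_num argument used in later cells.
base = adata.obs['celltype'].value_counts().loc['OPC']  # 53 in this dataset
for scale_value in [1, 2, 4, 6, 8, 10]:
    print(scale_value, '->', base * scale_value, 'target OPC cells')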
In [6]:
adata1=scv.datasets.dentategyrus()
adata1.obs['celltype']=adata1.obs['clusters'].copy()
adata_t=ov.pp.preprocess(adata1,mode='shiftlog|pearson',n_HVGs=3000,
target_sum=1e4)
Begin robust gene identification After filtration, 13264/13913 genes are kept. Among 13264 genes, 13189 genes are robust. End of robust gene identification. Begin size normalization: shiftlog and HVGs selection pearson normalizing counts per cell The following highly-expressed genes are not considered during normalization factor computation: ['Hba-a1', 'Malat1', 'Ptgds', 'Hbb-bt'] finished (0:00:00) extracting highly variable genes --> added 'highly_variable', boolean vector (adata.var) 'highly_variable_rank', float vector (adata.var) 'highly_variable_nbatches', int vector (adata.var) 'highly_variable_intersection', boolean vector (adata.var) 'means', float vector (adata.var) 'variances', float vector (adata.var) 'residual_variances', float vector (adata.var) End of size normalization: shiftlog and HVGs selection pearson
In [7]:
adata2=adata_t
adata2.raw = adata2
sc.pp.highly_variable_genes(
adata2,
n_top_genes=3000,
)
adata2 = adata2[:, adata2.var.highly_variable]
ov.pp.scale(adata2)
ov.pp.pca(adata2,layer='scaled',n_pcs=50)
adata2.obsm["X_mde"] = ov.utils.mde(adata2.obsm["scaled|original|X_pca"])
adata2
If you pass `n_top_genes`, all cutoffs are ignored. extracting highly variable genes finished (0:00:00) --> added 'highly_variable', boolean vector (adata.var) 'means', float vector (adata.var) 'dispersions', float vector (adata.var) 'dispersions_norm', float vector (adata.var) ... as `zero_center=True`, sparse input is densified and may lead to large memory consumption
Out[7]:
AnnData object with n_obs × n_vars = 2930 × 3000
    obs: 'clusters', 'age(days)', 'clusters_enlarged', 'celltype'
    var: 'n_cells', 'percent_cells', 'robust', 'mean', 'var', 'residual_variances', 'highly_variable_rank', 'highly_variable_features', 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
    uns: 'clusters_colors', 'log1p', 'hvg', 'scaled|original|pca_var_ratios', 'scaled|original|cum_sum_eigenvalues'
    obsm: 'X_umap', 'scaled|original|X_pca', 'X_mde'
    varm: 'scaled|original|pca_loadings'
    layers: 'ambiguous', 'spliced', 'unspliced', 'counts', 'scaled', 'lognorm'
In [9]:
v0 = ov.single.pyVIA(adata=adata2,adata_key='scaled|original|X_pca',adata_ncomps=100, basis='X_mde',
clusters='celltype',knn=15,random_seed=4,root_user=['nIPC','Neuroblast'],
dataset='group')
v0.run()
2023-10-17 20:19:31.872249 Running VIA over input data of 2930 (samples) x 50 (features) 2023-10-17 20:19:31.872419 Knngraph has 15 neighbors 2023-10-17 20:19:32.816749 Finished global pruning of 15-knn graph used for clustering at level of 0.15. Kept 41.1 % of edges. 2023-10-17 20:19:32.826385 Number of connected components used for clustergraph is 1 2023-10-17 20:19:32.871972 Commencing community detection 2023-10-17 20:19:33.024390 Finished running Leiden algorithm. Found 552 clusters. 2023-10-17 20:19:33.025540 Merging 521 very small clusters (<10) 2023-10-17 20:19:33.029753 Finished detecting communities. Found 81 communities 2023-10-17 20:19:33.029909 Making cluster graph. Global cluster graph pruning level: 0.15 2023-10-17 20:19:33.035098 Graph has 1 connected components before pruning 2023-10-17 20:19:33.038089 Graph has 18 connected components after pruning 2023-10-17 20:19:33.050020 Graph has 1 connected components after reconnecting 2023-10-17 20:19:33.050530 14.0% links trimmed from local pruning relative to start 2023-10-17 20:19:33.050552 60.7% links trimmed from global pruning relative to start 2023-10-17 20:19:33.059742 Starting make edgebundle viagraph... 2023-10-17 20:19:33.059759 Make via clustergraph edgebundle 2023-10-17 20:19:34.562572 Hammer dims: Nodes shape: (81, 2) Edges shape: (482, 3) 2023-10-17 20:19:34.564614 component number 0 out of [0] 2023-10-17 20:19:34.577285 group root method 2023-10-17 20:19:34.577300 For component 0, the root is nIPC and ri nIPC 2023-10-17 20:19:34.577306 group root method 2023-10-17 20:19:34.577311 For component 0, the root is Neuroblast and ri Neuroblast 2023-10-17 20:19:34.586749 New root is 5 and majority Neuroblast 2023-10-17 20:19:34.587082 New root is 8 and majority Neuroblast 2023-10-17 20:19:34.593348 Computing lazy-teleporting expected hitting times 2023-10-17 20:19:37.063577 Identifying terminal clusters corresponding to unique lineages...
2023-10-17 20:19:37.063670 Closeness:[6, 7, 9, 10, 14, 15, 16, 17, 18, 20, 21, 22, 26] 2023-10-17 20:19:37.063682 Betweenness:[2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 20, 21, 22, 25, 26, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 42, 43, 44, 45, 47, 48, 51, 52, 54, 55, 57, 58, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 71, 73, 77, 78, 79, 80] 2023-10-17 20:19:37.063693 Out Degree:[2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 20, 21, 22, 23, 25, 26, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 42, 43, 44, 45, 47, 48, 49, 51, 52, 55, 57, 58, 59, 60, 62, 64, 65, 68, 69, 71, 72, 73, 77, 78, 79, 80] remove the [0:2] just using to speed up testing (line repeated once per terminal-cluster candidate) 2023-10-17 20:19:37.064640 Terminal clusters corresponding to unique lineages in this component are [6, 9, 10, 15, 20, 21, 25, 26, 29, 30, 31, 32, 33, 34, 37, 38, 39, 40, 42, 43, 44, 45, 47, 51, 52, 55, 57, 58, 59, 60, 62, 64, 65, 68, 69, 71, 73, 77, 78, 79, 80] 2023-10-17 20:19:38.113724 From root 8, the Terminal state 6 is reached 5 times. 2023-10-17 20:19:39.047449 From root 8, the Terminal state 9 is reached 63 times. 2023-10-17 20:19:40.054585 From root 8, the Terminal state 10 is reached 5 times. 2023-10-17 20:19:41.000769 From root 8, the Terminal state 15 is reached 56 times. 2023-10-17 20:19:42.103080 From root 8, the Terminal state 20 is reached 5 times. 2023-10-17 20:19:43.211637 From root 8, the Terminal state 21 is reached 5 times. 2023-10-17 20:19:44.027028 From root 8, the Terminal state 25 is reached 167 times. 2023-10-17 20:19:45.030673 From root 8, the Terminal state 26 is reached 5 times.
2023-10-17 20:19:46.029898 From root 8, the Terminal state 29 is reached 5 times. 2023-10-17 20:19:47.120803 From root 8, the Terminal state 30 is reached 5 times. 2023-10-17 20:19:48.206189 From root 8, the Terminal state 31 is reached 6 times. 2023-10-17 20:19:49.044044 From root 8, the Terminal state 32 is reached 162 times. 2023-10-17 20:19:49.982204 From root 8, the Terminal state 33 is reached 52 times. 2023-10-17 20:19:51.044689 From root 8, the Terminal state 34 is reached 20 times. 2023-10-17 20:19:52.201161 From root 8, the Terminal state 37 is reached 5 times. 2023-10-17 20:19:53.209786 From root 8, the Terminal state 38 is reached 5 times. 2023-10-17 20:19:54.217056 From root 8, the Terminal state 39 is reached 8 times. 2023-10-17 20:19:55.336570 From root 8, the Terminal state 40 is reached 5 times. 2023-10-17 20:19:56.676971 From root 8, the Terminal state 42 is reached 20 times. 2023-10-17 20:19:57.738734 From root 8, the Terminal state 43 is reached 152 times. 2023-10-17 20:19:58.988413 From root 8, the Terminal state 44 is reached 45 times. 2023-10-17 20:20:00.015061 From root 8, the Terminal state 45 is reached 150 times. 2023-10-17 20:20:00.973797 From root 8, the Terminal state 47 is reached 223 times. 2023-10-17 20:20:01.970078 From root 8, the Terminal state 51 is reached 163 times. 2023-10-17 20:20:03.087770 From root 8, the Terminal state 52 is reached 24 times. 2023-10-17 20:20:04.155657 From root 8, the Terminal state 55 is reached 52 times. 2023-10-17 20:20:05.126218 From root 8, the Terminal state 57 is reached 37 times. 2023-10-17 20:20:06.144261 From root 8, the Terminal state 58 is reached 8 times. 2023-10-17 20:20:07.052637 From root 8, the Terminal state 59 is reached 172 times. 2023-10-17 20:20:08.202142 From root 8, the Terminal state 60 is reached 5 times. 2023-10-17 20:20:09.332576 From root 8, the Terminal state 62 is reached 5 times. 2023-10-17 20:20:10.445076 From root 8, the Terminal state 64 is reached 5 times. 2023-10-17 20:20:11.542867 From root 8, the Terminal state 65 is reached 13 times. 2023-10-17 20:20:12.363030 From root 8, the Terminal state 68 is reached 197 times. 2023-10-17 20:20:13.345124 From root 8, the Terminal state 69 is reached 12 times. 2023-10-17 20:20:14.427206 From root 8, the Terminal state 71 is reached 5 times. 2023-10-17 20:20:15.387272 From root 8, the Terminal state 73 is reached 104 times. 2023-10-17 20:20:16.532201 From root 8, the Terminal state 77 is reached 29 times. 2023-10-17 20:20:17.754377 From root 8, the Terminal state 78 is reached 7 times. 2023-10-17 20:20:18.856330 From root 8, the Terminal state 79 is reached 12 times. 2023-10-17 20:20:19.866695 From root 8, the Terminal state 80 is reached 13 times. 
2023-10-17 20:20:19.919104 Terminal clusters corresponding to unique lineages are {6: 'Microglia', 9: 'Astrocytes', 10: 'Endothelial', 15: 'Astrocytes', 20: 'Cajal Retzius', 21: 'Cck-Tox', 25: 'Granule mature', 26: 'Endothelial', 29: 'Granule mature', 30: 'Endothelial', 31: 'Granule mature', 32: 'Granule immature', 33: 'Granule mature', 34: 'Granule mature', 37: 'Granule mature', 38: 'Granule mature', 39: 'Granule mature', 40: 'Granule mature', 42: 'Granule mature', 43: 'Granule mature', 44: 'Granule mature', 45: 'Granule mature', 47: 'Granule mature', 51: 'Granule immature', 52: 'Granule mature', 55: 'Granule mature', 57: 'Granule mature', 58: 'Granule immature', 59: 'Granule immature', 60: 'Granule immature', 62: 'Granule immature', 64: 'Granule mature', 65: 'Granule mature', 68: 'Granule mature', 69: 'Granule mature', 71: 'Granule mature', 73: 'Granule mature', 77: 'Granule mature', 78: 'Granule mature', 79: 'Granule mature', 80: 'Granule mature'} 2023-10-17 20:20:19.919157 Begin projection of pseudotime and lineage likelihood 2023-10-17 20:20:20.478695 Graph has 1 connected components before pruning 2023-10-17 20:20:20.482324 Graph has 36 connected components after pruning 2023-10-17 20:20:20.510623 Graph has 1 connected components after reconnecting 2023-10-17 20:20:20.511279 67.7% links trimmed from local pruning relative to start 2023-10-17 20:20:20.511307 71.8% links trimmed from global pruning relative to start 2023-10-17 20:20:20.515002 Start making edgebundle milestone... 2023-10-17 20:20:20.515032 Start finding milestones 2023-10-17 20:20:21.474056 End milestones 2023-10-17 20:20:21.474592 Will use via-pseudotime for edges, otherwise consider providing a list of numeric labels (single cell level) or via_object 2023-10-17 20:20:21.481435 Recompute weights 2023-10-17 20:20:21.522933 pruning milestone graph based on recomputed weights 2023-10-17 20:20:21.523909 Graph has 1 connected components before pruning 2023-10-17 20:20:21.524557 Graph has 11 connected components after pruning 2023-10-17 20:20:21.543787 Graph has 1 connected components after reconnecting 2023-10-17 20:20:21.544608 67.8% links trimmed from global pruning relative to start 2023-10-17 20:20:21.544638 regenerate igraph on pruned edges 2023-10-17 20:20:21.557810 Setting numeric label as single cell pseudotime for coloring edges 2023-10-17 20:20:21.571345 Making smooth edges 2023-10-17 20:20:22.204579 Time elapsed 49.9 seconds
In [10]:
v0.get_pseudotime(adata2)
sc.pp.neighbors(adata2,n_neighbors= 15,use_rep='scaled|original|X_pca')
ov.utils.cal_paga(adata2,use_time_prior='pt_via',vkey='paga',
groups='celltype')
...the pseudotime of VIA added to AnnData obs named `pt_via` computing neighbors finished: added to `.uns['neighbors']` `.obsp['distances']`, distances for each pair of neighbors `.obsp['connectivities']`, weighted adjacency matrix (0:00:02) running PAGA using priors: ['pt_via'] finished added 'paga/connectivities', connectivities adjacency (adata.uns) 'paga/connectivities_tree', connectivities subtree (adata.uns) 'paga/transitions_confidence', velocity transitions (adata.uns)
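With `pt_via` now stored in `.obs`, a quick per-cell-type summary can verify that the pseudotime ordering is sensible before any interpolation; this check is a convenience addition, not part of the original notebook.

# Mean and spread of VIA pseudotime per cell type in the baseline data
adata2.obs.groupby('celltype')['pt_via'].agg(['mean', 'std'])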
In [11]:
ov.utils.plot_paga(adata2,basis='mde', size=50, alpha=.1,title='PAGA LTNN-graph',
min_edge_width=2, node_size_scale=1.5,show=False,legend_loc=False)
#plt.title('PAGA Dentategyrus (BulkTrajBlend)',fontsize=13)
In [12]:
raw_transitions=pd.DataFrame(adata2.uns['paga']['transitions_confidence'].toarray(),
index=adata2.obs['celltype'].cat.categories,
columns=adata2.obs['celltype'].cat.categories)
In [13]:
raw_transitions
Out[13]:
 | Astrocytes | Cajal Retzius | Cck-Tox | Endothelial | GABA | Granule immature | Granule mature | Microglia | Mossy | Neuroblast | OL | OPC | Radial Glia-like | nIPC |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Astrocytes | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.00000 | 0.0 | 0.0 | 0.021305 | 0.000000 |
Cajal Retzius | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.00000 | 0.0 | 0.0 | 0.000000 | 0.000000 |
Cck-Tox | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.00000 | 0.0 | 0.0 | 0.000000 | 0.000000 |
Endothelial | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.00000 | 0.0 | 0.0 | 0.000000 | 0.000000 |
GABA | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.00000 | 0.0 | 0.0 | 0.000000 | 0.000000 |
Granule immature | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.01876 | 0.0 | 0.0 | 0.000000 | 0.000000 |
Granule mature | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.092163 | 0.0 | 0.0 | 0.0 | 0.00000 | 0.0 | 0.0 | 0.000000 | 0.000000 |
Microglia | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.00000 | 0.0 | 0.0 | 0.000000 | 0.000000 |
Mossy | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.00000 | 0.0 | 0.0 | 0.000000 | 0.000000 |
Neuroblast | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.00000 | 0.0 | 0.0 | 0.000000 | 0.000000 |
OL | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.00000 | 0.0 | 0.0 | 0.000000 | 0.000000 |
OPC | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.00000 | 0.0 | 0.0 | 0.000000 | 0.027311 |
Radial Glia-like | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.00000 | 0.0 | 0.0 | 0.000000 | 0.012850 |
nIPC | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.00000 | 0.0 | 0.0 | 0.000000 | 0.000000 |
In [14]:
raw_transitions.loc['OPC','nIPC']
Out[14]:
0.027310984638741025
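The baseline graph therefore contains only a weak OPC→nIPC edge. To see every non-zero transition at once (a hedged convenience snippet, not in the original notebook):

# List all non-zero entries of the baseline transition matrix
nz = raw_transitions.stack()
print(nz[nz > 0])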
In [15]:
def cor_mean(adata,generate_adata,celltype_key):
    # Pearson correlation between reference and generated cell types;
    # assumes adata.uns['rank_genes_groups'] gets populated along the way
    cor_pd=ov.bulk2single.bulk2single_plot_correlation(adata,generate_adata,celltype_key=celltype_key,
                                                       return_table=True)
    # top 200 marker genes per cell type, restricted to genes that are
    # also present in the generated data
    marker_df = pd.DataFrame(adata.uns['rank_genes_groups']['names']).head(200)
    marker = list(set(np.unique(np.ravel(np.array(marker_df))))&set(generate_adata.var.index.tolist()))
    # mean marker expression per cell type in the reference sc data
    sc_marker = adata[:,marker].to_df()
    sc_marker['celltype'] = adata.obs[celltype_key]
    sc_marker_mean = sc_marker.groupby('celltype')[marker].mean()
    # mean marker expression per cell type in the generated data
    generate_sc_data_new = generate_adata[:,marker].to_df()
    generate_sc_data_new['celltype'] = generate_adata.obs[celltype_key]
    generate_sc_marker_mean = generate_sc_data_new.groupby('celltype')[marker].mean()
    # keep the cell types shared by both datasets and label the matrix
    intersect_cell = list(set(sc_marker_mean.index).intersection(set(generate_sc_marker_mean.index)))
    generate_sc_marker_mean = generate_sc_marker_mean.loc[intersect_cell]
    sc_marker_mean = sc_marker_mean.loc[intersect_cell]
    sc_marker_mean = sc_marker_mean.T
    rf_ct = list(sc_marker_mean.columns)
    cor_pd = pd.DataFrame(cor_pd,
                          index=rf_ct,
                          columns=rf_ct)
    return cor_pd
In [16]:
def cos_mean(adata,generate_adata,celltype_key):
    # These two calls are kept for their side effect only: they run the
    # differential-expression test and populate .uns['rank_genes_groups']
    # of each object (their return values were overwritten in the original cell).
    ov.single.get_celltype_marker(generate_adata,clustertype=celltype_key,
                                  scores_type='logfoldchanges')
    ov.single.get_celltype_marker(adata,clustertype=celltype_key,
                                  scores_type='logfoldchanges')
    # top 300 DEGs per cell type (log2FC > 2, adjusted p < 0.05) in each dataset
    cmk1={}
    for clt in generate_adata.obs[celltype_key].cat.categories:
        degs = sc.get.rank_genes_groups_df(generate_adata, group=clt,
                                           key='rank_genes_groups', log2fc_min=2,
                                           pval_cutoff=0.05)
        cmk1[clt]=degs['names'][:300].tolist()
    cmk2={}
    for clt in adata.obs[celltype_key].cat.categories:
        degs = sc.get.rank_genes_groups_df(adata, group=clt,
                                           key='rank_genes_groups', log2fc_min=2,
                                           pval_cutoff=0.05)
        cmk2[clt]=degs['names'][:300].tolist()
    # binary marker-membership matrices over the union of all marker genes
    all_gene=[]
    for clt in cmk1.keys():
        all_gene+=cmk1[clt]
    for clt in cmk2.keys():
        all_gene+=cmk2[clt]
    all_gene=list(set(all_gene))
    cmk1_pd=pd.DataFrame(index=all_gene)
    for clt in cmk1.keys():
        cmk1_pd[clt]=0
        cmk1_pd.loc[cmk1[clt],clt]=1
    cmk2_pd=pd.DataFrame(index=all_gene)
    for clt in cmk2.keys():
        cmk2_pd[clt]=0
        cmk2_pd.loc[cmk2[clt],clt]=1
    # pairwise cosine similarity between generated and reference marker sets
    from scipy import spatial
    plot_data=pd.DataFrame(index=generate_adata.obs['celltype'].cat.categories,
                           columns=generate_adata.obs['celltype'].cat.categories)
    for clt1 in cmk1.keys():
        for clt2 in cmk1.keys():
            plot_data.loc[clt1,clt2]=1 - spatial.distance.cosine(cmk1_pd[clt1], cmk2_pd[clt2])
    plot_data=plot_data.astype(float)
    return plot_data
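Both helpers return a square cell-type-by-cell-type similarity matrix. The evaluation cells below reduce each matrix to two numbers: the mean of the diagonal (same cell type in reference vs. generated data, ideally high) and the mean of the off-diagonal entries (different cell types, ideally low). The loops inline this computation; a small helper of our own naming, shown here only for clarity, makes the metric explicit:

# Illustrative helper (not omicverse API): diagonal vs off-diagonal means
# of a square similarity DataFrame with matching row/column order.
def diag_offdiag_means(df):
    n = len(df)
    diag = np.trace(df.values) / n
    offdiag = (df.values.sum() - np.trace(df.values)) / (n ** 2 - n)
    return diag, offdiag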
Cell cluster size¶
In [38]:
scale_value=1
bulktb=ov.bulk2single.BulkTrajBlend(bulk_seq=bulk,single_seq=adata,
bulk_group=bulk.columns.tolist(),
celltype_key='celltype',)
bulktb.vae_configure(cell_target_num=adata.obs['celltype'].value_counts().loc['OPC']*scale_value)
bulktb.vae_train(batch_size=256,
learning_rate=1e-4,
hidden_size=256,
epoch_num=3500,
vae_save_dir='model',
vae_save_name=f'dg_vae_scale_{scale_value}',
generate_save_dir='output',
generate_save_name=f'dg_scale_{scale_value}')
......drop duplicates index in bulk data ......deseq2 normalize the bulk data ......log10 the bulk data ......calculate the mean of each group ......normalize the single data normalizing counts per cell finished (0:00:00) ......log1p the single data ......prepare the input of bulk2single ...loading data ...begin vae training
Train Epoch: 3152: 90%|█████████ | 3153/3500 [17:24<01:54, 3.02it/s, loss=1.3107, min_loss=1.2998]
Early stopping at epoch 3154... min loss = 1.299808457493782 ...vae training done!
...save trained vae in model/dg_vae_scale_1.pth.
In [9]:
import gc
for scale_value in [2,4,6,8,10]:
    bulktb=ov.bulk2single.BulkTrajBlend(bulk_seq=bulk,single_seq=adata,
                                        bulk_group=bulk.columns.tolist(),
                                        celltype_key='celltype',)
    bulktb.vae_configure(cell_target_num=adata.obs['celltype'].value_counts().loc['OPC']*scale_value)
    bulktb.vae_train(batch_size=256,
                     learning_rate=1e-4,
                     hidden_size=256,
                     epoch_num=3500,
                     vae_save_dir='model',
                     vae_save_name=f'dg_vae_scale_{scale_value}',
                     generate_save_dir='output',
                     generate_save_name=f'dg_scale_{scale_value}')
    gc.collect()
......drop duplicates index in bulk data ......deseq2 normalize the bulk data ......log10 the bulk data ......calculate the mean of each group ......normalize the single data normalizing counts per cell finished (0:00:00) ......log1p the single data ......prepare the input of bulk2single ...loading data ...begin vae training
Train Epoch: 2963: 85%|████████▍ | 2964/3500 [15:10<02:44, 3.26it/s, loss=1.4432, min_loss=1.4368]
Early stopping at epoch 2965... min loss = 1.4368178471922874 ...vae training done!
...save trained vae in model/dg_vae_scale_2.pth. ......drop duplicates index in bulk data ......deseq2 normalize the bulk data ......log10 the bulk data ......calculate the mean of each group ......normalize the single data normalizing counts per cell finished (0:00:00) ......log1p the single data ......prepare the input of bulk2single ...loading data ...begin vae training
Train Epoch: 3385: 97%|█████████▋| 3386/3500 [17:30<00:35, 3.22it/s, loss=1.3731, min_loss=1.3717]
Early stopping at epoch 3387... min loss = 1.371699959039688 ...vae training done!
...save trained vae in model/dg_vae_scale_4.pth. ......drop duplicates index in bulk data ......deseq2 normalize the bulk data ......log10 the bulk data ......calculate the mean of each group ......normalize the single data normalizing counts per cell finished (0:00:00) ......log1p the single data ......prepare the input of bulk2single ...loading data ...begin vae training
Train Epoch: 3411: 97%|█████████▋| 3412/3500 [17:28<00:27, 3.26it/s, loss=1.4313, min_loss=1.4296]
Early stopping at epoch 3413... min loss = 1.4296203181147575 ...vae training done!
...save trained vae in model/dg_vae_scale_6.pth. ......drop duplicates index in bulk data ......deseq2 normalize the bulk data ......log10 the bulk data ......calculate the mean of each group ......normalize the single data normalizing counts per cell finished (0:00:00) ......log1p the single data ......prepare the input of bulk2single ...loading data ...begin vae training
Train Epoch: 3499: 100%|██████████| 3500/3500 [17:59<00:00, 3.24it/s, loss=1.3607, min_loss=1.3552]
min loss = 1.3551789224147797 ...vae training done! ...save trained vae in model/dg_vae_scale_8.pth. ......drop duplicates index in bulk data ......deseq2 normalize the bulk data ......log10 the bulk data ......calculate the mean of each group ......normalize the single data normalizing counts per cell finished (0:00:00) ......log1p the single data ......prepare the input of bulk2single ...loading data ...begin vae training
Train Epoch: 2996: 86%|████████▌ | 2997/3500 [15:25<02:35, 3.24it/s, loss=1.2778, min_loss=1.2738]
Early stopping at epoch 2998... min loss = 1.2738360241055489 ...vae training done!
...save trained vae in model/dg_vae_scale_10.pth.
In [51]:
bulktb.vae_model.single_data.X.max()
Out[51]:
8.292124
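A maximum of ~8.3 is consistent with the normalize-and-log1p preprocessing reported during `vae_configure`; raw counts would be orders of magnitude larger. A quick check, assuming the data are log1p-transformed:

# expm1 undoes log1p, giving the implied maximum normalized count
print('implied max count:', np.expm1(bulktb.vae_model.single_data.X.max()))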
In [ ]:
import anndata
cos_dict={}
cor_dict={}
adata_dict={}
noisy_dict={}
for scale_value in [1,2,4,6,8,10]:
    bulktb=ov.bulk2single.BulkTrajBlend(bulk_seq=bulk,single_seq=adata,
                                        bulk_group=bulk.columns.tolist(),
                                        celltype_key='celltype',)
    bulktb.vae_configure(cell_target_num=adata.obs['celltype'].value_counts().loc['OPC']*scale_value)
    bulktb.vae_load(f'model/dg_vae_scale_{scale_value}.pth')
    test_adata=bulktb.vae_generate(leiden_size=25)
    noisy_dict[scale_value]=test_adata.uns['noisy_leiden']
    # drop generated cell types with fewer than 10 cells
    filter_leiden=list(test_adata.obs['celltype'].value_counts()[test_adata.obs['celltype'].value_counts()<10].index)
    test_adata=test_adata[~test_adata.obs['celltype'].isin(filter_leiden)]
    test_adata.raw=test_adata.copy()
    sc.pp.highly_variable_genes(test_adata, min_mean=0.0125, max_mean=3, min_disp=0.5)
    test_adata = test_adata[:, test_adata.var.highly_variable]
    # expression correlation and marker-gene cosine similarity vs the reference
    cor_dict[scale_value]=cor_mean(adata_t,test_adata,'celltype')
    cos_dict[scale_value]=cos_mean(adata_t,test_adata,'celltype')
    test_adata=test_adata.raw.to_adata()
    test_adata.write_h5ad(f'output/dg_vae_scale_{scale_value}.h5ad')
    # interpolate the generated OPC cells back into the single-cell data
    adata3=test_adata[test_adata.obs['celltype']=='OPC']
    adata_dict[scale_value]=anndata.concat([bulktb.vae_model.single_data,adata3],
                                           merge='same')
    sc.pp.highly_variable_genes(
        adata_dict[scale_value],
        n_top_genes=3000,
    )
    adata_dict[scale_value] = adata_dict[scale_value][:, adata_dict[scale_value].var.highly_variable]
    ov.pp.scale(adata_dict[scale_value])
    ov.pp.pca(adata_dict[scale_value],layer='scaled',n_pcs=50)
    adata_dict[scale_value].obsm["X_mde"] = ov.utils.mde(adata_dict[scale_value].obsm["scaled|original|X_pca"])
In [100]:
adata3.raw.to_adata()
Out[100]:
AnnData object with n_obs × n_vars = 0 × 12953
    obs: 'celltype', 'leiden'
    uns: 'hvg', 'pca', 'neighbors', 'leiden', 'noisy_leiden', 'rank_genes_groups'
    obsm: 'X_pca'
    obsp: 'distances', 'connectivities'
In [96]:
for scale_value in [1,2,4,6,8,10]:
    # mean of the diagonal (same cell type) correlations
    diagonal_mean = np.trace(cor_dict[scale_value].values) / len(cor_dict[scale_value])
    # mean of the off-diagonal (different cell type) correlations
    non_diagonal_mean = (np.sum(cor_dict[scale_value].values) - np.trace(cor_dict[scale_value].values)) / (len(cor_dict[scale_value])**2 - len(cor_dict[scale_value]))
    print(scale_value,"diagonal mean:", diagonal_mean,"off-diagonal mean:", non_diagonal_mean)
1 diagonal mean: 0.9911414293585704 off-diagonal mean: 0.24798798631613003
2 diagonal mean: 0.986117705547958 off-diagonal mean: 0.23809306953940032
4 diagonal mean: 0.9939601770172272 off-diagonal mean: 0.21210402922769656
6 diagonal mean: 0.964987777586368 off-diagonal mean: 0.30298249830842083
8 diagonal mean: 0.9625920239881667 off-diagonal mean: 0.3447438910389296
10 diagonal mean: 0.9649344906782974 off-diagonal mean: 0.29292021055642786
In [86]:
for scale_value in [1,2,4,6,8,10]:
    # mean of the diagonal (same cell type) cosine similarities
    diagonal_mean = np.trace(cos_dict[scale_value].values) / len(cos_dict[scale_value])
    # mean of the off-diagonal (different cell type) cosine similarities
    non_diagonal_mean = (np.sum(cos_dict[scale_value].values) - np.trace(cos_dict[scale_value].values)) / (len(cos_dict[scale_value])**2 - len(cos_dict[scale_value]))
    print(scale_value,"diagonal mean:", diagonal_mean,"off-diagonal mean:", non_diagonal_mean)
1 diagonal mean: 0.5591708480021848 off-diagonal mean: 0.038698385393148856
2 diagonal mean: 0.5285731256699996 off-diagonal mean: 0.03832302617894754
4 diagonal mean: 0.5045355789591006 off-diagonal mean: 0.03668416168762468
6 diagonal mean: 0.47956310295951027 off-diagonal mean: 0.03995235344345557
8 diagonal mean: 0.45997961981468793 off-diagonal mean: 0.03554772472920512
10 diagonal mean: 0.49161256932723313 off-diagonal mean: 0.02964676970771194
In [87]:
for scale_value in [1,2,4,6,8,10]:
    print(scale_value,len(noisy_dict[scale_value]))
1 1
2 21
4 70
6 166
8 265
10 332
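The number of noisy Leiden clusters grows sharply with the generation scale. A hypothetical visualization of this trend (matplotlib assumed available):

import matplotlib.pyplot as plt
scales = [1, 2, 4, 6, 8, 10]
plt.plot(scales, [len(noisy_dict[s]) for s in scales], marker='o')
plt.xlabel('scale_value')
plt.ylabel('number of noisy Leiden clusters')
plt.show()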
In [105]:
ov.utils.embedding(adata_dict[10],
basis='X_mde',
color=['celltype'],title='Dentategyrus Cell Type (BulkTrajBlend)',
frameon='small',show=False,
wspace=0.4,palette=adata.uns['clusters_colors'].tolist())
In [ ]:
via_dict={}
for scale_value in [1,2,4,6,8,10]:
    via_dict[scale_value] = ov.single.pyVIA(adata=adata_dict[scale_value],adata_key='scaled|original|X_pca',adata_ncomps=100, basis='X_mde',
                                            clusters='celltype',knn=15,random_seed=112,root_user=['nIPC','Neuroblast'],
                                            dataset='group')
    via_dict[scale_value].run()
    via_dict[scale_value].get_pseudotime(adata_dict[scale_value])
    sc.pp.neighbors(adata_dict[scale_value],n_neighbors=15,use_rep='scaled|original|X_pca')
    ov.utils.cal_paga(adata_dict[scale_value],use_time_prior='pt_via',vkey='paga',
                      groups='celltype')
In [ ]:
for scale_value in [1,2,4,6,8,10]:
    # root diffusion pseudotime at the first nIPC cell
    adata_dict[scale_value].uns['iroot'] = np.flatnonzero(adata_dict[scale_value].obs['celltype'] == 'nIPC')[0]
    sc.tl.dpt(adata_dict[scale_value])
    ov.utils.cal_paga(adata_dict[scale_value],use_time_prior='dpt_pseudotime',vkey='paga',
                      groups='celltype')
In [ ]:
import scvelo as scv
from cellrank.kernels import CytoTRACEKernel
for scale_value in [1,2,4,6,8,10]:
    # CytoTRACEKernel expects spliced/unspliced layers; reuse .X for both
    adata_dict[scale_value].layers["spliced"] = adata_dict[scale_value].X
    adata_dict[scale_value].layers["unspliced"] = adata_dict[scale_value].X
    scv.pp.moments(adata_dict[scale_value], n_pcs=30, n_neighbors=30)
    ctk = CytoTRACEKernel(adata_dict[scale_value]).compute_cytotrace()
    ov.utils.cal_paga(adata_dict[scale_value],use_time_prior='ct_pseudotime',vkey='paga',
                      groups='celltype')
In [114]:
ov.utils.plot_paga(adata_dict[4],basis='mde', size=50, alpha=.1,title='PAGA LTNN-graph',
min_edge_width=2, node_size_scale=1.5,show=False,legend_loc=False)
#plt.title('PAGA Dentategyrus (BulkTrajBlend)',fontsize=13)
In [99]:
ov.utils.embedding(adata_dict[4],
basis='X_mde',
color=['pt_via'],title='Dentategyrus Cell Type (BulkTrajBlend)',
frameon='small',show=False,
wspace=0.4)
In [81]:
transitions_dict={}
for scale_value in [1,2,4,6,8,10]:
    after_transitions=pd.DataFrame(adata_dict[scale_value].uns['paga']['transitions_confidence'].toarray(),
                                   index=adata_dict[scale_value].obs['celltype'].cat.categories,
                                   columns=adata_dict[scale_value].obs['celltype'].cat.categories)
    transitions_dict[scale_value]=after_transitions
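As a quick check (not in the original notebook), the maximum outgoing OPC transition per scale is exactly the quantity stored later as `Trans_after`:

for s in [1, 2, 4, 6, 8, 10]:
    print(s, transitions_dict[s].loc['OPC'].max())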
In [82]:
res={}
for scale_value in [1,2,4,6,8,10]:
    res_dict={}
    # Cor: expression correlation, diagonal vs off-diagonal mean
    diagonal_mean = np.trace(cor_dict[scale_value].values) / len(cor_dict[scale_value])
    non_diagonal_mean = (np.sum(cor_dict[scale_value].values) - np.trace(cor_dict[scale_value].values)) / (len(cor_dict[scale_value])**2 - len(cor_dict[scale_value]))
    res_dict['Cor_mean']=diagonal_mean
    res_dict['non_Cor_mean']=non_diagonal_mean
    # Cos: marker-gene cosine similarity, diagonal vs off-diagonal mean
    diagonal_mean = np.trace(cos_dict[scale_value].values) / len(cos_dict[scale_value])
    non_diagonal_mean = (np.sum(cos_dict[scale_value].values) - np.trace(cos_dict[scale_value].values)) / (len(cos_dict[scale_value])**2 - len(cos_dict[scale_value]))
    res_dict['Cos_mean']=diagonal_mean
    res_dict['non_Cos_mean']=non_diagonal_mean
    # Trans: maximum outgoing OPC transition before/after interpolation
    res_dict['Trans_raw']=0
    res_dict['Trans_after']=transitions_dict[scale_value].loc['OPC'].max()
    # Variance of OPC pseudotime before/after interpolation
    res_dict['Var_raw']=np.var(adata2.obs.loc[adata2.obs['celltype']=='OPC','pt_via'])
    res_dict['Var_after']=np.var(adata_dict[scale_value].obs.loc[adata_dict[scale_value].obs['celltype']=='OPC','pt_via'])
    res_dict['noisy']=len(noisy_dict[scale_value])
    res_dict['Inter_cells']=adata_dict[scale_value].shape[0]-adata2.shape[0]
    res[scale_value]=res_dict
In [83]:
Copied!
res
res
Out[83]:
{1: {'Cor_mean': 0.9914859552060807, 'non_Cor_mean': 0.26546881383206783, 'Cos_mean': 0.5578940666755912, 'non_Cos_mean': 0.03780892497865078, 'Trans_raw': 0, 'Trans_after': 0.019311782438935412, 'Var_raw': 0.0005237087854723636, 'Var_after': 0.0012765205323550873, 'noisy': 1, 'Inter_cells': 53},
 2: {'Cor_mean': 0.9769536585132764, 'non_Cor_mean': 0.21828462869405107, 'Cos_mean': 0.5196151664752741, 'non_Cos_mean': 0.03683457644737572, 'Trans_raw': 0, 'Trans_after': 0.0, 'Var_raw': 0.0005237087854723636, 'Var_after': 0.005385726663410967, 'noisy': 16, 'Inter_cells': 106},
 4: {'Cor_mean': 0.987388076287127, 'non_Cor_mean': 0.23143881900726115, 'Cos_mean': 0.5011262221138088, 'non_Cos_mean': 0.03561918997438063, 'Trans_raw': 0, 'Trans_after': 0.0, 'Var_raw': 0.0005237087854723636, 'Var_after': 0.10604824652340568, 'noisy': 71, 'Inter_cells': 212},
 6: {'Cor_mean': 0.9650342126310978, 'non_Cor_mean': 0.30391610356513454, 'Cos_mean': 0.4827851551300972, 'non_Cos_mean': 0.03996720658264976, 'Trans_raw': 0, 'Trans_after': 0.01726529347100601, 'Var_raw': 0.0005237087854723636, 'Var_after': 0.0019662063243804386, 'noisy': 161, 'Inter_cells': 60},
 8: {'Cor_mean': 0.9598699302607905, 'non_Cor_mean': 0.29668834238585845, 'Cos_mean': 0.47908290437185885, 'non_Cos_mean': 0.030834468555038765, 'Trans_raw': 0, 'Trans_after': 0.02310929469431933, 'Var_raw': 0.0005237087854723636, 'Var_after': 0.00746124148120382, 'noisy': 269, 'Inter_cells': 0},
 10: {'Cor_mean': 0.9856552038084535, 'non_Cor_mean': 0.25458672698172224, 'Cos_mean': 0.49681434246746, 'non_Cos_mean': 0.030102727839399535, 'Trans_raw': 0, 'Trans_after': 0.02310929469431933, 'Var_raw': 0.0005237087854723636, 'Var_after': 0.00746124148120382, 'noisy': 325, 'Inter_cells': 0}}
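For side-by-side comparison, the nested dict can be flattened into a table (rows are scale values, columns are metrics); a convenience step, not part of the original analysis:

res_df = pd.DataFrame(res).T
res_df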
In [215]:
import pickle
with open('result/metric_dg_cell_scale.pkl','wb') as f:
    pickle.dump(res,f)
In [115]:
with open('result/metric_dg_cell_scale.pkl','rb') as f:
    res=pickle.load(f)
In [117]:
for scale_value in [1,2,4,6,8,10]:
    res_dict=res[scale_value]
    # Cor: expression correlation, diagonal vs off-diagonal mean
    diagonal_mean = np.trace(cor_dict[scale_value].values) / len(cor_dict[scale_value])
    non_diagonal_mean = (np.sum(cor_dict[scale_value].values) - np.trace(cor_dict[scale_value].values)) / (len(cor_dict[scale_value])**2 - len(cor_dict[scale_value]))
    res_dict['Cor_mean']=diagonal_mean
    res_dict['non_Cor_mean']=non_diagonal_mean
    # Cos: marker-gene cosine similarity, diagonal vs off-diagonal mean
    diagonal_mean = np.trace(cos_dict[scale_value].values) / len(cos_dict[scale_value])
    non_diagonal_mean = (np.sum(cos_dict[scale_value].values) - np.trace(cos_dict[scale_value].values)) / (len(cos_dict[scale_value])**2 - len(cos_dict[scale_value]))
    res_dict['Cos_mean']=diagonal_mean
    res_dict['non_Cos_mean']=non_diagonal_mean
In [119]:
import pickle
with open('result/metric_dg_cell_scale.pkl','wb') as f:
    pickle.dump(res,f)
hidden_size¶
In [ ]:
import gc
for hidden_value in [64,128,256,512,1024]:
    bulktb=ov.bulk2single.BulkTrajBlend(bulk_seq=bulk,single_seq=adata,
                                        bulk_group=bulk.columns.tolist(),
                                        celltype_key='celltype',)
    bulktb.vae_configure(cell_target_num=adata.obs['celltype'].value_counts().loc['OPC']*2)
    bulktb.vae_train(batch_size=256,
                     learning_rate=1e-4,
                     hidden_size=hidden_value,
                     epoch_num=3500,
                     vae_save_dir='model',
                     vae_save_name=f'dg_vae_hidden_{hidden_value}',
                     generate_save_dir='output',
                     generate_save_name=f'dg_hidden_{hidden_value}')
    gc.collect()
In [120]:
import anndata
cos_dict={}
cor_dict={}
adata_dict={}
noisy_dict={}
for hidden_value in [64,128,256,512,1024]:
    bulktb=ov.bulk2single.BulkTrajBlend(bulk_seq=bulk,single_seq=adata,
                                        bulk_group=bulk.columns.tolist(),
                                        celltype_key='celltype',)
    bulktb.vae_configure(cell_target_num=adata.obs['celltype'].value_counts().loc['OPC']*2)
    bulktb.vae_load(f'model/dg_vae_hidden_{hidden_value}.pth',hidden_size=hidden_value)
    test_adata=bulktb.vae_generate(leiden_size=25)
    noisy_dict[hidden_value]=test_adata.uns['noisy_leiden']
    # drop generated cell types with fewer than 10 cells
    filter_leiden=list(test_adata.obs['celltype'].value_counts()[test_adata.obs['celltype'].value_counts()<10].index)
    test_adata=test_adata[~test_adata.obs['celltype'].isin(filter_leiden)]
    test_adata.raw=test_adata.copy()
    sc.pp.highly_variable_genes(test_adata, min_mean=0.0125, max_mean=3, min_disp=0.5)
    test_adata = test_adata[:, test_adata.var.highly_variable]
    cor_dict[hidden_value]=cor_mean(adata_t,test_adata,'celltype')
    cos_dict[hidden_value]=cos_mean(adata_t,test_adata,'celltype')
    test_adata=test_adata.raw.to_adata()
    # fixed: the original cell reused the stale `scale_value` in this filename
    test_adata.write_h5ad(f'output/dg_vae_hidden_{hidden_value}.h5ad')
    # interpolate the generated OPC cells back into the single-cell data
    adata3=test_adata[test_adata.obs['celltype']=='OPC']
    adata_dict[hidden_value]=anndata.concat([bulktb.vae_model.single_data,adata3],
                                            merge='same')
    sc.pp.highly_variable_genes(
        adata_dict[hidden_value],
        n_top_genes=3000,
    )
    adata_dict[hidden_value] = adata_dict[hidden_value][:, adata_dict[hidden_value].var.highly_variable]
    ov.pp.scale(adata_dict[hidden_value])
    ov.pp.pca(adata_dict[hidden_value],layer='scaled',n_pcs=50)
    adata_dict[hidden_value].obsm["X_mde"] = ov.utils.mde(adata_dict[hidden_value].obsm["scaled|original|X_pca"])
loading model from model/dg_vae_hidden_1024.pth loading model from model/dg_vae_hidden_1024.pth ...generating
generating: 100%|██████████| 1484/1484 [00:00<00:00, 20222.77it/s]
generated done! extracting highly variable genes
finished (0:00:00) --> added 'highly_variable', boolean vector (adata.var) 'means', float vector (adata.var) 'dispersions', float vector (adata.var) 'dispersions_norm', float vector (adata.var) computing PCA Note that scikit-learn's randomized PCA might not be exactly reproducible across different computational platforms. For exact reproducibility, choose `svd_solver='arpack'.` on highly variable genes with n_comps=100 finished (0:00:00) computing neighbors finished: added to `.uns['neighbors']` `.obsp['distances']`, distances for each pair of neighbors `.obsp['connectivities']`, weighted adjacency matrix (0:00:00) running Leiden clustering finished: found 33 clusters and added 'leiden', the cluster labels (adata.obs, categorical) (0:00:00) The filter leiden is ['13', '14', '15', '21', '24', '23', '22', '16', '20', '19', '18', '17', '25', '26', '27', '28', '29', '30', '31', '32'] extracting highly variable genes finished (0:00:00) --> added 'highly_variable', boolean vector (adata.var) 'means', float vector (adata.var) 'dispersions', float vector (adata.var) 'dispersions_norm', float vector (adata.var) ranking genes finished: added to `.uns['rank_genes_groups']` 'names', sorted np.recarray to be indexed by group ids 'scores', sorted np.recarray to be indexed by group ids 'logfoldchanges', sorted np.recarray to be indexed by group ids 'pvals', sorted np.recarray to be indexed by group ids 'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:01) ...get cell type marker ranking genes finished: added to `.uns['rank_genes_groups']` 'names', sorted np.recarray to be indexed by group ids 'scores', sorted np.recarray to be indexed by group ids 'logfoldchanges', sorted np.recarray to be indexed by group ids 'pvals', sorted np.recarray to be indexed by group ids 'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:01) ...get cell type marker ranking genes finished: added to `.uns['rank_genes_groups']` 'names', sorted np.recarray to be indexed by group ids 'scores', sorted np.recarray to be indexed by group ids 'logfoldchanges', sorted np.recarray to be indexed by group ids 'pvals', sorted np.recarray to be indexed by group ids 'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:01) If you pass `n_top_genes`, all cutoffs are ignored. extracting highly variable genes finished (0:00:00) --> added 'highly_variable', boolean vector (adata.var) 'means', float vector (adata.var) 'dispersions', float vector (adata.var) 'dispersions_norm', float vector (adata.var) ... as `zero_center=True`, sparse input is densified and may lead to large memory consumption
In [220]:
for hidden_value in [64,128,256,512,1024]:
    # mean of the diagonal entries (same cell type in real vs. generated data)
    diagonal_mean = np.trace(cor_dict[hidden_value].values) / len(cor_dict[hidden_value])
    # mean of the off-diagonal entries (mismatched cell types)
    non_diagonal_mean = (np.sum(cor_dict[hidden_value].values) - np.trace(cor_dict[hidden_value].values)) / (len(cor_dict[hidden_value])**2 - len(cor_dict[hidden_value]))
    print(hidden_value,"diagonal mean:", diagonal_mean,"off-diagonal mean:", non_diagonal_mean)
64 diagonal mean: 0.7613242048327084 off-diagonal mean: -0.022544399466025048
128 diagonal mean: 0.7671973017236132 off-diagonal mean: -0.02257989110500617
256 diagonal mean: 0.7719218843759043 off-diagonal mean: -0.02589611936305697
512 diagonal mean: 0.764301108181588 off-diagonal mean: -0.02668720252922111
1024 diagonal mean: 0.7828976243661295 off-diagonal mean: -0.02663324283176528
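The diagonal/off-diagonal means above are recomputed with the same four lines for every metric in this notebook; a small helper would factor the pattern out. This is a sketch added for clarity, not part of the original pipeline, and `diag_offdiag_means` is a hypothetical name:

import numpy as np

def diag_offdiag_means(df):
    # Diagonal entries compare a cell type with itself (real vs. generated profiles);
    # off-diagonal entries compare mismatched cell types.
    values = df.values
    n = len(df)
    diagonal_mean = np.trace(values) / n
    non_diagonal_mean = (values.sum() - np.trace(values)) / (n**2 - n)
    return diagonal_mean, non_diagonal_mean

With it, each loop body reduces to print(hidden_value, *diag_offdiag_means(cor_dict[hidden_value])).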
In [221]:
for hidden_value in [64,128,256,512,1024]:
    # mean of the diagonal entries (same cell type in real vs. generated data)
    diagonal_mean = np.trace(cos_dict[hidden_value].values) / len(cos_dict[hidden_value])
    # mean of the off-diagonal entries (mismatched cell types)
    non_diagonal_mean = (np.sum(cos_dict[hidden_value].values) - np.trace(cos_dict[hidden_value].values)) / (len(cos_dict[hidden_value])**2 - len(cos_dict[hidden_value]))
    print(hidden_value,"diagonal mean:", diagonal_mean,"off-diagonal mean:", non_diagonal_mean)
64 diagonal mean: 0.5407541820594016 off-diagonal mean: 0.037832231446375084
128 diagonal mean: 0.546871534658343 off-diagonal mean: 0.03941247989652701
256 diagonal mean: 0.5506689556224444 off-diagonal mean: 0.03817278403655849
512 diagonal mean: 0.5555023602707589 off-diagonal mean: 0.03956331975627342
1024 diagonal mean: 0.5401374015910572 off-diagonal mean: 0.039282826688193696
In [223]:
for hidden_value in [64,128,256,512,1024]:
    print(hidden_value,len(noisy_dict[hidden_value]))
64 14
128 17
256 21
512 22
1024 17
In [225]:
ov.utils.embedding(adata_dict[256],
                   basis='X_mde',
                   color=['celltype'],title='Dentategyrus Cell Type (BulkTrajBlend)',
                   frameon='small',show=False,
                   wspace=0.4,palette=adata.uns['clusters_colors'].tolist())
/mnt/home/zehuazeng/miniconda3/envs/omicverse/lib/python3.8/site-packages/omicverse/utils/_scatterplot.py:417: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored cax = scatter(
Out[225]:
<AxesSubplot: title={'center': 'Dentategyrus Cell Type (BulkTrajBlend)'}, xlabel='X_mde1', ylabel='X_mde2'>
In [ ]:
via_dict={}
for hidden_value in [64,128,256,512,1024]:
    via_dict[hidden_value] = ov.single.pyVIA(adata=adata_dict[hidden_value],adata_key='scaled|original|X_pca',adata_ncomps=100, basis='X_mde',
                                             clusters='celltype',knn=15,random_seed=112,root_user=['nIPC','Neuroblast'],
                                             dataset='group')
    via_dict[hidden_value].run()
    via_dict[hidden_value].get_pseudotime(adata_dict[hidden_value])
    sc.pp.neighbors(adata_dict[hidden_value],n_neighbors=15,use_rep='scaled|original|X_pca')
    ov.utils.cal_paga(adata_dict[hidden_value],use_time_prior='pt_via',vkey='paga',
                      groups='celltype')
In [249]:
ov.utils.plot_paga(adata_dict[256],basis='mde', size=50, alpha=.1,title='PAGA LTNN-graph',
                   min_edge_width=2, node_size_scale=1.5,show=False,legend_loc=False)
#plt.title('PAGA Dentategyrus (BulkTrajBlend)',fontsize=13)
WARNING: Invalid color key. Using grey instead.
/mnt/home/zehuazeng/miniconda3/envs/omicverse/lib/python3.8/site-packages/networkx/convert.py:158: DeprecationWarning: The scipy.sparse array containers will be used instead of matrices in Networkx 3.0. Use `from_scipy_sparse_array` instead. return nx.from_scipy_sparse_matrix(data, create_using=create_using)
Out[249]:
<AxesSubplot: title={'center': 'PAGA LTNN-graph'}>
In [255]:
ov.utils.embedding(adata_dict[512],
                   basis='X_mde',
                   color=['pt_via'],title='Dentategyrus Cell Type (BulkTrajBlend)',
                   frameon='small',show=False,
                   wspace=0.4)
/mnt/home/zehuazeng/miniconda3/envs/omicverse/lib/python3.8/site-packages/omicverse/utils/_scatterplot.py:483: MatplotlibDeprecationWarning: Auto-removal of grids by pcolor() and pcolormesh() is deprecated since 3.5 and will be removed two minor releases later; please call grid(False) first. cb = pl.colorbar(cax, orientation="vertical", cax=cax1)
Out[255]:
<AxesSubplot: title={'center': 'Dentategyrus Cell Type (BulkTrajBlend)'}, xlabel='X_mde1', ylabel='X_mde2'>
In [227]:
transitions_dict={}
for hidden_value in [64,128,256,512,1024]:
    after_transitions=pd.DataFrame(adata_dict[hidden_value].uns['paga']['transitions_confidence'].toarray(),
                                   index=adata_dict[hidden_value].obs['celltype'].cat.categories,
                                   columns=adata_dict[hidden_value].obs['celltype'].cat.categories)
    transitions_dict[hidden_value]=after_transitions
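`transitions_confidence` is stored as a sparse matrix in `.uns['paga']`; wrapping it in a DataFrame makes the cell-type-to-cell-type transition confidences indexable by name. As a quick illustration (added here, not part of the original run), the strongest outgoing transition from OPC, which feeds `Trans_after` below, can be inspected per hidden size:

for hidden_value in [64,128,256,512,1024]:
    opc_row = transitions_dict[hidden_value].loc['OPC']
    # target cell type with the highest transition confidence from OPC
    print(hidden_value, opc_row.idxmax(), opc_row.max())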
In [250]:
res={}
for hidden_value in [64,128,256,512,1024]:
    res_dict={}
    #Cor:exp
    # mean of the diagonal entries (same cell type)
    diagonal_mean = np.trace(cor_dict[hidden_value].values) / len(cor_dict[hidden_value])
    # mean of the off-diagonal entries
    non_diagonal_mean = (np.sum(cor_dict[hidden_value].values) - np.trace(cor_dict[hidden_value].values)) / (len(cor_dict[hidden_value])**2 - len(cor_dict[hidden_value]))
    res_dict['Cor_mean']=diagonal_mean
    res_dict['non_Cor_mean']=non_diagonal_mean
    #Cos:gene
    # mean of the diagonal entries (same cell type)
    diagonal_mean = np.trace(cos_dict[hidden_value].values) / len(cos_dict[hidden_value])
    # mean of the off-diagonal entries
    non_diagonal_mean = (np.sum(cos_dict[hidden_value].values) - np.trace(cos_dict[hidden_value].values)) / (len(cos_dict[hidden_value])**2 - len(cos_dict[hidden_value]))
    res_dict['Cos_mean']=diagonal_mean
    res_dict['non_Cos_mean']=non_diagonal_mean
    #raw:trans
    res_dict['Trans_raw']=0
    res_dict['Trans_after']=transitions_dict[hidden_value].loc['OPC'].max()
    #Variance
    res_dict['Var_raw']=np.var(adata2.obs.loc[adata2.obs['celltype']=='OPC','pt_via'])
    res_dict['Var_after']=np.var(adata_dict[hidden_value].obs.loc[adata_dict[hidden_value].obs['celltype']=='OPC','pt_via'])
    res_dict['noisy']=len(noisy_dict[hidden_value])
    res_dict['Inter_cells']=adata_dict[hidden_value].shape[0]-adata2.shape[0]
    res[hidden_value]=res_dict
In [251]:
import pickle
with open('result/metric_dg_hidden.pkl','wb') as f:
    pickle.dump(res,f)
In [254]:
res[256]
Out[254]:
{'Cor_mean': 0.7719218843759043, 'non_Cor_mean': -0.02589611936305697, 'Cos_mean': 0.5506689556224444, 'non_Cos_mean': 0.03817278403655849, 'Trans_raw': 0, 'Trans_after': 0.022113511604496823, 'Var_raw': 0.0005237087854723636, 'Var_after': 0.00121420192726986, 'noisy': 21, 'Inter_cells': 90}
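To compare all hidden sizes at a glance, the nested `res` dict can be flattened into a DataFrame (a minimal sketch; `metric_df` is a hypothetical name):

import pandas as pd

# rows = hidden sizes, columns = metrics (Cor_mean, non_Cor_mean, ...)
metric_df = pd.DataFrame(res).T
metric_df.index.name = 'hidden_size'
print(metric_df[['Cor_mean','Cos_mean','Trans_after','noisy','Inter_cells']])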
In [121]:
with open('result/metric_dg_hidden.pkl','rb') as f:
    res=pickle.load(f)
In [122]:
for hidden_value in [64,128,256,512,1024]:
    res_dict=res[hidden_value]
    #Cor:exp
    # mean of the diagonal entries (same cell type)
    diagonal_mean = np.trace(cor_dict[hidden_value].values) / len(cor_dict[hidden_value])
    # mean of the off-diagonal entries
    non_diagonal_mean = (np.sum(cor_dict[hidden_value].values) - np.trace(cor_dict[hidden_value].values)) / (len(cor_dict[hidden_value])**2 - len(cor_dict[hidden_value]))
    res_dict['Cor_mean']=diagonal_mean
    res_dict['non_Cor_mean']=non_diagonal_mean
    #Cos:gene
    # mean of the diagonal entries (same cell type)
    diagonal_mean = np.trace(cos_dict[hidden_value].values) / len(cos_dict[hidden_value])
    # mean of the off-diagonal entries
    non_diagonal_mean = (np.sum(cos_dict[hidden_value].values) - np.trace(cos_dict[hidden_value].values)) / (len(cos_dict[hidden_value])**2 - len(cos_dict[hidden_value]))
    res_dict['Cos_mean']=diagonal_mean
    res_dict['non_Cos_mean']=non_diagonal_mean
In [124]:
import pickle
with open('result/metric_dg_hidden.pkl','wb') as f:
    pickle.dump(res,f)
All cell sizes¶
In [17]:
import gc
cell_dict={}
for cell_value in [1000,2000,5000]:
    bulktb=ov.bulk2single.BulkTrajBlend(bulk_seq=bulk,single_seq=adata,
                                        bulk_group=bulk.columns.tolist(),
                                        celltype_key='celltype',max_single_cells=cell_value)
    bulktb.vae_configure(cell_target_num=adata.obs['celltype'].value_counts().loc['OPC']*2)
    cell_dict[cell_value]=bulktb.vae_model.single_data.copy()
    bulktb.vae_train(batch_size=256,
                     learning_rate=1e-4,
                     hidden_size=256,
                     epoch_num=3500,
                     vae_save_dir='model',
                     vae_save_name=f'dg_vae_cell_{cell_value}',
                     generate_save_dir='output',
                     generate_save_name=f'dg_cell_{cell_value}')
    gc.collect()
......random select 1000 single cells
......drop duplicates index in bulk data
......deseq2 normalize the bulk data
......log10 the bulk data
......calculate the mean of each group
......normalize the single data
normalizing counts per cell
    finished (0:00:00)
......log1p the single data
......prepare the input of bulk2single
...loading data
...begin vae training
Train Epoch: 3499: 100%|██████████| 3500/3500 [09:12<00:00, 6.33it/s, loss=0.3108, min_loss=0.3106]
min loss = 0.31059974431991577
...vae training done!
...save trained vae in model/dg_vae_cell_1000.pth.
......random select 2000 single cells
......drop duplicates index in bulk data
......deseq2 normalize the bulk data
......log10 the bulk data
......calculate the mean of each group
......normalize the single data
normalizing counts per cell
    finished (0:00:00)
......log1p the single data
......prepare the input of bulk2single
...loading data
...begin vae training
Train Epoch: 3499: 100%|██████████| 3500/3500 [18:24<00:00, 3.17it/s, loss=0.7993, min_loss=0.7926]
min loss = 0.792562261223793
...vae training done!
...save trained vae in model/dg_vae_cell_2000.pth.
......drop duplicates index in bulk data
......deseq2 normalize the bulk data
......log10 the bulk data
......calculate the mean of each group
......normalize the single data
normalizing counts per cell
    finished (0:00:00)
......log1p the single data
......prepare the input of bulk2single
...loading data
...begin vae training
Train Epoch: 2876: 82%|████████▏ | 2877/3500 [22:54<04:57, 2.09it/s, loss=1.4128, min_loss=1.4063]
Early stopping at epoch 2878...
min loss = 1.4063413962721825
...vae training done!
...save trained vae in model/dg_vae_cell_5000.pth.
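The loop above releases Python-level references with `gc.collect()` after each training run. When training on a GPU, it can also help to drop PyTorch's cached CUDA memory between runs; this is a hedged aside assuming a PyTorch backend, not something the recorded run required:

import gc
gc.collect()
try:
    import torch
    if torch.cuda.is_available():
        torch.cuda.empty_cache()  # return cached GPU memory to the driver
except ImportError:
    pass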
In [ ]:
import anndata
cos_dict={}
cor_dict={}
adata_dict={}
noisy_dict={}
for cell_value in [1000,2000,5000]:
bulktb=ov.bulk2single.BulkTrajBlend(bulk_seq=bulk,single_seq=adata,
bulk_group=bulk.columns.tolist(),
celltype_key='celltype',max_single_cells=cell_value)
bulktb.vae_configure(cell_target_num=adata.obs['celltype'].value_counts().loc['OPC']*2)
bulktb.vae_load(f'model/dg_vae_cell_{cell_value}.pth',hidden_size=256)
test_adata=bulktb.vae_generate(leiden_size=25)
noisy_dict[cell_value]=test_adata.uns['noisy_leiden']
filter_leiden=list(test_adata.obs['celltype'].value_counts()[test_adata.obs['celltype'].value_counts()<10].index)
test_adata=test_adata[~test_adata.obs['celltype'].isin(filter_leiden)]
test_adata.raw=test_adata.copy()
#sc.pp.highly_variable_genes(
# test_adata,
# n_top_genes=3000,
#)
sc.pp.highly_variable_genes(test_adata, min_mean=0.0125, max_mean=3, min_disp=0.5)
test_adata = test_adata[:, test_adata.var.highly_variable]
cor_dict[cell_value]=cor_mean(adata_t,test_adata,'celltype')
cos_dict[cell_value]=cos_mean(adata_t,test_adata,'celltype')
test_adata=test_adata.raw.to_adata()
test_adata.write_h5ad(f'output/dg_vae_cell_{cell_value}.h5ad')
#bulktb.gnn_configure(max_epochs=2000)
#bulktb.gnn_train()
#res_pd=bulktb.gnn_generate()
#adata_dict[scale_value]=bulktb.interpolation('OPC')
adata3=test_adata[test_adata.obs['celltype']=='OPC']
#sc.pp.highly_variable_genes(bulktb.vae_model.single_data, min_mean=0.0125, max_mean=3, min_disp=0.5)
#bulktb.vae_model.single_data = bulktb.vae_model.single_data[:, bulktb.vae_model.single_data.var.highly_variable]
adata_dict[cell_value]=anndata.concat([bulktb.vae_model.single_data,adata3],
merge='same')
sc.pp.highly_variable_genes(
adata_dict[cell_value],
n_top_genes=3000,
)
#sc.pp.highly_variable_genes(adata_dict[scale_value], min_mean=0.0125, max_mean=3, min_disp=0.5)
adata_dict[cell_value] = adata_dict[cell_value][:, adata_dict[cell_value].var.highly_variable]
ov.pp.scale(adata_dict[cell_value])
ov.pp.pca(adata_dict[cell_value],layer='scaled',n_pcs=50)
adata_dict[cell_value].obsm["X_mde"] = ov.utils.mde(adata_dict[cell_value].obsm["scaled|original|X_pca"])
In [19]:
bulktb.vae_model.single_data
Out[19]:
AnnData object with n_obs × n_vars = 2930 × 13913 obs: 'clusters', 'age(days)', 'clusters_enlarged', 'celltype' uns: 'clusters_colors', 'log1p' obsm: 'X_umap' layers: 'ambiguous', 'spliced', 'unspliced'
In [56]:
adata3
Out[56]:
View of AnnData object with n_obs × n_vars = 303 × 13849 obs: 'celltype', 'leiden' uns: 'hvg', 'pca', 'neighbors', 'leiden', 'noisy_leiden', 'rank_genes_groups' obsm: 'X_pca' obsp: 'distances', 'connectivities'
In [46]:
for cell_value in [1000,2000,5000]:
    # mean of the diagonal entries (same cell type)
    diagonal_mean = np.trace(cor_dict[cell_value].values) / len(cor_dict[cell_value])
    # mean of the off-diagonal entries
    non_diagonal_mean = (np.sum(cor_dict[cell_value].values) - np.trace(cor_dict[cell_value].values)) / (len(cor_dict[cell_value])**2 - len(cor_dict[cell_value]))
    print(cell_value,"diagonal mean:", diagonal_mean,"off-diagonal mean:", non_diagonal_mean)
1000 diagonal mean: 0.8820645387497325 off-diagonal mean: 0.32087001823032074
2000 diagonal mean: 0.9551969268440144 off-diagonal mean: 0.33171390228135317
5000 diagonal mean: 0.9936090825269592 off-diagonal mean: 0.2873230495806181
In [47]:
for cell_value in [1000,2000,5000]:
    # mean of the diagonal entries (same cell type)
    diagonal_mean = np.trace(cos_dict[cell_value].values) / len(cos_dict[cell_value])
    # mean of the off-diagonal entries
    non_diagonal_mean = (np.sum(cos_dict[cell_value].values) - np.trace(cos_dict[cell_value].values)) / (len(cos_dict[cell_value])**2 - len(cos_dict[cell_value]))
    print(cell_value,"diagonal mean:", diagonal_mean,"off-diagonal mean:", non_diagonal_mean)
1000 diagonal mean: 0.2936351076519817 off-diagonal mean: 0.030904722311809484
2000 diagonal mean: 0.443953855876079 off-diagonal mean: 0.03456423131114027
5000 diagonal mean: 0.5488427650588512 off-diagonal mean: 0.03938964011558192
In [22]:
for cell_value in [1000,2000,5000]:
    print(cell_value,len(noisy_dict[cell_value]))
1000 34
2000 27
5000 18
In [50]:
ov.utils.embedding(adata_dict[5000],
                   basis='X_mde',
                   color=['celltype'],title='Dentategyrus Cell Type (BulkTrajBlend)',
                   frameon='small',show=False,
                   wspace=0.4,
                   palette=adata.uns['clusters_colors'].tolist()
                   )
In [ ]:
via_dict={}
for cell_value in [1000,2000,5000]:
    via_dict[cell_value] = ov.single.pyVIA(adata=adata_dict[cell_value],adata_key='scaled|original|X_pca',adata_ncomps=100, basis='X_mde',
                                           clusters='celltype',knn=20,random_seed=112,root_user=['nIPC','Neuroblast'],
                                           dataset='group')
    via_dict[cell_value].run()
    via_dict[cell_value].get_pseudotime(adata_dict[cell_value])
    sc.pp.neighbors(adata_dict[cell_value],n_neighbors=15,use_rep='scaled|original|X_pca')
    ov.utils.cal_paga(adata_dict[cell_value],use_time_prior='pt_via',vkey='paga',
                      groups='celltype')
In [55]:
ov.utils.plot_paga(adata_dict[5000],basis='mde', size=50, alpha=.1,title='PAGA LTNN-graph',
                   min_edge_width=2, node_size_scale=1.5,show=False,legend_loc=False)
#plt.title('PAGA Dentategyrus (BulkTrajBlend)',fontsize=13)
In [69]:
adata_dict[5000]
Out[69]:
AnnData object with n_obs × n_vars = 3036 × 3000 obs: 'celltype', 'pt_via' var: 'highly_variable', 'means', 'dispersions', 'dispersions_norm' uns: 'hvg', 'scaled|original|pca_var_ratios', 'scaled|original|cum_sum_eigenvalues', 'celltype_colors', 'neighbors', 'paga_graph', 'paga', 'celltype_sizes' obsm: 'scaled|original|X_pca', 'X_mde' varm: 'scaled|original|pca_loadings' layers: 'scaled', 'lognorm' obsp: 'distances', 'connectivities'
In [62]:
ov.utils.embedding(adata_dict[5000],
                   basis='X_mde',
                   color=['pt_via'],title='Dentategyrus Cell Type (BulkTrajBlend)',
                   frameon='small',show=False,
                   wspace=0.4)
In [56]:
transitions_dict={}
for cell_value in [1000,2000,5000]:
    after_transitions=pd.DataFrame(adata_dict[cell_value].uns['paga']['transitions_confidence'].toarray(),
                                   index=adata_dict[cell_value].obs['celltype'].cat.categories,
                                   columns=adata_dict[cell_value].obs['celltype'].cat.categories)
    transitions_dict[cell_value]=after_transitions
In [57]:
res={}
for cell_value in [1000,2000,5000]:
    res_dict={}
    #Cor:exp
    # mean of the diagonal entries (same cell type)
    diagonal_mean = np.trace(cor_dict[cell_value].values) / len(cor_dict[cell_value])
    # mean of the off-diagonal entries
    non_diagonal_mean = (np.sum(cor_dict[cell_value].values) - np.trace(cor_dict[cell_value].values)) / (len(cor_dict[cell_value])**2 - len(cor_dict[cell_value]))
    res_dict['Cor_mean']=diagonal_mean
    res_dict['non_Cor_mean']=non_diagonal_mean
    #Cos:gene
    # mean of the diagonal entries (same cell type)
    diagonal_mean = np.trace(cos_dict[cell_value].values) / len(cos_dict[cell_value])
    # mean of the off-diagonal entries
    non_diagonal_mean = (np.sum(cos_dict[cell_value].values) - np.trace(cos_dict[cell_value].values)) / (len(cos_dict[cell_value])**2 - len(cos_dict[cell_value]))
    res_dict['Cos_mean']=diagonal_mean
    res_dict['non_Cos_mean']=non_diagonal_mean
    #raw:trans
    res_dict['Trans_raw']=0
    res_dict['Trans_after']=transitions_dict[cell_value].loc['OPC'].max()
    #Variance
    res_dict['Var_raw']=np.var(adata2.obs.loc[adata2.obs['celltype']=='OPC','pt_via'])
    res_dict['Var_after']=np.var(adata_dict[cell_value].obs.loc[adata_dict[cell_value].obs['celltype']=='OPC','pt_via'])
    res_dict['noisy']=len(noisy_dict[cell_value])
    # the dataset has only 2930 cells, so max_single_cells=5000 keeps all of them
    if cell_value==5000:
        cell_value1=2930
    else:
        cell_value1=cell_value
    res_dict['Inter_cells']=adata_dict[cell_value].shape[0]-cell_value1
    res[cell_value]=res_dict
In [63]:
import pickle
with open('result/metric_dg_cell.pkl','wb') as f:
    pickle.dump(res,f)
In [62]:
res[5000]
Out[62]:
{'Cor_mean': 0.9936090825269592, 'non_Cor_mean': 0.2873230495806181, 'Cos_mean': 0.5488427650588512, 'non_Cos_mean': 0.03938964011558192, 'Trans_raw': 0, 'Trans_after': 0.023045544794400494, 'Var_raw': 0.0005237087854723636, 'Var_after': 0.0001361129715928766, 'noisy': 19, 'Inter_cells': 106}
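Both sweeps are now saved; a short sketch (assuming only the two pickle files written above) loads them side by side for a final comparison:

import pickle
import pandas as pd

with open('result/metric_dg_hidden.pkl','rb') as f:
    res_hidden=pickle.load(f)
with open('result/metric_dg_cell.pkl','rb') as f:
    res_cell=pickle.load(f)

# rows = swept values (hidden sizes or max_single_cells), columns = metrics
hidden_df=pd.DataFrame(res_hidden).T
cell_df=pd.DataFrame(res_cell).T
print(hidden_df)
print(cell_df)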