CGAN
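This notebook benchmarks a conditional GAN (CGAN) as a generative baseline on the dentate gyrus dataset: a generator conditioned on cell-type labels is trained on the single-cell expression matrix prepared by ov.bulk2single.BulkTrajBlend, synthetic OPC profiles are blended back into the reference, and the result is evaluated with expression correlation, marker-gene cosine similarity, and VIA/PAGA trajectory metrics.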
In [1]:
import argparse
import os
import numpy as np
import math
import torchvision.transforms as transforms
from torchvision.utils import save_image
from torch.utils.data import DataLoader
from torchvision import datasets
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch
In [2]:
import omicverse as ov
from omicverse.utils import mde
import scanpy as sc
import scvelo as scv
ov.utils.ov_plot_set()
In [3]:
adata=scv.datasets.dentategyrus()
adata
Out[3]:
AnnData object with n_obs × n_vars = 2930 × 13913
    obs: 'clusters', 'age(days)', 'clusters_enlarged'
    uns: 'clusters_colors'
    obsm: 'X_umap'
    layers: 'ambiguous', 'spliced', 'unspliced'
In [4]:
import numpy as np
import pandas as pd
bulk=pd.read_csv('data/GSE74985_mergedCount.txt.gz',index_col=0,sep='\t')
bulk=ov.bulk.Matrix_ID_mapping(bulk,'genesets/pair_GRCm39.tsv')
bulk.head()
Out[4]:
| | dg_d_1 | dg_d_2 | dg_d_3 | dg_v_1 | dg_v_2 | dg_v_3 | ca4_1 | ca4_2 | ca4_3 | ca3_d_1 | ... | ca3_v_3 | ca2_1 | ca2_2 | ca2_3 | ca1_d_1 | ca1_d_2 | ca1_d_3 | ca1_v_1 | ca1_v_2 | ca1_v_3 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Olfr692 | 0 | 0 | 0 | 6 | 8 | 0 | 19 | 3 | 0 | 0 | ... | 10 | 12 | 16 | 3 | 0 | 3 | 1 | 1 | 0 | 0 |
| Olfr637-ps1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| Olfr1272 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| Gm23613 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| Tap2 | 0 | 0 | 0 | 5 | 0 | 3 | 80 | 29 | 16 | 8 | ... | 9 | 14 | 2 | 46 | 0 | 0 | 1 | 0 | 0 | 0 |

5 rows × 24 columns
In [5]:
bulktb=ov.bulk2single.BulkTrajBlend(bulk_seq=bulk,single_seq=adata,
                                    celltype_key='clusters',)
#bulktb.bulk_preprocess_lazy(group=['dg_d_1','dg_d_2','dg_d_3'])
#bulktb.single_preprocess_lazy()
In [6]:
bulktb.vae_configure(cell_target_num=200)
......drop duplicates index in bulk data
......deseq2 normalize the bulk data
......log10 the bulk data
......calculate the mean of each group
......normalize the single data
normalizing counts per cell
    finished (0:00:00)
......log1p the single data
......prepare the input of bulk2single
...loading data
In [7]:
features=bulktb.vae_model.input_data['input_sc_data'].T
labels=bulktb.vae_model.input_data['input_sc_meta']['Cell_type'].astype('str')
# Map the cell-type strings to integer codes
label_mapping = {label: idx for idx, label in enumerate(labels.unique())}
labels_encoded = labels.map(label_mapping)
# Convert to PyTorch tensors
features_tensor = torch.Tensor(features.values)
labels_tensor = torch.LongTensor(labels_encoded.values)  # integer class labels
In [8]:
# Build a DataLoader so the data can be drawn in shuffled mini-batches during training
from torch.utils.data import DataLoader, TensorDataset
batch_size = 64
dataset = TensorDataset(features_tensor, labels_tensor)
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
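A quick sanity check (not part of the original run) can confirm that the loader yields the expected shapes; with 12,953 genes and a batch size of 64, one mini-batch should be 64 × 12953:

# Illustrative check only: pull one shuffled mini-batch and inspect it.
xb, yb = next(iter(data_loader))
print(xb.shape, yb.shape)  # expected: torch.Size([64, 12953]) torch.Size([64])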
In [11]:
n_epochs=200
batch_size=64
lr=0.0002
b1=0.5
b2=0.999
n_cpu=8
latent_dim=100
n_classes=len(list(set(labels)))  # number of cell-type classes
img_size=32
channels=1
sample_interval=400
n_features=features.shape[1]
In [10]:
features.shape
Out[10]:
(2930, 12953)
In [12]:
class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()
        self.label_emb = nn.Embedding(n_classes, n_classes)

        def block(in_feat, out_feat, normalize=True):
            layers = [nn.Linear(in_feat, out_feat)]
            if normalize:
                layers.append(nn.BatchNorm1d(out_feat, 0.8))
            layers.append(nn.LeakyReLU(0.2, inplace=True))
            return layers

        self.model = nn.Sequential(
            *block(latent_dim + n_classes, 128, normalize=False),
            *block(128, 256),
            *block(256, 512),
            *block(512, 1024),
            nn.Linear(1024, n_features),
            #nn.Sigmoid()
        )

    def forward(self, noise, labels):
        # Concatenate label embedding and noise to produce the generator input
        gen_input = torch.cat((self.label_emb(labels), noise), -1)
        img = self.model(gen_input)
        #img = img.view(img.size(0), *img_shape)
        return img


class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()
        self.label_embedding = nn.Embedding(n_classes, n_classes)
        self.model = nn.Sequential(
            nn.Linear(n_classes + n_features, 512),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(512, 512),
            nn.Dropout(0.4),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(512, 512),
            nn.Dropout(0.4),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(512, 1),
        )

    def forward(self, img, labels):
        # Concatenate label embedding and expression vector to produce the discriminator input
        d_in = torch.cat((img, self.label_embedding(labels)), -1)
        validity = self.model(d_in)
        return validity
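Both networks follow the classic PyTorch-GAN CGAN template, with two changes for expression data: the final Sigmoid is commented out so the generator emits unbounded log-normalized values, and the output layer is sized to n_features genes rather than a flattened image. A minimal shape check (illustrative, not in the original run) confirms the wiring:

# Sketch: one forward pass through freshly constructed networks.
G_test, D_test = Generator(), Discriminator()
z_test = torch.randn(4, latent_dim)           # a small noise batch
y_test = torch.randint(0, n_classes, (4,))    # random cell-type labels
fake_profiles = G_test(z_test, y_test)
print(fake_profiles.shape)                  # (4, n_features): one profile per sample
print(D_test(fake_profiles, y_test).shape)  # (4, 1): one validity score per sample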
In [13]:
# Loss functions
adversarial_loss = torch.nn.MSELoss()

# Initialize generator and discriminator
generator = Generator()
discriminator = Discriminator()

cuda = True if torch.cuda.is_available() else False
if cuda:
    generator.cuda()
    discriminator.cuda()
    adversarial_loss.cuda()
In [14]:
# Optimizers
optimizer_G = torch.optim.Adam(generator.parameters(), lr=lr, betas=(b1, b2))
optimizer_D = torch.optim.Adam(discriminator.parameters(), lr=lr, betas=(b1, b2))
FloatTensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor
LongTensor = torch.cuda.LongTensor if cuda else torch.LongTensor
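The torch.cuda.FloatTensor / torch.cuda.LongTensor aliases come from the original template; on recent PyTorch versions a device-based pattern is the more idiomatic equivalent (a sketch only, the rest of the notebook keeps the aliases):

# Hypothetical modernization: allocate tensors on a device instead of
# switching tensor types.
device = torch.device('cuda' if cuda else 'cpu')
# z = torch.randn(batch_size, latent_dim, device=device)
# labels = labels.to(device)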
In [15]:
Copied!
# ----------
# Training
# ----------
from tqdm import trange,tqdm
n_epochs=5000
bar = tqdm(range(n_epochs))
#for epoch in trange(n_epochs):
d_loss_li=[]
g_loss_li=[]
for epoch in bar:
for i, (imgs, labels) in enumerate(data_loader):
batch_size = imgs.shape[0]
# Adversarial ground truths
valid = Variable(FloatTensor(batch_size, 1).fill_(1.0), requires_grad=False)
fake = Variable(FloatTensor(batch_size, 1).fill_(0.0), requires_grad=False)
# Configure input
real_imgs = Variable(imgs.type(FloatTensor))
labels = Variable(labels.type(LongTensor))
# -----------------
# Train Generator
# -----------------
optimizer_G.zero_grad()
# Sample noise and labels as generator input
z = Variable(FloatTensor(np.random.normal(0, 1, (batch_size, latent_dim))))
gen_labels = Variable(LongTensor(np.random.randint(0, n_classes, batch_size)))
# Generate a batch of images
gen_imgs = generator(z, gen_labels)
# Loss measures generator's ability to fool the discriminator
validity = discriminator(gen_imgs, gen_labels)
g_loss = adversarial_loss(validity, valid)
g_loss.backward()
optimizer_G.step()
# ---------------------
# Train Discriminator
# ---------------------
optimizer_D.zero_grad()
# Loss for real images
validity_real = discriminator(real_imgs, labels)
d_real_loss = adversarial_loss(validity_real, valid)
# Loss for fake images
validity_fake = discriminator(gen_imgs.detach(), gen_labels)
d_fake_loss = adversarial_loss(validity_fake, fake)
# Total discriminator loss
d_loss = (d_real_loss + d_fake_loss) / 2
d_loss.backward()
optimizer_D.step()
break
bar.set_description(
"[Epoch %d/%d] [Batch %d/%d] [D loss: %f] [G loss: %f]"
% (epoch, n_epochs, i, len(data_loader), d_loss.item(), g_loss.item())
)
d_loss_li.append(d_loss.item())
g_loss_li.append(g_loss.item())
# ----------
# Training
# ----------
from tqdm import trange,tqdm
n_epochs=5000
bar = tqdm(range(n_epochs))
#for epoch in trange(n_epochs):
d_loss_li=[]
g_loss_li=[]
for epoch in bar:
for i, (imgs, labels) in enumerate(data_loader):
batch_size = imgs.shape[0]
# Adversarial ground truths
valid = Variable(FloatTensor(batch_size, 1).fill_(1.0), requires_grad=False)
fake = Variable(FloatTensor(batch_size, 1).fill_(0.0), requires_grad=False)
# Configure input
real_imgs = Variable(imgs.type(FloatTensor))
labels = Variable(labels.type(LongTensor))
# -----------------
# Train Generator
# -----------------
optimizer_G.zero_grad()
# Sample noise and labels as generator input
z = Variable(FloatTensor(np.random.normal(0, 1, (batch_size, latent_dim))))
gen_labels = Variable(LongTensor(np.random.randint(0, n_classes, batch_size)))
# Generate a batch of images
gen_imgs = generator(z, gen_labels)
# Loss measures generator's ability to fool the discriminator
validity = discriminator(gen_imgs, gen_labels)
g_loss = adversarial_loss(validity, valid)
g_loss.backward()
optimizer_G.step()
# ---------------------
# Train Discriminator
# ---------------------
optimizer_D.zero_grad()
# Loss for real images
validity_real = discriminator(real_imgs, labels)
d_real_loss = adversarial_loss(validity_real, valid)
# Loss for fake images
validity_fake = discriminator(gen_imgs.detach(), gen_labels)
d_fake_loss = adversarial_loss(validity_fake, fake)
# Total discriminator loss
d_loss = (d_real_loss + d_fake_loss) / 2
d_loss.backward()
optimizer_D.step()
break
bar.set_description(
"[Epoch %d/%d] [Batch %d/%d] [D loss: %f] [G loss: %f]"
% (epoch, n_epochs, i, len(data_loader), d_loss.item(), g_loss.item())
)
d_loss_li.append(d_loss.item())
g_loss_li.append(g_loss.item())
[Epoch 4999/5000] [Batch 0/46] [D loss: 0.064183] [G loss: 0.598119]: 100%|██████████| 5000/5000 [01:12<00:00, 68.73it/s]
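The notebook does not checkpoint the trained networks; a minimal sketch like the following (the models/ paths are illustrative, not files produced by this run) would let the sampling cells below be rerun without retraining:

import os
os.makedirs('models', exist_ok=True)
torch.save(generator.state_dict(), 'models/cgan_dg_generator.pt')
torch.save(discriminator.state_dict(), 'models/cgan_dg_discriminator.pt')
# later: generator.load_state_dict(torch.load('models/cgan_dg_generator.pt'))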
In [16]:
Copied!
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(3,3))
ax.scatter(range(len(d_loss_li)),d_loss_li,s=1)
ax.plot(range(len(d_loss_li)),d_loss_li)
ax.spines['left'].set_position(('outward', 10))
ax.spines['bottom'].set_position(('outward', 10))
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.grid(False)
#设置spines可视化情况
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_visible(True)
ax.spines['left'].set_visible(True)
plt.title('Discriminator Loss\n(CGAN):Dentategyrus',fontsize=13)
plt.xlabel('Epoch',fontsize=13)
plt.ylabel('Loss',fontsize=13)
plt.savefig('figures/loss_d_cgan_dg.png',dpi=300,bbox_inches='tight')
plt.savefig('pdf/loss_d_cgan_dg.pdf',dpi=300,bbox_inches='tight')
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(3,3))
ax.scatter(range(len(d_loss_li)),d_loss_li,s=1)
ax.plot(range(len(d_loss_li)),d_loss_li)
ax.spines['left'].set_position(('outward', 10))
ax.spines['bottom'].set_position(('outward', 10))
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.grid(False)
#设置spines可视化情况
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_visible(True)
ax.spines['left'].set_visible(True)
plt.title('Discriminator Loss\n(CGAN):Dentategyrus',fontsize=13)
plt.xlabel('Epoch',fontsize=13)
plt.ylabel('Loss',fontsize=13)
plt.savefig('figures/loss_d_cgan_dg.png',dpi=300,bbox_inches='tight')
plt.savefig('pdf/loss_d_cgan_dg.pdf',dpi=300,bbox_inches='tight')
In [17]:
Copied!
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(3,3))
ax.scatter(range(len(g_loss_li)),g_loss_li,s=1)
ax.plot(range(len(g_loss_li)),g_loss_li,color=ov.utils.blue_color[0])
ax.spines['left'].set_position(('outward', 10))
ax.spines['bottom'].set_position(('outward', 10))
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.grid(False)
#设置spines可视化情况
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_visible(True)
ax.spines['left'].set_visible(True)
plt.title('Generator Loss\n(CGAN):Dentategyrus',fontsize=13)
plt.xlabel('Epoch',fontsize=13)
plt.ylabel('Loss',fontsize=13)
plt.savefig('figures/loss_g_cgan_dg.png',dpi=300,bbox_inches='tight')
plt.savefig('pdf/loss_g_cgan_dg.pdf',dpi=300,bbox_inches='tight')
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(3,3))
ax.scatter(range(len(g_loss_li)),g_loss_li,s=1)
ax.plot(range(len(g_loss_li)),g_loss_li,color=ov.utils.blue_color[0])
ax.spines['left'].set_position(('outward', 10))
ax.spines['bottom'].set_position(('outward', 10))
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.grid(False)
#设置spines可视化情况
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_visible(True)
ax.spines['left'].set_visible(True)
plt.title('Generator Loss\n(CGAN):Dentategyrus',fontsize=13)
plt.xlabel('Epoch',fontsize=13)
plt.ylabel('Loss',fontsize=13)
plt.savefig('figures/loss_g_cgan_dg.png',dpi=300,bbox_inches='tight')
plt.savefig('pdf/loss_g_cgan_dg.pdf',dpi=300,bbox_inches='tight')
In [18]:
Variable(FloatTensor(np.random.normal(0, 1, (100, latent_dim)))).shape
Out[18]:
torch.Size([100, 100])
In [19]:
n_row=14
z = Variable(FloatTensor(np.random.normal(0, 1, (n_row * 100, latent_dim))))
# Labels cycle through all n_row (= n_classes) cell types, 100 samples each
labels = np.array([num for _ in range(100) for num in range(n_row)])
labels = Variable(LongTensor(labels))
gen_imgs = generator(z, labels)
In [20]:
gen_imgs.max()
Out[20]:
tensor(9.9819, device='cuda:0', grad_fn=<MaxBackward1>)
In [21]:
gen_imgs.min()
Out[21]:
tensor(-1.9895, device='cuda:0', grad_fn=<MinBackward1>)
In [22]:
import anndata
test_adata=anndata.AnnData(np.array(gen_imgs.cpu().detach().numpy()))
# Invert the label mapping to recover cell-type names from integer codes
rev_dict=dict(zip(label_mapping.values(),label_mapping.keys()))
test_adata.obs['celltype_num']=labels.cpu().detach().numpy()
test_adata.obs['celltype']=test_adata.obs['celltype_num'].map(rev_dict)
In [23]:
test_adata.layers['scaled']=test_adata.X.copy()
ov.pp.pca(test_adata,layer='scaled',n_pcs=50)
test_adata
Out[23]:
AnnData object with n_obs × n_vars = 1400 × 12953
    obs: 'celltype_num', 'celltype'
    uns: 'scaled|original|pca_var_ratios', 'scaled|original|cum_sum_eigenvalues'
    obsm: 'scaled|original|X_pca'
    varm: 'scaled|original|pca_loadings'
    layers: 'scaled', 'lognorm'
In [24]:
test_adata.obsm["X_mde"] = ov.utils.mde(test_adata.obsm["scaled|original|X_pca"])
test_adata
Out[24]:
AnnData object with n_obs × n_vars = 1400 × 12953
    obs: 'celltype_num', 'celltype'
    uns: 'scaled|original|pca_var_ratios', 'scaled|original|cum_sum_eigenvalues'
    obsm: 'scaled|original|X_pca', 'X_mde'
    varm: 'scaled|original|pca_loadings'
    layers: 'scaled', 'lognorm'
In [25]:
test_adata.var.index=features.columns
In [27]:
adata.uns['clusters_colors']
Out[27]:
array(['#3ba458', '#404040', '#7a7a7a', '#fda762', '#6950a3', '#2575b7', '#08306b', '#e1bfb0', '#e5d8bd', '#79b5d9', '#f14432', '#fc8a6a', '#98d594', '#d0e1f2'], dtype=object)
In [29]:
adata.obs['celltype']=adata.obs['clusters'].copy()
In [30]:
ov.bulk2single.bulk2single_plot_correlation(test_adata,adata,celltype_key='celltype')
ranking genes
    finished: added to `.uns['rank_genes_groups']`
    'names', sorted np.recarray to be indexed by group ids
    'scores', sorted np.recarray to be indexed by group ids
    'logfoldchanges', sorted np.recarray to be indexed by group ids
    'pvals', sorted np.recarray to be indexed by group ids
    'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:01)
Out[30]:
(<Figure size 480x480 with 2 Axes>, <AxesSubplot: title={'center': 'Expression correlation'}, xlabel='scRNA-seq reference', ylabel='deconvoluted bulk-seq'>)
In [32]:
Copied!
#adata.uns['log1p']['base']=10
#adata.uns['log1p']['base']=10
In [40]:
Copied!
adata_t=ov.pp.preprocess(adata,mode='shiftlog|pearson',n_HVGs=3000,
target_sum=1e4)
adata_t=ov.pp.preprocess(adata,mode='shiftlog|pearson',n_HVGs=3000,
target_sum=1e4)
Begin robust gene identification
After filtration, 13264/13913 genes are kept. Among 13264 genes, 13189 genes are robust.
End of robust gene identification.
Begin size normalization: shiftlog and HVGs selection pearson
normalizing counts per cell
    The following highly-expressed genes are not considered during normalization factor computation:
    ['Hba-a1', 'Malat1', 'Ptgds', 'Hbb-bt']
    finished (0:00:00)
extracting highly variable genes
--> added
    'highly_variable', boolean vector (adata.var)
    'highly_variable_rank', float vector (adata.var)
    'highly_variable_nbatches', int vector (adata.var)
    'highly_variable_intersection', boolean vector (adata.var)
    'means', float vector (adata.var)
    'variances', float vector (adata.var)
    'residual_variances', float vector (adata.var)
End of size normalization: shiftlog and HVGs selection pearson
In [41]:
cor_pd=ov.bulk2single.bulk2single_plot_correlation(adata_t,test_adata,celltype_key='celltype',
                                                   return_table=True)
ranking genes
    finished: added to `.uns['rank_genes_groups']`
    'names', sorted np.recarray to be indexed by group ids
    'scores', sorted np.recarray to be indexed by group ids
    'logfoldchanges', sorted np.recarray to be indexed by group ids
    'pvals', sorted np.recarray to be indexed by group ids
    'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:01)
In [42]:
#sc.tl.rank_genes_groups(single_data, celltype_key, method='wilcoxon')
marker_df = pd.DataFrame(adata.uns['rank_genes_groups']['names']).head(200)
#marker = list(set(np.unique(np.ravel(np.array(marker_df))))&set(generate_adata.var.index.tolist()))
marker = list(set(np.unique(np.ravel(np.array(marker_df))))&set(test_adata.var.index.tolist()))
# the mean expression of 200 marker genes of input sc data
sc_marker = adata[:,marker].to_df()
sc_marker['celltype'] = adata.obs['celltype']
sc_marker_mean = sc_marker.groupby('celltype')[marker].mean()
In [43]:
sc_marker_mean=sc_marker_mean.T
In [44]:
rf_ct = list(sc_marker_mean.columns)
In [45]:
cor_pd=pd.DataFrame(cor_pd,
                    index=rf_ct,
                    columns=rf_ct)
In [46]:
import seaborn as sns
fig, ax = plt.subplots(figsize=(5,5))
sns.heatmap(cor_pd,cmap='RdBu_r',cbar_kws={'shrink':0.5},
            square=True,xticklabels=True,yticklabels=True,)
plt.xlabel("scRNA-seq reference")
plt.ylabel("Deconvoluted bulk-seq")
plt.setp(ax.get_xticklabels(), rotation=90, ha="right", rotation_mode="anchor")
#plt.colorbar(im)
ax.set_title("Expression correlation\n(CGAN):Dentategyrus")
plt.savefig('figures/heatmap_expcor_cgan_dg.png',dpi=300,bbox_inches='tight')
plt.savefig('pdf/heatmap_expcor_cgan_dg.pdf',dpi=300,bbox_inches='tight')
In [47]:
cmk1=ov.single.get_celltype_marker(test_adata,clustertype='celltype',
                                   scores_type='logfoldchanges')
...get cell type marker
ranking genes
    finished: added to `.uns['rank_genes_groups']`
    'names', sorted np.recarray to be indexed by group ids
    'scores', sorted np.recarray to be indexed by group ids
    'logfoldchanges', sorted np.recarray to be indexed by group ids
    'pvals', sorted np.recarray to be indexed by group ids
    'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:01)
In [48]:
adata_hpc1=ov.pp.preprocess(adata,mode='shiftlog|pearson',n_HVGs=3000,
                            target_sum=1e4)
Begin robust gene identification
After filtration, 13264/13264 genes are kept. Among 13264 genes, 13189 genes are robust.
End of robust gene identification.
Begin size normalization: shiftlog and HVGs selection pearson
normalizing counts per cell
    The following highly-expressed genes are not considered during normalization factor computation:
    ['Hba-a1', 'Malat1', 'Ptgds', 'Hbb-bt']
    finished (0:00:00)
extracting highly variable genes
--> added
    'highly_variable', boolean vector (adata.var)
    'highly_variable_rank', float vector (adata.var)
    'highly_variable_nbatches', int vector (adata.var)
    'highly_variable_intersection', boolean vector (adata.var)
    'means', float vector (adata.var)
    'variances', float vector (adata.var)
    'residual_variances', float vector (adata.var)
End of size normalization: shiftlog and HVGs selection pearson
In [49]:
cmk2=ov.single.get_celltype_marker(adata_hpc1,clustertype='celltype',
                                   scores_type='logfoldchanges')
...get cell type marker
ranking genes
    finished: added to `.uns['rank_genes_groups']`
    'names', sorted np.recarray to be indexed by group ids
    'scores', sorted np.recarray to be indexed by group ids
    'logfoldchanges', sorted np.recarray to be indexed by group ids
    'pvals', sorted np.recarray to be indexed by group ids
    'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:01)
In [97]:
cmk1={}
for clt in test_adata.obs['celltype'].cat.categories:
    degs = sc.get.rank_genes_groups_df(test_adata, group=clt,
                                       key='rank_genes_groups', log2fc_min=2,
                                       pval_cutoff=0.05)
    cmk1[clt]=degs['names'][:300].tolist()
In [98]:
cmk2={}
for clt in adata_hpc1.obs['celltype'].cat.categories:
    degs = sc.get.rank_genes_groups_df(adata_hpc1, group=clt,
                                       key='rank_genes_groups', log2fc_min=2,
                                       pval_cutoff=0.05)
    cmk2[clt]=degs['names'][:300].tolist()
In [99]:
all_gene=[]
for clt in cmk1.keys():
    all_gene+=cmk1[clt]
for clt in cmk2.keys():
    all_gene+=cmk2[clt]
all_gene=list(set(all_gene))
In [100]:
# Binary marker-membership matrices: rows are genes, columns are cell types
cmk1_pd=pd.DataFrame(index=all_gene)
for clt in cmk1.keys():
    cmk1_pd[clt]=0
    cmk1_pd.loc[cmk1[clt],clt]=1

cmk2_pd=pd.DataFrame(index=all_gene)
for clt in cmk2.keys():
    cmk2_pd[clt]=0
    cmk2_pd.loc[cmk2[clt],clt]=1
In [101]:
from scipy import spatial
plot_data=pd.DataFrame(index=test_adata.obs['celltype'].cat.categories,
                       columns=test_adata.obs['celltype'].cat.categories)
for clt1 in cmk1.keys():
    for clt2 in cmk1.keys():
        #print(clt,1 - spatial.distance.cosine(cmk1_pd['B'], cmk2_pd[clt]))
        plot_data.loc[clt1,clt2]=1 - spatial.distance.cosine(cmk1_pd[clt1], cmk2_pd[clt2])
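The nested loop computes pairwise cosine similarity between the binary marker-membership columns; an equivalent vectorized form (a sketch, assuming scikit-learn is available and that both frames share the same cell-type columns) would be:

from sklearn.metrics.pairwise import cosine_similarity
# Rows of the transposed frames are cell types, columns are genes, so
# sim[i, j] corresponds to plot_data.loc[type_i, type_j] from the loop above.
sim = cosine_similarity(cmk1_pd.T.values, cmk2_pd.T.values)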
In [102]:
plot_data=plot_data.astype(float)
In [103]:
fig, ax = plt.subplots(figsize=(5,5))
sns.heatmap(plot_data,cmap='RdBu_r',cbar_kws={'shrink':0.5},
            square=True,xticklabels=True,yticklabels=True,vmax=0.1)
plt.xlabel("scRNA-seq reference")
plt.ylabel("Deconvoluted bulk-seq")
plt.setp(ax.get_xticklabels(), rotation=90, ha="right", rotation_mode="anchor")
#plt.colorbar(im)
ax.set_title("Cosine similarity\n(CGAN):Dentategyrus")
plt.savefig('figures/heatmap_cossim_cgan_dg.png',dpi=300,bbox_inches='tight')
plt.savefig('pdf/heatmap_cossim_cgan_dg.pdf',dpi=300,bbox_inches='tight')
In [58]:
adata3=test_adata[test_adata.obs['celltype']=='OPC']
adata3
Out[58]:
View of AnnData object with n_obs × n_vars = 100 × 12953
    obs: 'celltype_num', 'celltype'
    uns: 'scaled|original|pca_var_ratios', 'scaled|original|cum_sum_eigenvalues', 'celltype_colors', 'rank_genes_groups'
    obsm: 'scaled|original|X_pca', 'X_mde'
    varm: 'scaled|original|pca_loadings'
    layers: 'scaled', 'lognorm'
In [64]:
bulktb.vae_model.single_data.obs['celltype']=bulktb.vae_model.single_data.obs['clusters']
In [67]:
import anndata
adata1=anndata.concat([bulktb.vae_model.single_data,adata3],
                      merge='same')
adata1
Out[67]:
AnnData object with n_obs × n_vars = 3030 × 12953
    obs: 'celltype'
In [68]:
adata1.raw = adata1
sc.pp.highly_variable_genes(adata1, min_mean=0.0125, max_mean=3, min_disp=0.5)
adata1 = adata1[:, adata1.var.highly_variable]
extracting highly variable genes
    finished (0:00:00)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
In [69]:
ov.pp.scale(adata1)
ov.pp.pca(adata1,layer='scaled',n_pcs=50)
... as `zero_center=True`, sparse input is densified and may lead to large memory consumption
In [70]:
adata1.obsm["X_mde"] = ov.utils.mde(adata1.obsm["scaled|original|X_pca"])
adata1
Out[70]:
AnnData object with n_obs × n_vars = 3030 × 1802
    obs: 'celltype'
    var: 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
    uns: 'hvg', 'scaled|original|pca_var_ratios', 'scaled|original|cum_sum_eigenvalues'
    obsm: 'scaled|original|X_pca', 'X_mde'
    varm: 'scaled|original|pca_loadings'
    layers: 'scaled', 'lognorm'
In [71]:
ov.utils.embedding(adata1,
                   basis='X_mde',
                   color=['celltype'],title='Dentategyrus Cell Type (CGAN)',
                   frameon='small',show=False,
                   wspace=0.4,palette=adata.uns['clusters_colors'].tolist())
plt.savefig('figures/umap_dg_cgan.png',dpi=300,bbox_inches='tight')
plt.savefig('pdf/umap_dg_cgan.pdf',dpi=300,bbox_inches='tight')
In [72]:
adata2=bulktb.vae_model.single_data.copy()
In [73]:
adata2.raw = adata2
sc.pp.highly_variable_genes(adata2, min_mean=0.0125, max_mean=3, min_disp=0.5)
adata2 = adata2[:, adata2.var.highly_variable]
extracting highly variable genes
    finished (0:00:00)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
In [74]:
ov.pp.scale(adata2)
ov.pp.pca(adata2,layer='scaled',n_pcs=50)
... as `zero_center=True`, sparse input is densified and may lead to large memory consumption
In [75]:
adata2.obsm["X_mde"] = ov.utils.mde(adata2.obsm["scaled|original|X_pca"])
adata2
Out[75]:
AnnData object with n_obs × n_vars = 2930 × 2254
    obs: 'clusters', 'age(days)', 'clusters_enlarged', 'celltype'
    var: 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
    uns: 'clusters_colors', 'log1p', 'hvg', 'scaled|original|pca_var_ratios', 'scaled|original|cum_sum_eigenvalues'
    obsm: 'X_umap', 'scaled|original|X_pca', 'X_mde'
    varm: 'scaled|original|pca_loadings'
    layers: 'ambiguous', 'spliced', 'unspliced', 'scaled', 'lognorm'
In [77]:
v0 = ov.single.pyVIA(adata=adata2,adata_key='scaled|original|X_pca',adata_ncomps=100, basis='X_mde',
                     clusters='celltype',knn=15,random_seed=4,root_user=['nIPC'],
                     dataset='group')
v0.run()
2023-10-05 14:34:43.856616 From root 26, the Terminal state 36 is reached 5 times.
2023-10-05 14:34:45.015598 From root 26, the Terminal state 37 is reached 184 times.
2023-10-05 14:34:45.941438 From root 26, the Terminal state 38 is reached 346 times.
2023-10-05 14:34:47.343073 From root 26, the Terminal state 39 is reached 45 times.
2023-10-05 14:34:48.557044 From root 26, the Terminal state 41 is reached 155 times.
2023-10-05 14:34:49.734404 From root 26, the Terminal state 45 is reached 177 times.
2023-10-05 14:34:51.109114 From root 26, the Terminal state 46 is reached 45 times.
2023-10-05 14:34:52.566552 From root 26, the Terminal state 50 is reached 11 times.
2023-10-05 14:34:53.956050 From root 26, the Terminal state 51 is reached 49 times.
2023-10-05 14:34:53.986477 Terminal clusters corresponding to unique lineages are {8: 'Granule mature', 9: 'Microglia', 11: 'Astrocytes', 13: 'Endothelial', 15: 'Astrocytes', 16: 'Radial Glia-like', 28: 'Endothelial', 29: 'Endothelial', 30: 'Granule mature', 31: 'Granule mature', 32: 'Granule mature', 33: 'Granule mature', 34: 'Granule mature', 36: 'Granule mature', 37: 'Granule mature', 38: 'Granule mature', 39: 'Granule mature', 41: 'Granule mature', 45: 'Granule immature', 46: 'Granule mature', 50: 'Granule mature', 51: 'Granule mature'}
2023-10-05 14:34:53.986520 Begin projection of pseudotime and lineage likelihood
2023-10-05 14:34:54.431027 Graph has 1 connected components before pruning
2023-10-05 14:34:54.433313 Graph has 24 connected components after pruning
2023-10-05 14:34:54.447372 Graph has 1 connected components after reconnecting
2023-10-05 14:34:54.447843 60.1% links trimmed from local pruning relative to start
2023-10-05 14:34:54.447866 66.8% links trimmed from global pruning relative to start
2023-10-05 14:34:54.450036 Start making edgebundle milestone...
2023-10-05 14:34:54.450060 Start finding milestones
2023-10-05 14:34:54.989903 End milestones
2023-10-05 14:34:54.989959 Will use via-pseudotime for edges, otherwise consider providing a list of numeric labels (single cell level) or via_object
2023-10-05 14:34:54.994086 Recompute weights
2023-10-05 14:34:55.008398 pruning milestone graph based on recomputed weights
2023-10-05 14:34:55.009266 Graph has 1 connected components before pruning
2023-10-05 14:34:55.009824 Graph has 11 connected components after pruning
2023-10-05 14:34:55.017485 Graph has 1 connected components after reconnecting
2023-10-05 14:34:55.018179 67.6% links trimmed from global pruning relative to start
2023-10-05 14:34:55.018205 regenerate igraph on pruned edges
2023-10-05 14:34:55.025160 Setting numeric label as single cell pseudotime for coloring edges
2023-10-05 14:34:55.034682 Making smooth edges
2023-10-05 14:34:55.626372 Time elapsed 37.0 seconds
In [83]:
import matplotlib.pyplot as plt
fig,ax=v0.plot_stream(basis='X_mde',clusters='celltype',
                      density_grid=0.8, scatter_size=30, scatter_alpha=0.3, linewidth=0.5)
plt.title('Raw Dentategyrus',fontsize=12)
plt.savefig('figures/via_dg_raw.png',dpi=300,bbox_inches='tight')
In [79]:
v0.get_pseudotime(adata2)
sc.pp.neighbors(adata2,n_neighbors=15,use_rep='scaled|original|X_pca')
ov.utils.cal_paga(adata2,use_time_prior='pt_via',vkey='paga',
                  groups='celltype')
...the pseudotime of VIA added to AnnData obs named `pt_via`
computing neighbors
    finished: added to `.uns['neighbors']`
    `.obsp['distances']`, distances for each pair of neighbors
    `.obsp['connectivities']`, weighted adjacency matrix (0:00:01)
running PAGA using priors: ['pt_via']
    finished
    added
    'paga/connectivities', connectivities adjacency (adata.uns)
    'paga/connectivities_tree', connectivities subtree (adata.uns)
    'paga/transitions_confidence', velocity transitions (adata.uns)
In [80]:
raw_transitions=pd.DataFrame(adata2.uns['paga']['transitions_confidence'].toarray(),
                             index=adata2.obs['celltype'].cat.categories,
                             columns=adata2.obs['celltype'].cat.categories)
In [81]:
raw_transitions.loc['nIPC','OPC']
Out[81]:
0.0
In [82]:
v1 = ov.single.pyVIA(adata=adata1,adata_key='scaled|original|X_pca',adata_ncomps=100, basis='X_mde',
                     clusters='celltype',knn=15,random_seed=4,root_user=['nIPC'],
                     #jac_std_global=0.01,
                     dataset='group')
v1.run()
2023-10-05 14:35:58.663237 From root 23, the Terminal state 19 is reached 10 times. 2023-10-05 14:36:03.266199 From root 23, the Terminal state 21 is reached 15 times. 2023-10-05 14:36:07.796891 From root 23, the Terminal state 24 is reached 38 times. 2023-10-05 14:36:12.414739 From root 23, the Terminal state 25 is reached 19 times. 2023-10-05 14:36:17.159413 From root 23, the Terminal state 29 is reached 5 times. 2023-10-05 14:36:21.469924 From root 23, the Terminal state 30 is reached 72 times. 2023-10-05 14:36:25.833879 From root 23, the Terminal state 32 is reached 51 times. 2023-10-05 14:36:30.351587 From root 23, the Terminal state 33 is reached 49 times. 2023-10-05 14:36:35.023002 From root 23, the Terminal state 35 is reached 5 times. 2023-10-05 14:36:39.681285 From root 23, the Terminal state 36 is reached 20 times. 2023-10-05 14:36:43.998816 From root 23, the Terminal state 38 is reached 58 times. 2023-10-05 14:36:48.744762 From root 23, the Terminal state 39 is reached 5 times. 2023-10-05 14:36:53.343414 From root 23, the Terminal state 42 is reached 19 times. 2023-10-05 14:36:58.134378 From root 23, the Terminal state 44 is reached 5 times. 2023-10-05 14:37:02.731043 From root 23, the Terminal state 45 is reached 9 times. 2023-10-05 14:37:06.841653 From root 23, the Terminal state 48 is reached 108 times. 2023-10-05 14:37:11.482964 From root 23, the Terminal state 49 is reached 9 times. 2023-10-05 14:37:16.096431 From root 23, the Terminal state 50 is reached 24 times. 2023-10-05 14:37:20.681880 From root 23, the Terminal state 52 is reached 28 times. 2023-10-05 14:37:25.452697 From root 23, the Terminal state 53 is reached 5 times. 2023-10-05 14:37:30.074774 From root 23, the Terminal state 55 is reached 27 times. 2023-10-05 14:37:34.431676 From root 23, the Terminal state 57 is reached 47 times. 2023-10-05 14:37:39.106907 From root 23, the Terminal state 60 is reached 5 times. 2023-10-05 14:37:43.885457 From root 23, the Terminal state 61 is reached 5 times. 2023-10-05 14:37:48.623371 From root 23, the Terminal state 63 is reached 6 times. 2023-10-05 14:37:53.374386 From root 23, the Terminal state 64 is reached 5 times. 2023-10-05 14:37:58.198307 From root 23, the Terminal state 65 is reached 5 times. 2023-10-05 14:38:02.913084 From root 23, the Terminal state 69 is reached 5 times. 2023-10-05 14:38:07.715554 From root 23, the Terminal state 70 is reached 5 times. 2023-10-05 14:38:12.123414 From root 23, the Terminal state 71 is reached 47 times. 2023-10-05 14:38:16.857963 From root 23, the Terminal state 74 is reached 5 times. 2023-10-05 14:38:21.524940 From root 23, the Terminal state 75 is reached 5 times. 2023-10-05 14:38:26.323582 From root 23, the Terminal state 76 is reached 5 times. 2023-10-05 14:38:30.997429 From root 23, the Terminal state 80 is reached 5 times. 2023-10-05 14:38:35.679410 From root 23, the Terminal state 81 is reached 5 times. 2023-10-05 14:38:40.457038 From root 23, the Terminal state 84 is reached 5 times. 2023-10-05 14:38:45.001947 From root 23, the Terminal state 85 is reached 31 times. 2023-10-05 14:38:49.680440 From root 23, the Terminal state 87 is reached 7 times. 2023-10-05 14:38:54.470059 From root 23, the Terminal state 88 is reached 5 times. 2023-10-05 14:38:59.067233 From root 23, the Terminal state 89 is reached 15 times. 2023-10-05 14:39:03.829355 From root 23, the Terminal state 90 is reached 5 times. 2023-10-05 14:39:08.484737 From root 23, the Terminal state 91 is reached 12 times. 
2023-10-05 14:39:13.155010 From root 23, the Terminal state 92 is reached 9 times. 2023-10-05 14:39:17.828664 From root 23, the Terminal state 93 is reached 17 times. 2023-10-05 14:39:21.807515 From root 23, the Terminal state 94 is reached 111 times. 2023-10-05 14:39:26.477969 From root 23, the Terminal state 95 is reached 24 times. 2023-10-05 14:39:31.107008 From root 23, the Terminal state 97 is reached 8 times. 2023-10-05 14:39:35.904234 From root 23, the Terminal state 98 is reached 5 times. 2023-10-05 14:39:40.651907 From root 23, the Terminal state 99 is reached 5 times. 2023-10-05 14:39:45.154225 From root 23, the Terminal state 101 is reached 37 times. 2023-10-05 14:39:49.809550 From root 23, the Terminal state 102 is reached 21 times. 2023-10-05 14:39:53.979180 From root 23, the Terminal state 104 is reached 81 times. 2023-10-05 14:39:58.500037 From root 23, the Terminal state 105 is reached 43 times. 2023-10-05 14:40:03.147723 From root 23, the Terminal state 107 is reached 5 times. 2023-10-05 14:40:07.907688 From root 23, the Terminal state 110 is reached 7 times. 2023-10-05 14:40:12.166664 From root 23, the Terminal state 112 is reached 91 times. 2023-10-05 14:40:16.902402 From root 23, the Terminal state 114 is reached 5 times. 2023-10-05 14:40:21.636045 From root 23, the Terminal state 115 is reached 9 times. 2023-10-05 14:40:26.348098 From root 23, the Terminal state 116 is reached 5 times. 2023-10-05 14:40:31.087912 From root 23, the Terminal state 118 is reached 12 times. 2023-10-05 14:40:35.730262 From root 23, the Terminal state 119 is reached 10 times. 2023-10-05 14:40:40.464309 From root 23, the Terminal state 120 is reached 5 times. 2023-10-05 14:40:45.214682 From root 23, the Terminal state 122 is reached 5 times. 2023-10-05 14:40:49.929113 From root 23, the Terminal state 123 is reached 5 times. 2023-10-05 14:40:54.717699 From root 23, the Terminal state 124 is reached 5 times. 2023-10-05 14:40:59.374741 From root 23, the Terminal state 125 is reached 8 times. 2023-10-05 14:41:04.186438 From root 23, the Terminal state 127 is reached 5 times. 2023-10-05 14:41:08.382792 From root 23, the Terminal state 128 is reached 96 times. 2023-10-05 14:41:13.104706 From root 23, the Terminal state 130 is reached 6 times. 2023-10-05 14:41:17.821524 From root 23, the Terminal state 132 is reached 10 times. 2023-10-05 14:41:22.521401 From root 23, the Terminal state 133 is reached 5 times. 2023-10-05 14:41:27.098956 From root 23, the Terminal state 135 is reached 14 times. 2023-10-05 14:41:31.437575 From root 23, the Terminal state 141 is reached 74 times. 2023-10-05 14:41:35.959400 From root 23, the Terminal state 146 is reached 29 times. 2023-10-05 14:41:35.994535 component number 1 out of [0, 1] 2023-10-05 14:41:35.996654\group root method 2023-10-05 14:41:35.996669\setting a dummy root 2023-10-05 14:41:35.996886 New root is 13 and majority OPC 2023-10-05 14:41:35.996940 Computing lazy-teleporting expected hitting times 2023-10-05 14:41:36.318038 Identifying terminal clusters corresponding to unique lineages... 2023-10-05 14:41:36.318118 Closeness:[] 2023-10-05 14:41:36.318128 Betweenness:[] 2023-10-05 14:41:36.318133 Out Degree:[1] remove the [0:2] just using to speed up testing 2023-10-05 14:41:36.318625 Terminal clusters corresponding to unique lineages in this component are [1] 2023-10-05 14:41:36.523495 From root 0, the Terminal state 1 is reached 650 times. 
2023-10-05 14:41:36.553477 Terminal clusters corresponding to unique lineages are {5: 'Microglia', 7: 'Mossy', 11: 'Endothelial', 21: 'Cajal Retzius', 23: 'Cck-Tox', 26: 'Granule mature', 27: 'Endothelial', 31: 'Granule mature', 32: 'Granule mature', 34: 'Granule mature', 35: 'Granule mature', 37: 'Granule immature', 38: 'Granule mature', 40: 'Granule mature', 41: 'Granule mature', 44: 'Granule mature', 46: 'Granule immature', 47: 'Granule mature', 50: 'Granule mature', 51: 'Granule mature', 52: 'Granule mature', 54: 'Granule mature', 55: 'Granule mature', 57: 'Granule mature', 59: 'Granule immature', 62: 'Granule immature', 63: 'Granule mature', 65: 'Granule mature', 66: 'Granule immature', 67: 'Granule mature', 71: 'Granule mature', 72: 'Granule mature', 73: 'Granule mature', 76: 'Granule immature', 77: 'Granule mature', 78: 'Granule mature', 82: 'Granule mature', 83: 'Granule mature', 86: 'Granule mature', 87: 'Granule mature', 89: 'Granule mature', 90: 'Granule mature', 91: 'Granule immature', 92: 'Granule mature', 93: 'Granule mature', 94: 'Granule mature', 95: 'Granule mature', 96: 'Granule mature', 97: 'Granule immature', 99: 'Granule mature', 100: 'Granule immature', 101: 'Granule mature', 103: 'Granule immature', 104: 'Granule immature', 106: 'Granule mature', 107: 'Granule mature', 109: 'Granule mature', 112: 'Granule mature', 114: 'Granule mature', 116: 'Granule mature', 117: 'Granule mature', 118: 'Granule mature', 120: 'Granule immature', 121: 'Granule mature', 122: 'Granule mature', 124: 'Granule immature', 125: 'Granule immature', 126: 'Granule mature', 127: 'Granule mature', 129: 'Granule mature', 130: 'Granule mature', 132: 'Granule mature', 134: 'Granule mature', 135: 'Granule mature', 137: 'Granule mature', 143: 'Granule mature', 148: 'Granule mature', 18: 'OPC'} 2023-10-05 14:41:36.553544 Begin projection of pseudotime and lineage likelihood 2023-10-05 14:41:37.040669 Graph has 2 connected components before pruning 2023-10-05 14:41:37.044858 Graph has 60 connected components after pruning 2023-10-05 14:41:37.076580 Graph has 17 connected components after reconnecting handling intersection condition where a singleton cluster 31 without edges exists handling intersection condition where a singleton cluster 41 without edges exists handling intersection condition where a singleton cluster 46 without edges exists handling intersection condition where a singleton cluster 55 without edges exists handling intersection condition where a singleton cluster 62 without edges exists handling intersection condition where a singleton cluster 76 without edges exists handling intersection condition where a singleton cluster 77 without edges exists handling intersection condition where a singleton cluster 82 without edges exists handling intersection condition where a singleton cluster 90 without edges exists handling intersection condition where a singleton cluster 101 without edges exists handling intersection condition where a singleton cluster 116 without edges exists handling intersection condition where a singleton cluster 118 without edges exists handling intersection condition where a singleton cluster 126 without edges exists handling intersection condition where a singleton cluster 135 without edges exists 2023-10-05 14:41:37.077313 64.8% links trimmed from local pruning relative to start 2023-10-05 14:41:37.077337 73.2% links trimmed from global pruning relative to start 2023-10-05 14:41:37.081372 Start making edgebundle milestone... 
2023-10-05 14:41:37.081401 Start finding milestones 2023-10-05 14:41:37.693801 End milestones 2023-10-05 14:41:37.693864 Will use via-pseudotime for edges, otherwise consider providing a list of numeric labels (single cell level) or via_object 2023-10-05 14:41:37.698523 Recompute weights 2023-10-05 14:41:37.718243 pruning milestone graph based on recomputed weights 2023-10-05 14:41:37.719290 Graph has 2 connected components before pruning 2023-10-05 14:41:37.719974 Graph has 12 connected components after pruning 2023-10-05 14:41:37.729587 Graph has 2 connected components after reconnecting 2023-10-05 14:41:37.730507 66.2% links trimmed from global pruning relative to start 2023-10-05 14:41:37.730549 regenerate igraph on pruned edges 2023-10-05 14:41:37.750337 Setting numeric label as single cell pseudotime for coloring edges 2023-10-05 14:41:37.766982 Making smooth edges 2023-10-05 14:41:38.468654 Time elapsed 374.2 seconds
In [84]:
import matplotlib.pyplot as plt
fig,ax=v1.plot_stream(basis='X_mde',clusters='celltype',
                      density_grid=0.8, scatter_size=30, scatter_alpha=0.3, linewidth=0.5)
plt.title('Dentategyrus (CGAN)',fontsize=12)
plt.savefig('figures/via_dg_cgan.png',dpi=300,bbox_inches='tight')
In [85]:
ov.utils.plot_paga(adata2,basis='mde', size=50, alpha=.1,title='PAGA LTNN-graph',
                   min_edge_width=2, node_size_scale=1.5,show=False,legend_loc=False)
In [86]:
v1.get_pseudotime(adata1)
sc.pp.neighbors(adata1,n_neighbors=15,use_rep='scaled|original|X_pca')
ov.utils.cal_paga(adata1,use_time_prior='pt_via',vkey='paga',
                  groups='celltype')
...the pseudotime of VIA added to AnnData obs named `pt_via`
computing neighbors
    finished: added to `.uns['neighbors']`
    `.obsp['distances']`, distances for each pair of neighbors
    `.obsp['connectivities']`, weighted adjacency matrix (0:00:00)
running PAGA using priors: ['pt_via']
    finished
    added
    'paga/connectivities', connectivities adjacency (adata.uns)
    'paga/connectivities_tree', connectivities subtree (adata.uns)
    'paga/transitions_confidence', velocity transitions (adata.uns)
In [87]:
after_transitions=pd.DataFrame(adata1.uns['paga']['transitions_confidence'].toarray(),
                               index=adata1.obs['celltype'].cat.categories,
                               columns=adata1.obs['celltype'].cat.categories)
In [88]:
ov.utils.plot_paga(adata1,basis='mde', size=50, alpha=.1,title='PAGA LTNN-graph',
                   min_edge_width=2, node_size_scale=1.5,show=False,legend_loc=False)
plt.title('PAGA Dentategyrus (CGAN)',fontsize=13)
plt.savefig('figures/paga_dg_cgan.png',dpi=300,bbox_inches='tight')
plt.savefig('pdf/paga_dg_cgan.pdf',dpi=300,bbox_inches='tight')
In [90]:
np.var(adata1.obs.loc[adata1.obs['celltype']=='OPC','pt_via'])
Out[90]:
0.2050766394636122
In [93]:
np.var(adata2.obs.loc[adata2.obs['celltype']=='OPC','pt_via'])
Out[93]:
0.0008610680040077388
In [104]:
res_dict={}

# Cor: expression correlation
# Mean of the diagonal (matched cell types)
diagonal_mean = np.trace(cor_pd.values) / len(cor_pd)
# Mean of the off-diagonal (mismatched cell types)
non_diagonal_mean = (np.sum(cor_pd.values) - np.trace(cor_pd.values)) / (len(cor_pd)**2 - len(cor_pd))
res_dict['Cor_mean']=diagonal_mean
res_dict['non_Cor_mean']=non_diagonal_mean

# Cos: marker-gene cosine similarity
# Mean of the diagonal
diagonal_mean = np.trace(plot_data.values) / len(plot_data)
# Mean of the off-diagonal
non_diagonal_mean = (np.sum(plot_data.values) - np.trace(plot_data.values)) / (len(plot_data)**2 - len(plot_data))
res_dict['Cos_mean']=diagonal_mean
res_dict['non_Cos_mean']=non_diagonal_mean

# Trans: PAGA transition confidence from OPC, before and after blending
res_dict['Trans_raw']=raw_transitions.loc['OPC'].max()
res_dict['Trans_after']=after_transitions.loc['OPC'].max()

# Variance of OPC pseudotime, before and after blending
res_dict['Var_raw']=np.var(adata2.obs.loc[adata2.obs['celltype']=='OPC','pt_via'])
res_dict['Var_after']=np.var(adata1.obs.loc[adata1.obs['celltype']=='OPC','pt_via'])
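The diagonal / off-diagonal computation above is repeated verbatim for both matrices; a small helper (an illustrative refactor, not used in this run) would factor it out:

def diag_offdiag_means(df):
    """Mean of matched (diagonal) vs. mismatched (off-diagonal) entries."""
    n = len(df)
    diag = np.trace(df.values) / n
    offdiag = (np.sum(df.values) - np.trace(df.values)) / (n**2 - n)
    return diag, offdiag

# e.g. res_dict['Cor_mean'], res_dict['non_Cor_mean'] = diag_offdiag_means(cor_pd)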
In [105]:
res_dict
Out[105]:
{'Cor_mean': 0.8853872742612411, 'non_Cor_mean': 0.5940097187211791, 'Cos_mean': 0.1824074221901813, 'non_Cos_mean': 0.013552926489814078, 'Trans_raw': 0.012605069833265089, 'Trans_after': 0.01236477520165905, 'Var_raw': 0.0008610680040077388, 'Var_after': 0.2050766394636122}
In [106]:
import pickle
with open('result/metric_cgan_dg.pkl','wb') as f:
    pickle.dump(res_dict,f)
In [107]:
with open('result/metric_cgan_dg.pkl','rb') as f:
    res_dict=pickle.load(f)
res_dict
Out[107]:
{'Cor_mean': 0.8853872742612411, 'non_Cor_mean': 0.5940097187211791, 'Cos_mean': 0.1824074221901813, 'non_Cos_mean': 0.013552926489814078, 'Trans_raw': 0.012605069833265089, 'Trans_after': 0.01236477520165905, 'Var_raw': 0.0008610680040077388, 'Var_after': 0.2050766394636122}