ACGAN

This notebook trains an auxiliary classifier GAN (ACGAN) on dentate gyrus single-cell profiles prepared through BulkTrajBlend, generates cell-type-conditioned expression profiles, and compares them with the scRNA-seq reference using expression correlation, marker-gene cosine similarity, and VIA/PAGA trajectory analysis.
In [1]:
import argparse
import os
import numpy as np
import math
import torchvision.transforms as transforms
from torchvision.utils import save_image
from torch.utils.data import DataLoader
from torchvision import datasets
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch
In [2]:
import omicverse as ov
from omicverse.utils import mde
import scanpy as sc
import scvelo as scv
ov.utils.ov_plot_set()
In [3]:
adata=scv.datasets.dentategyrus()
adata
Out[3]:
AnnData object with n_obs × n_vars = 2930 × 13913 obs: 'clusters', 'age(days)', 'clusters_enlarged' uns: 'clusters_colors' obsm: 'X_umap' layers: 'ambiguous', 'spliced', 'unspliced'
In [ ]:
adata=ov.pp.preprocess(adata,mode='shiftlog|pearson',n_HVGs=3000,
                       target_sum=1e4)
adata
In [ ]:
adata=adata[:,adata.var['highly_variable_features']==True]
adata
In [4]:
import numpy as np
import pandas as pd
bulk=pd.read_csv('data/GSE74985_mergedCount.txt.gz',index_col=0,sep='\t')
bulk=ov.bulk.Matrix_ID_mapping(bulk,'genesets/pair_GRCm39.tsv')
bulk.head()
Out[4]:
 | dg_d_1 | dg_d_2 | dg_d_3 | dg_v_1 | dg_v_2 | dg_v_3 | ca4_1 | ca4_2 | ca4_3 | ca3_d_1 | ... | ca3_v_3 | ca2_1 | ca2_2 | ca2_3 | ca1_d_1 | ca1_d_2 | ca1_d_3 | ca1_v_1 | ca1_v_2 | ca1_v_3 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Miga1 | 298 | 364 | 634 | 1356 | 1311 | 1902 | 2536 | 2299 | 2285 | 1879 | ... | 3702 | 2376 | 2583 | 3425 | 3783 | 3478 | 2946 | 924 | 567 | 1227 |
Hunk | 2 | 6 | 29 | 36 | 36 | 98 | 1398 | 2157 | 887 | 552 | ... | 1481 | 2512 | 2440 | 2469 | 3279 | 3348 | 2576 | 426 | 233 | 279 |
Sult2a-ps3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
Smg6 | 453 | 406 | 352 | 978 | 468 | 659 | 723 | 767 | 611 | 629 | ... | 932 | 506 | 597 | 537 | 779 | 914 | 640 | 207 | 215 | 388 |
Chmp5 | 122 | 132 | 95 | 645 | 857 | 667 | 1130 | 1007 | 852 | 633 | ... | 1574 | 605 | 799 | 659 | 870 | 1452 | 970 | 171 | 160 | 356 |
5 rows × 24 columns
In [5]:
bulktb=ov.bulk2single.BulkTrajBlend(bulk_seq=bulk,single_seq=adata,
                                    bulk_group=['dg_d_1','dg_d_2','dg_d_3'],
                                    celltype_key='clusters',)
#bulktb.bulk_preprocess_lazy()
#bulktb.single_preprocess_lazy()
In [6]:
bulktb.vae_configure(cell_target_num=200)
......drop duplicates index in bulk data ......deseq2 normalize the bulk data ......log10 the bulk data ......calculate the mean of each group ......normalize the single data normalizing counts per cell finished (0:00:00) ......log1p the single data ......prepare the input of bulk2single ...loading data
In [7]:
features=bulktb.vae_model.input_data['input_sc_data'].T
labels=bulktb.vae_model.input_data['input_sc_meta']['Cell_type'].astype('str')
# Map the cell-type strings to integer codes
label_mapping = {label: idx for idx, label in enumerate(labels.unique())}
labels_encoded = labels.map(label_mapping)
# Convert to PyTorch tensors
features_tensor = torch.Tensor(features.values)
labels_tensor = torch.LongTensor(labels_encoded.values)  # integer class labels are stored in a LongTensor
In [8]:
# Build a DataLoader (optional but convenient) to draw shuffled mini-batches during training
from torch.utils.data import DataLoader, TensorDataset

batch_size = 64
dataset = TensorDataset(features_tensor, labels_tensor)
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
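A quick, optional sanity check (a sketch using the objects defined above, not part of the original run): draw one batch and confirm its shapes before training.

xb, yb = next(iter(data_loader))
print(xb.shape)  # (batch_size, number of genes)
print(yb.shape)  # (batch_size,)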
In [9]:
n_epochs=200
batch_size=64
lr=0.0002
b1=0.5
b2=0.999
n_cpu=8
latent_dim=100
n_classes=len(list(set(labels)))  # number of cell-type classes
img_size=features.shape[1]
channels=1
sample_interval=400
n_features=features.shape[1]
In [10]:
cuda = True if torch.cuda.is_available() else False

def weights_init_normal(m):
    classname = m.__class__.__name__
    if classname.find("Conv") != -1:
        torch.nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif classname.find("BatchNorm1d") != -1:
        torch.nn.init.normal_(m.weight.data, 1.0, 0.02)
        torch.nn.init.constant_(m.bias.data, 0.0)
In [11]:
class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()
        self.label_emb = nn.Embedding(n_classes, n_classes)

        def block(in_feat, out_feat, normalize=True):
            layers = [nn.Linear(in_feat, out_feat)]
            if normalize:
                layers.append(nn.BatchNorm1d(out_feat, 0.8))
            layers.append(nn.LeakyReLU(0.2, inplace=True))
            return layers

        self.model = nn.Sequential(
            *block(latent_dim + n_classes, 128, normalize=False),
            *block(128, 256),
            *block(256, 512),
            *block(512, 1024),
            nn.Linear(1024, n_features),
            #nn.Sigmoid()
        )

    def forward(self, noise, labels):
        # Concatenate label embedding and noise to produce the generator input
        gen_input = torch.cat((self.label_emb(labels), noise), -1)
        img = self.model(gen_input)
        #img = img.view(img.size(0), *img_shape)
        return img
In [12]:
class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()
        self.label_embedding = nn.Embedding(n_classes, n_classes)

        self.model = nn.Sequential(
            nn.Linear(n_features, 512),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(512, 512),
            nn.Dropout(0.4),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(512, 512),
            nn.Dropout(0.4),
            nn.LeakyReLU(0.2, inplace=True),
            #nn.Linear(512, 1),
        )

        # Output layers: adversarial real/fake score and auxiliary class prediction
        self.adv_layer = nn.Sequential(nn.Linear(512, 1), nn.Sigmoid())
        self.aux_layer = nn.Sequential(nn.Linear(512, n_classes), nn.Softmax(dim=1))

    def forward(self, img):
        # The label embedding is not used here; class information is predicted by aux_layer
        #d_in = torch.cat((img, self.label_embedding(labels)), -1)
        out = self.model(img)
        validity = self.adv_layer(out)
        label = self.aux_layer(out)
        return validity, label
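A quick shape check (a sketch assuming the hyperparameters above, not part of the original run): push a dummy batch through freshly built models on CPU to confirm the generator emits one value per gene and the discriminator returns a validity score plus class probabilities.

# Hypothetical smoke test with untrained models
g, d = Generator(), Discriminator()
z = torch.randn(4, latent_dim)            # 4 noise vectors
y = torch.randint(0, n_classes, (4,))     # 4 random cell-type labels
fake = g(z, y)                            # -> (4, n_features)
validity, aux = d(fake)                   # -> (4, 1) and (4, n_classes)
print(fake.shape, validity.shape, aux.shape)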
In [13]:
# Loss functions
adversarial_loss = torch.nn.BCELoss()
auxiliary_loss = torch.nn.CrossEntropyLoss()

# Initialize generator and discriminator
generator = Generator()
discriminator = Discriminator()

if cuda:
    generator.cuda()
    discriminator.cuda()
    adversarial_loss.cuda()
    auxiliary_loss.cuda()
In [14]:
# Initialize weights
generator.apply(weights_init_normal)
discriminator.apply(weights_init_normal)
Out[14]:
Discriminator( (label_embedding): Embedding(14, 14) (model): Sequential( (0): Linear(in_features=12953, out_features=512, bias=True) (1): LeakyReLU(negative_slope=0.2, inplace=True) (2): Linear(in_features=512, out_features=512, bias=True) (3): Dropout(p=0.4, inplace=False) (4): LeakyReLU(negative_slope=0.2, inplace=True) (5): Linear(in_features=512, out_features=512, bias=True) (6): Dropout(p=0.4, inplace=False) (7): LeakyReLU(negative_slope=0.2, inplace=True) ) (adv_layer): Sequential( (0): Linear(in_features=512, out_features=1, bias=True) (1): Sigmoid() ) (aux_layer): Sequential( (0): Linear(in_features=512, out_features=14, bias=True) (1): Softmax(dim=None) ) )
In [15]:
# Optimizers
optimizer_G = torch.optim.Adam(generator.parameters(), lr=lr, betas=(b1, b2))
optimizer_D = torch.optim.Adam(discriminator.parameters(), lr=lr, betas=(b1, b2))
FloatTensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor
LongTensor = torch.cuda.LongTensor if cuda else torch.LongTensor
In [16]:
# ----------
#  Training
# ----------
from tqdm import trange,tqdm

n_epochs=3000
latent_dim=100

bar = tqdm(range(n_epochs))
d_loss_li=[]
g_loss_li=[]
d_acc_li=[]
d_r_acc_li=[]
d_f_acc_li=[]
d_acc_max=0
for epoch in bar:
    for i, (imgs, labels) in enumerate(data_loader):
        batch_size = imgs.shape[0]

        # Adversarial ground truths
        valid = Variable(FloatTensor(batch_size, 1).fill_(1.0), requires_grad=False)
        fake = Variable(FloatTensor(batch_size, 1).fill_(0.0), requires_grad=False)

        # Configure input
        real_imgs = Variable(imgs.type(FloatTensor))
        labels = Variable(labels.type(LongTensor))

        # -----------------
        #  Train Generator
        # -----------------
        optimizer_G.zero_grad()

        # Sample noise and labels as generator input
        z = Variable(FloatTensor(np.random.normal(0, 1, (batch_size, latent_dim))))
        gen_labels = Variable(LongTensor(np.random.randint(0, n_classes, batch_size)))

        # Generate a batch of expression profiles
        gen_imgs = generator(z, gen_labels)

        # Loss measures generator's ability to fool the discriminator
        validity, pred_label = discriminator(gen_imgs)
        g_loss = 0.5 * (adversarial_loss(validity, valid) + auxiliary_loss(pred_label, gen_labels))
        g_loss.backward()
        optimizer_G.step()

        # ---------------------
        #  Train Discriminator
        # ---------------------
        optimizer_D.zero_grad()

        # Loss for real cells
        real_pred, real_aux = discriminator(real_imgs)
        d_real_loss = (adversarial_loss(real_pred, valid) + auxiliary_loss(real_aux, labels)) / 2

        # Loss for generated cells
        fake_pred, fake_aux = discriminator(gen_imgs.detach())
        d_fake_loss = (adversarial_loss(fake_pred, fake) + auxiliary_loss(fake_aux, gen_labels)) / 2

        # Total discriminator loss
        d_loss = (d_real_loss + d_fake_loss) / 2

        # Calculate discriminator accuracy (overall, on real cells, and on generated cells)
        pred = np.concatenate([real_aux.data.cpu().numpy(), fake_aux.data.cpu().numpy()], axis=0)
        gt = np.concatenate([labels.data.cpu().numpy(), gen_labels.data.cpu().numpy()], axis=0)
        real_pred=real_aux.data.cpu().numpy()
        fake_pred=fake_aux.data.cpu().numpy()
        real_gt=labels.data.cpu().numpy()
        fake_gt=gen_labels.data.cpu().numpy()
        d_acc = np.mean(np.argmax(pred, axis=1) == gt)
        real_acc=np.mean(np.argmax(real_pred, axis=1) == real_gt)
        fake_acc=np.mean(np.argmax(fake_pred, axis=1) == fake_gt)

        d_loss.backward()
        optimizer_D.step()

    # Checkpoint the models whenever the auxiliary-classifier accuracy reaches a new maximum
    if d_acc>=d_acc_max:
        import pickle
        with open('result/acgan_dg_generator.pkl','wb') as f:
            pickle.dump(generator,f)
        with open('result/acgan_dg_discriminator.pkl','wb') as f:
            pickle.dump(discriminator,f)
        d_acc_max=d_acc

    bar.set_description(
        "[Epoch %d/%d] [Batch %d/%d] [D loss: %f, acc: %d%%, real_acc: %d%%, fake_acc: %d%%] [G loss: %f]"
        % (epoch, n_epochs, i, len(data_loader), d_loss.item(), 100 * d_acc, 100*real_acc,100*fake_acc,g_loss.item())
    )
    d_loss_li.append(d_loss.item())
    g_loss_li.append(g_loss.item())
    d_acc_li.append(100*d_acc)
    d_r_acc_li.append(100*real_acc)
    d_f_acc_li.append(100*fake_acc)
[Epoch 2999/3000] [Batch 45/46] [D loss: 0.887753, acc: 99%, real_acc: 100%, fake_acc: 98%] [G loss: 6.328508]: 100%|██████████| 3000/3000 [20:20<00:00, 2.46it/s]
In [17]:
with open('result/acgan_dg_generator.pkl','rb') as f:
    generator=pickle.load(f)
with open('result/acgan_dg_discriminator.pkl','rb') as f:
    discriminator=pickle.load(f)
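Pickling whole nn.Module objects ties the checkpoint to this exact class definition and module layout. A more portable pattern (a sketch, not what the checkpoints above contain) is to save and restore only the parameters:

# Hypothetical alternative: persist the weights rather than the module object
torch.save(generator.state_dict(), 'result/acgan_dg_generator.pt')
generator = Generator()
generator.load_state_dict(torch.load('result/acgan_dg_generator.pt'))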
In [18]:
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(3,3))
ax.scatter(range(len(d_loss_li)),d_loss_li,s=1)
ax.plot(range(len(d_loss_li)),d_loss_li)
ax.spines['left'].set_position(('outward', 10))
ax.spines['bottom'].set_position(('outward', 10))
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.grid(False)
# Set which spines are visible
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_visible(True)
ax.spines['left'].set_visible(True)
plt.title('Discriminator Loss\n(ACGAN):Dentategyrus',fontsize=13)
plt.xlabel('Epoch',fontsize=13)
plt.ylabel('Loss',fontsize=13)
plt.savefig('figures/loss_d_acgan_dg.png',dpi=300,bbox_inches='tight')
plt.savefig('pdf/loss_d_acgan_dg.pdf',dpi=300,bbox_inches='tight')
In [19]:
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(3,3))
ax.scatter(range(len(g_loss_li)),g_loss_li,s=1)
ax.plot(range(len(g_loss_li)),g_loss_li,color=ov.utils.blue_color[0])
ax.spines['left'].set_position(('outward', 10))
ax.spines['bottom'].set_position(('outward', 10))
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.grid(False)
# Set which spines are visible
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_visible(True)
ax.spines['left'].set_visible(True)
plt.title('Generator Loss\n(ACGAN):Dentategyrus',fontsize=13)
plt.xlabel('Epoch',fontsize=13)
plt.ylabel('Loss',fontsize=13)
plt.savefig('figures/loss_g_acgan_dg.png',dpi=300,bbox_inches='tight')
plt.savefig('pdf/loss_g_acgan_dg.pdf',dpi=300,bbox_inches='tight')
In [20]:
Variable(FloatTensor(np.random.normal(0, 1, (100, latent_dim)))).shape
Out[20]:
torch.Size([100, 100])
In [21]:
n_row=14
z = Variable(FloatTensor(np.random.normal(0, 1, (n_row * 100, latent_dim))))
# Get labels ranging from 0 to n_classes for n rows
labels = np.array([num for _ in range(100) for num in range(n_row)])
labels = Variable(LongTensor(labels))
gen_imgs = generator(z, labels)
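As a rough check of the class conditioning (a sketch using the objects defined above, not part of the original notebook), the discriminator's auxiliary head can classify the generated profiles and be compared with the labels they were conditioned on:

discriminator.eval()  # disable dropout for a cleaner estimate
with torch.no_grad():
    _, aux = discriminator(gen_imgs)
agreement = (aux.argmax(dim=1) == labels).float().mean().item()
print(f"auxiliary classifier agrees with the conditioning label on {agreement:.1%} of generated cells")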
In [22]:
gen_imgs.max()
Out[22]:
tensor(7.6369, device='cuda:0', grad_fn=<MaxBackward1>)
In [23]:
gen_imgs.min()
Out[23]:
tensor(-3.1579, device='cuda:0', grad_fn=<MinBackward1>)
In [24]:
import anndata
test_adata=anndata.AnnData(np.array(gen_imgs.cpu().detach().numpy()))
rev_dict=dict(zip(label_mapping.values(),label_mapping.keys()))
test_adata.obs['celltype_num']=labels.cpu().detach().numpy()
test_adata.obs['celltype']=test_adata.obs['celltype_num'].map(rev_dict)
In [25]:
test_adata.layers['scaled']=test_adata.X.copy()
ov.pp.pca(test_adata,layer='scaled',n_pcs=50)
test_adata
Out[25]:
AnnData object with n_obs × n_vars = 1400 × 12953 obs: 'celltype_num', 'celltype' uns: 'scaled|original|pca_var_ratios', 'scaled|original|cum_sum_eigenvalues' obsm: 'scaled|original|X_pca' varm: 'scaled|original|pca_loadings' layers: 'scaled', 'lognorm'
In [26]:
test_adata.obsm["X_mde"] = ov.utils.mde(test_adata.obsm["scaled|original|X_pca"])
test_adata
Out[26]:
AnnData object with n_obs × n_vars = 1400 × 12953 obs: 'celltype_num', 'celltype' uns: 'scaled|original|pca_var_ratios', 'scaled|original|cum_sum_eigenvalues' obsm: 'scaled|original|X_pca', 'X_mde' varm: 'scaled|original|pca_loadings' layers: 'scaled', 'lognorm'
In [28]:
test_adata.var.index=features.columns
In [31]:
adata.obs['celltype']=adata.obs['clusters'].copy()
In [30]:
ov.bulk2single.bulk2single_plot_correlation(adata,test_adata,celltype_key='celltype')
ranking genes WARNING: It seems you use rank_genes_groups on the raw count data. Please logarithmize your data before calling rank_genes_groups. finished: added to `.uns['rank_genes_groups']` 'names', sorted np.recarray to be indexed by group ids 'scores', sorted np.recarray to be indexed by group ids 'logfoldchanges', sorted np.recarray to be indexed by group ids 'pvals', sorted np.recarray to be indexed by group ids 'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:01)
Out[30]:
(<Figure size 480x480 with 2 Axes>, <AxesSubplot: title={'center': 'Expression correlation'}, xlabel='scRNA-seq reference', ylabel='deconvoluted bulk-seq'>)
In [35]:
adata_t=ov.pp.preprocess(adata,mode='shiftlog|pearson',n_HVGs=3000,
                         target_sum=1e4)
Begin robust gene identification After filtration, 13264/13264 genes are kept. Among 13264 genes, 13189 genes are robust. End of robust gene identification. Begin size normalization: shiftlog and HVGs selection pearson normalizing counts per cell The following highly-expressed genes are not considered during normalization factor computation: ['Hba-a1', 'Malat1', 'Ptgds', 'Hbb-bt'] finished (0:00:00) extracting highly variable genes --> added 'highly_variable', boolean vector (adata.var) 'highly_variable_rank', float vector (adata.var) 'highly_variable_nbatches', int vector (adata.var) 'highly_variable_intersection', boolean vector (adata.var) 'means', float vector (adata.var) 'variances', float vector (adata.var) 'residual_variances', float vector (adata.var) End of size normalization: shiftlog and HVGs selection pearson
In [36]:
cor_pd=ov.bulk2single.bulk2single_plot_correlation(adata_t,test_adata,celltype_key='celltype',
                                                   return_table=True)
ranking genes finished: added to `.uns['rank_genes_groups']` 'names', sorted np.recarray to be indexed by group ids 'scores', sorted np.recarray to be indexed by group ids 'logfoldchanges', sorted np.recarray to be indexed by group ids 'pvals', sorted np.recarray to be indexed by group ids 'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:01)
In [38]:
#sc.tl.rank_genes_groups(single_data, celltype_key, method='wilcoxon')
marker_df = pd.DataFrame(adata_t.uns['rank_genes_groups']['names']).head(200)
#marker = list(set(np.unique(np.ravel(np.array(marker_df))))&set(generate_adata.var.index.tolist()))
marker = list(set(np.unique(np.ravel(np.array(marker_df))))&set(test_adata.var.index.tolist()))
# the mean expression of 200 marker genes of input sc data
sc_marker = adata_t[:,marker].to_df()
sc_marker['celltype'] = adata_t.obs['celltype']
sc_marker_mean = sc_marker.groupby('celltype')[marker].mean()
In [39]:
sc_marker_mean=sc_marker_mean.T
In [40]:
rf_ct = list(sc_marker_mean.columns)
In [41]:
cor_pd=pd.DataFrame(cor_pd,
                    index=rf_ct,
                    columns=rf_ct)
In [43]:
import seaborn as sns
fig, ax = plt.subplots(figsize=(5,5))
sns.heatmap(cor_pd,cmap='RdBu_r',cbar_kws={'shrink':0.5},
            square=True,xticklabels=True,yticklabels=True,)
plt.xlabel("scRNA-seq reference")
plt.ylabel("Deconvoluted bulk-seq")
plt.setp(ax.get_xticklabels(), rotation=90, ha="right", rotation_mode="anchor")
#plt.colorbar(im)
ax.set_title("Expression correlation\n(ACGAN):Dentategyrus")
plt.savefig('figures/heatmap_expcor_acgan_dg.png',dpi=300,bbox_inches='tight')
plt.savefig('pdf/heatmap_expcor_acgan_dg.pdf',dpi=300,bbox_inches='tight')
In [32]:
cmk1=ov.single.get_celltype_marker(test_adata,clustertype='celltype',
                                   scores_type='logfoldchanges')
...get cell type marker ranking genes finished: added to `.uns['rank_genes_groups']` 'names', sorted np.recarray to be indexed by group ids 'scores', sorted np.recarray to be indexed by group ids 'logfoldchanges', sorted np.recarray to be indexed by group ids 'pvals', sorted np.recarray to be indexed by group ids 'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:01)
In [33]:
adata_hpc1=ov.pp.preprocess(adata,mode='shiftlog|pearson',n_HVGs=3000,
                            target_sum=1e4)
Begin robust gene identification After filtration, 13264/13913 genes are kept. Among 13264 genes, 13189 genes are robust. End of robust gene identification. Begin size normalization: shiftlog and HVGs selection pearson normalizing counts per cell The following highly-expressed genes are not considered during normalization factor computation: ['Hba-a1', 'Malat1', 'Ptgds', 'Hbb-bt'] finished (0:00:00) extracting highly variable genes --> added 'highly_variable', boolean vector (adata.var) 'highly_variable_rank', float vector (adata.var) 'highly_variable_nbatches', int vector (adata.var) 'highly_variable_intersection', boolean vector (adata.var) 'means', float vector (adata.var) 'variances', float vector (adata.var) 'residual_variances', float vector (adata.var) End of size normalization: shiftlog and HVGs selection pearson
In [44]:
cmk2=ov.single.get_celltype_marker(adata_hpc1,clustertype='celltype',
                                   scores_type='logfoldchanges')
...get cell type marker ranking genes finished: added to `.uns['rank_genes_groups']` 'names', sorted np.recarray to be indexed by group ids 'scores', sorted np.recarray to be indexed by group ids 'logfoldchanges', sorted np.recarray to be indexed by group ids 'pvals', sorted np.recarray to be indexed by group ids 'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:01)
In [45]:
cmk1={}
for clt in test_adata.obs['celltype'].cat.categories:
    degs = sc.get.rank_genes_groups_df(test_adata, group=clt,
                                       key='rank_genes_groups', log2fc_min=2,
                                       pval_cutoff=0.05)
    cmk1[clt]=degs['names'][:300].tolist()
In [46]:
cmk2={}
for clt in adata_hpc1.obs['celltype'].cat.categories:
    degs = sc.get.rank_genes_groups_df(adata_hpc1, group=clt,
                                       key='rank_genes_groups', log2fc_min=2,
                                       pval_cutoff=0.05)
    cmk2[clt]=degs['names'][:300].tolist()
In [47]:
all_gene=[]
for clt in cmk1.keys():
    all_gene+=cmk1[clt]
for clt in cmk2.keys():
    all_gene+=cmk2[clt]
all_gene=list(set(all_gene))
In [48]:
cmk1_pd=pd.DataFrame(index=all_gene)
for clt in cmk1.keys():
    cmk1_pd[clt]=0
    cmk1_pd.loc[cmk1[clt],clt]=1

cmk2_pd=pd.DataFrame(index=all_gene)
for clt in cmk2.keys():
    cmk2_pd[clt]=0
    cmk2_pd.loc[cmk2[clt],clt]=1
In [49]:
from scipy import spatial
plot_data=pd.DataFrame(index=test_adata.obs['celltype'].cat.categories,
                       columns=test_adata.obs['celltype'].cat.categories)
for clt1 in cmk1.keys():
    for clt2 in cmk2.keys():  # iterate the reference (cmk2) cell types for the columns
        #print(clt,1 - spatial.distance.cosine(cmk1_pd['B'], cmk2_pd[clt]))
        plot_data.loc[clt1,clt2]=1 - spatial.distance.cosine(cmk1_pd[clt1], cmk2_pd[clt2])
In [50]:
plot_data=plot_data.astype(float)
In [52]:
fig, ax = plt.subplots(figsize=(5,5))
sns.heatmap(plot_data,cmap='RdBu_r',cbar_kws={'shrink':0.5},
            square=True,xticklabels=True,yticklabels=True,vmax=0.1)
plt.xlabel("scRNA-seq reference")
plt.ylabel("Deconvoluted bulk-seq")
plt.setp(ax.get_xticklabels(), rotation=90, ha="right", rotation_mode="anchor")
#plt.colorbar(im)
ax.set_title("Cosine similarity\n(ACGAN):Dentategyrus")
plt.savefig('figures/heatmap_cossim_acgan_dg.png',dpi=300,bbox_inches='tight')
plt.savefig('pdf/heatmap_cossim_acgan_dg.pdf',dpi=300,bbox_inches='tight')
In [53]:
adata3=test_adata[test_adata.obs['celltype']=='OPC']
adata3
Out[53]:
View of AnnData object with n_obs × n_vars = 100 × 12953 obs: 'celltype_num', 'celltype' uns: 'scaled|original|pca_var_ratios', 'scaled|original|cum_sum_eigenvalues', 'celltype_colors', 'rank_genes_groups' obsm: 'scaled|original|X_pca', 'X_mde' varm: 'scaled|original|pca_loadings' layers: 'scaled', 'lognorm'
In [54]:
bulktb.vae_model.single_data.obs['celltype']=bulktb.vae_model.single_data.obs['clusters']
In [55]:
import anndata
adata1=anndata.concat([bulktb.vae_model.single_data,adata3],
                      merge='same')
adata1
Out[55]:
AnnData object with n_obs × n_vars = 3030 × 12953 obs: 'celltype'
In [56]:
adata1.raw = adata1
sc.pp.highly_variable_genes(adata1, min_mean=0.0125, max_mean=3, min_disp=0.5)
adata1 = adata1[:, adata1.var.highly_variable]
extracting highly variable genes finished (0:00:00) --> added 'highly_variable', boolean vector (adata.var) 'means', float vector (adata.var) 'dispersions', float vector (adata.var) 'dispersions_norm', float vector (adata.var)
In [57]:
ov.pp.scale(adata1)
ov.pp.pca(adata1,layer='scaled',n_pcs=50)
... as `zero_center=True`, sparse input is densified and may lead to large memory consumption
In [58]:
adata1.obsm["X_mde"] = ov.utils.mde(adata1.obsm["scaled|original|X_pca"])
adata1
Out[58]:
AnnData object with n_obs × n_vars = 3030 × 1644 obs: 'celltype' var: 'highly_variable', 'means', 'dispersions', 'dispersions_norm' uns: 'hvg', 'scaled|original|pca_var_ratios', 'scaled|original|cum_sum_eigenvalues' obsm: 'scaled|original|X_pca', 'X_mde' varm: 'scaled|original|pca_loadings' layers: 'scaled', 'lognorm'
In [60]:
adata2=bulktb.vae_model.single_data.copy()
In [61]:
adata2.raw = adata2
sc.pp.highly_variable_genes(adata2, min_mean=0.0125, max_mean=3, min_disp=0.5)
adata2 = adata2[:, adata2.var.highly_variable]
extracting highly variable genes finished (0:00:00) --> added 'highly_variable', boolean vector (adata.var) 'means', float vector (adata.var) 'dispersions', float vector (adata.var) 'dispersions_norm', float vector (adata.var)
In [62]:
ov.pp.scale(adata2)
ov.pp.pca(adata2,layer='scaled',n_pcs=50)
... as `zero_center=True`, sparse input is densified and may lead to large memory consumption
In [63]:
adata2.obsm["X_mde"] = ov.utils.mde(adata2.obsm["scaled|original|X_pca"])
adata2
Out[63]:
AnnData object with n_obs × n_vars = 2930 × 2254 obs: 'clusters', 'age(days)', 'clusters_enlarged', 'celltype' var: 'highly_variable', 'means', 'dispersions', 'dispersions_norm' uns: 'clusters_colors', 'log1p', 'hvg', 'scaled|original|pca_var_ratios', 'scaled|original|cum_sum_eigenvalues' obsm: 'X_umap', 'scaled|original|X_pca', 'X_mde' varm: 'scaled|original|pca_loadings' layers: 'ambiguous', 'spliced', 'unspliced', 'scaled', 'lognorm'
In [65]:
v0 = ov.single.pyVIA(adata=adata2,adata_key='scaled|original|X_pca',adata_ncomps=100, basis='X_mde',
                     clusters='celltype',knn=15,random_seed=4,root_user=['nIPC'],
                     dataset='group')
v0.run()
2023-10-05 15:20:04.095683 Running VIA over input data of 2930 (samples) x 50 (features) 2023-10-05 15:20:04.095714 Knngraph has 15 neighbors 2023-10-05 15:20:04.970310 Finished global pruning of 15-knn graph used for clustering at level of 0.15. Kept 39.6 % of edges. 2023-10-05 15:20:04.978484 Number of connected components used for clustergraph is 1 2023-10-05 15:20:05.018637 Commencing community detection 2023-10-05 15:20:05.156610 Finished running Leiden algorithm. Found 623 clusters. 2023-10-05 15:20:05.157304 Merging 593 very small clusters (<10) 2023-10-05 15:20:05.161365 Finished detecting communities. Found 52 communities 2023-10-05 15:20:05.161504 Making cluster graph. Global cluster graph pruning level: 0.15 2023-10-05 15:20:05.165578 Graph has 1 connected components before pruning 2023-10-05 15:20:05.167447 Graph has 15 connected components after pruning 2023-10-05 15:20:05.176013 Graph has 1 connected components after reconnecting 2023-10-05 15:20:05.176551 1.2% links trimmed from local pruning relative to start 2023-10-05 15:20:05.176566 53.5% links trimmed from global pruning relative to start 2023-10-05 15:20:05.181407 Starting make edgebundle viagraph... 2023-10-05 15:20:05.181420 Make via clustergraph edgebundle 2023-10-05 15:20:06.599535 Hammer dims: Nodes shape: (52, 2) Edges shape: (238, 3) 2023-10-05 15:20:06.601367 component number 0 out of [0] 2023-10-05 15:20:06.613331\group root method 2023-10-05 15:20:06.613346or component 0, the root is nIPC and ri nIPC 2023-10-05 15:20:06.621689 New root is 26 and majority nIPC 2023-10-05 15:20:06.623721 Computing lazy-teleporting expected hitting times 2023-10-05 15:20:10.351097 Identifying terminal clusters corresponding to unique lineages... 2023-10-05 15:20:10.351187 Closeness:[9, 22, 23, 27, 28] 2023-10-05 15:20:10.351198 Betweenness:[2, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 19, 20, 21, 22, 23, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 36, 37, 38, 39, 41, 43, 45, 46, 50, 51] 2023-10-05 15:20:10.351204 Out Degree:[2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 19, 20, 21, 22, 23, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 41, 45, 46, 47, 48, 50, 51] remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing 2023-10-05 15:20:10.352016 Terminal clusters corresponding to unique lineages in this component are [8, 9, 11, 13, 15, 16, 28, 29, 30, 31, 32, 33, 34, 36, 37, 38, 39, 41, 45, 46, 50, 51] 2023-10-05 15:20:11.508065 From root 26, the Terminal state 8 is reached 191 times. 
2023-10-05 15:20:12.943586 From root 26, the Terminal state 9 is reached 5 times. 2023-10-05 15:20:14.368932 From root 26, the Terminal state 11 is reached 8 times. 2023-10-05 15:20:15.805161 From root 26, the Terminal state 13 is reached 6 times. 2023-10-05 15:20:17.229697 From root 26, the Terminal state 15 is reached 6 times. 2023-10-05 15:20:18.653152 From root 26, the Terminal state 16 is reached 8 times. 2023-10-05 15:20:20.089502 From root 26, the Terminal state 28 is reached 5 times. 2023-10-05 15:20:21.512814 From root 26, the Terminal state 29 is reached 7 times. 2023-10-05 15:20:22.794625 From root 26, the Terminal state 30 is reached 116 times. 2023-10-05 15:20:24.168451 From root 26, the Terminal state 31 is reached 42 times. 2023-10-05 15:20:25.221457 From root 26, the Terminal state 32 is reached 239 times. 2023-10-05 15:20:26.638503 From root 26, the Terminal state 33 is reached 7 times. 2023-10-05 15:20:27.753206 From root 26, the Terminal state 34 is reached 240 times. 2023-10-05 15:20:29.201742 From root 26, the Terminal state 36 is reached 5 times. 2023-10-05 15:20:30.331534 From root 26, the Terminal state 37 is reached 184 times. 2023-10-05 15:20:31.233835 From root 26, the Terminal state 38 is reached 346 times. 2023-10-05 15:20:32.587120 From root 26, the Terminal state 39 is reached 45 times. 2023-10-05 15:20:33.778969 From root 26, the Terminal state 41 is reached 155 times. 2023-10-05 15:20:34.948078 From root 26, the Terminal state 45 is reached 177 times. 2023-10-05 15:20:36.310731 From root 26, the Terminal state 46 is reached 45 times. 2023-10-05 15:20:37.741504 From root 26, the Terminal state 50 is reached 11 times. 2023-10-05 15:20:39.104399 From root 26, the Terminal state 51 is reached 49 times. 2023-10-05 15:20:39.138189 Terminal clusters corresponding to unique lineages are {8: 'Granule mature', 9: 'Microglia', 11: 'Astrocytes', 13: 'Endothelial', 15: 'Astrocytes', 16: 'Radial Glia-like', 28: 'Endothelial', 29: 'Endothelial', 30: 'Granule mature', 31: 'Granule mature', 32: 'Granule immature', 33: 'Granule mature', 34: 'Granule mature', 36: 'Granule mature', 37: 'Granule mature', 38: 'Granule mature', 39: 'Granule mature', 41: 'Granule mature', 45: 'Granule immature', 46: 'Granule mature', 50: 'Granule mature', 51: 'Granule mature'} 2023-10-05 15:20:39.138227 Begin projection of pseudotime and lineage likelihood 2023-10-05 15:20:39.564074 Graph has 1 connected components before pruning 2023-10-05 15:20:39.566254 Graph has 24 connected components after pruning 2023-10-05 15:20:39.579638 Graph has 1 connected components after reconnecting 2023-10-05 15:20:39.580054 60.1% links trimmed from local pruning relative to start 2023-10-05 15:20:39.580073 66.8% links trimmed from global pruning relative to start 2023-10-05 15:20:39.582132 Start making edgebundle milestone... 
2023-10-05 15:20:39.582156 Start finding milestones 2023-10-05 15:20:40.090728 End milestones 2023-10-05 15:20:40.090778 Will use via-pseudotime for edges, otherwise consider providing a list of numeric labels (single cell level) or via_object 2023-10-05 15:20:40.094903 Recompute weights 2023-10-05 15:20:40.108636 pruning milestone graph based on recomputed weights 2023-10-05 15:20:40.109461 Graph has 1 connected components before pruning 2023-10-05 15:20:40.109999 Graph has 12 connected components after pruning 2023-10-05 15:20:40.118276 Graph has 1 connected components after reconnecting 2023-10-05 15:20:40.118928 68.1% links trimmed from global pruning relative to start 2023-10-05 15:20:40.118951 regenerate igraph on pruned edges 2023-10-05 15:20:40.125720 Setting numeric label as single cell pseudotime for coloring edges 2023-10-05 15:20:40.135095 Making smooth edges 2023-10-05 15:20:40.683080 Time elapsed 36.2 seconds
In [66]:
import matplotlib.pyplot as plt
fig,ax=v0.plot_stream(basis='X_mde',clusters='celltype',
                      density_grid=0.8, scatter_size=30, scatter_alpha=0.3, linewidth=0.5)
plt.title('Raw Dentategyrus',fontsize=12)
plt.savefig('figures/via_dg_raw1.png',dpi=300,bbox_inches='tight')
In [67]:
v0.get_pseudotime(adata2)
sc.pp.neighbors(adata2,n_neighbors= 15,use_rep='scaled|original|X_pca')
ov.utils.cal_paga(adata2,use_time_prior='pt_via',vkey='paga',
                  groups='celltype')
...the pseudotime of VIA added to AnnData obs named `pt_via` computing neighbors finished: added to `.uns['neighbors']` `.obsp['distances']`, distances for each pair of neighbors `.obsp['connectivities']`, weighted adjacency matrix (0:00:01) running PAGA using priors: ['pt_via'] finished added 'paga/connectivities', connectivities adjacency (adata.uns) 'paga/connectivities_tree', connectivities subtree (adata.uns) 'paga/transitions_confidence', velocity transitions (adata.uns)
In [68]:
raw_transitions=pd.DataFrame(adata2.uns['paga']['transitions_confidence'].toarray(),
                             index=adata2.obs['celltype'].cat.categories,
                             columns=adata2.obs['celltype'].cat.categories)
In [69]:
raw_transitions.loc['nIPC','OPC']
Out[69]:
0.0
In [70]:
v1 = ov.single.pyVIA(adata=adata1,adata_key='scaled|original|X_pca',adata_ncomps=100, basis='X_mde',
                     clusters='celltype',knn=15,random_seed=4,root_user=['nIPC'],
                     #jac_std_global=0.01,
                     dataset='group')
v1.run()
2023-10-05 15:20:45.400889 Running VIA over input data of 3030 (samples) x 50 (features) 2023-10-05 15:20:45.401215 Knngraph has 15 neighbors 2023-10-05 15:20:46.537498 Finished global pruning of 15-knn graph used for clustering at level of 0.15. Kept 39.1 % of edges. 2023-10-05 15:20:46.545754 Number of connected components used for clustergraph is 1 2023-10-05 15:20:46.585332 Commencing community detection 2023-10-05 15:20:46.729897 Finished running Leiden algorithm. Found 759 clusters. 2023-10-05 15:20:46.730627 Merging 729 very small clusters (<10) 2023-10-05 15:20:46.736568 Finished detecting communities. Found 34 communities 2023-10-05 15:20:46.736711 Making cluster graph. Global cluster graph pruning level: 0.15 2023-10-05 15:20:46.740784 Graph has 1 connected components before pruning 2023-10-05 15:20:46.742750 Graph has 14 connected components after pruning 2023-10-05 15:20:46.749722 Graph has 1 connected components after reconnecting 2023-10-05 15:20:46.750105 0.0% links trimmed from local pruning relative to start 2023-10-05 15:20:46.750118 55.2% links trimmed from global pruning relative to start 2023-10-05 15:20:46.752653 Starting make edgebundle viagraph... 2023-10-05 15:20:46.752666 Make via clustergraph edgebundle 2023-10-05 15:20:46.865037 Hammer dims: Nodes shape: (34, 2) Edges shape: (128, 3) 2023-10-05 15:20:46.866509 component number 0 out of [0] 2023-10-05 15:20:46.878155\group root method 2023-10-05 15:20:46.878170or component 0, the root is nIPC and ri nIPC 2023-10-05 15:20:46.885067 New root is 23 and majority nIPC 2023-10-05 15:20:46.885983 Computing lazy-teleporting expected hitting times 2023-10-05 15:20:49.248678 Identifying terminal clusters corresponding to unique lineages... 2023-10-05 15:20:49.248775 Closeness:[7, 20, 25] 2023-10-05 15:20:49.248785 Betweenness:[5, 7, 10, 11, 12, 14, 15, 16, 17, 19, 20, 21, 22, 25, 26, 27, 28, 29, 30] 2023-10-05 15:20:49.248790 Out Degree:[3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 28, 29, 30] 2023-10-05 15:20:49.249016 We removed cluster 14 from the shortlist of terminal states remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing remove the [0:2] just using to speed up testing 2023-10-05 15:20:49.249564 Terminal clusters corresponding to unique lineages in this component are [7, 11, 12, 15, 19, 21, 22, 25, 26, 28, 29, 30] 2023-10-05 15:20:50.182294 From root 23, the Terminal state 7 is reached 5 times. 2023-10-05 15:20:51.130852 From root 23, the Terminal state 11 is reached 14 times. 2023-10-05 15:20:52.098731 From root 23, the Terminal state 12 is reached 5 times. 2023-10-05 15:20:53.052221 From root 23, the Terminal state 15 is reached 8 times. 2023-10-05 15:20:54.011734 From root 23, the Terminal state 19 is reached 5 times. 2023-10-05 15:20:54.985514 From root 23, the Terminal state 21 is reached 5 times. 
2023-10-05 15:20:55.944320 From root 23, the Terminal state 22 is reached 9 times. 2023-10-05 15:20:56.905834 From root 23, the Terminal state 25 is reached 5 times. 2023-10-05 15:20:57.281645 From root 23, the Terminal state 26 is reached 628 times. 2023-10-05 15:20:58.138220 From root 23, the Terminal state 28 is reached 113 times. 2023-10-05 15:20:58.515420 From root 23, the Terminal state 29 is reached 615 times. 2023-10-05 15:20:59.370858 From root 23, the Terminal state 30 is reached 101 times. 2023-10-05 15:20:59.403021 Terminal clusters corresponding to unique lineages are {7: 'Microglia', 11: 'Mossy', 12: 'Endothelial', 15: 'OL', 19: 'GABA', 21: 'Cck-Tox', 22: 'GABA', 25: 'Endothelial', 26: 'Granule mature', 28: 'Granule mature', 29: 'Granule mature', 30: 'Granule mature'} 2023-10-05 15:20:59.403053 Begin projection of pseudotime and lineage likelihood 2023-10-05 15:20:59.847377 Graph has 1 connected components before pruning 2023-10-05 15:20:59.849538 Graph has 18 connected components after pruning 2023-10-05 15:20:59.860360 Graph has 1 connected components after reconnecting 2023-10-05 15:20:59.860759 53.9% links trimmed from local pruning relative to start 2023-10-05 15:20:59.860777 57.0% links trimmed from global pruning relative to start 2023-10-05 15:20:59.862608 Start making edgebundle milestone... 2023-10-05 15:20:59.862633 Start finding milestones 2023-10-05 15:21:00.474501 End milestones 2023-10-05 15:21:00.474547 Will use via-pseudotime for edges, otherwise consider providing a list of numeric labels (single cell level) or via_object 2023-10-05 15:21:00.478672 Recompute weights 2023-10-05 15:21:00.499687 pruning milestone graph based on recomputed weights 2023-10-05 15:21:00.500690 Graph has 1 connected components before pruning 2023-10-05 15:21:00.501361 Graph has 10 connected components after pruning 2023-10-05 15:21:00.510063 Graph has 1 connected components after reconnecting 2023-10-05 15:21:00.511099 46.2% links trimmed from global pruning relative to start 2023-10-05 15:21:00.511124 regenerate igraph on pruned edges 2023-10-05 15:21:00.518875 Setting numeric label as single cell pseudotime for coloring edges 2023-10-05 15:21:00.532234 Making smooth edges 2023-10-05 15:21:01.570900 Time elapsed 15.7 seconds
In [71]:
import matplotlib.pyplot as plt
fig,ax=v1.plot_stream(basis='X_mde',clusters='celltype',
                      density_grid=0.8, scatter_size=30, scatter_alpha=0.3, linewidth=0.5)
plt.title('Dentategyrus (ACGAN)',fontsize=12)
plt.savefig('figures/via_dg_acgan.png',dpi=300,bbox_inches='tight')
In [84]:
ov.utils.plot_paga(adata2,basis='mde', size=50, alpha=.1,title='PAGA LTNN-graph',
                   min_edge_width=2, node_size_scale=1.5,show=False,legend_loc=False)
plt.title('PAGA Dentategyrus (Raw)',fontsize=13)
plt.savefig('figures/paga_dg_raw.png',dpi=300,bbox_inches='tight')
plt.savefig('pdf/paga_dg_raw.pdf',dpi=300,bbox_inches='tight')
In [73]:
v1.get_pseudotime(adata1)
sc.pp.neighbors(adata1,n_neighbors= 15,use_rep='scaled|original|X_pca')
ov.utils.cal_paga(adata1,use_time_prior='pt_via',vkey='paga',
                  groups='celltype')
...the pseudotime of VIA added to AnnData obs named `pt_via` computing neighbors finished: added to `.uns['neighbors']` `.obsp['distances']`, distances for each pair of neighbors `.obsp['connectivities']`, weighted adjacency matrix (0:00:00) running PAGA using priors: ['pt_via'] finished added 'paga/connectivities', connectivities adjacency (adata.uns) 'paga/connectivities_tree', connectivities subtree (adata.uns) 'paga/transitions_confidence', velocity transitions (adata.uns)
In [74]:
after_transitions=pd.DataFrame(adata1.uns['paga']['transitions_confidence'].toarray(),
                               index=adata1.obs['celltype'].cat.categories,
                               columns=adata1.obs['celltype'].cat.categories)
In [75]:
ov.utils.plot_paga(adata1,basis='mde', size=50, alpha=.1,title='PAGA LTNN-graph',
                   min_edge_width=2, node_size_scale=1.5,show=False,legend_loc=False)
plt.title('PAGA Dentategyrus (ACGAN)',fontsize=13)
plt.savefig('figures/paga_dg_acgan.png',dpi=300,bbox_inches='tight')
plt.savefig('pdf/paga_dg_acgan.pdf',dpi=300,bbox_inches='tight')
In [77]:
np.var(adata1.obs.loc[adata1.obs['celltype']=='OPC','pt_via'])
Out[77]:
0.0033641781333378197
In [79]:
np.var(adata2.obs.loc[adata2.obs['celltype']=='OPC','pt_via'])
Out[79]:
0.0008610680040077388
In [80]:
res_dict={}

# Cor: expression correlation between generated and reference cell types
# mean of the diagonal (matched cell types)
diagonal_mean = np.trace(cor_pd.values) / len(cor_pd)
# mean of the off-diagonal entries (mismatched cell types)
non_diagonal_mean = (np.sum(cor_pd.values) - np.trace(cor_pd.values)) / (len(cor_pd)**2 - len(cor_pd))
res_dict['Cor_mean']=diagonal_mean
res_dict['non_Cor_mean']=non_diagonal_mean

# Cos: cosine similarity of marker-gene sets
# mean of the diagonal (matched cell types)
diagonal_mean = np.trace(plot_data.values) / len(plot_data)
# mean of the off-diagonal entries (mismatched cell types)
non_diagonal_mean = (np.sum(plot_data.values) - np.trace(plot_data.values)) / (len(plot_data)**2 - len(plot_data))
res_dict['Cos_mean']=diagonal_mean
res_dict['non_Cos_mean']=non_diagonal_mean

# Trans: strongest PAGA transition out of OPC, before and after adding generated cells
res_dict['Trans_raw']=raw_transitions.loc['OPC'].max()
res_dict['Trans_after']=after_transitions.loc['OPC'].max()

# Variance of OPC pseudotime, before and after adding generated cells
res_dict['Var_raw']=np.var(adata2.obs.loc[adata2.obs['celltype']=='OPC','pt_via'])
res_dict['Var_after']=np.var(adata1.obs.loc[adata1.obs['celltype']=='OPC','pt_via'])
In [81]:
res_dict
Out[81]:
{'Cor_mean': 0.7447424706163023, 'non_Cor_mean': 0.5737731983230978, 'Cos_mean': 0.10380949917187746, 'non_Cos_mean': 0.015533761316587154, 'Trans_raw': 0.012605069833265089, 'Trans_after': 0.016074207762156765, 'Var_raw': 0.0008610680040077388, 'Var_after': 0.0033641781333378197}
In [82]:
import pickle
with open('result/metric_acgan_dg.pkl','wb') as f:
    pickle.dump(res_dict,f)
In [83]:
with open('result/metric_acgan_dg.pkl','rb') as f:
    res_dict=pickle.load(f)
res_dict
Out[83]:
{'Cor_mean': 0.7447424706163023, 'non_Cor_mean': 0.5737731983230978, 'Cos_mean': 0.10380949917187746, 'non_Cos_mean': 0.015533761316587154, 'Trans_raw': 0.012605069833265089, 'Trans_after': 0.016074207762156765, 'Var_raw': 0.0008610680040077388, 'Var_after': 0.0033641781333378197}