CGAN
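This notebook benchmarks a conditional GAN (CGAN) as a generative baseline on the dentate gyrus dataset: a generator conditioned on cell-type labels is trained on the single-cell expression matrix prepared by ov.bulk2single.BulkTrajBlend, synthetic OPC profiles are blended back into the reference, and the result is evaluated with expression correlation, marker-gene cosine similarity, and VIA/PAGA trajectory metrics.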
In [1]:
import argparse
import os
import numpy as np
import math
import torchvision.transforms as transforms
from torchvision.utils import save_image
from torch.utils.data import DataLoader
from torchvision import datasets
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch
In [2]:
import omicverse as ov
from omicverse.utils import mde
import scanpy as sc
import scvelo as scv
ov.utils.ov_plot_set()
In [3]:
adata=scv.datasets.dentategyrus()
adata
Out[3]:
AnnData object with n_obs × n_vars = 2930 × 13913
    obs: 'clusters', 'age(days)', 'clusters_enlarged'
    uns: 'clusters_colors'
    obsm: 'X_umap'
    layers: 'ambiguous', 'spliced', 'unspliced'
In [4]:
import numpy as np
import pandas as pd
bulk=pd.read_csv('data/GSE74985_mergedCount.txt.gz',index_col=0,sep='\t')
bulk=ov.bulk.Matrix_ID_mapping(bulk,'genesets/pair_GRCm39.tsv')
bulk.head()
Out[4]:
| | dg_d_1 | dg_d_2 | dg_d_3 | dg_v_1 | dg_v_2 | dg_v_3 | ca4_1 | ca4_2 | ca4_3 | ca3_d_1 | ... | ca3_v_3 | ca2_1 | ca2_2 | ca2_3 | ca1_d_1 | ca1_d_2 | ca1_d_3 | ca1_v_1 | ca1_v_2 | ca1_v_3 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Olfr692 | 0 | 0 | 0 | 6 | 8 | 0 | 19 | 3 | 0 | 0 | ... | 10 | 12 | 16 | 3 | 0 | 3 | 1 | 1 | 0 | 0 |
| Olfr637-ps1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| Olfr1272 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| Gm23613 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| Tap2 | 0 | 0 | 0 | 5 | 0 | 3 | 80 | 29 | 16 | 8 | ... | 9 | 14 | 2 | 46 | 0 | 0 | 1 | 0 | 0 | 0 |

5 rows × 24 columns
In [5]:
bulktb=ov.bulk2single.BulkTrajBlend(bulk_seq=bulk,single_seq=adata,
                                    celltype_key='clusters',)
#bulktb.bulk_preprocess_lazy(group=['dg_d_1','dg_d_2','dg_d_3'])
#bulktb.single_preprocess_lazy()
In [6]:
bulktb.vae_configure(cell_target_num=200)
......drop duplicates index in bulk data
......deseq2 normalize the bulk data
......log10 the bulk data
......calculate the mean of each group
......normalize the single data
normalizing counts per cell
    finished (0:00:00)
......log1p the single data
......prepare the input of bulk2single
...loading data
In [7]:
features=bulktb.vae_model.input_data['input_sc_data'].T
labels=bulktb.vae_model.input_data['input_sc_meta']['Cell_type'].astype('str')
# Map the cell-type strings to integer codes
label_mapping = {label: idx for idx, label in enumerate(labels.unique())}
labels_encoded = labels.map(label_mapping)
# Convert to PyTorch tensors
features_tensor = torch.Tensor(features.values)
labels_tensor = torch.LongTensor(labels_encoded.values)  # integer class labels
In [8]:
# Build a DataLoader so the data can be drawn in shuffled mini-batches during training
from torch.utils.data import DataLoader, TensorDataset
batch_size = 64
dataset = TensorDataset(features_tensor, labels_tensor)
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
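A quick sanity check (not part of the original run) can confirm that the loader yields the expected shapes; with 12,953 genes and a batch size of 64, one mini-batch should be 64 × 12953:

# Illustrative check only: pull one shuffled mini-batch and inspect it.
xb, yb = next(iter(data_loader))
print(xb.shape, yb.shape)  # expected: torch.Size([64, 12953]) torch.Size([64])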
In [11]:
n_epochs=200
batch_size=64
lr=0.0002
b1=0.5
b2=0.999
n_cpu=8
latent_dim=100
n_classes=len(list(set(labels)))  # number of cell-type classes
img_size=32
channels=1
sample_interval=400
n_features=features.shape[1]
In [10]:
features.shape
Out[10]:
(2930, 12953)
In [12]:
class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()
        self.label_emb = nn.Embedding(n_classes, n_classes)

        def block(in_feat, out_feat, normalize=True):
            layers = [nn.Linear(in_feat, out_feat)]
            if normalize:
                layers.append(nn.BatchNorm1d(out_feat, 0.8))
            layers.append(nn.LeakyReLU(0.2, inplace=True))
            return layers

        self.model = nn.Sequential(
            *block(latent_dim + n_classes, 128, normalize=False),
            *block(128, 256),
            *block(256, 512),
            *block(512, 1024),
            nn.Linear(1024, n_features),
            #nn.Sigmoid()
        )

    def forward(self, noise, labels):
        # Concatenate label embedding and noise to produce the generator input
        gen_input = torch.cat((self.label_emb(labels), noise), -1)
        img = self.model(gen_input)
        #img = img.view(img.size(0), *img_shape)
        return img


class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()
        self.label_embedding = nn.Embedding(n_classes, n_classes)
        self.model = nn.Sequential(
            nn.Linear(n_classes + n_features, 512),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(512, 512),
            nn.Dropout(0.4),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(512, 512),
            nn.Dropout(0.4),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(512, 1),
        )

    def forward(self, img, labels):
        # Concatenate label embedding and expression vector to produce the discriminator input
        d_in = torch.cat((img, self.label_embedding(labels)), -1)
        validity = self.model(d_in)
        return validity
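Both networks follow the classic PyTorch-GAN CGAN template, with two changes for expression data: the final Sigmoid is commented out so the generator emits unbounded log-normalized values, and the output layer is sized to n_features genes rather than a flattened image. A minimal shape check (illustrative, not in the original run) confirms the wiring:

# Sketch: one forward pass through freshly constructed networks.
G_test, D_test = Generator(), Discriminator()
z_test = torch.randn(4, latent_dim)           # a small noise batch
y_test = torch.randint(0, n_classes, (4,))    # random cell-type labels
fake_profiles = G_test(z_test, y_test)
print(fake_profiles.shape)                  # (4, n_features): one profile per sample
print(D_test(fake_profiles, y_test).shape)  # (4, 1): one validity score per sample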
In [13]:
# Loss functions
adversarial_loss = torch.nn.MSELoss()

# Initialize generator and discriminator
generator = Generator()
discriminator = Discriminator()

cuda = True if torch.cuda.is_available() else False
if cuda:
    generator.cuda()
    discriminator.cuda()
    adversarial_loss.cuda()
In [14]:
# Optimizers
optimizer_G = torch.optim.Adam(generator.parameters(), lr=lr, betas=(b1, b2))
optimizer_D = torch.optim.Adam(discriminator.parameters(), lr=lr, betas=(b1, b2))
FloatTensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor
LongTensor = torch.cuda.LongTensor if cuda else torch.LongTensor
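The torch.cuda.FloatTensor / torch.cuda.LongTensor aliases come from the original template; on recent PyTorch versions a device-based pattern is the more idiomatic equivalent (a sketch only, the rest of the notebook keeps the aliases):

# Hypothetical modernization: allocate tensors on a device instead of
# switching tensor types.
device = torch.device('cuda' if cuda else 'cpu')
# z = torch.randn(batch_size, latent_dim, device=device)
# labels = labels.to(device)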
In [15]:
Copied!
# ----------
# Training
# ----------
from tqdm import trange,tqdm
n_epochs=5000
bar = tqdm(range(n_epochs))
#for epoch in trange(n_epochs):
d_loss_li=[]
g_loss_li=[]
for epoch in bar:
for i, (imgs, labels) in enumerate(data_loader):
batch_size = imgs.shape[0]
# Adversarial ground truths
valid = Variable(FloatTensor(batch_size, 1).fill_(1.0), requires_grad=False)
fake = Variable(FloatTensor(batch_size, 1).fill_(0.0), requires_grad=False)
# Configure input
real_imgs = Variable(imgs.type(FloatTensor))
labels = Variable(labels.type(LongTensor))
# -----------------
# Train Generator
# -----------------
optimizer_G.zero_grad()
# Sample noise and labels as generator input
z = Variable(FloatTensor(np.random.normal(0, 1, (batch_size, latent_dim))))
gen_labels = Variable(LongTensor(np.random.randint(0, n_classes, batch_size)))
# Generate a batch of images
gen_imgs = generator(z, gen_labels)
# Loss measures generator's ability to fool the discriminator
validity = discriminator(gen_imgs, gen_labels)
g_loss = adversarial_loss(validity, valid)
g_loss.backward()
optimizer_G.step()
# ---------------------
# Train Discriminator
# ---------------------
optimizer_D.zero_grad()
# Loss for real images
validity_real = discriminator(real_imgs, labels)
d_real_loss = adversarial_loss(validity_real, valid)
# Loss for fake images
validity_fake = discriminator(gen_imgs.detach(), gen_labels)
d_fake_loss = adversarial_loss(validity_fake, fake)
# Total discriminator loss
d_loss = (d_real_loss + d_fake_loss) / 2
d_loss.backward()
optimizer_D.step()
break
bar.set_description(
"[Epoch %d/%d] [Batch %d/%d] [D loss: %f] [G loss: %f]"
% (epoch, n_epochs, i, len(data_loader), d_loss.item(), g_loss.item())
)
d_loss_li.append(d_loss.item())
g_loss_li.append(g_loss.item())
# ----------
# Training
# ----------
from tqdm import trange,tqdm
n_epochs=5000
bar = tqdm(range(n_epochs))
#for epoch in trange(n_epochs):
d_loss_li=[]
g_loss_li=[]
for epoch in bar:
for i, (imgs, labels) in enumerate(data_loader):
batch_size = imgs.shape[0]
# Adversarial ground truths
valid = Variable(FloatTensor(batch_size, 1).fill_(1.0), requires_grad=False)
fake = Variable(FloatTensor(batch_size, 1).fill_(0.0), requires_grad=False)
# Configure input
real_imgs = Variable(imgs.type(FloatTensor))
labels = Variable(labels.type(LongTensor))
# -----------------
# Train Generator
# -----------------
optimizer_G.zero_grad()
# Sample noise and labels as generator input
z = Variable(FloatTensor(np.random.normal(0, 1, (batch_size, latent_dim))))
gen_labels = Variable(LongTensor(np.random.randint(0, n_classes, batch_size)))
# Generate a batch of images
gen_imgs = generator(z, gen_labels)
# Loss measures generator's ability to fool the discriminator
validity = discriminator(gen_imgs, gen_labels)
g_loss = adversarial_loss(validity, valid)
g_loss.backward()
optimizer_G.step()
# ---------------------
# Train Discriminator
# ---------------------
optimizer_D.zero_grad()
# Loss for real images
validity_real = discriminator(real_imgs, labels)
d_real_loss = adversarial_loss(validity_real, valid)
# Loss for fake images
validity_fake = discriminator(gen_imgs.detach(), gen_labels)
d_fake_loss = adversarial_loss(validity_fake, fake)
# Total discriminator loss
d_loss = (d_real_loss + d_fake_loss) / 2
d_loss.backward()
optimizer_D.step()
break
bar.set_description(
"[Epoch %d/%d] [Batch %d/%d] [D loss: %f] [G loss: %f]"
% (epoch, n_epochs, i, len(data_loader), d_loss.item(), g_loss.item())
)
d_loss_li.append(d_loss.item())
g_loss_li.append(g_loss.item())
[Epoch 4999/5000] [Batch 0/46] [D loss: 0.064183] [G loss: 0.598119]: 100%|██████████| 5000/5000 [01:12<00:00, 68.73it/s]
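The notebook does not checkpoint the trained networks; a minimal sketch like the following (the models/ paths are illustrative, not files produced by this run) would let the sampling cells below be rerun without retraining:

import os
os.makedirs('models', exist_ok=True)
torch.save(generator.state_dict(), 'models/cgan_dg_generator.pt')
torch.save(discriminator.state_dict(), 'models/cgan_dg_discriminator.pt')
# later: generator.load_state_dict(torch.load('models/cgan_dg_generator.pt'))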
In [16]:
Copied!
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(3,3))
ax.scatter(range(len(d_loss_li)),d_loss_li,s=1)
ax.plot(range(len(d_loss_li)),d_loss_li)
ax.spines['left'].set_position(('outward', 10))
ax.spines['bottom'].set_position(('outward', 10))
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.grid(False)
#设置spines可视化情况
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_visible(True)
ax.spines['left'].set_visible(True)
plt.title('Discriminator Loss\n(CGAN):Dentategyrus',fontsize=13)
plt.xlabel('Epoch',fontsize=13)
plt.ylabel('Loss',fontsize=13)
plt.savefig('figures/loss_d_cgan_dg.png',dpi=300,bbox_inches='tight')
plt.savefig('pdf/loss_d_cgan_dg.pdf',dpi=300,bbox_inches='tight')
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(3,3))
ax.scatter(range(len(d_loss_li)),d_loss_li,s=1)
ax.plot(range(len(d_loss_li)),d_loss_li)
ax.spines['left'].set_position(('outward', 10))
ax.spines['bottom'].set_position(('outward', 10))
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.grid(False)
#设置spines可视化情况
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_visible(True)
ax.spines['left'].set_visible(True)
plt.title('Discriminator Loss\n(CGAN):Dentategyrus',fontsize=13)
plt.xlabel('Epoch',fontsize=13)
plt.ylabel('Loss',fontsize=13)
plt.savefig('figures/loss_d_cgan_dg.png',dpi=300,bbox_inches='tight')
plt.savefig('pdf/loss_d_cgan_dg.pdf',dpi=300,bbox_inches='tight')
In [17]:
Copied!
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(3,3))
ax.scatter(range(len(g_loss_li)),g_loss_li,s=1)
ax.plot(range(len(g_loss_li)),g_loss_li,color=ov.utils.blue_color[0])
ax.spines['left'].set_position(('outward', 10))
ax.spines['bottom'].set_position(('outward', 10))
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.grid(False)
#设置spines可视化情况
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_visible(True)
ax.spines['left'].set_visible(True)
plt.title('Generator Loss\n(CGAN):Dentategyrus',fontsize=13)
plt.xlabel('Epoch',fontsize=13)
plt.ylabel('Loss',fontsize=13)
plt.savefig('figures/loss_g_cgan_dg.png',dpi=300,bbox_inches='tight')
plt.savefig('pdf/loss_g_cgan_dg.pdf',dpi=300,bbox_inches='tight')
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(3,3))
ax.scatter(range(len(g_loss_li)),g_loss_li,s=1)
ax.plot(range(len(g_loss_li)),g_loss_li,color=ov.utils.blue_color[0])
ax.spines['left'].set_position(('outward', 10))
ax.spines['bottom'].set_position(('outward', 10))
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.grid(False)
#设置spines可视化情况
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_visible(True)
ax.spines['left'].set_visible(True)
plt.title('Generator Loss\n(CGAN):Dentategyrus',fontsize=13)
plt.xlabel('Epoch',fontsize=13)
plt.ylabel('Loss',fontsize=13)
plt.savefig('figures/loss_g_cgan_dg.png',dpi=300,bbox_inches='tight')
plt.savefig('pdf/loss_g_cgan_dg.pdf',dpi=300,bbox_inches='tight')
In [18]:
Variable(FloatTensor(np.random.normal(0, 1, (100, latent_dim)))).shape
Out[18]:
torch.Size([100, 100])
In [19]:
n_row=14
z = Variable(FloatTensor(np.random.normal(0, 1, (n_row * 100, latent_dim))))
# Labels cycle through all n_row (= n_classes) cell types, 100 samples each
labels = np.array([num for _ in range(100) for num in range(n_row)])
labels = Variable(LongTensor(labels))
gen_imgs = generator(z, labels)
In [20]:
gen_imgs.max()
Out[20]:
tensor(9.9819, device='cuda:0', grad_fn=<MaxBackward1>)
In [21]:
gen_imgs.min()
Out[21]:
tensor(-1.9895, device='cuda:0', grad_fn=<MinBackward1>)
In [22]:
import anndata
test_adata=anndata.AnnData(np.array(gen_imgs.cpu().detach().numpy()))
# Invert the label mapping to recover cell-type names from integer codes
rev_dict=dict(zip(label_mapping.values(),label_mapping.keys()))
test_adata.obs['celltype_num']=labels.cpu().detach().numpy()
test_adata.obs['celltype']=test_adata.obs['celltype_num'].map(rev_dict)
In [23]:
test_adata.layers['scaled']=test_adata.X.copy()
ov.pp.pca(test_adata,layer='scaled',n_pcs=50)
test_adata
Out[23]:
AnnData object with n_obs × n_vars = 1400 × 12953
    obs: 'celltype_num', 'celltype'
    uns: 'scaled|original|pca_var_ratios', 'scaled|original|cum_sum_eigenvalues'
    obsm: 'scaled|original|X_pca'
    varm: 'scaled|original|pca_loadings'
    layers: 'scaled', 'lognorm'
In [24]:
test_adata.obsm["X_mde"] = ov.utils.mde(test_adata.obsm["scaled|original|X_pca"])
test_adata
Out[24]:
AnnData object with n_obs × n_vars = 1400 × 12953
    obs: 'celltype_num', 'celltype'
    uns: 'scaled|original|pca_var_ratios', 'scaled|original|cum_sum_eigenvalues'
    obsm: 'scaled|original|X_pca', 'X_mde'
    varm: 'scaled|original|pca_loadings'
    layers: 'scaled', 'lognorm'
In [25]:
test_adata.var.index=features.columns
In [27]:
adata.uns['clusters_colors']
Out[27]:
array(['#3ba458', '#404040', '#7a7a7a', '#fda762', '#6950a3', '#2575b7', '#08306b', '#e1bfb0', '#e5d8bd', '#79b5d9', '#f14432', '#fc8a6a', '#98d594', '#d0e1f2'], dtype=object)
In [29]:
adata.obs['celltype']=adata.obs['clusters'].copy()
In [30]:
ov.bulk2single.bulk2single_plot_correlation(test_adata,adata,celltype_key='celltype')
ranking genes
    finished: added to `.uns['rank_genes_groups']`
    'names', sorted np.recarray to be indexed by group ids
    'scores', sorted np.recarray to be indexed by group ids
    'logfoldchanges', sorted np.recarray to be indexed by group ids
    'pvals', sorted np.recarray to be indexed by group ids
    'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:01)
Out[30]:
(<Figure size 480x480 with 2 Axes>, <AxesSubplot: title={'center': 'Expression correlation'}, xlabel='scRNA-seq reference', ylabel='deconvoluted bulk-seq'>)
In [32]:
Copied!
#adata.uns['log1p']['base']=10
#adata.uns['log1p']['base']=10
In [40]:
Copied!
adata_t=ov.pp.preprocess(adata,mode='shiftlog|pearson',n_HVGs=3000,
target_sum=1e4)
adata_t=ov.pp.preprocess(adata,mode='shiftlog|pearson',n_HVGs=3000,
target_sum=1e4)
Begin robust gene identification
After filtration, 13264/13913 genes are kept. Among 13264 genes, 13189 genes are robust.
End of robust gene identification.
Begin size normalization: shiftlog and HVGs selection pearson
normalizing counts per cell
    The following highly-expressed genes are not considered during normalization factor computation:
    ['Hba-a1', 'Malat1', 'Ptgds', 'Hbb-bt']
    finished (0:00:00)
extracting highly variable genes
--> added
    'highly_variable', boolean vector (adata.var)
    'highly_variable_rank', float vector (adata.var)
    'highly_variable_nbatches', int vector (adata.var)
    'highly_variable_intersection', boolean vector (adata.var)
    'means', float vector (adata.var)
    'variances', float vector (adata.var)
    'residual_variances', float vector (adata.var)
End of size normalization: shiftlog and HVGs selection pearson
In [41]:
cor_pd=ov.bulk2single.bulk2single_plot_correlation(adata_t,test_adata,celltype_key='celltype',
                                                   return_table=True)
ranking genes
    finished: added to `.uns['rank_genes_groups']`
    'names', sorted np.recarray to be indexed by group ids
    'scores', sorted np.recarray to be indexed by group ids
    'logfoldchanges', sorted np.recarray to be indexed by group ids
    'pvals', sorted np.recarray to be indexed by group ids
    'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:01)
In [42]:
#sc.tl.rank_genes_groups(single_data, celltype_key, method='wilcoxon')
marker_df = pd.DataFrame(adata.uns['rank_genes_groups']['names']).head(200)
#marker = list(set(np.unique(np.ravel(np.array(marker_df))))&set(generate_adata.var.index.tolist()))
marker = list(set(np.unique(np.ravel(np.array(marker_df))))&set(test_adata.var.index.tolist()))
# the mean expression of 200 marker genes of input sc data
sc_marker = adata[:,marker].to_df()
sc_marker['celltype'] = adata.obs['celltype']
sc_marker_mean = sc_marker.groupby('celltype')[marker].mean()
In [43]:
sc_marker_mean=sc_marker_mean.T
In [44]:
rf_ct = list(sc_marker_mean.columns)
In [45]:
cor_pd=pd.DataFrame(cor_pd,
                    index=rf_ct,
                    columns=rf_ct)
In [46]:
import seaborn as sns
fig, ax = plt.subplots(figsize=(5,5))
sns.heatmap(cor_pd,cmap='RdBu_r',cbar_kws={'shrink':0.5},
            square=True,xticklabels=True,yticklabels=True,)
plt.xlabel("scRNA-seq reference")
plt.ylabel("Deconvoluted bulk-seq")
plt.setp(ax.get_xticklabels(), rotation=90, ha="right", rotation_mode="anchor")
#plt.colorbar(im)
ax.set_title("Expression correlation\n(CGAN):Dentategyrus")
plt.savefig('figures/heatmap_expcor_cgan_dg.png',dpi=300,bbox_inches='tight')
plt.savefig('pdf/heatmap_expcor_cgan_dg.pdf',dpi=300,bbox_inches='tight')
In [47]:
cmk1=ov.single.get_celltype_marker(test_adata,clustertype='celltype',
                                   scores_type='logfoldchanges')
...get cell type marker
ranking genes
    finished: added to `.uns['rank_genes_groups']`
    'names', sorted np.recarray to be indexed by group ids
    'scores', sorted np.recarray to be indexed by group ids
    'logfoldchanges', sorted np.recarray to be indexed by group ids
    'pvals', sorted np.recarray to be indexed by group ids
    'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:01)
In [48]:
adata_hpc1=ov.pp.preprocess(adata,mode='shiftlog|pearson',n_HVGs=3000,
                            target_sum=1e4)
Begin robust gene identification
After filtration, 13264/13264 genes are kept. Among 13264 genes, 13189 genes are robust.
End of robust gene identification.
Begin size normalization: shiftlog and HVGs selection pearson
normalizing counts per cell
    The following highly-expressed genes are not considered during normalization factor computation:
    ['Hba-a1', 'Malat1', 'Ptgds', 'Hbb-bt']
    finished (0:00:00)
extracting highly variable genes
--> added
    'highly_variable', boolean vector (adata.var)
    'highly_variable_rank', float vector (adata.var)
    'highly_variable_nbatches', int vector (adata.var)
    'highly_variable_intersection', boolean vector (adata.var)
    'means', float vector (adata.var)
    'variances', float vector (adata.var)
    'residual_variances', float vector (adata.var)
End of size normalization: shiftlog and HVGs selection pearson
In [49]:
cmk2=ov.single.get_celltype_marker(adata_hpc1,clustertype='celltype',
                                   scores_type='logfoldchanges')
...get cell type marker
ranking genes
    finished: added to `.uns['rank_genes_groups']`
    'names', sorted np.recarray to be indexed by group ids
    'scores', sorted np.recarray to be indexed by group ids
    'logfoldchanges', sorted np.recarray to be indexed by group ids
    'pvals', sorted np.recarray to be indexed by group ids
    'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:01)
In [97]:
cmk1={}
for clt in test_adata.obs['celltype'].cat.categories:
    degs = sc.get.rank_genes_groups_df(test_adata, group=clt,
                                       key='rank_genes_groups', log2fc_min=2,
                                       pval_cutoff=0.05)
    cmk1[clt]=degs['names'][:300].tolist()
In [98]:
cmk2={}
for clt in adata_hpc1.obs['celltype'].cat.categories:
    degs = sc.get.rank_genes_groups_df(adata_hpc1, group=clt,
                                       key='rank_genes_groups', log2fc_min=2,
                                       pval_cutoff=0.05)
    cmk2[clt]=degs['names'][:300].tolist()
In [99]:
all_gene=[]
for clt in cmk1.keys():
    all_gene+=cmk1[clt]
for clt in cmk2.keys():
    all_gene+=cmk2[clt]
all_gene=list(set(all_gene))
In [100]:
# Binary marker-membership matrices: rows are genes, columns are cell types
cmk1_pd=pd.DataFrame(index=all_gene)
for clt in cmk1.keys():
    cmk1_pd[clt]=0
    cmk1_pd.loc[cmk1[clt],clt]=1

cmk2_pd=pd.DataFrame(index=all_gene)
for clt in cmk2.keys():
    cmk2_pd[clt]=0
    cmk2_pd.loc[cmk2[clt],clt]=1
In [101]:
from scipy import spatial
plot_data=pd.DataFrame(index=test_adata.obs['celltype'].cat.categories,
                       columns=test_adata.obs['celltype'].cat.categories)
for clt1 in cmk1.keys():
    for clt2 in cmk1.keys():
        #print(clt,1 - spatial.distance.cosine(cmk1_pd['B'], cmk2_pd[clt]))
        plot_data.loc[clt1,clt2]=1 - spatial.distance.cosine(cmk1_pd[clt1], cmk2_pd[clt2])
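The nested loop computes pairwise cosine similarity between the binary marker-membership columns; an equivalent vectorized form (a sketch, assuming scikit-learn is available and that both frames share the same cell-type columns) would be:

from sklearn.metrics.pairwise import cosine_similarity
# Rows of the transposed frames are cell types, columns are genes, so
# sim[i, j] corresponds to plot_data.loc[type_i, type_j] from the loop above.
sim = cosine_similarity(cmk1_pd.T.values, cmk2_pd.T.values)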
In [102]:
plot_data=plot_data.astype(float)
In [103]:
fig, ax = plt.subplots(figsize=(5,5))
sns.heatmap(plot_data,cmap='RdBu_r',cbar_kws={'shrink':0.5},
            square=True,xticklabels=True,yticklabels=True,vmax=0.1)
plt.xlabel("scRNA-seq reference")
plt.ylabel("Deconvoluted bulk-seq")
plt.setp(ax.get_xticklabels(), rotation=90, ha="right", rotation_mode="anchor")
#plt.colorbar(im)
ax.set_title("Cosine similarity\n(CGAN):Dentategyrus")
plt.savefig('figures/heatmap_cossim_cgan_dg.png',dpi=300,bbox_inches='tight')
plt.savefig('pdf/heatmap_cossim_cgan_dg.pdf',dpi=300,bbox_inches='tight')
In [58]:
adata3=test_adata[test_adata.obs['celltype']=='OPC']
adata3
Out[58]:
View of AnnData object with n_obs × n_vars = 100 × 12953
    obs: 'celltype_num', 'celltype'
    uns: 'scaled|original|pca_var_ratios', 'scaled|original|cum_sum_eigenvalues', 'celltype_colors', 'rank_genes_groups'
    obsm: 'scaled|original|X_pca', 'X_mde'
    varm: 'scaled|original|pca_loadings'
    layers: 'scaled', 'lognorm'
In [64]:
bulktb.vae_model.single_data.obs['celltype']=bulktb.vae_model.single_data.obs['clusters']
In [67]:
import anndata
adata1=anndata.concat([bulktb.vae_model.single_data,adata3],
                      merge='same')
adata1
Out[67]:
AnnData object with n_obs × n_vars = 3030 × 12953
    obs: 'celltype'
In [68]:
adata1.raw = adata1
sc.pp.highly_variable_genes(adata1, min_mean=0.0125, max_mean=3, min_disp=0.5)
adata1 = adata1[:, adata1.var.highly_variable]
extracting highly variable genes
    finished (0:00:00)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
In [69]:
ov.pp.scale(adata1)
ov.pp.pca(adata1,layer='scaled',n_pcs=50)
... as `zero_center=True`, sparse input is densified and may lead to large memory consumption
In [70]:
adata1.obsm["X_mde"] = ov.utils.mde(adata1.obsm["scaled|original|X_pca"])
adata1
Out[70]:
AnnData object with n_obs × n_vars = 3030 × 1802
    obs: 'celltype'
    var: 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
    uns: 'hvg', 'scaled|original|pca_var_ratios', 'scaled|original|cum_sum_eigenvalues'
    obsm: 'scaled|original|X_pca', 'X_mde'
    varm: 'scaled|original|pca_loadings'
    layers: 'scaled', 'lognorm'
In [71]:
ov.utils.embedding(adata1,
                   basis='X_mde',
                   color=['celltype'],title='Dentategyrus Cell Type (CGAN)',
                   frameon='small',show=False,
                   wspace=0.4,palette=adata.uns['clusters_colors'].tolist())
plt.savefig('figures/umap_dg_cgan.png',dpi=300,bbox_inches='tight')
plt.savefig('pdf/umap_dg_cgan.pdf',dpi=300,bbox_inches='tight')
In [72]:
adata2=bulktb.vae_model.single_data.copy()
In [73]:
adata2.raw = adata2
sc.pp.highly_variable_genes(adata2, min_mean=0.0125, max_mean=3, min_disp=0.5)
adata2 = adata2[:, adata2.var.highly_variable]
extracting highly variable genes
    finished (0:00:00)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
In [74]:
ov.pp.scale(adata2)
ov.pp.pca(adata2,layer='scaled',n_pcs=50)
... as `zero_center=True`, sparse input is densified and may lead to large memory consumption
In [75]:
adata2.obsm["X_mde"] = ov.utils.mde(adata2.obsm["scaled|original|X_pca"])
adata2
Out[75]:
AnnData object with n_obs × n_vars = 2930 × 2254
    obs: 'clusters', 'age(days)', 'clusters_enlarged', 'celltype'
    var: 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
    uns: 'clusters_colors', 'log1p', 'hvg', 'scaled|original|pca_var_ratios', 'scaled|original|cum_sum_eigenvalues'
    obsm: 'X_umap', 'scaled|original|X_pca', 'X_mde'
    varm: 'scaled|original|pca_loadings'
    layers: 'ambiguous', 'spliced', 'unspliced', 'scaled', 'lognorm'
In [77]:
v0 = ov.single.pyVIA(adata=adata2,adata_key='scaled|original|X_pca',adata_ncomps=100, basis='X_mde',
                     clusters='celltype',knn=15,random_seed=4,root_user=['nIPC'],
                     dataset='group')
v0.run()
2023-10-05 14:34:43.856616 From root 26, the Terminal state 36 is reached 5 times.
2023-10-05 14:34:45.015598 From root 26, the Terminal state 37 is reached 184 times.
2023-10-05 14:34:45.941438 From root 26, the Terminal state 38 is reached 346 times.
2023-10-05 14:34:47.343073 From root 26, the Terminal state 39 is reached 45 times.
2023-10-05 14:34:48.557044 From root 26, the Terminal state 41 is reached 155 times.
2023-10-05 14:34:49.734404 From root 26, the Terminal state 45 is reached 177 times.
2023-10-05 14:34:51.109114 From root 26, the Terminal state 46 is reached 45 times.
2023-10-05 14:34:52.566552 From root 26, the Terminal state 50 is reached 11 times.
2023-10-05 14:34:53.956050 From root 26, the Terminal state 51 is reached 49 times.
2023-10-05 14:34:53.986477 Terminal clusters corresponding to unique lineages are {8: 'Granule mature', 9: 'Microglia', 11: 'Astrocytes', 13: 'Endothelial', 15: 'Astrocytes', 16: 'Radial Glia-like', 28: 'Endothelial', 29: 'Endothelial', 30: 'Granule mature', 31: 'Granule mature', 32: 'Granule mature', 33: 'Granule mature', 34: 'Granule mature', 36: 'Granule mature', 37: 'Granule mature', 38: 'Granule mature', 39: 'Granule mature', 41: 'Granule mature', 45: 'Granule immature', 46: 'Granule mature', 50: 'Granule mature', 51: 'Granule mature'}
2023-10-05 14:34:53.986520 Begin projection of pseudotime and lineage likelihood
2023-10-05 14:34:54.431027 Graph has 1 connected components before pruning
2023-10-05 14:34:54.433313 Graph has 24 connected components after pruning
2023-10-05 14:34:54.447372 Graph has 1 connected components after reconnecting
2023-10-05 14:34:54.447843 60.1% links trimmed from local pruning relative to start
2023-10-05 14:34:54.447866 66.8% links trimmed from global pruning relative to start
2023-10-05 14:34:54.450036 Start making edgebundle milestone...
2023-10-05 14:34:54.450060 Start finding milestones
2023-10-05 14:34:54.989903 End milestones
2023-10-05 14:34:54.989959 Will use via-pseudotime for edges, otherwise consider providing a list of numeric labels (single cell level) or via_object
2023-10-05 14:34:54.994086 Recompute weights
2023-10-05 14:34:55.008398 pruning milestone graph based on recomputed weights
2023-10-05 14:34:55.009266 Graph has 1 connected components before pruning
2023-10-05 14:34:55.009824 Graph has 11 connected components after pruning
2023-10-05 14:34:55.017485 Graph has 1 connected components after reconnecting
2023-10-05 14:34:55.018179 67.6% links trimmed from global pruning relative to start
2023-10-05 14:34:55.018205 regenerate igraph on pruned edges
2023-10-05 14:34:55.025160 Setting numeric label as single cell pseudotime for coloring edges
2023-10-05 14:34:55.034682 Making smooth edges
2023-10-05 14:34:55.626372 Time elapsed 37.0 seconds
In [83]:
import matplotlib.pyplot as plt
fig,ax=v0.plot_stream(basis='X_mde',clusters='celltype',
                      density_grid=0.8, scatter_size=30, scatter_alpha=0.3, linewidth=0.5)
plt.title('Raw Dentategyrus',fontsize=12)
plt.savefig('figures/via_dg_raw.png',dpi=300,bbox_inches='tight')
In [79]:
v0.get_pseudotime(adata2)
sc.pp.neighbors(adata2,n_neighbors=15,use_rep='scaled|original|X_pca')
ov.utils.cal_paga(adata2,use_time_prior='pt_via',vkey='paga',
                  groups='celltype')
...the pseudotime of VIA added to AnnData obs named `pt_via`
computing neighbors
    finished: added to `.uns['neighbors']`
    `.obsp['distances']`, distances for each pair of neighbors
    `.obsp['connectivities']`, weighted adjacency matrix (0:00:01)
running PAGA using priors: ['pt_via']
    finished
    added
    'paga/connectivities', connectivities adjacency (adata.uns)
    'paga/connectivities_tree', connectivities subtree (adata.uns)
    'paga/transitions_confidence', velocity transitions (adata.uns)
In [80]:
raw_transitions=pd.DataFrame(adata2.uns['paga']['transitions_confidence'].toarray(),
                             index=adata2.obs['celltype'].cat.categories,
                             columns=adata2.obs['celltype'].cat.categories)
In [81]:
raw_transitions.loc['nIPC','OPC']
Out[81]:
0.0
In [82]:
v1 = ov.single.pyVIA(adata=adata1,adata_key='scaled|original|X_pca',adata_ncomps=100, basis='X_mde',
                     clusters='celltype',knn=15,random_seed=4,root_user=['nIPC'],
                     #jac_std_global=0.01,
                     dataset='group')
v1.run()
2023-10-05 14:35:58.663237 From root 23, the Terminal state 19 is reached 10 times. 2023-10-05 14:36:03.266199 From root 23, the Terminal state 21 is reached 15 times. 2023-10-05 14:36:07.796891 From root 23, the Terminal state 24 is reached 38 times. 2023-10-05 14:36:12.414739 From root 23, the Terminal state 25 is reached 19 times. 2023-10-05 14:36:17.159413 From root 23, the Terminal state 29 is reached 5 times. 2023-10-05 14:36:21.469924 From root 23, the Terminal state 30 is reached 72 times. 2023-10-05 14:36:25.833879 From root 23, the Terminal state 32 is reached 51 times. 2023-10-05 14:36:30.351587 From root 23, the Terminal state 33 is reached 49 times. 2023-10-05 14:36:35.023002 From root 23, the Terminal state 35 is reached 5 times. 2023-10-05 14:36:39.681285 From root 23, the Terminal state 36 is reached 20 times. 2023-10-05 14:36:43.998816 From root 23, the Terminal state 38 is reached 58 times. 2023-10-05 14:36:48.744762 From root 23, the Terminal state 39 is reached 5 times. 2023-10-05 14:36:53.343414 From root 23, the Terminal state 42 is reached 19 times. 2023-10-05 14:36:58.134378 From root 23, the Terminal state 44 is reached 5 times. 2023-10-05 14:37:02.731043 From root 23, the Terminal state 45 is reached 9 times. 2023-10-05 14:37:06.841653 From root 23, the Terminal state 48 is reached 108 times. 2023-10-05 14:37:11.482964 From root 23, the Terminal state 49 is reached 9 times. 2023-10-05 14:37:16.096431 From root 23, the Terminal state 50 is reached 24 times. 2023-10-05 14:37:20.681880 From root 23, the Terminal state 52 is reached 28 times. 2023-10-05 14:37:25.452697 From root 23, the Terminal state 53 is reached 5 times. 2023-10-05 14:37:30.074774 From root 23, the Terminal state 55 is reached 27 times. 2023-10-05 14:37:34.431676 From root 23, the Terminal state 57 is reached 47 times. 2023-10-05 14:37:39.106907 From root 23, the Terminal state 60 is reached 5 times. 2023-10-05 14:37:43.885457 From root 23, the Terminal state 61 is reached 5 times. 2023-10-05 14:37:48.623371 From root 23, the Terminal state 63 is reached 6 times. 2023-10-05 14:37:53.374386 From root 23, the Terminal state 64 is reached 5 times. 2023-10-05 14:37:58.198307 From root 23, the Terminal state 65 is reached 5 times. 2023-10-05 14:38:02.913084 From root 23, the Terminal state 69 is reached 5 times. 2023-10-05 14:38:07.715554 From root 23, the Terminal state 70 is reached 5 times. 2023-10-05 14:38:12.123414 From root 23, the Terminal state 71 is reached 47 times. 2023-10-05 14:38:16.857963 From root 23, the Terminal state 74 is reached 5 times. 2023-10-05 14:38:21.524940 From root 23, the Terminal state 75 is reached 5 times. 2023-10-05 14:38:26.323582 From root 23, the Terminal state 76 is reached 5 times. 2023-10-05 14:38:30.997429 From root 23, the Terminal state 80 is reached 5 times. 2023-10-05 14:38:35.679410 From root 23, the Terminal state 81 is reached 5 times. 2023-10-05 14:38:40.457038 From root 23, the Terminal state 84 is reached 5 times. 2023-10-05 14:38:45.001947 From root 23, the Terminal state 85 is reached 31 times. 2023-10-05 14:38:49.680440 From root 23, the Terminal state 87 is reached 7 times. 2023-10-05 14:38:54.470059 From root 23, the Terminal state 88 is reached 5 times. 2023-10-05 14:38:59.067233 From root 23, the Terminal state 89 is reached 15 times. 2023-10-05 14:39:03.829355 From root 23, the Terminal state 90 is reached 5 times. 2023-10-05 14:39:08.484737 From root 23, the Terminal state 91 is reached 12 times. 
2023-10-05 14:39:13.155010 From root 23, the Terminal state 92 is reached 9 times. 2023-10-05 14:39:17.828664 From root 23, the Terminal state 93 is reached 17 times. 2023-10-05 14:39:21.807515 From root 23, the Terminal state 94 is reached 111 times. 2023-10-05 14:39:26.477969 From root 23, the Terminal state 95 is reached 24 times. 2023-10-05 14:39:31.107008 From root 23, the Terminal state 97 is reached 8 times. 2023-10-05 14:39:35.904234 From root 23, the Terminal state 98 is reached 5 times. 2023-10-05 14:39:40.651907 From root 23, the Terminal state 99 is reached 5 times. 2023-10-05 14:39:45.154225 From root 23, the Terminal state 101 is reached 37 times. 2023-10-05 14:39:49.809550 From root 23, the Terminal state 102 is reached 21 times. 2023-10-05 14:39:53.979180 From root 23, the Terminal state 104 is reached 81 times. 2023-10-05 14:39:58.500037 From root 23, the Terminal state 105 is reached 43 times. 2023-10-05 14:40:03.147723 From root 23, the Terminal state 107 is reached 5 times. 2023-10-05 14:40:07.907688 From root 23, the Terminal state 110 is reached 7 times. 2023-10-05 14:40:12.166664 From root 23, the Terminal state 112 is reached 91 times. 2023-10-05 14:40:16.902402 From root 23, the Terminal state 114 is reached 5 times. 2023-10-05 14:40:21.636045 From root 23, the Terminal state 115 is reached 9 times. 2023-10-05 14:40:26.348098 From root 23, the Terminal state 116 is reached 5 times. 2023-10-05 14:40:31.087912 From root 23, the Terminal state 118 is reached 12 times. 2023-10-05 14:40:35.730262 From root 23, the Terminal state 119 is reached 10 times. 2023-10-05 14:40:40.464309 From root 23, the Terminal state 120 is reached 5 times. 2023-10-05 14:40:45.214682 From root 23, the Terminal state 122 is reached 5 times. 2023-10-05 14:40:49.929113 From root 23, the Terminal state 123 is reached 5 times. 2023-10-05 14:40:54.717699 From root 23, the Terminal state 124 is reached 5 times. 2023-10-05 14:40:59.374741 From root 23, the Terminal state 125 is reached 8 times. 2023-10-05 14:41:04.186438 From root 23, the Terminal state 127 is reached 5 times. 2023-10-05 14:41:08.382792 From root 23, the Terminal state 128 is reached 96 times. 2023-10-05 14:41:13.104706 From root 23, the Terminal state 130 is reached 6 times. 2023-10-05 14:41:17.821524 From root 23, the Terminal state 132 is reached 10 times. 2023-10-05 14:41:22.521401 From root 23, the Terminal state 133 is reached 5 times. 2023-10-05 14:41:27.098956 From root 23, the Terminal state 135 is reached 14 times. 2023-10-05 14:41:31.437575 From root 23, the Terminal state 141 is reached 74 times. 2023-10-05 14:41:35.959400 From root 23, the Terminal state 146 is reached 29 times. 2023-10-05 14:41:35.994535 component number 1 out of [0, 1] 2023-10-05 14:41:35.996654\group root method 2023-10-05 14:41:35.996669\setting a dummy root 2023-10-05 14:41:35.996886 New root is 13 and majority OPC 2023-10-05 14:41:35.996940 Computing lazy-teleporting expected hitting times 2023-10-05 14:41:36.318038 Identifying terminal clusters corresponding to unique lineages... 2023-10-05 14:41:36.318118 Closeness:[] 2023-10-05 14:41:36.318128 Betweenness:[] 2023-10-05 14:41:36.318133 Out Degree:[1] remove the [0:2] just using to speed up testing 2023-10-05 14:41:36.318625 Terminal clusters corresponding to unique lineages in this component are [1] 2023-10-05 14:41:36.523495 From root 0, the Terminal state 1 is reached 650 times. 
2023-10-05 14:41:36.553477 Terminal clusters corresponding to unique lineages are {5: 'Microglia', 7: 'Mossy', 11: 'Endothelial', 21: 'Cajal Retzius', 23: 'Cck-Tox', 26: 'Granule mature', 27: 'Endothelial', 31: 'Granule mature', 32: 'Granule mature', 34: 'Granule mature', 35: 'Granule mature', 37: 'Granule immature', 38: 'Granule mature', 40: 'Granule mature', 41: 'Granule mature', 44: 'Granule mature', 46: 'Granule immature', 47: 'Granule mature', 50: 'Granule mature', 51: 'Granule mature', 52: 'Granule mature', 54: 'Granule mature', 55: 'Granule mature', 57: 'Granule mature', 59: 'Granule immature', 62: 'Granule immature', 63: 'Granule mature', 65: 'Granule mature', 66: 'Granule immature', 67: 'Granule mature', 71: 'Granule mature', 72: 'Granule mature', 73: 'Granule mature', 76: 'Granule immature', 77: 'Granule mature', 78: 'Granule mature', 82: 'Granule mature', 83: 'Granule mature', 86: 'Granule mature', 87: 'Granule mature', 89: 'Granule mature', 90: 'Granule mature', 91: 'Granule immature', 92: 'Granule mature', 93: 'Granule mature', 94: 'Granule mature', 95: 'Granule mature', 96: 'Granule mature', 97: 'Granule immature', 99: 'Granule mature', 100: 'Granule immature', 101: 'Granule mature', 103: 'Granule immature', 104: 'Granule immature', 106: 'Granule mature', 107: 'Granule mature', 109: 'Granule mature', 112: 'Granule mature', 114: 'Granule mature', 116: 'Granule mature', 117: 'Granule mature', 118: 'Granule mature', 120: 'Granule immature', 121: 'Granule mature', 122: 'Granule mature', 124: 'Granule immature', 125: 'Granule immature', 126: 'Granule mature', 127: 'Granule mature', 129: 'Granule mature', 130: 'Granule mature', 132: 'Granule mature', 134: 'Granule mature', 135: 'Granule mature', 137: 'Granule mature', 143: 'Granule mature', 148: 'Granule mature', 18: 'OPC'} 2023-10-05 14:41:36.553544 Begin projection of pseudotime and lineage likelihood 2023-10-05 14:41:37.040669 Graph has 2 connected components before pruning 2023-10-05 14:41:37.044858 Graph has 60 connected components after pruning 2023-10-05 14:41:37.076580 Graph has 17 connected components after reconnecting handling intersection condition where a singleton cluster 31 without edges exists handling intersection condition where a singleton cluster 41 without edges exists handling intersection condition where a singleton cluster 46 without edges exists handling intersection condition where a singleton cluster 55 without edges exists handling intersection condition where a singleton cluster 62 without edges exists handling intersection condition where a singleton cluster 76 without edges exists handling intersection condition where a singleton cluster 77 without edges exists handling intersection condition where a singleton cluster 82 without edges exists handling intersection condition where a singleton cluster 90 without edges exists handling intersection condition where a singleton cluster 101 without edges exists handling intersection condition where a singleton cluster 116 without edges exists handling intersection condition where a singleton cluster 118 without edges exists handling intersection condition where a singleton cluster 126 without edges exists handling intersection condition where a singleton cluster 135 without edges exists 2023-10-05 14:41:37.077313 64.8% links trimmed from local pruning relative to start 2023-10-05 14:41:37.077337 73.2% links trimmed from global pruning relative to start 2023-10-05 14:41:37.081372 Start making edgebundle milestone... 
2023-10-05 14:41:37.081401 Start finding milestones 2023-10-05 14:41:37.693801 End milestones 2023-10-05 14:41:37.693864 Will use via-pseudotime for edges, otherwise consider providing a list of numeric labels (single cell level) or via_object 2023-10-05 14:41:37.698523 Recompute weights 2023-10-05 14:41:37.718243 pruning milestone graph based on recomputed weights 2023-10-05 14:41:37.719290 Graph has 2 connected components before pruning 2023-10-05 14:41:37.719974 Graph has 12 connected components after pruning 2023-10-05 14:41:37.729587 Graph has 2 connected components after reconnecting 2023-10-05 14:41:37.730507 66.2% links trimmed from global pruning relative to start 2023-10-05 14:41:37.730549 regenerate igraph on pruned edges 2023-10-05 14:41:37.750337 Setting numeric label as single cell pseudotime for coloring edges 2023-10-05 14:41:37.766982 Making smooth edges 2023-10-05 14:41:38.468654 Time elapsed 374.2 seconds
In [84]:
import matplotlib.pyplot as plt
fig,ax=v1.plot_stream(basis='X_mde',clusters='celltype',
                      density_grid=0.8, scatter_size=30, scatter_alpha=0.3, linewidth=0.5)
plt.title('Dentategyrus (CGAN)',fontsize=12)
plt.savefig('figures/via_dg_cgan.png',dpi=300,bbox_inches='tight')
In [85]:
ov.utils.plot_paga(adata2,basis='mde', size=50, alpha=.1,title='PAGA LTNN-graph',
                   min_edge_width=2, node_size_scale=1.5,show=False,legend_loc=False)
In [86]:
v1.get_pseudotime(adata1)
sc.pp.neighbors(adata1,n_neighbors=15,use_rep='scaled|original|X_pca')
ov.utils.cal_paga(adata1,use_time_prior='pt_via',vkey='paga',
                  groups='celltype')
...the pseudotime of VIA added to AnnData obs named `pt_via`
computing neighbors
    finished: added to `.uns['neighbors']`
    `.obsp['distances']`, distances for each pair of neighbors
    `.obsp['connectivities']`, weighted adjacency matrix (0:00:00)
running PAGA using priors: ['pt_via']
    finished
    added
    'paga/connectivities', connectivities adjacency (adata.uns)
    'paga/connectivities_tree', connectivities subtree (adata.uns)
    'paga/transitions_confidence', velocity transitions (adata.uns)
In [87]:
after_transitions=pd.DataFrame(adata1.uns['paga']['transitions_confidence'].toarray(),
                               index=adata1.obs['celltype'].cat.categories,
                               columns=adata1.obs['celltype'].cat.categories)
In [88]:
ov.utils.plot_paga(adata1,basis='mde', size=50, alpha=.1,title='PAGA LTNN-graph',
                   min_edge_width=2, node_size_scale=1.5,show=False,legend_loc=False)
plt.title('PAGA Dentategyrus (CGAN)',fontsize=13)
plt.savefig('figures/paga_dg_cgan.png',dpi=300,bbox_inches='tight')
plt.savefig('pdf/paga_dg_cgan.pdf',dpi=300,bbox_inches='tight')
In [90]:
np.var(adata1.obs.loc[adata1.obs['celltype']=='OPC','pt_via'])
Out[90]:
0.2050766394636122
In [93]:
np.var(adata2.obs.loc[adata2.obs['celltype']=='OPC','pt_via'])
Out[93]:
0.0008610680040077388
In [104]:
res_dict={}

# Cor: expression correlation
# Mean of the diagonal (matched cell types)
diagonal_mean = np.trace(cor_pd.values) / len(cor_pd)
# Mean of the off-diagonal (mismatched cell types)
non_diagonal_mean = (np.sum(cor_pd.values) - np.trace(cor_pd.values)) / (len(cor_pd)**2 - len(cor_pd))
res_dict['Cor_mean']=diagonal_mean
res_dict['non_Cor_mean']=non_diagonal_mean

# Cos: marker-gene cosine similarity
# Mean of the diagonal
diagonal_mean = np.trace(plot_data.values) / len(plot_data)
# Mean of the off-diagonal
non_diagonal_mean = (np.sum(plot_data.values) - np.trace(plot_data.values)) / (len(plot_data)**2 - len(plot_data))
res_dict['Cos_mean']=diagonal_mean
res_dict['non_Cos_mean']=non_diagonal_mean

# Trans: PAGA transition confidence from OPC, before and after blending
res_dict['Trans_raw']=raw_transitions.loc['OPC'].max()
res_dict['Trans_after']=after_transitions.loc['OPC'].max()

# Variance of OPC pseudotime, before and after blending
res_dict['Var_raw']=np.var(adata2.obs.loc[adata2.obs['celltype']=='OPC','pt_via'])
res_dict['Var_after']=np.var(adata1.obs.loc[adata1.obs['celltype']=='OPC','pt_via'])
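The diagonal / off-diagonal computation above is repeated verbatim for both matrices; a small helper (an illustrative refactor, not used in this run) would factor it out:

def diag_offdiag_means(df):
    """Mean of matched (diagonal) vs. mismatched (off-diagonal) entries."""
    n = len(df)
    diag = np.trace(df.values) / n
    offdiag = (np.sum(df.values) - np.trace(df.values)) / (n**2 - n)
    return diag, offdiag

# e.g. res_dict['Cor_mean'], res_dict['non_Cor_mean'] = diag_offdiag_means(cor_pd)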
In [105]:
res_dict
Out[105]:
{'Cor_mean': 0.8853872742612411, 'non_Cor_mean': 0.5940097187211791, 'Cos_mean': 0.1824074221901813, 'non_Cos_mean': 0.013552926489814078, 'Trans_raw': 0.012605069833265089, 'Trans_after': 0.01236477520165905, 'Var_raw': 0.0008610680040077388, 'Var_after': 0.2050766394636122}
In [106]:
import pickle
with open('result/metric_cgan_dg.pkl','wb') as f:
    pickle.dump(res_dict,f)
In [107]:
with open('result/metric_cgan_dg.pkl','rb') as f:
    res_dict=pickle.load(f)
res_dict
Out[107]:
{'Cor_mean': 0.8853872742612411, 'non_Cor_mean': 0.5940097187211791, 'Cos_mean': 0.1824074221901813, 'non_Cos_mean': 0.013552926489814078, 'Trans_raw': 0.012605069833265089, 'Trans_after': 0.01236477520165905, 'Var_raw': 0.0008610680040077388, 'Var_after': 0.2050766394636122}