In [1]:
Copied!
from itertools import chain
import anndata as ad
import itertools
import networkx as nx
import pandas as pd
import scanpy as sc
import scglue
import seaborn as sns
from matplotlib import rcParams
from itertools import chain
import anndata as ad
import itertools
import networkx as nx
import pandas as pd
import scanpy as sc
import scglue
import seaborn as sns
from matplotlib import rcParams
INFO:torch.distributed.nn.jit.instantiator:Created a temporary directory at /tmp/tmp2e7ng3xz INFO:torch.distributed.nn.jit.instantiator:Writing /tmp/tmp2e7ng3xz/_remote_module_non_scriptable.py
In [2]:
Copied!
import omicverse as ov
ov.utils.ov_plot_set()
import omicverse as ov
ov.utils.ov_plot_set()
____ _ _ __ / __ \____ ___ (_)___| | / /__ _____________ / / / / __ `__ \/ / ___/ | / / _ \/ ___/ ___/ _ \ / /_/ / / / / / / / /__ | |/ / __/ / (__ ) __/ \____/_/ /_/ /_/_/\___/ |___/\___/_/ /____/\___/ Version: 1.6.2, Tutorials: https://omicverse.readthedocs.io/
In [3]:
Copied!
rna = ad.read_h5ad("rna-pp.h5ad")
atac = ad.read_h5ad("atac-pp.h5ad")
#guidance = nx.read_graphml("guidance.graphml.gz")
rna = ad.read_h5ad("rna-pp.h5ad")
atac = ad.read_h5ad("atac-pp.h5ad")
#guidance = nx.read_graphml("guidance.graphml.gz")
In [4]:
Copied!
rna.var_names_make_unique()
rna.obs_names_make_unique()
atac.var_names_make_unique()
atac.obs_names_make_unique()
rna.var_names_make_unique()
rna.obs_names_make_unique()
atac.var_names_make_unique()
atac.obs_names_make_unique()
In [9]:
Copied!
guidance = scglue.genomics.rna_anchored_guidance_graph(rna, atac)
guidance
guidance = scglue.genomics.rna_anchored_guidance_graph(rna, atac)
guidance
window_graph: 0%| | 0/36643 [00:00<?, ?it/s]
Out[9]:
<networkx.classes.multidigraph.MultiDiGraph at 0x7f24fdea1430>
In [13]:
Copied!
import networkx as nx
rna.write("rna-pp.h5ad", compression="gzip")
atac.write("atac-pp.h5ad", compression="gzip")
nx.write_graphml(guidance, "guidance.graphml.gz")
import networkx as nx
rna.write("rna-pp.h5ad", compression="gzip")
atac.write("atac-pp.h5ad", compression="gzip")
nx.write_graphml(guidance, "guidance.graphml.gz")
In [5]:
Copied!
scglue.models.configure_dataset(
rna, "NB", use_highly_variable=True,
use_layer="counts", use_rep="X_pca"
)
scglue.models.configure_dataset(
rna, "NB", use_highly_variable=True,
use_layer="counts", use_rep="X_pca"
)
In [6]:
Copied!
scglue.models.configure_dataset(
atac, "NB", use_highly_variable=True,
use_rep="X_lsi"
)
scglue.models.configure_dataset(
atac, "NB", use_highly_variable=True,
use_rep="X_lsi"
)
In [10]:
Copied!
guidance_hvf = guidance.subgraph(chain(
rna.var.query("highly_variable").index,
atac.var.query("highly_variable").index
)).copy()
guidance_hvf = guidance.subgraph(chain(
rna.var.query("highly_variable").index,
atac.var.query("highly_variable").index
)).copy()
In [ ]:
Copied!
glue = scglue.models.fit_SCGLUE(
{"rna": rna, "atac": atac}, guidance_hvf,
fit_kws={"directory": "glue"}
)
glue = scglue.models.fit_SCGLUE(
{"rna": rna, "atac": atac}, guidance_hvf,
fit_kws={"directory": "glue"}
)
[INFO] fit_SCGLUE: Pretraining SCGLUE model... [INFO] autodevice: Using GPU 0 as computation device. [INFO] check_graph: Checking variable coverage... [INFO] check_graph: Checking edge attributes... [INFO] check_graph: Checking self-loops... [INFO] check_graph: Checking graph symmetry... [INFO] SCGLUEModel: Setting `graph_batch_size` = 61738 [INFO] SCGLUEModel: Setting `max_epochs` = 48 [INFO] SCGLUEModel: Setting `patience` = 4 [INFO] SCGLUEModel: Setting `reduce_lr_patience` = 2 [INFO] SCGLUETrainer: Using training directory: "glue/pretrain"
In [12]:
Copied!
glue.save("glue.dill")
# glue = scglue.models.load_model("glue.dill")
glue.save("glue.dill")
# glue = scglue.models.load_model("glue.dill")
In [14]:
Copied!
dx = scglue.models.integration_consistency(
glue, {"rna": rna, "atac": atac}, guidance_hvf
)
dx
dx = scglue.models.integration_consistency(
glue, {"rna": rna, "atac": atac}, guidance_hvf
)
dx
[INFO] integration_consistency: Using layer "counts" for modality "rna" [INFO] integration_consistency: Selecting aggregation "sum" for modality "rna" [INFO] integration_consistency: Selecting aggregation "sum" for modality "atac" [INFO] integration_consistency: Selecting log-norm preprocessing for modality "rna" [INFO] integration_consistency: Selecting log-norm preprocessing for modality "atac" [INFO] get_metacells: Clustering metacells...
[WARNING] get_metacells: `faiss` is not installed, using `sklearn` instead... This might be slow with a large number of cells. Consider installing `faiss` following the guide from https://github.com/facebookresearch/faiss/blob/main/INSTALL.md
[INFO] get_metacells: Aggregating metacells... [INFO] metacell_corr: Computing correlation on 10 common metacells... normalizing counts per cell finished (0:00:00) normalizing counts per cell finished (0:00:00) [INFO] get_metacells: Clustering metacells...
[WARNING] get_metacells: `faiss` is not installed, using `sklearn` instead... This might be slow with a large number of cells. Consider installing `faiss` following the guide from https://github.com/facebookresearch/faiss/blob/main/INSTALL.md
[INFO] get_metacells: Aggregating metacells... [INFO] metacell_corr: Computing correlation on 20 common metacells... normalizing counts per cell finished (0:00:00) normalizing counts per cell finished (0:00:00) [INFO] get_metacells: Clustering metacells...
[WARNING] get_metacells: `faiss` is not installed, using `sklearn` instead... This might be slow with a large number of cells. Consider installing `faiss` following the guide from https://github.com/facebookresearch/faiss/blob/main/INSTALL.md
[INFO] get_metacells: Aggregating metacells... [INFO] metacell_corr: Computing correlation on 49 common metacells... normalizing counts per cell finished (0:00:00) normalizing counts per cell finished (0:00:00) [INFO] get_metacells: Clustering metacells...
[WARNING] get_metacells: `faiss` is not installed, using `sklearn` instead... This might be slow with a large number of cells. Consider installing `faiss` following the guide from https://github.com/facebookresearch/faiss/blob/main/INSTALL.md
[INFO] get_metacells: Aggregating metacells... [INFO] metacell_corr: Computing correlation on 99 common metacells... normalizing counts per cell finished (0:00:00) normalizing counts per cell finished (0:00:00) [INFO] get_metacells: Clustering metacells...
[WARNING] get_metacells: `faiss` is not installed, using `sklearn` instead... This might be slow with a large number of cells. Consider installing `faiss` following the guide from https://github.com/facebookresearch/faiss/blob/main/INSTALL.md
[INFO] get_metacells: Aggregating metacells... [INFO] metacell_corr: Computing correlation on 197 common metacells... normalizing counts per cell finished (0:00:00) normalizing counts per cell finished (0:00:00)
Out[14]:
n_meta | consistency | |
---|---|---|
0 | 10 | 0.459175 |
1 | 20 | 0.402021 |
2 | 50 | 0.327917 |
3 | 100 | 0.303282 |
4 | 200 | 0.246441 |
In [16]:
Copied!
rna.obsm["X_glue"] = glue.encode_data("rna", rna)
atac.obsm["X_glue"] = glue.encode_data("atac", atac)
rna.obsm["X_glue"] = glue.encode_data("rna", rna)
atac.obsm["X_glue"] = glue.encode_data("atac", atac)
In [18]:
Copied!
rna.obs['domain']='scRNA-seq'
atac.obs['domain']='scATAC-seq'
rna.obs['domain']='scRNA-seq'
atac.obs['domain']='scATAC-seq'
In [1]:
Copied!
import scanpy as sc
import scanpy as sc
In [2]:
Copied!
import scanpy as sc
rna=sc.read("rna-emb.h5ad")
atac=sc.read("atac-emb.h5ad")
rna,atac
import scanpy as sc
rna=sc.read("rna-emb.h5ad")
atac=sc.read("atac-emb.h5ad")
rna,atac
Out[2]:
(AnnData object with n_obs × n_vars = 61472 × 36643 obs: 'SampleID', 'Diagnosis', 'Batch', 'Cell.Type', 'cluster', 'Age', 'Sex', 'PMI', 'Tangle.Stage', 'Plaque.Stage', 'RIN', 'balancing_weight', 'domain' var: 'gene_ids', 'feature_types', 'genome', 'highly_variable', 'highly_variable_rank', 'means', 'variances', 'variances_norm', 'mean', 'std', 'chrom', 'chromStart', 'chromEnd', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'blockCount', 'blockSizes', 'blockStarts', 'gene_id', 'gene_type', 'hgnc_id' uns: 'Cell.Type_colors', '__scglue__', 'hvg', 'log1p', 'neighbors', 'pca', 'umap' obsm: 'X_glue', 'X_pca', 'X_umap' varm: 'PCs', 'X_glue' layers: 'counts' obsp: 'connectivities', 'distances', AnnData object with n_obs × n_vars = 130418 × 219070 obs: 'Sample.ID', 'Batch', 'Age', 'Sex', 'PMI', 'Tangle.Stage', 'Plaque.Stage', 'Diagnosis', 'RIN', 'cluster', 'Cell.Type', 'balancing_weight', 'domain' var: 'gene_ids', 'feature_types', 'genome', 'chrom', 'chromStart', 'chromEnd', 'highly_variable' uns: 'Cell.Type_colors', '__scglue__', 'neighbors', 'umap' obsm: 'X_glue', 'X_lsi', 'X_umap' varm: 'X_glue' obsp: 'connectivities', 'distances')
In [9]:
Copied!
import numpy as np
import numpy as np
Out[9]:
array([[90, 81, 64, 64], [50, 50, 22, 88], [73, 70, 81, 54], ..., [19, 67, 30, 5], [55, 63, 3, 31], [89, 96, 0, 18]])
In [12]:
Copied!
rna_com1=sc.AnnData(np.random.randint(0, 100, size=(61472, 1)),
obs=rna.obs)
rna_com1.obsm=rna.obsm.copy()
atac_com1=sc.AnnData(np.random.randint(0, 100, size=(130418, 1)),
obs=atac.obs)
atac_com1.obsm=atac.obsm.copy()
rna_com1=sc.AnnData(np.random.randint(0, 100, size=(61472, 1)),
obs=rna.obs)
rna_com1.obsm=rna.obsm.copy()
atac_com1=sc.AnnData(np.random.randint(0, 100, size=(130418, 1)),
obs=atac.obs)
atac_com1.obsm=atac.obsm.copy()
/tmp/ipykernel_622186/1062280131.py:1: FutureWarning: X.dtype being converted to np.float32 from int64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour. rna_com1=sc.AnnData(np.random.randint(0, 100, size=(61472, 1)), /tmp/ipykernel_622186/1062280131.py:5: FutureWarning: X.dtype being converted to np.float32 from int64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour. atac_com1=sc.AnnData(np.random.randint(0, 100, size=(130418, 1)),
In [13]:
Copied!
import anndata
combined1 = anndata.concat([rna_com1, atac_com1],merge='same')
import anndata
combined1 = anndata.concat([rna_com1, atac_com1],merge='same')
/mnt/home/zehuazeng/miniconda3/envs/omicverse/lib/python3.8/site-packages/anndata/_core/anndata.py:1828: UserWarning: Observation names are not unique. To make them unique, call `.obs_names_make_unique`. utils.warn_names_duplicates("obs")
In [17]:
Copied!
sc.pp.neighbors(combined1, use_rep="X_glue", metric="cosine")
#sc.tl.umap(combined)
#sc.pl.umap(combined, color=["Cell.Type", "domain"], wspace=0.65)
sc.pp.neighbors(combined1, use_rep="X_glue", metric="cosine")
#sc.tl.umap(combined)
#sc.pl.umap(combined, color=["Cell.Type", "domain"], wspace=0.65)
In [ ]:
Copied!
In [18]:
Copied!
from scvi.model.utils import mde
combined1.obsm["X_mde"] = mde(combined1.obsm["X_glue"])
from scvi.model.utils import mde
combined1.obsm["X_mde"] = mde(combined1.obsm["X_glue"])
Global seed set to 0
In [22]:
Copied!
ov.plot_set()
ov.plot_set()
In [23]:
Copied!
import matplotlib.pyplot as plt
import omicverse as ov
fig, ax = plt.subplots(figsize=(3,3))
ov.pl.embedding(
combined1,
basis="X_mde",
color=["domain"],
title='Domain',
frameon='small',
ncols=2,
palette=ov.utils.pyomic_palette()[11:],
show=False,
ax=ax,
)
plt.savefig("figures/mofa_umap_domain.png",dpi=300,bbox_inches = 'tight')
plt.savefig("pdf/mofa_umap_domain.pdf",dpi=300,bbox_inches = 'tight')
import matplotlib.pyplot as plt
import omicverse as ov
fig, ax = plt.subplots(figsize=(3,3))
ov.pl.embedding(
combined1,
basis="X_mde",
color=["domain"],
title='Domain',
frameon='small',
ncols=2,
palette=ov.utils.pyomic_palette()[11:],
show=False,
ax=ax,
)
plt.savefig("figures/mofa_umap_domain.png",dpi=300,bbox_inches = 'tight')
plt.savefig("pdf/mofa_umap_domain.pdf",dpi=300,bbox_inches = 'tight')
In [24]:
Copied!
a1=combined1[combined1.obs['domain']=='scRNA-seq']
rna.obsm['X_mde']=a1[rna.obs.index].obsm['X_mde'].copy()
a1=combined1[combined1.obs['domain']=='scRNA-seq']
rna.obsm['X_mde']=a1[rna.obs.index].obsm['X_mde'].copy()
In [27]:
Copied!
import matplotlib.pyplot as plt
import omicverse as ov
fig, ax = plt.subplots(figsize=(3,3))
ov.pl.embedding(
rna,
basis="X_mde",
color=["Cell.Type"],
title='Cell.Type',
frameon='small',
ncols=2,
palette=ov.utils.pyomic_palette()[:],
show=False,
ax=ax,
)
plt.savefig("figures/mofa_rna_umap_ct.png",dpi=300,bbox_inches = 'tight')
plt.savefig("pdf/mofa_rna_umap_ct.pdf",dpi=300,bbox_inches = 'tight')
import matplotlib.pyplot as plt
import omicverse as ov
fig, ax = plt.subplots(figsize=(3,3))
ov.pl.embedding(
rna,
basis="X_mde",
color=["Cell.Type"],
title='Cell.Type',
frameon='small',
ncols=2,
palette=ov.utils.pyomic_palette()[:],
show=False,
ax=ax,
)
plt.savefig("figures/mofa_rna_umap_ct.png",dpi=300,bbox_inches = 'tight')
plt.savefig("pdf/mofa_rna_umap_ct.pdf",dpi=300,bbox_inches = 'tight')
In [ ]:
Copied!
import matplotlib.pyplot as plt
import omicverse as ov
fig, ax = plt.subplots(figsize=(3,3))
sc.pl.embedding(
combined1,
basis="X_mde",
color=["domain"],
title='Domain',
frameon=False,
ncols=2,
palette=ov.utils.pyomic_palette()[11:],
show=False,
ax=ax,
)
plt.savefig("figures/mofa_umap_domain.png",dpi=300,bbox_inches = 'tight')
plt.savefig("pdf/mofa_umap_domain.pdf",dpi=300,bbox_inches = 'tight')
import matplotlib.pyplot as plt
import omicverse as ov
fig, ax = plt.subplots(figsize=(3,3))
sc.pl.embedding(
combined1,
basis="X_mde",
color=["domain"],
title='Domain',
frameon=False,
ncols=2,
palette=ov.utils.pyomic_palette()[11:],
show=False,
ax=ax,
)
plt.savefig("figures/mofa_umap_domain.png",dpi=300,bbox_inches = 'tight')
plt.savefig("pdf/mofa_umap_domain.pdf",dpi=300,bbox_inches = 'tight')
In [23]:
Copied!
feature_embeddings = glue.encode_graph(guidance_hvf)
feature_embeddings = pd.DataFrame(feature_embeddings, index=glue.vertices)
feature_embeddings.iloc[:5, :5]
feature_embeddings = glue.encode_graph(guidance_hvf)
feature_embeddings = pd.DataFrame(feature_embeddings, index=glue.vertices)
feature_embeddings.iloc[:5, :5]
Out[23]:
0 | 1 | 2 | 3 | 4 | |
---|---|---|---|---|---|
A2M | -0.193446 | -0.342415 | 0.119067 | 0.268406 | 0.034014 |
AADACL2-AS1 | -0.448315 | -0.129871 | 0.796907 | 0.016984 | -0.639139 |
AADACP1 | -0.235093 | 0.167198 | 0.123094 | 0.015190 | -0.094391 |
ABCA1 | -0.154965 | 0.187572 | -0.104705 | -0.050889 | -0.031023 |
ABCA10 | -0.264874 | 0.188973 | 0.009662 | 0.136203 | -0.126329 |
In [24]:
Copied!
rna.varm["X_glue"] = feature_embeddings.reindex(rna.var_names).to_numpy()
atac.varm["X_glue"] = feature_embeddings.reindex(atac.var_names).to_numpy()
rna.varm["X_glue"] = feature_embeddings.reindex(rna.var_names).to_numpy()
atac.varm["X_glue"] = feature_embeddings.reindex(atac.var_names).to_numpy()
In [25]:
Copied!
rna.write("rna-emb.h5ad", compression="gzip")
atac.write("atac-emb.h5ad", compression="gzip")
nx.write_graphml(guidance_hvf, "guidance-hvf.graphml.gz")
rna.write("rna-emb.h5ad", compression="gzip")
atac.write("atac-emb.h5ad", compression="gzip")
nx.write_graphml(guidance_hvf, "guidance-hvf.graphml.gz")
In [32]:
Copied!
atac
atac
Out[32]:
AnnData object with n_obs × n_vars = 130418 × 219070 obs: 'Sample.ID', 'Batch', 'Age', 'Sex', 'PMI', 'Tangle.Stage', 'Plaque.Stage', 'Diagnosis', 'RIN', 'cluster', 'Cell.Type', 'balancing_weight', 'domain' var: 'gene_ids', 'feature_types', 'genome', 'chrom', 'chromStart', 'chromEnd', 'highly_variable' uns: 'Cell.Type_colors', 'neighbors', 'umap', '__scglue__' obsm: 'X_lsi', 'X_umap', 'X_glue' varm: 'X_glue' obsp: 'connectivities', 'distances'
In [8]:
Copied!
rna=sc.read("rna-emb.h5ad", compression="gzip")
atac=sc.read("atac-emb.h5ad", compression="gzip")
rna=sc.read("rna-emb.h5ad", compression="gzip")
atac=sc.read("atac-emb.h5ad", compression="gzip")
In [9]:
Copied!
rna
rna
Out[9]:
AnnData object with n_obs × n_vars = 61472 × 36643 obs: 'SampleID', 'Diagnosis', 'Batch', 'Cell.Type', 'cluster', 'Age', 'Sex', 'PMI', 'Tangle.Stage', 'Plaque.Stage', 'RIN', 'balancing_weight', 'domain' var: 'gene_ids', 'feature_types', 'genome', 'highly_variable', 'highly_variable_rank', 'means', 'variances', 'variances_norm', 'mean', 'std', 'chrom', 'chromStart', 'chromEnd', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'blockCount', 'blockSizes', 'blockStarts', 'gene_id', 'gene_type', 'hgnc_id' uns: 'Cell.Type_colors', '__scglue__', 'hvg', 'log1p', 'neighbors', 'pca', 'umap' obsm: 'X_glue', 'X_pca', 'X_umap' varm: 'PCs', 'X_glue' layers: 'counts' obsp: 'connectivities', 'distances'
In [10]:
Copied!
sc.pl.embedding(
rna,
basis="X_umap",
color=["domain"],
title='Domain',
frameon=False,
ncols=2,
palette=ov.utils.pyomic_palette()[11:],
show=False,
#ax=ax,
)
sc.pl.embedding(
rna,
basis="X_umap",
color=["domain"],
title='Domain',
frameon=False,
ncols=2,
palette=ov.utils.pyomic_palette()[11:],
show=False,
#ax=ax,
)
GLUE pair¶
In [38]:
Copied!
from tqdm import tqdm,trange
import anndata
def glue_pair(rna:anndata.AnnData,
atac:anndata.AnnData,depth:int=20)->pd.DataFrame:
r"""
Pair the cells between RNA and ATAC using result of GLUE.
Arguments:
rna: the AnnData of RNA-seq.
atac: the AnnData of ATAC-seq.
depth: the depth of the search for the nearest neighbor.
"""
#提取GLUE层结果
print('......Extract GLUE layer from obs')
rna_loc=pd.DataFrame(rna.obsm['X_glue'], index=rna.obs.index)
atac_loc=pd.DataFrame(atac.obsm['X_glue'], index=atac.obs.index)
#对GLUE层进行Pearson系数分析
print('......Prepare for pair')
import gc
len1=(len(rna_loc)//5000)+1
len2=(len(atac_loc)//5000)+1
if len1>len2:
len1=len2
p_pd=pd.DataFrame(columns=['rank_'+str(i) for i in range(50)])
n_pd=pd.DataFrame(columns=['rank_'+str(i) for i in range(50)])
print('......Start to calculate the Pearson coef')
for j in range(len1):
c=pd.DataFrame()
with trange(len1) as tt:
for i in tt:
t1=rna_loc.iloc[5000*(i):5000*(i+1)]
t2=atac_loc.iloc[5000*(j):5000*(j+1)]
a=np.corrcoef(t1,t2)[len(t1):,0:len(t1)]
b=pd.DataFrame(a,index=t2.index,columns=t1.index)
c=pd.concat([c,b],axis=1)
del t1
del t2
del a
del b
gc.collect()
tt.set_description('Now Pearson block is {}/{}'.format(i,len1))
with trange(len(c)) as t:
for i in t:
t_c=c.iloc[i]
p_pd.loc[t_c.name]=c.iloc[i].sort_values(ascending=False)[:50].values
n_pd.loc[t_c.name]=c.iloc[i].sort_values(ascending=False)[:50].index.tolist()
t.set_description('Now rna_index is {}/{}, all is {}'.format(i+j*5000,i+j*5000+len(c),len(atac_loc)))
print('Now epoch is {}, {}/{}'.format(j,j*5000+len(c),len(atac_loc)))
del c
gc.collect()
#寻找最近的细胞,其中depth的灵活调整可以使得配对成功的细胞数变大,同时精度有所下降
def find_neighbor_cell(p_pd,n_pd,depth=10):
if depth>50:
print('......depth limited to 50')
depth=50
rubish_c=[]
finish_c=[]
with trange(depth) as dt:
for d in dt:
p_pd=p_pd.loc[p_pd['rank_{}'.format(d)]>0.9]
p_pd=p_pd.sort_values('rank_{}'.format(d),ascending=False)
for i in p_pd.index:
name=n_pd.loc[i,'rank_{}'.format(d)]
if name not in rubish_c:
finish_c.append(i)
rubish_c.append(name)
else:
continue
p_pd=p_pd.loc[~p_pd.index.isin(finish_c)]
n_pd=n_pd.loc[~n_pd.index.isin(finish_c)]
dt.set_description('Now depth is {}/{}'.format(d,depth))
result=pd.DataFrame()
result['omic_1']=rubish_c
result['omic_2']=finish_c
result.index=['cell_{}'.format(i) for i in range(len(result))]
return result
print('......Start to find neighbor')
res_pair=find_neighbor_cell(p_pd,n_pd,depth=depth)
return res_pair
res_pair=glue_pair(rna,atac,depth=10)
from tqdm import tqdm,trange
import anndata
def glue_pair(rna:anndata.AnnData,
atac:anndata.AnnData,depth:int=20)->pd.DataFrame:
r"""
Pair the cells between RNA and ATAC using result of GLUE.
Arguments:
rna: the AnnData of RNA-seq.
atac: the AnnData of ATAC-seq.
depth: the depth of the search for the nearest neighbor.
"""
#提取GLUE层结果
print('......Extract GLUE layer from obs')
rna_loc=pd.DataFrame(rna.obsm['X_glue'], index=rna.obs.index)
atac_loc=pd.DataFrame(atac.obsm['X_glue'], index=atac.obs.index)
#对GLUE层进行Pearson系数分析
print('......Prepare for pair')
import gc
len1=(len(rna_loc)//5000)+1
len2=(len(atac_loc)//5000)+1
if len1>len2:
len1=len2
p_pd=pd.DataFrame(columns=['rank_'+str(i) for i in range(50)])
n_pd=pd.DataFrame(columns=['rank_'+str(i) for i in range(50)])
print('......Start to calculate the Pearson coef')
for j in range(len1):
c=pd.DataFrame()
with trange(len1) as tt:
for i in tt:
t1=rna_loc.iloc[5000*(i):5000*(i+1)]
t2=atac_loc.iloc[5000*(j):5000*(j+1)]
a=np.corrcoef(t1,t2)[len(t1):,0:len(t1)]
b=pd.DataFrame(a,index=t2.index,columns=t1.index)
c=pd.concat([c,b],axis=1)
del t1
del t2
del a
del b
gc.collect()
tt.set_description('Now Pearson block is {}/{}'.format(i,len1))
with trange(len(c)) as t:
for i in t:
t_c=c.iloc[i]
p_pd.loc[t_c.name]=c.iloc[i].sort_values(ascending=False)[:50].values
n_pd.loc[t_c.name]=c.iloc[i].sort_values(ascending=False)[:50].index.tolist()
t.set_description('Now rna_index is {}/{}, all is {}'.format(i+j*5000,i+j*5000+len(c),len(atac_loc)))
print('Now epoch is {}, {}/{}'.format(j,j*5000+len(c),len(atac_loc)))
del c
gc.collect()
#寻找最近的细胞,其中depth的灵活调整可以使得配对成功的细胞数变大,同时精度有所下降
def find_neighbor_cell(p_pd,n_pd,depth=10):
if depth>50:
print('......depth limited to 50')
depth=50
rubish_c=[]
finish_c=[]
with trange(depth) as dt:
for d in dt:
p_pd=p_pd.loc[p_pd['rank_{}'.format(d)]>0.9]
p_pd=p_pd.sort_values('rank_{}'.format(d),ascending=False)
for i in p_pd.index:
name=n_pd.loc[i,'rank_{}'.format(d)]
if name not in rubish_c:
finish_c.append(i)
rubish_c.append(name)
else:
continue
p_pd=p_pd.loc[~p_pd.index.isin(finish_c)]
n_pd=n_pd.loc[~n_pd.index.isin(finish_c)]
dt.set_description('Now depth is {}/{}'.format(d,depth))
result=pd.DataFrame()
result['omic_1']=rubish_c
result['omic_2']=finish_c
result.index=['cell_{}'.format(i) for i in range(len(result))]
return result
print('......Start to find neighbor')
res_pair=find_neighbor_cell(p_pd,n_pd,depth=depth)
return res_pair
res_pair=glue_pair(rna,atac,depth=10)
......Extract GLUE layer from obs ......Prepare for pair ......Start to calculate the Pearson coef
Now Pearson block is 12/13: 100%|██████████████████████████████████████████████████████████████████████████████| 13/13 [00:17<00:00, 1.33s/it] Now rna_index is 4999, 5000/130418: 100%|██████████████████████████████████████████████████████████████████| 5000/5000 [05:24<00:00, 15.41it/s]
Now epoch is 0, 5000/130418
Now Pearson block is 12/13: 100%|██████████████████████████████████████████████████████████████████████████████| 13/13 [00:17<00:00, 1.34s/it] Now rna_index is 4999, 10000/130418: 100%|█████████████████████████████████████████████████████████████████| 5000/5000 [05:37<00:00, 14.83it/s]
Now epoch is 1, 10000/130418
Now Pearson block is 12/13: 100%|██████████████████████████████████████████████████████████████████████████████| 13/13 [00:17<00:00, 1.33s/it] Now rna_index is 4999, 15000/130418: 100%|█████████████████████████████████████████████████████████████████| 5000/5000 [06:02<00:00, 13.78it/s]
Now epoch is 2, 15000/130418
Now Pearson block is 12/13: 100%|██████████████████████████████████████████████████████████████████████████████| 13/13 [00:17<00:00, 1.33s/it] Now rna_index is 4999, 20000/130418: 100%|█████████████████████████████████████████████████████████████████| 5000/5000 [06:12<00:00, 13.43it/s]
Now epoch is 3, 20000/130418
Now Pearson block is 12/13: 100%|██████████████████████████████████████████████████████████████████████████████| 13/13 [00:17<00:00, 1.34s/it] Now rna_index is 4999, 25000/130418: 100%|█████████████████████████████████████████████████████████████████| 5000/5000 [06:22<00:00, 13.07it/s]
Now epoch is 4, 25000/130418
Now Pearson block is 12/13: 100%|██████████████████████████████████████████████████████████████████████████████| 13/13 [00:17<00:00, 1.33s/it] Now rna_index is 4999, 30000/130418: 100%|█████████████████████████████████████████████████████████████████| 5000/5000 [06:31<00:00, 12.76it/s]
Now epoch is 5, 30000/130418
Now Pearson block is 12/13: 100%|██████████████████████████████████████████████████████████████████████████████| 13/13 [00:17<00:00, 1.33s/it] Now rna_index is 4999, 35000/130418: 100%|█████████████████████████████████████████████████████████████████| 5000/5000 [06:41<00:00, 12.45it/s]
Now epoch is 6, 35000/130418
Now Pearson block is 12/13: 100%|██████████████████████████████████████████████████████████████████████████████| 13/13 [00:17<00:00, 1.34s/it] Now rna_index is 4999, 40000/130418: 100%|█████████████████████████████████████████████████████████████████| 5000/5000 [06:51<00:00, 12.14it/s]
Now epoch is 7, 40000/130418
Now Pearson block is 12/13: 100%|██████████████████████████████████████████████████████████████████████████████| 13/13 [00:17<00:00, 1.33s/it] Now rna_index is 4999, 45000/130418: 100%|█████████████████████████████████████████████████████████████████| 5000/5000 [07:02<00:00, 11.83it/s]
Now epoch is 8, 45000/130418
Now Pearson block is 12/13: 100%|██████████████████████████████████████████████████████████████████████████████| 13/13 [00:17<00:00, 1.33s/it] Now rna_index is 4999, 50000/130418: 100%|█████████████████████████████████████████████████████████████████| 5000/5000 [07:14<00:00, 11.50it/s]
Now epoch is 9, 50000/130418
Now Pearson block is 12/13: 100%|██████████████████████████████████████████████████████████████████████████████| 13/13 [00:17<00:00, 1.34s/it] Now rna_index is 4999, 55000/130418: 100%|█████████████████████████████████████████████████████████████████| 5000/5000 [07:19<00:00, 11.39it/s]
Now epoch is 10, 55000/130418
Now Pearson block is 12/13: 100%|██████████████████████████████████████████████████████████████████████████████| 13/13 [00:17<00:00, 1.33s/it] Now rna_index is 4999, 60000/130418: 100%|█████████████████████████████████████████████████████████████████| 5000/5000 [07:26<00:00, 11.20it/s]
Now epoch is 11, 60000/130418
Now Pearson block is 12/13: 100%|██████████████████████████████████████████████████████████████████████████████| 13/13 [00:17<00:00, 1.34s/it] Now rna_index is 4999, 65000/130418: 100%|█████████████████████████████████████████████████████████████████| 5000/5000 [07:37<00:00, 10.92it/s]
Now epoch is 12, 65000/130418 ......Start to find neighbor
Now depth is 19/20: 100%|██████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:47<00:00, 2.40s/it]
In [40]:
Copied!
res_pair.shape
res_pair.shape
Out[40]:
(28353, 2)
In [44]:
Copied!
rna1=rna[res_pair['omic_1']]
rna1
rna1=rna[res_pair['omic_1']]
rna1
Out[44]:
View of AnnData object with n_obs × n_vars = 28353 × 36643 obs: 'SampleID', 'Diagnosis', 'Batch', 'Cell.Type', 'cluster', 'Age', 'Sex', 'PMI', 'Tangle.Stage', 'Plaque.Stage', 'RIN', 'balancing_weight', 'domain' var: 'gene_ids', 'feature_types', 'genome', 'highly_variable', 'highly_variable_rank', 'means', 'variances', 'variances_norm', 'mean', 'std', 'chrom', 'chromStart', 'chromEnd', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'blockCount', 'blockSizes', 'blockStarts', 'gene_id', 'gene_type', 'hgnc_id' uns: 'Cell.Type_colors', 'hvg', 'log1p', 'neighbors', 'pca', 'umap', '__scglue__' obsm: 'X_pca', 'X_umap', 'X_glue' varm: 'PCs', 'X_glue' layers: 'counts' obsp: 'connectivities', 'distances'
In [45]:
Copied!
atac1=atac[res_pair['omic_2']]
atac1
atac1=atac[res_pair['omic_2']]
atac1
Out[45]:
View of AnnData object with n_obs × n_vars = 28353 × 219070 obs: 'Sample.ID', 'Batch', 'Age', 'Sex', 'PMI', 'Tangle.Stage', 'Plaque.Stage', 'Diagnosis', 'RIN', 'cluster', 'Cell.Type', 'balancing_weight', 'domain' var: 'gene_ids', 'feature_types', 'genome', 'chrom', 'chromStart', 'chromEnd', 'highly_variable' uns: 'Cell.Type_colors', 'neighbors', 'umap', '__scglue__' obsm: 'X_lsi', 'X_umap', 'X_glue' varm: 'X_glue' obsp: 'connectivities', 'distances'
In [46]:
Copied!
from scvi.model.utils import mde
rna1.obsm["X_mde"] = mde(rna1.obsm["X_glue"])
atac1.obsm["X_mde"] = mde(atac1.obsm["X_glue"])
from scvi.model.utils import mde
rna1.obsm["X_mde"] = mde(rna1.obsm["X_glue"])
atac1.obsm["X_mde"] = mde(atac1.obsm["X_glue"])
In [49]:
Copied!
rna1.obs.index=res_pair.index
atac1.obs.index=res_pair.index
rna1.obs.index=res_pair.index
atac1.obs.index=res_pair.index
In [54]:
Copied!
res_pair.to_csv('res_pair.csv')
res_pair.to_csv('res_pair.csv')
In [50]:
Copied!
from mudata import MuData
mdata = MuData({'rna': rna1, 'atac': atac1})
mdata
from mudata import MuData
mdata = MuData({'rna': rna1, 'atac': atac1})
mdata
Out[50]:
MuData object with n_obs × n_vars = 28353 × 255713 var: 'gene_ids', 'feature_types', 'genome', 'highly_variable', 'chrom', 'chromStart', 'chromEnd' 2 modalities rna: 28353 x 36643 obs: 'SampleID', 'Diagnosis', 'Batch', 'Cell.Type', 'cluster', 'Age', 'Sex', 'PMI', 'Tangle.Stage', 'Plaque.Stage', 'RIN', 'balancing_weight', 'domain' var: 'gene_ids', 'feature_types', 'genome', 'highly_variable', 'highly_variable_rank', 'means', 'variances', 'variances_norm', 'mean', 'std', 'chrom', 'chromStart', 'chromEnd', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'blockCount', 'blockSizes', 'blockStarts', 'gene_id', 'gene_type', 'hgnc_id' uns: 'Cell.Type_colors', 'hvg', 'log1p', 'neighbors', 'pca', 'umap', '__scglue__', 'domain_colors' obsm: 'X_pca', 'X_umap', 'X_glue', 'X_mde' varm: 'PCs', 'X_glue' layers: 'counts' obsp: 'connectivities', 'distances' atac: 28353 x 219070 obs: 'Sample.ID', 'Batch', 'Age', 'Sex', 'PMI', 'Tangle.Stage', 'Plaque.Stage', 'Diagnosis', 'RIN', 'cluster', 'Cell.Type', 'balancing_weight', 'domain' var: 'gene_ids', 'feature_types', 'genome', 'chrom', 'chromStart', 'chromEnd', 'highly_variable' uns: 'Cell.Type_colors', 'neighbors', 'umap', '__scglue__', 'domain_colors' obsm: 'X_lsi', 'X_umap', 'X_glue', 'X_mde' varm: 'X_glue' obsp: 'connectivities', 'distances'
In [52]:
Copied!
mdata.write("ad.h5mu",compression='gzip')
mdata.write("ad.h5mu",compression='gzip')
In [ ]:
Copied!
import muon as mu
import muon as mu
In [3]:
Copied!
from sklearn.metrics import adjusted_rand_score as ari
ari(mdata.obs['rna:Cell.Type'], mdata.obs['atac:Cell.Type'])
from sklearn.metrics import adjusted_rand_score as ari
ari(mdata.obs['rna:Cell.Type'], mdata.obs['atac:Cell.Type'])
Out[3]:
0.9719116168202193
In [ ]:
Copied!
mu.tl.mofa(mdata, outfile="models/ad_rna_atac.hdf5")
mu.tl.mofa(mdata, outfile="models/ad_rna_atac.hdf5")
MOFA data prepare¶
In [7]:
Copied!
rna1=mdata['rna']
rna1=rna1[:,rna1.var['highly_variable']==True]
atac1=mdata['atac']
atac1=atac1[:,atac1.var['highly_variable']==True]
rna1=mdata['rna']
rna1=rna1[:,rna1.var['highly_variable']==True]
atac1=mdata['atac']
atac1=atac1[:,atac1.var['highly_variable']==True]
In [8]:
Copied!
rna1.write("rna-hvf.h5ad", compression="gzip")
atac1.write("atac-hvf.h5ad", compression="gzip")
rna1.write("rna-hvf.h5ad", compression="gzip")
atac1.write("atac-hvf.h5ad", compression="gzip")
In [2]:
Copied!
mdata=mu.read('ad.h5mu')
mdata=mu.read('ad.h5mu')
In [2]:
Copied!
rna1=ov.utils.read('rna-hvf.h5ad')
atac1=ov.utils.read('atac-hvf.h5ad')
rna1=ov.utils.read('rna-hvf.h5ad')
atac1=ov.utils.read('atac-hvf.h5ad')
In [5]:
Copied!
import random
random_obs_index=random.sample(list(rna1.obs.index),5000)
import random
random_obs_index=random.sample(list(rna1.obs.index),5000)
In [9]:
Copied!
from sklearn.metrics import adjusted_rand_score as ari
ari(rna1[random_obs_index].obs['Cell.Type'], atac1[random_obs_index].obs['Cell.Type'])
from sklearn.metrics import adjusted_rand_score as ari
ari(rna1[random_obs_index].obs['Cell.Type'], atac1[random_obs_index].obs['Cell.Type'])
Out[9]:
0.9723725925034213
In [10]:
Copied!
rna1[random_obs_index].write("rna-mofa.h5ad", compression="gzip")
atac1[random_obs_index].write("atac-mofa.h5ad", compression="gzip")
rna1[random_obs_index].write("rna-mofa.h5ad", compression="gzip")
atac1[random_obs_index].write("atac-mofa.h5ad", compression="gzip")
In [28]:
Copied!
rna1=ov.utils.read('rna-mofa.h5ad')
atac1=ov.utils.read('atac-mofa.h5ad')
rna1=ov.utils.read('rna-mofa.h5ad')
atac1=ov.utils.read('atac-mofa.h5ad')
MOFA trainging¶
In [3]:
Copied!
test_mofa=ov.single.mofa(omics=[rna1,atac1],
omics_name=['RNA','ATAC'])
test_mofa=ov.single.mofa(omics=[rna1,atac1],
omics_name=['RNA','ATAC'])
In [4]:
Copied!
test_mofa.mofa_preprocess()
test_mofa.mofa_run(outfile='models/ad_rna_atac.hdf5')
test_mofa.mofa_preprocess()
test_mofa.mofa_run(outfile='models/ad_rna_atac.hdf5')
######################################################### ### __ __ ____ ______ ### ### | \/ |/ __ \| ____/\ _ ### ### | \ / | | | | |__ / \ _| |_ ### ### | |\/| | | | | __/ /\ \_ _| ### ### | | | | |__| | | / ____ \|_| ### ### |_| |_|\____/|_|/_/ \_\ ### ### ### ######################################################### Groups names not provided, using default naming convention: - group1, group2, ..., groupG Successfully loaded view='RNA' group='group0' with N=5000 samples and D=3161 features... Successfully loaded view='ATAC' group='group0' with N=5000 samples and D=58097 features... Warning: 37 features(s) in view 0 have zero variance, consider removing them before training the model... Warning: 3092 features(s) in view 1 have zero variance, consider removing them before training the model... Model options: - Automatic Relevance Determination prior on the factors: True - Automatic Relevance Determination prior on the weights: True - Spike-and-slab prior on the factors: False - Spike-and-slab prior on the weights: True Likelihoods: - View 0 (RNA): gaussian - View 1 (ATAC): gaussian GPU mode is activated ###################################### ## Training the model with seed 112 ## ###################################### ELBO before training: -4290590720.86 Iteration 1: time=36.58, ELBO=302162726.28, deltaELBO=4592753447.138 (107.04245047%), Factors=19 Iteration 2: time=32.05, ELBO=354166671.33, deltaELBO=52003945.057 (1.21204628%), Factors=18 Iteration 3: time=30.22, ELBO=354805494.54, deltaELBO=638823.211 (0.01488893%), Factors=17 Iteration 4: time=29.15, ELBO=355107136.03, deltaELBO=301641.485 (0.00703030%), Factors=17 Iteration 5: time=29.16, ELBO=355319195.06, deltaELBO=212059.028 (0.00494242%), Factors=17 Iteration 6: time=29.15, ELBO=355551548.43, deltaELBO=232353.373 (0.00541542%), Factors=17 Iteration 7: time=29.23, ELBO=355819088.26, deltaELBO=267539.829 (0.00623550%), Factors=17 Iteration 8: time=29.18, ELBO=356076651.91, deltaELBO=257563.653 (0.00600299%), Factors=17 Iteration 9: time=29.17, ELBO=356295123.60, deltaELBO=218471.693 (0.00509188%), Factors=17 Iteration 10: time=29.17, ELBO=356474463.38, deltaELBO=179339.776 (0.00417984%), Factors=17 Iteration 11: time=28.73, ELBO=356606211.84, deltaELBO=131748.464 (0.00307064%), Factors=16 Iteration 12: time=27.66, ELBO=356752505.55, deltaELBO=146293.701 (0.00340964%), Factors=16 Iteration 13: time=27.65, ELBO=356866168.32, deltaELBO=113662.775 (0.00264912%), Factors=16 Iteration 14: time=27.72, ELBO=356948056.66, deltaELBO=81888.340 (0.00190856%), Factors=16 Iteration 15: time=27.27, ELBO=357026668.86, deltaELBO=78612.199 (0.00183220%), Factors=15 Iteration 16: time=26.10, ELBO=357075677.15, deltaELBO=49008.294 (0.00114223%), Factors=15 Iteration 17: time=26.10, ELBO=357119669.55, deltaELBO=43992.395 (0.00102532%), Factors=15 Iteration 18: time=26.11, ELBO=357161582.66, deltaELBO=41913.106 (0.00097686%), Factors=15 Iteration 19: time=26.11, ELBO=357203104.12, deltaELBO=41521.468 (0.00096773%), Factors=15 Iteration 20: time=26.11, ELBO=357245287.22, deltaELBO=42183.098 (0.00098315%), Factors=15 Iteration 21: time=26.12, ELBO=357288777.84, deltaELBO=43490.616 (0.00101363%), Factors=15 Iteration 22: time=25.70, ELBO=357117113.79, deltaELBO=-171664.043 (0.00400094%), Factors=14 Warning, lower bound is decreasing... Iteration 23: time=24.64, ELBO=357173544.76, deltaELBO=56430.966 (0.00131523%), Factors=14 Iteration 24: time=24.64, ELBO=357220278.61, deltaELBO=46733.851 (0.00108922%), Factors=14 Iteration 25: time=24.64, ELBO=357251984.65, deltaELBO=31706.041 (0.00073897%), Factors=14 Iteration 26: time=24.64, ELBO=357279482.02, deltaELBO=27497.363 (0.00064088%), Factors=14 Iteration 27: time=24.65, ELBO=357305657.80, deltaELBO=26175.780 (0.00061007%), Factors=14 Iteration 28: time=24.61, ELBO=357329754.32, deltaELBO=24096.524 (0.00056161%), Factors=14 Iteration 29: time=24.65, ELBO=357349778.45, deltaELBO=20024.126 (0.00046670%), Factors=14 Iteration 30: time=24.23, ELBO=357310329.46, deltaELBO=-39448.984 (0.00091943%), Factors=13 Warning, lower bound is decreasing... Iteration 31: time=23.18, ELBO=357320371.06, deltaELBO=10041.598 (0.00023404%), Factors=13 Iteration 32: time=23.18, ELBO=357328312.77, deltaELBO=7941.707 (0.00018510%), Factors=13 Converged! ####################### ## Training finished ## ####################### Saving model in models/ad_rna_atac.hdf5...
In [30]:
Copied!
from mudata import MuData
mdata = MuData({'rna': rna1, 'atac': atac1})
mdata
from mudata import MuData
mdata = MuData({'rna': rna1, 'atac': atac1})
mdata
Out[30]:
MuData object with n_obs × n_vars = 5000 × 61258 var: 'gene_ids', 'feature_types', 'genome', 'highly_variable', 'chrom', 'chromStart', 'chromEnd' 2 modalities rna: 5000 x 3161 obs: 'SampleID', 'Diagnosis', 'Batch', 'Cell.Type', 'cluster', 'Age', 'Sex', 'PMI', 'Tangle.Stage', 'Plaque.Stage', 'RIN', 'balancing_weight', 'domain' var: 'gene_ids', 'feature_types', 'genome', 'highly_variable', 'highly_variable_rank', 'means', 'variances', 'variances_norm', 'mean', 'std', 'chrom', 'chromStart', 'chromEnd', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'blockCount', 'blockSizes', 'blockStarts', 'gene_id', 'gene_type', 'hgnc_id' uns: 'Cell.Type_colors', '__scglue__', 'domain_colors', 'hvg', 'log1p', 'neighbors', 'pca', 'umap' obsm: 'X_glue', 'X_mde', 'X_pca', 'X_umap' varm: 'PCs', 'X_glue' layers: 'counts' obsp: 'connectivities', 'distances' atac: 5000 x 58097 obs: 'Sample.ID', 'Batch', 'Age', 'Sex', 'PMI', 'Tangle.Stage', 'Plaque.Stage', 'Diagnosis', 'RIN', 'cluster', 'Cell.Type', 'balancing_weight', 'domain' var: 'gene_ids', 'feature_types', 'genome', 'chrom', 'chromStart', 'chromEnd', 'highly_variable' uns: 'Cell.Type_colors', '__scglue__', 'domain_colors', 'neighbors', 'umap' obsm: 'X_glue', 'X_lsi', 'X_mde', 'X_umap' varm: 'X_glue' obsp: 'connectivities', 'distances'
In [31]:
Copied!
# # NOTE: if you wish to load the trained model,
# # use mofax library to quickly add
# # factors and weights matrices
# # to the mdata object
# #
import mofax as mfx
model = mfx.mofa_model('models/ad_rna_atac.hdf5')
mdata.obsm["X_mofa"] = model.get_factors()
model.close()
# # NOTE: if you wish to load the trained model,
# # use mofax library to quickly add
# # factors and weights matrices
# # to the mdata object
# #
import mofax as mfx
model = mfx.mofa_model('models/ad_rna_atac.hdf5')
mdata.obsm["X_mofa"] = model.get_factors()
model.close()
In [7]:
Copied!
mdata.obsm['X_mofa'].shape
mdata.obsm['X_mofa'].shape
Out[7]:
(5000, 13)
In [9]:
Copied!
# Copy colours that were defined previously
mdata.uns = mdata.uns or dict()
mdata.uns['rna:Cell.Type_colors'] = mdata['rna'].uns['Cell.Type_colors']
mdata.uns['atac:Cell.Type_colors'] = mdata['atac'].uns['Cell.Type_colors']
# Copy colours that were defined previously
mdata.uns = mdata.uns or dict()
mdata.uns['rna:Cell.Type_colors'] = mdata['rna'].uns['Cell.Type_colors']
mdata.uns['atac:Cell.Type_colors'] = mdata['atac'].uns['Cell.Type_colors']
In [33]:
Copied!
m = mfx.mofa_model('models/ad_rna_atac.hdf5')
a1=m.get_r2()
m = mfx.mofa_model('models/ad_rna_atac.hdf5')
a1=m.get_r2()
In [34]:
Copied!
a1.head()
a1.head()
Out[34]:
Factor | View | Group | R2 | |
---|---|---|---|---|
0 | Factor1 | RNA | group0 | 6.185937 |
1 | Factor1 | ATAC | group0 | 0.625745 |
2 | Factor2 | RNA | group0 | 1.907447 |
3 | Factor2 | ATAC | group0 | 2.482087 |
4 | Factor3 | RNA | group0 | 3.663136 |
In [35]:
Copied!
import pandas as pd
plot_data=pd.DataFrame()
for i in m.get_r2()['View'].unique():
plot_data[i]=m.get_r2().loc[m.get_r2()['View']==i,'R2'].values
plot_data
import pandas as pd
plot_data=pd.DataFrame()
for i in m.get_r2()['View'].unique():
plot_data[i]=m.get_r2().loc[m.get_r2()['View']==i,'R2'].values
plot_data
Out[35]:
RNA | ATAC | |
---|---|---|
0 | 6.185937 | 0.625745 |
1 | 1.907447 | 2.482087 |
2 | 3.663136 | 0.207662 |
3 | 2.290023 | 0.001297 |
4 | 1.176851 | 0.032663 |
5 | 1.002245 | 0.042093 |
6 | 0.090284 | 0.616941 |
7 | 0.074696 | 0.465844 |
8 | 0.020677 | 0.323996 |
9 | 0.048964 | 0.178057 |
10 | 0.109093 | 0.103680 |
11 | 0.031659 | 0.124130 |
12 | -1.605516 | 0.531147 |
In [37]:
Copied!
import seaborn as sns
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(2,3))
sns.heatmap(plot_data,cmap='Greens',ax=ax,xticklabels=True,yticklabels=True,
cbar_kws={'shrink':0.5})
plt.xticks(fontsize=10)
plt.yticks(fontsize=10)
plt.ylabel('Factor',fontsize=12)
plt.xlabel('View',fontsize=12)
plt.title('Varience',fontsize=12)
plt.savefig("figures/mofa_varience.png",dpi=300,bbox_inches = 'tight')
plt.savefig("pdf/mofa_varience.pdf",dpi=300,bbox_inches = 'tight')
import seaborn as sns
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(2,3))
sns.heatmap(plot_data,cmap='Greens',ax=ax,xticklabels=True,yticklabels=True,
cbar_kws={'shrink':0.5})
plt.xticks(fontsize=10)
plt.yticks(fontsize=10)
plt.ylabel('Factor',fontsize=12)
plt.xlabel('View',fontsize=12)
plt.title('Varience',fontsize=12)
plt.savefig("figures/mofa_varience.png",dpi=300,bbox_inches = 'tight')
plt.savefig("pdf/mofa_varience.pdf",dpi=300,bbox_inches = 'tight')
In [38]:
Copied!
f1_w=ov.single.get_weights(hdf5_path='models/ad_rna_atac.hdf5',view='RNA',factor=1)
f1_w.sort_values('weights',ascending=False)
f1_w=ov.single.get_weights(hdf5_path='models/ad_rna_atac.hdf5',view='RNA',factor=1)
f1_w.sort_values('weights',ascending=False)
Out[38]:
feature | weights | abs_weights | sig | |
---|---|---|---|---|
1619 | b'RNA_RALYL' | 1.000000 | 1.000000 | + |
2177 | b'RNA_NELL2' | 0.981866 | 0.981866 | + |
677 | b'RNA_STXBP5L' | 0.978343 | 0.978343 | + |
1528 | b'RNA_DLGAP2' | 0.977844 | 0.977844 | + |
2463 | b'RNA_GABRB3' | 0.965634 | 0.965634 | + |
... | ... | ... | ... | ... |
520 | b'RNA_ERBB4' | -0.293090 | 0.293090 | - |
1021 | b'RNA_SLC1A3' | -0.302998 | 0.302998 | - |
1108 | b'RNA_LINC01170' | -0.386568 | 0.386568 | - |
878 | b'RNA_SPP1' | -0.398615 | 0.398615 | - |
2032 | b'RNA_NEAT1' | -0.631516 | 0.631516 | - |
3161 rows × 4 columns
In [39]:
Copied!
f1_w.index=[str(i,"utf8").replace('RNA_','') for i in f1_w['feature']]
f1_w
f1_w.index=[str(i,"utf8").replace('RNA_','') for i in f1_w['feature']]
f1_w
Out[39]:
feature | weights | abs_weights | sig | |
---|---|---|---|---|
GABRD | b'RNA_GABRD' | 0.388015 | 0.388015 | + |
HES5 | b'RNA_HES5' | 0.000463 | 0.000463 | + |
TTC34 | b'RNA_TTC34' | 0.113615 | 0.113615 | + |
PRDM16 | b'RNA_PRDM16' | -0.000092 | 0.000092 | - |
LINC01646 | b'RNA_LINC01646' | 0.000030 | 0.000030 | + |
... | ... | ... | ... | ... |
MT-CO3 | b'RNA_MT-CO3' | 0.242487 | 0.242487 | + |
MT-ND3 | b'RNA_MT-ND3' | 0.125587 | 0.125587 | + |
MT-ND4 | b'RNA_MT-ND4' | 0.161691 | 0.161691 | + |
MT-ND5 | b'RNA_MT-ND5' | 0.104282 | 0.104282 | + |
MT-CYB | b'RNA_MT-CYB' | 0.165990 | 0.165990 | + |
3161 rows × 4 columns
In [40]:
Copied!
factor_w=pd.DataFrame(index=rna1.var.index)
for i in range(m.get_factors().shape[1]):
f1_w=ov.single.get_weights(hdf5_path='models/ad_rna_atac.hdf5',view='RNA',factor=i+1)
f1_w.index=[str(i,"utf8").replace('RNA_','') for i in f1_w['feature']]
f1_w=f1_w.loc[rna1.var.index]
factor_w['factor_{}'.format(i+1)]=f1_w['weights']
factor_w=pd.DataFrame(index=rna1.var.index)
for i in range(m.get_factors().shape[1]):
f1_w=ov.single.get_weights(hdf5_path='models/ad_rna_atac.hdf5',view='RNA',factor=i+1)
f1_w.index=[str(i,"utf8").replace('RNA_','') for i in f1_w['feature']]
f1_w=f1_w.loc[rna1.var.index]
factor_w['factor_{}'.format(i+1)]=f1_w['weights']
In [41]:
Copied!
ov.single.get_weights(hdf5_path='models/ad_rna_atac.hdf5',view='RNA',factor=1)
ov.single.get_weights(hdf5_path='models/ad_rna_atac.hdf5',view='RNA',factor=1)
Out[41]:
feature | weights | abs_weights | sig | |
---|---|---|---|---|
0 | b'RNA_GABRD' | 0.388015 | 0.388015 | + |
1 | b'RNA_HES5' | 0.000463 | 0.000463 | + |
2 | b'RNA_TTC34' | 0.113615 | 0.113615 | + |
3 | b'RNA_PRDM16' | -0.000092 | 0.000092 | - |
4 | b'RNA_LINC01646' | 0.000030 | 0.000030 | + |
... | ... | ... | ... | ... |
3156 | b'RNA_MT-CO3' | 0.242487 | 0.242487 | + |
3157 | b'RNA_MT-ND3' | 0.125587 | 0.125587 | + |
3158 | b'RNA_MT-ND4' | 0.161691 | 0.161691 | + |
3159 | b'RNA_MT-ND5' | 0.104282 | 0.104282 | + |
3160 | b'RNA_MT-CYB' | 0.165990 | 0.165990 | + |
3161 rows × 4 columns
In [42]:
Copied!
rna1.varm['factor_weight']=factor_w.values
rna1.varm['factor_weight']=factor_w.values
In [43]:
Copied!
factor_w.head()
factor_w.head()
Out[43]:
factor_1 | factor_2 | factor_3 | factor_4 | factor_5 | factor_6 | factor_7 | factor_8 | factor_9 | factor_10 | factor_11 | factor_12 | factor_13 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
GABRD | 0.388015 | -0.096568 | -0.002226 | -0.000855 | 0.000358 | 0.341265 | -0.003031 | 0.000224 | -0.003167 | 0.000032 | -2.651118e-04 | -0.001843 | -0.028821 |
HES5 | 0.000463 | 0.276760 | -0.031110 | -0.003222 | 0.000457 | -0.143575 | -0.000092 | -0.001769 | 0.002673 | 0.000025 | 2.178488e-05 | -0.000227 | -0.258354 |
TTC34 | 0.113615 | 0.138576 | -0.002731 | -0.052147 | 0.007113 | -0.004158 | -0.002309 | -0.001182 | -0.017258 | 0.000029 | -6.750181e-05 | 0.000351 | -0.207835 |
PRDM16 | -0.000092 | 0.901125 | -0.147763 | -0.171246 | -0.043769 | -0.362034 | -0.007310 | 0.023654 | -0.012479 | 0.000570 | -9.746646e-06 | 0.002236 | -0.865202 |
LINC01646 | 0.000030 | -0.000013 | -0.000006 | -0.000003 | 0.000025 | 0.000010 | 0.000036 | -0.000005 | 0.000341 | 0.000002 | 4.687552e-07 | -0.000014 | -0.000011 |
In [44]:
Copied!
rna1=ov.single.factor_exact(rna1,hdf5_path='models/ad_rna_atac.hdf5')
rna1
rna1=ov.single.factor_exact(rna1,hdf5_path='models/ad_rna_atac.hdf5')
rna1
Out[44]:
AnnData object with n_obs × n_vars = 5000 × 3161 obs: 'SampleID', 'Diagnosis', 'Batch', 'Cell.Type', 'cluster', 'Age', 'Sex', 'PMI', 'Tangle.Stage', 'Plaque.Stage', 'RIN', 'balancing_weight', 'domain', 'factor1', 'factor2', 'factor3', 'factor4', 'factor5', 'factor6', 'factor7', 'factor8', 'factor9', 'factor10', 'factor11', 'factor12', 'factor13' var: 'gene_ids', 'feature_types', 'genome', 'highly_variable', 'highly_variable_rank', 'means', 'variances', 'variances_norm', 'mean', 'std', 'chrom', 'chromStart', 'chromEnd', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'blockCount', 'blockSizes', 'blockStarts', 'gene_id', 'gene_type', 'hgnc_id' uns: 'Cell.Type_colors', '__scglue__', 'domain_colors', 'hvg', 'log1p', 'neighbors', 'pca', 'umap' obsm: 'X_glue', 'X_mde', 'X_pca', 'X_umap' varm: 'PCs', 'X_glue', 'factor_weight' layers: 'counts' obsp: 'connectivities', 'distances'
In [45]:
Copied!
plot_data1=ov.single.factor_correlation(adata=rna1,cluster='Cell.Type',factor_list=range(1,14))
plot_data1=ov.single.factor_correlation(adata=rna1,cluster='Cell.Type',factor_list=range(1,14))
In [46]:
Copied!
import seaborn as sns
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(6,3))
sns.heatmap(plot_data1,cmap='Purples',ax=ax,square=True,cbar_kws={'shrink':0.5})
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.xlabel('Factor',fontsize=12)
plt.ylabel('Cell.Type',fontsize=12)
plt.title('Correlation',fontsize=12)
plt.savefig("figures/mofa_correlation.png",dpi=300,bbox_inches = 'tight')
plt.savefig("pdf/mofa_correlation.pdf",dpi=300,bbox_inches = 'tight')
import seaborn as sns
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(6,3))
sns.heatmap(plot_data1,cmap='Purples',ax=ax,square=True,cbar_kws={'shrink':0.5})
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.xlabel('Factor',fontsize=12)
plt.ylabel('Cell.Type',fontsize=12)
plt.title('Correlation',fontsize=12)
plt.savefig("figures/mofa_correlation.png",dpi=300,bbox_inches = 'tight')
plt.savefig("pdf/mofa_correlation.pdf",dpi=300,bbox_inches = 'tight')
In [86]:
Copied!
rna1.obsm['X_mofa']=m.get_factors()
rna1.obsm['X_mofa']=m.get_factors()
In [96]:
Copied!
m.get_factors().shape
m.get_factors().shape
Out[96]:
(5000, 13)
In [234]:
Copied!
fig, ax = plt.subplots(figsize=(3,3))
factor1,factor2=4,6
sc.pl.embedding(
adata=rna1,
basis='X_mofa',
color='Cell.Type',
title='INH',
components="{},{}".format(factor1,factor2),
palette=ov.utils.pyomic_palette(),
#frameon=False,
ncols=2,
ax=ax
)
fig.savefig("figures/mofa_factor_{}_{}.png".format(factor1,factor2),dpi=300,bbox_inches = 'tight')
fig, ax = plt.subplots(figsize=(3,3))
factor1,factor2=4,6
sc.pl.embedding(
adata=rna1,
basis='X_mofa',
color='Cell.Type',
title='INH',
components="{},{}".format(factor1,factor2),
palette=ov.utils.pyomic_palette(),
#frameon=False,
ncols=2,
ax=ax
)
fig.savefig("figures/mofa_factor_{}_{}.png".format(factor1,factor2),dpi=300,bbox_inches = 'tight')
In [229]:
Copied!
fig, ax = plt.subplots(figsize=(3,3))
sc.pl.embedding(
rna1,
basis="X_mde",
color=["factor6"],
frameon=False,
ncols=2,
#palette=ov.utils.pyomic_palette(),
show=False,
cmap='Greens',
vmin=0,
ax=ax
)
plt.savefig("figures/umap_factor6.png",dpi=300,bbox_inches = 'tight')
fig, ax = plt.subplots(figsize=(3,3))
sc.pl.embedding(
rna1,
basis="X_mde",
color=["factor6"],
frameon=False,
ncols=2,
#palette=ov.utils.pyomic_palette(),
show=False,
cmap='Greens',
vmin=0,
ax=ax
)
plt.savefig("figures/umap_factor6.png",dpi=300,bbox_inches = 'tight')
In [204]:
Copied!
fig, ax = plt.subplots(figsize=(3,3))
sc.pl.embedding(
rna1,
basis="X_mde",
color=["Cell.Type"],
frameon=False,
ncols=2,
#palette=ov.utils.pyomic_palette(),
show=False,
cmap='Purples_r',
ax=ax
)
plt.savefig("figures/umap_celltype_rna.png",dpi=300,bbox_inches = 'tight')
fig, ax = plt.subplots(figsize=(3,3))
sc.pl.embedding(
rna1,
basis="X_mde",
color=["Cell.Type"],
frameon=False,
ncols=2,
#palette=ov.utils.pyomic_palette(),
show=False,
cmap='Purples_r',
ax=ax
)
plt.savefig("figures/umap_celltype_rna.png",dpi=300,bbox_inches = 'tight')
In [106]:
Copied!
import anndata
combined = anndata.concat([rna1, atac1])
import anndata
combined = anndata.concat([rna1, atac1])
In [107]:
Copied!
combined
combined
Out[107]:
AnnData object with n_obs × n_vars = 10000 × 0 obs: 'Diagnosis', 'Batch', 'Cell.Type', 'cluster', 'Age', 'Sex', 'PMI', 'Tangle.Stage', 'Plaque.Stage', 'RIN', 'balancing_weight', 'domain' obsm: 'X_glue', 'X_mde', 'X_umap'
In [108]:
Copied!
sc.pp.neighbors(combined, use_rep="X_glue", metric="cosine")
#sc.tl.umap(combined)
#sc.pl.umap(combined, color=["Cell.Type", "domain"], wspace=0.65)
sc.pp.neighbors(combined, use_rep="X_glue", metric="cosine")
#sc.tl.umap(combined)
#sc.pl.umap(combined, color=["Cell.Type", "domain"], wspace=0.65)
computing neighbors finished: added to `.uns['neighbors']` `.obsp['distances']`, distances for each pair of neighbors `.obsp['connectivities']`, weighted adjacency matrix (0:00:11)
In [109]:
Copied!
from scvi.model.utils import mde
combined.obsm["X_mde"] = mde(combined.obsm["X_glue"])
from scvi.model.utils import mde
combined.obsm["X_mde"] = mde(combined.obsm["X_glue"])
In [138]:
Copied!
sc.pl.embedding(
combined,
basis="X_mde",
color=["Cell.Type"],
frameon=False,
ncols=2,
palette=ov.utils.pyomic_palette(),
show=False,
)
plt.savefig("figures/umap_celltype.png",dpi=300,bbox_inches = 'tight')
sc.pl.embedding(
combined,
basis="X_mde",
color=["Cell.Type"],
frameon=False,
ncols=2,
palette=ov.utils.pyomic_palette(),
show=False,
)
plt.savefig("figures/umap_celltype.png",dpi=300,bbox_inches = 'tight')
In [140]:
Copied!
sc.pl.embedding(
combined,
basis="X_mde",
color=["domain"],
title='Omics',
frameon=False,
ncols=2,
palette=ov.utils.pyomic_palette()[11:],
show=False,
)
plt.savefig("figures/umap_domain.png",dpi=300,bbox_inches = 'tight')
sc.pl.embedding(
combined,
basis="X_mde",
color=["domain"],
title='Omics',
frameon=False,
ncols=2,
palette=ov.utils.pyomic_palette()[11:],
show=False,
)
plt.savefig("figures/umap_domain.png",dpi=300,bbox_inches = 'tight')
In [114]:
Copied!
factor_w[['factor_4','factor_6']]
factor_w[['factor_4','factor_6']]
Out[114]:
factor_4 | factor_6 | |
---|---|---|
GABRD | -0.002226 | 0.000358 |
HES5 | -0.031110 | 0.000457 |
TTC34 | -0.002731 | 0.007113 |
PRDM16 | -0.147763 | -0.043769 |
LINC01646 | -0.000006 | 0.000025 |
... | ... | ... |
MT-CO3 | -0.026080 | 0.000006 |
MT-ND3 | -0.065518 | -0.007528 |
MT-ND4 | -0.074759 | -0.003568 |
MT-ND5 | -0.050181 | -0.009814 |
MT-CYB | -0.057740 | -0.004356 |
3161 rows × 2 columns
In [ ]:
Copied!
import pandas as pd
factor_w=pd.DataFrame()
view='RNA'
for i in range(model.get_factors().shape[1]):
f1_w=ov.single.get_weights(hdf5_path='models/ad_rna_atac.hdf5',view=view,factor=i+1)
f1_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]
factor_w['factor_{}'.format(i+1)]=f1_w['weights']
factor_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]
factor_w
import pandas as pd
factor_w=pd.DataFrame()
view='RNA'
for i in range(model.get_factors().shape[1]):
f1_w=ov.single.get_weights(hdf5_path='models/ad_rna_atac.hdf5',view=view,factor=i+1)
f1_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]
factor_w['factor_{}'.format(i+1)]=f1_w['weights']
factor_w.index=[str(i,"utf8").replace('{}_'.format(view),'') for i in f1_w['feature']]
factor_w
In [27]:
Copied!
factor_w['factor_1'].sort_values(ascending=False)
factor_w['factor_1'].sort_values(ascending=False)
Out[27]:
RALYL 1.000000 NELL2 0.981866 STXBP5L 0.978343 DLGAP2 0.977844 GABRB3 0.965634 ... ERBB4 -0.293090 SLC1A3 -0.302998 LINC01170 -0.386568 SPP1 -0.398615 NEAT1 -0.631516 Name: factor_1, Length: 3161, dtype: float64
In [53]:
Copied!
fig, ax = plt.subplots(figsize=(3,4))
plot_data4=pd.DataFrame()
plot_data4['weight']=factor_w['factor_1'].sort_values(ascending=False)
plot_data4['rank']=range(len(plot_data4['weight']))
plt.plot(plot_data4['rank'],plot_data4['weight'],color='#a51616')
hub_gene=plot_data4.index[:5]
plt.scatter(plot_data4.loc[hub_gene,'rank'],
plot_data4.loc[hub_gene,'weight'],color='#a51616',
alpha=0.5)
from adjustText import adjust_text
texts=[ax.text(plot_data4.loc[i,'rank'],
plot_data4.loc[i,'weight'],
i,
fontdict={'size':10,'weight':'normal','color':'black'}
) for i in hub_gene]
adjust_text(texts,only_move={'text': 'xy'},arrowprops=dict(arrowstyle='->', color='grey'),)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_visible(True)
ax.spines['left'].set_visible(True)
plt.title('factor1',fontsize=12,)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.xlabel('Feature rank',fontsize=12)
plt.ylabel('Weight',fontsize=12)
plt.grid(False)
fig, ax = plt.subplots(figsize=(3,4))
plot_data4=pd.DataFrame()
plot_data4['weight']=factor_w['factor_1'].sort_values(ascending=False)
plot_data4['rank']=range(len(plot_data4['weight']))
plt.plot(plot_data4['rank'],plot_data4['weight'],color='#a51616')
hub_gene=plot_data4.index[:5]
plt.scatter(plot_data4.loc[hub_gene,'rank'],
plot_data4.loc[hub_gene,'weight'],color='#a51616',
alpha=0.5)
from adjustText import adjust_text
texts=[ax.text(plot_data4.loc[i,'rank'],
plot_data4.loc[i,'weight'],
i,
fontdict={'size':10,'weight':'normal','color':'black'}
) for i in hub_gene]
adjust_text(texts,only_move={'text': 'xy'},arrowprops=dict(arrowstyle='->', color='grey'),)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_visible(True)
ax.spines['left'].set_visible(True)
plt.title('factor1',fontsize=12,)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.xlabel('Feature rank',fontsize=12)
plt.ylabel('Weight',fontsize=12)
plt.grid(False)
In [22]:
Copied!
factor1,factor2=6,4
plot_data3=factor_w[['factor_{}'.format(factor1),'factor_{}'.format(factor2)]]
plot_data3['sig']='normal'
plot_data3.loc[(plot_data3['factor_{}'.format(factor1)]>0.5)&(plot_data3['factor_{}'.format(factor2)]>0.5),'sig']='up-up'
plot_data3.loc[(plot_data3['factor_{}'.format(factor1)]>0.5)&(plot_data3['factor_{}'.format(factor2)]<-0.5),'sig']='up-down'
plot_data3.loc[(plot_data3['factor_{}'.format(factor1)]<-0.5)&(plot_data3['factor_{}'.format(factor2)]>0.5),'sig']='down-up'
plot_data3.loc[(plot_data3['factor_{}'.format(factor1)]<-0.5)&(plot_data3['factor_{}'.format(factor2)]<-0.5),'sig']='down-down'
factor1,factor2=6,4
plot_data3=factor_w[['factor_{}'.format(factor1),'factor_{}'.format(factor2)]]
plot_data3['sig']='normal'
plot_data3.loc[(plot_data3['factor_{}'.format(factor1)]>0.5)&(plot_data3['factor_{}'.format(factor2)]>0.5),'sig']='up-up'
plot_data3.loc[(plot_data3['factor_{}'.format(factor1)]>0.5)&(plot_data3['factor_{}'.format(factor2)]<-0.5),'sig']='up-down'
plot_data3.loc[(plot_data3['factor_{}'.format(factor1)]<-0.5)&(plot_data3['factor_{}'.format(factor2)]>0.5),'sig']='down-up'
plot_data3.loc[(plot_data3['factor_{}'.format(factor1)]<-0.5)&(plot_data3['factor_{}'.format(factor2)]<-0.5),'sig']='down-down'
In [233]:
Copied!
fig, ax = plt.subplots(figsize=(3,3))
ax.scatter(plot_data3.loc[plot_data3['sig']=='normal','factor_{}'.format(factor1)],
plot_data3.loc[plot_data3['sig']=='normal','factor_{}'.format(factor2)],
color='#c2c2c2',alpha=0.5)
ax.scatter(plot_data3.loc[plot_data3['sig']=='up-up','factor_{}'.format(factor1)],
plot_data3.loc[plot_data3['sig']=='up-up','factor_{}'.format(factor2)],
color='#a51616',alpha=0.5)
ax.scatter(plot_data3.loc[plot_data3['sig']=='up-down','factor_{}'.format(factor1)],
plot_data3.loc[plot_data3['sig']=='up-down','factor_{}'.format(factor2)],
color='#e25d5d',alpha=0.5)
ax.scatter(plot_data3.loc[plot_data3['sig']=='down-up','factor_{}'.format(factor1)],
plot_data3.loc[plot_data3['sig']=='down-up','factor_{}'.format(factor2)],
color='#3b9868',alpha=0.5)
ax.scatter(plot_data3.loc[plot_data3['sig']=='down-down','factor_{}'.format(factor1)],
plot_data3.loc[plot_data3['sig']=='down-down','factor_{}'.format(factor2)],
color='#0d6a3b',alpha=0.5)
plt.vlines(x=0.5,ymin=-1,ymax=1,color='#a51616',linestyles='dashed')
plt.hlines(y=0.5,xmin=-1,xmax=1,color='#a51616',linestyles='dashed')
plt.vlines(x=-0.5,ymin=-1,ymax=1,color='#0d6a3b',linestyles='dashed')
plt.hlines(y=-0.5,xmin=-1,xmax=1,color='#0d6a3b',linestyles='dashed')
ax.spines['top'].set_visible(True)
ax.spines['right'].set_visible(True)
ax.spines['bottom'].set_visible(True)
ax.spines['left'].set_visible(True)
plt.grid(False)
plt.xlabel('factor_{}'.format(factor1),fontsize=12)
plt.ylabel('factor_{}'.format(factor2),fontsize=12)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
from adjustText import adjust_text
for sig,color in zip(['up-up','up-down','down-up','down-down'],
['#a51616','#e25d5d','#3b9868','#0d6a3b']):
hub_gene=plot_data3.loc[plot_data3['sig']==sig].index.tolist()
if len(hub_gene)==0:
continue
texts=[ax.text(plot_data3.loc[i,'factor_{}'.format(factor1)],
plot_data3.loc[i,'factor_{}'.format(factor2)],
i,
fontdict={'size':10,'weight':'bold','color':'black'}
) for i in hub_gene[:5]]
adjust_text(texts,only_move={'text': 'xy'},arrowprops=dict(arrowstyle='->', color='grey'),)
plt.title('INH',fontsize=12)
plt.savefig("figures/factor_gene_inh.png",dpi=300,bbox_inches = 'tight')
fig, ax = plt.subplots(figsize=(3,3))
ax.scatter(plot_data3.loc[plot_data3['sig']=='normal','factor_{}'.format(factor1)],
plot_data3.loc[plot_data3['sig']=='normal','factor_{}'.format(factor2)],
color='#c2c2c2',alpha=0.5)
ax.scatter(plot_data3.loc[plot_data3['sig']=='up-up','factor_{}'.format(factor1)],
plot_data3.loc[plot_data3['sig']=='up-up','factor_{}'.format(factor2)],
color='#a51616',alpha=0.5)
ax.scatter(plot_data3.loc[plot_data3['sig']=='up-down','factor_{}'.format(factor1)],
plot_data3.loc[plot_data3['sig']=='up-down','factor_{}'.format(factor2)],
color='#e25d5d',alpha=0.5)
ax.scatter(plot_data3.loc[plot_data3['sig']=='down-up','factor_{}'.format(factor1)],
plot_data3.loc[plot_data3['sig']=='down-up','factor_{}'.format(factor2)],
color='#3b9868',alpha=0.5)
ax.scatter(plot_data3.loc[plot_data3['sig']=='down-down','factor_{}'.format(factor1)],
plot_data3.loc[plot_data3['sig']=='down-down','factor_{}'.format(factor2)],
color='#0d6a3b',alpha=0.5)
plt.vlines(x=0.5,ymin=-1,ymax=1,color='#a51616',linestyles='dashed')
plt.hlines(y=0.5,xmin=-1,xmax=1,color='#a51616',linestyles='dashed')
plt.vlines(x=-0.5,ymin=-1,ymax=1,color='#0d6a3b',linestyles='dashed')
plt.hlines(y=-0.5,xmin=-1,xmax=1,color='#0d6a3b',linestyles='dashed')
ax.spines['top'].set_visible(True)
ax.spines['right'].set_visible(True)
ax.spines['bottom'].set_visible(True)
ax.spines['left'].set_visible(True)
plt.grid(False)
plt.xlabel('factor_{}'.format(factor1),fontsize=12)
plt.ylabel('factor_{}'.format(factor2),fontsize=12)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
from adjustText import adjust_text
for sig,color in zip(['up-up','up-down','down-up','down-down'],
['#a51616','#e25d5d','#3b9868','#0d6a3b']):
hub_gene=plot_data3.loc[plot_data3['sig']==sig].index.tolist()
if len(hub_gene)==0:
continue
texts=[ax.text(plot_data3.loc[i,'factor_{}'.format(factor1)],
plot_data3.loc[i,'factor_{}'.format(factor2)],
i,
fontdict={'size':10,'weight':'bold','color':'black'}
) for i in hub_gene[:5]]
adjust_text(texts,only_move={'text': 'xy'},arrowprops=dict(arrowstyle='->', color='grey'),)
plt.title('INH',fontsize=12)
plt.savefig("figures/factor_gene_inh.png",dpi=300,bbox_inches = 'tight')
In [230]:
Copied!
factor1,factor2=6,4
plot_data3=factor_w[['factor_{}'.format(factor1),'factor_{}'.format(factor2)]]
plot_data3['sig']='normal'
plot_data3.loc[(plot_data3['factor_{}'.format(factor1)]>0.5),'sig']='up'
plot_data3.loc[(plot_data3['factor_{}'.format(factor1)]<-0.5),'sig']='down'
factor1,factor2=6,4
plot_data3=factor_w[['factor_{}'.format(factor1),'factor_{}'.format(factor2)]]
plot_data3['sig']='normal'
plot_data3.loc[(plot_data3['factor_{}'.format(factor1)]>0.5),'sig']='up'
plot_data3.loc[(plot_data3['factor_{}'.format(factor1)]<-0.5),'sig']='down'
In [231]:
Copied!
fig, ax = plt.subplots(figsize=(3,3))
ax.scatter(plot_data3.loc[plot_data3['sig']=='normal','factor_{}'.format(factor1)],
plot_data3.loc[plot_data3['sig']=='normal','factor_{}'.format(factor2)],
color='#c2c2c2',alpha=0.5)
ax.scatter(plot_data3.loc[plot_data3['sig']=='up','factor_{}'.format(factor1)],
plot_data3.loc[plot_data3['sig']=='up','factor_{}'.format(factor2)],
color='#a51616',alpha=0.5)
ax.scatter(plot_data3.loc[plot_data3['sig']=='down','factor_{}'.format(factor1)],
plot_data3.loc[plot_data3['sig']=='down','factor_{}'.format(factor2)],
color='#0d6a3b',alpha=0.5)
plt.vlines(x=0.5,ymin=-1,ymax=1,color='#a51616',linestyles='dashed')
plt.hlines(y=0.5,xmin=-1,xmax=1,color='#a51616',linestyles='dashed')
plt.vlines(x=-0.5,ymin=-1,ymax=1,color='#0d6a3b',linestyles='dashed')
plt.hlines(y=-0.5,xmin=-1,xmax=1,color='#0d6a3b',linestyles='dashed')
ax.spines['top'].set_visible(True)
ax.spines['right'].set_visible(True)
ax.spines['bottom'].set_visible(True)
ax.spines['left'].set_visible(True)
plt.grid(False)
plt.xlabel('factor_{}'.format(factor1),fontsize=12)
plt.ylabel('factor_{}'.format(factor2),fontsize=12)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
from adjustText import adjust_text
for sig,color in zip(['up','down'],
['#a51616','#0d6a3b']):
if 'up' in sig:
hub_gene=plot_data3.loc[plot_data3['sig']==sig].sort_values('factor_{}'.format(factor1),ascending=False).index.tolist()
else:
hub_gene=plot_data3.loc[plot_data3['sig']==sig].sort_values('factor_{}'.format(factor1),ascending=True).index.tolist()
if len(hub_gene)==0:
continue
texts=[ax.text(plot_data3.loc[i,'factor_{}'.format(factor1)],
plot_data3.loc[i,'factor_{}'.format(factor2)],
i,
fontdict={'size':10,'weight':'bold','color':'black'}
) for i in hub_gene[:5]]
adjust_text(texts,only_move={'text': 'xy'},arrowprops=dict(arrowstyle='->', color='grey'),)
plt.title('INH',fontsize=12)
plt.savefig("figures/factor_gene_inh.png",dpi=300,bbox_inches = 'tight')
fig, ax = plt.subplots(figsize=(3,3))
ax.scatter(plot_data3.loc[plot_data3['sig']=='normal','factor_{}'.format(factor1)],
plot_data3.loc[plot_data3['sig']=='normal','factor_{}'.format(factor2)],
color='#c2c2c2',alpha=0.5)
ax.scatter(plot_data3.loc[plot_data3['sig']=='up','factor_{}'.format(factor1)],
plot_data3.loc[plot_data3['sig']=='up','factor_{}'.format(factor2)],
color='#a51616',alpha=0.5)
ax.scatter(plot_data3.loc[plot_data3['sig']=='down','factor_{}'.format(factor1)],
plot_data3.loc[plot_data3['sig']=='down','factor_{}'.format(factor2)],
color='#0d6a3b',alpha=0.5)
plt.vlines(x=0.5,ymin=-1,ymax=1,color='#a51616',linestyles='dashed')
plt.hlines(y=0.5,xmin=-1,xmax=1,color='#a51616',linestyles='dashed')
plt.vlines(x=-0.5,ymin=-1,ymax=1,color='#0d6a3b',linestyles='dashed')
plt.hlines(y=-0.5,xmin=-1,xmax=1,color='#0d6a3b',linestyles='dashed')
ax.spines['top'].set_visible(True)
ax.spines['right'].set_visible(True)
ax.spines['bottom'].set_visible(True)
ax.spines['left'].set_visible(True)
plt.grid(False)
plt.xlabel('factor_{}'.format(factor1),fontsize=12)
plt.ylabel('factor_{}'.format(factor2),fontsize=12)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
from adjustText import adjust_text
for sig,color in zip(['up','down'],
['#a51616','#0d6a3b']):
if 'up' in sig:
hub_gene=plot_data3.loc[plot_data3['sig']==sig].sort_values('factor_{}'.format(factor1),ascending=False).index.tolist()
else:
hub_gene=plot_data3.loc[plot_data3['sig']==sig].sort_values('factor_{}'.format(factor1),ascending=True).index.tolist()
if len(hub_gene)==0:
continue
texts=[ax.text(plot_data3.loc[i,'factor_{}'.format(factor1)],
plot_data3.loc[i,'factor_{}'.format(factor2)],
i,
fontdict={'size':10,'weight':'bold','color':'black'}
) for i in hub_gene[:5]]
adjust_text(texts,only_move={'text': 'xy'},arrowprops=dict(arrowstyle='->', color='grey'),)
plt.title('INH',fontsize=12)
plt.savefig("figures/factor_gene_inh.png",dpi=300,bbox_inches = 'tight')
In [13]:
Copied!
import mofax as mfx
model = mfx.mofa_model('models/ad_rna_atac.hdf5')
import mofax as mfx
model = mfx.mofa_model('models/ad_rna_atac.hdf5')
In [63]:
Copied!
pd.concat([factor_w,factor_w],axis=1)
pd.concat([factor_w,factor_w],axis=1)
Out[63]:
factor_1 | factor_2 | factor_3 | factor_4 | factor_5 | factor_6 | factor_7 | factor_8 | factor_9 | factor_10 | ... | factor_4 | factor_5 | factor_6 | factor_7 | factor_8 | factor_9 | factor_10 | factor_11 | factor_12 | factor_13 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
GABRD | 0.388015 | -0.096568 | -0.002226 | -0.000855 | 0.000358 | 0.341265 | -0.003031 | 0.000224 | -0.003167 | 0.000032 | ... | -0.000855 | 0.000358 | 0.341265 | -0.003031 | 0.000224 | -0.003167 | 0.000032 | -2.651118e-04 | -0.001843 | -0.028821 |
HES5 | 0.000463 | 0.276760 | -0.031110 | -0.003222 | 0.000457 | -0.143575 | -0.000092 | -0.001769 | 0.002673 | 0.000025 | ... | -0.003222 | 0.000457 | -0.143575 | -0.000092 | -0.001769 | 0.002673 | 0.000025 | 2.178488e-05 | -0.000227 | -0.258354 |
TTC34 | 0.113615 | 0.138576 | -0.002731 | -0.052147 | 0.007113 | -0.004158 | -0.002309 | -0.001182 | -0.017258 | 0.000029 | ... | -0.052147 | 0.007113 | -0.004158 | -0.002309 | -0.001182 | -0.017258 | 0.000029 | -6.750181e-05 | 0.000351 | -0.207835 |
PRDM16 | -0.000092 | 0.901125 | -0.147763 | -0.171246 | -0.043769 | -0.362034 | -0.007310 | 0.023654 | -0.012479 | 0.000570 | ... | -0.171246 | -0.043769 | -0.362034 | -0.007310 | 0.023654 | -0.012479 | 0.000570 | -9.746646e-06 | 0.002236 | -0.865202 |
LINC01646 | 0.000030 | -0.000013 | -0.000006 | -0.000003 | 0.000025 | 0.000010 | 0.000036 | -0.000005 | 0.000341 | 0.000002 | ... | -0.000003 | 0.000025 | 0.000010 | 0.000036 | -0.000005 | 0.000341 | 0.000002 | 4.687552e-07 | -0.000014 | -0.000011 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
MT-CO3 | 0.242487 | -0.046276 | -0.026080 | 0.000190 | 0.000006 | 0.330310 | -0.054185 | -0.004804 | -0.144484 | 0.000268 | ... | 0.000190 | 0.000006 | 0.330310 | -0.054185 | -0.004804 | -0.144484 | 0.000268 | -9.001574e-04 | 0.001719 | -0.307852 |
MT-ND3 | 0.125587 | -0.000323 | -0.065518 | -0.000296 | -0.007528 | 0.221193 | -0.035952 | -0.004491 | -0.072415 | 0.000308 | ... | -0.000296 | -0.007528 | 0.221193 | -0.035952 | -0.004491 | -0.072415 | 0.000308 | -4.577748e-04 | 0.001609 | -0.256856 |
MT-ND4 | 0.161691 | -0.001290 | -0.074759 | 0.001310 | -0.003568 | 0.260324 | -0.044698 | -0.007164 | -0.115841 | 0.000402 | ... | 0.001310 | -0.003568 | 0.260324 | -0.044698 | -0.007164 | -0.115841 | 0.000402 | -6.540196e-04 | 0.001242 | -0.289882 |
MT-ND5 | 0.104282 | -0.001342 | -0.050181 | 0.000579 | -0.009814 | 0.226687 | -0.020815 | -0.002617 | -0.073407 | 0.000290 | ... | 0.000579 | -0.009814 | 0.226687 | -0.020815 | -0.002617 | -0.073407 | 0.000290 | -2.776113e-04 | 0.001331 | -0.171389 |
MT-CYB | 0.165990 | -0.001053 | -0.057740 | -0.000157 | -0.004356 | 0.274541 | -0.041253 | -0.004388 | -0.100811 | 0.000347 | ... | -0.000157 | -0.004356 | 0.274541 | -0.041253 | -0.004388 | -0.100811 | 0.000347 | -4.972872e-04 | 0.001635 | -0.265953 |
3161 rows × 26 columns
In [64]:
Copied!
import anndata
import scanpy as sc
adata1=anndata.AnnData(pd.concat([factor_w,factor_w],axis=1).T)
import anndata
import scanpy as sc
adata1=anndata.AnnData(pd.concat([factor_w,factor_w],axis=1).T)
In [65]:
Copied!
adata1.obs['Factor']=adata1.obs.index
adata1.obs['Factor']=adata1.obs['Factor'].astype('category')
adata1
adata1.obs['Factor']=adata1.obs.index
adata1.obs['Factor']=adata1.obs['Factor'].astype('category')
adata1
Out[65]:
AnnData object with n_obs × n_vars = 26 × 3161 obs: 'Factor'
In [101]:
Copied!
sc.tl.rank_genes_groups(adata1, groupby='Factor', method='wilcoxon')
ax=sc.pl.rank_genes_groups_dotplot(adata1, n_genes=3,
cmap='bwr',show=False)
sc.tl.rank_genes_groups(adata1, groupby='Factor', method='wilcoxon')
ax=sc.pl.rank_genes_groups_dotplot(adata1, n_genes=3,
cmap='bwr',show=False)
ranking genes finished: added to `.uns['rank_genes_groups']` 'names', sorted np.recarray to be indexed by group ids 'scores', sorted np.recarray to be indexed by group ids 'logfoldchanges', sorted np.recarray to be indexed by group ids 'pvals', sorted np.recarray to be indexed by group ids 'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:00)
In [98]:
Copied!
import numpy as np
def get_celltype_marker(adata:anndata.AnnData,
clustertype:str='leiden',
log2fc_min:int=2,
pval_cutoff:float=0.05,rank:bool=True)->dict:
r"""Get marker genes for each clusters.
Arguments:
adata: anndata object
clustertype: Clustering name used in scanpy. (leiden)
log2fc_min: Minimum log2 fold change of marker genes. (2)
pval_cutoff: Maximum p value of marker genes. (0.05)
rank: Whether to rank genes by wilcoxon test. (True)
Returns:
cellmarker: A dictionary of marker genes for each clusters.
"""
print('...get cell type marker')
celltypes = sorted(adata.obs[clustertype].unique())
cell_marker_dict={}
if rank==False and 'rank_genes_groups' in adata.uns.keys():
sc.tl.rank_genes_groups(adata, clustertype, method='wilcoxon')
for celltype in celltypes:
degs = sc.get.rank_genes_groups_df(adata, group=celltype, key='rank_genes_groups', log2fc_min=log2fc_min,
pval_cutoff=pval_cutoff)
foldp=np.histogram(degs['logfoldchanges'])
foldchange=(foldp[1][np.where(foldp[1]>0)[0][-5]]+foldp[1][np.where(foldp[1]>0)[0][-6]])/2
cellmarker=degs.loc[degs['logfoldchanges']>foldchange]['names'].values
cell_marker_dict[celltype]=cellmarker
return cell_marker_dict
res_dict=get_celltype_marker(adata1,clustertype='Factor',
log2fc_min=3,pval_cutoff=0.1)
import numpy as np
def get_celltype_marker(adata:anndata.AnnData,
clustertype:str='leiden',
log2fc_min:int=2,
pval_cutoff:float=0.05,rank:bool=True)->dict:
r"""Get marker genes for each clusters.
Arguments:
adata: anndata object
clustertype: Clustering name used in scanpy. (leiden)
log2fc_min: Minimum log2 fold change of marker genes. (2)
pval_cutoff: Maximum p value of marker genes. (0.05)
rank: Whether to rank genes by wilcoxon test. (True)
Returns:
cellmarker: A dictionary of marker genes for each clusters.
"""
print('...get cell type marker')
celltypes = sorted(adata.obs[clustertype].unique())
cell_marker_dict={}
if rank==False and 'rank_genes_groups' in adata.uns.keys():
sc.tl.rank_genes_groups(adata, clustertype, method='wilcoxon')
for celltype in celltypes:
degs = sc.get.rank_genes_groups_df(adata, group=celltype, key='rank_genes_groups', log2fc_min=log2fc_min,
pval_cutoff=pval_cutoff)
foldp=np.histogram(degs['logfoldchanges'])
foldchange=(foldp[1][np.where(foldp[1]>0)[0][-5]]+foldp[1][np.where(foldp[1]>0)[0][-6]])/2
cellmarker=degs.loc[degs['logfoldchanges']>foldchange]['names'].values
cell_marker_dict[celltype]=cellmarker
return cell_marker_dict
res_dict=get_celltype_marker(adata1,clustertype='Factor',
log2fc_min=3,pval_cutoff=0.1)
...get cell type marker
In [99]:
Copied!
res_dict
res_dict
Out[99]:
{'factor_1': array(['SORBS2', 'CEP126', 'MIR4500HG', 'SEL1L3', 'PPP3CA', 'CCDC171', 'HPYR1', 'ADRA1B', 'TUBBP5', 'TMEM38A', 'TMEM241', 'TMEM179', 'CAMKK1', 'NECTIN3-AS1', 'CNTNAP3P2', 'LINC00351', 'CHRM5'], dtype=object), 'factor_10': array([], dtype=object), 'factor_11': array([], dtype=object), 'factor_12': array([], dtype=object), 'factor_13': array(['BCL6', 'MT1H'], dtype=object), 'factor_2': array(['B3GAT2', 'GABRB1', 'GLDC', 'NTRK2', 'ITGA9-AS1', 'ADAMTS17'], dtype=object), 'factor_3': array([], dtype=object), 'factor_4': array([], dtype=object), 'factor_5': array([], dtype=object), 'factor_6': array(['AKAIN1', 'DLX1', 'GRIP2', 'DLX6-AS1', 'TAC1', 'CASC6', 'TRIM55', 'GRPR', 'ANKRD7', 'GIPC2', 'CHRDL1'], dtype=object), 'factor_7': array([], dtype=object), 'factor_8': array([], dtype=object), 'factor_9': array([], dtype=object)}
In [87]:
Copied!
pd.DataFrame(adata1.uns['rank_genes_groups']['pvals_adj'])
pd.DataFrame(adata1.uns['rank_genes_groups']['pvals_adj'])
Out[87]:
factor_1 | factor_10 | factor_11 | factor_12 | factor_13 | factor_2 | factor_3 | factor_4 | factor_5 | factor_6 | factor_7 | factor_8 | factor_9 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.049316 | 0.835037 | 0.909858 | 0.430013 | 0.044684 | 0.059848 | 0.136075 | 0.225811 | 0.140408 | 0.09445 | 0.538405 | 0.865735 | 0.177299 |
1 | 0.049316 | 0.835037 | 0.909858 | 0.430013 | 0.044684 | 0.059848 | 0.136075 | 0.225811 | 0.140408 | 0.09445 | 0.538405 | 0.865735 | 0.177299 |
2 | 0.049316 | 0.835037 | 0.909858 | 0.430013 | 0.044684 | 0.059848 | 0.136075 | 0.225811 | 0.140408 | 0.09445 | 0.538405 | 0.865735 | 0.177299 |
3 | 0.049316 | 0.835037 | 0.909858 | 0.430013 | 0.044684 | 0.059848 | 0.136075 | 0.225811 | 0.140408 | 0.09445 | 0.538405 | 0.865735 | 0.177299 |
4 | 0.049316 | 0.835037 | 0.909858 | 0.430013 | 0.044684 | 0.059848 | 0.136075 | 0.225811 | 0.140408 | 0.09445 | 0.538405 | 0.865735 | 0.177299 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
3156 | 0.049316 | 0.835037 | 0.909858 | 0.430013 | 0.044684 | 0.059848 | 0.136075 | 0.225811 | 0.140408 | 0.09445 | 0.538405 | 0.865735 | 0.177299 |
3157 | 0.049316 | 0.835037 | 0.909858 | 0.430013 | 0.044684 | 0.059848 | 0.136075 | 0.225811 | 0.140408 | 0.09445 | 0.538405 | 0.865735 | 0.177299 |
3158 | 0.049316 | 0.835037 | 0.909858 | 0.430013 | 0.044684 | 0.059848 | 0.136075 | 0.225811 | 0.140408 | 0.09445 | 0.538405 | 0.865735 | 0.177299 |
3159 | 0.049316 | 0.835037 | 0.909858 | 0.430013 | 0.044684 | 0.059848 | 0.136075 | 0.225811 | 0.140408 | 0.09445 | 0.538405 | 0.865735 | 0.177299 |
3160 | 0.049316 | 0.835037 | 0.909858 | 0.430013 | 0.044684 | 0.059848 | 0.136075 | 0.225811 | 0.140408 | 0.09445 | 0.538405 | 0.865735 | 0.177299 |
3161 rows × 13 columns
In [91]:
Copied!
#sc.tl.rank_genes_groups(adata1, clustertype, method='wilcoxon')
degs = sc.get.rank_genes_groups_df(adata1, group='factor_1', key='rank_genes_groups', log2fc_min=2,
pval_cutoff=0.05)
degs
#sc.tl.rank_genes_groups(adata1, clustertype, method='wilcoxon')
degs = sc.get.rank_genes_groups_df(adata1, group='factor_1', key='rank_genes_groups', log2fc_min=2,
pval_cutoff=0.05)
degs
Out[91]:
names | scores | logfoldchanges | pvals | pvals_adj | |
---|---|---|---|---|---|
0 | SNTG1 | 2.309401 | 4.500756 | 0.020921 | 0.049316 |
1 | BASP1 | 2.309401 | 5.058449 | 0.020921 | 0.049316 |
2 | LINC02150 | 2.309401 | 6.360254 | 0.020921 | 0.049316 |
3 | AEBP2 | 2.309401 | 5.160892 | 0.020921 | 0.049316 |
4 | LINC02398 | 2.309401 | 6.242045 | 0.020921 | 0.049316 |
... | ... | ... | ... | ... | ... |
726 | NOX4 | -2.309401 | 4.731972 | 0.020921 | 0.049316 |
727 | NEAT1 | -2.309401 | 4.084878 | 0.020921 | 0.049316 |
728 | LINC01170 | -2.309401 | 5.790870 | 0.020921 | 0.049316 |
729 | LTBP1 | -2.309401 | 3.040365 | 0.020921 | 0.049316 |
730 | ROR1 | -2.309401 | 3.563920 | 0.020921 | 0.049316 |
731 rows × 5 columns
In [3]:
Copied!
pymofa_obj=ov.single._mofa.pyMOFA(model_path='models/ad_rna_atac.hdf5')
pymofa_obj=ov.single._mofa.pyMOFA(model_path='models/ad_rna_atac.hdf5')
In [5]:
Copied!
pymofa_obj.get_factors(rna1)
rna1
pymofa_obj.get_factors(rna1)
rna1
Out[5]:
AnnData object with n_obs × n_vars = 5000 × 3161 obs: 'SampleID', 'Diagnosis', 'Batch', 'Cell.Type', 'cluster', 'Age', 'Sex', 'PMI', 'Tangle.Stage', 'Plaque.Stage', 'RIN', 'balancing_weight', 'domain', 'factor1', 'factor2', 'factor3', 'factor4', 'factor5', 'factor6', 'factor7', 'factor8', 'factor9', 'factor10', 'factor11', 'factor12', 'factor13' var: 'gene_ids', 'feature_types', 'genome', 'highly_variable', 'highly_variable_rank', 'means', 'variances', 'variances_norm', 'mean', 'std', 'chrom', 'chromStart', 'chromEnd', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'blockCount', 'blockSizes', 'blockStarts', 'gene_id', 'gene_type', 'hgnc_id' uns: 'Cell.Type_colors', '__scglue__', 'domain_colors', 'hvg', 'log1p', 'neighbors', 'pca', 'umap' obsm: 'X_glue', 'X_mde', 'X_pca', 'X_umap', 'X_mofa' varm: 'PCs', 'X_glue' layers: 'counts' obsp: 'connectivities', 'distances'
In [9]:
Copied!
pymofa_obj.r2
pymofa_obj.r2
Out[9]:
RNA | ATAC | |
---|---|---|
0 | 6.185937 | 0.625745 |
1 | 1.907447 | 2.482087 |
2 | 3.663136 | 0.207662 |
3 | 2.290023 | 0.001297 |
4 | 1.176851 | 0.032663 |
5 | 1.002245 | 0.042093 |
6 | 0.090284 | 0.616941 |
7 | 0.074696 | 0.465844 |
8 | 0.020677 | 0.323996 |
9 | 0.048964 | 0.178057 |
10 | 0.109093 | 0.103680 |
11 | 0.031659 | 0.124130 |
12 | -1.605516 | 0.531147 |
In [10]:
Copied!
pymofa_obj.plot_factor(rna1,'Cell.Type','EX',figsize=(3,3),
factor1=1,factor2=2,)
pymofa_obj.plot_factor(rna1,'Cell.Type','EX',figsize=(3,3),
factor1=1,factor2=2,)
In [ ]:
Copied!
pymofa_obj.plot_weight_gene_d2(view='RNA',factor1=1,factor2=2,)
pymofa_obj.plot_weight_gene_d2(view='RNA',factor1=1,factor2=2,)
In [15]:
Copied!
pymofa_obj.plot_top_feature_dotplot(view='RNA')
pymofa_obj.plot_top_feature_dotplot(view='RNA')
ranking genes finished: added to `.uns['rank_genes_groups']` 'names', sorted np.recarray to be indexed by group ids 'scores', sorted np.recarray to be indexed by group ids 'logfoldchanges', sorted np.recarray to be indexed by group ids 'pvals', sorted np.recarray to be indexed by group ids 'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:00) WARNING: dendrogram data not found (using key=dendrogram_Factor). Running `sc.tl.dendrogram` with default parameters. For fine tuning it is recommended to run `sc.tl.dendrogram` independently. WARNING: You’re trying to run this on 3161 dimensions of `.X`, if you really want this, set `use_rep='X'`. Falling back to preprocessing with `sc.pp.pca` and default params. computing PCA with n_comps=25 finished (0:00:00) Storing dendrogram info using `.uns['dendrogram_Factor']`
Out[15]:
{'mainplot_ax': <AxesSubplot: >, 'group_extra_ax': <AxesSubplot: >, 'gene_group_ax': <AxesSubplot: >, 'size_legend_ax': <AxesSubplot: title={'center': 'Fraction of cells\nin group (%)'}>, 'color_legend_ax': <AxesSubplot: title={'center': 'Mean expression\nin group'}>}
In [17]:
Copied!
pymofa_obj.plot_top_feature_heatmap(view='RNA')
pymofa_obj.plot_top_feature_heatmap(view='RNA')
ranking genes finished: added to `.uns['rank_genes_groups']` 'names', sorted np.recarray to be indexed by group ids 'scores', sorted np.recarray to be indexed by group ids 'logfoldchanges', sorted np.recarray to be indexed by group ids 'pvals', sorted np.recarray to be indexed by group ids 'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:00) WARNING: dendrogram data not found (using key=dendrogram_Factor). Running `sc.tl.dendrogram` with default parameters. For fine tuning it is recommended to run `sc.tl.dendrogram` independently. WARNING: You’re trying to run this on 3161 dimensions of `.X`, if you really want this, set `use_rep='X'`. Falling back to preprocessing with `sc.pp.pca` and default params. computing PCA with n_comps=25 finished (0:00:00) Storing dendrogram info using `.uns['dendrogram_Factor']`
Out[17]:
{'mainplot_ax': <AxesSubplot: >, 'group_extra_ax': <AxesSubplot: >, 'gene_group_ax': <AxesSubplot: >, 'color_legend_ax': <AxesSubplot: title={'center': 'Mean expression\nin group'}>}
In [16]:
Copied!
pymofa_obj.plot_top_feature_dotplot(view='ATAC')
pymofa_obj.plot_top_feature_dotplot(view='ATAC')
ranking genes finished: added to `.uns['rank_genes_groups']` 'names', sorted np.recarray to be indexed by group ids 'scores', sorted np.recarray to be indexed by group ids 'logfoldchanges', sorted np.recarray to be indexed by group ids 'pvals', sorted np.recarray to be indexed by group ids 'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:00) WARNING: dendrogram data not found (using key=dendrogram_Factor). Running `sc.tl.dendrogram` with default parameters. For fine tuning it is recommended to run `sc.tl.dendrogram` independently. WARNING: You’re trying to run this on 58097 dimensions of `.X`, if you really want this, set `use_rep='X'`. Falling back to preprocessing with `sc.pp.pca` and default params. computing PCA with n_comps=25 finished (0:00:00) Storing dendrogram info using `.uns['dendrogram_Factor']`
Out[16]:
{'mainplot_ax': <AxesSubplot: >, 'group_extra_ax': <AxesSubplot: >, 'gene_group_ax': <AxesSubplot: >, 'size_legend_ax': <AxesSubplot: title={'center': 'Fraction of cells\nin group (%)'}>, 'color_legend_ax': <AxesSubplot: title={'center': 'Mean expression\nin group'}>}
In [18]:
Copied!
import igraph
igraph.__version__
import igraph
igraph.__version__
Out[18]:
'0.10.4'
In [ ]:
Copied!