Skip to content

MOFA分析:MOFA模型构建

到本小节,就正式构建MOFA模型了

1. 数据准备

1.1 导入包

from mofapy2.run.entry_point import entry_point
import anndata
import networkx as nx
import scanpy as sc
from matplotlib import rcParams
import pandas as pd
import numpy as np

1.2 导入数据

rna=anndata.read_h5ad("rna_mofa.h5ad")
atac=anndata.read_h5ad("atac_mofa.h5ad")

1.3 计算多组学公共list

ret3= list(set(rna.obs.index).intersection(atac.obs.index))

1.4 观察细胞类型

for i in list(set(rna[ret3].obs['cell_type'])):
  print(i,len(rna[ret3].obs.loc[rna[ret3].obs['cell_type']==i])/len(rna[ret3].obs))

OPC 0.046986390149060274

PER.END 0.0031756318859364873

ASC 0.07465975372650681

INH 0.08165910563836681

MG 0.0390149060272197

ODC 0.6180816591056384

EX 0.13642255346727156

2. MOFA模型

2.1 MOFA参数设置

# initialise the entry point

ent1 = entry_point()
ent1.set_data_options(
    scale_groups = False, 
    scale_views = False,
    center_groups=True,
)

2.2 构建组学层

data_mat=[[None for g in range(1)] for m in range(2)]
data_mat[0][0]=rna[ret3].X
data_mat[1][0]=np.array(atac[ret3].X.todense())

2.3 MOFA数据导入

ent1.set_data_matrix(data_mat, likelihoods = ["gaussian","gaussian"],
          views_names=['rna','atac'],
          samples_names=[ret3],
          features_names=[rna[ret3].var_names,atac[ret3].var_names])

2.4 模型参数设置

ent1.set_model_options(
    factors = 20, 
    spikeslab_weights = True, 
    ard_factors = True,
    ard_weights = True
)
ent1.set_train_options(
    iter = 3000, 
    convergence_mode = "slow", 
    startELBO = 1, 
    freqELBO = 1, 
    dropR2 = 0.001, 
    gpu_mode = True, 
    verbose = False, 
    seed = 1
)

2.5 模型运行并保存

ent1.build()

ent1.run()

# Save the output
ent1.save(outfile='mofa_factor.hdf5')

image-20211104134822671

2.6 meta数据导出

rna[ret3].obs.to_csv('mofa_meta.csv')