API: bulk2single

omicverse.bulk2single.Bulk2Single

Bulk2Single class.

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/bulk2single/_bulk2single.py
class Bulk2Single:
    r"""
    Bulk2Single class.

    """
    def __init__(self,bulk_data:pd.DataFrame,single_data:anndata.AnnData,
                 celltype_key:str,bulk_group=None,max_single_cells:int=5000,
                 top_marker_num:int=500,ratio_num:int=1,gpu:Union[int,str]=0):
        """
        Initializes the Bulk2Single class.

        Arguments:
            bulk_data: The bulk RNA-seq data.
            single_data: The single-cell RNA-seq reference data.
            celltype_key: The name of the column in single_data.obs containing the cell-type labels.
            bulk_group: The column names of the bulk data that form the group of interest. Default is None.
            max_single_cells: The maximum number of single cells to use; larger references are randomly subsampled. Default is 5000.
            top_marker_num: The number of top markers to select per cell type. Default is 500.
            ratio_num: The ratio between the number of single cells and the target number of converted cells. Default is 1.
            gpu: The ID of the GPU to use. Set to -1 to use the CPU, or 'mps' to use the Apple MPS backend. Default is 0.

        """
        single_data.var_names_make_unique()
        bulk_data=data_drop_duplicates_index(bulk_data)
        self.bulk_data=bulk_data
        self.single_data=single_data

        if self.single_data.shape[0]>max_single_cells:
            print(f"......random select {max_single_cells} single cells")
            import random
            cell_idx=random.sample(self.single_data.obs.index.tolist(),max_single_cells)
            self.single_data=self.single_data[cell_idx,:]
        self.celltype_key=celltype_key
        self.bulk_group=bulk_group
        self.input_data=None
        #self.input_data=bulk2single_data_prepare(bulk_data,single_data,celltype_key)
        #self.cell_target_num = data_process(self.input_data, top_marker_num, ratio_num)

        # Build a pseudo-bulk reference: summed expression per cell type.
        sc_df=single_data.to_df()
        sc_ref=pd.DataFrame(columns=sc_df.columns)
        sc_ref_index=[]
        for celltype in list(set(single_data.obs[celltype_key])):
            sc_ref.loc[celltype]=single_data[single_data.obs[celltype_key]==celltype].to_df().sum()
            sc_ref_index.append(celltype)
        sc_ref.index=sc_ref_index
        self.sc_ref=sc_ref


        if gpu=='mps' and torch.backends.mps.is_available():
            print('Note: the loss may become NaN on the MPS backend; use it only if your PyTorch build supports MPS.')
            self.used_device = torch.device("mps")
        elif isinstance(gpu, int) and gpu >= 0 and torch.cuda.is_available():
            self.used_device = torch.device(f"cuda:{gpu}")
        else:
            self.used_device = torch.device('cpu')
        self.history=[]

    def predicted_fraction(self,method='scaden',sep='\t', scaler='mms',
                        datatype='counts', genelenfile=None,
                        mode='overall', adaptive=True, variance_threshold=0.98,
                        save_model_name=None,
                        batch_size=128, epochs=128, seed=1,scale_size=2):
        """
        Predict cell-type fractions of the bulk data by deconvolution.

        Arguments:
            method: The deconvolution method, 'scaden' or 'tape'. Default is 'scaden'.
            scale_size: Scaling factor applied to the estimated cell numbers per type. Default is 2.

        Returns:
            CellFractionPrediction: The predicted cell-type fractions per bulk sample.
                The per-type target cell numbers are also stored in self.cell_target_num.
        """
        from ..externel.tape import Deconvolution,ScadenDeconvolution
        sc_ref=self.sc_ref.copy()
        if method=='scaden':
            CellFractionPrediction=ScadenDeconvolution(sc_ref, 
                           self.bulk_data.T, sep=sep,
                           batch_size=batch_size, epochs=epochs)
        elif method=='tape':
            SignatureMatrix, CellFractionPrediction = \
                Deconvolution(sc_ref, self.bulk_data.T, sep=sep, scaler=scaler,
                            datatype=datatype, genelenfile=genelenfile,
                            mode=mode, adaptive=adaptive, variance_threshold=variance_threshold,
                            save_model_name=save_model_name,
                            batch_size=batch_size, epochs=epochs, seed=seed)
        else:
            raise ValueError('method must be scaden or tape')
        if self.bulk_group is not None:
            cell_total_num=self.single_data.shape[0]*self.bulk_data[self.bulk_group].mean(axis=1).sum()/self.single_data.to_df().sum().sum()
            print('Predicted Total Cell Num:',cell_total_num)
            self.cell_target_num=dict(pd.Series(CellFractionPrediction.loc[self.bulk_group].mean()*cell_total_num*scale_size).astype(int))

        else:
            cell_total_num=self.single_data.shape[0]*self.bulk_data.mean(axis=1).sum()/self.single_data.to_df().sum().sum()
            print('Predicted Total Cell Num:',cell_total_num)
            self.cell_target_num=dict(pd.Series(CellFractionPrediction.mean()*cell_total_num*scale_size).astype(int))

        return CellFractionPrediction

    def bulk_preprocess_lazy(self,)->None:
        """
        Preprocess the bulk data

        Arguments:
            group: The group of the bulk data. Default is None. It need to set to calculate the mean of each group.
        """

        print("......drop duplicates index in bulk data")
        self.bulk_data=data_drop_duplicates_index(self.bulk_data)
        print("......deseq2 normalize the bulk data")
        self.bulk_data=deseq2_normalize(self.bulk_data)
        print("......log10 the bulk data")
        self.bulk_data=np.log10(self.bulk_data+1)
        print("......calculate the mean of each group")
        if self.bulk_group is None:
            self.bulk_seq_group=self.bulk_data
            return None
        else:
            data_dg_v=self.bulk_data[self.bulk_group].mean(axis=1)
            data_dg=pd.DataFrame(index=data_dg_v.index)
            data_dg['group']=data_dg_v
            self.bulk_seq_group=data_dg
        return None

    def single_preprocess_lazy(self,target_sum:int=1e4)->None:
        """
        Preprocess the single data

        Arguments:
            target_sum: The target total counts per cell after normalization. Default is 1e4.

        """

        print("......normalize the single data")
        sc.pp.normalize_total(self.single_data, target_sum=target_sum)
        print("......log1p the single data")
        sc.pp.log1p(self.single_data)
        return None

    def prepare_input(self,):
        print("......prepare the input of bulk2single")
        self.input_data=bulk2single_data_prepare(self.bulk_seq_group,
                                                 self.single_data,
                                                 self.celltype_key)


    def train(self,
            vae_save_dir:str='save_model',
            vae_save_name:str='vae',
            generate_save_dir:str='output',
            generate_save_name:str='output',
            batch_size:int=512,
            learning_rate:float=1e-4,
            hidden_size:int=256,
            epoch_num:int=5000,
            patience:int=50,save:bool=True)->torch.nn.Module:
        """
        Trains the VAE model.

        Arguments:
            vae_save_dir: The directory to save the trained VAE model. Default is 'save_model'.
            vae_save_name: The name of the saved VAE model. Default is 'vae'.
            generate_save_dir: The directory to save the generated single-cell data. Default is 'output'.
            generate_save_name: The name of the saved generated single-cell data. Default is 'output'.
            batch_size: The batch size for training. Default is 512.
            learning_rate: The learning rate for training. Default is 1e-4.
            hidden_size: The hidden size for the encoder and decoder networks. Default is 256.
            epoch_num: The maximum number of epochs for training. Default is 5000.
            patience: The number of epochs to wait before early stopping. Default is 50.
            save: Whether to save the trained VAE model. Default is True.

        Returns:
            vae_net: The trained VAE model.
        """
        if self.input_data is None:
            self.prepare_input()
        single_cell, label, breed_2_list, index_2_gene, cell_number_target_num, \
        nclass, ntrain, feature_size = self.__get_model_input(self.input_data, self.cell_target_num)
        print('...begin vae training')
        vae_net,history = train_vae(single_cell,
                            label,
                            self.used_device,
                            batch_size,
                            feature_size=feature_size,
                            epoch_num=epoch_num,
                            learning_rate=learning_rate,
                            hidden_size=hidden_size,
                            patience=patience,)
        print('...vae training done!')
        if save:
            path_save = os.path.join(vae_save_dir, f"{vae_save_name}.pth")
            if not os.path.exists(vae_save_dir):
                os.makedirs(vae_save_dir)
            torch.save(vae_net.state_dict(), path_save)
            print(f"...save trained vae in {path_save}.")
            import pickle
            #save cell_target_num
            with open(os.path.join(vae_save_dir, f"{vae_save_name}_cell_target_num.pkl"), 'wb') as f:
                pickle.dump(self.cell_target_num, f)
        self.vae_net=vae_net
        self.history=history
        return vae_net

    def save(self,vae_save_dir:str='save_model',
            vae_save_name:str='vae',):
        """
        Saves the trained VAE model.

        Arguments:
            vae_save_dir: the directory to save the trained VAE model. Default is 'save_model'.
            vae_save_name: the name of the saved VAE model. Default is 'vae'.

        """
        path_save = os.path.join(vae_save_dir, f"{vae_save_name}.pth")
        if not os.path.exists(vae_save_dir):
            os.makedirs(vae_save_dir)
        torch.save(self.vae_net.state_dict(), path_save)
        import pickle
        #save cell_target_num
        with open(os.path.join(vae_save_dir, f"{vae_save_name}_cell_target_num.pkl"), 'wb') as f:
            pickle.dump(self.cell_target_num, f)
        print(f"...save trained vae in {path_save}.")

    def generate(self)->anndata.AnnData:
        r"""
        Generate the single-cell data.

        Returns:
            sc_g: The generated single-cell data.
        """
        single_cell, label, breed_2_list, index_2_gene, cell_number_target_num, \
        nclass, ntrain, feature_size = self.__get_model_input(self.input_data, self.cell_target_num)
        print('...generating')
        generate_sc_meta, generate_sc_data = generate_vae(self.vae_net, -1,
                                                          single_cell, label, breed_2_list,
                                                          index_2_gene, cell_number_target_num, self.used_device)
        generate_sc_meta.set_index('Cell',inplace=True)
        sc_g=anndata.AnnData(generate_sc_data.T)
        sc_g.obs[self.celltype_key] = generate_sc_meta.loc[sc_g.obs.index,'Cell_type'].values
        return sc_g

    def load_fraction(self,fraction_path:str):
        r"""
        Load the predicted cell fraction.

        Arguments:
            fraction_path: The path of the predicted cell fraction.

        The loaded cell counts are stored in self.cell_target_num.
        """
        #load cell_target_num
        import pickle
        with open(os.path.join(fraction_path), 'rb') as f:
            self.cell_target_num = pickle.load(f)


    def load(self,vae_load_dir:str,hidden_size:int=256):
        r"""
        load the trained VAE model of Bulk2Single.

        Arguments:
            vae_load_dir: The directory to load the trained VAE model.
            hidden_size: The hidden size for the encoder and decoder networks. Default is 256.
        """

        single_cell, label, breed_2_list, index_2_gene, cell_number_target_num, \
        nclass, ntrain, feature_size = self.__get_model_input(self.input_data, self.cell_target_num)
        print(f'loading model from {vae_load_dir}')
        vae_net = load_vae(feature_size, hidden_size, vae_load_dir, self.used_device)
        self.vae_net=vae_net

    def load_and_generate(self,
                              vae_load_dir:str,  # load_dir
                              hidden_size:int=256)->anndata.AnnData:
        r"""
        load the trained VAE model of Bulk2Single and generate the single-cell data.

        Arguments:
            vae_load_dir: The directory to load the trained VAE model.
            hidden_size: The hidden size for the encoder and decoder networks. Default is 256.

        Returns:
            sc_g: The generated single-cell data.
        """
        single_cell, label, breed_2_list, index_2_gene, cell_number_target_num, \
        nclass, ntrain, feature_size = self.__get_model_input(self.input_data, self.cell_target_num)
        print(f'loading model from {vae_load_dir}')
        vae_net = load_vae(feature_size, hidden_size, vae_load_dir, self.used_device)
        print('...generating')
        generate_sc_meta, generate_sc_data = generate_vae(vae_net, -1,
                                                          single_cell, label, breed_2_list,
                                                          index_2_gene, cell_number_target_num, self.used_device)
        generate_sc_meta.set_index('Cell',inplace=True)
        #return generate_sc_meta, generate_sc_data
        sc_g=anndata.AnnData(generate_sc_data.T)
        sc_g.obs[self.celltype_key] = generate_sc_meta.loc[sc_g.obs.index,'Cell_type'].values

        print('...generating done!')
        return sc_g

    def filtered(self,generate_adata,highly_variable_genes:bool=True,max_value:float=10,
                     n_comps:int=100,svd_solver:str='auto',leiden_size:int=50):
        """
        Filter the generated single-cell data by removing Leiden clusters with fewer than leiden_size cells.
        """
        generate_adata.raw = generate_adata
        if highly_variable_genes:
            sc.pp.highly_variable_genes(generate_adata, min_mean=0.0125, max_mean=3, min_disp=0.5)
            generate_adata = generate_adata[:, generate_adata.var.highly_variable]
        sc.pp.scale(generate_adata, max_value=max_value)
        sc.tl.pca(generate_adata, n_comps=n_comps, svd_solver=svd_solver)
        sc.pp.neighbors(generate_adata, use_rep="X_pca")
        sc.tl.leiden(generate_adata)
        filter_leiden=list(generate_adata.obs['leiden'].value_counts()[generate_adata.obs['leiden'].value_counts()<leiden_size].index)
        print("The filter leiden is ",filter_leiden)
        generate_adata=generate_adata[~generate_adata.obs['leiden'].isin(filter_leiden)]
        self.generate_adata=generate_adata.copy()

        return generate_adata

    def plot_loss(self,figsize:tuple=(4,4))->Tuple[matplotlib.figure.Figure,matplotlib.axes._axes.Axes]:
        r"""
        plot the loss curve of the trained VAE model.

        Arguments:
            figsize: The size of the figure. Default is (4,4).

        Returns:
            fig: The figure of the loss curve.
            ax: The axes of the figure.
        """
        fig, ax = plt.subplots(figsize=figsize)
        ax.plot(range(len(self.history)),self.history)
        ax.set_title('Beta-VAE')
        ax.set_ylabel('Loss')
        ax.set_xlabel('Epochs')
        return fig,ax


    def __get_model_input(self, data, cell_target_num):
        # input:data, celltype, bulk & output: label, dic, single_cell
        single_cell = data["input_sc_data"].values.T  # single cell data (600 * 6588)
        index_2_gene = (data["input_sc_data"].index).tolist()
        breed = data["input_sc_meta"]['Cell_type']
        breed_np = breed.values
        breed_set = set(breed_np)
        breed_2_list = list(breed_set)
        dic = {}  # breed_set to index {'B cell': 0, 'Monocyte': 1, 'Dendritic cell': 2, 'T cell': 3}
        label = []  # the label of cell (with index correspond)
        nclass = len(breed_set)

        ntrain = single_cell.shape[0]
        # FeaSize = single_cell.shape[1]
        feature_size = single_cell.shape[1]
        assert nclass == len(cell_target_num.keys()), "cell type num no match!!!"

        for i in range(len(breed_set)):
            dic[breed_2_list[i]] = i
        cell = data["input_sc_meta"]["Cell"].values

        for i in range(cell.shape[0]):
            label.append(dic[breed_np[i]])

        label = np.array(label)

        # label index the data size of corresponding target
        cell_number_target_num = {}
        for k, v in cell_target_num.items():
            cell_number_target_num[dic[k]] = v

        return single_cell, label, breed_2_list, index_2_gene, cell_number_target_num, nclass, ntrain, feature_size

    def __save_generation(self, generate_sc_meta, generate_sc_data, generate_save_dir,
                          generate_save_name, ):
        # saving.....
        if not os.path.exists(generate_save_dir):
            os.makedirs(generate_save_dir)
        path_label_generate_csv = os.path.join(generate_save_dir, f"{generate_save_name}_sc_celltype.csv")
        path_cell_generate_csv = os.path.join(generate_save_dir, f"{generate_save_name}_sc_data.csv")

        generate_sc_meta.to_csv(path_label_generate_csv)
        generate_sc_data.to_csv(path_cell_generate_csv)
        print(f"saving to {path_label_generate_csv} and {path_cell_generate_csv}.")

__init__(bulk_data, single_data, celltype_key, bulk_group=None, max_single_cells=5000, top_marker_num=500, ratio_num=1, gpu=0)

Initializes the Bulk2Single class.

Parameters:

    bulk_data (pd.DataFrame): The bulk RNA-seq data. Required.
    single_data (anndata.AnnData): The single-cell RNA-seq reference data. Required.
    celltype_key (str): The name of the column in single_data.obs containing the cell-type labels. Required.
    bulk_group (list, optional): The column names of the bulk data that form the group of interest. Default is None.
    max_single_cells (int): The maximum number of single cells to use; larger references are randomly subsampled. Default is 5000.
    top_marker_num (int): The number of top markers to select per cell type. Default is 500.
    ratio_num (int): The ratio between the number of single cells and the target number of converted cells. Default is 1.
    gpu (Union[int, str]): The ID of the GPU to use. Set to -1 to use the CPU, or 'mps' to use the Apple MPS backend. Default is 0.
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/bulk2single/_bulk2single.py
def __init__(self,bulk_data:pd.DataFrame,single_data:anndata.AnnData,
             celltype_key:str,bulk_group=None,max_single_cells:int=5000,
             top_marker_num:int=500,ratio_num:int=1,gpu:Union[int,str]=0):
    """
    Initializes the Bulk2Single class.

    Arguments:
        bulk_data: The bulk RNA-seq data.
        single_data: The single-cell RNA-seq reference data.
        celltype_key: The name of the column in single_data.obs containing the cell-type labels.
        bulk_group: The column names of the bulk data that form the group of interest. Default is None.
        max_single_cells: The maximum number of single cells to use; larger references are randomly subsampled. Default is 5000.
        top_marker_num: The number of top markers to select per cell type. Default is 500.
        ratio_num: The ratio between the number of single cells and the target number of converted cells. Default is 1.
        gpu: The ID of the GPU to use. Set to -1 to use the CPU, or 'mps' to use the Apple MPS backend. Default is 0.

    """
    single_data.var_names_make_unique()
    bulk_data=data_drop_duplicates_index(bulk_data)
    self.bulk_data=bulk_data
    self.single_data=single_data

    if self.single_data.shape[0]>max_single_cells:
        print(f"......random select {max_single_cells} single cells")
        import random
        cell_idx=random.sample(self.single_data.obs.index.tolist(),max_single_cells)
        self.single_data=self.single_data[cell_idx,:]
    self.celltype_key=celltype_key
    self.bulk_group=bulk_group
    self.input_data=None
    #self.input_data=bulk2single_data_prepare(bulk_data,single_data,celltype_key)
    #self.cell_target_num = data_process(self.input_data, top_marker_num, ratio_num)

    # Build a pseudo-bulk reference: summed expression per cell type.
    sc_df=single_data.to_df()
    sc_ref=pd.DataFrame(columns=sc_df.columns)
    sc_ref_index=[]
    for celltype in list(set(single_data.obs[celltype_key])):
        sc_ref.loc[celltype]=single_data[single_data.obs[celltype_key]==celltype].to_df().sum()
        sc_ref_index.append(celltype)
    sc_ref.index=sc_ref_index
    self.sc_ref=sc_ref


    if gpu=='mps' and torch.backends.mps.is_available():
        print('Note: the loss may become NaN on the MPS backend; use it only if your PyTorch build supports MPS.')
        self.used_device = torch.device("mps")
    elif isinstance(gpu, int) and gpu >= 0 and torch.cuda.is_available():
        self.used_device = torch.device(f"cuda:{gpu}")
    else:
        self.used_device = torch.device('cpu')
    self.history=[]
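
A minimal usage sketch (not part of the library documentation): construct the class, run the lazy preprocessing, and prepare the paired input. The file names, the 'celltype' obs column, and the bulk-group sample names below are illustrative assumptions; bulk_data is expected as a genes-by-samples DataFrame.

# Hypothetical example; file names, obs column, and sample names are assumptions.
import anndata
import pandas as pd
import omicverse as ov

bulk_df = pd.read_csv('bulk_counts.csv', index_col=0)   # genes x samples
adata = anndata.read_h5ad('reference_sc.h5ad')          # annotated scRNA-seq reference

model = ov.bulk2single.Bulk2Single(
    bulk_data=bulk_df,
    single_data=adata,
    celltype_key='celltype',                 # column in adata.obs with cell-type labels
    bulk_group=['treated_1', 'treated_2'],   # bulk columns to average (optional)
    max_single_cells=5000,
    gpu=0,                                   # -1 for CPU, 'mps' for Apple silicon
)
model.bulk_preprocess_lazy()    # dedupe index, DESeq2 normalize, log10, group mean
model.single_preprocess_lazy()  # normalize_total + log1p
model.prepare_input()           # pair the bulk and single-cell data for the VAE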

train(vae_save_dir='save_model', vae_save_name='vae', generate_save_dir='output', generate_save_name='output', batch_size=512, learning_rate=0.0001, hidden_size=256, epoch_num=5000, patience=50, save=True)

Trains the VAE model.

Parameters:

    vae_save_dir (str): The directory to save the trained VAE model. Default is 'save_model'.
    vae_save_name (str): The name of the saved VAE model. Default is 'vae'.
    generate_save_dir (str): The directory to save the generated single-cell data. Default is 'output'.
    generate_save_name (str): The name of the saved generated single-cell data. Default is 'output'.
    batch_size (int): The batch size for training. Default is 512.
    learning_rate (float): The learning rate for training. Default is 1e-4.
    hidden_size (int): The hidden size for the encoder and decoder networks. Default is 256.
    epoch_num (int): The maximum number of epochs for training. Default is 5000.
    patience (int): The number of epochs to wait before early stopping. Default is 50.
    save (bool): Whether to save the trained VAE model. Default is True.

Returns:

    vae_net (torch.nn.Module): The trained VAE model.

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/bulk2single/_bulk2single.py
def train(self,
        vae_save_dir:str='save_model',
        vae_save_name:str='vae',
        generate_save_dir:str='output',
        generate_save_name:str='output',
        batch_size:int=512,
        learning_rate:float=1e-4,
        hidden_size:int=256,
        epoch_num:int=5000,
        patience:int=50,save:bool=True)->torch.nn.Module:
    """
    Trains the VAE model.

    Arguments:
        vae_save_dir: The directory to save the trained VAE model. Default is 'save_model'.
        vae_save_name: The name of the saved VAE model. Default is 'vae'.
        generate_save_dir: The directory to save the generated single-cell data. Default is 'output'.
        generate_save_name: The name of the saved generated single-cell data. Default is 'output'.
        batch_size: The batch size for training. Default is 512.
        learning_rate: The learning rate for training. Default is 1e-4.
        hidden_size: The hidden size for the encoder and decoder networks. Default is 256.
        epoch_num: The maximum number of epochs for training. Default is 5000.
        patience: The number of epochs to wait before early stopping. Default is 50.
        save: Whether to save the trained VAE model. Default is True.

    Returns:
        vae_net: The trained VAE model.
    """
    if self.input_data is None:
        self.prepare_input()
    single_cell, label, breed_2_list, index_2_gene, cell_number_target_num, \
    nclass, ntrain, feature_size = self.__get_model_input(self.input_data, self.cell_target_num)
    print('...begin vae training')
    vae_net,history = train_vae(single_cell,
                        label,
                        self.used_device,
                        batch_size,
                        feature_size=feature_size,
                        epoch_num=epoch_num,
                        learning_rate=learning_rate,
                        hidden_size=hidden_size,
                        patience=patience,)
    print('...vae training done!')
    if save:
        path_save = os.path.join(vae_save_dir, f"{vae_save_name}.pth")
        if not os.path.exists(vae_save_dir):
            os.makedirs(vae_save_dir)
        torch.save(vae_net.state_dict(), path_save)
        print(f"...save trained vae in {path_save}.")
        import pickle
        #save cell_target_num
        with open(os.path.join(vae_save_dir, f"{vae_save_name}_cell_target_num.pkl"), 'wb') as f:
            pickle.dump(self.cell_target_num, f)
    self.vae_net=vae_net
    self.history=history
    return vae_net
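
A training sketch continuing the example above (an illustration, not part of the documentation): predicted_fraction (or load_fraction) must be called first so that self.cell_target_num is available when train() builds its model input.

# Hypothetical continuation of the example above.
fractions = model.predicted_fraction(method='scaden')   # also sets model.cell_target_num
vae_net = model.train(
    vae_save_dir='save_model',
    vae_save_name='vae',
    batch_size=512,
    learning_rate=1e-4,
    epoch_num=5000,
    patience=50,
    save=True,    # writes save_model/vae.pth and save_model/vae_cell_target_num.pkl
)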

generate()

Generate the single-cell data.

Returns:

    sc_g (anndata.AnnData): The generated single-cell data.

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/bulk2single/_bulk2single.py
def generate(self)->anndata.AnnData:
    r"""
    Generate the single-cell data.

    Returns:
        sc_g: The generated single-cell data.
    """
    single_cell, label, breed_2_list, index_2_gene, cell_number_target_num, \
    nclass, ntrain, feature_size = self.__get_model_input(self.input_data, self.cell_target_num)
    print('...generating')
    generate_sc_meta, generate_sc_data = generate_vae(self.vae_net, -1,
                                                      single_cell, label, breed_2_list,
                                                      index_2_gene, cell_number_target_num, self.used_device)
    generate_sc_meta.set_index('Cell',inplace=True)
    sc_g=anndata.AnnData(generate_sc_data.T)
    sc_g.obs[self.celltype_key] = generate_sc_meta.loc[sc_g.obs.index,'Cell_type'].values
    return sc_g
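
After training, generation and optional cluster-size filtering might look like the following sketch (the filtered() helper simply removes Leiden clusters smaller than leiden_size cells):

# Hypothetical continuation of the example above.
sc_g = model.generate()                        # AnnData of generated single cells
sc_g = model.filtered(sc_g, leiden_size=50)    # drop clusters with fewer than 50 cells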

load(vae_load_dir, hidden_size=256)

load the trained VAE model of Bulk2Single.

Parameters:

    vae_load_dir (str): The directory to load the trained VAE model from. Required.
    hidden_size (int): The hidden size for the encoder and decoder networks. Default is 256.
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/bulk2single/_bulk2single.py
def load(self,vae_load_dir:str,hidden_size:int=256):
    r"""
    load the trained VAE model of Bulk2Single.

    Arguments:
        vae_load_dir: The directory to load the trained VAE model.
        hidden_size: The hidden size for the encoder and decoder networks. Default is 256.
    """

    single_cell, label, breed_2_list, index_2_gene, cell_number_target_num, \
    nclass, ntrain, feature_size = self.__get_model_input(self.input_data, self.cell_target_num)
    print(f'loading model from {vae_load_dir}')
    vae_net = load_vae(feature_size, hidden_size, vae_load_dir, self.used_device)
    self.vae_net=vae_net

load_and_generate(vae_load_dir, hidden_size=256)

load the trained VAE model of Bulk2Single and generate the single-cell data.

Parameters:

    vae_load_dir (str): The directory to load the trained VAE model from. Required.
    hidden_size (int): The hidden size for the encoder and decoder networks. Default is 256.

Returns:

    sc_g (anndata.AnnData): The generated single-cell data.

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/bulk2single/_bulk2single.py
def load_and_generate(self,
                          vae_load_dir:str,  # load_dir
                          hidden_size:int=256)->anndata.AnnData:
    r"""
    load the trained VAE model of Bulk2Single and generate the single-cell data.

    Arguments:
        vae_load_dir: The directory to load the trained VAE model.
        hidden_size: The hidden size for the encoder and decoder networks. Default is 256.

    Returns:
        sc_g: The generated single-cell data.
    """
    single_cell, label, breed_2_list, index_2_gene, cell_number_target_num, \
    nclass, ntrain, feature_size = self.__get_model_input(self.input_data, self.cell_target_num)
    print(f'loading model from {vae_load_dir}')
    vae_net = load_vae(feature_size, hidden_size, vae_load_dir, self.used_device)
    print('...generating')
    generate_sc_meta, generate_sc_data = generate_vae(vae_net, -1,
                                                      single_cell, label, breed_2_list,
                                                      index_2_gene, cell_number_target_num, self.used_device)
    generate_sc_meta.set_index('Cell',inplace=True)
    #return generate_sc_meta, generate_sc_data
    sc_g=anndata.AnnData(generate_sc_data.T)
    sc_g.obs[self.celltype_key] = generate_sc_meta.loc[sc_g.obs.index,'Cell_type'].values

    print('...generating done!')
    return sc_g
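
A sketch of restoring a previously trained model in a fresh session, assuming the default names used by train(save=True); whether load_vae expects the .pth file path or its containing directory depends on that helper, and the file path is assumed here.

# Hypothetical continuation; paths follow the defaults used by train(save=True).
model.load_fraction('save_model/vae_cell_target_num.pkl')   # restores cell_target_num
model.prepare_input()                                       # rebuild the paired input
sc_g = model.load_and_generate('save_model/vae.pth', hidden_size=256)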

plot_loss(figsize=(4, 4))

plot the loss curve of the trained VAE model.

Parameters:

    figsize (tuple): The size of the figure. Default is (4, 4).

Returns:

    fig (matplotlib.figure.Figure): The figure of the loss curve.
    ax (matplotlib.axes._axes.Axes): The axes of the figure.

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/bulk2single/_bulk2single.py
def plot_loss(self,figsize:tuple=(4,4))->Tuple[matplotlib.figure.Figure,matplotlib.axes._axes.Axes]:
    r"""
    plot the loss curve of the trained VAE model.

    Arguments:
        figsize: The size of the figure. Default is (4,4).

    Returns:
        fig: The figure of the loss curve.
        ax: The axes of the figure.
    """
    fig, ax = plt.subplots(figsize=figsize)
    ax.plot(range(len(self.history)),self.history)
    ax.set_title('Beta-VAE')
    ax.set_ylabel('Loss')
    ax.set_xlabel('Epochs')
    return fig,ax
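
Finally, the training curve recorded in self.history can be inspected and saved, for example:

# Hypothetical: save the loss curve produced by plot_loss().
fig, ax = model.plot_loss(figsize=(4, 4))
fig.savefig('vae_loss.png', dpi=300, bbox_inches='tight')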