Skip to content

Api scdrug

omicverse.single.Drug_Response

Drug_Response class for drug response prediction. The raw code could be found at https://github.com/ailabstw/scDrug

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_scdrug.py
class Drug_Response:
    r"""
    Drug_Response class for drug response prediction.
    The raw code could be found at https://github.com/ailabstw/scDrug
    """
    def __init__(self,adata,scriptpath,modelpath,output='./',model='GDSC',clusters='All',
                 cell='A549',cpus=4,n_drugs=10):

        """
        Initializes the Drug_Response class.

        Arguments:
            adata: Annotated data matrix with cells as rows and genes as columns.
            scriptpath: Path to the directory containing the CaDRReS scripts for the analysis. You need to download the scripts according to `git clone https://github.com/CSB5/CaDRReS-Sc.git` and set the path to the directory.
            modelpath: Path to the directory containing the pre-trained models.
                You need to download the models according to `Pyomic.utils.download_GDSC_data()` and `Pyomic.utils.download_CaDRReS_model()` and set the path to the directory.
            output: Path to the directory where the output files will be saved (default: './').
            model: The name of the pre-trained model to be used for the analysis (default: 'GDSC').
            clusters: The cluster labels to be used for the analysis. Default is all cells (default: 'All').
            cell: The cell line to be analyzed (default: 'A549').
            cpus: The number of CPUs to be used for the analysis (default: 4).
            n_drugs: The number of top drugs to be selected based on the predicted sensitivity (default: 10).

        Returns:
            None
        """

        self.model = model
        self.adata=adata
        self.clusters=clusters
        self.output=output
        self.n_drugs=n_drugs
        self.modelpath=modelpath

        self.scriptpath = scriptpath
        sys.path.append(os.path.abspath(scriptpath))

        from cadrres_sc import pp, model, evaluation, utility

        self.load_model()
        self.drug_info()
        self.bulk_exp()
        self.sc_exp()
        self.kernel_feature_preparartion()
        self.sensitivity_prediction()
        if self.model == 'GDSC':
            self.masked_drugs = list(pd.read_csv(self.modelpath+'masked_drugs.csv')['GDSC'].dropna().astype('int64').astype('str'))
            self.cell_death_proportion()
        else:
            self.masked_drugs = list(pd.read_csv(self.modelpath+'masked_drugs.csv')['PRISM'])
        self.output_result()
        self.figure_output()

    def load_model(self):
        r"""
        load the pre-trained model.

        """
        from cadrres_sc import pp, model, evaluation, utility
        ### IC50/AUC prediction
        ## Read pre-trained model
        #model_dir = '/Users/fernandozeng/Desktop/analysis/scDrug/CaDRReS-Sc-model/'
        model_dir = self.modelpath
        obj_function = widgets.Dropdown(options=['cadrres-wo-sample-bias', 'cadrres-wo-sample-bias-weight'], description='Objetice function')
        self.model_spec_name = obj_function.value
        if self.model == 'GDSC':
            model_file = model_dir + '{}_param_dict_all_genes.pickle'.format(self.model_spec_name)
        elif self.model == 'PRISM':
            model_file = model_dir + '{}_param_dict_prism.pickle'.format(self.model_spec_name)
        else:
            sys.exit('Wrong model name.')
        self.cadrres_model = model.load_model(model_file)

    def drug_info(self):
        r"""
        read the drug information.

        """
        ## Read drug information
        if self.model == 'GDSC':
            self.drug_info_df = pd.read_csv(self.scriptpath + '/preprocessed_data/GDSC/drug_stat.csv', index_col=0)
            self.drug_info_df.index = self.drug_info_df.index.astype(str)
        else:
            self.drug_info_df = pd.read_csv(self.scriptpath + '/preprocessed_data/PRISM/PRISM_drug_info.csv', index_col='broad_id')

    def bulk_exp(self):
        r"""
        extract the bulk gene expression data.

        """
        ## Read test data
        if self.model == 'GDSC':
            #GDSC_exp exists in the data folder
            files=os.listdir(self.scriptpath + '/data/GDSC')
            if 'GDSC_exp.tsv' not in files:
                self.gene_exp_df = pd.read_csv(self.modelpath + 'GDSC_exp.tsv.gz', sep='\t', index_col=0)
                self.gene_exp_df = self.gene_exp_df.groupby(self.gene_exp_df.index).mean()
            else:
                self.gene_exp_df = pd.read_csv(self.scriptpath + '/data/GDSC/GDSC_exp.tsv', sep='\t', index_col=0)
                self.gene_exp_df = self.gene_exp_df.groupby(self.gene_exp_df.index).mean()
        else:
            self.gene_exp_df = pd.read_csv(self.scriptpath + '/data/CCLE/CCLE_expression.csv', low_memory=False, index_col=0).T
            self.gene_exp_df.index = [gene.split(sep=' (')[0] for gene in self.gene_exp_df.index]

    def sc_exp(self):
        r"""
        Load cluster-specific gene expression profile
        """
        ## Load cluster-specific gene expression profile
        if self.clusters == 'All':
            clusters = sorted(self.adata.obs['louvain'].unique(), key=int)
        else:
            clusters = [x.strip() for x in self.clusters.split(',')]

        self.cluster_norm_exp_df = pd.DataFrame(columns=clusters, index=self.adata.raw.var.index)
        for cluster in clusters:
            self.cluster_norm_exp_df[cluster] =  self.adata.raw.X[self.adata.obs['louvain']==cluster].mean(axis=0).T \
                                                 if np.sum(self.adata.raw.X[self.adata.obs['louvain']==cluster]) else 0.0

    def kernel_feature_preparartion(self):
        r"""
        kernel feature preparation

        """
        from cadrres_sc import pp, model, evaluation, utility
        ## Read essential genes list
        if self.model == 'GDSC':
            ess_gene_list = self.gene_exp_df.index.dropna().tolist()
        else:
            ess_gene_list = utility.get_gene_list(self.scriptpath + '/preprocessed_data/PRISM/feature_genes.txt')

        ## Calculate fold-change
        cell_line_log2_mean_fc_exp_df, cell_line_mean_exp_df = pp.gexp.normalize_log2_mean_fc(self.gene_exp_df)

        self.adata_exp_mean = pd.Series(self.adata.raw.X.mean(axis=0).tolist()[0], index=self.adata.raw.var.index)
        cluster_norm_exp_df = self.cluster_norm_exp_df.sub(self.adata_exp_mean, axis=0)

        ## Calculate kernel feature
        self.test_kernel_df = pp.gexp.calculate_kernel_feature(cluster_norm_exp_df, cell_line_log2_mean_fc_exp_df, ess_gene_list)

    def sensitivity_prediction(self):
        r"""
        Predict drug sensitivity

        """
        from cadrres_sc import pp, model, evaluation, utility
        ## Drug response prediction
        if self.model == 'GDSC':
            print('...Predicting drug response for using CaDRReS(GDSC): {}'.format(self.model_spec_name))
            self.pred_ic50_df, P_test_df= model.predict_from_model(self.cadrres_model, self.test_kernel_df, self.model_spec_name)
            print('...done!')
        else:
            print('...Predicting drug response for using CaDRReS(PRISM): {}'.format(self.model_spec_name))
            self.pred_auc_df, P_test_df= model.predict_from_model(self.cadrres_model, self.test_kernel_df, self.model_spec_name)
            print('...done!')

    def cell_death_proportion(self):
        r"""
        Predict cell death proportion and cell death percentage at the ref_type dosage

        """
        ### Drug kill prediction
        ref_type = 'log2_median_ic50'
        self.drug_list = [x for x in self.pred_ic50_df.columns if not x in self.masked_drugs]
        self.drug_info_df = self.drug_info_df.loc[self.drug_list]
        self.pred_ic50_df = self.pred_ic50_df.loc[:,self.drug_list]

        ## Predict cell death percentage at the ref_type dosage
        pred_delta_df = pd.DataFrame(self.pred_ic50_df.values - self.drug_info_df[ref_type].values, columns=self.pred_ic50_df.columns)
        pred_cv_df = 100 / (1 + (np.power(2, -pred_delta_df)))
        self.pred_kill_df = 100 - pred_cv_df

    def output_result(self):
        if self.model == 'GDSC':
            drug_df = pd.DataFrame({'Drug ID': self.drug_list, 
                                    'Drug Name': [self.drug_info_df.loc[drug_id]['Drug Name'] for drug_id in self.drug_list]})
            self.pred_ic50_df = (self.pred_ic50_df.T-self.pred_ic50_df.min(axis=1))/(self.pred_ic50_df.max(axis=1)-self.pred_ic50_df.min(axis=1))
            self.pred_ic50_df = self.pred_ic50_df.T
            self.pred_ic50_df.columns = pd.MultiIndex.from_frame(drug_df)
            self.pred_ic50_df.round(3).to_csv(os.path.join(self.output, 'IC50_prediction.csv'))
            self.pred_kill_df.columns = pd.MultiIndex.from_frame(drug_df)
            self.pred_kill_df.round(3).to_csv(os.path.join(self.output, 'drug_kill_prediction.csv'))
        else:
            drug_list = list(self.pred_auc_df.columns)
            drug_list  = [d for d in drug_list if d not in self.masked_drugs]
            drug_df = pd.DataFrame({'Drug ID':drug_list,
                                    'Drug Name':[self.drug_info_df.loc[d, 'name'] for d in drug_list]})
            self.pred_auc_df = self.pred_auc_df.loc[:,drug_list].T
            self.pred_auc_df = (self.pred_auc_df-self.pred_auc_df.min())/(self.pred_auc_df.max()-self.pred_auc_df.min())
            self.pred_auc_df = self.pred_auc_df.T
            self.pred_auc_df.columns = pd.MultiIndex.from_frame(drug_df)
            self.pred_auc_df.round(3).to_csv(os.path.join(self.output, 'PRISM_prediction.csv'))

    def draw_plot(self, df, n_drug=10, name='', figsize=()):
        r"""
        plot heatmap of drug response prediction

        Arguments:
            df : `pandas.DataFrame` drug response prediction dataframe
            n_drug : `int` number of drugs to be plotted
            name : `str` name of the plot
            figsize : `tuple` size of the plot
        """
        def select_drug(df, n_drug):
            selected_drugs = []
            df_tmp = df.reset_index().set_index('Drug Name').iloc[:, 1:]
            for cluster in sorted([x for x in df_tmp.columns], key=int):
                for drug_name in df_tmp.sort_values(by=cluster, ascending=False).index[:n_drug].values:
                    if drug_name not in selected_drugs:
                        selected_drugs.append(drug_name)
            df_tmp = df_tmp.loc[selected_drugs, :]
            return df_tmp

        if self.model == 'GDSC':
            fig, ax = plt.subplots(figsize=figsize) 
            sns.heatmap(df.iloc[:n_drug,:-1], cmap='Blues', \
                        linewidths=0.5, linecolor='lightgrey', cbar=True, cbar_kws={'shrink': .2, 'label': 'Drug Sensitivity'}, ax=ax)
            ax.set_xlabel('Cluster', fontsize=20)
            ax.set_ylabel('Drug', fontsize=20)
            ax.figure.axes[-1].yaxis.label.set_size(20)
            for _, spine in ax.spines.items():
                spine.set_visible(True)
                spine.set_color('lightgrey') 
            plt.savefig(os.path.join(self.output, '{}.png'.format(name)), bbox_inches='tight', dpi=200)
            plt.close()

        else:
            fig, ax = plt.subplots(figsize=(df.shape[1], int(n_drug*df.shape[1]/5))) 
            sns.heatmap(select_drug(df, n_drug), cmap='Reds', \
                        linewidths=0.5, linecolor='lightgrey', cbar=True, cbar_kws={'shrink': .2, 'label': 'Drug Sensitivity'}, ax=ax, vmin=0, vmax=1)
            ax.set_xlabel('Cluster', fontsize=20)
            ax.set_ylabel('Drug', fontsize=20)
            ax.figure.axes[-1].yaxis.label.set_size(20)
            for _, spine in ax.spines.items():
                spine.set_visible(True)
                spine.set_color('lightgrey') 
            plt.savefig(os.path.join(self.output, '{}.png'.format(name)), bbox_inches='tight', dpi=200)
            plt.close()

    def figure_output(self):
        r"""
        plot figures

        """
        print('...Ploting figures...')
        ## GDSC figures
        if self.model == 'GDSC':
            tmp_pred_ic50_df = self.pred_ic50_df.T
            tmp_pred_ic50_df = tmp_pred_ic50_df.assign(sum=tmp_pred_ic50_df.sum(axis=1)).sort_values(by='sum', ascending=True)
            self.draw_plot(tmp_pred_ic50_df, name='GDSC prediction', figsize=(12,40))
            tmp_pred_kill_df = self.pred_kill_df.T
            tmp_pred_kill_df = tmp_pred_kill_df.loc[(tmp_pred_kill_df>=50).all(axis=1)]
            tmp_pred_kill_df = tmp_pred_kill_df.assign(sum=tmp_pred_kill_df.sum(axis=1)).sort_values(by='sum', ascending=False)
            self.draw_plot(tmp_pred_kill_df, n_drug=10, name='predicted cell death', figsize=(12,8))

        ## PRISM figures
        else:
            tmp_pred_auc_df = self.pred_auc_df.T
            #tmp_pred_auc_df = tmp_pred_auc_df.assign(sum=tmp_pred_auc_df.sum(axis=1)).sort_values(by='sum', ascending=True)
            self.draw_plot(tmp_pred_auc_df, n_drug=self.n_drugs, name='PRISM prediction')  
        print('done!')  

__init__(adata, scriptpath, modelpath, output='./', model='GDSC', clusters='All', cell='A549', cpus=4, n_drugs=10)

Initializes the Drug_Response class.

Parameters:

Name Type Description Default
adata

Annotated data matrix with cells as rows and genes as columns.

required
scriptpath

Path to the directory containing the CaDRReS scripts for the analysis. You need to download the scripts according to git clone https://github.com/CSB5/CaDRReS-Sc.git and set the path to the directory.

required
modelpath

Path to the directory containing the pre-trained models. You need to download the models according to Pyomic.utils.download_GDSC_data() and Pyomic.utils.download_CaDRReS_model() and set the path to the directory.

required
output

Path to the directory where the output files will be saved (default: './').

'./'
model

The name of the pre-trained model to be used for the analysis (default: 'GDSC').

'GDSC'
clusters

The cluster labels to be used for the analysis. Default is all cells (default: 'All').

'All'
cell

The cell line to be analyzed (default: 'A549').

'A549'
cpus

The number of CPUs to be used for the analysis (default: 4).

4
n_drugs

The number of top drugs to be selected based on the predicted sensitivity (default: 10).

10

Returns:

Type Description

None

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_scdrug.py
def __init__(self,adata,scriptpath,modelpath,output='./',model='GDSC',clusters='All',
             cell='A549',cpus=4,n_drugs=10):

    """
    Initializes the Drug_Response class.

    Arguments:
        adata: Annotated data matrix with cells as rows and genes as columns.
        scriptpath: Path to the directory containing the CaDRReS scripts for the analysis. You need to download the scripts according to `git clone https://github.com/CSB5/CaDRReS-Sc.git` and set the path to the directory.
        modelpath: Path to the directory containing the pre-trained models.
            You need to download the models according to `Pyomic.utils.download_GDSC_data()` and `Pyomic.utils.download_CaDRReS_model()` and set the path to the directory.
        output: Path to the directory where the output files will be saved (default: './').
        model: The name of the pre-trained model to be used for the analysis (default: 'GDSC').
        clusters: The cluster labels to be used for the analysis. Default is all cells (default: 'All').
        cell: The cell line to be analyzed (default: 'A549').
        cpus: The number of CPUs to be used for the analysis (default: 4).
        n_drugs: The number of top drugs to be selected based on the predicted sensitivity (default: 10).

    Returns:
        None
    """

    self.model = model
    self.adata=adata
    self.clusters=clusters
    self.output=output
    self.n_drugs=n_drugs
    self.modelpath=modelpath

    self.scriptpath = scriptpath
    sys.path.append(os.path.abspath(scriptpath))

    from cadrres_sc import pp, model, evaluation, utility

    self.load_model()
    self.drug_info()
    self.bulk_exp()
    self.sc_exp()
    self.kernel_feature_preparartion()
    self.sensitivity_prediction()
    if self.model == 'GDSC':
        self.masked_drugs = list(pd.read_csv(self.modelpath+'masked_drugs.csv')['GDSC'].dropna().astype('int64').astype('str'))
        self.cell_death_proportion()
    else:
        self.masked_drugs = list(pd.read_csv(self.modelpath+'masked_drugs.csv')['PRISM'])
    self.output_result()
    self.figure_output()

load_model()

load the pre-trained model.

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_scdrug.py
def load_model(self):
    r"""
    load the pre-trained model.

    """
    from cadrres_sc import pp, model, evaluation, utility
    ### IC50/AUC prediction
    ## Read pre-trained model
    #model_dir = '/Users/fernandozeng/Desktop/analysis/scDrug/CaDRReS-Sc-model/'
    model_dir = self.modelpath
    obj_function = widgets.Dropdown(options=['cadrres-wo-sample-bias', 'cadrres-wo-sample-bias-weight'], description='Objetice function')
    self.model_spec_name = obj_function.value
    if self.model == 'GDSC':
        model_file = model_dir + '{}_param_dict_all_genes.pickle'.format(self.model_spec_name)
    elif self.model == 'PRISM':
        model_file = model_dir + '{}_param_dict_prism.pickle'.format(self.model_spec_name)
    else:
        sys.exit('Wrong model name.')
    self.cadrres_model = model.load_model(model_file)

sensitivity_prediction()

Predict drug sensitivity

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_scdrug.py
def sensitivity_prediction(self):
    r"""
    Predict drug sensitivity

    """
    from cadrres_sc import pp, model, evaluation, utility
    ## Drug response prediction
    if self.model == 'GDSC':
        print('...Predicting drug response for using CaDRReS(GDSC): {}'.format(self.model_spec_name))
        self.pred_ic50_df, P_test_df= model.predict_from_model(self.cadrres_model, self.test_kernel_df, self.model_spec_name)
        print('...done!')
    else:
        print('...Predicting drug response for using CaDRReS(PRISM): {}'.format(self.model_spec_name))
        self.pred_auc_df, P_test_df= model.predict_from_model(self.cadrres_model, self.test_kernel_df, self.model_spec_name)
        print('...done!')

cell_death_proportion()

Predict cell death proportion and cell death percentage at the ref_type dosage

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_scdrug.py
def cell_death_proportion(self):
    r"""
    Predict cell death proportion and cell death percentage at the ref_type dosage

    """
    ### Drug kill prediction
    ref_type = 'log2_median_ic50'
    self.drug_list = [x for x in self.pred_ic50_df.columns if not x in self.masked_drugs]
    self.drug_info_df = self.drug_info_df.loc[self.drug_list]
    self.pred_ic50_df = self.pred_ic50_df.loc[:,self.drug_list]

    ## Predict cell death percentage at the ref_type dosage
    pred_delta_df = pd.DataFrame(self.pred_ic50_df.values - self.drug_info_df[ref_type].values, columns=self.pred_ic50_df.columns)
    pred_cv_df = 100 / (1 + (np.power(2, -pred_delta_df)))
    self.pred_kill_df = 100 - pred_cv_df

draw_plot(df, n_drug=10, name='', figsize=())

plot heatmap of drug response prediction

Parameters:

Name Type Description Default
df

pandas.DataFrame drug response prediction dataframe

required
n_drug

int number of drugs to be plotted

10
name

str name of the plot

''
figsize

tuple size of the plot

()
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_scdrug.py
def draw_plot(self, df, n_drug=10, name='', figsize=()):
    r"""
    plot heatmap of drug response prediction

    Arguments:
        df : `pandas.DataFrame` drug response prediction dataframe
        n_drug : `int` number of drugs to be plotted
        name : `str` name of the plot
        figsize : `tuple` size of the plot
    """
    def select_drug(df, n_drug):
        selected_drugs = []
        df_tmp = df.reset_index().set_index('Drug Name').iloc[:, 1:]
        for cluster in sorted([x for x in df_tmp.columns], key=int):
            for drug_name in df_tmp.sort_values(by=cluster, ascending=False).index[:n_drug].values:
                if drug_name not in selected_drugs:
                    selected_drugs.append(drug_name)
        df_tmp = df_tmp.loc[selected_drugs, :]
        return df_tmp

    if self.model == 'GDSC':
        fig, ax = plt.subplots(figsize=figsize) 
        sns.heatmap(df.iloc[:n_drug,:-1], cmap='Blues', \
                    linewidths=0.5, linecolor='lightgrey', cbar=True, cbar_kws={'shrink': .2, 'label': 'Drug Sensitivity'}, ax=ax)
        ax.set_xlabel('Cluster', fontsize=20)
        ax.set_ylabel('Drug', fontsize=20)
        ax.figure.axes[-1].yaxis.label.set_size(20)
        for _, spine in ax.spines.items():
            spine.set_visible(True)
            spine.set_color('lightgrey') 
        plt.savefig(os.path.join(self.output, '{}.png'.format(name)), bbox_inches='tight', dpi=200)
        plt.close()

    else:
        fig, ax = plt.subplots(figsize=(df.shape[1], int(n_drug*df.shape[1]/5))) 
        sns.heatmap(select_drug(df, n_drug), cmap='Reds', \
                    linewidths=0.5, linecolor='lightgrey', cbar=True, cbar_kws={'shrink': .2, 'label': 'Drug Sensitivity'}, ax=ax, vmin=0, vmax=1)
        ax.set_xlabel('Cluster', fontsize=20)
        ax.set_ylabel('Drug', fontsize=20)
        ax.figure.axes[-1].yaxis.label.set_size(20)
        for _, spine in ax.spines.items():
            spine.set_visible(True)
            spine.set_color('lightgrey') 
        plt.savefig(os.path.join(self.output, '{}.png'.format(name)), bbox_inches='tight', dpi=200)
        plt.close()

omicverse.single.autoResolution(adata, cpus=4)

Automatically determine clustering resolution

Parameters:

Name Type Description Default
adata

scanpy.AnnData The single cell data.

required
cpus

int, optional (default: 4) The number of cpus used for parallel computing.

4

Returns:

Name Type Description
adata

scanpy.AnnData The single cell data with the clustering resolution.

res

float The clustering resolution.

df_sil

pandas.DataFrame The silhouette score of each clustering resolution.

Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_scdrug.py
def autoResolution(adata,cpus=4):
    r"""
    Automatically determine clustering resolution

    Arguments:
        adata : `scanpy.AnnData` The single cell data.
        cpus : `int`, optional (default: 4) The number of cpus used for parallel computing.

    Returns:
        adata : `scanpy.AnnData` The single cell data with the clustering resolution.
        res : `float` The clustering resolution.
        df_sil: `pandas.DataFrame` The silhouette score of each clustering resolution.
    """
    print("Automatically determine clustering resolution...")
    start = time.time()
    def subsample_clustering(adata, sample_n, subsample_n, resolution, subsample):
        subadata = adata[subsample]
        sc.tl.louvain(subadata, resolution=resolution)
        cluster = subadata.obs['louvain'].tolist()

        subsampling_n = np.zeros((sample_n, sample_n), dtype=bool)
        coclustering_n = np.zeros((sample_n, sample_n), dtype=bool)

        for i in range(subsample_n):
            for j in range(subsample_n):
                x = subsample[i]
                y = subsample[j]
                subsampling_n[x][y] = True
                if cluster[i] == cluster[j]:
                    coclustering_n[x][y] = True
        return (subsampling_n, coclustering_n)
    rep_n = 5
    subset = 0.8
    sample_n = len(adata.obs)
    subsample_n = int(sample_n * subset)
    resolutions = np.linspace(0.4, 1.4, 6)
    silhouette_avg = {}
    np.random.seed(1)
    best_resolution = 0
    highest_sil = 0
    for r in resolutions:
        r = np.round(r, 1)
        print("Clustering test: resolution = ", r)
        sub_start = time.time()
        subsamples = [np.random.choice(sample_n, subsample_n, replace=False) for t in range(rep_n)]
        p = mp.Pool(cpus)
        func = partial(subsample_clustering, adata, sample_n, subsample_n, r)
        resultList = p.map(func, subsamples)
        p.close()
        p.join()

        subsampling_n = sum([result[0] for result in resultList])
        coclustering_n = sum([result[1] for result in resultList])

        subsampling_n[np.where(subsampling_n == 0)] = 1e6
        distance = 1.0 - coclustering_n / subsampling_n
        np.fill_diagonal(distance, 0.0)

        sc.tl.louvain(adata, resolution=r, key_added = 'louvain_r' + str(r))
        silhouette_avg[str(r)] = silhouette_score(distance, adata.obs['louvain_r' + str(r)], metric="precomputed")
        if silhouette_avg[str(r)] > highest_sil:
            highest_sil = silhouette_avg[str(r)]
            best_resolution = r
        print("robustness score = ", silhouette_avg[str(r)])
        sub_end = time.time()
        print('time: {}', sub_end - sub_start)
        print()
    adata.obs['louvain'] = adata.obs['louvain_r' + str(best_resolution)]
    print("resolution with highest score: ", best_resolution)
    res = best_resolution
    # write silhouette record to uns and remove the clustering results except for the one with the best resolution
    adata.uns['sihouette score'] = silhouette_avg
    # draw lineplot
    df_sil = pd.DataFrame(silhouette_avg.values(), columns=['silhouette score'], index=[float(x) for x in silhouette_avg.keys()])
    df_sil.plot.line(style='.-', color='green', title='Auto Resolution', xticks=resolutions, xlabel='resolution', ylabel='silhouette score', legend=False)
    #pp.savefig()
    #plt.close()
    end = time.time()
    print('time: {}', end-start)
    return adata, res, df_sil

omicverse.single.writeGEP(adata_GEP, path)

Write the gene expression profile to a file

Parameters:

Name Type Description Default
adata_GEP

scanpy.AnnData The single cell data with gene expression profile.

required
path

str The path to save the gene expression profile.

required
Source code in /Users/fernandozeng/miniforge3/envs/scbasset/lib/python3.8/site-packages/omicverse/single/_scdrug.py
def writeGEP(adata_GEP,path):
    r"""
    Write the gene expression profile to a file

    Arguments:
        adata_GEP : `scanpy.AnnData` The single cell data with gene expression profile. 
        path : `str` The path to save the gene expression profile.

    Returns:

    """
    print('Exporting GEP...')
    sc.pp.normalize_total(adata_GEP, target_sum=1e6)
    mat = adata_GEP.X.transpose()
    if type(mat) is not np.ndarray:
        mat = mat.toarray()
    GEP_df = pd.DataFrame(mat, index=adata_GEP.var.index)
    GEP_df.columns = adata_GEP.obs['louvain'].tolist()
    # GEP_df = GEP_df.loc[adata.var.index[adata.var.highly_variable==True]]
    GEP_df.dropna(axis=1, inplace=True)
    GEP_df.to_csv(os.path.join(path, 'GEP.txt'), sep='\t')