ShouWenWang-Lab / cospar

Further development of CoSpar happens here!
MIT License
8 stars 2 forks source link

how to get randomized clone plot #4

Closed ShouWenWang closed 10 months ago

ShouWenWang commented 10 months ago

How to get this plot from Cospar

image
ShouWenWang commented 10 months ago

I have renamed this function as clonal_fate_bias_v0. And you can call it from cospar.pl.clonal_fate_bias_v0 from the current CoSpar package.

     def clonal_fate_bias_v0(adata,selected_fate='',clone_size_thresh=3,
N_resampling=1000,compute_new=True,show_histogram=True):

Or, you can use the early version of the cospar. I checked that running earlier CoSpar version (e.g, 0.0.11) could get this plot. I have also attached the corresponding code and its output below. To get the early version, you may either download CoSpar from https://github.com/ShouWenWang-Lab/cospar, and then use ‘git checkout …’ to get the early version. You will need to be familiar with the git tool.

Running cospar 0.0.11 (python 3.6.12) on 2021-03-26 13:46.

image
ShouWenWang commented 9 months ago

You can use the following function clonal_fate_bias_v0 to get this randomized clone fate bias plot.

To see this function,

git clone https://github.com/ShouWenWang-Lab/cospar
git checkout 3d39e4d

This function is located at cospar.plotting.clonal_fate_bias_v0.

# this is based on direct simulation, providing the ranked profile for randomized clones
def clonal_fate_bias_v0(adata,selected_fate='',clone_size_thresh=3,
    N_resampling=1000,compute_new=True,show_histogram=True):
    """
    Plot clonal fate bias towards a cluster.

    This is just -log(P-value), where P-value is for the observation 
    cell fraction of a clone in the targeted cluster as compared to 
    randomized clones, where the randomized sampling produces clones 
    of the same size as the targeted clone. The computed results will 
    be saved at the directory settings.data_path.

    This function is based on direct simulation, which is time-consuming. 
    The time cost scales linearly with the number of resampling 
    and the number of clones. It provides a ranked profile
    for randomized clones.

    Parameters
    ----------
    adata: :class:`~anndata.AnnData` object
    selected_fate: `str`
        The targeted fate cluster, from adata.obs['state_info'].
    clone_size_thresh: `int`, optional (default: 3)
        Clones with size >= this threshold will be highlighted in 
        the plot in red.
    N_resampling: `int`, optional (default: 1000)
        The number of randomized sampling for assessing the P-value of a clone. 
    compute_new: `bool`, optional (default: True)
        Compute from scratch, regardless of existing saved files. 
    show_histogram: `bool`, optional (default: True)
        If true, show the distribution of inferred fate probability.

    Returns
    -------
    fate_bias: `np.array`
        Computed clonal fate bias.
    sort_idx: `np.array`
        Corresponding clone id list. 
    """

    fig_width=settings.fig_width; fig_height=settings.fig_height; point_size=settings.fig_point_size
    state_annote_0=adata.obs['state_info']
    data_des=adata.uns['data_des'][-1]
    clonal_matrix=adata.obsm['X_clone']
    data_path=settings.data_path
    data_des=adata.uns['data_des'][-1]
    figure_path=settings.figure_path
    state_list=list(set(state_annote_0))

    valid_clone_idx=(clonal_matrix.sum(0).A.flatten()>=2)
    valid_clone_id=np.nonzero(valid_clone_idx)[0]
    sel_cell_idx=(clonal_matrix[:,valid_clone_idx].sum(1).A>0).flatten()
    sel_cell_id=np.nonzero(sel_cell_idx)[0]
    clone_N=np.sum(valid_clone_idx)
    cell_N=len(sel_cell_id)

    ## annotate the state_annot
    clonal_matrix_new=clonal_matrix[sel_cell_id][:,valid_clone_id]
    state_annote_new=state_annote_0[sel_cell_id]
    hit_target=np.zeros(len(sel_cell_id),dtype=bool)

    if type(selected_fate)==str:
        selected_fate=[selected_fate]

    mega_cluster_list,valid_fate_list,fate_array_flat,sel_index_list=hf.analyze_selected_fates(selected_fate,state_annote_new)
    if len(mega_cluster_list)==0:
        logg.error("No cells selected. Computation aborted!")
        return None, None 
    else:
        fate_name=mega_cluster_list[0]
        target_idx=sel_index_list[0]

        file_name=f'{data_path}/{data_des}_clonal_fate_bias_{N_resampling}_{fate_name}.npz'

        if (not os.path.exists(file_name)) or compute_new:

            ## target clone
            target_ratio_array=np.zeros(clone_N)

            null_ratio_array=np.zeros((clone_N,N_resampling))
            P_value_up=np.zeros(clone_N)
            P_value_down=np.zeros(clone_N)
            P_value=np.zeros(clone_N)
            P_value_rsp=np.zeros((clone_N,N_resampling))

            for m in range(clone_N):
                if m%50==0:
                    logg.info(f"Current clone id: {m}")
                target_cell_idx=(clonal_matrix_new[:,m].sum(1).A>0).flatten()
                target_clone_size=np.sum(target_cell_idx) 

                if target_clone_size>0:
                    target_ratio=np.sum(target_idx[target_cell_idx])/target_clone_size
                    target_ratio_array[m]=target_ratio
                    #N_resampling=int(np.floor(cell_N/target_clone_size))

                    sel_cell_id_copy=list(np.arange(cell_N))

                    for j in range(N_resampling):
                        temp_id=np.random.choice(sel_cell_id_copy,size=target_clone_size,replace=False)
                        null_ratio_array[m,j]=np.sum(target_idx[temp_id])/target_clone_size

                    ## reprogamming clone
                    P_value_up[m]=np.sum(null_ratio_array[m]>=target_ratio)/N_resampling
                    P_value_down[m]=np.sum(null_ratio_array[m]<=target_ratio)/N_resampling
                    P_value[m]=np.min([P_value_up[m],P_value_down[m]])

                    for j1,zz in enumerate(null_ratio_array[m]):
                        P_value_up_rsp=np.sum(null_ratio_array[m]>=zz)/N_resampling
                        P_value_down_rsp=np.sum(null_ratio_array[m]<=zz)/N_resampling
                        P_value_rsp[m,j1]=np.min([P_value_up_rsp,P_value_down_rsp])            

            np.savez(file_name,P_value_rsp=P_value_rsp,P_value=P_value)

        else:
            saved_data=np.load(file_name,allow_pickle=True)
            P_value_rsp=saved_data['P_value_rsp']
            P_value=saved_data['P_value']
            #target_ratio_array=saved_data['target_ratio_array']

        ####### Plotting
        clone_size_array=clonal_matrix_new.sum(0).A.flatten()

        resol=1/N_resampling
        P_value_rsp_new=P_value_rsp.reshape((clone_N*N_resampling,))
        sort_idx=np.argsort(P_value_rsp_new)
        P_value_rsp_new=P_value_rsp_new[sort_idx]+resol
        sel_idx=((np.arange(clone_N)+1)*len(sort_idx)/clone_N).astype(int)-1
        fate_bias_rsp=-np.log10(P_value_rsp_new[sel_idx])

        sort_idx=np.argsort(P_value)
        P_value=P_value[sort_idx]+resol
        fate_bias=-np.log10(P_value)
        idx=clone_size_array[sort_idx]>=clone_size_thresh

        fig=plt.figure(figsize=(fig_width,fig_height));ax=plt.subplot(1,1,1)
        ax.plot(np.arange(len(fate_bias))[~idx],fate_bias[~idx],'.',color='blue',markersize=5,label=f'Size $<$ {int(clone_size_thresh)}')#,markeredgecolor='black',markeredgewidth=0.2)
        ax.plot(np.arange(len(fate_bias))[idx],fate_bias[idx],'.',color='red',markersize=5,label=f'Size $\ge$ {int(clone_size_thresh)}')#,markeredgecolor='black',markeredgewidth=0.2)
        ax.plot(np.arange(len(fate_bias_rsp)),fate_bias_rsp,'.',color='grey',markersize=5,label='Randomized')#,markeredgecolor='black',markeredgewidth=0.2)

        ax.spines["top"].set_visible(False)
        ax.spines["right"].set_visible(False)
        ax.set_xlabel('Clone rank')
        plt.rc('text', usetex=True)
        #ax.set_ylabel('Fate bias ($-\\log_{10}P_{value}$)')
        ax.set_ylabel('Clonal fate bias')
        ax.legend()
        #ax.set_xlim([0,0.8])
        fig.tight_layout()
        fig.savefig(f'{figure_path}/{data_des}_clonal_fate_bias.{settings.file_format_figs}')
        plt.rc('text', usetex=False)
        #plt.show()

        if show_histogram:
            target_fraction_array=(clonal_matrix_new.T*target_idx)/clone_size_array
            fig=plt.figure(figsize=(fig_width,fig_height));ax=plt.subplot(1,1,1)
            ax.hist(target_fraction_array,color='#2ca02c',density=True)
            ax.set_xlim([0,1])
            ax.set_xlabel('Clonal fraction in selected fates')
            ax.set_ylabel('Density')
            ax.spines["top"].set_visible(False)
            ax.spines["right"].set_visible(False)
            ax.set_title(f'Average: {int(np.mean(target_fraction_array)*100)/100};   Expect: {int(np.mean(target_idx)*100)/100}')
            fig.savefig(f'{figure_path}/{data_des}_observed_clonal_fraction.{settings.file_format_figs}')

        return fate_bias,sort_idx,clone_size_array[sort_idx]