saeyslab / multinichenetr

MultiNicheNet: a flexible framework for differential cell-cell communication analysis from multi-sample multi-condition single-cell transcriptomics data
GNU General Public License v3.0
112 stars 14 forks source link

Issue with dplyr::standardise_join_by() with get_abundance_expression_info #15

Closed khain2650 closed 1 year ago

khain2650 commented 1 year ago

Hi all, I am working through the basic analysis steps tutorial with my own dataset. However, I get an error when I try to run `get_abundance_expression_info()`. Traceback below:

20: stop(fallback)
19: signal_abort(cnd)
18: abort(c("`by` must be supplied when `x` and `y` have no common variables.", 
        i = "use by = character()` to perform a cross-join."))
17: standardise_join_by(by, x_names = x_names, y_names = y_names)
16: join_cols(tbl_vars(x), tbl_vars(y), by = by, suffix = suffix, 
        keep = keep)
15: join_mutate(x, y, by = by, type = "inner", suffix = suffix, na_matches = na_matches, 
        keep = keep)
14: inner_join.data.frame(pseudobulk_counts_celltype$sample %>% data.frame() %>% 
        tibble::rownames_to_column("sample") %>% dplyr::mutate(effective_library_size = lib.size * 
        norm.factors), pseudobulk_counts_celltype$counts %>% data.frame() %>% 
        tibble::rownames_to_column("gene") %>% tidyr::gather(sample, 
        pb_raw, -gene))
13: dplyr::inner_join(pseudobulk_counts_celltype$sample %>% data.frame() %>% 
        tibble::rownames_to_column("sample") %>% dplyr::mutate(effective_library_size = lib.size * 
        norm.factors), pseudobulk_counts_celltype$counts %>% data.frame() %>% 
        tibble::rownames_to_column("gene") %>% tidyr::gather(sample, 
        pb_raw, -gene))
12: FUN(X[[i]], ...)
11: lapply(., function(celltype_oi, pb) {
        pseudobulk_counts_celltype = edgeR::DGEList(pb@assays@data[[celltype_oi]])
        non_zero_samples = pseudobulk_counts_celltype %>% apply(2, 
            sum) %>% .[. > 0] %>% names()
        pseudobulk_counts_celltype = pseudobulk_counts_celltype[, 
            non_zero_samples]
        pseudobulk_counts_celltype = edgeR::calcNormFactors(pseudobulk_counts_celltype)
        pseudobulk_counts_celltype_df = dplyr::inner_join(pseudobulk_counts_celltype$sample %>% 
            data.frame() %>% tibble::rownames_to_column("sample") %>% 
            dplyr::mutate(effective_library_size = lib.size * norm.factors), 
            pseudobulk_counts_celltype$counts %>% data.frame() %>% 
                tibble::rownames_to_column("gene") %>% tidyr::gather(sample, 
                pb_raw, -gene))
        pseudobulk_counts_celltype_df = pseudobulk_counts_celltype_df %>% 
            dplyr::mutate(pb_norm = pb_raw/effective_library_size) %>% 
            dplyr::mutate(pb_sample = log2((pb_norm * 1e+06) + 1)) %>% 
            tibble::as_tibble() %>% dplyr::mutate(celltype = celltype_oi)
    }, pb)
10: list2(...)
9: dplyr::bind_rows(.)
8: dplyr::select(., gene, sample, pb_sample, celltype)
7: dplyr::distinct(.)
6: sce$cluster_id %>% unique() %>% lapply(function(celltype_oi, 
       pb) {
       pseudobulk_counts_celltype = edgeR::DGEList(pb@assays@data[[celltype_oi]])
       non_zero_samples = pseudobulk_counts_celltype %>% apply(2, 
           sum) %>% .[. > 0] %>% names()
       pseudobulk_counts_celltype = pseudobulk_counts_celltype[, 
           non_zero_samples]
       pseudobulk_counts_celltype = edgeR::calcNormFactors(pseudobulk_counts_celltype)
       pseudobulk_counts_celltype_df = dplyr::inner_join(pseudobulk_counts_celltype$sample %>% 
           data.frame() %>% tibble::rownames_to_column("sample") %>% 
           dplyr::mutate(effective_library_size = lib.size * norm.factors), 
           pseudobulk_counts_celltype$counts %>% data.frame() %>% 
               tibble::rownames_to_column("gene") %>% tidyr::gather(sample, 
               pb_raw, -gene))
       pseudobulk_counts_celltype_df = pseudobulk_counts_celltype_df %>% 
           dplyr::mutate(pb_norm = pb_raw/effective_library_size) %>% 
           dplyr::mutate(pb_sample = log2((pb_norm * 1e+06) + 1)) %>% 
           tibble::as_tibble() %>% dplyr::mutate(celltype = celltype_oi)
   }, pb) %>% dplyr::bind_rows() %>% dplyr::select(gene, sample, 
       pb_sample, celltype) %>% dplyr::distinct()
5: get_pseudobulk_logCPM_exprs(sce, sample_id = sample_id, celltype_id = celltype_id, 
       group_id = group_id, batches = batches, assay_oi_pb = "counts", 
       fun_oi_pb = "sum")
4: get_avg_frac_exprs_abund(sce = sce, sample_id = sample_id, celltype_id = celltype_id, 
       group_id = group_id, batches = batches)
3: withCallingHandlers(expr, message = function(c) if (inherits(c, 
       classes)) tryInvokeRestart("muffleMessage"))
2: suppressMessages(get_avg_frac_exprs_abund(sce = sce, sample_id = sample_id, 
       celltype_id = celltype_id, group_id = group_id, batches = batches))
1: get_abundance_expression_info(sce = sce, sample_id = "orig.ident", 
       group_id = "group", celltype_id = "cluster", min_cells = min_cells, 
       senders_oi = senders_oi, receivers_oi = receivers_oi, lr_network = lr_network)
khain2650 commented 1 year ago

Closing since using the wrapper works for me.