RGLab / MAST

Tools and methods for analysis of single cell assay data in R
224 stars 57 forks source link

Error: grouping factors must have > 1 sampled level #187

Open andrewsong777 opened 6 months ago

andrewsong777 commented 6 months ago

I am trying to write code, but I am getting this error. I have attached a csv of my data as well.

find_de_MAST_RE <- function(adata_){
    # create a MAST object
    sca <- SceToSingleCellAssay(adata_, class = "SingleCellAssay")
    print("Dimensions before subsetting:")
    print(dim(sca))
    print("")
    # keep genes that are expressed in more than 10% of all cells
    sca <- sca[freq(sca)>0.1,]
    print("Dimensions after subsetting:")
    print(dim(sca))
    print("")
    # add a column to the data which contains scaled number of genes that are expressed in each cell
    cdr2 <- colSums(assay(sca)>0)
    colData(sca)$ngeneson <- scale(cdr2)
    # store the columns that we are interested in as factors
    label <- factor(colData(sca)$condition)
    # set the reference level
    label <- relevel(label,"control")
    colData(sca)$label <- label
    #celltype <- factor(colData(sca)$auto_manual_merged_annotation)
    #colData(sca)$celltype <- celltype
    # same for donors (which we need to model random effects)
    replicate <- factor(colData(sca)$source)
    colData(sca)$replicate <- replicate
    # create a group per condition-celltype combination
    colData(sca)$group <- paste0(colData(adata_)$condition, ".", colData(adata_)$auto_manual_merged_annotation)
    colData(sca)$group <- factor(colData(sca)$group)

    #selected_cols <- c("group", "ngeneson", "replicate")
    #selected_data <- colData(sca)[, selected_cols]
    #print(selected_data)
    #write.csv(selected_data, file = "/project/data/nb24/Control_Dataset_Analysis/test.csv", row.names = FALSE)

    print(levels(colData(sca)$replicate))
    #print(table(colData(sca)$group, colData(sca)$ngeneson))

    # define and fit the model
    zlmCond <- zlm(formula =~ ngeneson + group + (1 | replicate), 
                   sca=sca, 
                   method='glmer', 
                   ebayes=F, 
                   strictConvergence=F,
                   fitArgsD=list(nAGQ = 0)) # to speed up calculations
    print("done")

    # perform likelihood-ratio test for the condition that we are interested in    
    summaryCond <- summary(zlmCond, doLRT='neuroblastoma.Bridge')
    # get the table with log-fold changes and p-values
    summaryDt <- summaryCond$datatable
    result <- merge(summaryDt[contrast=='neuroblastoma.Bridge' & component=='H',.(primerid, `Pr(>Chisq)`)], # p-values
                     summaryDt[contrast=='neuroblastoma.Bridge' & component=='logFC', .(primerid, coef)],
                     by='primerid') # logFC coefficients
    # MAST uses natural logarithm so we convert the coefficients to log2 base to be comparable to edgeR
    result[,coef:=result[,coef]/log(2)]
    # do multiple testing correction
    result[,FDR:=p.adjust(`Pr(>Chisq)`, 'fdr')]
    result = result[result$FDR<0.01,, drop=F]

    result <- stats::na.omit(as.data.frame(result))
    return(result)
}

test.csv

My error message: Error: grouping factors must have > 1 sampled level