smorabit / hdWGCNA

High dimensional weighted gene co-expression network analysis
https://smorabit.github.io/hdWGCNA/
Other
315 stars 31 forks source link

Tweaking the number of modules #252

Open Thapeachydude opened 1 month ago

Thapeachydude commented 1 month ago

Hello,

I was wondering if there is a "good" / "correct" way for tweaking the number of discovered modules? I.e. I'm running a dataset of 300k cells, but I only end up with 5 modules, of which 3 are found in an individual cell type. The majority of cells (> 80%) share a single module.

I suspect a lot of the transcriptonal heterogeity is still "hidden", hence is there a parameter to tweak to allow for more modules? (instead of subsetting to the 80% or so with a single module and re-running?).

Best

smorabit commented 1 month ago

Hi,

If you can paste your code here then I may be able to provide some advice for what you could change.

Thapeachydude commented 3 weeks ago

Sure. It's quite long, so if there is anything specific you want to see let me know. Also happy to provide plots.

## Input is a seurat object created from an SCE class object. 

## Create seurat object
sce.to.seurat <- CreateSeuratObject(counts = count_mat.raw, assay = "RNA") # create seurat object

## Assign normalized counts
sce.to.seurat[["RNA"]]@data <- count_mat

## Pass variable genes
VariableFeatures(sce.to.seurat) <- hvgenes

## Pass dimred
sce.to.seurat[["PCA"]] <- CreateDimReducObject(embeddings = as.matrix(reducedDim(sce.x, "PCA")),
                                                   key = "PCA", assay = "RNA")

## Pass metadata
sce.to.seurat@meta.data$Treatment <- sce.x$Treatment
sce.to.seurat@meta.data$patientID <- sce.x$patientID
sce.to.seurat@meta.data$Batch <- sce.x$Batch
sce.to.seurat@meta.data$celltype <- sce.x$celltype

## Pass grouping for hdWGCNA
sce.to.seurat@meta.data$groupingVar <- paste( sce.to.seurat@meta.data$patientID,  sce.to.seurat@meta.data$Treatment,  sce.to.seurat@meta.data$celltype, sep = "_")

 message("Setting up the hdwgcna object")
 set.seed(100)
 sce.to.seurat <- SetupForWGCNA(sce.to.seurat, gene_select = "variable", # the gene selection approach
                                 wgcna_name = "hdwgcna")

## Create meta cells
opt$metaover <- 10
opt$k <- 25
opt$nmetacells <- 1000

set.seed(100)
sce.to.seurat <- MetacellsByGroups(seurat_obj = sce.to.seurat, group.by = "groupingVar", # by patientID, celltype, treatment
                                       reduction = "PCA", # select the dimensionality reduction to perform KNN on
                                       k = opt$k, # nearest-neighbors parameter
                                       max_shared = opt$metaover, # maximum number of shared cells between two metacells
                                       ident.group = "groupingVar", # set the Idents of the metacell seurat object
                                       wgcna_name = "hdwgcna", mode = "average", min_cells = 100, 
                                       slot = "counts", verbose = FALSE, target_metacells = opt$nmetacells, 
                                       max_iter = 5000
                                       )

sce.to.seurat <- NormalizeMetacells(sce.to.seurat)

sce.to.seurat <- SetDatExpr(seurat_obj = sce.to.seurat, 
group_name = sce.to.seurat@misc[["hdwgcna"]]$wgcna_params$metacell_stats$groupingVar, 
                                group.by = "groupingVar", 
                                assay = "RNA", # using RNA assay
                                slot = "data", # using normalized data
                                use_metacells = TRUE, 
                                wgcna_name = "hdwgcna"
    )

##  
# For datasets > 100'000 cells I have another step to subset the metacells before TestSoftPowers to reduce the computational burden - not shown here. 
##

## Test different soft powers
message("Testing for soft powers")
set.seed(100)
sce.to.seurat <- TestSoftPowers(sce.to.seurat, networkType = "signed", # you can also use "unsigned" or "signed hybrid"
                                  powers = c(seq(1, 10, by = 1), seq(12, 30, by = 2)
                                  ))

## Part 2 - After looking at results of softpower analysis
soft.pw <- 10 # determined after looking at the plots above
sce.to.seurat <- ConstructNetwork(sce.to.seurat, soft_power = soft.pw, setDatExpr = FALSE, overwrite_tom = TRUE, 
tom_outdir = "hdwgcna/TOM", tom_name = tom.name, wgcna_name = "hdwgcna")

## compute all MEs in the full single-cell dataset
opt$harmonize <- FALSE
set.seed(100)
sce.to.seurat <- ModuleEigengenes(sce.to.seurat, verbose = TRUE, harmonized = opt$harmonize) 

## compute eigengene-based connectivity (kME)
message("Computing kMEs")
set.seed(100)
sce.to.seurat <- ModuleConnectivity(sce.to.seurat, group.by = "groupingVar", 
                                      group_name = unique(sce.to.seurat@meta.data$groupingVar),
                                      sparse = TRUE, harmonized = opt$harmonize)

## More code to assign output back to the original SCE