smorabit / hdWGCNA

High dimensional weighted gene co-expression network analysis
https://smorabit.github.io/hdWGCNA/
Other
316 stars 31 forks source link

Error with function MetacellsByGroups (Error in `*tmp*`[[wgcna_name]]) #62

Closed abhisheksinghnl closed 1 year ago

abhisheksinghnl commented 1 year ago

Hey,

First of all thank you for this amazing tool. I worked with the example dataset and it worked well. Then I tried it with my own dataset and it failed at step of MetacellsByGroup.

Here are the steps I took.

# single-cell analysis package
library(Seurat)
library(SingleR)
library(celldex)
# plotting and data science packages
library(tidyverse)
library(cowplot)
library(patchwork)
library(igraph)
library(SingleCellExperiment)
library(ggpubr)

# co-expression network analysis packages:
library(WGCNA)
library(hdWGCNA)

#Read in data
data.10x = list()
data.10x[[1]] <- Read10X(data.dir = "C:/Users/lab/Downloads/GSE153760_RAW/GSM4653855/")
data.10x[[2]] <- Read10X(data.dir = "C:/Users/lab/Downloads/GSE153760_RAW/GSM4653856/")
data.10x[[3]] <- Read10X(data.dir = "C:/Users/lab/Downloads/GSE153760_RAW/GSM4653857/")
data.10x[[4]] <- Read10X(data.dir = "C:/Users/lab/Downloads/GSE153760_RAW/GSM4653858/")
data.10x[[5]] <- Read10X(data.dir = "C:/Users/lab/Downloads/GSE153760_RAW/GSM4653859/")
data.10x[[6]] <- Read10X(data.dir = "C:/Users/lab/Downloads/GSE153760_RAW/GSM4653865/")
data.10x[[7]] <- Read10X(data.dir = "C:/Users/lab/Downloads/GSE153760_RAW/GSM4653866/")
data.10x[[8]] <- Read10X(data.dir = "C:/Users/lab/Downloads/GSE153760_RAW/GSM4653867/")
data.10x[[9]] <- Read10X(data.dir = "C:/Users/lab/Downloads/GSE153760_RAW/GSM4653868/")
data.10x[[10]] <- Read10X(data.dir = "C:/Users/lab/Downloads/GSE153760_RAW/GSM4653869/")

samples = c("GSM4653855","GSM4653856","GSM4653857","GSM4653858","GSM4653859","GSM4653865","GSM4653866","GSM4653867","GSM4653868","GSM4653869")

scrna.list = list()
for (i in 1:length(data.10x)) {
  scrna.list[[i]] = CreateSeuratObject(counts = data.10x[[i]], min.cells=10, min.features=200, project=samples[i]);
}
rm(data.10x)

scrna <- merge(x=scrna.list[[1]], y=c(scrna.list[[2]],scrna.list[[3]], scrna.list[[4]], scrna.list[[5]], scrna.list[[6]],
                                      scrna.list[[7]], scrna.list[[8]], scrna.list[[9]], scrna.list[[10]] ),
               add.cell.ids =  c("GSM4653855","GSM4653856","GSM4653857","GSM4653858","GSM4653859",
                                 "GSM4653865","GSM4653866","GSM4653867","GSM4653868",
                                 "GSM4653869"), project="skin2")

metadata.D <- scrna@meta.data
# Add cell IDs to metadata
metadata.D$cells <- rownames(metadata.D)
# Create sample column
metadata.D$sample <- NA
metadata.D$sample[which(str_detect(metadata.D$cells, "^GSM4653855_"))] <- "AD"
metadata.D$sample[which(str_detect(metadata.D$cells, "^GSM4653856_"))] <- "AD"
metadata.D$sample[which(str_detect(metadata.D$cells, "^GSM4653857_"))] <- "AD"
metadata.D$sample[which(str_detect(metadata.D$cells, "^GSM4653858_"))] <- "AD"
metadata.D$sample[which(str_detect(metadata.D$cells, "^GSM4653859_"))] <- "AD"
metadata.D$sample[which(str_detect(metadata.D$cells, "^GSM4653865_"))] <- "Healthy"
metadata.D$sample[which(str_detect(metadata.D$cells, "^GSM4653866_"))] <- "Healthy"
metadata.D$sample[which(str_detect(metadata.D$cells, "^GSM4653867_"))] <- "Healthy"
metadata.D$sample[which(str_detect(metadata.D$cells, "^GSM4653868_"))] <- "Healthy"
metadata.D$sample[which(str_detect(metadata.D$cells, "^GSM4653869_"))] <- "Healthy"
# Add metadata back to Seurat object
scrna@meta.data <- metadata.D
names(scrna@meta.data)[5] <-"Condition"
head(scrna@meta.data)

scrna <- PercentageFeatureSet(scrna, "^MT-", col.name = "percent_mito")
#scrna <- PercentageFeatureSet(scrna, "^RP[SL]", col.name = "percent_ribo")

feats <- c("nFeature_RNA", "nCount_RNA")
VlnPlot(scrna, group.by = "orig.ident", features = feats, pt.size = 0.1, ncol = 3) +
  NoLegend()
FeatureScatter(scrna, "nCount_RNA", "nFeature_RNA", group.by = "orig.ident", pt.size = 0.5)

scrna <- NormalizeData(scrna, normalization.method = "LogNormalize", scale.factor = 10000)

scrna <- FindVariableFeatures(scrna, selection.method = "vst", nfeatures = 2000)

# Identify the 10 most highly variable genes
top10 <- head(VariableFeatures(scrna), 10)

# plot variable features with and without labels
plot1 <- VariableFeaturePlot(scrna)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
plot1 + plot2

scrna <- subset(scrna, subset = nFeature_RNA > 200 & nFeature_RNA < 2500)
all.genes <- rownames(scrna)
scrna<- ScaleData(scrna, features = all.genes)

scrna<- RunPCA(scrna, features = VariableFeatures(object = scrna))
print(scrna[["pca"]], dims = 1:5, nfeatures = 5)
VizDimLoadings(scrna, dims = 1:2, reduction = "pca")

DimPlot(scrna, reduction = "pca")
DimHeatmap(scrna, dims = 1, cells = 500, balanced = TRUE)

scrna <- JackStraw(scrna, num.replicate = 100)
scrna <- ScoreJackStraw(scrna, dims = 1:20)
JackStrawPlot(scrna, dims = 1:15)

ElbowPlot(scrna)

scrna <- FindNeighbors(scrna, dims = 1:10)
scrna <- FindClusters(scrna, resolution = 0.5)

scrna <- RunUMAP(scrna, dims = 1:10)
DimPlot(scrna, reduction = "umap", label = T, split.by = "Condition")

pbmc.markers <- FindAllMarkers(scrna, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
pbmc.markers %>%
  group_by(cluster) %>%
  slice_max(n = 5, order_by = avg_log2FC)->top5
DoHeatmap(scrna, features = top5$gene) + NoLegend()

## Cluster Annotation
## upload the reference data 
ref <- HumanPrimaryCellAtlasData()

## get the counts matrix from the object
counts.obj <- GetAssayData(scrna, slot = "counts", assay  = "RNA")

## find the common markers with the reference data and only use those
common <- intersect(rownames(ref), rownames(counts.obj))
counts.obj.singler <- counts.obj[common,]
ref <- ref[common,]

### Create SingleR Predictions
counts.obj.singler.sce <- SingleCellExperiment(assays = list(counts = counts.obj.singler), 
                                               colData = DataFrame(clusters=scrna$RNA_snn_res.0.5))
counts.obj.singler.sce <- logNormCounts(counts.obj.singler.sce)

singler.pred <- SingleR(test = counts.obj.singler.sce@assays@data@listData$logcounts, 
                        ref = ref, 
                        labels = ref$label.main, 
                        clusters = counts.obj.singler.sce$clusters)

singler.results <- data.frame(cell = rownames(singler.pred), singler = singler.pred$labels)

colnames(singler.results) <- c("Seurat_Cluster", "SingleR_Celltype")

# save old idents for reference
scrna[["old.ident"]] <- Idents(object = scrna)

## rename clusters with cell type
celltypes <- singler.results[ ,2]
names(celltypes) <- singler.results[ ,1]
scrna <- RenameIdents(scrna, celltypes)

scrna<-AddMetaData(scrna, scrna@active.ident,col.name = "CellType")

## Visualise as umap
DimPlot(scrna, label = T, split.by = "Condition", group.by = "CellType") + ggtitle("SingleR results")

#### WGCNA ####
p <- DimPlot(scrna, group.by=c('CellType','Sample'), label=TRUE) +
  umap_theme() + ggtitle('Skin') + NoLegend()
p

seurat_obj <- SetupForWGCNA(
  scrna,
  gene_select = "fraction", # the gene selection approach
  fraction = 0.05, # fraction of cells that a gene needs to be expressed in order to be included
  wgcna_name = "tutorial" # the name of the hdWGCNA experiment
)

Look at the metadata

                            orig.ident nCount_RNA nFeature_RNA                         cells Condition     Sample percent_mito
GSM4653855_AAACCTGAGAAGGTTT-1 GSM4653855       7869         1711 GSM4653855_AAACCTGAGAAGGTTT-1        AD GSM4653855     4.003050
GSM4653855_AAACCTGAGGCCCTTG-1 GSM4653855       6182         1238 GSM4653855_AAACCTGAGGCCCTTG-1        AD GSM4653855     2.717567
GSM4653855_AAACCTGAGGGAAACA-1 GSM4653855        727          308 GSM4653855_AAACCTGAGGGAAACA-1        AD GSM4653855    33.975241
GSM4653855_AAACCTGCAACACCCG-1 GSM4653855       7326         1687 GSM4653855_AAACCTGCAACACCCG-1        AD GSM4653855     2.047502
GSM4653855_AAACCTGGTCGAATCT-1 GSM4653855       6016         1422 GSM4653855_AAACCTGGTCGAATCT-1        AD GSM4653855     3.241356
GSM4653855_AAACCTGTCGTGGGAA-1 GSM4653855       5300         1380 GSM4653855_AAACCTGTCGTGGGAA-1        AD GSM4653855     2.905660
                              RNA_snn_res.0.5 seurat_clusters old.ident         CellType
GSM4653855_AAACCTGAGAAGGTTT-1               5               5         5 Epithelial_cells
GSM4653855_AAACCTGAGGCCCTTG-1               2               2         2    Keratinocytes
GSM4653855_AAACCTGAGGGAAACA-1               7               7         7 Epithelial_cells
GSM4653855_AAACCTGCAACACCCG-1               2               2         2    Keratinocytes
GSM4653855_AAACCTGGTCGAATCT-1               2               2         2    Keratinocytes
GSM4653855_AAACCTGTCGTGGGAA-1               2               2         2    Keratinocytes
# construct metacells  in each group
seurat_obj <- MetacellsByGroups(
  scrna,
  group.by = c("CellType","Sample"), # specify the columns in seurat_obj@meta.data to group by
  k = 25, # nearest-neighbors parameter
  max_shared = 10, # maximum number of shared cells between two metacells
  ident.group = 'CellType',
  reduction = "umap"# set the Idents of the metacell seurat object
)

Error that I get

Error in `*tmp*`[[wgcna_name]] : 
  attempt to select less than one element in get1index
In addition: Warning message:
In MetacellsByGroups(scrna, group.by = c("CellType", "Sample"),  :
  Removing the following groups that did not meet min_cells: DC#GSM4653855, DC#GSM4653865, DC#GSM4653867, Endothelial_cells#GSM4653859, Epithelial_cells#GSM4653856, Epithelial_cells#GSM4653857, Epithelial_cells#GSM4653858, Epithelial_cells#GSM4653859, Epithelial_cells#GSM4653867, Fibroblasts#GSM4653859, Keratinocytes#GSM4653858, Keratinocytes#GSM4653867, Monocyte#GSM4653857, Monocyte#GSM4653859, Monocyte#GSM4653868, Monocyte#GSM4653869, Neurons#GSM4653857, Neurons#GSM4653858, Neurons#GSM4653859, Neurons#GSM4653867, Neurons#GSM4653868, Neurons#GSM4653869, T_cells#GSM4653855, Tissue_stem_cells#GSM4653859

I am not sure what I am doing wrong, could you please help me in fixing this error.

Thank you for your time

smorabit commented 1 year ago

Hi,

What version of hdWGCNA are you running? Also wondering if you able to run MetacellsByGroups successfully with different parameters for group.by? At this time unfortunately I am not able to reproduce this error on my datasets.

smorabit commented 1 year ago

Closing due to inactivity but feel free to re-open.