IanevskiAleksandr / sc-type

GNU General Public License v3.0
223 stars 44 forks source link

Error in base::rowSums(x, na.rm = na.rm, dims = dims, ...): 'x' must be an array with at least two dimensions #17

Closed Tommaso12 closed 1 year ago

Tommaso12 commented 1 year ago

Hello, I have a problem with the rowSums function that I do not quite understand due to my es.max object is of class "matrix" "array". Could you maybe spot a mistake in my code below? thank you a lot for your help! https://server1.mohrkeg.co.at/rstudio/file_show?path=%2Fsrv%2Fhome%2Ftgastaldi%2FRProjects%2FMelanoma_immunotherapy_resistance_scRNA_2018%2Fscripts%2Fanalysis.html

IanevskiAleksandr commented 1 year ago

Hello,

The code is not accessible. Could you please share it here?

BR, Aleksandr

Tommaso12 commented 1 year ago

Ah Im sorry. Here the code:

knitr::opts_chunk$set(message = FALSE, warning = FALSE, error=TRUE)
library(dplyr)
## 
## Attache Paket: 'dplyr'
## Die folgenden Objekte sind maskiert von 'package:stats':
## 
##     filter, lag
## Die folgenden Objekte sind maskiert von 'package:base':
## 
##     intersect, setdiff, setequal, union
library(Seurat)
## Attaching SeuratObject
library(patchwork)
library(HGNChelper)
source("https://raw.githubusercontent.com/IanevskiAleksandr/sc-type/master/R/gene_sets_prepare.R")
source("https://raw.githubusercontent.com/IanevskiAleksandr/sc-type/master/R/sctype_score_.R")
wd <- params$wd
results_dir = paste0(wd,"/results/")
plots_dir = paste0(wd,"/plots/")
data_dir =  paste0(wd,"/data/")
#re-build seurat obejct

d <- readRDS(paste0(data_dir,"Mel.all.data.QC.rds"))

cd <- d$cd

metadata <- data.frame(cohort = d$cohort, cellType = d$cell.type, treated = d$treated)

rownames(metadata) <- c(colnames(d$cd))

ImmRes <- CreateSeuratObject(counts = d$cd, meta.data = metadata, min.cells = 3, min.features = 200)

saveRDS(ImmRes, paste0(results_dir, "ImmRes.rds"))
#load ImmRes.rds

ImmRes <- readRDS( file = paste0(results_dir, "ImmRes.rds"))

ImmRes <- NormalizeData(ImmRes, normalization.method = "LogNormalize", scale.factor = 10000)

ImmRes <- FindVariableFeatures(ImmRes, selection.method = "vst", nfeatures = 2000)

top10 <- head(VariableFeatures(ImmRes), 10)

plot1 <- VariableFeaturePlot(ImmRes)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
plot1 + plot2

all.genes <- rownames(ImmRes)
ImmRes <- ScaleData(ImmRes, features = all.genes)

ImmRes <- RunPCA(ImmRes, features = VariableFeatures(object = ImmRes))
print(ImmRes[["pca"]], dims = 1:5, nfeatures = 5)
VizDimLoadings(ImmRes, dims = 1:2, reduction = "pca")
DimPlot(ImmRes, reduction = "pca")
DimHeatmap(ImmRes, dims = 1, cells = 500, balanced = TRUE)
DimHeatmap(ImmRes, dims = 1:15, cells = 500, balanced = TRUE)

# NOTE: This process can take a long time for big datasets, comment out for expediency. More
# approximate techniques such as those implemented in ElbowPlot() can be used to reduce
# computation time
ImmRes <- JackStraw(ImmRes, num.replicate = 100)
ImmRes <- ScoreJackStraw(ImmRes, dims = 1:20)
JackStrawPlot(ImmRes, dims = 1:15)
ElbowPlot(ImmRes)

ImmRes <- FindNeighbors(ImmRes, dims = 1:10)
ImmRes <- FindClusters(ImmRes, resolution = 1.2)

head(Idents(ImmRes), 5)

ImmRes <- RunUMAP(ImmRes, dims = 1:24)

DimPlot(ImmRes, reduction = "umap", label = TRUE)

# ImmRes.markers <- FindAllMarkers(ImmRes, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
# ImmRes.markers %>%
#     group_by(cluster) %>%
#     slice_max(n = 2, order_by = avg_log2FC)

## create a new RNA matrix for non malignant cells

ImmRes.no.mal <- subset(ImmRes, subset = cellType != "Mal")

saveRDS(ImmRes.no.mal, paste0(results_dir, "ImmRes.no.mal.rds"))
#load ImmRes.no.mal
ImmRes.no.mal <- readRDS(paste0(results_dir, "ImmRes.no.mal.rds"))

# DB file
db_ = "https://raw.githubusercontent.com/IanevskiAleksandr/sc-type/master/ScTypeDB_full.xlsx";
tissue = "Immune system" 

# prepare gene sets
gs_list = gene_sets_prepare(db_, tissue)

# assign cell types
es.max = sctype_score(scRNAseqData = ImmRes.no.mal[["RNA"]]@scale.data, scaled = TRUE, gs = gs_list$gs_positive, gs2 = gs_list$gs_negative)

# View results, cell-type by cell matrix. See the complete example below
#View(es.max)

# merge by cluster
cL_resutls = do.call("rbind", lapply(unique(ImmRes.no.mal@meta.data$seurat_clusters), function(cl){
    es.max.cl = sort(rowSums(es.max[ ,rownames(ImmRes.no.mal@meta.data[ImmRes.no.mal@meta.data$seurat_clusters==cl, ])]), decreasing = !0)
    head(data.frame(cluster = cl, type = names(es.max.cl), scores = es.max.cl, ncells = sum(ImmRes.no.mal@meta.data$seurat_clusters==cl)), 10)
}))
## Error in base::rowSums(x, na.rm = na.rm, dims = dims, ...): 'x' muss ein Array mit mindestens zwei Dimensionen sein
sctype_scores = cL_resutls %>% group_by(cluster) %>% top_n(n = 1, wt = scores) 
## Error in group_by(., cluster): Objekt 'cL_resutls' nicht gefunden
# set low-confident (low ScType score) clusters to "unknown"
sctype_scores$type[as.numeric(as.character(sctype_scores$scores)) < sctype_scores$ncells/4] = "Unknown"
## Error in sctype_scores$type[as.numeric(as.character(sctype_scores$scores)) < : Objekt 'sctype_scores' nicht gefunden
print(sctype_scores[,1:3])
## Error in print(sctype_scores[, 1:3]): Objekt 'sctype_scores' nicht gefunden

Thank you a lot!

scas224 commented 1 year ago

Hi, I am receiving the same error in a similar situation. Did you ever figure out how to solve it?

Tommaso12 commented 1 year ago

Hi @scas224 , I think there was a problem in the ImmRes.no.mal object I used and in the metadata. This was a subset object from the original I had ImmRes below. If I use the full object then it works perfectly without any error. I can't really say what was the problem behind, probably the subsetting I did before.

cL_results = do.call("rbind", lapply(unique(ImmRes@meta.data$seurat_clusters), function(cl){
     es.max.cl = sort(rowSums(es.max[ ,rownames(ImmRes@meta.data[ImmRes@meta.data$seurat_clusters==cl, ])]), decreasing = !0)
     head(data.frame(cluster = cl, type = names(es.max.cl), scores = es.max.cl, ncells = sum(ImmRes@meta.data$seurat_clusters==cl)), 10)
 }))
 sctype_scores = cL_results %>% group_by(cluster) %>% top_n(n = 1, wt = scores)
IanevskiAleksandr commented 1 year ago

Hi @Tommaso12 @scas224 ,

After subsetting the object, the "scaled matrix" is not preserved. You should rescale again after subsetting the data, see: https://github.com/satijalab/seurat/issues/2365

So you should either rescale the object and run as you did:

es.max = sctype_score(scRNAseqData = ImmRes.no.mal[["RNA"]]@scale.data, scaled = TRUE, gs = gs_list$gs_positive, gs2 = gs_list$gs_negative)

or, just use the non-scaled matrix and tell ScType that the matrix is not scaled:

es.max = sctype_score(scRNAseqData = ImmRes.no.mal[["RNA"]]@data, scaled = FALSE, gs = gs_list$gs_positive, gs2 = gs_list$gs_negative)

BR, Aleksandr

Tommaso12 commented 1 year ago

@IanevskiAleksandr thank you a lot for the tip! 💪

scas224 commented 1 year ago

@IanevskiAleksandr I am also using subsetted data and did not get the error when using the full object, but I ran SCTransform to re-scale the subset and am still receiving the same error. Is there anything else that could be causing the error?

pbmc_merged_blood <- SCTransform(pbmc_merged_blood, vars.to.regress = "percent.mt", verbose = FALSE)

lapply(c("dplyr","Seurat","HGNChelper"), library, character.only = T)

source("https://raw.githubusercontent.com/IanevskiAleksandr/sc-type/master/R/gene_sets_prepare.R"); source("https://raw.githubusercontent.com/IanevskiAleksandr/sc-type/master/R/sctype_score_.R")

gs_list = gene_sets_prepare("https://raw.githubusercontent.com/IanevskiAleksandr/sc-type/master/ScTypeDB_short.xlsx", "Immune system") # e.g. Immune system, Liver, Pancreas, Kidney, Eye, Brain

scRNAseqData = pbmc_merged_blood@assays$SCT@scale.data; #load example scRNA-seq matrix es.max = sctype_score(scRNAseqData = scRNAseqData, scaled = TRUE, gs = gs_list$gs_positive, gs2 = gs_list$gs_negative)

cL_resutls = do.call("rbind", lapply(unique(pbmc_merged_blood@meta.data$seurat_clusters), function(cl){ es.max.cl = sort(rowSums(es.max[ ,rownames(pbmc_merged_blood@meta.data[pbmc_merged_blood@meta.data$seurat_clusters==cl, ])]), decreasing = !0) head(data.frame(cluster = cl, type = names(es.max.cl), scores = es.max.cl, ncells = sum(pbmc_merged_blood@meta.data$seurat_clusters==cl)), 10) })) Error in base::rowSums(x, na.rm = na.rm, dims = dims, ...) : 'x' must be an array of at least two dimensions 8. stop("'x' must be an array of at least two dimensions") 7. base::rowSums(x, na.rm = na.rm, dims = dims, ...) 6. rowSums(es.max[, rownames(pbmc_merged_blood@meta.data[pbmc_merged_blood@meta.data$seurat_clusters == cl, ])]) 5. rowSums(es.max[, rownames(pbmc_merged_blood@meta.data[pbmc_merged_blood@meta.data$seurat_clusters == cl, ])]) 4. sort(rowSums(es.max[, rownames(pbmc_merged_blood@meta.data[pbmc_merged_blood@meta.data$seurat_clusters == cl, ])]), decreasing = !0) 3. FUN(X[[i]], ...) 2. lapply(unique(pbmc_merged_blood@meta.data$seurat_clusters), function(cl) { es.max.cl = sort(rowSums(es.max[, rownames(pbmc_merged_blood@meta.data[pbmc_merged_blood@meta.data$seurat_clusters == cl, ])]), decreasing = !0) head(data.frame(cluster = cl, type = names(es.max.cl), scores = es.max.cl, ... 1. do.call("rbind", lapply(unique(pbmc_merged_blood@meta.data$seurat_clusters), function(cl) { es.max.cl = sort(rowSums(es.max[, rownames(pbmc_merged_blood@meta.data[pbmc_merged_blood@meta.data$seurat_clusters == cl, ])]), decreasing = !0) ...

IanevskiAleksandr commented 1 year ago

@scas224

Could you print the dimenstions of: scRNAseqData, and es.max: print(dim(scRNAseqData)) print(scRNAseqData[1:5,1:5])

print(dim(es.max)) print(es.max[1:5,1:5])

scas224 commented 1 year ago

@IanevskiAleksandr

print(dim(scRNAseqData)) [1] 3000 8622 print(scRNAseqData[1:5,1:5]) AgedBlood_AAACCTGAGCCACTAT-1 AgedBlood_AAACCTGAGTGAACAT-1 AgedBlood_AAACCTGCACAAGTAA-1 AgedBlood_AAACCTGCACACCGCA-1 AgedBlood_AAACCTGCAGGTGGAT-1 Rb1cc1 -0.512388691 0.76187090 -0.55451559 -0.79083055 -0.619545468 Ncoa2 -0.566647978 -0.90339855 -0.62537123 -0.88991383 -0.872240373 Paqr8 -0.118482909 -0.18687755 -0.13043082 -0.18274169 -0.182223221 Gsta3 -0.003515415 -0.01481537 -0.00563955 -0.01856632 -0.007405477 Gm28836 -0.028314969 -0.03860338 -0.03046086 -0.04548837 -0.025974417

print(dim(es.max)) [1] 34 8622 print(es.max[1:5,1:5]) AgedBlood_AAACCTGAGCCACTAT-1 AgedBlood_AAACCTGAGTGAACAT-1 AgedBlood_AAACCTGCACAAGTAA-1 AgedBlood_AAACCTGCACACCGCA-1 Pro-B cells 2.18074437 -0.5086423 -0.5353237 -0.4448562 Pre-B cells -0.41884539 -0.3104448 -0.3644087 -0.1968483 Immature B cells -0.42348711 -0.3403791 -0.3762546 -0.2374019 Naive B cells -0.09069297 -0.5033071 -0.1178560 -0.6674868 Memory B cells -0.09069297 -0.5033071 -0.1178560 -0.6674868 AgedBlood_AAACCTGCAGGTGGAT-1 Pro-B cells -0.4322325 Pre-B cells -0.3036421 Immature B cells -0.3277322 Naive B cells -0.2773368 Memory B cells -0.2773368

IanevskiAleksandr commented 1 year ago

@scas224 strange, what is the output of unique(pbmc_merged_blood@meta.data$seurat_clusters)

if looks fine then:

for(cl %in% unique(pbmc_merged_blood@meta.data$seurat_clusters)){ print(cl); es.max.cl = sort(rowSums(es.max[ ,rownames(pbmc_merged_blood@meta.data[pbmc_merged_blood@meta.data$seurat_clusters==cl, ])]), decreasing = !0); print(head(data.frame(cluster = cl, type = names(es.max.cl), scores = es.max.cl, ncells = sum(pbmc_merged_blood@meta.data$seurat_clusters==cl)), 3)) }

scas224 commented 1 year ago

@IanevskiAleksandr

unique(pbmc_merged_blood@meta.data$seurat_clusters) [1] 5 1 0 3 20 2 4 11 16 12 15 8 21 18 22 10 14 Levels: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23

I noticed that some clusters do not have any cells now that I subsetted it, would removing those solve the problem? If so, what would be the best way to do that?

IanevskiAleksandr commented 1 year ago

@scas224 more likely, run this to see at which cluster it fails:

for(cl %in% unique(pbmc_merged_blood@meta.data$seurat_clusters)){ print(cl); es.max.cl = sort(rowSums(es.max[ ,rownames(pbmc_merged_blood@meta.data[pbmc_merged_blood@meta.data$seurat_clusters==cl, ])]), decreasing = !0); print(head(data.frame(cluster = cl, type = names(es.max.cl), scores = es.max.cl, ncells = sum(pbmc_merged_blood@meta.data$seurat_clusters==cl)), 3)) }