Closed Tommaso12 closed 1 year ago
Hello,
The code is not accessible. Could you please share it here?
BR, Aleksandr
Ah Im sorry. Here the code:
knitr::opts_chunk$set(message = FALSE, warning = FALSE, error=TRUE)
library(dplyr)
##
## Attache Paket: 'dplyr'
## Die folgenden Objekte sind maskiert von 'package:stats':
##
## filter, lag
## Die folgenden Objekte sind maskiert von 'package:base':
##
## intersect, setdiff, setequal, union
library(Seurat)
## Attaching SeuratObject
library(patchwork)
library(HGNChelper)
source("https://raw.githubusercontent.com/IanevskiAleksandr/sc-type/master/R/gene_sets_prepare.R")
source("https://raw.githubusercontent.com/IanevskiAleksandr/sc-type/master/R/sctype_score_.R")
wd <- params$wd
results_dir = paste0(wd,"/results/")
plots_dir = paste0(wd,"/plots/")
data_dir = paste0(wd,"/data/")
#re-build seurat obejct
d <- readRDS(paste0(data_dir,"Mel.all.data.QC.rds"))
cd <- d$cd
metadata <- data.frame(cohort = d$cohort, cellType = d$cell.type, treated = d$treated)
rownames(metadata) <- c(colnames(d$cd))
ImmRes <- CreateSeuratObject(counts = d$cd, meta.data = metadata, min.cells = 3, min.features = 200)
saveRDS(ImmRes, paste0(results_dir, "ImmRes.rds"))
#load ImmRes.rds
ImmRes <- readRDS( file = paste0(results_dir, "ImmRes.rds"))
ImmRes <- NormalizeData(ImmRes, normalization.method = "LogNormalize", scale.factor = 10000)
ImmRes <- FindVariableFeatures(ImmRes, selection.method = "vst", nfeatures = 2000)
top10 <- head(VariableFeatures(ImmRes), 10)
plot1 <- VariableFeaturePlot(ImmRes)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
plot1 + plot2
all.genes <- rownames(ImmRes)
ImmRes <- ScaleData(ImmRes, features = all.genes)
ImmRes <- RunPCA(ImmRes, features = VariableFeatures(object = ImmRes))
print(ImmRes[["pca"]], dims = 1:5, nfeatures = 5)
VizDimLoadings(ImmRes, dims = 1:2, reduction = "pca")
DimPlot(ImmRes, reduction = "pca")
DimHeatmap(ImmRes, dims = 1, cells = 500, balanced = TRUE)
DimHeatmap(ImmRes, dims = 1:15, cells = 500, balanced = TRUE)
# NOTE: This process can take a long time for big datasets, comment out for expediency. More
# approximate techniques such as those implemented in ElbowPlot() can be used to reduce
# computation time
ImmRes <- JackStraw(ImmRes, num.replicate = 100)
ImmRes <- ScoreJackStraw(ImmRes, dims = 1:20)
JackStrawPlot(ImmRes, dims = 1:15)
ElbowPlot(ImmRes)
ImmRes <- FindNeighbors(ImmRes, dims = 1:10)
ImmRes <- FindClusters(ImmRes, resolution = 1.2)
head(Idents(ImmRes), 5)
ImmRes <- RunUMAP(ImmRes, dims = 1:24)
DimPlot(ImmRes, reduction = "umap", label = TRUE)
# ImmRes.markers <- FindAllMarkers(ImmRes, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
# ImmRes.markers %>%
# group_by(cluster) %>%
# slice_max(n = 2, order_by = avg_log2FC)
## create a new RNA matrix for non malignant cells
ImmRes.no.mal <- subset(ImmRes, subset = cellType != "Mal")
saveRDS(ImmRes.no.mal, paste0(results_dir, "ImmRes.no.mal.rds"))
#load ImmRes.no.mal
ImmRes.no.mal <- readRDS(paste0(results_dir, "ImmRes.no.mal.rds"))
# DB file
db_ = "https://raw.githubusercontent.com/IanevskiAleksandr/sc-type/master/ScTypeDB_full.xlsx";
tissue = "Immune system"
# prepare gene sets
gs_list = gene_sets_prepare(db_, tissue)
# assign cell types
es.max = sctype_score(scRNAseqData = ImmRes.no.mal[["RNA"]]@scale.data, scaled = TRUE, gs = gs_list$gs_positive, gs2 = gs_list$gs_negative)
# View results, cell-type by cell matrix. See the complete example below
#View(es.max)
# merge by cluster
cL_resutls = do.call("rbind", lapply(unique(ImmRes.no.mal@meta.data$seurat_clusters), function(cl){
es.max.cl = sort(rowSums(es.max[ ,rownames(ImmRes.no.mal@meta.data[ImmRes.no.mal@meta.data$seurat_clusters==cl, ])]), decreasing = !0)
head(data.frame(cluster = cl, type = names(es.max.cl), scores = es.max.cl, ncells = sum(ImmRes.no.mal@meta.data$seurat_clusters==cl)), 10)
}))
## Error in base::rowSums(x, na.rm = na.rm, dims = dims, ...): 'x' muss ein Array mit mindestens zwei Dimensionen sein
sctype_scores = cL_resutls %>% group_by(cluster) %>% top_n(n = 1, wt = scores)
## Error in group_by(., cluster): Objekt 'cL_resutls' nicht gefunden
# set low-confident (low ScType score) clusters to "unknown"
sctype_scores$type[as.numeric(as.character(sctype_scores$scores)) < sctype_scores$ncells/4] = "Unknown"
## Error in sctype_scores$type[as.numeric(as.character(sctype_scores$scores)) < : Objekt 'sctype_scores' nicht gefunden
print(sctype_scores[,1:3])
## Error in print(sctype_scores[, 1:3]): Objekt 'sctype_scores' nicht gefunden
Thank you a lot!
Hi, I am receiving the same error in a similar situation. Did you ever figure out how to solve it?
Hi @scas224 , I think there was a problem in the ImmRes.no.mal
object I used and in the metadata. This was a subset object from the original I had ImmRes
below. If I use the full object then it works perfectly without any error. I can't really say what was the problem behind, probably the subsetting I did before.
cL_results = do.call("rbind", lapply(unique(ImmRes@meta.data$seurat_clusters), function(cl){
es.max.cl = sort(rowSums(es.max[ ,rownames(ImmRes@meta.data[ImmRes@meta.data$seurat_clusters==cl, ])]), decreasing = !0)
head(data.frame(cluster = cl, type = names(es.max.cl), scores = es.max.cl, ncells = sum(ImmRes@meta.data$seurat_clusters==cl)), 10)
}))
sctype_scores = cL_results %>% group_by(cluster) %>% top_n(n = 1, wt = scores)
Hi @Tommaso12 @scas224 ,
After subsetting the object, the "scaled matrix" is not preserved. You should rescale again after subsetting the data, see: https://github.com/satijalab/seurat/issues/2365
So you should either rescale the object and run as you did:
es.max = sctype_score(scRNAseqData = ImmRes.no.mal[["RNA"]]@scale.data, scaled = TRUE, gs = gs_list$gs_positive, gs2 = gs_list$gs_negative)
or, just use the non-scaled matrix and tell ScType that the matrix is not scaled:
es.max = sctype_score(scRNAseqData = ImmRes.no.mal[["RNA"]]@data, scaled = FALSE, gs = gs_list$gs_positive, gs2 = gs_list$gs_negative)
BR, Aleksandr
@IanevskiAleksandr thank you a lot for the tip! 💪
@IanevskiAleksandr I am also using subsetted data and did not get the error when using the full object, but I ran SCTransform to re-scale the subset and am still receiving the same error. Is there anything else that could be causing the error?
pbmc_merged_blood <- SCTransform(pbmc_merged_blood, vars.to.regress = "percent.mt", verbose = FALSE)
lapply(c("dplyr","Seurat","HGNChelper"), library, character.only = T)
source("https://raw.githubusercontent.com/IanevskiAleksandr/sc-type/master/R/gene_sets_prepare.R"); source("https://raw.githubusercontent.com/IanevskiAleksandr/sc-type/master/R/sctype_score_.R")
gs_list = gene_sets_prepare("https://raw.githubusercontent.com/IanevskiAleksandr/sc-type/master/ScTypeDB_short.xlsx", "Immune system") # e.g. Immune system, Liver, Pancreas, Kidney, Eye, Brain
scRNAseqData = pbmc_merged_blood@assays$SCT@scale.data; #load example scRNA-seq matrix es.max = sctype_score(scRNAseqData = scRNAseqData, scaled = TRUE, gs = gs_list$gs_positive, gs2 = gs_list$gs_negative)
cL_resutls = do.call("rbind", lapply(unique(pbmc_merged_blood@meta.data$seurat_clusters), function(cl){ es.max.cl = sort(rowSums(es.max[ ,rownames(pbmc_merged_blood@meta.data[pbmc_merged_blood@meta.data$seurat_clusters==cl, ])]), decreasing = !0) head(data.frame(cluster = cl, type = names(es.max.cl), scores = es.max.cl, ncells = sum(pbmc_merged_blood@meta.data$seurat_clusters==cl)), 10) })) Error in base::rowSums(x, na.rm = na.rm, dims = dims, ...) : 'x' must be an array of at least two dimensions 8. stop("'x' must be an array of at least two dimensions") 7. base::rowSums(x, na.rm = na.rm, dims = dims, ...) 6. rowSums(es.max[, rownames(pbmc_merged_blood@meta.data[pbmc_merged_blood@meta.data$seurat_clusters == cl, ])]) 5. rowSums(es.max[, rownames(pbmc_merged_blood@meta.data[pbmc_merged_blood@meta.data$seurat_clusters == cl, ])]) 4. sort(rowSums(es.max[, rownames(pbmc_merged_blood@meta.data[pbmc_merged_blood@meta.data$seurat_clusters == cl, ])]), decreasing = !0) 3. FUN(X[[i]], ...) 2. lapply(unique(pbmc_merged_blood@meta.data$seurat_clusters), function(cl) { es.max.cl = sort(rowSums(es.max[, rownames(pbmc_merged_blood@meta.data[pbmc_merged_blood@meta.data$seurat_clusters == cl, ])]), decreasing = !0) head(data.frame(cluster = cl, type = names(es.max.cl), scores = es.max.cl, ... 1. do.call("rbind", lapply(unique(pbmc_merged_blood@meta.data$seurat_clusters), function(cl) { es.max.cl = sort(rowSums(es.max[, rownames(pbmc_merged_blood@meta.data[pbmc_merged_blood@meta.data$seurat_clusters == cl, ])]), decreasing = !0) ...
@scas224
Could you print the dimenstions of: scRNAseqData, and es.max:
print(dim(scRNAseqData))
print(scRNAseqData[1:5,1:5])
print(dim(es.max))
print(es.max[1:5,1:5])
@IanevskiAleksandr
print(dim(scRNAseqData)) [1] 3000 8622 print(scRNAseqData[1:5,1:5]) AgedBlood_AAACCTGAGCCACTAT-1 AgedBlood_AAACCTGAGTGAACAT-1 AgedBlood_AAACCTGCACAAGTAA-1 AgedBlood_AAACCTGCACACCGCA-1 AgedBlood_AAACCTGCAGGTGGAT-1 Rb1cc1 -0.512388691 0.76187090 -0.55451559 -0.79083055 -0.619545468 Ncoa2 -0.566647978 -0.90339855 -0.62537123 -0.88991383 -0.872240373 Paqr8 -0.118482909 -0.18687755 -0.13043082 -0.18274169 -0.182223221 Gsta3 -0.003515415 -0.01481537 -0.00563955 -0.01856632 -0.007405477 Gm28836 -0.028314969 -0.03860338 -0.03046086 -0.04548837 -0.025974417
print(dim(es.max)) [1] 34 8622 print(es.max[1:5,1:5]) AgedBlood_AAACCTGAGCCACTAT-1 AgedBlood_AAACCTGAGTGAACAT-1 AgedBlood_AAACCTGCACAAGTAA-1 AgedBlood_AAACCTGCACACCGCA-1 Pro-B cells 2.18074437 -0.5086423 -0.5353237 -0.4448562 Pre-B cells -0.41884539 -0.3104448 -0.3644087 -0.1968483 Immature B cells -0.42348711 -0.3403791 -0.3762546 -0.2374019 Naive B cells -0.09069297 -0.5033071 -0.1178560 -0.6674868 Memory B cells -0.09069297 -0.5033071 -0.1178560 -0.6674868 AgedBlood_AAACCTGCAGGTGGAT-1 Pro-B cells -0.4322325 Pre-B cells -0.3036421 Immature B cells -0.3277322 Naive B cells -0.2773368 Memory B cells -0.2773368
@scas224 strange, what is the output of
unique(pbmc_merged_blood@meta.data$seurat_clusters)
if looks fine then:
for(cl %in% unique(pbmc_merged_blood@meta.data$seurat_clusters)){ print(cl); es.max.cl = sort(rowSums(es.max[ ,rownames(pbmc_merged_blood@meta.data[pbmc_merged_blood@meta.data$seurat_clusters==cl, ])]), decreasing = !0); print(head(data.frame(cluster = cl, type = names(es.max.cl), scores = es.max.cl, ncells = sum(pbmc_merged_blood@meta.data$seurat_clusters==cl)), 3)) }
@IanevskiAleksandr
unique(pbmc_merged_blood@meta.data$seurat_clusters) [1] 5 1 0 3 20 2 4 11 16 12 15 8 21 18 22 10 14 Levels: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
I noticed that some clusters do not have any cells now that I subsetted it, would removing those solve the problem? If so, what would be the best way to do that?
@scas224 more likely, run this to see at which cluster it fails:
for(cl %in% unique(pbmc_merged_blood@meta.data$seurat_clusters)){ print(cl); es.max.cl = sort(rowSums(es.max[ ,rownames(pbmc_merged_blood@meta.data[pbmc_merged_blood@meta.data$seurat_clusters==cl, ])]), decreasing = !0); print(head(data.frame(cluster = cl, type = names(es.max.cl), scores = es.max.cl, ncells = sum(pbmc_merged_blood@meta.data$seurat_clusters==cl)), 3)) }
Hello, I have a problem with the rowSums function that I do not quite understand due to my
es.max
object is ofclass "matrix" "array"
. Could you maybe spot a mistake in my code below? thank you a lot for your help! https://server1.mohrkeg.co.at/rstudio/file_show?path=%2Fsrv%2Fhome%2Ftgastaldi%2FRProjects%2FMelanoma_immunotherapy_resistance_scRNA_2018%2Fscripts%2Fanalysis.html