Closed 12231366ostun closed 9 months ago
Hi, I have also come across this error - it's a result of the seurat v5 updates ("counts" are now stored in "layers"). My current workaround is to edit some of the DoubletFinder functions. Redefining the functions by running the below worked for me (lines that I changed are marked by a comment):
paramSweep_v3 <- function (seu, PCs = 1:10, sct = FALSE, num.cores = 1)
{
require(Seurat)
require(fields)
pK <- c(5e-04, 0.001, 0.005, seq(0.01, 0.3, by = 0.01))
pN <- seq(0.05, 0.3, by = 0.05)
min.cells <- round(nrow(seu@meta.data)/(1 - 0.05) - nrow(seu@meta.data))
pK.test <- round(pK * min.cells)
pK <- pK[which(pK.test >= 1)]
orig.commands <- seu@commands
if (nrow(seu@meta.data) > 10000) {
real.cells <- rownames(seu@meta.data)[sample(1:nrow(seu@meta.data),
10000, replace = FALSE)]
data <- seu@assays$RNA@layers$counts ## M
colnames(data) <- colnames(seu) ## M
rownames(data) <- rownames(seu) ## M
data <- data[ ,real.cells] ## M
n.real.cells <- ncol(data)
}
if (nrow(seu@meta.data) <= 10000) {
real.cells <- rownames(seu@meta.data)
data <- seu@assays$RNA@layers$counts ## M
colnames(data) <- colnames(seu) ## M
rownames(data) <- rownames(seu) ## M
n.real.cells <- ncol(data)
}
if (num.cores > 1) {
require(parallel)
cl <- makeCluster(num.cores)
output2 <- mclapply(as.list(1:length(pN)), FUN = parallel_paramSweep_v3,
n.real.cells, real.cells, pK, pN, data, orig.commands,
PCs, sct, mc.cores = num.cores)
stopCluster(cl)
}
else {
output2 <- lapply(as.list(1:length(pN)), FUN = parallel_paramSweep_v3,
n.real.cells, real.cells, pK, pN, data, orig.commands,
PCs, sct)
}
sweep.res.list <- list()
list.ind <- 0
for (i in 1:length(output2)) {
for (j in 1:length(output2[[i]])) {
list.ind <- list.ind + 1
sweep.res.list[[list.ind]] <- output2[[i]][[j]]
}
}
name.vec <- NULL
for (j in 1:length(pN)) {
name.vec <- c(name.vec, paste("pN", pN[j], "pK", pK,
sep = "_"))
}
names(sweep.res.list) <- name.vec
return(sweep.res.list)
}
parallel_paramSweep_v3 <- function (n, n.real.cells, real.cells, pK, pN, data, orig.commands,
PCs, sct)
{
sweep.res.list = list()
list.ind = 0
print(paste("Creating artificial doublets for pN = ", pN[n] *
100, "%", sep = ""))
n_doublets <- round(n.real.cells/(1 - pN[n]) - n.real.cells)
real.cells1 <- sample(real.cells, n_doublets, replace = TRUE)
real.cells2 <- sample(real.cells, n_doublets, replace = TRUE)
doublets <- (data[, real.cells1] + data[, real.cells2])/2
colnames(doublets) <- paste("X", 1:n_doublets, sep = "")
data_wdoublets <- cbind(data, doublets)
if (sct == FALSE) {
print("Creating Seurat object...")
seu_wdoublets <- CreateSeuratObject(counts = data_wdoublets)
print("Normalizing Seurat object...")
seu_wdoublets <- NormalizeData(seu_wdoublets, normalization.method = orig.commands$NormalizeData.RNA@params$normalization.method,
scale.factor = orig.commands$NormalizeData.RNA@params$scale.factor,
margin = orig.commands$NormalizeData.RNA@params$margin)
print("Finding variable genes...")
seu_wdoublets <- FindVariableFeatures(seu_wdoublets,
selection.method = orig.commands$FindVariableFeatures.RNA$selection.method,
loess.span = orig.commands$FindVariableFeatures.RNA$loess.span,
clip.max = orig.commands$FindVariableFeatures.RNA$clip.max,
mean.function = orig.commands$FindVariableFeatures.RNA$mean.function,
dispersion.function = orig.commands$FindVariableFeatures.RNA$dispersion.function,
num.bin = orig.commands$FindVariableFeatures.RNA$num.bin,
binning.method = orig.commands$FindVariableFeatures.RNA$binning.method,
nfeatures = orig.commands$FindVariableFeatures.RNA$nfeatures,
mean.cutoff = orig.commands$FindVariableFeatures.RNA$mean.cutoff,
dispersion.cutoff = orig.commands$FindVariableFeatures.RNA$dispersion.cutoff)
print("Scaling data...")
seu_wdoublets <- ScaleData(seu_wdoublets, features = orig.commands$ScaleData.RNA$features,
model.use = orig.commands$ScaleData.RNA$model.use,
do.scale = orig.commands$ScaleData.RNA$do.scale,
do.center = orig.commands$ScaleData.RNA$do.center,
scale.max = orig.commands$ScaleData.RNA$scale.max,
block.size = orig.commands$ScaleData.RNA$block.size,
min.cells.to.block = orig.commands$ScaleData.RNA$min.cells.to.block)
print("Running PCA...")
seu_wdoublets <- RunPCA(seu_wdoublets, features = orig.commands$ScaleData.RNA$features,
npcs = length(PCs), rev.pca = orig.commands$RunPCA.RNA$rev.pca,
weight.by.var = orig.commands$RunPCA.RNA$weight.by.var,
verbose = FALSE)
}
if (sct == TRUE) {
require(sctransform)
print("Creating Seurat object...")
seu_wdoublets <- CreateSeuratObject(counts = data_wdoublets)
print("Running SCTransform...")
seu_wdoublets <- SCTransform(seu_wdoublets)
print("Running PCA...")
seu_wdoublets <- RunPCA(seu_wdoublets, npcs = length(PCs))
}
print("Calculating PC distance matrix...")
nCells <- nrow(seu_wdoublets@meta.data)
pca.coord <- seu_wdoublets@reductions$pca@cell.embeddings[,
PCs]
rm(seu_wdoublets)
gc()
dist.mat <- fields::rdist(pca.coord)[, 1:n.real.cells]
print("Defining neighborhoods...")
for (i in 1:n.real.cells) {
dist.mat[, i] <- order(dist.mat[, i])
}
ind <- round(nCells * max(pK)) + 5
dist.mat <- dist.mat[1:ind, ]
print("Computing pANN across all pK...")
for (k in 1:length(pK)) {
print(paste("pK = ", pK[k], "...", sep = ""))
pk.temp <- round(nCells * pK[k])
pANN <- as.data.frame(matrix(0L, nrow = n.real.cells,
ncol = 1))
colnames(pANN) <- "pANN"
rownames(pANN) <- real.cells
list.ind <- list.ind + 1
for (i in 1:n.real.cells) {
neighbors <- dist.mat[2:(pk.temp + 1), i]
pANN$pANN[i] <- length(which(neighbors > n.real.cells))/pk.temp
}
sweep.res.list[[list.ind]] <- pANN
}
return(sweep.res.list)
}
doubletFinder_v3 <- function (seu, PCs, pN = 0.25, pK, nExp, reuse.pANN = FALSE,
sct = FALSE, annotations = NULL)
{
require(Seurat)
require(fields)
require(KernSmooth)
if (reuse.pANN != FALSE) {
pANN.old <- seu@meta.data[, reuse.pANN]
classifications <- rep("Singlet", length(pANN.old))
classifications[order(pANN.old, decreasing = TRUE)[1:nExp]] <- "Doublet"
seu@meta.data[, paste("DF.classifications", pN, pK, nExp,
sep = "_")] <- classifications
return(seu)
}
if (reuse.pANN == FALSE) {
real.cells <- rownames(seu@meta.data)
data <- seu@assays$RNA@layers$counts ## M
colnames(data) <- colnames(seu) ## M
rownames(data) <- rownames(seu) ## M
data <- data[, real.cells] ## M
n_real.cells <- length(real.cells)
n_doublets <- round(n_real.cells/(1 - pN) - n_real.cells)
print(paste("Creating", n_doublets, "artificial doublets...",
sep = " "))
real.cells1 <- sample(real.cells, n_doublets, replace = TRUE)
real.cells2 <- sample(real.cells, n_doublets, replace = TRUE)
doublets <- (data[, real.cells1] + data[, real.cells2])/2
colnames(doublets) <- paste("X", 1:n_doublets, sep = "")
data_wdoublets <- cbind(data, doublets)
if (!is.null(annotations)) {
stopifnot(typeof(annotations) == "character")
stopifnot(length(annotations) == length(Cells(seu)))
stopifnot(!any(is.na(annotations)))
annotations <- factor(annotations)
names(annotations) <- Cells(seu)
doublet_types1 <- annotations[real.cells1]
doublet_types2 <- annotations[real.cells2]
}
orig.commands <- seu@commands
if (sct == FALSE) {
print("Creating Seurat object...")
seu_wdoublets <- CreateSeuratObject(counts = data_wdoublets)
print("Normalizing Seurat object...")
seu_wdoublets <- NormalizeData(seu_wdoublets, normalization.method = orig.commands$NormalizeData.RNA@params$normalization.method,
scale.factor = orig.commands$NormalizeData.RNA@params$scale.factor,
margin = orig.commands$NormalizeData.RNA@params$margin)
print("Finding variable genes...")
seu_wdoublets <- FindVariableFeatures(seu_wdoublets,
selection.method = orig.commands$FindVariableFeatures.RNA$selection.method,
loess.span = orig.commands$FindVariableFeatures.RNA$loess.span,
clip.max = orig.commands$FindVariableFeatures.RNA$clip.max,
mean.function = orig.commands$FindVariableFeatures.RNA$mean.function,
dispersion.function = orig.commands$FindVariableFeatures.RNA$dispersion.function,
num.bin = orig.commands$FindVariableFeatures.RNA$num.bin,
binning.method = orig.commands$FindVariableFeatures.RNA$binning.method,
nfeatures = orig.commands$FindVariableFeatures.RNA$nfeatures,
mean.cutoff = orig.commands$FindVariableFeatures.RNA$mean.cutoff,
dispersion.cutoff = orig.commands$FindVariableFeatures.RNA$dispersion.cutoff)
print("Scaling data...")
seu_wdoublets <- ScaleData(seu_wdoublets, features = orig.commands$ScaleData.RNA$features,
model.use = orig.commands$ScaleData.RNA$model.use,
do.scale = orig.commands$ScaleData.RNA$do.scale,
do.center = orig.commands$ScaleData.RNA$do.center,
scale.max = orig.commands$ScaleData.RNA$scale.max,
block.size = orig.commands$ScaleData.RNA$block.size,
min.cells.to.block = orig.commands$ScaleData.RNA$min.cells.to.block)
print("Running PCA...")
seu_wdoublets <- RunPCA(seu_wdoublets, features = orig.commands$ScaleData.RNA$features,
npcs = length(PCs), rev.pca = orig.commands$RunPCA.RNA$rev.pca,
weight.by.var = orig.commands$RunPCA.RNA$weight.by.var,
verbose = FALSE)
pca.coord <- seu_wdoublets@reductions$pca@cell.embeddings[,
PCs]
cell.names <- rownames(seu_wdoublets@meta.data)
nCells <- length(cell.names)
rm(seu_wdoublets)
gc()
}
if (sct == TRUE) {
require(sctransform)
print("Creating Seurat object...")
seu_wdoublets <- CreateSeuratObject(counts = data_wdoublets)
print("Running SCTransform...")
seu_wdoublets <- SCTransform(seu_wdoublets)
print("Running PCA...")
seu_wdoublets <- RunPCA(seu_wdoublets, npcs = length(PCs))
pca.coord <- seu_wdoublets@reductions$pca@cell.embeddings[,
PCs]
cell.names <- rownames(seu_wdoublets@meta.data)
nCells <- length(cell.names)
rm(seu_wdoublets)
gc()
}
print("Calculating PC distance matrix...")
dist.mat <- fields::rdist(pca.coord)
print("Computing pANN...")
pANN <- as.data.frame(matrix(0L, nrow = n_real.cells,
ncol = 1))
if (!is.null(annotations)) {
neighbor_types <- as.data.frame(matrix(0L, nrow = n_real.cells,
ncol = length(levels(doublet_types1))))
}
rownames(pANN) <- real.cells
colnames(pANN) <- "pANN"
k <- round(nCells * pK)
for (i in 1:n_real.cells) {
neighbors <- order(dist.mat[, i])
neighbors <- neighbors[2:(k + 1)]
pANN$pANN[i] <- length(which(neighbors > n_real.cells))/k
if (!is.null(annotations)) {
for (ct in unique(annotations)) {
neighbors_that_are_doublets = neighbors[neighbors >
n_real.cells]
if (length(neighbors_that_are_doublets) > 0) {
neighbor_types[i, ] <- table(doublet_types1[neighbors_that_are_doublets -
n_real.cells]) + table(doublet_types2[neighbors_that_are_doublets -
n_real.cells])
neighbor_types[i, ] <- neighbor_types[i,
]/sum(neighbor_types[i, ])
}
else {
neighbor_types[i, ] <- NA
}
}
}
}
print("Classifying doublets..")
classifications <- rep("Singlet", n_real.cells)
classifications[order(pANN$pANN[1:n_real.cells], decreasing = TRUE)[1:nExp]] <- "Doublet"
seu@meta.data[, paste("pANN", pN, pK, nExp, sep = "_")] <- pANN[rownames(seu@meta.data),
1]
seu@meta.data[, paste("DF.classifications", pN, pK, nExp,
sep = "_")] <- classifications
if (!is.null(annotations)) {
colnames(neighbor_types) = levels(doublet_types1)
for (ct in levels(doublet_types1)) {
seu@meta.data[, paste("DF.doublet.contributors",
pN, pK, nExp, ct, sep = "_")] <- neighbor_types[,
ct]
}
}
return(seu)
}
}
@mfranke-2 Thank you!!!!!!! I followed your redefined functions, and everything is working perfectly now. Thank you so much for the help and fast reply~
@mfranke-2 Thanks for tracking this down and providing the updated functions. Much appreciated!
Hello, sorry for bother,
when I run
I encounter the following error:
and the package versions of Seurat and doubletfinder are :
Do you have any insights on how I could address this issue?
Thank you so much.