14% of cells in each of 5 sample are decided as doublets

It's interesting that 14% of cells in each of 5 sample are decided as doublets. Is it due to some coincidence or due to settings in DoubletFinder? Or may be some of my code is wrong. Could you please tell me?

The cell numbers before and after subset (by "percent.mt <= Percent.mt", "DF_hi.lo == "Singlet"", or both) is listed below:

	Original	Only after DF	Percentof...	Only after mt%	Percentof...	After DF & mt%	Percentof...
1	11478	9833	86%	8009	70%	6666	58%
2	10416	8916	86%	9332	90%	7939	76%
3	11481	9839	86%	11174	97%	9558	83%
4	13464	11526	86%	12199	91%	10357	77%
5	12810	10962	86%	11800	92%	10082	79%

the original code is:

library(Seurat)
library(DoubletFinder)
Seurat.list <- c('GTE001', 'GTE002', 'GTE003', 'GTE009', 'GTE012')
for (i in 1:length(Seurat.list)) {
  Filename.brief <- paste('20191122_', Seurat.list[i], sep = '')
  data_dir <- paste('D:/Cell Ranger Processed Data/', Seurat.list[i], '/filtered_feature_bc_matrix', sep = '')
  Seurat.data <- Read10X(data.dir = data_dir)
  dense.size <- object.size(x = as.matrix(x = Seurat.data))
  Seurat.object <- CreateSeuratObject(counts = Seurat.data, project = Seurat.list[i], min.cells = 5)
  Seurat.object[["percent.mt"]] <- PercentageFeatureSet(Seurat.object, pattern = "^MT-")
  Seurat.object <- NormalizeData(Seurat.object, verbose = FALSE)
  Seurat.object <- FindVariableFeatures(Seurat.object, selection.method = "vst", nfeatures = 2000)
  Seurat.object <- ScaleData(Seurat.object, verbose = FALSE)
  Seurat.object <- RunPCA(Seurat.object, npcs = 30, verbose = FALSE, resolution = 0.1)
  Seurat.object <- RunTSNE(Seurat.object, dims = 1:10, resolution = 0.1)
  Seurat.object <- FindNeighbors(Seurat.object, reduction = "tsne", dims = 1:2)
  Seurat.object <- FindClusters(Seurat.object, resolution = 0.1)
  #Doublet Finder
  sweep.res.list_Seurat <- paramSweep_v3(Seurat.object, PCs = 1:10, sct=FALSE)
  sweep.stats_Seurat <- summarizeSweep(sweep.res.list_Seurat, GT = FALSE)
  bcmvn_Seurat <- find.pK(sweep.stats_Seurat)
  mpK<-as.numeric(as.vector(bcmvn_Seurat$pK[which.max(bcmvn_Seurat$BCmetric)]))
  annotations <- Seurat.object@meta.data$seurat_clusters
  homotypic.prop <- modelHomotypic(annotations)           ## ex: annotations <- seu_Seurat@meta.data$ClusteringResults
  nExp_poi <- round(0.075*length(Seurat.object$seurat_clusters))  ## Assuming 7.5% doublet formation rate - tailor for your dataset
  nExp_poi.adj <- round(nExp_poi*(1-homotypic.prop))
  Seurat.object <- doubletFinder_v3(Seurat.object, PCs = 1:20, pN = 0.25, pK = mpK, nExp = nExp_poi, reuse.pANN = FALSE, sct = FALSE)
  Seurat.object <- doubletFinder_v3(Seurat.object, PCs = 1:10, pN = 0.25, pK = 0.09, nExp = nExp_poi.adj, reuse.pANN =colnames(Seurat.object@meta.data[1,])[13], sct = FALSE)
  Seurat.object@meta.data[,'DF_hi.lo'] <- "Singlet"
  Code_of_Temp <- paste("Seurat.object@meta.data$DF_hi.lo[which(Seurat.object@meta.data$", colnames(as.matrix(Seurat.object@meta.data[1,]))[13], " == 'Doublet')] <- 'Doublet_lo'", sep = '')
  eval(parse(text = Code_of_Temp))
  Code_of_Temp <- paste("Seurat.object@meta.data$DF_hi.lo[which(Seurat.object@meta.data$", colnames(as.matrix(Seurat.object@meta.data[1,]))[14], " == 'Doublet')] <- 'Doublet_hi'", sep = '')
  eval(parse(text = Code_of_Temp))
  #Subset: MT%<=12% & ==singlet
  Percent.mt <- 12
  Seurat.object_temp <- subset(Seurat.object, subset = percent.mt <= Percent.mt & DF_hi.lo == "Singlet" )
}

chris-mcginnis-ucsf / DoubletFinder

14% of cells in each of 5 sample are decided as doublets #58