powellgenomicslab / scPred

scPred package for cell type prediction from scRNA-seq data
MIT License
71 stars 16 forks source link

Error in if (nrow(spmodel@features[[positiveClass]]) == 0) {: argument is of length zero #29

Closed royfrancis closed 1 month ago

royfrancis commented 1 month ago
library(Seurat)
#> Loading required package: SeuratObject
#> Loading required package: sp
#> 'SeuratObject' was built under R 4.4.0 but the current version is
#> 4.4.1; it is recomended that you reinstall 'SeuratObject' as the ABI
#> for R may have changed
#> 
#> Attaching package: 'SeuratObject'
#> The following objects are masked from 'package:base':
#> 
#>     intersect, t
library(SeuratData)
#> ── Installed datasets ──────────────────────────────── SeuratData v0.2.2.9001 ──
#> ✔ pbmc3k  3.1.4                         ✔ pbmcref 1.0.0
#> ────────────────────────────────────── Key ─────────────────────────────────────
#> ✔ Dataset loaded successfully
#> ❯ Dataset built with a newer version of Seurat than installed
#> ❓ Unknown version of Seurat installed
library(scPred)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union

# Make sure the pbmc3k dataset package is installed, then load the object.
SeuratData::InstallData("pbmc3k")
#> Warning: The following packages are already installed and will not be
#> reinstalled: pbmc3k
data("pbmc3k")
# Update the stored object to the structure used by the installed Seurat.
ref <- UpdateSeuratObject(pbmc3k)
#> Validating object structure
#> Updating object slots
#> Ensuring keys are in the proper structure
#> Warning: Assay RNA changing from Assay to Assay
#> Ensuring keys are in the proper structure
#> Ensuring feature names don't have underscores or pipes
#> Updating slots in RNA
#> Validating object structure for Assay 'RNA'
#> Object representation is consistent with the most current Seurat version

# Preprocess the reference with default parameters: normalize, pick variable
# features, scale, run PCA, then compute a UMAP on dimensions 1 to 20.
ref <- ref %>% 
  NormalizeData() %>% 
  FindVariableFeatures() %>% 
  ScaleData() %>% 
  RunPCA() %>% 
  RunUMAP(dims = 1:20)
#> Centering and scaling data matrix
#> PC_ 1 
#> Positive:  MALAT1, LTB, IL32, CD2, ACAP1, STK17A, CTSW, CD247, CCL5, GIMAP5 
#>     AQP3, GZMA, CST7, TRAF3IP3, MAL, HOPX, ITM2A, GZMK, MYC, BEX2 
#>     GIMAP7, ETS1, LDLRAP1, ZAP70, LYAR, RIC3, TNFAIP8, KLRG1, SAMD3, NKG7 
#> Negative:  CST3, TYROBP, LST1, AIF1, FTL, FCN1, LYZ, FTH1, S100A9, FCER1G 
#>     TYMP, CFD, LGALS1, CTSS, S100A8, SERPINA1, LGALS2, SPI1, IFITM3, PSAP 
#>     CFP, SAT1, IFI30, COTL1, S100A11, NPC2, LGALS3, GSTP1, PYCARD, NCF2 
#> PC_ 2 
#> Positive:  CD79A, MS4A1, TCL1A, HLA-DQA1, HLA-DRA, HLA-DQB1, LINC00926, CD79B, HLA-DRB1, CD74 
#>     HLA-DPB1, HLA-DMA, HLA-DQA2, HLA-DRB5, HLA-DPA1, HLA-DMB, FCRLA, HVCN1, LTB, BLNK 
#>     KIAA0125, P2RX5, IRF8, IGLL5, SWAP70, ARHGAP24, SMIM14, PPP1R14A, FCRL2, C16orf74 
#> Negative:  NKG7, PRF1, CST7, GZMA, GZMB, FGFBP2, CTSW, GNLY, GZMH, SPON2 
#>     CCL4, FCGR3A, CCL5, CD247, XCL2, CLIC3, AKR1C3, SRGN, HOPX, CTSC 
#>     TTC38, S100A4, ANXA1, IL32, IGFBP7, ID2, ACTB, XCL1, APOBEC3G, SAMD3 
#> PC_ 3 
#> Positive:  HLA-DQA1, CD79A, CD79B, HLA-DQB1, HLA-DPB1, CD74, HLA-DPA1, MS4A1, HLA-DRB1, HLA-DRB5 
#>     HLA-DRA, HLA-DQA2, TCL1A, LINC00926, HLA-DMB, HLA-DMA, HVCN1, FCRLA, IRF8, BLNK 
#>     KIAA0125, SMIM14, PLD4, IGLL5, P2RX5, TMSB10, SWAP70, LAT2, MALAT1, IGJ 
#> Negative:  PPBP, PF4, SDPR, SPARC, GNG11, NRGN, GP9, RGS18, TUBB1, CLU 
#>     HIST1H2AC, AP001189.4, ITGA2B, CD9, TMEM40, CA2, PTCRA, ACRBP, MMD, TREML1 
#>     NGFRAP1, F13A1, RUFY1, SEPT5, MPP1, CMTM5, TSC22D1, MYL9, RP11-367G6.3, GP1BA 
#> PC_ 4 
#> Positive:  HLA-DQA1, CD79A, CD79B, HIST1H2AC, HLA-DQB1, PF4, MS4A1, SDPR, CD74, PPBP 
#>     HLA-DPB1, GNG11, HLA-DQA2, SPARC, HLA-DRB1, HLA-DPA1, GP9, TCL1A, HLA-DRA, LINC00926 
#>     NRGN, RGS18, HLA-DRB5, PTCRA, CD9, AP001189.4, CA2, CLU, TUBB1, ITGA2B 
#> Negative:  VIM, S100A8, S100A6, S100A4, S100A9, TMSB10, IL32, GIMAP7, LGALS2, S100A10 
#>     RBP7, FCN1, MAL, LYZ, S100A12, MS4A6A, CD2, FYB, S100A11, FOLR3 
#>     GIMAP4, AQP3, ANXA1, AIF1, MALAT1, GIMAP5, IL8, IFI6, TRABD2A, TMSB4X 
#> PC_ 5 
#> Positive:  GZMB, FGFBP2, NKG7, GNLY, PRF1, CCL4, CST7, SPON2, GZMA, CLIC3 
#>     GZMH, XCL2, CTSW, TTC38, AKR1C3, CCL5, IGFBP7, XCL1, CCL3, S100A8 
#>     TYROBP, HOPX, CD160, HAVCR2, S100A9, FCER1G, PTGDR, LGALS2, RBP7, S100A12 
#> Negative:  LTB, VIM, AQP3, PPA1, MAL, KIAA0101, CD2, CYTIP, CORO1B, FYB 
#>     IL32, TRADD, ANXA5, TUBA1B, HN1, TYMS, PTGES3, ITM2A, COTL1, GPR183 
#>     TNFAIP8, ACTG1, TRAF3IP3, ATP5C1, GIMAP4, ZWINT, PRDX1, LDLRAP1, ABRACL, NGFRAP1
#> Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
#> To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
#> This message will be shown once per session
#> 10:54:34 UMAP embedding parameters a = 0.9922 b = 1.112
#> 10:54:34 Read 2700 rows and found 20 numeric columns
#> 10:54:34 Using Annoy for neighbor search, n_neighbors = 30
#> 10:54:34 Building Annoy index with metric = cosine, n_trees = 50
#> 0%   10   20   30   40   50   60   70   80   90   100%
#> [----|----|----|----|----|----|----|----|----|----|
#> **************************************************|
#> 10:54:34 Writing NN index file to temp file /tmp/RtmpOYws1z/filef478ab7d4c6
#> 10:54:34 Searching Annoy index using 1 thread, search_k = 3000
#> 10:54:35 Annoy recall = 100%
#> 10:54:35 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
#> 10:54:35 Initializing from normalized Laplacian + noise (using RSpectra)
#> 10:54:35 Commencing optimization for 500 epochs, with 111172 positive edges
#> 10:54:38 Optimization finished

# Build the scPred feature space, using "seurat_annotations" as the labels.
ref <- getFeatureSpace(ref, "seurat_annotations")
#> ●  Extracting feature space for each cell type...
#> DONE!
# Train the per-cell-type models; this is the call that raises the error.
ref <- trainModel(ref)
#> ●  Training models for each cell type...
#> Loading required package: ggplot2
#> Loading required package: lattice
#> Error in if (nrow(spmodel@features[[positiveClass]]) == 0) {: argument is of length zero

Created on 2024-08-13 with reprex v2.1.1

Session info ``` r sessioninfo::session_info() #> ─ Session info ─────────────────────────────────────────────────────────────── #> setting value #> version R version 4.4.1 (2024-06-14) #> os Ubuntu 22.04.4 LTS #> system x86_64, linux-gnu #> ui X11 #> language (EN) #> collate C.UTF-8 #> ctype C.UTF-8 #> tz Etc/UTC #> date 2024-08-13 #> pandoc 3.2 @ /usr/bin/ (via rmarkdown) #> #> ─ Packages ─────────────────────────────────────────────────────────────────── #> package * version date (UTC) lib source #> abind 1.4-5 2016-07-21 [1] RSPM (R 4.4.0) #> beeswarm 0.4.0 2021-06-01 [1] RSPM (R 4.4.0) #> caret * 6.0-94 2023-03-21 [1] RSPM (R 4.4.0) #> class 7.3-22 2023-05-03 [2] CRAN (R 4.4.1) #> cli 3.6.3 2024-06-21 [1] RSPM (R 4.4.0) #> cluster 2.1.6 2023-12-01 [2] CRAN (R 4.4.1) #> codetools 0.2-20 2024-03-31 [2] CRAN (R 4.4.1) #> colorspace 2.1-1 2024-07-26 [1] RSPM (R 4.4.0) #> cowplot 1.1.3 2024-01-22 [1] RSPM (R 4.4.0) #> crayon 1.5.3 2024-06-20 [1] RSPM (R 4.4.0) #> data.table 1.15.4 2024-03-30 [1] RSPM (R 4.4.0) #> deldir 2.0-4 2024-02-28 [1] RSPM (R 4.4.0) #> digest 0.6.36 2024-06-23 [1] RSPM (R 4.4.0) #> dotCall64 1.1-1 2023-11-28 [1] RSPM (R 4.4.0) #> dplyr * 1.1.4 2023-11-17 [1] RSPM (R 4.4.0) #> evaluate 0.24.0 2024-06-10 [1] RSPM (R 4.4.0) #> fansi 1.0.6 2023-12-08 [1] RSPM (R 4.4.0) #> fastDummies 1.7.3 2023-07-06 [1] RSPM (R 4.4.0) #> fastmap 1.2.0 2024-05-15 [1] RSPM (R 4.4.0) #> fitdistrplus 1.2-1 2024-07-12 [1] RSPM (R 4.4.0) #> foreach 1.5.2 2022-02-02 [1] RSPM (R 4.4.0) #> fs 1.6.4 2024-04-25 [1] RSPM (R 4.4.0) #> future 1.34.0 2024-07-29 [1] RSPM (R 4.4.0) #> future.apply 1.11.2 2024-03-28 [1] RSPM (R 4.4.0) #> generics 0.1.3 2022-07-05 [1] RSPM (R 4.4.0) #> ggbeeswarm 0.7.2 2023-04-29 [1] RSPM (R 4.4.0) #> ggplot2 * 3.5.1 2024-04-23 [1] RSPM (R 4.4.0) #> ggrepel 0.9.5 2024-01-10 [1] RSPM (R 4.4.0) #> ggridges 0.5.6 2024-01-23 [1] RSPM (R 4.4.0) #> globals 0.16.3 2024-03-08 [1] RSPM (R 4.4.0) #> glue 1.7.0 2024-01-09 [1] RSPM (R 4.4.0) #> goftest 1.2-3 
2021-10-07 [1] RSPM (R 4.4.0) #> gower 1.0.1 2022-12-22 [1] RSPM (R 4.4.0) #> gridExtra 2.3 2017-09-09 [1] RSPM (R 4.4.0) #> gtable 0.3.5 2024-04-22 [1] RSPM (R 4.4.0) #> hardhat 1.4.0 2024-06-02 [1] RSPM (R 4.4.0) #> harmony 1.2.0 2024-08-08 [1] Github (immunogenomics/harmony@f054b03) #> htmltools 0.5.8.1 2024-04-04 [1] RSPM (R 4.4.0) #> htmlwidgets 1.6.4 2023-12-06 [1] RSPM (R 4.4.0) #> httpuv 1.6.15 2024-03-26 [1] RSPM (R 4.4.0) #> httr 1.4.7 2023-08-15 [1] RSPM (R 4.4.0) #> ica 1.0-3 2022-07-08 [1] RSPM (R 4.4.0) #> igraph 2.0.3 2024-03-13 [1] RSPM (R 4.4.0) #> ipred 0.9-15 2024-07-18 [1] RSPM (R 4.4.0) #> irlba 2.3.5.1 2022-10-03 [1] RSPM (R 4.4.0) #> iterators 1.0.14 2022-02-05 [1] RSPM (R 4.4.0) #> jsonlite 1.8.8 2023-12-04 [1] RSPM (R 4.4.0) #> kernlab 0.9-32 2023-01-31 [1] RSPM (R 4.4.0) #> KernSmooth 2.23-24 2024-05-17 [2] CRAN (R 4.4.1) #> knitr 1.48 2024-07-07 [1] RSPM (R 4.4.0) #> later 1.3.2 2023-12-06 [1] RSPM (R 4.4.0) #> lattice * 0.22-6 2024-03-20 [2] CRAN (R 4.4.1) #> lava 1.8.0 2024-03-05 [1] RSPM (R 4.4.0) #> lazyeval 0.2.2 2019-03-15 [1] RSPM (R 4.4.0) #> leiden 0.4.3.1 2023-11-17 [1] RSPM (R 4.4.0) #> lifecycle 1.0.4 2023-11-07 [1] RSPM (R 4.4.0) #> listenv 0.9.1 2024-01-29 [1] RSPM (R 4.4.0) #> lmtest 0.9-40 2022-03-21 [1] RSPM (R 4.4.0) #> lubridate 1.9.3 2023-09-27 [1] RSPM (R 4.4.0) #> magrittr 2.0.3 2022-03-30 [1] RSPM (R 4.4.0) #> MASS 7.3-61 2024-06-13 [2] RSPM (R 4.4.0) #> Matrix 1.7-0 2024-04-26 [2] CRAN (R 4.4.1) #> matrixStats 1.3.0 2024-04-11 [1] RSPM (R 4.4.0) #> mime 0.12 2021-09-28 [1] RSPM (R 4.4.0) #> miniUI 0.1.1.1 2018-05-18 [1] RSPM (R 4.4.0) #> ModelMetrics 1.2.2.2 2020-03-17 [1] RSPM (R 4.4.0) #> munsell 0.5.1 2024-04-01 [1] RSPM (R 4.4.0) #> nlme 3.1-165 2024-06-06 [2] RSPM (R 4.4.0) #> nnet 7.3-19 2023-05-03 [2] CRAN (R 4.4.1) #> parallelly 1.38.0 2024-07-27 [1] RSPM (R 4.4.0) #> patchwork 1.2.0 2024-01-08 [1] RSPM (R 4.4.0) #> pbapply 1.7-2 2023-06-27 [1] RSPM (R 4.4.0) #> pbmc3k.SeuratData * 3.1.4 2024-08-13 [1] 
local #> pbmcref.SeuratData * 1.0.0 2024-08-13 [1] local #> pillar 1.9.0 2023-03-22 [1] RSPM (R 4.4.0) #> pkgconfig 2.0.3 2019-09-22 [1] RSPM (R 4.4.0) #> plotly 4.10.4 2024-01-13 [1] RSPM (R 4.4.0) #> plyr 1.8.9 2023-10-02 [1] RSPM (R 4.4.0) #> png 0.1-8 2022-11-29 [1] RSPM (R 4.4.0) #> polyclip 1.10-7 2024-07-23 [1] RSPM (R 4.4.0) #> pROC 1.18.5 2023-11-01 [1] RSPM (R 4.4.0) #> prodlim 2024.06.25 2024-06-24 [1] RSPM (R 4.4.0) #> progressr 0.14.0 2023-08-10 [1] RSPM (R 4.4.0) #> promises 1.3.0 2024-04-05 [1] RSPM (R 4.4.0) #> purrr 1.0.2 2023-08-10 [1] RSPM (R 4.4.0) #> R6 2.5.1 2021-08-19 [1] RSPM (R 4.4.0) #> RANN 2.6.1 2019-01-08 [1] RSPM (R 4.4.0) #> rappdirs 0.3.3 2021-01-31 [1] RSPM (R 4.4.0) #> RColorBrewer 1.1-3 2022-04-03 [1] RSPM (R 4.4.0) #> Rcpp 1.0.13 2024-07-17 [1] RSPM (R 4.4.0) #> RcppAnnoy 0.0.22 2024-01-23 [1] RSPM (R 4.4.0) #> RcppHNSW 0.6.0 2024-02-04 [1] RSPM (R 4.4.0) #> recipes 1.1.0 2024-07-04 [1] RSPM (R 4.4.0) #> reprex 2.1.1 2024-07-06 [1] RSPM (R 4.4.0) #> reshape2 1.4.4 2020-04-09 [1] RSPM (R 4.4.0) #> reticulate 1.38.0 2024-06-19 [1] RSPM (R 4.4.0) #> rlang 1.1.4 2024-06-04 [1] RSPM (R 4.4.0) #> rmarkdown 2.27 2024-05-17 [1] RSPM #> ROCR 1.0-11 2020-05-02 [1] RSPM (R 4.4.0) #> rpart 4.1.23 2023-12-05 [2] CRAN (R 4.4.1) #> RSpectra 0.16-2 2024-07-18 [1] RSPM (R 4.4.0) #> rstudioapi 0.16.0 2024-03-24 [1] RSPM (R 4.4.0) #> Rtsne 0.17 2023-12-07 [1] RSPM (R 4.4.0) #> scales 1.3.0 2023-11-28 [1] RSPM (R 4.4.0) #> scattermore 1.2 2023-06-12 [1] RSPM (R 4.4.0) #> scPred * 1.9.2 2024-08-08 [1] Github (powellgenomicslab/scPred@af5492e) #> sctransform 0.4.1 2023-10-19 [1] RSPM (R 4.4.0) #> sessioninfo 1.2.2 2021-12-06 [1] RSPM (R 4.4.0) #> Seurat * 5.1.0 2024-05-10 [1] RSPM (R 4.4.0) #> SeuratData * 0.2.2.9001 2024-08-08 [1] Github (satijalab/seurat-data@4dc08e0) #> SeuratObject * 5.0.2 2024-05-08 [1] RSPM (R 4.4.0) #> shiny 1.9.1 2024-08-01 [1] RSPM (R 4.4.0) #> sp * 2.1-4 2024-04-30 [1] RSPM (R 4.4.0) #> spam 2.10-0 2023-10-23 [1] RSPM (R 
4.4.0) #> spatstat.data 3.1-2 2024-06-21 [1] RSPM (R 4.4.0) #> spatstat.explore 3.3-1 2024-07-15 [1] RSPM (R 4.4.0) #> spatstat.geom 3.3-2 2024-07-15 [1] RSPM (R 4.4.0) #> spatstat.random 3.3-1 2024-07-15 [1] RSPM (R 4.4.0) #> spatstat.sparse 3.1-0 2024-06-21 [1] RSPM (R 4.4.0) #> spatstat.univar 3.0-0 2024-06-28 [1] RSPM (R 4.4.0) #> spatstat.utils 3.0-5 2024-06-17 [1] RSPM (R 4.4.0) #> stringi 1.8.4 2024-05-06 [1] RSPM (R 4.4.0) #> stringr 1.5.1 2023-11-14 [1] RSPM (R 4.4.0) #> survival 3.7-0 2024-06-05 [2] RSPM (R 4.4.0) #> tensor 1.5 2012-05-05 [1] RSPM (R 4.4.0) #> tibble 3.2.1 2023-03-20 [1] RSPM (R 4.4.0) #> tidyr 1.3.1 2024-01-24 [1] RSPM (R 4.4.0) #> tidyselect 1.2.1 2024-03-11 [1] RSPM (R 4.4.0) #> timechange 0.3.0 2024-01-18 [1] RSPM (R 4.4.0) #> timeDate 4032.109 2023-12-14 [1] RSPM (R 4.4.0) #> utf8 1.2.4 2023-10-22 [1] RSPM (R 4.4.0) #> uwot 0.2.2 2024-04-21 [1] RSPM (R 4.4.0) #> vctrs 0.6.5 2023-12-01 [1] RSPM (R 4.4.0) #> vipor 0.4.7 2023-12-18 [1] RSPM (R 4.4.0) #> viridisLite 0.4.2 2023-05-02 [1] RSPM (R 4.4.0) #> withr 3.0.1 2024-07-31 [1] RSPM (R 4.4.0) #> xfun 0.46 2024-07-18 [1] RSPM (R 4.4.0) #> xtable 1.8-4 2019-04-21 [1] RSPM (R 4.4.0) #> yaml 2.3.10 2024-07-26 [1] RSPM (R 4.4.0) #> zoo 1.8-12 2023-04-13 [1] RSPM (R 4.4.0) #> #> [1] /usr/local/lib/R/site-library #> [2] /usr/local/lib/R/library #> #> ────────────────────────────────────────────────────────────────────────────── ```
royfrancis commented 1 month ago

It turns out this is caused by NA values in the cell type column "seurat_annotations", which trainModel() does not handle. Removing the cells with missing annotations before calling getFeatureSpace() and trainModel() resolves the issue:

# Drop cells whose "seurat_annotations" value is NA; the scPred training step
# above fails when the label column contains missing values.
# Cells are selected by name with an explicit is.na() test rather than by
# listing every valid label, so the filter keeps working if labels change and
# cannot silently drop a cell type because of a typo in a hard-coded string.
keep_cells <- colnames(ref)[!is.na(ref$seurat_annotations)]
ref <- subset(ref, cells = keep_cells)