sneumann / xcms

This is the git repository matching the Bioconductor package xcms: LC/MS and GC/MS Data Analysis
Other
184 stars 80 forks source link

adjustRtime subscript out of bounds error #366

Open danagibbon opened 5 years ago

danagibbon commented 5 years ago

Hi, I am attempting to follow the vignette using my data. I have previously, successfully run it on a smaller data set from a Shimadzu Scientific Instrument but using MatchedFilter instead of CentWave.

However, with the new data set, when I try to run adjustRtime, I get the following error:

xdata<- adjustRtime(xdata, param = ObiwarpParam())
Sample number 3 used as center sample.
Error in x[, pos[i]:ncol(x)] : subscript out of bounds

My samples were run on TripleTOF 5600. Code:

library(here)
library(xcms)
library(RColorBrewer)

datapath <- here("test-pipeline-2", "data", "msdata")
datafiles <- list.files(datapath, recursive = TRUE, full.names = TRUE,
                        pattern = paste("\\.", "mzXML", "$", sep=''))
datafiles <- datafiles[grep("pos", datafiles)]
exp_design <- read.csv(here("test-pipeline-2", "data", "experimental_design.csv"))
exp_design <- exp_design[grep("pos", exp_design$charge),]
# basename
pd <- data.frame(sample_name = sub(exp_design$file, 
                                   pattern = paste(".", "mzXML", sep=''), replacement = "", fixed = T),
                 sample_group = exp_design$group,
                 stringAsFactors = FALSE)
raw_data.2 <- readMSData(files = datafiles, pdata = new("NAnnotatedDataFrame", pd),
                         mode = "onDisk", centroided. = TRUE) 

# centwave default params
params <- CentWaveParam(ppm = 25, peakwidth = c(20, 50), snthresh = 10,
                        prefilter = c(3, 100), mzCenterFun = "wMean", integrate = 1L,
                        mzdiff = -0.001, fitgauss = FALSE, noise = 0,
                        verboseColumns = FALSE, roiList = list(),
                        firstBaselineCheck = TRUE, roiScales = numeric())

xdata <- findChromPeaks(raw_data.2, params,
                        BPPARAM = bpparam(), return.type = "XCMSnExp", msLevel = 1L)

My pd dataframe:

sample_name sample_group stringAsFactors
1   pos_MBP_4          MBP           FALSE
2   pos_MBP_5          MBP           FALSE
3   pos_MBP_6          MBP           FALSE
4   pos_RXR_1          RXR           FALSE
5   pos_RXR_2          RXR           FALSE
6   pos_RXR_3          RXR           FALSE

xdata object:

MSn experiment data ("XCMSnExp")
Object size in memory: 38.42 Mb
- - - Spectra data - - -
 MS level(s): 1 2 
 Number of spectra: 155525 
 MSn retention times: 0:0 - 19:29 minutes
- - - Processing information - - -
Data loaded [Tue Apr  2 08:53:53 2019] 
 MSnbase version: 2.8.0 
- - - Meta data  - - -
phenoData
  rowNames: 1 2 ... 6 (6 total)
  varLabels: sample_name sample_group stringAsFactors
  varMetadata: labelDescription
Loaded from:
  [1] pos_MBP_4.mzXML...  [6] pos_RXR_3.mzXML
  Use 'fileNames(.)' to see all files.
protocolData: none
featureData
  featureNames: F1.S00001 F1.S00002 ... F6.S25971 (155525 total)
  fvarLabels: fileIdx spIdx ... spectrum (30 total)
  fvarMetadata: labelDescription
experimentData: use 'experimentData(object)'
- - - xcms preprocessing - - -
Chromatographic peak detection:
 method: centWave 
 381 peaks identified in 6 samples.
 On average 63.5 chromatographic peaks per sample.

traceback:

27: stop(e)
26: value[[3L]](cond)
25: tryCatchOne(expr, names, parentenv, handlers[[1L]])
24: tryCatchList(expr, classes, parentenv, handlers)
23: tryCatch({
        FUN(...)
    }, error = handle_error)
22: withCallingHandlers({
        tryCatch({
            FUN(...)
        }, error = handle_error)
    }, warning = handle_warning)
21: FUN(...)
20: FUN(X[[i]], ...)
19: lapply(X, FUN_, ...)
18: bplapply(X, FUN, ..., BPREDO = BPREDO, BPPARAM = param)
17: bplapply(X, FUN, ..., BPREDO = BPREDO, BPPARAM = param)
16: bplapply(X, FUN, ..., BPREDO = BPREDO, BPPARAM = BPPARAM)
15: bplapply(X, FUN, ..., BPREDO = BPREDO, BPPARAM = BPPARAM)
14: bplapply(splitByFile(object, f = theF), function(z, bmethod, 
        bstep, bbaselevel, bbasespace, bmzrange., breturnBreaks) {
        require(xcms, quietly = TRUE)
        sps <- spectra(z, BPPARAM = SerialParam())
        mzs <- lapply(sps, mz)
        if (any(is.na(unlist(mzs, use.names = FALSE)))) {
            sps <- lapply(sps, clean, all = TRUE)
            mzs <- lapply(sps, mz)
        }
        pk_count <- lengths(mzs)
        empty_spectra <- which(pk_count == 0)
        if (length(empty_spectra)) {
            mzs <- mzs[-empty_spectra]
            sps <- sps[-empty_spectra]
        }
        vps <- lengths(mzs, use.names = FALSE)
        res <- .createProfileMatrix(mz = unlist(mzs, use.names = FALSE), 
            int = unlist(lapply(sps, intensity), use.names = FALSE), 
            valsPerSpect = vps, method = bmethod, step = bstep, baselevel = bbaselevel, 
            basespace = bbasespace, mzrange. = bmzrange., returnBreaks = breturnBreaks)
        if (length(empty_spectra)) 
            if (returnBreaks) 
                res$profMat <- .insertColumn(res$profMat, empty_spectra, 
                    0)
            else res <- .insertColumn(res, empty_spectra, 0)
        res
    }, bmethod = method, bstep = step, bbaselevel = baselevel, bbasespace = basespace, 
        bmzrange. = mzrange., breturnBreaks = returnBreaks)
13: bplapply(splitByFile(object, f = theF), function(z, bmethod, 
        bstep, bbaselevel, bbasespace, bmzrange., breturnBreaks) {
        require(xcms, quietly = TRUE)
        sps <- spectra(z, BPPARAM = SerialParam())
        mzs <- lapply(sps, mz)
        if (any(is.na(unlist(mzs, use.names = FALSE)))) {
            sps <- lapply(sps, clean, all = TRUE)
            mzs <- lapply(sps, mz)
        }
        pk_count <- lengths(mzs)
        empty_spectra <- which(pk_count == 0)
        if (length(empty_spectra)) {
            mzs <- mzs[-empty_spectra]
            sps <- sps[-empty_spectra]
        }
        vps <- lengths(mzs, use.names = FALSE)
        res <- .createProfileMatrix(mz = unlist(mzs, use.names = FALSE), 
            int = unlist(lapply(sps, intensity), use.names = FALSE), 
            valsPerSpect = vps, method = bmethod, step = bstep, baselevel = bbaselevel, 
            basespace = bbasespace, mzrange. = bmzrange., returnBreaks = breturnBreaks)
        if (length(empty_spectra)) 
            if (returnBreaks) 
                res$profMat <- .insertColumn(res$profMat, empty_spectra, 
                    0)
            else res <- .insertColumn(res, empty_spectra, 0)
        res
    }, bmethod = method, bstep = step, bbaselevel = baselevel, bbasespace = basespace, 
        bmzrange. = mzrange., breturnBreaks = returnBreaks)
12: .local(object, ...)
11: profMat(object, method = "bin", step = binSize(param), fileIndex = centerSample(param), 
        returnBreaks = TRUE)
10: profMat(object, method = "bin", step = binSize(param), fileIndex = centerSample(param), 
        returnBreaks = TRUE)
9: withCallingHandlers(expr, message = function(c) invokeRestart("muffleMessage"))
8: suppressMessages(profCtr <- profMat(object, method = "bin", step = binSize(param), 
       fileIndex = centerSample(param), returnBreaks = TRUE)[[1]])
7: .obiwarp(object_sub, param = param)
6: .local(object, param, ...)
5: adjustRtime(as(object, "OnDiskMSnExp"), param = param, msLevel = msLevel)
4: adjustRtime(as(object, "OnDiskMSnExp"), param = param, msLevel = msLevel)
3: .local(object, param, ...)
2: adjustRtime(xdata, param = ObiwarpParam())
1: adjustRtime(xdata, param = ObiwarpParam())

Here is my session info:

sessionInfo()
R version 3.5.1 (2018-07-02)
Platform: x86_64-redhat-linux-gnu (64-bit)
Running under: CentOS Linux 7 (Core)

Matrix products: default
BLAS/LAPACK: /usr/lib64/R/lib/libRblas.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8    LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C             LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats4    parallel  stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] RColorBrewer_1.1-2  xcms_3.4.0          MSnbase_2.8.0       ProtGenerics_1.14.0 S4Vectors_0.20.0    mzR_2.16.0         
 [7] Rcpp_1.0.0          BiocParallel_1.16.0 Biobase_2.42.0      BiocGenerics_0.28.0 here_0.1           

loaded via a namespace (and not attached):
 [1] tidyselect_0.2.5       purrr_0.2.5            splines_3.5.1          lattice_0.20-35        colorspace_1.4-0      
 [6] yaml_2.2.0             survival_2.42-3        vsn_3.50.0             XML_3.98-1.16          rlang_0.3.1           
[11] pillar_1.3.1           glue_1.3.0             affy_1.60.0            bindrcpp_0.2.2         affyio_1.52.0         
[16] foreach_1.4.4          bindr_0.1.1            plyr_1.8.4             mzID_1.20.0            robustbase_0.93-3     
[21] zlibbioc_1.28.0        munsell_0.5.0          pcaMethods_1.74.0      gtable_0.2.0           codetools_0.2-15      
[26] IRanges_2.16.0         doParallel_1.0.14      MassSpecWavelet_1.48.0 preprocessCore_1.44.0  DEoptimR_1.0-8        
[31] scales_1.0.0           backports_1.1.2        BiocManager_1.30.3     limma_3.38.2           RANN_2.6              
[36] impute_1.56.0          ggplot2_3.1.0          digest_0.6.18          dplyr_0.7.7            ncdf4_1.16            
[41] grid_3.5.1             rprojroot_1.3-2        tools_3.5.1            magrittr_1.5           lazyeval_0.2.1        
[46] tibble_2.0.1           crayon_1.3.4           pkgconfig_2.0.2        Matrix_1.2-14          MASS_7.3-50           
[51] assertthat_0.2.0       rstudioapi_0.8         iterators_1.0.10       R6_2.4.0               MALDIquant_1.18       
[56] multtest_2.38.0        compiler_3.5.1        

Please let me know if you need any additional information and I apologize if I'm missing something obvious. Thank you for the help!

mheiser-md commented 5 years ago

You said you've been using matchedFilter before, is your data centroided? (It needs to be for centwave)

jorainer commented 5 years ago

Thanks @danagibbon for the very detailed report. First thing I would suggest is to update xcms to the most recent version and try again: devtools::install_github("sneumann/xcms", ref = "RELEASE_3_8")

Note that, to test obiwarp you don't need to run peak detection first as this can be performed directly on the data - and should I believe work on both centroided and profile-mode data:

xdata <- as(raw_data.2, "XCMSnExp")
xdata <- adjustRtime(xdata, param = ObiwarpParam())

If the error still persists we would have to dig into your data, as there might be something odd.

manuellam31 commented 4 years ago

Hi, I got the same error recently using adjustRtime. However, I checked the results for peak picking, and they were ok. See the traceback().

xchr <- findChromPeaks(raw_data, param = CentWaveParam(snthresh = 10,ppm = 15, noise = 0, peakwidth = c(19.2, 48.5), mzdiff= 0.0155, prefilter = c(3, 100), mzCenterFun="wMean", integrate= 1, fitgauss=FALSE))

Alignment

xchr <- adjustRtime(xchr, param =ObiwarpParam(binSize = 0.6)) Sample number 6 used as center sample. Error in x[, pos[i]:ncol(x)] : subscript out of bounds traceback() 27: stop(e) 26: value[3L] 25: tryCatchOne(expr, names, parentenv, handlers[[1L]]) 24: tryCatchList(expr, classes, parentenv, handlers) 23: tryCatch({ FUN(...) }, error = handle_error) 22: withCallingHandlers({ tryCatch({ FUN(...) }, error = handle_error) }, warning = handlewarning) 21: FUN(...) 20: FUN(X[[i]], ...) 19: lapply(X, FUN, ...) 18: bplapply(X, FUN, ..., BPREDO = BPREDO, BPPARAM = param) 17: bplapply(X, FUN, ..., BPREDO = BPREDO, BPPARAM = param) 16: bplapply(X, FUN, ..., BPREDO = BPREDO, BPPARAM = BPPARAM) 15: bplapply(X, FUN, ..., BPREDO = BPREDO, BPPARAM = BPPARAM) 14: bplapply(splitByFile(object, f = theF), function(z, bmethod, bstep, bbaselevel, bbasespace, bmzrange., breturnBreaks) { require(xcms, quietly = TRUE) sps <- spectra(z, BPPARAM = SerialParam()) mzs <- lapply(sps, mz) if (any(is.na(unlist(mzs, use.names = FALSE)))) { sps <- lapply(sps, clean, all = TRUE) mzs <- lapply(sps, mz) } pk_count <- lengths(mzs) empty_spectra <- which(pk_count == 0) if (length(empty_spectra)) { mzs <- mzs[-empty_spectra] sps <- sps[-empty_spectra] } vps <- lengths(mzs, use.names = FALSE) res <- .createProfileMatrix(mz = unlist(mzs, use.names = FALSE), int = unlist(lapply(sps, intensity), use.names = FALSE), valsPerSpect = vps, method = bmethod, step = bstep, baselevel = bbaselevel, basespace = bbasespace, mzrange. = bmzrange., returnBreaks = breturnBreaks) if (length(empty_spectra)) if (returnBreaks) res$profMat <- .insertColumn(res$profMat, empty_spectra, 0) else res <- .insertColumn(res, empty_spectra, 0) res }, bmethod = method, bstep = step, bbaselevel = baselevel, bbasespace = basespace, bmzrange. = mzrange., breturnBreaks = returnBreaks) 13: bplapply(splitByFile(object, f = theF), function(z, bmethod, bstep, bbaselevel, bbasespace, bmzrange., breturnBreaks) { require(xcms, quietly = TRUE) sps <- spectra(z, BPPARAM = SerialParam()) mzs <- lapply(sps, mz) if (any(is.na(unlist(mzs, use.names = FALSE)))) { sps <- lapply(sps, clean, all = TRUE) mzs <- lapply(sps, mz) } pk_count <- lengths(mzs) empty_spectra <- which(pk_count == 0) if (length(empty_spectra)) { mzs <- mzs[-empty_spectra] sps <- sps[-empty_spectra] } vps <- lengths(mzs, use.names = FALSE) res <- .createProfileMatrix(mz = unlist(mzs, use.names = FALSE), int = unlist(lapply(sps, intensity), use.names = FALSE), valsPerSpect = vps, method = bmethod, step = bstep, baselevel = bbaselevel, basespace = bbasespace, mzrange. = bmzrange., returnBreaks = breturnBreaks) if (length(empty_spectra)) if (returnBreaks) res$profMat <- .insertColumn(res$profMat, empty_spectra, 0) else res <- .insertColumn(res, empty_spectra, 0) res }, bmethod = method, bstep = step, bbaselevel = baselevel, bbasespace = basespace, bmzrange. = mzrange., breturnBreaks = returnBreaks) 12: .local(object, ...) 11: profMat(object, method = "bin", step = binSize(param), fileIndex = centerSample(param), returnBreaks = TRUE) 10: profMat(object, method = "bin", step = binSize(param), fileIndex = centerSample(param), returnBreaks = TRUE) 9: withCallingHandlers(expr, message = function(c) invokeRestart("muffleMessage")) 8: suppressMessages(profCtr <- profMat(object, method = "bin", step = binSize(param), fileIndex = centerSample(param), returnBreaks = TRUE)[[1]]) 7: .obiwarp(object_sub, param = param) 6: .local(object, param, ...) 5: adjustRtime(as(object, "OnDiskMSnExp"), param = param, msLevel = msLevel) 4: adjustRtime(as(object, "OnDiskMSnExp"), param = param, msLevel = msLevel) 3: .local(object, param, ...) 2: adjustRtime(xchr, param = ObiwarpParam(binSize = 0.6)) 1: adjustRtime(xchr, param = ObiwarpParam(binSize = 0.6))

Tried to run with rawdata and it didn't work either.

xdata <- as(raw_data, "XCMSnExp") xdata <- adjustRtime(xdata, param = ObiwarpParam()) Sample number 6 used as center sample. Error in x[, pos[i]:ncol(x)] : subscript out of bounds

sessionInfo() R version 3.6.2 (2019-12-12) Platform: x86_64-apple-darwin15.6.0 (64-bit) Running under: macOS Mojave 10.14.1 Matrix products: default BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib locale: [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8 attached base packages: [1] stats4 parallel stats graphics grDevices utils
[7] datasets methods base
other attached packages: [1] forcats_0.4.0 stringr_1.4.0 dplyr_0.8.3
[4] purrr_0.3.3 readr_1.3.1 tidyr_1.0.0
[7] tibble_2.1.3 tidyverse_1.3.0 pheatmap_1.0.12
[10] magrittr_1.5 pander_0.6.3 faahKO_1.26.0
[13] Autotuner_1.0.0 cosmiq_1.20.0 made4_1.60.0
[16] scatterplot3d_0.3-41 gplots_3.0.1.2 RColorBrewer_1.1-2
[19] ade4_1.7-13 ggplot2_3.2.1 IPO_1.12.0
[22] CAMERA_1.42.0 rsm_2.10 xcms_3.8.1
[25] MSnbase_2.12.0 ProtGenerics_1.18.0 S4Vectors_0.24.1
[28] mzR_2.20.0 Rcpp_1.0.3 BiocParallel_1.20.1 [31] Biobase_2.46.0 BiocGenerics_0.32.0 loaded via a namespace (and not attached): [1] readxl_1.3.1 backports_1.1.5
[3] Hmisc_4.3-0 plyr_1.8.5
[5] igraph_1.2.4.2 repr_1.0.2
[7] lazyeval_0.2.2 splines_3.6.2
[9] usethis_1.5.1 digest_0.6.23
[11] foreach_1.4.7 htmltools_0.4.0
[13] gdata_2.18.0 fansi_0.4.1
[15] checkmate_1.9.4 memoise_1.1.0
[17] cluster_2.1.0 doParallel_1.0.15
[19] limma_3.42.0 remotes_2.1.0
[21] modelr_0.1.5 prettyunits_1.1.0
[23] jpeg_0.1-8.1 colorspace_1.4-1
[25] rvest_0.3.5 skimr_2.0.2
[27] haven_2.2.0 xfun_0.12
[29] callr_3.4.0 crayon_1.3.4
[31] jsonlite_1.6 graph_1.64.0
[33] zeallot_0.1.0 impute_1.60.0
[35] survival_3.1-8 iterators_1.0.12
[37] glue_1.3.1 gtable_0.3.0
[39] zlibbioc_1.32.0 pkgbuild_1.0.6
[41] DEoptimR_1.0-8 scales_1.1.0
[43] vsn_3.54.0 DBI_1.1.0
[45] htmlTable_1.13.3 foreign_0.8-74
[47] preprocessCore_1.48.0 Formula_1.2-3
[49] httr_1.4.1 htmlwidgets_1.5.1
[51] acepack_1.4.1 ellipsis_0.3.0
[53] farver_2.0.1 pkgconfig_2.0.3
[55] XML_3.98-1.20 nnet_7.3-12
[57] dbplyr_1.4.2 utf8_1.1.4
[59] labeling_0.3 tidyselect_0.2.5
[61] rlang_0.4.2 cellranger_1.1.0
[63] munsell_0.5.0 tools_3.6.2
[65] cli_2.0.1 generics_0.0.2
[67] devtools_2.2.1 broom_0.5.3
[69] mzID_1.24.0 processx_3.4.1
[71] knitr_1.26 fs_1.3.1
[73] robustbase_0.93-5 caTools_1.17.1.4
[75] RANN_2.6.1 ncdf4_1.17
[77] RBGL_1.62.1 nlme_3.1-143
[79] xml2_1.2.2 pracma_2.2.9
[81] compiler_3.6.2 rstudioapi_0.10
[83] curl_4.3 png_0.1-7
[85] testthat_2.3.1 affyio_1.56.0
[87] reprex_0.3.0 MassSpecWavelet_1.52.0 [89] stringi_1.4.5 ps_1.3.0
[91] desc_1.2.0 lattice_0.20-38
[93] Matrix_1.2-18 multtest_2.42.0
[95] vctrs_0.2.1 pillar_1.4.3
[97] lifecycle_0.1.0 BiocManager_1.30.10
[99] MALDIquant_1.19.3 data.table_1.12.8
[101] bitops_1.0-6 R6_2.4.1
[103] latticeExtra_0.6-29 pcaMethods_1.78.0
[105] affy_1.64.0 KernSmooth_2.23-16
[107] gridExtra_2.3 IRanges_2.20.1
[109] sessioninfo_1.1.1 codetools_0.2-16
[111] MASS_7.3-51.5 gtools_3.8.1
[113] assertthat_0.2.1 pkgload_1.0.2
[115] rprojroot_1.3-2 withr_2.1.2
[117] hms_0.5.3 grid_3.6.2
[119] rpart_4.1-15 lubridate_1.7.4
[121] base64enc_0.1-3

Thank you in advance :)

jorainer commented 4 years ago

first of all sorry for the late reply - been pretty busy.

obiwarp seems to be very fragile depending on the input data... could you please try to run that again after:

raw_data <- filterEmptySpectra(raw_data)

maybe some of your spectra are empty and this definitely causes errors/problems with obiwarp.

metabolon1 commented 4 years ago

I was getting the same error message as the original poster during Obiwarp alignment. Using filterEmptySpectra() on the raw data object prior to peak detection and alignment appears to have fixed the problem for me. Thanks for the suggestion!

Taylan