grimbough / biomaRt

R package providing query functionality to BioMart instances like Ensembl
https://bioconductor.org/packages/biomaRt/
32 stars 13 forks source link

biomaRt::select throws Error in `collect()`: ! Failed to collect lazy table #89

Closed ChangqingW closed 8 months ago

ChangqingW commented 8 months ago

This snippet was working perfectly fine for me until today:

biomaRt::select(
    biomaRt::useMart(biomart = "ensembl", dataset = "mmusculus_gene_ensembl"),
    keys = c("ENSMUSG00000051951", "ENSMUSG00000089699", "ENSMUSG00000102331", "ENSMUSG00000102343", "ENSMUSG00000025900"),
    columns = c("ensembl_gene_id", "chromosome_name"),
    keytype = "ensembl_gene_id"
)

Error in `collect()`:
! Failed to collect lazy table.
Caused by error in `db_collect()`:
! Arguments in `...` must be used.
✖ Problematic argument:
• ..1 = Inf
ℹ Did you misspell an argument name?
Run `rlang::last_trace()` to see where the error occurred.
> rlang::last_trace()
<error/rlang_error>
Error in `collect()`:
! Failed to collect lazy table.
Caused by error in `db_collect()`:
! Arguments in `...` must be used.
✖ Problematic argument:
• ..1 = Inf
ℹ Did you misspell an argument name?
---
Backtrace:
     ▆
  1. ├─biomaRt::select(...)
  2. ├─biomaRt::select(...)
  3. │ └─biomaRt::getBM(...)
  4. │   └─BiocFileCache::BiocFileCache(cache, ask = FALSE)
  5. │     └─BiocFileCache:::.sql_create_db(bfc)
  6. │       └─BiocFileCache:::.sql_validate_version(bfc)
  7. │         └─BiocFileCache:::.sql_schema_version(bfc)
  8. │           ├─base::tryCatch(...)
  9. │           │ └─base (local) tryCatchList(expr, classes, parentenv, handlers)
 10. │           └─tbl(src, "metadata") %>% collect(Inf)
 11. ├─dplyr::collect(., Inf)
 12. └─dbplyr:::collect.tbl_sql(., Inf)
 13.   ├─base::tryCatch(...)
 14.   │ └─base (local) tryCatchList(expr, classes, parentenv, handlers)
 15.   │   └─base (local) tryCatchOne(expr, names, parentenv, handlers[[1L]])
 16.   │     └─base (local) doTryCatch(return(expr), name, parentenv, handler)
 17.   └─dbplyr::db_collect(x$src$con, sql, n = n, warn_incomplete = warn_incomplete, ...)
--

Regarding the ℹ Did you misspell an argument name? message, I checked that columns = c("ensembl_gene_id", "chromosome_name") are present in the columns of biomaRt::useMart(biomart = "ensembl", dataset = "mmusculus_gene_ensembl"):

> c("ensembl_gene_id", "chromosome_name") %in% biomaRt::columns(biomaRt::useMart(biomart = "ensembl", dataset = "mmusculus_gene_ensembl"))
[1] TRUE TRUE

And that the argument names for biomaRt::select are consistent with the help page.

What am I missing here? Or is this due to API changes in ensembl service?

ChangqingW commented 8 months ago

Maybe this was due to the dbplyr update (I upgraded to 2.4.0 last night)? Somehow the Inf argument went into ...:

> debug(dbplyr::db_collect)
> biomaRt::select(
+     x = biomaRt::useMart(biomart = "ensembl", dataset = "mmusculus_gene_ensembl"),
+     keys = c("ENSMUSG00000051951", "ENSMUSG00000089699", "ENSMUSG00000102331", "ENSMUSG00000102343", "ENSMUSG00000025900"),
+     columns = c("ensembl_gene_id", "chromosome_name"),
+     keytype = "ensembl_gene_id"
+ )
debugging in: db_collect(x$src$con, sql, n = n, warn_incomplete = warn_incomplete, 
    ...)
debug: {
    check_dots_used()
    UseMethod("db_collect")
}
Browse[2]> ...
Error: '...' used in an incorrect context
Browse[2]> list(...)
[[1]]
[1] Inf
@@ -119,7 +129,7 @@ collect.tbl_sql <- function(x, ..., n = Inf, warn_incomplete = TRUE, cte = FALSE

   sql <- db_sql_render(x$src$con, x, cte = cte)
   tryCatch(
-    out <- db_collect(x$src$con, sql, n = n, warn_incomplete = warn_incomplete),
+    out <- db_collect(x$src$con, sql, n = n, warn_incomplete = warn_incomplete, ...),
     error = function(cnd) {
       cli_abort("Failed to collect lazy table.", parent = cnd)
     }
@@ -109,6 +150,7 @@ db_compute.DBIConnection <- function(con,
 #' @export
 #' @rdname db-io
 db_collect <- function(con, sql, n = -1, warn_incomplete = TRUE, ...) {
+  check_dots_used()
   UseMethod("db_collect")
 }
 #' @export
> sessionInfo()
R version 4.3.1 (2023-06-16)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: CentOS Linux 7 (Core)

Matrix products: default
BLAS/LAPACK: /stornext/System/data/tools/openBLAS/openBLAS-0.3.23-gcc-11.3.0/lib/libopenblas_haswellp-r0.3.23.so;  LAPACK version 3.11.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8     LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                  LC_ADDRESS=C               LC_TELEPHONE=C             LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

time zone: Australia/Melbourne
tzcode source: system (glibc)

attached base packages:
[1] stats4    stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] scran_1.28.2                scater_1.28.0               scuttle_1.10.3              lubridate_1.9.3             forcats_1.0.0               stringr_1.5.0              
 [7] dplyr_1.1.3                 purrr_1.0.2                 readr_2.1.4                 tidyr_1.3.0                 tibble_3.2.1                tidyverse_2.0.0            
[13] ggplot2_3.4.4               SpatialExperiment_1.10.0    SingleCellExperiment_1.22.0 SummarizedExperiment_1.30.2 Biobase_2.60.0              GenomicRanges_1.52.1       
[19] GenomeInfoDb_1.36.4         IRanges_2.34.1              S4Vectors_0.38.2            BiocGenerics_0.46.0         MatrixGenerics_1.12.3       matrixStats_1.0.0          
[25] targets_1.3.2              

loaded via a namespace (and not attached):
  [1] rstudioapi_0.15.0         jsonlite_1.8.7            magrittr_2.0.3            ggbeeswarm_0.7.2          magick_2.7.5              zlibbioc_1.46.0          
  [7] vctrs_0.6.4               memoise_2.0.1             DelayedMatrixStats_1.22.6 RCurl_1.98-1.12           progress_1.2.2            htmltools_0.5.6.1        
 [13] S4Arrays_1.0.6            curl_5.1.0                BiocNeighbors_1.18.0      Rhdf5lib_1.22.1           rhdf5_2.44.0              htmlwidgets_1.6.2        
 [19] cachem_1.0.8              igraph_1.5.1              lifecycle_1.0.3           pkgconfig_2.0.3           rsvd_1.0.5                Matrix_1.6-1.1           
 [25] R6_2.5.1                  fastmap_1.1.1             GenomeInfoDbData_1.2.10   digest_0.6.33             colorspace_2.1-0          AnnotationDbi_1.62.2     
 [31] ps_1.7.5                  dqrng_0.3.1               irlba_2.3.5.1             RSQLite_2.3.1             base64url_1.4             beachmat_2.16.0          
 [37] filelock_1.0.2            fansi_1.0.5               timechange_0.2.0          httr_1.4.7                abind_1.4-5               compiler_4.3.1           
 [43] bit64_4.0.5               withr_2.5.1               backports_1.4.1           BiocParallel_1.34.2       viridis_0.6.4             DBI_1.1.3                
 [49] qs_0.25.5                 HDF5Array_1.28.1          R.utils_2.12.2            biomaRt_2.56.1            rappdirs_0.3.3            DelayedArray_0.26.7      
 [55] rjson_0.2.21              bluster_1.10.0            tools_4.3.1               vipor_0.4.5               beeswarm_0.4.0            R.oo_1.25.0              
 [61] glue_1.6.2                callr_3.7.3               rhdf5filters_1.12.1       grid_4.3.1                cluster_2.1.4             generics_0.1.3           
 [67] gtable_0.3.4              tzdb_0.4.0                R.methodsS3_1.8.2         data.table_1.14.8         RApiSerialize_0.1.2       hms_1.1.3                
 [73] xml2_1.3.5                BiocSingular_1.16.0       ScaledMatrix_1.8.1        metapod_1.8.0             stringfish_0.15.8         utf8_1.2.4               
 [79] XVector_0.40.0            ggrepel_0.9.4             pillar_1.9.0              limma_3.56.2              BiocFileCache_2.8.0       lattice_0.21-8           
 [85] bit_4.0.5                 tidyselect_1.2.0          locfit_1.5-9.8            Biostrings_2.68.1         knitr_1.44                gridExtra_2.3            
 [91] edgeR_3.42.4              xfun_0.40                 statmod_1.5.0             DropletUtils_1.20.0       visNetwork_2.1.2          stringi_1.7.12           
 [97] yaml_2.3.7                codetools_0.2-19          BiocManager_1.30.22       cli_3.6.1                 RcppParallel_5.1.7        munsell_0.5.0            
[103] processx_3.8.2            Rcpp_1.0.11               dbplyr_2.4.0              png_0.1-8                 XML_3.99-0.14             parallel_4.3.1           
[109] ellipsis_0.3.2            blob_1.2.4                prettyunits_1.2.0         sparseMatrixStats_1.12.2  bitops_1.0-7              viridisLite_0.4.2        
[115] scales_1.2.1              crayon_1.5.2              rlang_1.1.1               KEGGREST_1.40.1  
ChangqingW commented 8 months ago

nvm, seems they already merged a PR for this in BiocFileCache https://github.com/Bioconductor/BiocFileCache/pull/50/files

accelerator819 commented 8 months ago

hey, dude, have you solved this problem? I met the same error when running getBM(). What confuses me is that I can run this function with a completely identical command on another computer with the same version of biomaRt using the same data.

ChangqingW commented 8 months ago

> hey, dude, have you solved this problem? I met the same error when running getBM(). What confuses me is that I can run this function with a completely identical command on another computer with the same version of biomaRt using the same data.

Just downgrade dbplyr and you should be fine.

devtools::install_version("dbplyr", version = "2.3.4")
# restart R

It's the latest dbplyr version breaking BiocFileCache functions.

You can revert to the latest version once the new BiocFileCache version (2.10.1) is built on bioconductor.

accelerator819 commented 8 months ago

Many thanks, my problem is solved!

grimbough commented 8 months ago

Thanks for the reports. As @ChangqingW says, this is actually an incompatibility between dbplyr and BiocFileCache, and biomaRt is an innocent bystander! The problem in BiocFileCache should already have been fixed if you update to version 2.10.1 or greater (see https://support.bioconductor.org/p/9154865/ for more details).

ChangqingW commented 8 months ago

Resolved with BiocFileCache 2.10.1 (BiocManager::install(version = "3.18")):

> sessionInfo()
R version 4.3.1 (2023-06-16)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: CentOS Linux 7 (Core)

Matrix products: default
BLAS/LAPACK: /stornext/System/data/tools/openBLAS/openBLAS-0.3.23-gcc-11.3.0/lib/libopenblas_haswellp-r0.3.23.so;  LAPACK version 3.11.0

locale:
 [1] LC_CTYPE=en_AU.UTF-8       LC_NUMERIC=C
 [3] LC_TIME=en_AU.UTF-8        LC_COLLATE=en_AU.UTF-8
 [5] LC_MONETARY=en_AU.UTF-8    LC_MESSAGES=en_AU.UTF-8
 [7] LC_PAPER=en_AU.UTF-8       LC_NAME=C
 [9] LC_ADDRESS=C               LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_AU.UTF-8 LC_IDENTIFICATION=C

time zone: Australia/Melbourne
tzcode source: system (glibc)

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base

other attached packages:
[1] BiocFileCache_2.10.1 dbplyr_2.4.0

loaded via a namespace (and not attached):
 [1] vctrs_0.6.4         httr_1.4.7          cli_3.6.1
 [4] rlang_1.1.1         DBI_1.1.3           generics_0.1.3
 [7] glue_1.6.2          bit_4.0.5           fansi_1.0.5
[10] filelock_1.0.2      tibble_3.2.1        fastmap_1.1.1
[13] lifecycle_1.0.3     memoise_2.0.1       BiocManager_1.30.22
[16] compiler_4.3.1      dplyr_1.1.3         RSQLite_2.3.2
[19] blob_1.2.4          pkgconfig_2.0.3     R6_2.5.1
[22] tidyselect_1.2.0    utf8_1.2.4          pillar_1.9.0
[25] curl_5.1.0          magrittr_2.0.3      tools_4.3.1
[28] bit64_4.0.5         cachem_1.0.8
> biomaRt::select(
+     biomaRt::useMart(biomart = "ensembl", dataset = "mmusculus_gene_ensembl"),
+     keys = c("ENSMUSG00000051951", "ENSMUSG00000089699", "ENSMUSG00000102331", "ENSMUSG00000102343", "ENSMUSG00000025900"),
+     columns = c("ensembl_gene_id", "chromosome_name"),
+     keytype = "ensembl_gene_id"
+ )
     ensembl_gene_id chromosome_name
1 ENSMUSG00000025900               1
2 ENSMUSG00000051951               1
3 ENSMUSG00000089699               1
4 ENSMUSG00000102331               1
5 ENSMUSG00000102343               1
>