darwin-eu / CDMConnector

A pipe friendly way to interact with an OMOP Common Data Model
https://darwin-eu.github.io/CDMConnector/
Apache License 2.0
12 stars 9 forks source link

Error when running the example in the vignette Working with cohorts #4

Closed mderidder95 closed 1 year ago

mderidder95 commented 1 year ago

Running the following code:

library(CDMConnector)
library(dplyr)
library(Capr)

downloadEunomiaData(
  pathToData = here::here(), # change to the location you want to save the data
  overwrite = TRUE
)
con <- DBI::dbConnect(duckdb::duckdb(), dbdir = eunomia_dir())
cdm <- CDMConnector::cdm_from_con(
  con = con,
  cdm_schema = "main",
  write_schema = "main"
)
path <- file.path(getwd(), "cohorts")
dir.create(path)

gibleed_cohort_definition <- cohort(
  entry = condition(cs(descendants(192671))),
  attrition = attrition(
    "no RA" = withAll(
      exactly(0,
              condition(cs(descendants(80809))),
              duringInterval(eventStarts(-Inf, Inf))))
  )
)
writeCohort(gibleed_cohort_definition, file.path(path, "gibleed.json"))

gibleed_cohort_set <- readCohortSet(path = path)

cdm <- generateCohortSet(
  cdm,
  gibleed_cohort_set,
  name = "gibleed",
  computeAttrition = TRUE
)

results in the message: Error in `map()`: ℹ In index: 15. Caused by error: ! rapi_execute: Failed to run query Error: Conversion Error: extract specifier "" not recognized Run `rlang::last_trace()` to see where the error occurred.

ablack3 commented 1 year ago

Thanks for reporting this!

ablack3 commented 1 year ago
library(CDMConnector)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(Capr)

# downloadEunomiaData(
#   pathToData = here::here(), # change to the location you want to save the data
#   overwrite = TRUE
# )
con <- DBI::dbConnect(duckdb::duckdb(), dbdir = eunomia_dir())
cdm <- CDMConnector::cdm_from_con(
  con = con,
  cdm_schema = "main",
  write_schema = "main"
)
path <- file.path(getwd(), "cohorts")
dir.create(path)

gibleed_cohort_definition <- cohort(
  entry = condition(cs(descendants(192671))),
  attrition = attrition(
    "no RA" = withAll(
      exactly(0,
              condition(cs(descendants(80809))),
              duringInterval(eventStarts(-Inf, Inf))))
  )
)
writeCohort(gibleed_cohort_definition, file.path(path, "gibleed.json"))

gibleed_cohort_set <- readCohortSet(path = path)

cdm <- generateCohortSet(
  cdm,
  gibleed_cohort_set,
  name = "gibleed",
  computeAttrition = TRUE
)
#> Generating cohorts ■                                0/1
#> Generating cohorts ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■  1/1
#> 

cdm$gibleed
#> # Source:   table<main.gibleed> [?? x 4]
#> # Database: DuckDB 0.6.1 [root@Darwin 21.6.0:R 4.2.2//var/folders/xx/01v98b6546ldnm1rg1_bvk000000gn/T//RtmpVmjXCC/zsukesie]
#>    cohort_definition_id subject_id cohort_start_date cohort_end_date
#>                   <int>      <dbl> <date>            <date>         
#>  1                    1         35 1997-07-25        2018-12-25     
#>  2                    1         80 1974-10-27        2019-04-15     
#>  3                    1         99 2000-03-11        2019-04-27     
#>  4                    1        115 2001-04-15        2019-05-05     
#>  5                    1        116 1970-03-12        2006-07-16     
#>  6                    1        133 2019-04-05        2019-04-06     
#>  7                    1        160 2006-02-19        2019-04-11     
#>  8                    1        163 2010-04-25        2018-07-10     
#>  9                    1        164 2009-02-26        2019-06-04     
#> 10                    1        187 1985-02-08        2018-11-19     
#> # … with more rows

sessionInfo()
#> R version 4.2.2 (2022-10-31)
#> Platform: x86_64-apple-darwin17.0 (64-bit)
#> Running under: macOS Big Sur ... 10.16
#> 
#> Matrix products: default
#> BLAS:   /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
#> 
#> locale:
#> [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] Capr_2.0.2         dplyr_1.1.0        CDMConnector_0.5.1
#> 
#> loaded via a namespace (and not attached):
#>  [1] pillar_1.8.1      compiler_4.2.2    dbplyr_2.3.0.9000 tools_4.2.2      
#>  [5] digest_0.6.31     jsonlite_1.8.4    lubridate_1.9.2   evaluate_0.20    
#>  [9] lifecycle_1.0.3   tibble_3.1.8      checkmate_2.1.0   timechange_0.2.0 
#> [13] pkgconfig_2.0.3   rlang_1.0.6       reprex_2.0.2      DBI_1.1.3        
#> [17] cli_3.6.0         rstudioapi_0.14   yaml_2.3.7        xfun_0.37        
#> [21] fastmap_1.1.0     rJava_1.0-6       stringr_1.5.0     duckdb_0.6.1     
#> [25] withr_2.5.0       knitr_1.42        generics_0.1.3    fs_1.6.1         
#> [29] vctrs_0.5.2       hms_1.1.2         tidyselect_1.2.0  glue_1.6.2       
#> [33] R6_2.5.1          fansi_1.0.4       rmarkdown_2.20    blob_1.2.3       
#> [37] SqlRender_1.12.1  CirceR_1.2.0      tzdb_0.3.0        purrr_1.0.1      
#> [41] readr_2.1.4       magrittr_2.0.3    backports_1.4.1   htmltools_0.5.4  
#> [45] ellipsis_0.3.2    utf8_1.2.3        stringi_1.7.12

DBI::dbDisconnect(con)

Created on 2023-04-04 with reprex v2.0.2

ablack3 commented 1 year ago

Here is a simpler way to do it using the current main branch.

library(CDMConnector)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(Capr)

# downloadEunomiaData(
#   pathToData = here::here(), # change to the location you want to save the data
#   overwrite = TRUE
# )
con <- DBI::dbConnect(duckdb::duckdb(), dbdir = eunomia_dir())
cdm <- CDMConnector::cdm_from_con(
  con = con,
  cdm_schema = "main",
  write_schema = "main"
)

gibleed_cohort_definition <- cohort(
  entry = condition(cs(descendants(192671))),
  attrition = attrition(
    "no RA" = withAll(
      exactly(0,
              condition(cs(descendants(80809))),
              duringInterval(eventStarts(-Inf, Inf))))
  )
)

cdm <- generateCohortSet(
  cdm,
  list(gibleed = gibleed_cohort_definition),
  name = "gibleed",
  computeAttrition = TRUE,
  overwrite = TRUE
)
#> Generating cohorts ■                                0/1
#> Generating cohorts ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■  1/1
#> 

cdm$gibleed
#> # Source:   table<main.gibleed> [?? x 4]
#> # Database: DuckDB 0.6.1 [root@Darwin 21.6.0:R 4.2.2//var/folders/xx/01v98b6546ldnm1rg1_bvk000000gn/T//RtmpiI7S0T/rzbmltfn]
#>    cohort_definition_id subject_id cohort_start_date cohort_end_date
#>                   <int>      <dbl> <date>            <date>         
#>  1                    1         35 1997-07-25        2018-12-25     
#>  2                    1         80 1974-10-27        2019-04-15     
#>  3                    1         99 2000-03-11        2019-04-27     
#>  4                    1        115 2001-04-15        2019-05-05     
#>  5                    1        116 1970-03-12        2006-07-16     
#>  6                    1        133 2019-04-05        2019-04-06     
#>  7                    1        160 2006-02-19        2019-04-11     
#>  8                    1        163 2010-04-25        2018-07-10     
#>  9                    1        164 2009-02-26        2019-06-04     
#> 10                    1        187 1985-02-08        2018-11-19     
#> # … with more rows

DBI::dbDisconnect(con)

Created on 2023-04-04 with reprex v2.0.2

ablack3 commented 1 year ago

@mderidder95 Will you try again using the current main branch and confirm the issue is fixed?

mderidder95 commented 1 year ago

No, sorry, exactly the same error message.

ginberg commented 1 year ago

@ablack3 I am getting the same error as Maria. Here are the full stack trace and my `sessionInfo()` output:

Error in `map()`:
ℹ In index: 15.
Caused by error:
! rapi_execute: Failed to run query
Error: Conversion Error: extract specifier "" not recognized
Backtrace:
  1. CDMConnector::generateCohortSet(...)
  2. purrr::walk(sql, ~DBI::dbExecute(con, .x, immediate = TRUE))
       at CDMConnector/R/generateCohortSet.R:326:4
  3. purrr::map(.x, .f, ..., .progress = .progress)
  4. purrr:::map_("list", .x, .f, ..., .progress = .progress)
     ...
 14. duckdb::dbSendQuery(conn, statement, ...)
 15. duckdb (local) .local(conn, statement, ...)
 16. duckdb:::duckdb_result(...)
 17. duckdb:::duckdb_execute(res)
 18. duckdb:::rapi_execute(...)
Generating cohorts ■                                0/1
 Show Traceback

Error in map(.x, .f, ..., .progress = .progress) :
Caused by error:
! rapi_execute: Failed to run query
Error: Conversion Error: extract specifier "" not recognized
R version 4.2.1 (2022-06-23)
Platform: aarch64-apple-darwin20 (64-bit)
Running under: macOS Ventura 13.1

Matrix products: default
LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] Capr_2.0.2         dplyr_1.1.0        CDMConnector_0.5.1

loaded via a namespace (and not attached):
 [1] pillar_1.8.1     compiler_4.2.1   dbplyr_2.3.0     tools_4.2.1      digest_0.6.31    jsonlite_1.8.0   lubridate_1.8.0  lifecycle_1.0.3  tibble_3.2.1     checkmate_2.1.0 
[11] pkgconfig_2.0.3  rlang_1.1.0      DBI_1.1.3        cli_3.6.0        rstudioapi_0.14  xfun_0.37        rJava_1.0-6      duckdb_0.6.1     withr_2.5.0      stringr_1.5.0   
[21] knitr_1.42       generics_0.1.3   vctrs_0.6.1      hms_1.1.2        tidyselect_1.2.0 glue_1.6.2       R6_2.5.1         fansi_1.0.3      tzdb_0.3.0       readr_2.1.2     
[31] purrr_1.0.1      CirceR_1.2.0     SqlRender_1.13.1 magrittr_2.0.3   backports_1.4.1  ellipsis_0.3.2   assertthat_0.2.1 utf8_1.2.2       stringi_1.7.8   

I seem to have the same versions of dependencies as you, at least for the main ones. Any idea what it could be?

ablack3 commented 1 year ago

Thanks for reporting this. It looks like @edward-burn had a similar error that was fixed when updating packages. I don't know what is causing this error and I can't reproduce the error myself. It looks like we are using the same duckdb version.

https://github.com/darwin-eu-dev/CDMConnector/issues/74#issuecomment-1497885120

@ginberg will you try creating a reprex (with reprex::reprex_selection() or the rstudio addin) with this code after updating your R packages?

library(CDMConnector)
library(dplyr, warn.conflicts = FALSE)
library(Capr)

downloadEunomiaData(
  pathToData = here::here(), # change to the location you want to save the data
  overwrite = TRUE
)
con <- DBI::dbConnect(duckdb::duckdb(), dbdir = eunomia_dir())
cdm <- CDMConnector::cdm_from_con(
  con = con,
  cdm_schema = "main",
  write_schema = "main"
)

gibleed_cohort_definition <- cohort(
  entry = condition(cs(descendants(192671))),
  attrition = attrition(
    "no RA" = withAll(
      exactly(0,
              condition(cs(descendants(80809))),
              duringInterval(eventStarts(-Inf, Inf))))
  )
)

cdm <- generateCohortSet(
  cdm,
  list(gibleed = gibleed_cohort_definition),
  name = "gibleed",
  computeAttrition = TRUE,
  overwrite = TRUE
)

cdm$gibleed

DBI::dbDisconnect(con, shutdown = TRUE)

sessionInfo()
ginberg commented 1 year ago

I updated my packages using `update.packages()`, but I am still getting the error.

library(CDMConnector)
library(dplyr, warn.conflicts = FALSE)
library(Capr)

downloadEunomiaData(
  pathToData = "/Users/ginberg/Data/eunomia", # change to the location you want to save the data
  overwrite = TRUE
)

con <- DBI::dbConnect(duckdb::duckdb(), dbdir = eunomia_dir())
cdm <- CDMConnector::cdm_from_con(
  con = con,
  cdm_schema = "main",
  write_schema = "main"
)

gibleed_cohort_definition <- cohort(
  entry = condition(cs(descendants(192671))),
  attrition = attrition(
    "no RA" = withAll(
      exactly(0,
              condition(cs(descendants(80809))),
              duringInterval(eventStarts(-Inf, Inf))))
  )
)

cdm <- generateCohortSet(
  cdm,
  list(gibleed = gibleed_cohort_definition),
  name = "gibleed",
  computeAttrition = TRUE,
  overwrite = TRUE
)
#> Generating cohorts ■                                0/1
#> Error in `map()`:
#> ℹ In index: 15.
#> Caused by error:
#> ! rapi_execute: Failed to run query
#> Error: Conversion Error: extract specifier "" not recognized
#> Backtrace:
#>      ▆
#>   1. ├─CDMConnector::generateCohortSet(...)
#>   2. │ └─purrr::walk(sql, ~DBI::dbExecute(con, .x, immediate = TRUE)) at CDMConnector/R/generateCohortSet.R:326:4
#>   3. │   └─purrr::map(.x, .f, ..., .progress = .progress)
#>   4. │     └─purrr:::map_("list", .x, .f, ..., .progress = .progress)
#>   5. │       ├─purrr:::with_indexed_errors(...)
#>   6. │       │ └─base::withCallingHandlers(...)
#>   7. │       ├─purrr:::call_with_cleanup(...)
#>   8. │       └─CDMConnector (local) .f(.x[[i]], ...)
#>   9. │         ├─DBI::dbExecute(con, .x, immediate = TRUE)
#>  10. │         └─DBI::dbExecute(con, .x, immediate = TRUE)
#>  11. │           ├─DBI::dbSendStatement(conn, statement, ...)
#>  12. │           └─DBI::dbSendStatement(conn, statement, ...)
#>  13. │             ├─DBI::dbSendQuery(conn, statement, ...)
#>  14. │             └─duckdb::dbSendQuery(conn, statement, ...)
#>  15. │               └─duckdb (local) .local(conn, statement, ...)
#>  16. │                 └─duckdb:::duckdb_result(...)
#>  17. │                   └─duckdb:::duckdb_execute(res)
#>  18. │                     └─duckdb:::rapi_execute(...)
#>  19. └─base::.handleSimpleError(...)
#>  20.   └─purrr (local) h(simpleError(msg, call))
#>  21.     └─cli::cli_abort(...)
#>  22.       └─rlang::abort(...)
#> Generating cohorts ■                                0/1
#> 

cdm$gibleed
#> NULL

DBI::dbDisconnect(con, shutdown = TRUE)

sessionInfo()
#> R version 4.2.1 (2022-06-23)
#> Platform: aarch64-apple-darwin20 (64-bit)
#> Running under: macOS Ventura 13.1
#> 
#> Matrix products: default
#> BLAS:   /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRblas.0.dylib
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib
#> 
#> locale:
#> [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] Capr_2.0.2         dplyr_1.1.1        CDMConnector_0.5.1
#> 
#> loaded via a namespace (and not attached):
#>  [1] compiler_4.2.1    pillar_1.9.0      dbplyr_2.3.2      R.methodsS3_1.8.2
#>  [5] R.utils_2.12.2    tools_4.2.1       digest_0.6.31     checkmate_2.1.0  
#>  [9] jsonlite_1.8.4    lubridate_1.9.2   evaluate_0.20     lifecycle_1.0.3  
#> [13] tibble_3.2.1      R.cache_0.16.0    timechange_0.2.0  pkgconfig_2.0.3  
#> [17] rlang_1.1.0       reprex_2.0.2      DBI_1.1.3         cli_3.6.1        
#> [21] rstudioapi_0.14   yaml_2.3.7        xfun_0.38         fastmap_1.1.1    
#> [25] rJava_1.0-6       stringr_1.5.0     duckdb_0.6.1      withr_2.5.0      
#> [29] styler_1.9.1      knitr_1.42        generics_0.1.3    fs_1.6.1         
#> [33] vctrs_0.6.1       tidyselect_1.2.0  glue_1.6.2        R6_2.5.1         
#> [37] fansi_1.0.4       rmarkdown_2.21    SqlRender_1.13.1  CirceR_1.2.0     
#> [41] purrr_1.0.1       magrittr_2.0.3    backports_1.4.1   htmltools_0.5.5  
#> [45] utf8_1.2.3        stringi_1.7.12    R.oo_1.25.0
Created on 2023-04-06 with [reprex v2.0.2](https://reprex.tidyverse.org/)
edward-burn commented 1 year ago

@ginberg did you also download the eunomia data again? That seemed to help in my case

ginberg commented 1 year ago

@edward-burn Yes, I did that, but it didn't help. Do you remember if you updated anything else besides the Eunomia data?

ablack3 commented 1 year ago

This is probably a SQL error. @edward-burn identified another SQL error when building drug cohorts. I tracked the cause down to the fact that I'm not using the correct data types in the Eunomia dataset. I was lazy and just had the types automatically assigned by readr. I'll change CDMConnector so the data types exactly match the CDM DDL. Perhaps that will fix the error.

mderidder95 commented 1 year ago

I hope so!

mderidder95 commented 1 year ago

This example using Eunomia now runs, with CDMConnector 0.6.0

ginberg commented 1 year ago

works for me too, so I guess we can close this issue @ablack3? Thanks for the update