OHDSI / CohortGenerator

An R package for instantiating cohorts using data in the CDM.
https://ohdsi.github.io/CohortGenerator/
11 stars 10 forks source link

isCohortSet requires 64 bit integer cohort_definition_id but they don't seem to work with generateCohortSet #108

Closed ablack3 closed 10 months ago

ablack3 commented 1 year ago

The isCohortDefinitionSet function requires a 64-bit integer cohort_definition_id, but 64-bit integer IDs don't seem to work with generateCohortSet.

@anthonysena

cohortTableNames <- CohortGenerator::getCohortTableNames(cohortTable = "my_cohort_table")
connectionDetails <- Eunomia::getEunomiaConnectionDetails()
filePath <- system.file("testdata", "name", "cohorts", "celecoxib.json", package = "CohortGenerator")
json <- readChar(filePath, file.info(filePath)$size)
sql <- CirceR::buildCohortQuery(CirceR::cohortExpressionFromJson(json), 
                                options =  CirceR::createGenerateOptions(generateStats = FALSE))

cohortsToCreate <- data.frame(
  cohortId = bit64::as.integer64(1L),
  cohortName = "gibleed",
  json = json,
  sql = sql
)

# with an integer64 cohortId the check returns FALSE (with a data type mismatch warning)
CohortGenerator::isCohortDefinitionSet(cohortsToCreate)
#> Warning in checkAndFixCohortDefinitionSetDataTypes(x = x, fixDataTypes = FALSE, : Your cohortDefinitionSet had a mismatch in data types. Please check your cohortDefinitionSet to ensure it conforms to the following expected data types:Expected column == data type
#> --------------------------
#> cohortId == integer
#> cohortName == character
#> sql == character
#> json == character
#> --------------------------
#> Your cohortDefinitionSet 
#> --------------------------
#> cohortId == integer64
#> cohortName == character
#> json == character
#> sql == character
#> [1] FALSE

CohortGenerator::createCohortTables(connectionDetails = connectionDetails,
                                    cohortDatabaseSchema = "main",
                                    cohortTableNames = cohortTableNames)
#> Connecting using SQLite driver
#> Currently in a tryCatch or withCallingHandlers block, so unable to add global calling handlers. ParallelLogger will not capture R messages, errors, and warnings, only explicit calls to ParallelLogger. (This message will not be shown again this R session)
#> Creating cohort tables
#> - Created table main.my_cohort_table
#> - Created table main.my_cohort_table_inclusion
#> - Created table main.my_cohort_table_inclusion_result
#> - Created table main.my_cohort_table_inclusion_stats
#> - Created table main.my_cohort_table_summary_stats
#> - Created table main.my_cohort_table_censor_stats
#> Creating cohort tables took 0.26secs

# but generation fails
cohortsGenerated <- CohortGenerator::generateCohortSet(connectionDetails = connectionDetails,
                                                       cdmDatabaseSchema = "main",
                                                       cohortDatabaseSchema = "main",
                                                       cohortTableNames = cohortTableNames,
                                                       cohortDefinitionSet = cohortsToCreate)
#> Connecting using SQLite driver
#> /1- Generating cohort:
#> Error in tryCatch(withCallingHandlers({: 1 assertions failed:
#>  * Variable 'sql': Must have length 1, but has length 0.

cohortsToCreate <- data.frame(
  cohortId = 1L,
  cohortName = "gibleed",
  json = json,
  sql = sql
)

# with a plain integer cohortId the check returns TRUE
CohortGenerator::isCohortDefinitionSet(cohortsToCreate)
#> [1] TRUE

CohortGenerator::createCohortTables(connectionDetails = connectionDetails,
                                    cohortDatabaseSchema = "main",
                                    cohortTableNames = cohortTableNames)
#> Connecting using SQLite driver
#> Creating cohort tables
#> - Created table main.my_cohort_table
#> - Created table main.my_cohort_table_inclusion
#> - Created table main.my_cohort_table_inclusion_result
#> - Created table main.my_cohort_table_inclusion_stats
#> - Created table main.my_cohort_table_summary_stats
#> - Created table main.my_cohort_table_censor_stats
#> Creating cohort tables took 0.07secs

# but generation works
cohortsGenerated <- CohortGenerator::generateCohortSet(connectionDetails = connectionDetails,
                                                       cdmDatabaseSchema = "main",
                                                       cohortDatabaseSchema = "main",
                                                       cohortTableNames = cohortTableNames,
                                                       cohortDefinitionSet = cohortsToCreate)
#> Connecting using SQLite driver
#> 1/1- Generating cohort: gibleed
#>   |======================================================================| 100%
#> Executing SQL took 0.049 secs
#> Generating cohort set took 0.28 secs

Created on 2023-07-01 with reprex v2.0.2

Session info

``` r
sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#>  setting  value
#>  version  R version 4.2.2 (2022-10-31)
#>  os       macOS Big Sur ... 10.16
#>  system   x86_64, darwin17.0
#>  ui       X11
#>  language (EN)
#>  collate  en_US.UTF-8
#>  ctype    en_US.UTF-8
#>  tz       Europe/Amsterdam
#>  date     2023-07-01
#>  pandoc   3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)
#>
#> ─ Packages ───────────────────────────────────────────────────────────────────
#>  package           * version date (UTC) lib source
#>  backports           1.4.1   2021-12-13 [1] CRAN (R 4.2.0)
#>  bit                 4.0.5   2022-11-15 [1] CRAN (R 4.2.0)
#>  bit64               4.0.5   2020-08-30 [1] CRAN (R 4.2.0)
#>  blob                1.2.4   2023-03-17 [1] CRAN (R 4.2.0)
#>  cachem              1.0.8   2023-05-01 [1] CRAN (R 4.2.0)
#>  checkmate           2.2.0   2023-04-27 [1] CRAN (R 4.2.0)
#>  CirceR              1.3.0   2023-06-12 [1] Github (ohdsi/CirceR@f5a0824)
#>  cli                 3.6.1   2023-03-23 [1] CRAN (R 4.2.0)
#>  CohortGenerator     0.8.0   2023-07-01 [1] local
#>  crayon              1.5.2   2022-09-29 [1] CRAN (R 4.2.0)
#>  DatabaseConnector   6.2.3   2023-06-29 [1] CRAN (R 4.2.0)
#>  DBI                 1.1.3   2022-06-18 [1] CRAN (R 4.2.0)
#>  digest              0.6.32  2023-06-26 [1] CRAN (R 4.2.0)
#>  dplyr               1.1.2   2023-04-20 [1] CRAN (R 4.2.0)
#>  Eunomia             1.0.2   2023-07-01 [1] Github (ohdsi/Eunomia@1220fdb)
#>  evaluate            0.21    2023-05-05 [1] CRAN (R 4.2.0)
#>  fansi               1.0.4   2023-01-22 [1] CRAN (R 4.2.0)
#>  fastmap             1.1.1   2023-02-24 [1] CRAN (R 4.2.0)
#>  fs                  1.6.2   2023-04-25 [1] CRAN (R 4.2.0)
#>  generics            0.1.3   2022-07-05 [1] CRAN (R 4.2.0)
#>  glue                1.6.2   2022-02-24 [1] CRAN (R 4.2.0)
#>  hms                 1.1.3   2023-03-21 [1] CRAN (R 4.2.0)
#>  htmltools           0.5.5   2023-03-23 [1] CRAN (R 4.2.0)
#>  knitr               1.43    2023-05-25 [1] CRAN (R 4.2.0)
#>  lifecycle           1.0.3   2022-10-07 [1] CRAN (R 4.2.0)
#>  lubridate           1.9.2   2023-02-10 [1] CRAN (R 4.2.0)
#>  magrittr            2.0.3   2022-03-30 [1] CRAN (R 4.2.0)
#>  memoise             2.0.1   2021-11-26 [1] CRAN (R 4.2.0)
#>  ParallelLogger      3.1.0   2023-06-12 [1] Github (ohdsi/ParallelLogger@e1ef69e)
#>  pillar              1.9.0   2023-03-22 [1] CRAN (R 4.2.0)
#>  pkgconfig           2.0.3   2019-09-22 [1] CRAN (R 4.2.0)
#>  purrr               1.0.1   2023-01-10 [1] CRAN (R 4.2.0)
#>  R.cache             0.16.0  2022-07-21 [1] CRAN (R 4.2.0)
#>  R.methodsS3         1.8.2   2022-06-13 [1] CRAN (R 4.2.0)
#>  R.oo                1.25.0  2022-06-12 [1] CRAN (R 4.2.0)
#>  R.utils             2.12.2  2022-11-11 [1] CRAN (R 4.2.0)
#>  R6                  2.5.1   2021-08-19 [1] CRAN (R 4.2.0)
#>  readr               2.1.4   2023-02-10 [1] CRAN (R 4.2.0)
#>  reprex              2.0.2   2022-08-17 [1] CRAN (R 4.2.0)
#>  rJava               1.0-6   2021-12-10 [1] CRAN (R 4.2.0)
#>  rlang               1.1.1   2023-04-28 [1] CRAN (R 4.2.0)
#>  rmarkdown           2.22    2023-06-01 [1] CRAN (R 4.2.0)
#>  RSQLite             2.3.1   2023-04-03 [1] CRAN (R 4.2.0)
#>  rstudioapi          0.14    2022-08-22 [1] CRAN (R 4.2.0)
#>  sessioninfo         1.2.2   2021-12-06 [1] CRAN (R 4.2.0)
#>  SqlRender           1.15.1  2023-06-30 [1] Github (ohdsi/SqlRender@45ee2c5)
#>  styler              1.10.1  2023-06-05 [1] CRAN (R 4.2.0)
#>  tibble              3.2.1   2023-03-20 [1] CRAN (R 4.2.0)
#>  tidyselect          1.2.0   2022-10-10 [1] CRAN (R 4.2.0)
#>  timechange          0.2.0   2023-01-11 [1] CRAN (R 4.2.0)
#>  tzdb                0.4.0   2023-05-12 [1] CRAN (R 4.2.0)
#>  utf8                1.2.3   2023-01-31 [1] CRAN (R 4.2.0)
#>  vctrs               0.6.3   2023-06-14 [1] CRAN (R 4.2.0)
#>  vroom               1.6.3   2023-04-28 [1] CRAN (R 4.2.0)
#>  withr               2.5.0   2022-03-03 [1] CRAN (R 4.2.0)
#>  xfun                0.39    2023-04-20 [1] CRAN (R 4.2.0)
#>  yaml                2.3.7   2023-01-23 [1] CRAN (R 4.2.0)
#>
#>  [1] /Library/Frameworks/R.framework/Versions/4.2/Resources/library
#>
#> ──────────────────────────────────────────────────────────────────────────────
```
ablack3 commented 1 year ago

Maybe this is fixed in develop? https://github.com/OHDSI/CohortGenerator/blob/6935ccf28ad019ffb4afaf9bc5f0f3a8fe9c7503/inst/cohortDefinitionSetSpecificationDescription.csv?plain=1#L2

ablack3 commented 1 year ago

Yep looks like it is fixed in develop. But maybe we want to allow either integers or doubles in the cohortId field?

remotes::install_github("OHDSI/CohortGenerator", "develop")
#> Using github PAT from envvar GITHUB_PAT
#> Skipping install of 'CohortGenerator' from a github remote, the SHA1 (6935ccf2) has not changed since last install.
#>   Use `force = TRUE` to force installation

cohortTableNames <- CohortGenerator::getCohortTableNames(cohortTable = "my_cohort_table")
connectionDetails <- Eunomia::getEunomiaConnectionDetails()
filePath <- system.file("testdata", "name", "cohorts", "celecoxib.json", package = "CohortGenerator")
json <- readChar(filePath, file.info(filePath)$size)
sql <- CirceR::buildCohortQuery(CirceR::cohortExpressionFromJson(json), 
                                options =  CirceR::createGenerateOptions(generateStats = FALSE))

cohortsToCreate <- data.frame(
  cohortId = 1,
  cohortName = "gibleed",
  json = json,
  sql = sql
)

CohortGenerator::isCohortDefinitionSet(cohortsToCreate)
#> [1] TRUE

CohortGenerator::createCohortTables(connectionDetails = connectionDetails,
                                    cohortDatabaseSchema = "main",
                                    cohortTableNames = cohortTableNames)
#> Connecting using SQLite driver
#> Creating cohort tables
#> - Created table main.my_cohort_table
#> - Created table main.my_cohort_table_inclusion
#> - Created table main.my_cohort_table_inclusion_result
#> - Created table main.my_cohort_table_inclusion_stats
#> - Created table main.my_cohort_table_summary_stats
#> - Created table main.my_cohort_table_censor_stats
#> Creating cohort tables took 0.22secs

cohortsGenerated <- CohortGenerator::generateCohortSet(connectionDetails = connectionDetails,
                                                       cdmDatabaseSchema = "main",
                                                       cohortDatabaseSchema = "main",
                                                       cohortTableNames = cohortTableNames,
                                                       cohortDefinitionSet = cohortsToCreate)
#> Connecting using SQLite driver
#> Initiating cluster consisting only of main thread
#> 1/1- Generating cohort: gibleed
#>   |======================================================================| 100%
#> Executing SQL took 0.052 secs
#> Generating cohort set took 0.31 secs

cohortsToCreate <- data.frame(
  cohortId = 1L,
  cohortName = "gibleed",
  json = json,
  sql = sql
)

CohortGenerator::isCohortDefinitionSet(cohortsToCreate)
#> Warning in checkAndFixCohortDefinitionSetDataTypes(x = x, fixDataTypes = FALSE, : Your cohortDefinitionSet had a mismatch in data types. Please check your cohortDefinitionSet to ensure it conforms to the following expected data types:Expected column == data type
#> --------------------------
#> cohortId == numeric
#> cohortName == character
#> sql == character
#> json == character
#> --------------------------
#> Your cohortDefinitionSet 
#> --------------------------
#> cohortId == integer
#> cohortName == character
#> json == character
#> sql == character
#> [1] FALSE

CohortGenerator::createCohortTables(connectionDetails = connectionDetails,
                                    cohortDatabaseSchema = "main",
                                    cohortTableNames = cohortTableNames)
#> Connecting using SQLite driver
#> Creating cohort tables
#> - Created table main.my_cohort_table
#> - Created table main.my_cohort_table_inclusion
#> - Created table main.my_cohort_table_inclusion_result
#> - Created table main.my_cohort_table_inclusion_stats
#> - Created table main.my_cohort_table_summary_stats
#> - Created table main.my_cohort_table_censor_stats
#> Creating cohort tables took 0.04secs

cohortsGenerated <- CohortGenerator::generateCohortSet(connectionDetails = connectionDetails,
                                                       cdmDatabaseSchema = "main",
                                                       cohortDatabaseSchema = "main",
                                                       cohortTableNames = cohortTableNames,
                                                       cohortDefinitionSet = cohortsToCreate)
#> Connecting using SQLite driver
#> Initiating cluster consisting only of main thread
#> 1/1- Generating cohort: gibleed
#> Executing SQL took 0.0476 secs
#> Generating cohort set took 0.15 secs

Created on 2023-07-01 with reprex v2.0.2