OHDSI / CohortGenerator

Cohort Generation for the OMOP Common Data Model
https://ohdsi.github.io/CohortGenerator/
11 stars 10 forks source link

Multiple calls to export stats cause duplicates in cohort inclusion file #179

Closed anthonysena closed 1 month ago

anthonysena commented 1 month ago

Here is a reprex of the problem on v0.10:

# Remove the incremental folder left behind by an earlier run, if any, so the
# reprex starts without previously recorded state.
if (dir.exists("E:/TEMP/cg_test")) {
  unlink(
    "E:/TEMP/cg_test",
    recursive = TRUE,
    force = TRUE
  )
}
# Read the whole file at `filePath` as a single string and hand it to
# CirceR::cohortExpressionFromJson(), returning whatever that call produces.
createCirceExpressionFromFile <- function(filePath) {
  # file.info()$size gives the byte count so readChar() pulls the full file.
  jsonSize <- file.info(filePath)$size
  jsonText <- readChar(filePath, jsonSize)
  CirceR::cohortExpressionFromJson(jsonText)
}

# Build the cohort query for one cohort JSON file.
#   cohortJsonFileName: path to the cohort definition JSON file.
#   generateStats:      forwarded to CirceR::createGenerateOptions().
# Returns the value of CirceR::buildCohortQuery().
generateSql <- function(cohortJsonFileName, generateStats = FALSE) {
  circeExpression <- createCirceExpressionFromFile(cohortJsonFileName)
  generateOptions <- CirceR::createGenerateOptions(generateStats = generateStats)
  CirceR::buildCohortQuery(circeExpression, options = generateOptions)
}

# Collect the cohort definition JSON files under "testdata/name/cohorts" of the
# installed CohortGenerator package.
cohortJsonFiles <- list.files(path = system.file("testdata/name/cohorts", package = "CohortGenerator"), full.names = TRUE)

# Start from an empty five-column frame and append one row per JSON file.
cohorts <- setNames(data.frame(matrix(ncol = 5, nrow = 0), stringsAsFactors = FALSE), c("atlasId", "cohortId", "cohortName", "json", "cohortJsonFile"))

# seq_along() yields an empty sequence when no files are found; 1:length(x)
# would iterate over c(1, 0) and then fail while reading an NA path.
for (i in seq_along(cohortJsonFiles)) {
  cohortJsonFileName <- cohortJsonFiles[i]
  # The cohort name is the file name without directory or extension.
  cohortFullName <- tools::file_path_sans_ext(basename(cohortJsonFileName))
  cohortJson <- readChar(cohortJsonFileName, file.info(cohortJsonFileName)$size)
  cohorts <- rbind(cohorts, data.frame(
    atlasId = i,
    cohortId = i,
    cohortName = cohortFullName,
    json = cohortJson,
    cohortJsonFile = cohortJsonFileName,
    stringsAsFactors = FALSE
  ))
}

# Attach a freshly generated `sql` column to a cohort data frame.
#   cohorts:       data frame with a `cohortJsonFile` column of JSON file paths.
#   generateStats: forwarded to generateSql() for every cohort.
# Returns `cohorts` with any existing `sql` column dropped and a new `sql`
# column appended as the last column. A zero-row input gives a zero-row result.
getCohortsForTest <- function(cohorts, generateStats = FALSE) {
  # vapply() over the paths replaces growing a data frame with rbind() inside
  # a 1:nrow() loop, which also misbehaved (iterating over c(1, 0)) on
  # zero-row input. The wrapper function keeps generateSql() from being looked
  # up unless there is at least one cohort to process.
  sql <- vapply(
    cohorts$cohortJsonFile,
    function(jsonFile) generateSql(jsonFile, generateStats),
    character(1),
    USE.NAMES = FALSE
  )
  # Drop a stale sql column so the result never carries two of them.
  if ("sql" %in% colnames(cohorts)) {
    cohorts$sql <- NULL
  }
  cbind(cohorts, data.frame(sql = sql, stringsAsFactors = FALSE))
}

# Connection details come from Eunomia::getEunomiaConnectionDetails().
connectionDetails <- Eunomia::getEunomiaConnectionDetails()
# Build the cohort definition set with inclusion statistics enabled.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
cd <- getCohortsForTest(cohorts, generateStats = TRUE)
# All cohort tables derive their names from the "sena_test" base name.
cohortTableNames <- CohortGenerator::getCohortTableNames("sena_test")
CohortGenerator::createCohortTables(
  connectionDetails = connectionDetails,
  cohortDatabaseSchema = "main",
  cohortTableNames = cohortTableNames
)
#> Connecting using SQLite driver
#> Creating cohort tables
#> - Created table main.sena_test
#> - Created table main.sena_test
#> - Created table main.sena_test_inclusion
#> - Created table main.sena_test_inclusion_result
#> - Created table main.sena_test_inclusion_stats
#> - Created table main.sena_test_summary_stats
#> - Created table main.sena_test_censor_stats
#> Creating cohort tables took 0.81secs
# Generate every cohort in `cd` in incremental mode, using "E:/TEMP/cg_test"
# as the incremental folder.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
CohortGenerator::generateCohortSet(
  connectionDetails = connectionDetails,
  cdmDatabaseSchema = "main",
  cohortDatabaseSchema = "main",
  cohortTableNames = cohortTableNames,
  cohortDefinitionSet = cd,
  incremental = TRUE,
  incrementalFolder = "E:/TEMP/cg_test"
)
#> Connecting using SQLite driver
#> Initiating cluster consisting only of main thread
#> 1/4- Generating cohort: celecoxib (id = 1)
#>   |                                                                              |                                                                      |   0%  |                                                                              |===                                                                   |   4%  |                                                                              |=====                                                                 |   7%  |                                                                              |========                                                              |  11%  |                                                                              |==========                                                            |  15%  |                                                                              |=============                                                         |  19%  |                                                                              |================                                                      |  22%  |                                                                              |==================                                                    |  26%  |                                                                              |=====================                                                 |  30%  |                                                                              |=======================                                               |  33%  |                                                                              |==========================                                            |  37%  |                                                                              |=============================                                         |  41%  |                                                                              
|===============================                                       |  44%  |                                                                              |==================================                                    |  48%  |                                                                              |====================================                                  |  52%  |                                                                              |=======================================                               |  56%  |                                                                              |=========================================                             |  59%  |                                                                              |============================================                          |  63%  |                                                                              |===============================================                       |  67%  |                                                                              |=================================================                     |  70%  |                                                                              |====================================================                  |  74%  |                                                                              |======================================================                |  78%  |                                                                              |=========================================================             |  81%  |                                                                              |============================================================          |  85%  |                                                                              |==============================================================        |  89%  |                        
                                                      |=================================================================     |  93%  |                                                                              |===================================================================   |  96%  |                                                                              |======================================================================| 100%
#> Executing SQL took 0.126 secs
#> 2/4- Generating cohort: celecoxibAge40 (id = 2)
#>   |                                                                              |                                                                      |   0%  |                                                                              |=                                                                     |   2%  |                                                                              |===                                                                   |   4%  |                                                                              |====                                                                  |   6%  |                                                                              |=====                                                                 |   8%  |                                                                              |=======                                                               |  10%  |                                                                              |========                                                              |  12%  |                                                                              |=========                                                             |  13%  |                                                                              |===========                                                           |  15%  |                                                                              |============                                                          |  17%  |                                                                              |=============                                                         |  19%  |                                                                              |===============                                                       |  21%  |                                                                              |================   
                                                   |  23%  |                                                                              |==================                                                    |  25%  |                                                                              |===================                                                   |  27%  |                                                                              |====================                                                  |  29%  |                                                                              |======================                                                |  31%  |                                                                              |=======================                                               |  33%  |                                                                              |========================                                              |  35%  |                                                                              |==========================                                            |  37%  |                                                                              |===========================                                           |  38%  |                                                                              |============================                                          |  40%  |                                                                              |==============================                                        |  42%  |                                                                              |===============================                                       |  44%  |                                                                              |================================                                      |  46%  |                                            
                                  |==================================                                    |  48%  |                                                                              |===================================                                   |  50%  |                                                                              |====================================                                  |  52%  |                                                                              |======================================                                |  54%  |                                                                              |=======================================                               |  56%  |                                                                              |========================================                              |  58%  |                                                                              |==========================================                            |  60%  |                                                                              |===========================================                           |  62%  |                                                                              |============================================                          |  63%  |                                                                              |==============================================                        |  65%  |                                                                              |===============================================                       |  67%  |                                                                              |================================================                      |  69%  |                                                                              |==================================================                   
 |  71%  |                                                                              |===================================================                   |  73%  |                                                                              |====================================================                  |  75%  |                                                                              |======================================================                |  77%  |                                                                              |=======================================================               |  79%  |                                                                              |=========================================================             |  81%  |                                                                              |==========================================================            |  83%  |                                                                              |===========================================================           |  85%  |                                                                              |=============================================================         |  87%  |                                                                              |==============================================================        |  88%  |                                                                              |===============================================================       |  90%  |                                                                              |=================================================================     |  92%  |                                                                              |==================================================================    |  94%  |                                                                              
|===================================================================   |  96%  |                                                                              |===================================================================== |  98%  |                                                                              |======================================================================| 100%
#> Executing SQL took 0.673 secs
#> 3/4- Generating cohort: celecoxibAge40Male (id = 3)
#>   |                                                                              |                                                                      |   0%  |                                                                              |=                                                                     |   2%  |                                                                              |==                                                                    |   4%  |                                                                              |====                                                                  |   5%  |                                                                              |=====                                                                 |   7%  |                                                                              |======                                                                |   9%  |                                                                              |========                                                              |  11%  |                                                                              |=========                                                             |  12%  |                                                                              |==========                                                            |  14%  |                                                                              |===========                                                           |  16%  |                                                                              |============                                                          |  18%  |                                                                              |==============                                                        |  20%  |                                                                              |===============    
                                                   |  21%  |                                                                              |================                                                      |  23%  |                                                                              |==================                                                    |  25%  |                                                                              |===================                                                   |  27%  |                                                                              |====================                                                  |  29%  |                                                                              |=====================                                                 |  30%  |                                                                              |======================                                                |  32%  |                                                                              |========================                                              |  34%  |                                                                              |=========================                                             |  36%  |                                                                              |==========================                                            |  38%  |                                                                              |============================                                          |  39%  |                                                                              |=============================                                         |  41%  |                                                                              |==============================                                        |  43%  |                                            
                                  |===============================                                       |  45%  |                                                                              |================================                                      |  46%  |                                                                              |==================================                                    |  48%  |                                                                              |===================================                                   |  50%  |                                                                              |====================================                                  |  52%  |                                                                              |======================================                                |  54%  |                                                                              |=======================================                               |  55%  |                                                                              |========================================                              |  57%  |                                                                              |=========================================                             |  59%  |                                                                              |==========================================                            |  61%  |                                                                              |============================================                          |  62%  |                                                                              |=============================================                         |  64%  |                                                                              |==============================================                       
 |  66%  |                                                                              |================================================                      |  68%  |                                                                              |=================================================                     |  70%  |                                                                              |==================================================                    |  71%  |                                                                              |===================================================                   |  73%  |                                                                              |====================================================                  |  75%  |                                                                              |======================================================                |  77%  |                                                                              |=======================================================               |  79%  |                                                                              |========================================================              |  80%  |                                                                              |==========================================================            |  82%  |                                                                              |===========================================================           |  84%  |                                                                              |============================================================          |  86%  |                                                                              |=============================================================         |  88%  |                                                                              
|==============================================================        |  89%  |                                                                              |================================================================      |  91%  |                                                                              |=================================================================     |  93%  |                                                                              |==================================================================    |  95%  |                                                                              |====================================================================  |  96%  |                                                                              |===================================================================== |  98%  |                                                                              |======================================================================| 100%
#> Executing SQL took 0.78 secs
#> 4/4- Generating cohort: celecoxibCensored (id = 4)
#>   |                                                                              |                                                                      |   0%  |                                                                              |==                                                                    |   4%  |                                                                              |=====                                                                 |   7%  |                                                                              |========                                                              |  11%  |                                                                              |==========                                                            |  14%  |                                                                              |============                                                          |  18%  |                                                                              |===============                                                       |  21%  |                                                                              |==================                                                    |  25%  |                                                                              |====================                                                  |  29%  |                                                                              |======================                                                |  32%  |                                                                              |=========================                                             |  36%  |                                                                              |============================                                          |  39%  |                                                                              
|==============================                                        |  43%  |                                                                              |================================                                      |  46%  |                                                                              |===================================                                   |  50%  |                                                                              |======================================                                |  54%  |                                                                              |========================================                              |  57%  |                                                                              |==========================================                            |  61%  |                                                                              |=============================================                         |  64%  |                                                                              |================================================                      |  68%  |                                                                              |==================================================                    |  71%  |                                                                              |====================================================                  |  75%  |                                                                              |=======================================================               |  79%  |                                                                              |==========================================================            |  82%  |                                                                              |============================================================          |  86%  |                        
                                                      |==============================================================        |  89%  |                                                                              |=================================================================     |  93%  |                                                                              |====================================================================  |  96%  |                                                                              |======================================================================| 100%
#> Executing SQL took 0.121 secs
#> Generating cohort set took 3.18 secs

# First export of the cohort statistics tables to CSV files under
# "E:/TEMP/cg_test/stats", with incremental mode switched on.
CohortGenerator::exportCohortStatsTables(
  # Where the statistics come from
  connectionDetails = connectionDetails,
  cohortDatabaseSchema = "main",
  cohortTableNames = cohortTableNames,
  cohortDefinitionSet = cd,
  databaseId = "Eunomia",
  # Where and how they are written
  cohortStatisticsFolder = "E:/TEMP/cg_test/stats",
  incremental = TRUE,
  snakeCaseToCamelCase = FALSE,
  fileNamesInSnakeCase = TRUE
)
#> Connecting using SQLite driver
#> - Saving data to - E:/TEMP/cg_test/stats/cohort_inclusion.csv
#> - Fetching data from sena_test_inclusion
#> - Fetching data from sena_test_inclusion_result
#> - Fetching data from sena_test_inclusion_stats
#> - Fetching data from sena_test_inclusion_stats
#> - Fetching data from sena_test_summary_stats
#> - Fetching data from sena_test_censor_stats
#> - Saving data to - E:/TEMP/cg_test/stats/cohort_inc_result.csv
#> - Saving data to - E:/TEMP/cg_test/stats/cohort_inc_stats.csv
#> - Saving data to - E:/TEMP/cg_test/stats/cohort_summary_stats.csv
#> - Saving data to - E:/TEMP/cg_test/stats/cohort_censor_stats.csv

# Read back the cohort inclusion file written by the export above and print it.
data <- CohortGenerator::readCsv(file = "E:/TEMP/cg_test/stats/cohort_inclusion.csv")
data
#> # A tibble: 3 x 4
#>   cohortDefinitionId ruleSequence name          description
#>                <dbl>        <dbl> <chr>         <lgl>      
#> 1                  2            0 Age > 40      NA         
#> 2                  3            0 Age > 40      NA         
#> 3                  3            1 Gender = MALE NA

# Deliberate repeat: the same export, with the same arguments and the same
# output folder, is run a second time to trigger the duplicated rows.
CohortGenerator::exportCohortStatsTables(
  # Where the statistics come from
  connectionDetails = connectionDetails,
  cohortDatabaseSchema = "main",
  cohortTableNames = cohortTableNames,
  cohortDefinitionSet = cd,
  databaseId = "Eunomia",
  # Where and how they are written
  cohortStatisticsFolder = "E:/TEMP/cg_test/stats",
  incremental = TRUE,
  snakeCaseToCamelCase = FALSE,
  fileNamesInSnakeCase = TRUE
)
#> Connecting using SQLite driver
#> - Saving data to - E:/TEMP/cg_test/stats/cohort_inclusion.csv
#> - Fetching data from sena_test_inclusion
#> - Fetching data from sena_test_inclusion_result
#> - Fetching data from sena_test_inclusion_stats
#> - Fetching data from sena_test_inclusion_stats
#> - Fetching data from sena_test_summary_stats
#> - Fetching data from sena_test_censor_stats
#> - Saving data to - E:/TEMP/cg_test/stats/cohort_inc_result.csv
#> - Saving data to - E:/TEMP/cg_test/stats/cohort_inc_stats.csv
#> - Saving data to - E:/TEMP/cg_test/stats/cohort_summary_stats.csv
#> - Saving data to - E:/TEMP/cg_test/stats/cohort_censor_stats.csv

# Read the cohort inclusion file again after the repeated export; the printed
# tibble below now has 6 rows where the first read had 3.
data <- CohortGenerator::readCsv(file = "E:/TEMP/cg_test/stats/cohort_inclusion.csv")
data
#> # A tibble: 6 x 4
#>   cohortDefinitionId ruleSequence name          description
#>                <dbl>        <dbl> <chr>         <lgl>      
#> 1           2  e+  0            0 Age > 40      NA         
#> 2           3  e+  0            0 Age > 40      NA         
#> 3           3  e+  0            1 Gender = MALE NA         
#> 4           1  e-307            0 Age > 40      NA         
#> 5           1.5e-306            0 Age > 40      NA         
#> 6           1.5e-306            1 Gender = MALE NA

Created on 2024-08-09 with reprex v2.0.2