Open martaalcalde opened 1 week ago
I think it may only happen when "result" is not collected locally: after adding dplyr::collect() in line 84, the problem no longer occurs:
# Functions ----
# Create a cdm reference backed by a ":memory:" DuckDB connection, populated
# from the database found at CDMConnector::eunomia_dir().
#
# Returns the cdm object produced by CDMConnector::copyCdmTo(). The source
# cdm is disconnected before returning.
cdmEunomia <- function() {
  # Target connection that will hold the copied tables.
  memoryCon <- DBI::dbConnect(duckdb::duckdb(), ":memory:")

  # Source cdm read from the Eunomia directory.
  eunomiaCon <- DBI::dbConnect(duckdb::duckdb(), CDMConnector::eunomia_dir())
  eunomiaCdm <- CDMConnector::cdmFromCon(
    con = eunomiaCon,
    cdmSchema = "main",
    writeSchema = "main"
  )

  copiedCdm <- CDMConnector::copyCdmTo(
    con = memoryCon,
    cdm = eunomiaCdm,
    schema = c(schema = "main")
  )

  # The source is no longer needed once the copy exists.
  CDMConnector::cdmDisconnect(cdm = eunomiaCdm)

  copiedCdm
}
# Earliest value of column `date` in `omopTable`, moved back to 1 January of
# the same year.
#
# omopTable: table to summarise (collected after the aggregation).
# date:      name of the date column, as a string.
#
# Returns a Date.
getOmopTableStartDate <- function(omopTable, date) {
  earliest <- omopTable |>
    dplyr::summarise("start_date" = min(.data[[date]], na.rm = TRUE)) |>
    dplyr::collect() |>
    dplyr::pull("start_date")

  # Keep only the year and anchor it to the first day of that year.
  as.Date(paste0(clock::get_year(earliest), "-01-01"))
}
# Latest value of column `date` in `omopTable`, moved forward to 31 December
# of the same year.
#
# omopTable: table to summarise (collected after the aggregation).
# date:      name of the date column, as a string.
#
# Returns a Date.
getOmopTableEndDate <- function(omopTable, date) {
  latest <- omopTable |>
    dplyr::summarise("end_date" = max(.data[[date]], na.rm = TRUE)) |>
    dplyr::collect() |>
    dplyr::pull("end_date")

  # Keep only the year and anchor it to the last day of that year.
  as.Date(paste0(clock::get_year(latest), "-12-31"))
}
# Build a lookup table that maps every calendar month covered by `omopTable`
# to the time interval it belongs to.
#
# omopTable:       table holding the date columns.
# start_date_name: name of the column used to find the first year covered.
# end_date_name:   name of the column used to find the last year covered.
# unit:            interval unit, pasted into the `by` argument of seq.Date()
#                  (e.g. "year", "month"). Any value other than "year" is
#                  treated as months when computing the interval end date.
# unitInterval:    number of `unit`s per interval.
#
# Returns a local tibble with one row per distinct month and the columns:
#   interval_group      - "<interval_start_date> to <interval_end_date>"
#   my                  - "<month>-<year>" key of the month
#   interval_start_date - first month start inside the interval
#   interval_end_date   - interval start plus `unitInterval` units, minus 1 day
getIntervalTibble <- function(omopTable, start_date_name, end_date_name, unit, unitInterval) {
  startDate <- getOmopTableStartDate(omopTable, start_date_name)
  endDate <- getOmopTableEndDate(omopTable, end_date_name)

  # The interval boundaries do not depend on the month being classified, so
  # they are computed once instead of once per row.
  breaks <- seq.Date(
    from = startDate,
    to = endDate,
    by = paste(unitInterval, unit)
  )

  # `unit` is a scalar: pick the date-shifting function up front.
  addInterval <- if (unit == "year") clock::add_years else clock::add_months

  tibble::tibble(
    "group" = seq.Date(as.Date(startDate), as.Date(endDate), "month")
  ) |>
    # Index of the last break that is <= the month start; with sorted breaks
    # this equals max(which(group >= breaks)).
    dplyr::mutate(
      "interval" = findInterval(
        as.numeric(.data$group), as.numeric(.env$breaks)
      )
    ) |>
    dplyr::group_by(.data$interval) |>
    dplyr::mutate(
      "interval_start_date" = as.Date(min(.data$group)),
      "interval_end_date" = as.Date(
        addInterval(.data$interval_start_date, .env$unitInterval) - 1
      ),
      "interval_group" = paste(
        .data$interval_start_date, "to", .data$interval_end_date
      )
    ) |>
    dplyr::ungroup() |>
    dplyr::mutate(
      "my" = paste0(
        clock::get_month(.data$group), "-", clock::get_year(.data$group)
      )
    ) |>
    dplyr::select(
      "interval_group", "my", "interval_start_date", "interval_end_date"
    ) |>
    dplyr::distinct()
}
# Attach to every record of `omopTable` the interval its `date` falls in.
#
# cdm:       object whose `interval` element has the columns `my`,
#            `interval_group`, `interval_start_date` and `interval_end_date`.
# omopTable: table with a `person_id` column and the column named by `date`.
# date:      name of the date column used to place each record in an interval.
# strata:    currently unused; kept so existing calls keep working.
#
# Returns the joined table without the join key, the interval boundaries,
# the incidence date and `person_id`.
splitIncidenceBetweenIntervals <- function(cdm, omopTable, date, strata) {
  # Key each record by "<month>-<year>" so it can be matched to cdm$interval.
  incidence <- omopTable |>
    dplyr::rename("incidence_date" = dplyr::all_of(.env$date)) |>
    dplyr::mutate(
      "my" = paste0(
        clock::get_month(.data$incidence_date),
        "-",
        clock::get_year(.data$incidence_date)
      )
    )

  cdm$interval |>
    dplyr::inner_join(incidence, by = "my") |>
    # A single select replaces the former select/relocate/select chain:
    # relocating person_id had no effect because it is dropped here.
    dplyr::select(
      -c(
        "my", "interval_start_date", "interval_end_date",
        "incidence_date", "person_id"
      )
    )
}
# Bespoke code -----
# Set up the cdm used for the reproduction.
cdm <- cdmEunomia()
#> Note: method with signature 'DBIConnection#Id' chosen for function 'dbExistsTable',
#> target signature 'duckdb_connection#Id'.
#> "duckdb_connection#ANY" would also be valid

# Condition records with an age group computed at the condition start date.
omopTable <- cdm[["condition_occurrence"]] |>
  dplyr::ungroup() |>
  dplyr::select("person_id", "condition_start_date") |>
  PatientProfiles::addAgeQuery(
    indexDate = "condition_start_date",
    ageGroup = list(
      "<=20" = c(0, 20),
      "21 to 40" = c(21, 40),
      "41 to 60" = c(41, 60)
    )
  )

# One-year intervals spanning the condition start dates, stored in the cdm.
interval <- getIntervalTibble(
  omopTable, "condition_start_date", "condition_start_date", "year", 1
)
cdm <- omopgenerics::insertTable(cdm, name = "interval", table = interval)

# Collect locally before summarising.
result <- dplyr::collect(
  splitIncidenceBetweenIntervals(
    cdm, omopTable, "condition_start_date", "age_group"
  )
)

# Summarise the same data 50 times and print one count on each run.
for (run in seq_len(50)) {
  summarised <- suppressMessages(
    PatientProfiles::summariseResult(
      result,
      includeOverallStrata = TRUE,
      strata = list("age_group"),
      estimates = "count",
      counts = FALSE
    )
  )
  print(
    summarised |>
      dplyr::filter(
        .data$variable_level == "1920-01-01 to 1920-12-31",
        .data$strata_level == "<=20"
      ) |>
      dplyr::pull("estimate_value")
  )
}
#> [1] "50"
#> [1] "50"
#> [1] "50"
#> [1] "50"
#> [1] "50"
#> [1] "50"
#> [1] "50"
#> [1] "50"
#> [1] "50"
#> [1] "50"
#> [1] "50"
#> [1] "50"
#> [1] "50"
#> [1] "50"
#> [1] "50"
#> [1] "50"
#> [1] "50"
#> [1] "50"
#> [1] "50"
#> [1] "50"
#> [1] "50"
#> [1] "50"
#> [1] "50"
#> [1] "50"
#> [1] "50"
#> [1] "50"
#> [1] "50"
#> [1] "50"
#> [1] "50"
#> [1] "50"
#> [1] "50"
#> [1] "50"
#> [1] "50"
#> [1] "50"
#> [1] "50"
#> [1] "50"
#> [1] "50"
#> [1] "50"
#> [1] "50"
#> [1] "50"
#> [1] "50"
#> [1] "50"
#> [1] "50"
#> [1] "50"
#> [1] "50"
#> [1] "50"
#> [1] "50"
#> [1] "50"
#> [1] "50"
#> [1] "50"
Created on 2024-10-10 with reprex v2.1.1
Hi! I'm using summariseResult to count the number of records of an omopTable within specific intervals in time. However, I keep obtaining different results when using summariseResult().
I have encountered this issue when running this function on a MacBook, but not on Windows.
Created on 2024-10-09 with reprex v2.1.1