insightsengineering / tern

Table, Listings, and Graphs (TLG) library for common outputs used in clinical trials
https://insightsengineering.github.io/tern/
Other
77 stars 22 forks source link

How to get correct denominator for DTHT01 #577

Closed wangh107 closed 2 years ago

wangh107 commented 2 years ago

@shajoezhu https://docs.roche.com/doc/tlg-catalog/2022_01_28/tlg_catalog/tables/dtht01/

Screen Shot 2022-06-02 at 9 12 42 AM

The denominator for items inside "OTHER" should be n instead of the count of "OTHER". We tried different options of denom in summarize_vars. However, none of them gives correct denominator.

library(tern)
library(scda)
library(dplyr)

adsl <- synthetic_cdisc_data("latest")$adsl

# Ensure character variables are converted to factors and empty strings and NAs are explicit missing levels.
# For details, refer to Teal and Study Data article.
adsl <- df_explicit_na(adsl) %>% filter(SAFFL == "Y")

#Reorder the levels in "DTHCAT" to put Other category at the end.
adsl$DTHCAT <- factor(adsl$DTHCAT, levels = c("ADVERSE EVENT", "PROGRESSIVE DISEASE", "OTHER", "<Missing>"))

lyt1 <- basic_table() %>%
  split_cols_by("ARM", split_fun = add_overall_level("All Patients", first = FALSE)) %>%
  add_colcounts() %>%
  count_values(
    "DTHFL",
    values = "Y",
    .labels =  c(count_fraction = "Total number of deaths"),
    .formats = c(count_fraction = "xx (xx.x%)"))  %>%
  summarize_vars(vars = c("DTHCAT"), var_labels = c("Primary cause of death")) 
part1 <- build_table(lyt1, df = adsl)

lyt2 <- basic_table() %>%
  split_cols_by("ARM", split_fun = add_overall_level("All Patients", first = FALSE)) %>%
  add_colcounts() %>%
  split_rows_by("DTHCAT", split_fun = keep_split_levels("OTHER"), child_labels = "hidden") %>%
  summarize_vars(
    "DTHCAUS",
    nested = TRUE,
    .stats = "count_fraction",
    .indent_mods = c("count_fraction" = 4L))
part2 <- build_table(lyt2, df = adsl) %>%
  prune_table()
# now combine tables
col_info(part2) <- col_info(part1)

result <- rbind(part1,
                part2)

result
BFalquet commented 2 years ago

This works:

DTHT01_1 ----

#' `DTHT01` Table 1 (Default) Death Table.
#'
#' A description of the causes of death optionally with the breakdown of the `OTHER` category and/or post-study
#' reporting of death.
#'
#' @inheritParams gen_args
#' @param time_since_last_dose (`logical`) should the time to event information be displayed.
#' @param other_category (`logical`) should the breakdown of the `OTHER` category be displayed.
#'
#' @details
#'  * Numbers represent absolute numbers of subjects and fraction of `N`, or absolute numbers when specified.
#'  * Remove zero-count rows unless overridden with `prune_0 = FALSE`.
#'  * Does not include a total column by default.
#'
#' @export
#'
#' @examples
#' library(dm)
#'
#' db <- syn_test_data() %>%
#'   dunlin::dm_explicit_na() %>%
#'   dtht01_1_pre()
#'
#' dtht01_1(adam_db = db)
#' dtht01_1(adam_db = db, other_category = TRUE)
#' dtht01_1(adam_db = db, time_since_last_dose = TRUE)
#' dtht01_1(adam_db = db, time_since_last_dose = TRUE, other_category = TRUE)
dtht01_1 <- function(adam_db,
                     armvar = .study$actualarm,
                     time_since_last_dose = FALSE,
                     other_category = FALSE,
                     lbl_overall = .study$lbl_overall,
                     prune_0 = TRUE,
                     deco = std_deco("DTHT01"),
                     .study = list(
                       actualarm = "ACTARM",
                       lbl_overall = NULL
                     )) {
  dbsel <- get_db_data(adam_db, "adsl")

  checkmate::assert_factor(dbsel$adsl$DTHFL, any.missing = FALSE)
  checkmate::assert_factor(dbsel$adsl$DTHCAT, any.missing = FALSE)
  checkmate::assert_flag(time_since_last_dose)
  checkmate::assert_flag(other_category)

  lyt <- dtht01_1_lyt(
    armvar = armvar,
    other_category = other_category,
    lbl_overall = lbl_overall,
    deco = deco
  )

  tbl <- build_table(lyt, dbsel$adsl)

  if (time_since_last_dose) {
    assert_factor(dbsel$adsl$LDDTHGR1, any.missing = FALSE)

    lyt2 <- dtht01_1_opt_lyt(
      armvar = armvar,
      lbl_overall = lbl_overall,
      deco = deco
    )

    tbl_other <- build_table(lyt2, dbsel$adsl)

    col_info(tbl_other) <- col_info(tbl)

    tbl <- rbind(tbl, tbl_other)

    tbl <- set_decoration(tbl, deco)
  }

  if (prune_0) {
    tbl <- prune_table(tbl)
  }

  tbl
}

#' @describeIn dtht01_1 `dtht01_1` Layout
#'
#' @inheritParams gen_args
#' @param other_category (`logical`) should the breakdown of the `OTHER` category be displayed.
#'
#' @export
#'
#' @examples
#' dtht01_1_lyt(
#'   armvar = "ACTARM",
#'   lbl_overall = NULL
#' )
dtht01_1_lyt <- function(armvar = .study$actualarm,
                         lbl_overall = .study$lbl_overall,
                         other_category = TRUE,
                         deco = std_deco("DTHT01"),
                         .study = list(
                           actualarm = "ACTARM",
                           lbl_overall = NULL
                         )) {
  tab <-
    basic_table_deco(deco) %>%
    split_cols_by(var = armvar) %>%
    add_colcounts() %>%
    ifneeded_add_overall_col(lbl_overall) %>%
    count_values(
      "DTHFL",
      values = "Y",
      .labels =  c(count_fraction = "Total number of deaths"),
      .formats = c(count_fraction = "xx (xx.x%)")
    ) %>%
    summarize_vars(vars = c("DTHCAT"), var_labels = c("Primary cause of death"))

  if (other_category) {
    tab <-
      tab %>%
      split_rows_by(
        "DTHCAT",
        split_fun = keep_split_levels("OTHER"),
        child_labels = "hidden"
      ) %>%
      summarize_vars(
        "DTHCAUS",
        .stats = "count_fraction",
        .indent_mods = c("count_fraction" = 4L),
        denom = "n",
        nested = FALSE
      )
  }
  tab
}

#' @describeIn dtht01_1 `dtht01_1` Optional Layout
#'
#' @inheritParams gen_args
#'
#' @export
#'
#' @examples
#' dtht01_1_opt_lyt(
#'   armvar = "ACTARM",
#'   lbl_overall = NULL
#' )
dtht01_1_opt_lyt <- function(armvar = .study$actualarm,
                             lbl_overall = .study$lbl_overall,
                             deco = std_deco("DTHT01"),
                             .study = list(
                               actualarm = "ACTARM",
                               lbl_overall = NULL
                             )) {
  basic_table_deco(deco) %>%
    split_cols_by(var = armvar) %>%
    add_colcounts() %>%
    ifneeded_add_overall_col(lbl_overall) %>%
    summarize_vars(
      vars = "LDDTHGR1",
      var_labels = "Days from last drug administration",
      show_labels = "visible"
    ) %>%
    split_rows_by(
      "LDDTHGR1",
      split_fun = remove_split_levels("<Missing>"),
      split_label = "Primary cause by days from last study drug administration",
      label_pos = "visible"
    ) %>%
    summarize_vars("DTHCAT")
}

#' @describeIn dtht01_1 `dtht01_1` Preprocessing
#'
#' @inheritParams gen_args
#'
#' @export
#'
#' @examples
#' syn_test_data() %>%
#'   dtht01_1_pre()
dtht01_1_pre <- function(adam_db) {
  checkmate::assert_class(adam_db, "dm")

  death_fact <- levels(adam_db$adsl$DTHCAT)
  death_fact <- setdiff(death_fact, "OTHER")
  death_fact <- c(death_fact, "OTHER")

  adam_db %>%
    dm_zoom_to("adsl") %>%
    mutate(DTHCAT = fct_relevel(.data$DTHCAT, death_fact)) %>%
    dm_update_zoomed()
}

Image

you have to specify nested = FALSE and denom = "n" in the last summarize_vars of the first layout

Meixh2021 commented 2 years ago

@BFalquet Yes I have tested it. It works. Thank you!

Meixh2021 commented 2 years ago

Hi @BFalquet, nested = FALSE will cause another issue that it will include all sub-categories in the DTHCAUS but not related to the OTHER in DTHCAT. For example, in the OTHER category, we would only need LOST TO FOLLOW UP, MISSING, SUICIDE, and UNKNOWN.

wangh107 commented 2 years ago

As @Meixh2021 commented above, 1

@ayogasekaram , could you please also help follow up with SME on this issue?

BFalquet commented 2 years ago

Here is a slightly ugly but working solution

# DTHT01_1 ----

#' `DTHT01` Table 1 (Default) Death Table.
#'
#'  A description of the causes of death optionally with the breakdown of the
#' `OTHER` category and/or post-study reporting of death.
#'
#' @inheritParams gen_args
#' @param time_since_last_dose (`logical`) should the time to event information be displayed.
#' @param other_category (`logical`) should the breakdown of the `OTHER` category be displayed.
#'
#' @details
#'  * Numbers represent absolute numbers of subjects and fraction of `N`, or absolute numbers when specified.
#'  * Remove zero-count rows unless overridden with `prune_0 = FALSE`.
#'  * Does not include a total column by default.
#'
#' @export
#'
#' @examples
#' library(dm)
#'
#' db <- syn_test_data() %>%
#'   dunlin::dm_explicit_na() %>%
#'   dtht01_1_pre()
#'
#' dtht01_1_main(adam_db = db, other_category = TRUE)
#' dtht01_1_main(adam_db = db, time_since_last_dose = TRUE)
#' dtht01_1_main(adam_db = db, time_since_last_dose = TRUE, other_category = TRUE)
dtht01_1_main <- function(adam_db,
                          armvar = .study$actualarm,
                          time_since_last_dose = FALSE,
                          other_category = FALSE,
                          lbl_overall = .study$lbl_overall,
                          prune_0 = TRUE,
                          deco = std_deco("DTHT01"),
                          .study = list(
                            actualarm = "ACTARM",
                            lbl_overall = NULL
                          )) {
  dbsel <- get_db_data(adam_db, "adsl")

  checkmate::assert_factor(dbsel$adsl$DTHFL, any.missing = FALSE)
  checkmate::assert_factor(dbsel$adsl$DTHCAT, any.missing = FALSE)
  checkmate::assert_flag(time_since_last_dose)
  checkmate::assert_flag(other_category)

  lyt <- dtht01_1_lyt(
    armvar = armvar,
    lbl_overall = lbl_overall,
    deco = deco
  )

  tbl <- build_table(lyt[[1]], dbsel$adsl)

  if (other_category) {

    tbl_2 <- build_table(lyt[[2]], dbsel$adsl %>% filter(.data$DTHFL == "Y"))
    col_info(tbl_2) <- col_info(tbl)
    tbl <- rbind(tbl, tbl_2)
  }

  if (time_since_last_dose) {
    assert_factor(dbsel$adsl$LDDTHGR1, any.missing = FALSE)

    lyt2 <- dtht01_1_opt_lyt(
      armvar = armvar,
      lbl_overall = lbl_overall,
      deco = deco
    )

    tbl_opt <- build_table(lyt2, dbsel$adsl)
    col_info(tbl_opt) <- col_info(tbl)
    tbl <- rbind(tbl, tbl_opt)
    tbl <- set_decoration(tbl, deco)
  }

  if (prune_0) {
    tbl <- prune_table(tbl)
  }

  tbl
}

#' @describeIn dtht01_1_main `dtht01_1` Layout
#'
#' @inheritParams gen_args
#' @param other_category (`logical`) should the breakdown of the `OTHER` category be displayed.
#'
#' @export
#'
#' @examples
#' dtht01_1_lyt(
#'   armvar = "ACTARM",
#'   lbl_overall = NULL
#' )
dtht01_1_lyt <- function(armvar = .study$actualarm,
                         lbl_overall = .study$lbl_overall,
                         deco = std_deco("DTHT01"),
                         .study = list(
                           actualarm = "ACTARM",
                           lbl_overall = NULL
                         )) {
  tab <-
    basic_table_deco(deco) %>%
    split_cols_by(var = armvar) %>%
    add_colcounts() %>%
    ifneeded_add_overall_col(lbl_overall) %>%
    count_values(
      "DTHFL",
      values = "Y",
      .labels =  c(count_fraction = "Total number of deaths"),
      .formats = c(count_fraction = "xx (xx.x%)")
    ) %>%
    summarize_vars(vars = c("DTHCAT"), var_labels = c("Primary cause of death"))

  tab2 <-
    basic_table_deco(deco) %>%
    split_cols_by(var = armvar) %>%
    add_colcounts() %>%
    ifneeded_add_overall_col(lbl_overall) %>%
    summarize_vars(
      "DTHCAUS",
      nested = FALSE,
      .stats = "count_fraction",
      .indent_mods = c("count_fraction" = 4L),
      .formats = c(count_fraction = "xx (xx.x%)"),
      denom = "N_col"
    )

  list(tab, tab2)
}

#' @describeIn dtht01_1_main `dtht01_1` Optional Layout
#'
#' @inheritParams gen_args
#'
#' @export
#'
#' @examples
#' dtht01_1_opt_lyt(
#'   armvar = "ACTARM",
#'   lbl_overall = NULL
#' )
dtht01_1_opt_lyt <- function(armvar = .study$actualarm,
                             lbl_overall = .study$lbl_overall,
                             deco = std_deco("DTHT01"),
                             .study = list(
                               actualarm = "ACTARM",
                               lbl_overall = NULL
                             )) {
  basic_table_deco(deco) %>%
    split_cols_by(var = armvar) %>%
    add_colcounts() %>%
    ifneeded_add_overall_col(lbl_overall) %>%
    summarize_vars(
      vars = "LDDTHGR1",
      var_labels = "Days from last drug administration",
      show_labels = "visible"
    ) %>%
    split_rows_by(
      "LDDTHGR1",
      split_fun = remove_split_levels("<Missing>"),
      split_label = "Primary cause by days from last study drug administration",
      label_pos = "visible"
    ) %>%
    summarize_vars("DTHCAT")
}

#' @describeIn dtht01_1_main `dtht01_1` Preprocessing
#'
#' @inheritParams gen_args
#' @param ... not used.
#'
#' @export
#'
#' @examples
#' syn_test_data() %>%
#'   dtht01_1_pre()
dtht01_1_pre <- function(adam_db, ...) {
  checkmate::assert_class(adam_db, "dm")

  death_fact <- levels(adam_db$adsl$DTHCAT)
  death_fact <- setdiff(death_fact, "OTHER")
  death_fact <- c(death_fact, "OTHER")

  adam_db %>%
    dm_zoom_to("adsl") %>%
    mutate(DTHCAT = forcats::fct_relevel(.data$DTHCAT, death_fact)) %>%
    mutate(is_OTHER = ifelse(.data$DTHCAT %in% c("OTHER", "<Missing>"), "Y", "N")) %>%
    mutate(DTHCAUS = as.factor(ifelse(.data$DTHCAT == "OTHER" | .data$DTHCAT == "<Missing>", as.character(.data$DTHCAUS), "<Missing>"))) %>%
    dm_update_zoomed()
}

# `DTHT01_1` Pipeline ----

#' `DTHT01_1`
#'
#' @seealso [dtht01_1_main()]
#' @rdname chevron_tlg-class
#' @export
dtht01_1 <- chevron_tlg(dtht01_1_main, dtht01_1_pre, adam_datasets = c("adsl"))

image

shajoezhu commented 2 years ago

hi @ayogasekaram , could you look into this please. Thanks

ayogasekaram commented 2 years ago

Hey @shajoezhu Not sure if this is an ideal solution because there's some overriding involved: We can change the col_count argument for lyt2 with the same split as lyt1 and changing the denom to "N_col". attn: "new_count", "col_counts" and "denom" below

adsl <- synthetic_cdisc_data("latest")$adsl

# Ensure character variables are converted to factors and empty strings and NAs are explicit missing levels.
# For details, refer to Teal and Study Data article.
adsl <- df_explicit_na(adsl) %>% filter(SAFFL == "Y")

#Reorder the levels in "DTHCAT" to put Other category at the end.
adsl$DTHCAT <- factor(adsl$DTHCAT, levels = c("ADVERSE EVENT", "PROGRESSIVE DISEASE", "OTHER", "<Missing>"))

lyt1 <- basic_table() %>%
  split_cols_by("ARM", split_fun = add_overall_level("All Patients", first = FALSE)) %>%
  add_colcounts() %>%
  count_values(
    "DTHFL",
    values = "Y",
    .labels =  c(count_fraction = "Total number of deaths"),
    .formats = c(count_fraction = "xx (xx.x%)"))  %>%
  summarize_vars(vars = c("DTHCAT"), var_labels = c("Primary cause of death")) 
part1 <- build_table(lyt1, df = adsl)

lyt2 <- basic_table() %>%
  split_cols_by("ARM", split_fun = add_overall_level("All Patients", first = FALSE)) %>%
  add_colcounts() %>%
  split_rows_by("DTHCAT", split_fun = keep_split_levels("OTHER"), child_labels = "hidden") %>%
  summarize_vars(
    "DTHCAUS",
    nested = TRUE,
    .stats = "count_fraction",
    .indent_mods = c("count_fraction" = 4L),
    denom = "N_col")

new_count <- c(tapply(adsl$DTHFL == "Y", adsl$ARM, sum), sum(tapply(adsl$DTHFL == "Y", adsl$ARM, sum)))

part2 <- build_table(lyt2, df = adsl, col_counts = new_count) %>%
  prune_table()

# now combine tables
col_info(part2) <- col_info(part1)

result <- rbind(part1,
                part2)

result

This results in:

result
Meixh2021 commented 2 years ago

@ayogasekaram @BFalquet An interesting issue would happen when we split the tables and combine them later. Please refer to the following codes by using the real testing dataset:

# adsl <- synthetic_cdisc_data("latest")$adsl
library(tern)

# Load a real testing dataset
dpath <- "root/clinical_studies/ro_testing/cdp_testing/spa_testing/data_analysis/spaenv_testing/qa/outdata_vad"
adsl <- rice::rice_read(file.path(dpath,"adsl.sas7bdat"))

# Ensure character variables are converted to factors and empty strings and NAs are explicit missing levels.
# For details, refer to Teal and Study Data article.
adsl <- df_explicit_na(adsl) %>% filter(SAFFL == "Y")

#Reorder the levels in "DTHCAT" to put Other category at the end.
adsl$DTHCAT <- factor(adsl$DTHCAT, levels = c("ADVERSE EVENT", "OTHER", "<Missing>"))

lyt1 <- basic_table() %>%
  split_cols_by("ARM", split_fun = add_overall_level("All Patients", first = FALSE)) %>%
  add_colcounts() %>%
  count_values(
    "DTHFL",
    values = "Y",
    .labels =  c(count_fraction = "Total number of deaths"),
    .formats = c(count_fraction = "xx (xx.x%)"))  %>%
  summarize_vars(vars = c("DTHCAT"), var_labels = c("Primary cause of death")) 
part1 <- build_table(lyt1, df = adsl)

lyt2 <- basic_table() %>%
  split_cols_by("ARM", split_fun = add_overall_level("All Patients", first = FALSE)) %>%
  add_colcounts() %>%
  split_rows_by("DTHCAT", split_fun = keep_split_levels("OTHER"), child_labels = "hidden") %>%
  summarize_vars(
    "DTHCAUS",
    nested = TRUE,
    .stats = "count_fraction",
    .indent_mods = c("count_fraction" = 4L),
    denom = "N_col")

new_count <- c(tapply(adsl$DTHFL == "Y", adsl$ARM, sum), sum(tapply(adsl$DTHFL == "Y", adsl$ARM, sum)))

part2 <- build_table(lyt2, df = adsl, col_counts = new_count) %>%
  prune_table()

# now combine tables
col_info(part2) <- col_info(part1)

result <- rbind(part1,
                          part2)

result

The result shows:

Screen Shot 2022-07-12 at 5 23 44 PM

In the second and third columns, you will see the percentages are not equal in Other and sub-category (DEATH DUE TO Death) of Other. The reason I found is the one missing value in DTHFL would not be counted in the DTHCAT.

Screen Shot 2022-07-12 at 5 29 30 PM

The percentage of the main category (OTHER) here is calculated by: 374/(374 + 1 + 1 + 1 + 2 + 1 + 1 + 1 + 1 = 383)=97.7%

Screen Shot 2022-07-12 at 5 35 54 PM

However, the percentage of the sub-category (DEATH DUE TO Death) is calculated by: 374/(374 + 1 + 1 + 1 + 2 + 1 + 1 + 1 + 1 + 1 = 384) = 97.4%. Since one more <MISSING> is included in this scenario.

The correct presentation of the table should be based on the small n = 383, which gives a percentage of 97.7%.

Thanks