insightsengineering / teal.osprey

Community efforts to collect teal modules for TLGs defined in the osprey package
https://insightsengineering.github.io/teal.osprey/
Other
5 stars 2 forks source link

tm_g_heat_bygrade #2

Open cicdguy opened 3 years ago

cicdguy commented 3 years ago

The example does not work when changing the data to cached = TRUE.

library(random.cdisc.data)
# Request the cached versions of the random CDISC example datasets
# (cached = TRUE) rather than passing N / seed or an ADSL to the generators.
ADSL <- radsl(cached = TRUE)
ADEX <- radex(cached = TRUE)
ADAE <- radae(cached = TRUE)
ADCM <- radcm(cached = TRUE)
# Attach an exposure visit (AVISIT from the ADEX "DOSE" records) to every
# record of `data_need_visit`, based on where the record's ASTDTM falls
# relative to the subject's dosing times.
#
# `data_need_visit` must contain USUBJID and ASTDTM. ADEX is not an argument:
# it is looked up in the enclosing (here: global) environment at call time.
# Records without a matching dosing window are dropped from the result.
add_visit <- function(data_need_visit) {
  # One row per subject / visit / dose start. `this_vis` is the dose start,
  # `next_vis` the subject's following dose start (NA when there is none).
  dose_windows <- ADEX %>%
    filter(PARAMCD == "DOSE") %>%
    distinct(USUBJID, AVISIT, ASTDTM) %>%
    group_by(USUBJID) %>%
    arrange(ASTDTM) %>%
    mutate(next_vis = lead(ASTDTM)) %>%
    mutate(is_last = is.na(next_vis)) %>%
    rename(this_vis = ASTDTM)

  # Pair each (USUBJID, ASTDTM) key with all dosing windows of that subject.
  record_keys <- select(data_need_visit, USUBJID, ASTDTM)
  candidates <- left_join(record_keys, dose_windows, by = "USUBJID")

  # Keep the window that strictly contains ASTDTM, or the last window when
  # ASTDTM lies after its start.
  matched <- filter(
    candidates,
    ASTDTM > this_vis,
    ASTDTM < next_vis | is_last
  )

  # Natural join (on the shared columns) to bring back the remaining columns.
  left_join(matched, data_need_visit)
}
# Attach AVISIT to ADAE and ADCM. add_visit() reads ADEX, and at this point
# ADEX has not yet been filtered by the step below.
ADAE <- ADAE %>% add_visit()
ADCM <- ADCM %>% add_visit()
# Keep the PARCAT1 == "INDIVIDUAL" exposure records and add a logical flag
# that is TRUE where EOSSTT is "ONGOING".
ADEX <- ADEX %>%
  filter(PARCAT1 == "INDIVIDUAL") %>%
  mutate(ongo_status = EOSSTT == "ONGOING")

# Build the teal app with a single heatmap-by-grade module and launch it.
# NOTE(review): the `code` string below generates the data with
# radsl(N = 30, seed = 1), whereas the datasets passed in were created with
# cached = TRUE. Per the discussion in this issue, that difference between the
# preprocessing and `code` is the suspected reason the example fails.
app <- init(
  data = cdisc_data(
    cdisc_dataset("ADSL", ADSL),
    cdisc_dataset("ADEX", ADEX),
    cdisc_dataset("ADAE", ADAE),
    cdisc_dataset("ADCM", ADCM),
    code = "
    ADSL <- radsl(N = 30, seed = 1)
    ADEX <- radex(ADSL)
    ADAE <- radae(ADSL)
    ADCM <- radcm(ADSL)
    ADEX  <- ADEX %>%
      filter(PARCAT1 == 'INDIVIDUAL') %>%
      mutate(ongo_status = (EOSSTT == 'ONGOING'))
    add_visit <- function(data_need_visit){
      visit_dates <- ADEX %>%
        filter(PARAMCD == 'DOSE') %>%
        distinct(USUBJID, AVISIT, ASTDTM) %>%
        group_by(USUBJID) %>%
        arrange(ASTDTM) %>%
        mutate(next_vis = lead(ASTDTM), is_last = ifelse(is.na(next_vis), TRUE, FALSE)) %>%
        rename(this_vis = ASTDTM)
      data_visit <- data_need_visit %>%
        select(USUBJID, ASTDTM) %>%
        left_join(visit_dates, by = 'USUBJID') %>%
        filter(ASTDTM > this_vis & (ASTDTM < next_vis | is_last == TRUE)) %>%
        left_join(data_need_visit)
      return(data_visit)
    }
    ADAE <- add_visit(ADAE)
    ADCM <- add_visit(ADCM)
    ",
    check = TRUE
  ),
  modules = root_modules(
    tm_g_heat_bygrade(
      label = "Heatmap by grade",
      sl_dataname = "ADSL",
      ex_dataname = "ADEX",
      ae_dataname = "ADAE",
      cm_dataname = "ADCM",
      id_var = choices_selected(
        choices = c("USUBJID", "SUBJID"),
        selected = "USUBJID"
      ),
      visit_var = choices_selected(
        choices = "AVISIT",
        selected = "AVISIT"
      ),
      ongo_var = choices_selected(
        choices = "ongo_status",
        selected = "ongo_status"
      ),
      anno_var = choices_selected(
        choices = c("SEX", "COUNTRY", "USUBJID"),
        selected = c("SEX", "COUNTRY")
      ),
      heat_var = choices_selected(
        choices = "AETOXGR",
        selected = "AETOXGR"
      ),
      conmed_var = choices_selected(
        choices = "CMDECOD",
        selected = "CMDECOD"
      ),
      plot_height = c(600, 200, 2000)
    )
  )
)

# Start the Shiny application from the UI and server parts of the teal app.
shinyApp(ui = app$ui, server = app$server)
(image: user/3166/files/fac79380-acdf-11eb-80b6-136101c77b0b)

NEST/agile-R/issues/1756 becomes incredibly tricky for me.

Provenance:

Creator: kpagacz
cicdguy commented 3 years ago

@hey59 would you have time to look into this today?

Provenance:

Creator: anajens
cicdguy commented 3 years ago

Thanks for bringing up this issue and thanks for the tag! I believe this happens because the preprocessing step and the `code =` argument of `cdisc_data()` produce different data.

Here's a modified version of your code that runs with no issue (although including 400 subjects makes the heatmap very busy)

library(random.cdisc.data)
# Request the cached versions of the random CDISC example datasets
# (cached = TRUE); the `code` string passed to cdisc_data() below does the same.
ADSL <- radsl(cached = TRUE)
ADEX <- radex(cached = TRUE)
ADAE <- radae(cached = TRUE)
ADCM <- radcm(cached = TRUE)

# Attach an exposure visit (AVISIT from the ADEX "DOSE" records) to every
# record of `data_need_visit`, based on where the record's ASTDTM falls
# relative to the subject's dosing times.
#
# `data_need_visit` must contain USUBJID and ASTDTM. ADEX is not an argument:
# it is looked up in the enclosing (here: global) environment at call time.
# Records without a matching dosing window are dropped from the result.
add_visit <- function(data_need_visit) {
  # One row per subject / visit / dose start. `this_vis` is the dose start,
  # `next_vis` the subject's following dose start (NA when there is none).
  dose_windows <- ADEX %>%
    filter(PARAMCD == "DOSE") %>%
    distinct(USUBJID, AVISIT, ASTDTM) %>%
    group_by(USUBJID) %>%
    arrange(ASTDTM) %>%
    mutate(next_vis = lead(ASTDTM)) %>%
    mutate(is_last = is.na(next_vis)) %>%
    rename(this_vis = ASTDTM)

  # Pair each (USUBJID, ASTDTM) key with all dosing windows of that subject.
  record_keys <- select(data_need_visit, USUBJID, ASTDTM)
  candidates <- left_join(record_keys, dose_windows, by = "USUBJID")

  # Keep the window that strictly contains ASTDTM, or the last window when
  # ASTDTM lies after its start.
  matched <- filter(
    candidates,
    ASTDTM > this_vis,
    ASTDTM < next_vis | is_last
  )

  # Natural join (on the shared columns) to bring back the remaining columns.
  left_join(matched, data_need_visit)
}

# Attach AVISIT to ADAE and ADCM, then remove exact duplicate rows from the
# result. add_visit() reads ADEX, which has not yet been filtered here.
ADAE <- ADAE %>%
  add_visit() %>%
  distinct()
ADCM <- ADCM %>%
  add_visit() %>%
  distinct()

# Keep the PARCAT1 == "INDIVIDUAL" exposure records and add a logical flag
# that is TRUE where EOSSTT is "ONGOING".
ADEX <- ADEX %>%
  filter(PARCAT1 == "INDIVIDUAL") %>%
  mutate(ongo_status = EOSSTT == "ONGOING")

# Build the teal app with a single heatmap-by-grade module and launch it.
# The `code` string uses the same cached data and distinct() calls as the
# preprocessing above.
# NOTE(review): inside `code`, ADEX is filtered before add_visit() runs, while
# the preprocessing above filters it afterwards; confirm this ordering
# difference cannot change the derived ADAE / ADCM.
app <- init(
  data = cdisc_data(
    cdisc_dataset("ADSL", ADSL),
    cdisc_dataset("ADEX", ADEX),
    cdisc_dataset("ADAE", ADAE),
    cdisc_dataset("ADCM", ADCM),
    code = "
    ADSL <- radsl(cached = TRUE)
ADEX <- radex(cached = TRUE)
ADAE <- radae(cached = TRUE)
ADCM <- radcm(cached = TRUE)
    ADEX  <- ADEX %>%
      filter(PARCAT1 == 'INDIVIDUAL') %>%
      mutate(ongo_status = (EOSSTT == 'ONGOING'))
    add_visit <- function(data_need_visit){
      visit_dates <- ADEX %>%
        filter(PARAMCD == 'DOSE') %>%
        distinct(USUBJID, AVISIT, ASTDTM) %>%
        group_by(USUBJID) %>%
        arrange(ASTDTM) %>%
        mutate(next_vis = lead(ASTDTM), is_last = ifelse(is.na(next_vis), TRUE, FALSE)) %>%
        rename(this_vis = ASTDTM)
      data_visit <- data_need_visit %>%
        select(USUBJID, ASTDTM) %>%
        left_join(visit_dates, by = 'USUBJID') %>%
        filter(ASTDTM > this_vis & (ASTDTM < next_vis | is_last == TRUE)) %>%
        left_join(data_need_visit)
      return(data_visit)
    }
    ADAE <- add_visit(ADAE) %>% distinct()
    ADCM <- add_visit(ADCM) %>% distinct()
    ",
    check = TRUE
  ),
  modules = root_modules(
    tm_g_heat_bygrade(
      label = "Heatmap by grade",
      sl_dataname = "ADSL",
      ex_dataname = "ADEX",
      ae_dataname = "ADAE",
      cm_dataname = "ADCM",
      id_var = choices_selected(
        choices = c("USUBJID", "SUBJID"),
        selected = "USUBJID"
      ),
      visit_var = choices_selected(
        choices = "AVISIT",
        selected = "AVISIT"
      ),
      ongo_var = choices_selected(
        choices = "ongo_status",
        selected = "ongo_status"
      ),
      anno_var = choices_selected(
        choices = c("SEX", "COUNTRY", "USUBJID"),
        selected = c("SEX", "COUNTRY")
      ),
      heat_var = choices_selected(
        choices = "AETOXGR",
        selected = "AETOXGR"
      ),
      conmed_var = choices_selected(
        choices = "CMDECOD",
        selected = "CMDECOD"
      ),
      plot_height = c(600, 200, 2000)
    )
  )
)

# Start the Shiny application from the UI and server parts of the teal app.
shinyApp(ui = app$ui, server = app$server)

Provenance:

Creator: hey59
cicdguy commented 3 years ago

Hi @hey59 and @kpagacz , I was checking this earlier and I noticed that Konrad is trying to include this module in the osprey sample app. By doing the add_visit steps for heatmap, it seems like we lost some records (for example, ADAE had 1934 records, but after add_visit call, only 1611 left). I'm just wondering if such preprocessing will affect other modules' output?

Provenance:

Creator: yli110-stat697
cicdguy commented 3 years ago

> Hi @hey59 and @kpagacz , I was checking this earlier and I noticed that Konrad is trying to include this module in the osprey sample app. By doing the add_visit steps for heatmap, it seems like we lost some records (for example, ADAE had 1934 records, but after add_visit call, only 1611 left). I'm just wondering if such preprocessing will affect other modules' output?

My understanding is that, because the data in the example are randomly generated, not every record in ADAE has a corresponding AVISIT found in ADEX. For example, for subject 23, in ADAE

(image: user/2729/files/cca07e80-acf4-11eb-9970-cf9450f5f8e5)

and in ADEX

(image: user/2729/files/c959c280-acf5-11eb-8fab-52e1b268d418)

The algorithm for assigning an AVISIT to an ADAE record matches the record's `ASTDTM` to the ADEX visit for which that `ASTDTM` is either between `this_vis` and `next_vis`, or later than `this_vis` when that visit is the subject's last one. With that being said, only the ADAE record with `ASTDTM = 2021-07-07` for id-23 can get a match from ADEX's AVISIT, so 2 out of the 3 ADAE rows are dropped in this case. This mismatch issue should only happen with randomly generated data. If there are other ideas on how the AVISIT could be better matched from ADEX to ADAE, I'd have to direct this discussion to @qit3 in the future :)

Provenance:

Creator: hey59
cicdguy commented 3 years ago

Thanks for the explanation @hey59! I think the case you describe is not just limited to the random data and can happen very often with real data too due to data entry errors and missed visits, etc. So re-defining visits across domains is very tricky to do.

Since it's likely that this module is used in an app with other modules that use ADAE data, the algorithm for deriving AVISIT should not modify the number of ADAE records. This probably requires a little more work in the module itself, so for now I will block this issue to be addressed in the future.

Provenance:

Creator: anajens