Artificial data example

andkov commented 8 years ago

Paste and run to recreate the above data

# Artificial wide-format example: one row per person, one column per
# measure-by-wave combination (`<measure>_<wave>`).
# The string "#N/A" is the raw missing-value marker; it is why the follow-up
# columns are character while the wave-0 columns are integer.
# The `state_*` columns are left entirely NA (logical).
ds_wide <- data.frame(
  id        = 1:10,
  male      = c(0L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L),
  edu       = c(-1L, 1L, 0L, 1L, 1L, -1L, 1L, 0L, 0L, 1L),
  age_death = c("76.9", "77.9", "71.2", "72.8", "70.1",
                "#N/A", "#N/A", "#N/A", "#N/A", "#N/A"),
  age_0     = c(71L, 75L, 70L, 69L, 65L, 71L, 75L, 70L, 69L, 65L),
  age_1     = c("72.53", "76", "71", "#N/A", "65.7",
                "72.11", "76", "71", "#N/A", "#N/A"),
  age_2     = c("73.53", "77", "#N/A", "#N/A", "66.2",
                "73.11", "77", "#N/A", "#N/A", "66.2"),
  age_3     = c("74.53", "#N/A", "#N/A", "#N/A", "68",
                "74.11", "#N/A", "#N/A", "#N/A", "68"),
  mmse_0    = c(30L, 29L, 22L, 25L, 29L, 30L, 29L, 22L, 25L, 29L),
  mmse_1    = c("25", "28", "25", "#N/A", "#N/A",
                "25", "28", "25", "#N/A", "#N/A"),
  mmse_2    = c("20", "27", "#N/A", "#N/A", "29",
                "20", "27", "#N/A", "#N/A", "29"),
  mmse_3    = c("10", "#N/A", "#N/A", "#N/A", "#N/A",
                "10", "#N/A", "#N/A", "#N/A", "25"),
  state_0   = rep(NA, 10L),
  state_1   = rep(NA, 10L),
  state_2   = rep(NA, 10L),
  state_3   = rep(NA, 10L),
  stringsAsFactors = FALSE
)

andkov commented 8 years ago

Definitions of the missing states:

`-2`

(negative 2) is a "right censored state". It implies being alive but in an unknown living state: age is known, but the measure defining the living state of the multistate variable (e.g. mmse) is not (see ELECT vignette, p. 10).
Right censoring: The current state is known only to be one of a set of possible states.
For example, at the end of a chronic-disease study, patients are known to be alive but in an unknown state.
This is different from interval censoring, where observation times are censored; in right censoring, the states are censored.

`-1`

(negative 1) is an "intermediate missing state". Neither age nor the variable defining a living state (e.g. mmse) is known at this time point (ELECT vignette, p. 10).

andkov commented 8 years ago

Translate to long format

# Columns of the wide data that hold one value per person (no wave suffix).
time_invariant_varnames <- c("id", "male", "edu", "age_death")
# Reshape a wide data frame (one row per subject, one column per
# measure-by-wave combination such as `age_0` or `mmse_3`) into long format
# (one row per subject per wave, one column per measure).
#
# Arguments:
#   d               Data frame in wide format. Every column NOT listed in
#                   `time_invariant` is treated as time-variant and is expected
#                   to be named `<measure>_<wave>`, where `<wave>` is digits.
#   time_invariant  Character vector naming the columns of `d` that do not
#                   change with time.
#
# Value:
#   A plain data frame with the `time_invariant` columns, then `time_point`
#   (the wave suffix, as character), then one column per measure in sorted
#   order. Measure columns are always character: the waves of one measure can
#   be stored under different types (e.g. integer `age_0` next to character
#   `age_1` holding "#N/A"), so they are stacked under their common type.
#   A measure that was not recorded at a given wave is NA for that wave.
#   Rows are sorted by the time-invariant columns and then by `time_point`.
#
# Implemented in base R only, so it needs no attached packages (no `%>%`).
make_long_from_wide <- function(
  d,              # data in wide format, with encoded multi-state
  time_invariant  # variables that do not change with time (all others do)
) {
  stopifnot(
    is.data.frame(d),
    is.character(time_invariant),
    all(time_invariant %in% names(d))
  )
  # Normalise data.frame subclasses so that `[` and `[[` behave as in base R.
  d <- as.data.frame(d, stringsAsFactors = FALSE)

  time_variant <- setdiff(names(d), time_invariant)
  if (length(time_variant) == 0L) {
    stop("`d` has no time-variant columns to reshape.", call. = FALSE)
  }

  # Split `<measure>_<wave>` into its two parts. A name that does not match
  # the pattern is returned unchanged by sub(), for both parts.
  regex <- "^(\\w+?)_(\\d+?)$"
  measure <- sub(regex, "\\1", time_variant)
  time_point <- sub(regex, "\\2", time_variant)

  measures <- sort(unique(measure))
  waves <- sort(unique(time_point))

  # A measure named like an existing output column would overwrite it below.
  clashes <- intersect(measures, c(time_invariant, "time_point"))
  if (length(clashes) > 0L) {
    stop(
      "Measure name(s) clash with other output columns: ",
      paste(clashes, collapse = ", "),
      call. = FALSE
    )
  }

  n_subjects <- nrow(d)

  # One block per wave: the time-invariant columns, the wave label, and the
  # value of every measure at that wave.
  per_wave <- lapply(waves, function(wave) {
    block <- d[time_invariant]
    block$time_point <- rep(wave, n_subjects)
    for (m in measures) {
      source_col <- time_variant[measure == m & time_point == wave]
      block[[m]] <- if (length(source_col) > 0L) {
        as.character(d[[source_col[[1L]]]])
      } else {
        rep(NA_character_, n_subjects)
      }
    }
    block
  })
  long <- do.call(rbind, per_wave)

  # unname() keeps column names from being taken as named arguments of order().
  sort_keys <- unname(as.list(long[c(time_invariant, "time_point")]))
  long <- long[do.call(order, sort_keys), , drop = FALSE]
  rownames(long) <- NULL
  long
}
# Reshape the artificial wide data, then display the long-format rows of
# person 2.
ds_long <- make_long_from_wide(
  d = ds_wide,
  time_invariant = time_invariant_varnames
)
dplyr::filter(ds_long, id %in% c(2))

> ds_long %>% dplyr::filter(id %in% c(2))
  id male edu age_death time_point  age mmse state
1  2    0   1      77.9          0   75   29  <NA>
2  2    0   1      77.9          1   76   28  <NA>
3  2    0   1      77.9          2   77   27  <NA>
4  2    0   1      77.9          3 #N/A #N/A  <NA>

IALSA / ialsa-2016-amsterdam

Artificial data example #15

`-2`

`-1`