Open andkov opened 8 years ago
Paste and run the code below to recreate the above data:
# Example data in wide format: one row per subject, with wave-suffixed columns
# (`age_<wave>`, `mmse_<wave>`, `state_<wave>`). Missing values in the
# character columns are encoded as the literal string "#N/A"; the `state_*`
# columns are entirely NA placeholders.
ds_wide <- data.frame(
  id        = 1:10,
  male      = c(0L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L),
  edu       = c(-1L, 1L, 0L, 1L, 1L, -1L, 1L, 0L, 0L, 1L),
  age_death = c("76.9", "77.9", "71.2", "72.8", "70.1",
                "#N/A", "#N/A", "#N/A", "#N/A", "#N/A"),
  age_0     = c(71L, 75L, 70L, 69L, 65L, 71L, 75L, 70L, 69L, 65L),
  age_1     = c("72.53", "76", "71", "#N/A", "65.7",
                "72.11", "76", "71", "#N/A", "#N/A"),
  age_2     = c("73.53", "77", "#N/A", "#N/A", "66.2",
                "73.11", "77", "#N/A", "#N/A", "66.2"),
  age_3     = c("74.53", "#N/A", "#N/A", "#N/A", "68",
                "74.11", "#N/A", "#N/A", "#N/A", "68"),
  mmse_0    = c(30L, 29L, 22L, 25L, 29L, 30L, 29L, 22L, 25L, 29L),
  mmse_1    = c("25", "28", "25", "#N/A", "#N/A",
                "25", "28", "25", "#N/A", "#N/A"),
  mmse_2    = c("20", "27", "#N/A", "#N/A", "29",
                "20", "27", "#N/A", "#N/A", "29"),
  mmse_3    = c("10", "#N/A", "#N/A", "#N/A", "#N/A",
                "10", "#N/A", "#N/A", "#N/A", "25"),
  state_0   = rep(NA, 10L),
  state_1   = rep(NA, 10L),
  state_2   = rep(NA, 10L),
  state_3   = rep(NA, 10L),
  # Keep the text columns as character regardless of the R version's default.
  stringsAsFactors = FALSE
)
Definitions of the missing states:
-2
-1
Translate to long format
# Columns that hold one value per subject (they carry no wave suffix);
# every other column is treated as time-variant when reshaping.
time_invariant_varnames <- c("id", "male", "edu", "age_death")
#' Reshape wide, wave-suffixed data into one row per subject and time point
#'
#' Every column of `d` that is not listed in `time_invariant` is treated as
#' time-variant and is expected to be named `<measure>_<digits>` (for example
#' `age_0` or `mmse_3`). These columns are stacked into a single `value`
#' column, their names are split into `measure` and `time_point`, and the
#' result is spread again so that each measure becomes its own column.
#'
#' Because all time-variant columns pass through one shared `value` column,
#' measures of different types are coerced to a common type; strings such as
#' `"#N/A"` are carried through unchanged.
#'
#' @param d Data in wide format (one row per subject).
#' @param time_invariant Character vector naming the columns of `d` that do
#'   not change with time; all other columns are expected to.
#' @return A data frame holding the `time_invariant` columns, a character
#'   `time_point` column and one column per measure, sorted by the
#'   `time_invariant` columns.
make_long_from_wide <- function(
  d, # data in wide format, with encoded multi-state
  time_invariant # variables that do not change with time (all others are expected to)
) {
  # Fail early, and by name, when an identifier column is not present in `d`.
  unknown_cols <- setdiff(time_invariant, names(d))
  if (length(unknown_cols) > 0L) {
    stop(
      "`time_invariant` columns not found in `d`: ",
      paste(unknown_cols, collapse = ", "),
      call. = FALSE
    )
  }

  time_variant <- setdiff(names(d), time_invariant)

  # Stack all time-variant columns into `variable` / `value` pairs.
  ds_long <- data.table::melt(
    data = d,
    id.vars = time_invariant,
    measure.vars = time_variant
  )
  ds_long$variable <- as.character(ds_long$variable)

  # Split e.g. "mmse_3" into measure "mmse" and time point "3". A name that
  # does not match the pattern is left unchanged in both derived columns.
  regex <- "^(\\w+?)_(\\d+?)$"
  ds_long$measure <- gsub(regex, "\\1", ds_long$variable)
  ds_long$time_point <- gsub(regex, "\\2", ds_long$variable)
  ds_long$variable <- NULL

  # One column per measure. `spread()` is kept (rather than `pivot_wider()`)
  # so the class and column order of the result stay as callers expect.
  d_wide <- tidyr::spread(ds_long, key = measure, value = value)

  # Sort by the identifier columns; replaces the deprecated `arrange_()`.
  dplyr::arrange(d_wide, dplyr::across(dplyr::all_of(time_invariant)))
}
# Reshape the example data, then display the rows that belong to subject 2.
ds_long <- make_long_from_wide(
  d = ds_wide,
  time_invariant = time_invariant_varnames
)
dplyr::filter(ds_long, id %in% c(2))
> ds_long %>% dplyr::filter(id %in% c(2))
id male edu age_death time_point age mmse state
1 2 0 1 77.9 0 75 29 <NA>
2 2 0 1 77.9 1 76 28 <NA>
3 2 0 1 77.9 2 77 27 <NA>
4 2 0 1 77.9 3 #N/A #N/A <NA>