IALSA / HRS

Shaping data from the Health and Retirement Study.
GNU General Public License v2.0
5 stars 2 forks source link

Wrangling : long - longer - wide #11

Open andkov opened 7 years ago

andkov commented 7 years ago

# Start from a clean slate: remove every object (hidden, dot-prefixed ones
# included) that earlier runs left in the workspace.
rm(list = ls(all.names = TRUE))
# Emit a form feed to clear the console.
cat("\f")

# Toy data set in long format: two persons (101 and 102), three rows each.
# `time` indexes the row within a person, `sex` repeats within a person,
# and `v1` / `v2` hold a separate value on every row.
ds_long <- data.frame(
  id   = rep(c(101, 102), each = 3),
  time = rep(c(1, 2, 3), times = 2),
  sex  = rep(c("F", "M"), each = 3),
  v1   = c(4, 5, 6, 5, 8, 9),
  v2   = c(55, 57, 59, 23, 34, 25)
)
ds_long

# `%>%` is not part of base R and nothing earlier in this script attaches it,
# so attach magrittr here; without it the pipeline below stops with
# 'could not find function "%>%"' in a fresh session.
library(magrittr)

# Define variable properties for the long-to-wide conversion:
#   variables_static       - columns carried through as-is (one value per id)
#   variables_longitudinal - every remaining column except the wave index
#                            "time"; these get one wide column per wave
variables_static <- c("id", "sex")
# The outer parentheses print the resulting vector as a quick visual check.
(variables_longitudinal <- setdiff(
  colnames(ds_long),
  c(variables_static, "time")
))

# Establish a wide format in two steps: first stack the longitudinal columns
# into (variable, value) pairs ("longer"), then spread them so that each
# variable-wave combination becomes its own column (v1_01, v1_02, ...).
ds_wide <- ds_long %>%
  # one_of() accepts the column names as a character vector; it stands in
  # for the deprecated standard-evaluation verbs select_() and gather_().
  dplyr::select(
    dplyr::one_of(c(variables_static, "time", variables_longitudinal))
  ) %>%
  tidyr::gather(
    key = "variable",
    value = "value",
    dplyr::one_of(variables_longitudinal)
  ) %>%
  dplyr::arrange(id) %>%
  dplyr::mutate(
    wave = as.character(time),
    # Left-pad single-digit waves with a zero ("1" -> "01"); presumably so
    # the generated column names sort in wave order once there are 10+ waves.
    wave = ifelse(wave %in% as.character(0:9), paste0("0", wave), wave),
    # variable = gsub("^v", "", variable),
    # Name of the wide column this row's value will end up in.
    temp = paste0(variable, "_", wave)
  ) %>%
  # Drop the pieces already encoded in `temp`; otherwise spread() would treat
  # them as identifying columns and keep one row per id-wave-variable.
  dplyr::select(-time, -variable, -wave) %>%
  tidyr::spread(temp, value)
ds_wide