Open hadley opened 9 years ago
We are "translating" most of the code to tidyr and dplyr; here is the new version of this part:
# household size proportion
# For every community, divide each household-size count by Total. and lay the
# resulting shares out with one column per household-size class.
house.props <- workers %>%
  # keep only the columns whose name does not contain 'worker'
  select(-grep('worker', colnames(workers))) %>%
  # the first two columns stay as identifiers; all others become house/cnt pairs
  gather('house', 'cnt', -c(1:2)) %>%
  transmute(community, house, prop = cnt / Total.) %>%
  spread(house, prop)
# Names are assigned by position, so they depend on the column order that
# spread() produces.
names(house.props)[-1] <- c(
  'prop.1.per', 'prop.2.per', 'prop.3.per', 'prop.4.per'
)
# working people proportions

# Build the long worker-count table for one household class.
#   data      - the wide table (here `workers`)
#   cols      - column positions to keep: column 1, the class's household
#               count, then the worker-count columns of that class
#   count_col - name the household-count column (second kept column) must have
#   label     - value written to `house` for every row of this class
# Returns the first two kept columns (the second renamed to cnt.h) followed by
# worker, cnt.w and house.
worker_counts_long <- function(data, cols, count_col, label) {
  slice <- data[, cols]
  # Fail fast if the positions no longer line up with the expected count
  # column, instead of silently dividing by the wrong denominator later.
  stopifnot(identical(names(slice)[2], count_col))
  names(slice)[2] <- 'cnt.h'
  slice %>%
    gather('worker', 'cnt.w', -c(1:2)) %>%
    mutate(house = label)
}

# One call per household class; only positions, count column and label differ.
xx1 <- worker_counts_long(workers, 1:6, 'Total.', 'Total')
xx2 <- worker_counts_long(workers, c(1, 7:9), 'X1.person.household.', '1per')
xx3 <- worker_counts_long(workers, c(1, 10:13), 'X2.person.household.', '2per')
xx4 <- worker_counts_long(workers, c(1, 14:18), 'X3.person.household.', '3per')
xx <- rbind(xx1, xx2, xx3, xx4)

# Worker-count cell divided by the household count of its class, spread to one
# column per house/worker combination.
work.prop <- xx %>%
  unite(ho_wk, house, worker) %>%
  transmute(community, ho_wk, prop = cnt.w / cnt.h) %>%
  spread(ho_wk, prop)
# Names are assigned by position, so they depend on the column order that
# spread() produces -- re-check them if the input column names change.
names(work.prop)[-1] <- c(
  "prop1.no.workers", "prop1.worker",
  "prop2.no.workers", "prop2.1worker", "prop2.2worker",
  "prop3.no.workers", "prop3.1worker", "prop3.2worker", "prop3.3worker",
  "prop.no.workers", "prop.1worker", "prop.2worker", "prop.3worker"
)
# Attach the proportion columns to `workers`, matching rows on community.
# house.props and work.prop come out of spread(), which may return rows in a
# different order than `workers` (NOTE(review): spread() appears to order its
# output by the identifier column -- confirm). Binding side by side with
# cbind() would then pair proportions with the wrong community; joining on the
# key does not depend on row order.
workers <- workers %>%
  left_join(house.props, by = 'community') %>%
  left_join(work.prop, by = 'community')
Code like this makes me a bit worried, because it's so easy to make typos when copying and pasting.
Could you reshape the data so this collapsed to just a couple of dplyr commands?