COSTDataExpo2013 / DaSilvaAlvarez

This repository contains supplementary material for the paper "Clicks and Cliques. Exploring the Soul of the Community" to enable the reader to reproduce the analysis.
1 stars 0 forks source link

Switch to a "tidier data" format? #6

Open hadley opened 9 years ago

hadley commented 9 years ago

Code like this makes me a bit worried, because it's so easy to make typos when copying and pasting:

# Add proportion columns to `workers`: each count column is divided by the
# total of the group it belongs to (all households, or households of a given
# size). Columns are referenced by bare name inside mutate() instead of via
# `workers$...`, so a misspelled column name fails loudly instead of silently
# partial-matching or yielding NULL.
workers <- mutate(
  workers,
  # share of all households, by number of workers
  prop.no.workers = No.workers / Total.,
  prop.1worker = X1.worker / Total.,
  prop.2worker = X2.workers / Total.,
  prop.3worker = X3.or.more.workers / Total.,
  # share of all households, by household size
  prop.1.per = X1.person.household. / Total.,
  prop.2.per = X2.person.household. / Total.,
  prop.3.per = X3.person.household. / Total.,
  # was `workers$Total` (no trailing dot), which only resolved through `$`
  # partial matching on a plain data.frame
  prop.4.per = X4.or.more.person.household. / Total.,
  # within 1-person households
  prop1.no.workers = No.workers.1 / X1.person.household.,
  prop1.worker = X1.worker.1 / X1.person.household.,
  # within 2-person households
  prop2.no.workers = No.workers.2 / X2.person.household.,
  prop2.1worker = X1.worker.2 / X2.person.household.,
  prop2.2worker = X2.workers.1 / X2.person.household.,
  # within 3-person households
  # NOTE(review): the original used No.workers.2 (the 2-person column) here;
  # No.workers.3 is presumed to be the 3-person column, following the
  # .1/.2/.3 suffix pattern of the other columns -- confirm against the data.
  prop3.no.workers = No.workers.3 / X3.person.household.,
  prop3.1worker = X1.worker.3 / X3.person.household.,
  prop3.2worker = X2.workers.2 / X3.person.household.,
  prop3.3worker = X3.workers / X3.person.household.
  # (the four prop3.* definitions were pasted twice in the original; the
  # duplicate copies only overwrote the same columns and have been dropped)
)

Could you reshape the data so that this collapses to just a couple of dplyr commands?

nachalca commented 9 years ago

We are "translating" most of the code to tidyr and dplyr; here is the new version of this part:

# Household-size proportions: drop every column whose name contains
# "worker", stack the remaining count columns (all but the first two) into
# long form, divide each count by Total., and spread back to one column per
# household-size category.
house.props <- workers %>%
  select(-grep("worker", names(workers))) %>%
  gather("house", "cnt", -(1:2)) %>%
  transmute(community, house, prop = cnt / Total.) %>%
  spread(house, prop)

# Rename the four proportion columns positionally (everything but the first).
names(house.props)[-1] <- paste0("prop.", 1:4, ".per")

# Worker-count proportions, step 1: build one long table per household group.
# Each piece keeps the community (column 1), the group's total count renamed
# to cnt.h, the stacked worker-count columns (worker / cnt.w), and a `house`
# label identifying the group.
xx1 <- workers %>%
  select(1:6) %>%
  rename(cnt.h = Total.) %>%
  gather("worker", "cnt.w", -(1:2)) %>%
  mutate(house = "Total")

xx2 <- workers %>%
  select(1, 7:9) %>%
  rename(cnt.h = X1.person.household.) %>%
  gather("worker", "cnt.w", -(1:2)) %>%
  mutate(house = "1per")

xx3 <- workers %>%
  select(1, 10:13) %>%
  rename(cnt.h = X2.person.household.) %>%
  gather("worker", "cnt.w", -(1:2)) %>%
  mutate(house = "2per")

xx4 <- workers %>%
  select(1, 14:18) %>%
  rename(cnt.h = X3.person.household.) %>%
  gather("worker", "cnt.w", -(1:2)) %>%
  mutate(house = "3per")

# Stack the four pieces; they share the same column layout.
xx <- rbind(xx1, xx2, xx3, xx4)

# Worker-count proportions, step 2: combine the household-group label and the
# worker column name into a single key, divide each worker count by its
# group's household count, and spread to one column per key.
work.prop <- xx %>%
  unite(ho_wk, house, worker) %>%
  mutate(prop = cnt.w / cnt.h) %>%
  select(community, ho_wk, prop) %>%
  spread(ho_wk, prop)

# Names are assigned positionally, so this relies on the column order that
# spread() produces for the ho_wk keys (1per_*, 2per_*, 3per_*, Total_*).
colnames(work.prop)[-1] <- c(
  "prop1.no.workers", "prop1.worker",
  "prop2.no.workers", "prop2.1worker", "prop2.2worker",
  "prop3.no.workers", "prop3.1worker", "prop3.2worker", "prop3.3worker",
  "prop.no.workers", "prop.1worker", "prop.2worker", "prop.3worker"
)

# Attach the proportion columns by matching on `community` instead of
# cbind(): cbind() pairs rows purely by position, and spread() may return
# rows in a different order than `workers`, which would silently assign
# proportions to the wrong community.
workers <- workers %>%
  left_join(house.props, by = "community") %>%
  left_join(work.prop, by = "community")