This data are then used to build and fit the model
box_pref <- SurveyData$new(
data = shape_survey_edit,
questions = list(
age = "Please identify your age group",
gender = "Please select your gender",
vote_for = "Which party did you vote for in the 2018 election?",
highest_educ = "Please identify your completed highest education",
state = "Which State do you live in?",
y = "If today is election day, will you vote for the Box Party?"
),
responses = list(
age = levels(shape_survey$age),
gender = levels(shape_survey$gender),
vote_for = levels(shape_survey$vote_for),
highest_educ = levels(shape_survey$highest_educ),
state = levels(shape_survey$state),
y = c("no", "yes")
),
weights = "wt",
design = list(ids = ~ 1)
)
popn_obj <- SurveyData$new(
data = pop_data,
questions = c(
age_group = "Which age group are you?",
gender = "Which gender are you identified?",
vote_pref = "Which party do you prefer to vote?",
education = "What is the highest grade or level of school you have completed",
state = "Please identify the state where you live in"
),
responses = list(
age_group = levels(approx_voters_popn$age_group),
gender = levels(approx_voters_popn$gender),
vote_pref = levels(approx_voters_popn$vote_pref),
education = levels(approx_voters_popn$education),
state = levels(approx_voters_popn$state)
),
weights = "wt",
design = list(ids = ~ 1)
)
# Create QuestionMap$object for the question related to age
q_age <- QuestionMap$new(
name = "age",
col_names = c("age", "age_group"),
values_map = list(
"18-25" = "18-35",
"26-35" = "18-35",
"36-45" = "36-55",
"46-55" = "36-55",
"56-65" = "56-65",
"66-75" = "66+",
"76-90" = "66+"
)
)
# Create QuestionMap$object for the question related to party preference
q_party_pref <- QuestionMap$new(
name = "party_pref",
col_names = c("vote_for", "vote_pref"),
values_map = list(
"Box Party" = "BP",
"BP" = "BP",
"Circle Party" = "CP",
"CP" = "CP"
)
)
# Create QuestionMap$object for the question related to gender
q_gender <- QuestionMap$new(
name = "gender",
col_names = c("gender", "gender"),
values_map = data.frame(
"male" = "m",
"female" = "f",
"nonbinary" = "nb"
)
)
# Create QuestionMap$object for the question related to education
q_educ <- QuestionMap$new(
name = "highest_education",
col_names = c("highest_educ", "education"),
values_map = list(
"no high school" = "no high school",
"high school" = "high school",
"some college" = "some college",
"associates" = "some college",
"4-year college" = "4-years college",
"post-graduate" = "post-grad"
)
)
# Create QuestionMap$object for the question related to state
q_state <- QuestionMap$new(
name = "state",
col_names = c("state", "state"),
values_map = list(
"State A" = "A",
"State B" = "B",
"State C" = "C",
"State D" = "D",
"State E" = "E"
)
)
ex_map <- SurveyMap$new(sample = box_pref, population = popn_obj, q_age, q_educ, q_gender, q_party_pref, q_state)
ex_map$mapping()
ex_map$tabulate()
fit1 <- ex_map$fit(
fun = rstanarm::stan_glmer,
formula = y ~ (1 | age) + (1 | gender) + (1 | highest_education) + (1 | state),
family = "binomial",
refresh = 100,
cores = 2)
# save the fit object
saveRDS(fit1, "fit1_ex.RDS")
From this point forward, I used the ordered levels of the data and reading the saved fit object.
party_pref <- SurveyData$new(
data = shape_survey,
questions = list(
age = "Please identify your age group",
gender = "Please select your gender",
vote_for = "Which party did you vote for in the 2018 election?",
highest_educ = "Please identify your completed highest education",
state = "Which State do you live in?",
y = "If today is election day, will you vote for the Box Party?"
),
responses = list(
age = levels(shape_survey$age),
gender = levels(shape_survey$gender),
vote_for = levels(shape_survey$vote_for),
highest_educ = levels(shape_survey$highest_educ),
state = levels(shape_survey$state),
y = c("no", "yes")
),
weights = "wt",
design = list(ids = ~ 1)
)
popn <- SurveyData$new(
data = approx_voters_popn,
questions = c(
age_group = "Which age group are you?",
gender = "Which gender are you identified?",
vote_pref = "Which party do you prefer to vote?",
education = "What is the highest grade or level of school you have completed",
state = "Please identify the state where you live in"
),
responses = list(
age_group = levels(approx_voters_popn$age_group),
gender = levels(approx_voters_popn$gender),
vote_pref = levels(approx_voters_popn$vote_pref),
education = levels(approx_voters_popn$education),
state = levels(approx_voters_popn$state)
),
weights = "wt",
design = list(ids = ~ 1)
)
# Create QuestionMap$object for the question related to age
q_age <- QuestionMap$new(
name = "age",
col_names = c("age", "age_group"),
values_map = list(
"18-25" = "18-35",
"26-35" = "18-35",
"36-45" = "36-55",
"46-55" = "36-55",
"56-65" = "56-65",
"66-75" = "66+",
"76-90" = "66+"
)
)
# Create QuestionMap$object for the question related to party preference
q_party_pref <- QuestionMap$new(
name = "party_pref",
col_names = c("vote_for", "vote_pref"),
values_map = list(
"Box Party" = "BP",
"BP" = "BP",
"Circle Party" = "CP",
"CP" = "CP"
)
)
# Create QuestionMap$object for the question related to gender
q_gender <- QuestionMap$new(
name = "gender",
col_names = c("gender", "gender"),
values_map = data.frame(
"male" = "m",
"female" = "f",
"nonbinary" = "nb"
)
)
# Create QuestionMap$object for the question related to education
q_educ <- QuestionMap$new(
name = "highest_education",
col_names = c("highest_educ", "education"),
values_map = list(
"no high school" = "no high school",
"high school" = "high school",
"some college" = "some college",
"associates" = "some college",
"4-year college" = "4-years college",
"post-graduate" = "post-grad"
)
)
# Create QuestionMap$object for the question related to state
q_state <- QuestionMap$new(
name = "state",
col_names = c("state", "state"),
values_map = list(
"State A" = "A",
"State B" = "B",
"State C" = "C",
"State D" = "D",
"State E" = "E"
)
)
ex_map <- SurveyMap$new(sample = party_pref, population = popn, q_age, q_educ, q_gender, q_party_pref, q_state)
ex_map$mapping()
ex_map$tabulate()
# read the saved fit object, hence the ordered-levels of the data using the fit from the un-ordered levels.
fit1 <- readRDS("fit1_ex.RDS")
poststrat_est_fit1 <- fit1$population_predict()
# aggregate the predicted value by age using fit1
age_estimation <- fit1$aggregate(poststrat_est_fit1, by = "age")
age_estimation %>%
group_by(age) %>%
summarize(mean = mean(value), sd = sd(value))
I fit the model again (from the ordered data) and re-doing the prediction.
# same as fit1 essentially
fit2 <- ex_map$fit(
fun = rstanarm::stan_glmer,
formula = y ~ (1 | age) + (1 | gender) + (1 | highest_education) + (1 | state),
family = "binomial",
refresh = 100,
cores = 2)
poststrat_est_fit2 <- fit2$population_predict()
age_estimation2 <- fit2$aggregate(poststrat_est_fit2, by = "age")
age_estimation2 %>%
group_by(age) %>%
summarize(mean = mean(value), sd = sd(value))
I made a slight change in the data to be unordered:
This data are then used to build and fit the model
From this point forward, I used the ordered levels of the data and reading the saved fit object.
I fit the model again (from the ordered data) and re-doing the prediction.
In which the results are slightly different.