Open andkov opened 8 years ago
Interpretation Note: (If you were taking this test, hope that you get graded by rater 1031 or 1034, and not 1030.)
```r
ReliabilityPair <- function( dsPlot, xName, yName, jitterAmount=.25, mainLabel=NULL ) {
m <- lm(as.formula(paste(yName, "~", xName)), dsPlot)
eqn <- as.character(as.expression( #See Recipe 5.9 in Chang, 2013
substitute(italic(y)==a + b * italic(x) * "," ~ ~italic(r)^2 ~ "=" ~ r2,
list(a=format(coef(m)[1], digits=3),#The intercept
b=format(coef(m)[2], digits=3), #The slope
r2=format(summary(m)$r.squared, digits=3)))
))
g <- ggplot(dsPlot, aes_string(x=xName, y=yName)) +
geom_abline(color=alpha("turquoise", alpha=.2), size=2) +
annotate("text", label=eqn, x=Inf, y=Inf, hjust=1.1, vjust=1.5, parse=TRUE, size=6, color="gray60") +
annotate("text", label="italic(bar(x))", x=mean(dsPlot[, xName], na.rm=T), y=Inf, hjust=.5, vjust=1.5, parse=TRUE, size=7, color="gray60") +
annotate("text", label="italic(bar(y))", x=Inf, y=mean(dsPlot[, yName], na.rm=T), hjust=1.5, vjust=.5, parse=TRUE, size=7, color="gray60") +
geom_vline(x=mean(dsPlot[, xName], na.rm=T), color=rgb(.3, .3, .1, .2), size=3) +
geom_hline(y=mean(dsPlot[, yName], na.rm=T), color=rgb(.3, .3, .1, .2), size=3) +
geom_smooth(method="lm", color="orange", fill="orange", alpha=.2, na.rm=T) +
geom_smooth(method="loess", color="purple", fill="purple", alpha=.2, na.rm=T) +
geom_point(stat="identity", position = position_jitter(w=jitterAmount, h=jitterAmount), shape=1) +
coord_fixed(xlim=c(1-jitterAmount-.1, 5+jitterAmount+.1), ylim=c(1-jitterAmount-.1, 5+jitterAmount+.1)) +
labs(title=mainLabel) +
theme_bw()
g
}
( see Chang (2013) section 5.7 )
# install.packages("gcookbook")
library(gcookbook) # For the data set
library(ggplot2)
model <- lm(heightIn ~ ageYear + I(ageYear^2), heightweight)
model
# Create a data frame with ageYear column, interpolating across range
xmin <- min(heightweight$ageYear)
xmax <- max(heightweight$ageYear)
predicted <- data.frame(ageYear=seq(xmin, xmax, length.out=100))
# Calculate predicted values of heightIn
predicted$heightIn <- predict(model, predicted)
predicted
sp <- ggplot2::ggplot(heightweight, aes(x=ageYear, y=heightIn)) +
geom_point(colour="grey40")+
geom_line(data=predicted, size=1)
sp
# Given a model, predict values of yvar from xvar
# This supports one predictor and one predicted variable
# xrange: If NULL, determine the x range from the model object. If a vector with
# two numbers, use those as the min and max of the prediction range.
# samples: Number of samples across the x range.
# ...: Further arguments to be passed to predict()
predictvals <- function(model, xvar, yvar, xrange=NULL, samples=100, ...) {
# If xrange isn't passed in, determine xrange from the models.
# Different ways of extracting the x range, depending on model type
if (is.null(xrange)) {
if (any(class(model) %in% c("lm", "glm")))
xrange <- range(model$model[[xvar]])
else if (any(class(model) %in% "loess"))
xrange <- range(model$x)
}
newdata <- data.frame(x = seq(xrange[1], xrange[2], length.out = samples))
names(newdata) <- xvar
newdata[[yvar]] <- predict(model, newdata = newdata, ...)
newdata
}
modlinear <- lm(heightIn ~ ageYear, heightweight)
modloess <- loess(heightIn ~ ageYear, heightweight)
lm_predicted <- predictvals(modlinear, "ageYear", "heightIn")
loess_predicted <- predictvals(modloess, "ageYear", "heightIn")
sp +
geom_line(data=lm_predicted, colour="red", size=.8) +
geom_line(data=loess_predicted, colour="blue", size=.8)
This issue will explore graphical means to use scatter plots and smoothers to compare observed and modelled values, as well as predictions of models with different configuration.