andkov / psy532

Univariate GLM. Accompanies a graduate statistics course at the University of Victoria
5 stars 2 forks source link

Scatters and Smoothers #15

Open andkov opened 8 years ago

andkov commented 8 years ago

This issue will explore graphical means to use scatter plots and smoothers to compare observed and modelled values, as well as predictions of models with different configuration.

andkov commented 8 years ago

Example A

Interpretation Note: (If you were taking this test, hope that you get graded by rater 1031 or 1034, and not 1030.)

Code to recreate the graph in Example A

```r
ReliabilityPair <- function( dsPlot, xName, yName, jitterAmount=.25, mainLabel=NULL ) {  
  m <- lm(as.formula(paste(yName, "~", xName)), dsPlot)
  eqn <- as.character(as.expression( #See Recipe 5.9 in Chang, 2013
    substitute(italic(y)==a + b * italic(x) * "," ~ ~italic(r)^2 ~ "=" ~ r2,
               list(a=format(coef(m)[1], digits=3),#The intercept
                    b=format(coef(m)[2], digits=3), #The slope
                    r2=format(summary(m)$r.squared, digits=3)))
  ))

  g <- ggplot(dsPlot, aes_string(x=xName, y=yName)) +
    geom_abline(color=alpha("turquoise", alpha=.2), size=2) +
    annotate("text", label=eqn, x=Inf, y=Inf, hjust=1.1, vjust=1.5, parse=TRUE, size=6, color="gray60") +
    annotate("text", label="italic(bar(x))", x=mean(dsPlot[, xName], na.rm=T), y=Inf, hjust=.5, vjust=1.5, parse=TRUE, size=7, color="gray60") +
    annotate("text", label="italic(bar(y))", x=Inf, y=mean(dsPlot[, yName], na.rm=T), hjust=1.5, vjust=.5, parse=TRUE, size=7, color="gray60") +
    geom_vline(x=mean(dsPlot[, xName], na.rm=T), color=rgb(.3, .3, .1, .2), size=3) +
    geom_hline(y=mean(dsPlot[, yName], na.rm=T), color=rgb(.3, .3, .1, .2), size=3) +
    geom_smooth(method="lm", color="orange", fill="orange", alpha=.2, na.rm=T) +
    geom_smooth(method="loess", color="purple", fill="purple", alpha=.2,  na.rm=T) +
    geom_point(stat="identity", position = position_jitter(w=jitterAmount, h=jitterAmount), shape=1) +  
    coord_fixed(xlim=c(1-jitterAmount-.1, 5+jitterAmount+.1), ylim=c(1-jitterAmount-.1, 5+jitterAmount+.1)) +
    labs(title=mainLabel) +
    theme_bw() 
  g
}
andkov commented 8 years ago

Example B

( see Chang (2013) section 5.7 ) image

# install.packages("gcookbook")
library(gcookbook) # For the data set
library(ggplot2)
model <- lm(heightIn ~ ageYear + I(ageYear^2), heightweight)
model

# Create a data frame with ageYear column, interpolating across range
xmin <- min(heightweight$ageYear)
xmax <- max(heightweight$ageYear)
predicted <- data.frame(ageYear=seq(xmin, xmax, length.out=100))
# Calculate predicted values of heightIn
predicted$heightIn <- predict(model, predicted)
predicted

sp <- ggplot2::ggplot(heightweight, aes(x=ageYear, y=heightIn)) +
  geom_point(colour="grey40")+
  geom_line(data=predicted, size=1)
sp

# Given a model, predict values of yvar from xvar
# This supports one predictor and one predicted variable
# xrange: If NULL, determine the x range from the model object. If a vector with
# two numbers, use those as the min and max of the prediction range.
# samples: Number of samples across the x range.
# ...: Further arguments to be passed to predict()
predictvals <- function(model, xvar, yvar, xrange=NULL, samples=100, ...) {
# If xrange isn't passed in, determine xrange from the models.
# Different ways of extracting the x range, depending on model type
  if (is.null(xrange)) {
    if (any(class(model) %in% c("lm", "glm")))
    xrange <- range(model$model[[xvar]])
    else if (any(class(model) %in% "loess"))
    xrange <- range(model$x)
  }
  newdata <- data.frame(x = seq(xrange[1], xrange[2], length.out = samples))
  names(newdata) <- xvar
  newdata[[yvar]] <- predict(model, newdata = newdata, ...)
  newdata
}

modlinear <- lm(heightIn ~ ageYear, heightweight)
modloess <- loess(heightIn ~ ageYear, heightweight)

lm_predicted <- predictvals(modlinear, "ageYear", "heightIn")
loess_predicted <- predictvals(modloess, "ageYear", "heightIn")
sp + 
  geom_line(data=lm_predicted, colour="red", size=.8) +
  geom_line(data=loess_predicted, colour="blue", size=.8)