sammo3182 / interplot

interplot: plotting interactive effects
Other
15 stars 4 forks source link

plm support #26

Open DvP17 opened 6 years ago

DvP17 commented 6 years ago

This is really a great package! However, linear panel models from the plm package are up until now not supported, right? Is there a workaround to use the data of plm objects in interplot? Thank you for your efforts!

sammo3182 commented 6 years ago

Thank you for using our package. We aim to expend the package to fit more diverse models. However, it often takes time and also depends on whether there has been a way to simulate the output of certain models. If you already have a way to simulate the estimates of the plm outputs, free to use the customized function of interplot to visualize it. You can find more details about this in the last section of the vignette.

ruettenauer commented 5 years ago

In case you are still looking for a way to use interplot with plm, I wrote a work-around code (just adjusted some lines in the original code) some time ago. Note that I probably used an older version of interplot and I did not extensively test the functions, but for my case it seemed to work quite well and maybe it helps:

#rm(list = ls())

library(plm)
library(interplot)

#############################
### Define interplot.plot ###
#############################

## S3 method for class 'data.frame'
interplot.plot <- function(m, var1 = NULL, var2 = NULL, plot = TRUE, steps = NULL, ci = .95, hist = FALSE, var2_dt = NULL, point = FALSE, sims = 5000, xmin = NA, xmax = NA, ercolor = NA, esize = 0.5, ralpha = 0.5, rfill = "grey70", ...) {
  if(is.null(steps)) steps <- nrow(m)
  levels <- sort(unique(m$fake))
  ymin <- ymax <- vector() # to deal with the "no visible binding for global variable" issue
  xdiff <- vector() # to deal with the "no visible binding for global variable" issue

  if (hist == FALSE) {
    if (steps < 10 | point == T) {
      if (is.na(ercolor)) 
      {
        ercolor <- "black"
      }  # ensure whisker can be drawn
      coef.plot <- ggplot(m, aes_string(x = "fake", y = "coef1")) + geom_point(...) + geom_errorbar(aes_string(ymin = "lb", 
                                                                                                               ymax = "ub"), width = 0, color = ercolor, size = esize) + scale_x_continuous(breaks = levels) + 
        ylab(NULL) + xlab(NULL)
    } else {
      coef.plot <- ggplot(m, aes_string(x = "fake", y = "coef1")) + geom_line(...) + geom_ribbon(aes_string(ymin = "lb", 
                                                                                                            ymax = "ub"), alpha = ralpha, color = ercolor, fill = rfill) + ylab(NULL) + xlab(NULL)
    }
    return(coef.plot)
  } else {
    if (point == T) {
      if (is.na(ercolor)) 
      {
        ercolor <- "black"
      }  # ensure whisker can be drawn

      yrange <- c(m$ub, m$lb, var2_dt)
      maxdiff <- (max(yrange) - min(yrange))

      break_var2 <- steps + 1
      if (break_var2 >= 100) 
        break_var2 <- 100
      hist.out <- hist(var2_dt, breaks = seq(min(var2_dt), max(var2_dt), l = break_var2), plot = FALSE)

      n.hist <- length(hist.out$mids)

      if (steps <10) {dist <- (hist.out$mids[2] - hist.out$mids[1])/3
      } else {dist <- hist.out$mids[2] - hist.out$mids[1]}
      hist.max <- max(hist.out$counts)

      if (steps <10) {
        histX <- data.frame(ymin = rep(min(yrange) - maxdiff/5, n.hist),
                            ymax = hist.out$counts/hist.max * maxdiff/5 + min(yrange) - maxdiff/5, 
                            xmin = sort(unique(var2_dt)) - dist/2, 
                            xmax = sort(unique(var2_dt)) + dist/2)
      } else {
        histX <- data.frame(ymin = rep(min(yrange) - maxdiff/5, n.hist), 
                            ymax = hist.out$counts/hist.max * maxdiff/5 + min(yrange) - maxdiff/5, 
                            xmin = hist.out$mids - dist/2, 
                            xmax = hist.out$mids + dist/2)
      } 
      #when up to 10, the sort(unique(var2_dt)) - dist/2 leads to problemtic histogram

      if (steps <10) {
        histX_sub <- histX
      } else {
        histX_sub <- mutate(histX, xdiff = xmax - xmin, xmax = xmax - xdiff/2)
      }

      coef.plot <- ggplot()
      coef.plot <- coef.plot + geom_rect(data = histX, aes(xmin = xmin, xmax = xmax, ymin = ymin, 
                                                           ymax = ymax), colour = "gray50", alpha = 0, size = 0.5)  #histgram

      coef.plot <- coef.plot +
        geom_errorbar(data = m, aes_string(x = "fake", ymin = "lb", ymax = "ub"), width = 0, 
                      color = ercolor, size = esize) + scale_x_continuous(breaks = levels) + ylab(NULL) + 
        xlab(NULL) + geom_point(data = m, aes_string(x = "fake", y = "coef1")) 

    } else {

      yrange <- c(m$ub, m$lb)

      maxdiff <- (max(yrange) - min(yrange))

      break_var2 <- length(unique(var2_dt))
      if (break_var2 >= 100) 
        break_var2 <- 100
      hist.out <- hist(var2_dt, breaks = break_var2, plot = FALSE)

      n.hist <- length(hist.out$mids)
      dist <- hist.out$mids[2] - hist.out$mids[1]
      hist.max <- max(hist.out$counts)

      histX <- data.frame(ymin = rep(min(yrange) - maxdiff/5, n.hist), ymax = hist.out$counts/hist.max * 
                            maxdiff/5 + min(yrange) - maxdiff/5, xmin = hist.out$mids - dist/2, xmax = hist.out$mids + 
                            dist/2)

      # interplot.plot(m = coef, var1 = 'cyl', var2 = 'wt')

      coef.plot <- ggplot()
      coef.plot <- coef.plot + geom_rect(data = histX, aes(xmin = xmin, xmax = xmax, ymin = ymin, 
                                                           ymax = ymax), colour = "gray50", alpha = 0, size = 0.5)

      coef.plot <- coef.plot + geom_line(data = m, aes_string(x = "fake", y = "coef1")) + 
        geom_ribbon(data = m, aes_string(x = "fake", ymin = "lb", ymax = "ub"), alpha = ralpha, 
                    color = ercolor, fill = rfill) + ylab(NULL) + xlab(NULL)
    }
    return(coef.plot)
  }

} 

#################################
#### Set sims method for plm ####
#################################

sim.plm<-function(object, n.sims=100)
{
  object.class <- class(object)[[1]]
  summ <- summary (object)
  coef <- summ$coef[,1:2,drop=FALSE]
  dimnames(coef)[[2]] <- c("coef.est","coef.sd")
  # sigma.hat <- summ$sigma 
  # TR: define sigma by hand
  NN <- nrow(object$model)
  PP <- nrow(coef)
  sigma.hat <- sqrt(deviance(object) / (NN-PP))
  # TR: end              
  beta.hat <- coef[,1,drop = FALSE]
  # V.beta <- summ$vcov # TR: Use scaled vcov
  # V.beta <- summ$cov.unscaled
  V.beta <- vcov(summ)/sigma.hat^2 # TR: unscale scaled vcov
  # n <- summ$df[1] + summ$df[2]
  # k <- summ$df[1]
  n <- nrow(summ$model) # TR: define n
  k <- nrow(summ$coefficients) # TR: define k
  sigma <- rep (NA, n.sims)
  beta <- array (NA, c(n.sims,k))
  dimnames(beta) <- list (NULL, rownames(beta.hat))
  for (s in 1:n.sims){
    sigma[s] <- sigma.hat*sqrt((n-k)/rchisq(1,n-k))
    # beta[s,] <- MASS::mvrnorm (1, beta.hat, V.beta) # TR: if above: vcov (scaled) than without "*sigma[s]^2"
    beta[s,] <- MASS::mvrnorm(1, beta.hat, V.beta * sigma[s]^2)
  }

  ans <- new("sim",
             coef = beta,
             sigma = sigma)
  return (ans)
}
setMethod("sim", signature = "plm",
          definition = sim.plm)

# ### Old way ###
# 
# sim.plm<-function(object, n.sims=100)
# {
#   object.class <- class(object)[[1]]
#   summ <- summary (object)
#   coef <- summ$coef[,1:2,drop=FALSE]
#   dimnames(coef)[[2]] <- c("coef.est","coef.sd")
#   # sigma.hat <- summ$sigma 
#   # TR: define sigma by hand
#   NN <- nrow(object$model)
#   PP <- length(coef)
#   sigma.hat <- sqrt(deviance(object) / (NN-PP))
#   # TR: end              
#   beta.hat <- coef[,1,drop = FALSE]
#   V.beta <- summ$vcov
#   # V.beta <- summ$cov.unscaled
#   n <- nrow(summ$model)
#   k <- nrow(summ$coefficients)
#   # n <- summ$df[1] + summ$df[2]
#   # k <- summ$df[1]
#   sigma <- rep (NA, n.sims)
#   beta <- array (NA, c(n.sims,k))
#   dimnames(beta) <- list (NULL, rownames(beta.hat))
#   for (s in 1:n.sims){
#     sigma[s] <- sigma.hat*sqrt((n-k)/rchisq(1,n-k))
#     beta[s,] <- MASS::mvrnorm (1, beta.hat, V.beta) # oben vcov (scaled) hier "*sigma[s]^2" raus
#     # beta[s,] <- MASS::mvrnorm(1, beta.hat, V.beta * sigma[s]^2)
#   }
#   
#   ans <- new("sim",
#              coef = beta,
#              sigma = sigma)
#   return (ans)
# }
# setMethod("sim", signature = "plm",
#           definition = sim.plm)

#####################
### Interplot plm ###
#####################

interplot.plm <- function(m, var1, var2, plot = TRUE, steps = NULL, 
                              ci = .95, hist = FALSE, var2_dt = NA, point = FALSE, sims = 5000, xmin = NA, 
                              xmax = NA, ercolor = NA, esize = 0.5, ralpha = 0.5, rfill = "grey70", 
                              ...) {
  set.seed(324)

  m.class <- class(m)
  m.sims <- arm::sim(m, sims)

  ### For factor base terms###
  factor_v1 <- factor_v2 <- FALSE

  if (is.factor(eval(parse(text = paste0("m$model$", var1)))) & is.factor(eval(parse(text = paste0("m$model$", 
                                                                                                   var2))))) 
    stop("The function does not support interactions between two factors.")

  if (is.factor(eval(parse(text = paste0("m$model$", var1))))) {
    var1_bk <- var1
    var1 <- paste0(var1, eval(parse(text = paste0("m$xlevel$", var1))))
    factor_v1 <- TRUE
    ifelse(var1 == var2, var12 <- paste0("I(", var1, "^2)"), var12 <- paste0(var2, 
                                                                             ":", var1)[-1])

    # the first category is censored to avoid multicolinarity
    for (i in seq(var12)) {
      if (!var12[i] %in% names(m$coef)) 
        var12[i] <- paste0(var1, ":", var2)[-1][i]
      if (!var12[i] %in% names(m$coef)) 
        stop(paste("Model does not include the interaction of", 
                   var1, "and", var2, "."))
    }

  } else if (is.factor(eval(parse(text = paste0("m$model$", var2))))) {
    var2_bk <- var2
    var2 <- paste0(var2, eval(parse(text = paste0("m$xlevel$", var2))))
    factor_v2 <- TRUE
    ifelse(var1 == var2, var12 <- paste0("I(", var1, "^2)"), var12 <- paste0(var2, 
                                                                             ":", var1)[-1])

    # the first category is censored to avoid multicolinarity
    for (i in seq(var12)) {
      if (!var12[i] %in% names(m$coef)) 
        var12[i] <- paste0(var1, ":", var2)[-1][i]
      if (!var12[i] %in% names(m$coef)) 
        stop(paste("Model does not include the interaction of", 
                   var1, "and", var2, "."))
    }

  } else {
    ifelse(var1 == var2, var12 <- paste0("I(", var1, "^2)"), var12 <- paste0(var2, 
                                                                             ":", var1))

    # the first category is censored to avoid multicolinarity
    for (i in seq(var12)) {
      if (!var12[i] %in% names(m$coef)) 
        var12[i] <- paste0(var1, ":", var2)[i]
      if (!var12[i] %in% names(m$coef)) 
        stop(paste("Model does not include the interaction of", 
                   var1, "and", var2, "."))
    }
  }

  ################### 

  if (factor_v2) {
    xmin <- 0
    xmax <- 1
    steps <- 2
  } else {
    if (is.na(xmin)) 
      #xmin <- min(m$model[var2], na.rm = T) 
      xmin <- min(m$model[, which(names(m$model)==var2)], na.rm = T) # TR
    if (is.na(xmax)) 
      #xmax <- max(m$model[var2], na.rm = T)
      xmax <- max(m$model[, which(names(m$model)==var2)], na.rm = T) # TR

    if (is.null(steps)) {
      steps <- eval(parse(text = paste0("length(unique(na.omit(m$model$", 
                                        var2, ")))")))
    }

    if (steps > 100) 
      steps <- 100  # avoid redundant calculation
  }

  coef <- data.frame(fake = seq(xmin, xmax, length.out = steps), coef1 = NA, 
                     ub = NA, lb = NA)
  coef_df <- data.frame(fake = numeric(0), coef1 = numeric(0), ub = numeric(0), 
                        lb = numeric(0), model = character(0))

  if (factor_v1) {
    for (j in 1:(length(eval(parse(text = paste0("m$xlevel$", var1_bk)))) - 
                 1)) {
      # only n - 1 interactions; one category is avoided against
      # multicolinarity

      for (i in 1:steps) {
        coef$coef1[i] <- mean(m.sims@coef[, match(var1[j + 1], 
                                                  names(m$coef))] + coef$fake[i] * m.sims@coef[, match(var12[j], 
                                                                                                       names(m$coef))])
        coef$ub[i] <- quantile(m.sims@coef[, match(var1[j + 1], 
                                                   names(m$coef))] + coef$fake[i] * m.sims@coef[, match(var12[j], 
                                                                                                        names(m$coef))], 1 - (1 - ci) / 2)
        coef$lb[i] <- quantile(m.sims@coef[, match(var1[j + 1], 
                                                   names(m$coef))] + coef$fake[i] * m.sims@coef[, match(var12[j], 
                                                                                                        names(m$coef))], (1 - ci) / 2)
      }

      if (plot == TRUE) {
        coef$value <- var1[j + 1]
        coef_df <- rbind(coef_df, coef)
        if (hist == TRUE) {
          if (is.na(var2_dt)) {
            var2_dt <- eval(parse(text = paste0("m$model$", var2)))
          } else {
            var2_dt <- var2_dt
          }
        }
      } else {
        names(coef) <- c(var2, "coef", "ub", "lb")
        return(coef)
      }
    }
    coef_df$value <- as.factor(coef_df$value)
    interplot.plot(m = coef_df, hist = hist, var2_dt = var2_dt, steps = steps, 
                   point = point, ercolor = ercolor, esize = esize, ralpha = ralpha, 
                   rfill = rfill, ...) + facet_grid(. ~ value)

  } else if (factor_v2) {
    for (j in 1:(length(eval(parse(text = paste0("m$xlevel$", var2_bk)))) - 
                 1)) {
      # only n - 1 interactions; one category is avoided against
      # multicolinarity

      for (i in 1:steps) {
        coef$coef1[i] <- mean(m.sims@coef[, match(var1, names(m$coef))] + 
                                coef$fake[i] * m.sims@coef[, match(var12[j], names(m$coef))])
        coef$ub[i] <- quantile(m.sims@coef[, match(var1, names(m$coef))] + 
                                 coef$fake[i] * m.sims@coef[, match(var12[j], names(m$coef))], 
                               1 - (1 - ci) / 2)
        coef$lb[i] <- quantile(m.sims@coef[, match(var1, names(m$coef))] + 
                                 coef$fake[i] * m.sims@coef[, match(var12[j], names(m$coef))], 
                               (1 - ci) / 2)
      }

      if (plot == TRUE) {
        coef$value <- var2[j + 1]
        coef_df <- rbind(coef_df, coef)
        if (hist == TRUE) {
          if (is.na(var2_dt)) {
            var2_dt <- eval(parse(text = paste0("m$model$", var2)))
          } else {
            var2_dt <- var2_dt
          }
        }
      } else {
        names(coef) <- c(var2, "coef", "ub", "lb")
        return(coef)
      }
    }
    coef_df$value <- as.factor(coef_df$value)
    interplot.plot(m = coef_df, hist = hist, steps = steps, var2_dt = var2_dt, 
                   point = point, ercolor = ercolor, esize = esize, ralpha = ralpha, 
                   rfill = rfill, ...) + facet_grid(. ~ value)

  } else {
    ## Correct marginal effect for quadratic terms
    multiplier <- if (var1 == var2) 
      2 else 1

    for (i in 1:steps) {
      coef$coef1[i] <- mean(m.sims@coef[, match(var1, names(m$coef))] + 
                              multiplier * coef$fake[i] * m.sims@coef[, match(var12, 
                                                                              names(m$coef))])
      coef$ub[i] <- quantile(m.sims@coef[, match(var1, names(m$coef))] + 
                               multiplier * coef$fake[i] * m.sims@coef[, match(var12, 
                                                                               names(m$coef))], 1 - (1 - ci) / 2)
      coef$lb[i] <- quantile(m.sims@coef[, match(var1, names(m$coef))] + 
                               multiplier * coef$fake[i] * m.sims@coef[, match(var12, 
                                                                               names(m$coef))], (1 - ci) / 2)
    }

    if (plot == TRUE) {
      if (hist == TRUE) {
        if (is.na(var2_dt)) {
          var2_dt <- eval(parse(text = paste0("m$model$", var2)))
        } else {
          var2_dt <- var2_dt
        }
      }
      interplot.plot(m = coef, steps = steps, hist = hist, var2_dt = var2_dt, 
                     point = point, ercolor = ercolor, esize = esize, ralpha = ralpha, 
                     rfill = rfill, ...)
    } else {
      names(coef) <- c(var2, "coef", "ub", "lb")
      return(coef)
    }

  }

}

# ###############
# ### Example ###
# ###############
# 
# 
# data(Cigar)
# 
# 
# 
# ### models ###
# 
# plm.mod<-plm(sales~ price + pop + pimin + price*pimin,
#              model="within", effect="individual",
#              index=c("state", "year"), data=Cigar)
# summary(plm.mod)
# 
# 
# lm.mod<-lm(sales~ price + pop + pimin + price*pimin,
#            data=Cigar)
# summary(lm.mod)
# 
# 
# 
# ### Interplot lm ###
# 
# lm.intpl<-interplot(lm.mod, "price", "pimin")
# 
# 
# lm.intpl
# 
# 
# ### Interplot plm ###
# 
# plm.intpl<-interplot.plm(plm.mod, "price", "pimin", plot=T)
# 
# plm.intpl
sammo3182 commented 5 years ago

I'll look into the codes. Thank you so much for the amazing work, Tobias @ruettenauer ! Hope the next release of interplot could be compatible with plm with your help.