pachadotdev / gravity

R package that provides estimation methods for Gravity Models
https://pacha.dev/gravity/
Apache License 2.0
34 stars 14 forks source link

summary not reporting the ppml robust coefficient t-values #20

Open marcoaurelioguerrap opened 2 years ago

marcoaurelioguerrap commented 2 years ago

Hi, I'm using the ppml. When I use the ppml with the robust estimation, I get the correct values if I print the values below. But if I use the summary, I get the non-robust standard deviations. Correction one:

> ppmlModel

Call:  y_ppml ~ dist_log + contiguity + common_language + colony_of_origin_ever + 
    colony_of_destination_ever + agree_eia + member_eu_joint + 
    member_wto_joint + exporter_iso3 + importer_iso3

Coefficients:
                            Estimate     Std. Error   z value      Pr(>|z|)   
(Intercept)                   4.079e+00    1.234e+00    3.304e+00    9.527e-04
dist_log                     -1.851e+00    9.825e-02   -1.884e+01    3.302e-79
contiguity                   -1.143e+00    1.745e-01   -6.550e+00    5.736e-11

Same model with wrong t-values

> summary(ppmlModel)

Call:
y_ppml ~ dist_log + contiguity + common_language + colony_of_origin_ever + 
    colony_of_destination_ever + agree_eia + member_eu_joint + 
    member_wto_joint + exporter_iso3 + importer_iso3

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-461.65    -7.32    -0.80     3.47   435.70  

Coefficients:
         Estimate Std. Error t value Pr(>|t|)    
  [1,]  4.079e+00  2.592e+03   0.002  0.99874    
  [2,] -1.851e+00  4.957e-01  -3.735  0.00019 ***
  [3,] -1.143e+00  1.336e+00  -0.856  0.39224    

Anyways, appreciate your effort with this package. I'll try to see if I can figure it out. I'll comment here on any advance I have.

pachadotdev commented 2 years ago

Hi Thanks a lot for reporting this. I'll edit the summary class in the internals and I'll report back.

pachadotdev commented 2 years ago

@marcoaurelioguerrap hi, I'm working on a fix

at the moment I have this

#' @export
#' @noRd
summary.gravity <- function(object, dispersion = NULL,
                         correlation = FALSE, symbolic.cor = FALSE, ...) {
  est.disp <- FALSE
  df.r <- object$df.residual
  if (is.null(dispersion)) { # calculate dispersion if needed
    dispersion <-
      if (object$family$family %in% c("poisson", "binomial")) {
        1
      } else if (df.r > 0) {
        est.disp <- TRUE
        if (any(object$weights == 0)) {
          warning("observations with zero weight not used for calculating dispersion")
        }
        sum((object$weights * object$residuals^2)[object$weights > 0]) / df.r
      } else {
        est.disp <- TRUE
        NaN
      }
  }

  ## calculate scaled and unscaled covariance matrix

  aliased <- is.na(coef(object)) # used in print method
  p <- object$rank
  if (p > 0) {
    p1 <- 1L:p
    Qr <- qr(object)
    ## WATCHIT! doesn't this rely on pivoting not permuting 1L:p? -- that's quaranteed
    coef.p <- object$coefficients[Qr$pivot[p1]]
    covmat.unscaled <- chol2inv(Qr$qr[p1, p1, drop = FALSE])
    dimnames(covmat.unscaled) <- list(names(coef.p), names(coef.p))
    covmat <- dispersion * covmat.unscaled
    var.cf <- diag(covmat)

    ## calculate coef table

    s.err <- if (isTRUE(object$robust)) {
      object$coefficients[,2]
    } else {
      sqrt(var.cf)
    }

    # robust -> z value
    # not robust -> t value
    tzvalue <- if (isTRUE(object$robust)) {
      object$coefficients[,3]
    } else {
      coef.p / s.err
    }

    dn <- c("Estimate", "Std. Error")
    if (!est.disp) { # known dispersion
      pvalue <- if (isTRUE(object$robust)) {
        object$coefficients[,4]
      } else {
        2 * pnorm(-abs(tzvalue))
      }
      coef.table <- cbind(coef.p, s.err, tzvalue, pvalue)
      dimnames(coef.table) <- list(
        names(coef.p),
        ifelse(isTRUE(object$robust), c(dn, "z value", "Pr(>|z|)"), c(dn, "t value", "Pr(>|t|)"))
      )
    } else if (df.r > 0) {
      pvalue <- 2 * pt(-abs(tzvalue), df.r)
      coef.table <- cbind(coef.p, s.err, tzvalue, pvalue)
      dimnames(coef.table) <- list(
        names(coef.p),
        c(dn, "t value", "Pr(>|t|)")
      )
    } else { # df.r == 0
      coef.table <- cbind(coef.p, NaN, NaN, NaN)
      dimnames(coef.table) <- list(
        names(coef.p),
        c(dn, "t value", "Pr(>|t|)")
      )
    }
    df.f <- NCOL(Qr$qr)
  } else {
    coef.table <- matrix(, 0L, 4L)
    dimnames(coef.table) <- list(
      NULL,
      c("Estimate", "Std. Error", "t value", "Pr(>|t|)")
    )
    covmat.unscaled <- covmat <- matrix(, 0L, 0L)
    df.f <- length(aliased)
  }
  ## return answer

  ## these need not all exist, e.g. na.action.
  keep <- match(c(
    "call", "terms", "family", "deviance", "aic",
    "contrasts", "df.residual", "null.deviance", "df.null",
    "iter", "na.action"
  ), names(object), 0L)
  ans <- c(
    object[keep],
    list(
      deviance.resid = residuals(object, type = "deviance"),
      coefficients = coef.table,
      aliased = aliased,
      dispersion = dispersion,
      df = c(object$rank, df.r, df.f),
      cov.unscaled = covmat.unscaled,
      cov.scaled = covmat
    )
  )

  if (correlation && p > 0) {
    dd <- sqrt(diag(covmat.unscaled))
    ans$correlation <-
      covmat.unscaled / outer(dd, dd)
    ans$symbolic.cor <- symbolic.cor
  }
  class(ans) <- c("summary.eglm", "summary.glm")
  return(ans)
}

which leads to

fit4 <- ppml(
  dependent_variable = "flow",
  distance = "distw",
  additional_regressors = c("rta", "comcur", "contig"),
  data = gravity_no_zeros,
  robust = TRUE
)

> summary.gravity(fit4)

Call:
y_ppml ~ dist_log + rta + comcur + contig

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-217.03   -28.10   -27.59   -23.42  1857.77  

Coefficients:
     Estimate Std. Error t value Pr(>|t|)    
[1,]  5.76232    1.16352   4.952 7.40e-07 ***
[2,]  0.02428    0.13123   0.185    0.853    
[3,]  1.26582    0.22802   5.551 2.88e-08 ***
[4,]  1.10604    0.18841   5.870 4.43e-09 ***
[5,]  1.75821    0.32298   5.444 5.29e-08 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for quasipoisson family taken to be 42366.31)

    Null deviance: 78081855  on 17087  degrees of freedom
Residual deviance: 61989121  on 17083  degrees of freedom
AIC: NA

Number of Fisher Scoring iterations: 8

> fit4

Call:  y_ppml ~ dist_log + rta + comcur + contig

Coefficients:
             Estimate   Std. Error  z value    Pr(>|z|) 
(Intercept)  5.762e+00  1.164e+00   4.952e+00  7.327e-07
dist_log     2.428e-02  1.312e-01   1.850e-01  8.532e-01
rta          1.266e+00  2.280e-01   5.551e+00  2.836e-08
comcur       1.106e+00  1.884e-01   5.870e+00  4.351e-09
contig       1.758e+00  3.230e-01   5.444e+00  5.216e-08

Degrees of Freedom: 17087 Total (i.e. Null);  17083 Residual
Null Deviance:      78080000 
Residual Deviance: 61990000     AIC: NA