py-econometrics / pyfixest

Fast High-Dimensional Fixed Effects Regression in Python following fixest-syntax
https://py-econometrics.github.io/pyfixest/pyfixest.html
MIT License
119 stars 27 forks source link

Vcov #529

Open s3alfisc opened 4 days ago

s3alfisc commented 4 days ago
  1. Updates the small-sample adjustment for heteroskedastic errors to (N-1) / (N-K) when adj=True and to 1 when adj=False, in line with the fixest docs.
  2. Reorganizes unit tests to skip non-required tests in test_vs_fixest.py.
  3. Adds dedicated tests for exact equality for vcov matrices in test_vcov.py and also in test_vs_fixest.py.

Note that this is labelled a "breaking change" because inference results will deviate slightly from those of earlier versions. For medium N and small K, however, the differences will be negligible.

Example:

%load_ext autoreload
%autoreload 2

import numpy as np
import rpy2.robjects as ro
from rpy2.robjects import pandas2ri
from rpy2.robjects.packages import importr

import pyfixest as pf
from pyfixest.utils.utils import ssc
import pandas as pd

# Turn on rpy2's pandas conversion so the DataFrame can be handed to R calls.
pandas2ri.activate()

# Handles to the R packages used to produce the reference results.
stats = importr("stats")
fixest = importr("fixest")

# Example data set from pyfixest (N=100, "Feols" model).
data = pf.get_data(N=100, model="Feols")

# Comparison 1: heteroskedasticity-robust vcov from pyfixest vs. R fixest.
#
# NOTE(review): `adj` was never assigned before its first use below, so
# running this example top to bottom raised a NameError. It is set to True
# on the assumption that this first run is meant to exercise the
# small-sample adjustment (the later run uses adj = False) -- please confirm.
adj = True
cluster_adj = False
fml = "Y ~ X1 + X2 | f1"
vcov = "hetero"

# Fit with pyfixest, passing the small-sample-correction settings via ssc().
py_mod = pf.feols(
    fml, data=data, vcov=vcov, ssc=ssc(adj=adj, cluster_adj=cluster_adj)
)

# Fit the same model with R's fixest through rpy2.
# The positional arguments to fixest.ssc() presumably map to
# (adj, fixef.K, cluster.adj, cluster.df, t.df, fixef.force_exact);
# verify against the installed fixest version.
r_mod = fixest.feols(
    ro.Formula(fml),
    data=data,
    vcov=vcov,
    ssc=fixest.ssc(adj, "none", cluster_adj, "min", "min", False),
)

# Variance-covariance matrices from both implementations.
py_mod_vcov = py_mod._vcov
r_mod_vcov = stats.vcov(r_mod)

# Element-wise difference between the two matrices; output as reported:
py_mod_vcov - r_mod_vcov
# array([[6.93889390e-18, 1.08420217e-19],
#        [3.25260652e-19, 0.00000000e+00]])

# Comparison with adj = False: heteroskedasticity-robust vcov from pyfixest
# vs. R fixest, small-sample adjustment flag switched off on both sides.
adj, cluster_adj = False, False
fml = "Y ~ X1 + X2 | f1"
vcov = "hetero"

# pyfixest fit, with the ssc settings built up front.
py_ssc = ssc(adj=adj, cluster_adj=cluster_adj)
py_mod = pf.feols(fml, data=data, vcov=vcov, ssc=py_ssc)

# R fixest fit through rpy2, using the same flags.
r_ssc = fixest.ssc(adj, "none", cluster_adj, "min", "min", False)
r_mod = fixest.feols(ro.Formula(fml), data=data, vcov=vcov, ssc=r_ssc)

# Pull the variance-covariance matrix out of each fitted model.
r_mod_vcov = stats.vcov(r_mod)
py_mod_vcov = py_mod._vcov

# Element-wise difference between the two matrices; output as reported:
py_mod_vcov - r_mod_vcov
# array([[6.93889390e-18, 1.08420217e-19],
#        [3.25260652e-19, 0.00000000e+00]])