vincentarelbundock / pymarginaleffects

GNU General Public License v3.0
58 stars 10 forks source link

`avg_comparisons(model, hypothesis="revpairwise")` does not work in Python without the `by` argument while it works in R #138

Open artiom-matvei opened 2 weeks ago

artiom-matvei commented 2 weeks ago

R code

> library(marginaleffects)
> dat = read.csv("https://vincentarelbundock.github.io/Rdatasets/csv/palmerpenguins/penguins.csv")
> mod = glm(body_mass_g ~ flipper_length_mm * species * bill_length_mm + island, data = dat)
> c = avg_comparisons(mod, hypothesis = "revpairwise")

R output


                                                                                 Term Estimate Std. Error      z Pr(>|z|)    S 2.5 % 97.5 %
 (flipper_length_mm, mean(+1)) - (bill_length_mm, mean(+1))                              -37.3       9.52 -3.921  < 0.001 13.5   -56  -18.7
 (island, mean(Dream) - mean(Biscoe)) - (bill_length_mm, mean(+1))                       -75.0      67.71 -1.108  0.26793  1.9  -208   57.7
 (island, mean(Torgersen) - mean(Biscoe)) - (bill_length_mm, mean(+1))                  -122.5      69.72 -1.757  0.07896  3.7  -259   14.2
 (species, mean(Chinstrap) - mean(Adelie)) - (bill_length_mm, mean(+1))                 -578.3     201.74 -2.867  0.00415  7.9  -974 -182.9
 (species, mean(Gentoo) - mean(Adelie)) - (bill_length_mm, mean(+1))                    -430.5     253.67 -1.697  0.08970  3.5  -928   66.7
 (island, mean(Dream) - mean(Biscoe)) - (flipper_length_mm, mean(+1))                    -37.7      68.00 -0.554  0.57929  0.8  -171   95.6
 (island, mean(Torgersen) - mean(Biscoe)) - (flipper_length_mm, mean(+1))                -85.2      69.97 -1.217  0.22357  2.2  -222   52.0
 (species, mean(Chinstrap) - mean(Adelie)) - (flipper_length_mm, mean(+1))              -541.0     199.72 -2.709  0.00675  7.2  -932 -149.6
 (species, mean(Gentoo) - mean(Adelie)) - (flipper_length_mm, mean(+1))                 -393.2     254.07 -1.548  0.12174  3.0  -891  104.8
 (island, mean(Torgersen) - mean(Biscoe)) - (island, mean(Dream) - mean(Biscoe))         -47.5      65.08 -0.729  0.46577  1.1  -175   80.1
 (species, mean(Chinstrap) - mean(Adelie)) - (island, mean(Dream) - mean(Biscoe))       -503.3     221.75 -2.270  0.02322  5.4  -938  -68.7
 (species, mean(Gentoo) - mean(Adelie)) - (island, mean(Dream) - mean(Biscoe))          -355.5     253.04 -1.405  0.16008  2.6  -851  140.5
 (species, mean(Chinstrap) - mean(Adelie)) - (island, mean(Torgersen) - mean(Biscoe))   -455.9     209.09 -2.180  0.02925  5.1  -866  -46.0
 (species, mean(Gentoo) - mean(Adelie)) - (island, mean(Torgersen) - mean(Biscoe))      -308.0     250.11 -1.231  0.21814  2.2  -798  182.2
 (species, mean(Gentoo) - mean(Adelie)) - (species, mean(Chinstrap) - mean(Adelie))      147.8     219.56  0.673  0.50071  1.0  -282  578.2

Type:  response 
Columns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high 

Python code

# Reproduction script: fit an OLS model on the palmerpenguins data with
# statsmodels, then call avg_comparisons with hypothesis='revpairwise'.
import polars as pl
import statsmodels.formula.api as smf
from marginaleffects import *
# Read the CSV with polars, treating the string "NA" as null, then drop nulls.
penguins = pl.read_csv(
   "https://vincentarelbundock.github.io/Rdatasets/csv/palmerpenguins/penguins.csv",
    null_values="NA",
).drop_nulls()
# The polars frame is converted to pandas before being handed to statsmodels.
mod = smf.ols(
  "body_mass_g ~ flipper_length_mm * species * bill_length_mm + island",
  penguins.to_pandas(),
).fit()
# NOTE(review): the first argument below is the `penguins` DataFrame, not the
# fitted model `mod` (the R snippet passes the model). The reported
# "AttributeError: 'DataFrame' object has no attribute '_beta_hat'" is
# consistent with a DataFrame reaching the model wrapper -- confirm whether
# `mod` was intended before attributing the failure to the missing `by`.
p = avg_comparisons(penguins, hypothesis='revpairwise')

Python output

# truncated stacktrace
File c:\Users\IBM\Projects\Vincent A.B\pymarginaleffects\marginaleffects\sanitize_model.py:24, in sanitize_model(model)
     [21](file:///C:/Users/IBM/Projects/Vincent%20A.B/pymarginaleffects/marginaleffects/sanitize_model.py:21) try:
     [22](file:///C:/Users/IBM/Projects/Vincent%20A.B/pymarginaleffects/marginaleffects/sanitize_model.py:22)     import pyfixest  #  noqa
---> [24](file:///C:/Users/IBM/Projects/Vincent%20A.B/pymarginaleffects/marginaleffects/sanitize_model.py:24)     return ModelPyfixest(model)
     [25](file:///C:/Users/IBM/Projects/Vincent%20A.B/pymarginaleffects/marginaleffects/sanitize_model.py:25) except ImportError:
     [26](file:///C:/Users/IBM/Projects/Vincent%20A.B/pymarginaleffects/marginaleffects/sanitize_model.py:26)     pass

File c:\Users\IBM\Projects\Vincent A.B\pymarginaleffects\marginaleffects\model_pyfixest.py:10, in ModelPyfixest.__init__(self, model)
      [9](file:///C:/Users/IBM/Projects/Vincent%20A.B/pymarginaleffects/marginaleffects/model_pyfixest.py:9) def __init__(self, model):
---> [10](file:///C:/Users/IBM/Projects/Vincent%20A.B/pymarginaleffects/marginaleffects/model_pyfixest.py:10)     super().__init__(model)
     [11](file:///C:/Users/IBM/Projects/Vincent%20A.B/pymarginaleffects/marginaleffects/model_pyfixest.py:11)     if hasattr(self.model, "_fixef"):
     [12](file:///C:/Users/IBM/Projects/Vincent%20A.B/pymarginaleffects/marginaleffects/model_pyfixest.py:12)         if self.model._fixef is not None:

File c:\Users\IBM\Projects\Vincent A.B\pymarginaleffects\marginaleffects\model_abstract.py:10, in ModelAbstract.__init__(self, model)
      [8](file:///C:/Users/IBM/Projects/Vincent%20A.B/pymarginaleffects/marginaleffects/model_abstract.py:8) def __init__(self, model):
      [9](file:///C:/Users/IBM/Projects/Vincent%20A.B/pymarginaleffects/marginaleffects/model_abstract.py:9)     self.model = model
---> [10](file:///C:/Users/IBM/Projects/Vincent%20A.B/pymarginaleffects/marginaleffects/model_abstract.py:10)     self.validate_coef()
     [11](file:///C:/Users/IBM/Projects/Vincent%20A.B/pymarginaleffects/marginaleffects/model_abstract.py:11)     self.validate_modeldata()
     [12](file:///C:/Users/IBM/Projects/Vincent%20A.B/pymarginaleffects/marginaleffects/model_abstract.py:12)     self.validate_response_name()

File c:\Users\IBM\Projects\Vincent A.B\pymarginaleffects\marginaleffects\model_abstract.py:17, in ModelAbstract.validate_coef(self)
     [16](file:///C:/Users/IBM/Projects/Vincent%20A.B/pymarginaleffects/marginaleffects/model_abstract.py:16) def validate_coef(self):
---> [17](file:///C:/Users/IBM/Projects/Vincent%20A.B/pymarginaleffects/marginaleffects/model_abstract.py:17)     coef = self.get_coef()
     [18](file:///C:/Users/IBM/Projects/Vincent%20A.B/pymarginaleffects/marginaleffects/model_abstract.py:18)     if not isinstance(coef, np.ndarray):
     [19](file:///C:/Users/IBM/Projects/Vincent%20A.B/pymarginaleffects/marginaleffects/model_abstract.py:19)         raise ValueError("coef must be a numpy array")

File c:\Users\IBM\Projects\Vincent A.B\pymarginaleffects\marginaleffects\model_pyfixest.py:18, in ModelPyfixest.get_coef(self)
     [17](file:///C:/Users/IBM/Projects/Vincent%20A.B/pymarginaleffects/marginaleffects/model_pyfixest.py:17) def get_coef(self):
---> [18](file:///C:/Users/IBM/Projects/Vincent%20A.B/pymarginaleffects/marginaleffects/model_pyfixest.py:18)     return np.array(self.model._beta_hat)

AttributeError: 'DataFrame' object has no attribute '_beta_hat'
vincentarelbundock commented 2 weeks ago

Thanks for working on this. Good issue to raise.