Closed juandavidgutier closed 11 months ago
It looks like the method name may be incorrect; instead of 'iv.econml.dml.DMLIV', try 'econml.iv.dml.dmliv'.
@kbattocchi I changed the line to: method_name="econml.iv.dml.dmliv", but I get this error message:
ImportError: iv is not an existing causal estimator.
Sorry, it looks like you need to keep the estimation method ('iv') at the front even though it is also in the class's path. Try 'iv.econml.iv.dml.DMLIV'.
@kbattocchi Thanks a lot for the suggestion. However, I get a couple of errors when I try to refute the estimate. With the placebo refuter I get this error: "UnboundLocalError: local variable 'refuter_class' referenced before assignment". With the unobserved-common-cause refuter, the first time I run the line I get this error message: "ValueError: n_splits=5 cannot be greater than the number of members in each class", and on subsequent runs of the same line I get this error: "MemoryError: Unable to allocate 26.8 GiB for an array with shape (60024, 60023) and data type float64".
This is the data Data_Nino.csv
This is my code
"""Instrumental-variable effect estimation on Data_Nino.csv with DoWhy + EconML.

The script loads the data set, standardizes the instruments and the effect
modifier, builds a DoWhy ``CausalModel``, estimates the effect of
``Treatment`` on ``incidence100k`` with EconML's ``DMLIV`` estimator, and
finally runs two DoWhy refuters against the estimate.
"""
import os
import random
import warnings

import arviz as az
import dowhy
import econml
import lightgbm as lgb
import numpy as np
import pandas as pd
import scipy.stats as st
import scipy.stats as stats
from dowhy import CausalModel
from econml.inference import BootstrapInference
from econml.iv.dml import NonParamDMLIV
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import LassoCV
from sklearn.preprocessing import PolynomialFeatures

# DoWhy expects the identification strategy ("iv") in front of the fully
# qualified EconML class path.
DMLIV_METHOD_NAME = "iv.econml.iv.dml.DMLIV"


def _dmliv_method_params(fit_params=None):
    """Return a fresh ``method_params`` dict for the DMLIV estimator.

    New model instances are created on every call so that separate
    ``estimate_effect`` calls never share fitted nuisance models.

    Args:
        fit_params: Optional dict forwarded to the estimator's ``fit``
            (for example ``{"inference": BootstrapInference(...)}``).

    Returns:
        A dict with ``init_params`` and ``fit_params`` keys, in the layout
        ``CausalModel.estimate_effect`` takes for EconML estimators.
    """
    return {
        "init_params": {
            "model_y_xw": lgb.LGBMRegressor(),
            "model_t_xw": lgb.LGBMClassifier(),
            "model_t_xwz": lgb.LGBMClassifier(),
            "featurizer": PolynomialFeatures(degree=3, include_bias=False),
            "model_final": LassoCV(fit_intercept=False),
            "discrete_treatment": True,
            "cv": 5,
            "random_state": 123,
        },
        "fit_params": {} if fit_params is None else fit_params,
    }


# --- Load and prepare the data ------------------------------------------
data_nino = pd.read_csv("Data_Nino.csv", encoding="latin-1")
data_nino = data_nino.dropna()

data_leish_nino = data_nino.drop(["Code.DANE.period"], axis=1)
data_leish_nino = data_leish_nino.astype({"Treatment": "bool"}, copy=False)

Colombia_nino = data_leish_nino

# Standardize the two instruments and the effect modifier. Bracket
# assignment is used so the DataFrame column itself is always replaced.
for _column in ("darwin", "wpac850", "Forest"):
    Colombia_nino[_column] = stats.zscore(Colombia_nino[_column])

# --- Step 1: model the causal mechanism ---------------------------------
# The keyword is ``instruments``; the original ``intrumental_variables`` is
# not a CausalModel parameter.
model_leish = CausalModel(
    data=Colombia_nino,
    treatment="Treatment",
    outcome="incidence100k",
    instruments=["darwin", "wpac850"],
    effect_modifiers="Forest",
    graph="digraph {darwin->Treatment;wpac850->Treatment;Treatment->incidence100k;Forest->incidence100k;}",
)
model_leish.view_model()

# --- Step 2: identify the effect ----------------------------------------
identified_estimand_nino = model_leish.identify_effect(
    proceed_when_unidentifiable=True
)
print(identified_estimand_nino)

# --- Step 3: estimate the effect with DMLIV -----------------------------
# Conditional effect for the units whose standardized Forest value exceeds 1.
dml_estimate_nino = model_leish.estimate_effect(
    identified_estimand_nino,
    control_value=0,
    treatment_value=1,
    target_units=lambda df: df["Forest"] > 1,  # condition used for CATE
    method_name=DMLIV_METHOD_NAME,
    method_params=_dmliv_method_params(),
)
print(dml_estimate_nino)

# NOTE(review): ``target_units=1`` is kept from the original script. DoWhy
# documents a string such as "ate", a callable, or a DataFrame for this
# argument -- confirm that an integer is really intended here.
dml_estimate_nino = model_leish.estimate_effect(
    identified_estimand_nino,
    control_value=0,
    treatment_value=1,
    target_units=1,
    method_name=DMLIV_METHOD_NAME,
    method_params=_dmliv_method_params(),
)
print(dml_estimate_nino)  # the ITEs are in estimator/outcome

# Average effect over all units, fitted with bootstrap inference.
dml_estimate_nino = model_leish.estimate_effect(
    identified_estimand_nino,
    target_units="ate",
    method_name=DMLIV_METHOD_NAME,
    method_params=_dmliv_method_params(
        fit_params={
            "inference": BootstrapInference(n_bootstrap_samples=25, n_jobs=-1),
        }
    ),
)
print(dml_estimate_nino)

ate_Colombia_nino = dml_estimate_nino.value
print(ate_Colombia_nino)

p_value_nino = dml_estimate_nino.test_stat_significance(method="bootstrap")
print(p_value_nino)

ci_Colombia_boost_nino = dml_estimate_nino.get_confidence_intervals(
    method="bootstrap",
    confidence_level=0.95,
    num_simulations=10,
    sample_size_fraction=0.7,
)
print(ci_Colombia_boost_nino)

# --- Mean CATE per forest quartile --------------------------------------
cate_Colombia_nino = dml_estimate_nino.cate_estimates

# NOTE(review): the 'CATE' column read below is never assigned in this
# script; presumably it should be filled from ``cate_Colombia_nino`` (or
# already exists in the CSV) -- confirm before relying on these means.
Q1Forest_Colombia = Colombia_nino["Quartile.Forest"] == "Q1"
mean_Q1Forest_Colombia_nino = Colombia_nino.loc[Q1Forest_Colombia, "CATE"].mean()

Q2Forest_Colombia = Colombia_nino["Quartile.Forest"] == "Q2"
mean_Q2Forest_Colombia_nino = Colombia_nino.loc[Q2Forest_Colombia, "CATE"].mean()

Q3Forest_Colombia = Colombia_nino["Quartile.Forest"] == "Q3"
mean_Q3Forest_Colombia_nino = Colombia_nino.loc[Q3Forest_Colombia, "CATE"].mean()

Q4Forest_Colombia = Colombia_nino["Quartile.Forest"] == "Q4"
mean_Q4Forest_Colombia_nino = Colombia_nino.loc[Q4Forest_Colombia, "CATE"].mean()

print(
    mean_Q1Forest_Colombia_nino,
    mean_Q2Forest_Colombia_nino,
    mean_Q3Forest_Colombia_nino,
    mean_Q4Forest_Colombia_nino,
)

# --- Step 4: refute the estimate ----------------------------------------
# The treatment column is boolean, so the simulated confounder flips the
# treatment ("binary_flip") instead of being added to it ("linear", which
# DoWhy documents for continuous treatments).
nino_unobserved_dml = model_leish.refute_estimate(
    identified_estimand_nino,
    dml_estimate_nino,
    method_name="add_unobserved_common_cause",
    confounders_effect_on_treatment="binary_flip",
    confounders_effect_on_outcome="linear",
    effect_strength_on_treatment=0.05,
    effect_strength_on_outcome=0.5,
    random_state=123,
)
print(nino_unobserved_dml)

# ``method_name`` must be passed explicitly: without it DoWhy cannot pick a
# refuter class (the reported "refuter_class referenced before assignment").
nino_placebo_dml = model_leish.refute_estimate(
    identified_estimand_nino,
    dml_estimate_nino,
    method_name="placebo_treatment_refuter",
    placebo_type="permute",
    num_simulations=10,
    random_state=123,
)
print(nino_placebo_dml)
Hi all, I am trying to run a model with DMLIV for a binary treatment with CATE estimation. However, I have a problem with the value I pass as method_name. Additionally, is there any demo for DMLIV that would help me understand the method?
Here is my dataset: Data_Nino.csv
and here is my code:
`
importing required libraries
import os, warnings, random import dowhy import econml from dowhy import CausalModel import pandas as pd import numpy as np import econml from econml.iv.dml import NonParamDMLIV from sklearn.preprocessing import PolynomialFeatures from sklearn.linear_model import LassoCV from sklearn.ensemble import GradientBoostingRegressor from econml.inference import BootstrapInference import numpy as np, scipy.stats as st import arviz as az
El Nino vs Neutral
data_nino = pd.read_csv("Data_Nino.csv", encoding='latin-1') data_nino = data_nino.dropna()
data_leish_nino = data_nino.drop(['Code.DANE.period'], axis=1) data_leish_nino.head() data_leish_nino = data_leish_nino.astype({"Treatment":'bool'}, copy=False)
colombia
Colombia_nino = data_leish_nino
Step 1: Modeling the causal mechanism
model_leish=CausalModel( data = Colombia_nino, treatment='Treatment', outcome='incidence100k', intrumental_variables='darwin', effect_modifiers='Forest', graph="digraph {darwin->Treatment;Treatment->incidence100k;Forest->incidence100k;}")
view model
model_leish.view_model()
Step 2: Identifying effects
identified_estimand_nino = model_leish.identify_effect(proceed_when_unidentifiable=True) print(identified_estimand_nino)
Step 3: Estimation of the effect
with DML
dml_estimate_nino = model_leish.estimate_effect(identified_estimand_nino, control_value=0, treatment_value=1, target_units = lambda Colombia_nino: Colombia_nino["Forest"]>1, # condition used for CATE
HERE I HAVE THE ERROR WITH THE METHOD NAME
print(dml_estimate_nino)
dml_estimate_nino = model_leish.estimate_effect(identified_estimand_nino, control_value=0, treatment_value=1, target_units = 1, # condition used for CATE method_name="iv.econml.dml.DMLIV", method_params={ 'init_params': {'model_y_xw':GradientBoostingRegressor(), 'model_t_xw': GradientBoostingRegressor(), 'model_t_xwz': GradientBoostingRegressor(), 'featurizer': PolynomialFeatures(degree=1, include_bias=False), 'model_final': LassoCV(fit_intercept=False), 'discrete_treatment': True, 'cv': 5, 'random_state': 123}, 'fit_params': {} }) print(dml_estimate_nino)#los ITE estan en estimator/outcome
dml_estimate_nino = model_leish.estimate_effect(identified_estimand_nino, target_units = "ate", # condition used for CATE
test_significance=True,
print(dml_estimate_nino) `