interpretml / DiCE

Generate Diverse Counterfactual Explanations for any machine learning model.
https://interpretml.github.io/DiCE/
MIT License
1.34k stars 185 forks source link

How to apply a timeout if counterfactual is not generated for any instance #354

Open Himanshu-1988 opened 1 year ago

Himanshu-1988 commented 1 year ago

Thanks for creating a wonderful library and documentation.

I am using diceml for a regression model. I have a system where, in a loop, I run diceml on every instance separately, because the feature ranges are not generic (they differ per instance). The problem is that for some instances, when it is not able to generate a counterfactual, it keeps on running.

Do we have a timeout which I can set?

Or is there any suggestion for generating counterfactuals where the ranges for each feature depend on the instance's values?

Himanshu-1988 commented 1 year ago

ERROR WHERE IT KEEPS ON RUNNING

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/dice_ml/explainer_interfaces/explainer_base.py:161, in ExplainerBase.generate_counterfactuals(self, query_instances, total_CFs, desired_class, desired_range, permitted_range, features_to_vary, stopping_threshold, posthoc_sparsity_param, proximity_weight, sparsity_weight, diversity_weight, categorical_penalty, posthoc_sparsity_algorithm, verbose, **kwargs) 159 for query_instance in tqdm(query_instances_list): 160 self.data_interface.set_continuous_feature_indexes(query_instance) --> 161 res = self._generate_counterfactuals( 162 query_instance, total_CFs, 163 desired_class=desired_class, 164 desired_range=desired_range, 165 permitted_range=permitted_range, 166 features_to_vary=features_to_vary, 167 stopping_threshold=stopping_threshold, 168 posthoc_sparsity_param=posthoc_sparsity_param, 169 posthoc_sparsity_algorithm=posthoc_sparsity_algorithm, 170 verbose=verbose, 171 **kwargs) 172 cf_examples_arr.append(res) 173 self._check_any_counterfactuals_computed(cf_examples_arr=cf_examples_arr)

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/dice_ml/explainer_interfaces/dice_genetic.py:288, in DiceGenetic._generate_counterfactuals(self, query_instance, total_CFs, initialization, desired_range, desired_class, proximity_weight, sparsity_weight, diversity_weight, categorical_penalty, algorithm, features_to_vary, permitted_range, yloss_type, diversity_loss_type, feature_weights, stopping_threshold, posthoc_sparsity_param, posthoc_sparsity_algorithm, maxiterations, thresh, verbose) 285 if col not in query_instance_df_dummies.columns: 286 query_instance_df_dummies[col] = 0 --> 288 self.do_param_initializations(total_CFs, initialization, desired_range, desired_class, query_instance, 289 query_instance_df_dummies, algorithm, features_to_vary, permitted_range, 290 yloss_type, diversity_loss_type, feature_weights, proximity_weight, 291 sparsity_weight, diversity_weight, categorical_penalty, verbose) 293 query_instance_df = self.find_counterfactuals(query_instance, desired_range, desired_class, features_to_vary, 294 maxiterations, thresh, verbose) 296 return exp.CounterfactualExamples(data_interface=self.data_interface, 297 test_instance_df=query_instance_df, 298 final_cfs_df=self.final_cfs_df, (...) 302 desired_class=desired_class, 303 model_type=self.model.model_type)

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/dice_ml/explainer_interfaces/dice_genetic.py:194, in DiceGenetic.do_param_initializations(self, total_CFs, initialization, desired_range, desired_class, query_instance, query_instance_df_dummies, algorithm, features_to_vary, permitted_range, yloss_type, diversity_loss_type, feature_weights, proximity_weight, sparsity_weight, diversity_weight, categorical_penalty, verbose) 192 self.feature_range = self.get_valid_feature_range(normalized=False) 193 if len(self.cfs) != total_CFs: --> 194 self.do_cf_initializations( 195 total_CFs, initialization, algorithm, features_to_vary, desired_range, desired_class, 196 query_instance, query_instance_df_dummies, verbose) 197 else: 198 self.total_CFs = total_CFs

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/dice_ml/explainer_interfaces/dice_genetic.py:180, in DiceGenetic.do_cf_initializations(self, total_CFs, initialization, algorithm, features_to_vary, desired_range, desired_class, query_instance, query_instance_df_dummies, verbose) 178 indices = self.KD_tree.query(query_instance_df_dummies, num_queries)[1][0] 179 KD_tree_output = self.dataset_with_predictions.iloc[indices].copy() --> 180 self.do_KD_init(features_to_vary, query_instance, KD_tree_output, desired_class, desired_range) 182 if verbose: 183 print("Initialization complete! Generating counterfactuals...")

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/dice_ml/explainer_interfaces/dice_genetic.py:140, in DiceGenetic.do_KD_init(self, features_to_vary, query_instance, cfs, desired_class, desired_range) 137 uniques = np.unique(new_array, axis=0) 139 if len(uniques) != self.population_size: --> 140 remaining_cfs = self.do_random_init( 141 self.population_size - len(uniques), features_to_vary, query_instance, desired_class, desired_range) 142 self.cfs = np.concatenate([uniques, remaining_cfs])

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/dice_ml/explainer_interfaces/dice_genetic.py:98, in DiceGenetic.do_random_init(self, num_inits, features_to_vary, query_instance, desired_class, desired_range) 96 else: 97 one_init[jx] = query_instance[jx] ---> 98 if self.is_cf_valid(self.predict_fn_scores(one_init)): 99 remaining_cfs[kx] = one_init 100 kx += 1

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/dice_ml/explainer_interfaces/dice_genetic.py:308, in DiceGenetic.predict_fn_scores(self, input_instance) 306 """Returns prediction scores.""" 307 input_instance = self.label_decode(input_instance) --> 308 out = self.model.get_output(input_instance) 309 if self.model.model_type == ModelTypes.Classifier and out.shape[1] == 1: 310 # DL models return only 1 for binary classification 311 out = np.hstack((1-out, out))

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/dice_ml/model_interfaces/base_model.py:56, in BaseModel.get_output(self, input_instance, model_score) 54 return self.model.predict_proba(input_instance) 55 else: ---> 56 return self.model.predict(input_instance) 57 else: 58 return self.model.predict(input_instance)

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/sklearn/utils/metaestimators.py:116, in _IffHasAttrDescriptor.__get__.<locals>.<lambda>(*args, **kwargs) 113 attrgetter(self.delegate_names[-1])(obj) 115 # lambda, but not partial, allows help() to work with update_wrapper --> 116 out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs) 117 # update the docstring of the returned function 118 update_wrapper(out, self.fn)

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/sklearn/pipeline.py:420, in Pipeline.predict(self, X, **predict_params) 418 for _, name, transform in self._iter(with_final=False): 419 Xt = transform.transform(Xt) --> 420 return self.steps[-1][-1].predict(Xt, **predict_params)

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/xgboost/sklearn.py:651, in XGBModel.predict(self, data, output_margin, ntree_limit, validate_features, base_margin) 649 if ntree_limit is None: 650 ntree_limit = getattr(self, "best_ntree_limit", 0) --> 651 return self.get_booster().predict(test_dmatrix, 652 output_margin=output_margin, 653 ntree_limit=ntree_limit, 654 validate_features=validate_features)

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/xgboost/core.py:1489, in Booster.predict(self, data, output_margin, ntree_limit, pred_leaf, pred_contribs, approx_contribs, pred_interactions, validate_features, training) 1487 length = c_bst_ulong() 1488 preds = ctypes.POINTER(ctypes.c_float)() -> 1489 _check_call(_LIB.XGBoosterPredict(self.handle, data.handle, 1490 ctypes.c_int(option_mask), 1491 ctypes.c_uint(ntree_limit), 1492 ctypes.c_int(training), 1493 ctypes.byref(length), 1494 ctypes.byref(preds))) 1495 preds = ctypes2numpy(preds, length.value, np.float32) 1496 if pred_leaf:

mitirmizi commented 1 year ago

Hi, I'm facing a similar issue where I want to stop the .generate_counterfactuals() function if it takes too long. I'm assuming it doesn't output anything anyway if it goes over 3-4 minutes. For the time being I'm using the Python wrap-timeout-decorator module. But I still get some errors sometimes, possibly due to some locking issue; I'm not sure, to be honest.

PMK1991 commented 1 year ago

@mitirmizi Can you show me an example? I am facing a similar issue.

mitirmizi commented 1 year ago

Sure, here is the directory containing the data and notebook required to reproduce this state: https://github.com/mitirmizi/Prescriptive-Process-Analytics-using-Counterfactuals/tree/master/reproduce_dice_error

I'm using DiCE version: 0.9 and Pandas version: 1.5.0.