Open Peccer opened 1 year ago
Hi @Peccer,
Thank you for reporting the issue. Recent versions of xgboost may not work. Could you try an older version released 1-2 years ago?
Besides, Python 3.9 may or may not work with CausalLift. The latest tested version of Python is 3.7.
Running:
print('\n[Create 2 models for treatment and untreatment and estimate CATE (Conditional Average Treatment Effects)]') train_df, test_df = cl.estimate_cate_by_2_models()
gives the error below. I ran the example notebook from the GitHub project.
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮ │ /tmp/ipykernel_121/3555275851.py:5 in │
│ │
│ [Errno 2] No such file or directory: '/tmp/ipykernel_121/3555275851.py' │
│ │
│ /root/venv/lib/python3.9/site-packages/causallift/causal_lift.py:654 in │
│ estimate_cate_by_2_models │
│ │
│ 651 │ │ │ ) │
│ 652 │ │ │
│ 653 │ │ if self.runner: │
│ ❱ 654 │ │ │ self.kedro_context.run(tags=["311_fit", "312_bundle_2_models"]) │
│ 655 │ │ │ self.uplift_models_dict = self.kedro_context.catalog.load( │
│ 656 │ │ │ │ "uplift_models_dict" │
│ 657 │ │ │ ) │
│ │
│ /root/venv/lib/python3.9/site-packages/causallift/context/flexible_context.py:178 in run │
│ │
│ 175 │ │ │ + "only_missing: {}".format(only_missing) │
│ 176 │ │ │ + ")" │
│ 177 │ │ ) │
│ ❱ 178 │ │ return super().run( │
│ 179 │ │ │ tags=tags, runner=runner, node_names=node_names, only_missing=only_missing │
│ 180 │ │ ) │
│ 181 │
│ │
│ /root/venv/lib/python3.9/site-packages/causallift/context/flexible_context.py:141 in run │
│ │
│ 138 │ │ self, kwargs # type: Any │
│ 139 │ ): │
│ 140 │ │ # type: (...) -> Dict[str, Any] │
│ ❱ 141 │ │ d = super().run(kwargs) │
│ 142 │ │ self.catalog.add_feed_dict(d, replace=True) │
│ 143 │ │ return d │
│ 144 │
│ │
│ /root/venv/lib/python3.9/site-packages/causallift/context/flexible_context.py:131 in run │
│ │
│ 128 │ │ │ runner = ( │
│ 129 │ │ │ │ ParallelRunner() if runner == "ParallelRunner" else SequentialRunner() │
│ 130 │ │ │ ) │
│ ❱ 131 │ │ return super().run(runner=runner, **kwargs) │
│ 132 │
│ 133 │
│ 134 class ProjectContext2(ProjectContext1): │
│ │
│ /root/venv/lib/python3.9/site-packages/causallift/context/flexible_context.py:106 in run │
│ │
│ 103 │ │ runner = runner or SequentialRunner() │
│ 104 │ │ if only_missing and _skippable(self.catalog): │
│ 105 │ │ │ return runner.run_only_missing(pipeline, self.catalog) │
│ ❱ 106 │ │ return runner.run(pipeline, self.catalog) │
│ 107 │
│ 108 │
│ 109 def _skippable( │
│ │
│ /root/venv/lib/python3.9/site-packages/kedro/runner/runner.py:88 in run │
│ │
│ 85 │ │ │ self._logger.info( │
│ 86 │ │ │ │ "Asynchronous mode is enabled for loading and saving data" │
│ 87 │ │ │ ) │
│ ❱ 88 │ │ self._run(pipeline, catalog, hook_manager, session_id) │
│ 89 │ │ │
│ 90 │ │ self._logger.info("Pipeline execution completed successfully.") │
│ 91 │
│ │
│ /root/venv/lib/python3.9/site-packages/kedro/runner/sequential_runner.py:70 in _run │
│ │
│ 67 │ │ │
│ 68 │ │ for exec_index, node in enumerate(nodes): │
│ 69 │ │ │ try: │
│ ❱ 70 │ │ │ │ run_node(node, catalog, hook_manager, self._is_async, session_id) │
│ 71 │ │ │ │ done_nodes.add(node) │
│ 72 │ │ │ except Exception: │
│ 73 │ │ │ │ self._suggest_resume_scenario(pipeline, done_nodes, catalog) │
│ │
│ /root/venv/lib/python3.9/site-packages/kedro/runner/runner.py:304 in run_node │
│ │
│ 301 │ if is_async: │
│ 302 │ │ node = _run_node_async(node, catalog, hook_manager, session_id) │
│ 303 │ else: │
│ ❱ 304 │ │ node = _run_node_sequential(node, catalog, hook_manager, session_id) │
│ 305 │ │
│ 306 │ for name in node.confirms: │
│ 307 │ │ catalog.confirm(name) │
│ │
│ /root/venv/lib/python3.9/site-packages/kedro/runner/runner.py:398 in _run_node_sequential │
│ │
│ 395 │ ) │
│ 396 │ inputs.update(additional_inputs) │
│ 397 │ │
│ ❱ 398 │ outputs = _call_node_run( │
│ 399 │ │ node, catalog, inputs, is_async, hook_manager, session_id=session_id │
│ 400 │ ) │
│ 401 │
│ │
│ /root/venv/lib/python3.9/site-packages/kedro/runner/runner.py:366 in _call_node_run │
│ │
│ 363 │ │ │ is_async=is_async, │
│ 364 │ │ │ session_id=session_id, │
│ 365 │ │ ) │
│ ❱ 366 │ │ raise exc │
│ 367 │ hook_manager.hook.after_node_run( │
│ 368 │ │ node=node, │
│ 369 │ │ catalog=catalog, │
│ │
│ /root/venv/lib/python3.9/site-packages/kedro/runner/runner.py:356 in _call_node_run │
│ │
│ 353 ) -> Dict[str, Any]: │
│ 354 │ # pylint: disable=too-many-arguments │
│ 355 │ try: │
│ ❱ 356 │ │ outputs = node.run(inputs) │
│ 357 │ except Exception as exc: │
│ 358 │ │ hook_manager.hook.on_node_error( │
│ 359 │ │ │ error=exc, │
│ │
│ /root/venv/lib/python3.9/site-packages/kedro/pipeline/node.py:353 in run │
│ │
│ 350 │ │ # purposely catch all exceptions │
│ 351 │ │ except Exception as exc: │
│ 352 │ │ │ self._logger.error("Node '%s' failed with error: \n%s", str(self), str(exc)) │
│ ❱ 353 │ │ │ raise exc │
│ 354 │ │
│ 355 │ def _run_with_no_inputs(self, inputs: Dict[str, Any]): │
│ 356 │ │ if inputs: │
│ │
│ /root/venv/lib/python3.9/site-packages/kedro/pipeline/node.py:344 in run │
│ │
│ 341 │ │ │ elif isinstance(self._inputs, str): │
│ 342 │ │ │ │ outputs = self._run_with_one_input(inputs, self._inputs) │
│ 343 │ │ │ elif isinstance(self._inputs, list): │
│ ❱ 344 │ │ │ │ outputs = self._run_with_list(inputs, self._inputs) │
│ 345 │ │ │ elif isinstance(self._inputs, dict): │
│ 346 │ │ │ │ outputs = self._run_with_dict(inputs, self._inputs) │
│ 347 │
│ │
│ /root/venv/lib/python3.9/site-packages/kedro/pipeline/node.py:384 in _run_with_list │
│ │
│ 381 │ │ │ │ f"{sorted(inputs.keys())}." │
│ 382 │ │ │ ) │
│ 383 │ │ # Ensure the function gets the inputs in the correct order │
│ ❱ 384 │ │ return self._func(*(inputs[item] for item in node_inputs)) │
│ 385 │ │
│ 386 │ def _run_with_dict(self, inputs: Dict[str, Any], node_inputs: Dict[str, str]): │
│ 387 │ │ # Node inputs and provided run inputs should completely overlap │
│ │
│ /root/venv/lib/python3.9/site-packages/causallift/nodes/model_for_each.py:234 in │
│ model_for_treated_fit │
│ │
│ 231 │
│ 232 │
│ 233 def model_for_treated_fit(*posargs, **kwargs): │
│ ❱ 234 │ return ModelForTreated().fit(*posargs, **kwargs) │
│ 235 │
│ 236 │
│ 237 def model_for_treated_predict_proba(posargs, kwargs): │
│ │
│ /root/venv/lib/python3.9/site-packages/causallift/nodes/model_for_each.py:94 in fit │
│ │
│ 91 │ │ │ else: │
│ 92 │ │ │ │ log.info("## Feature importances not available.") │
│ 93 │ │ │
│ ❱ 94 │ │ y_pred_train = model.predict(X_train) │
│ 95 │ │ │
│ 96 │ │ y_test = None │
│ 97 │ │ y_pred_test = None │
│ │
│ /shared-libs/python3.9/py/lib/python3.9/site-packages/sklearn/model_selection/_search.py:500 in │
│ predict │
│ │
│ 497 │ │ │ the best found parameters. │
│ 498 │ │ """ │
│ 499 │ │ check_is_fitted(self) │
│ ❱ 500 │ │ return self.best_estimator_.predict(X) │
│ 501 │ │
│ 502 │ @available_if(_estimator_has("predict_proba")) │
│ 503 │ def predict_proba(self, X): │
│ │
│ /root/venv/lib/python3.9/site-packages/xgboost/sklearn.py:1434 in predict │
│ │
│ 1431 │ │ base_margin: Optional[ArrayLike] = None, │
│ 1432 │ │ iteration_range: Optional[Tuple[int, int]] = None, │
│ 1433 │ ) -> np.ndarray: │
│ ❱ 1434 │ │ class_probs = super().predict( │
│ 1435 │ │ │ X=X, │
│ 1436 │ │ │ output_margin=output_margin, │
│ 1437 │ │ │ ntree_limit=ntree_limit, │
│ │
│ /root/venv/lib/python3.9/site-packages/xgboost/sklearn.py:1049 in predict │
│ │
│ 1046 │ │ iteration_range = self._get_iteration_range(iteration_range) │
│ 1047 │ │ if self._can_use_inplace_predict(): │
│ 1048 │ │ │ try: │
│ ❱ 1049 │ │ │ │ predts = self.get_booster().inplace_predict( │
│ 1050 │ │ │ │ │ data=X, │
│ 1051 │ │ │ │ │ iteration_range=iteration_range, │
│ 1052 │ │ │ │ │ predict_type="margin" if output_margin else "value", │
│ │
│ /root/venv/lib/python3.9/site-packages/xgboost/core.py:2147 in inplace_predict │
│ │
│ 2144 │ │ if isinstance(data, np.ndarray): │
│ 2145 │ │ │ from .data import _ensure_np_dtype │
│ 2146 │ │ │ data, _ = _ensure_np_dtype(data, data.dtype) │
│ ❱ 2147 │ │ │ _check_call( │
│ 2148 │ │ │ │ _LIB.XGBoosterPredictFromDense( │
│ 2149 │ │ │ │ │ self.handle, │
│ 2150 │ │ │ │ │ _array_interface(data), │
│ │
│ /root/venv/lib/python3.9/site-packages/xgboost/core.py:246 in _check_call │
│ │
│ 243 │ │ return value from API calls │
│ 244 │ """ │
│ 245 │ if ret != 0: │
│ ❱ 246 │ │ raise XGBoostError(py_str(_LIB.XGBGetLastError())) │
│ 247 │
│ 248 │
│ 249 def _has_categorical(booster: "Booster", data: DataType) -> bool: │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
XGBoostError: [12:04:08] ../src/c_api/c_api_utils.h:159: Invalid missing value: null
Stack trace:
[bt] (0) /root/venv/lib/python3.9/site-packages/xgboost/lib/libxgboost.so(+0xbbec9) [0x7f5d31953ec9]
[bt] (1) /root/venv/lib/python3.9/site-packages/xgboost/lib/libxgboost.so(+0xdeb90) [0x7f5d31976b90]
[bt] (2) /root/venv/lib/python3.9/site-packages/xgboost/lib/libxgboost.so(+0xe45d8) [0x7f5d3197c5d8]
[bt] (3) /root/venv/lib/python3.9/site-packages/xgboost/lib/libxgboost.so(XGBoosterPredictFromDense+0x330)
[0x7f5d3195c4d0]
[bt] (4) /usr/lib/x86_64-linux-gnu/libffi.so.6(ffi_call_unix64+0x4c) [0x7f5dccad38ee]
[bt] (5) /usr/lib/x86_64-linux-gnu/libffi.so.6(ffi_call+0x22f) [0x7f5dccad32bf]
[bt] (6) /usr/local/lib/python3.9/lib-dynload/_ctypes.cpython-39-x86_64-linux-gnu.so(+0x13111) [0x7f5dccaf1111]
[bt] (7) /usr/local/lib/python3.9/lib-dynload/_ctypes.cpython-39-x86_64-linux-gnu.so(+0x81ed) [0x7f5dccae61ed]
[bt] (8) /usr/local/lib/libpython3.9.so.1.0(_PyObject_MakeTpCall+0x79) [0x7f5dcdd1ced9]