microsoft / FLAML

A fast library for AutoML and tuning. Join our Discord: https://discord.gg/Cppx2vSPVP.
https://microsoft.github.io/FLAML/
MIT License
3.91k stars 508 forks source link

Using pandas validation data gives an error #46

Status: Closed (nhirschey closed this issue 3 years ago)

nhirschey commented 3 years ago

If I leave out `X_val` and `y_val`, `automl.fit` works fine. But if I pass pandas validation data through these arguments, it crashes with the following error:

----> 7 automl.fit(X_train= xtrain,y_train=ytrain,X_val=xvalid,y_val=yvalid,**automl_settings)

~\anaconda3\lib\site-packages\flaml\automl.py in fit(self, X_train, y_train, dataframe, label, metric, task, n_jobs, log_file_name, estimator_list, time_budget, max_iter, sample, ensemble, eval_method, log_type, model_history, split_ratio, n_splits, log_training_metric, mem_thres, X_val, y_val, sample_weight_val, retrain_full, split_type, learner_selector, hpo_method, **fit_kwargs)
    832         self._state.fit_kwargs = fit_kwargs
    833         self._state.weight_val = sample_weight_val
--> 834         self._validate_data(X_train, y_train, dataframe, label, X_val, y_val)
    835         self._search_states = {}  #key: estimator name; value: SearchState
    836         self._random = np.random.RandomState(RANDOM_SEED)

~\anaconda3\lib\site-packages\flaml\automl.py in _validate_data(self, X_train_all, y_train_all, dataframe, label, X_val, y_val)
    434             "# rows in X_val must match length of y_val.")
    435             if self._transformer:
--> 436                 self._state.X_val = self._transformer.transform(X_val)
    437             else:
    438                 self._state.X_val = X_val

~\anaconda3\lib\site-packages\flaml\data.py in transform(self, X)
    251                 X[cat_columns] = X[cat_columns].astype('category')
    252             if num_columns:
--> 253                 X[num_columns].fillna(np.nan, inplace=True)
    254                 X[num_columns] = self.transformer.transform(X)
    255         return X

~\anaconda3\lib\site-packages\pandas\core\frame.py in fillna(self, value, method, axis, inplace, limit, downcast)
   4315         downcast=None,
   4316     ) -> Optional["DataFrame"]:
-> 4317         return super().fillna(
   4318             value=value,
   4319             method=method,

~\anaconda3\lib\site-packages\pandas\core\generic.py in fillna(self, value, method, axis, inplace, limit, downcast)
   6086         result = self._constructor(new_data)
   6087         if inplace:
-> 6088             return self._update_inplace(result)
   6089         else:
   6090             return result.__finalize__(self, method="fillna")

~\anaconda3\lib\site-packages\pandas\core\generic.py in _update_inplace(self, result, verify_is_copy)
   3962         self._clear_item_cache()
   3963         self._mgr = result._mgr
-> 3964         self._maybe_update_cacher(verify_is_copy=verify_is_copy)
   3965 
   3966     def add_prefix(self: FrameOrSeries, prefix: str) -> FrameOrSeries:

~\anaconda3\lib\site-packages\pandas\core\generic.py in _maybe_update_cacher(self, clear, verify_is_copy)
   3243 
   3244         if verify_is_copy:
-> 3245             self._check_setitem_copy(stacklevel=5, t="referant")
   3246 
   3247         if clear:

~\anaconda3\lib\site-packages\pandas\core\generic.py in _check_setitem_copy(self, stacklevel, t, force)
   3679 
   3680         if value == "raise":
-> 3681             raise com.SettingWithCopyError(t)
   3682         elif value == "warn":
   3683             warnings.warn(t, com.SettingWithCopyWarning, stacklevel=stacklevel)

SettingWithCopyError: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

sonichi commented 3 years ago

https://github.com/microsoft/FLAML/pull/45/commits/2bfc9cdd3c6d58925b98872878432a406b3e8a43