kearnz / autoimpute

Python package for Imputation Methods
MIT License
242 stars 19 forks source link

ValueError: Found array with 0 sample(s) (shape=(0, 88)) while a minimum of 1 is required. #25

Closed peiyaoli closed 5 years ago

peiyaoli commented 5 years ago

Hi, I tried to use this module to impute, but it does not work. The train data and test data have values, but it raises an error saying there is no data.

ValueError Traceback (most recent call last)

in 1 # proceed the same as the previous examples ----> 2 complex_lm.fit(X_train, y_train).predict(X_test) 3 complex_lm.summary() /anaconda3/lib/python3.7/site-packages/autoimpute/utils/checks.py in wrapper(d, *args, **kwargs) 59 err = f"Neither {d_err} nor {a_err} are of type pd.DataFrame" 60 raise TypeError(err) ---> 61 return func(d, *args, **kwargs) 62 return wrapper 63 /anaconda3/lib/python3.7/site-packages/autoimpute/utils/checks.py in wrapper(d, *args, **kwargs) 124 125 # return func if no missingness violations detected, then return wrap --> 126 return func(d, *args, **kwargs) 127 return wrapper 128 /anaconda3/lib/python3.7/site-packages/autoimpute/utils/checks.py in wrapper(d, *args, **kwargs) 171 err = f"All values missing in column(s) {nc}. Should be removed." 172 raise ValueError(err) --> 173 return func(d, *args, **kwargs) 174 return wrapper 175 /anaconda3/lib/python3.7/site-packages/autoimpute/analysis/linear_regressor.py in fit(self, X, y) 88 # then fit the analysis models on each of the imputed datasets 89 self.models_ = self._apply_models_to_mi_data( ---> 90 self.linear_models, X, y 91 ) 92 /anaconda3/lib/python3.7/site-packages/autoimpute/analysis/base_regressor.py in _apply_models_to_mi_data(self, model_dict, X, y) 248 model_type = model_dict["type"] 249 regressor = model_dict[self.model_lib] --> 250 mi_data = self._fit_strategy_validator(X, y) 251 models = {} 252 /anaconda3/lib/python3.7/site-packages/autoimpute/analysis/base_regressor.py in _fit_strategy_validator(self, X, y) 205 206 # return the multiply imputed datasets --> 207 return self.mi.fit_transform(X) 208 209 def _fit_model(self, model_type, regressor, X, y): /anaconda3/lib/python3.7/site-packages/autoimpute/imputations/dataframe/multiple_imputer.py in fit_transform(self, X, y) 229 def fit_transform(self, X, y=None): 230 """Convenience method to fit then transform the same dataset.""" --> 231 return self.fit(X, y).transform(X) 
/anaconda3/lib/python3.7/site-packages/autoimpute/utils/checks.py in wrapper(d, *args, **kwargs) 59 err = f"Neither {d_err} nor {a_err} are of type pd.DataFrame" 60 raise TypeError(err) ---> 61 return func(d, *args, **kwargs) 62 return wrapper 63 /anaconda3/lib/python3.7/site-packages/autoimpute/utils/checks.py in wrapper(d, *args, **kwargs) 124 125 # return func if no missingness violations detected, then return wrap --> 126 return func(d, *args, **kwargs) 127 return wrapper 128 /anaconda3/lib/python3.7/site-packages/autoimpute/utils/checks.py in wrapper(d, *args, **kwargs) 171 err = f"All values missing in column(s) {nc}. Should be removed." 172 raise ValueError(err) --> 173 return func(d, *args, **kwargs) 174 return wrapper 175 /anaconda3/lib/python3.7/site-packages/autoimpute/imputations/dataframe/multiple_imputer.py in fit(self, X, y) 186 visit=self.visit 187 ) --> 188 imputer.fit(X) 189 self.statistics_[i] = imputer 190 /anaconda3/lib/python3.7/site-packages/autoimpute/utils/checks.py in wrapper(d, *args, **kwargs) 59 err = f"Neither {d_err} nor {a_err} are of type pd.DataFrame" 60 raise TypeError(err) ---> 61 return func(d, *args, **kwargs) 62 return wrapper 63 /anaconda3/lib/python3.7/site-packages/autoimpute/utils/checks.py in wrapper(d, *args, **kwargs) 124 125 # return func if no missingness violations detected, then return wrap --> 126 return func(d, *args, **kwargs) 127 return wrapper 128 /anaconda3/lib/python3.7/site-packages/autoimpute/utils/checks.py in wrapper(d, *args, **kwargs) 171 err = f"All values missing in column(s) {nc}. Should be removed." 
172 raise ValueError(err) --> 173 return func(d, *args, **kwargs) 174 return wrapper 175 /anaconda3/lib/python3.7/site-packages/autoimpute/imputations/dataframe/single_imputer.py in fit(self, X, y) 182 x_ = _one_hot_encode(x_) 183 --> 184 imputer.fit(x_, y_) 185 186 # finally, store imputer for each column as statistics /anaconda3/lib/python3.7/site-packages/autoimpute/imputations/series/pmm.py in fit(self, X, y) 100 101 # get predictions for the data, which will be used for "closest" vals --> 102 y_pred = self.lm.fit(X, y).predict(X) 103 y_df = DataFrame({"y": y, "y_pred": y_pred}) 104 /anaconda3/lib/python3.7/site-packages/sklearn/linear_model/base.py in fit(self, X, y, sample_weight) 461 n_jobs_ = self.n_jobs 462 X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'], --> 463 y_numeric=True, multi_output=True) 464 465 if sample_weight is not None and np.atleast_1d(sample_weight).ndim > 1: /anaconda3/lib/python3.7/site-packages/sklearn/utils/validation.py in check_X_y(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, warn_on_dtype, estimator) 717 ensure_min_features=ensure_min_features, 718 warn_on_dtype=warn_on_dtype, --> 719 estimator=estimator) 720 if multi_output: 721 y = check_array(y, 'csr', force_all_finite=True, ensure_2d=False, /anaconda3/lib/python3.7/site-packages/sklearn/utils/validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator) 548 " minimum of %d is required%s." 549 % (n_samples, array.shape, ensure_min_samples, --> 550 context)) 551 552 if ensure_min_features > 0 and array.ndim == 2: ValueError: Found array with 0 sample(s) (shape=(0, 88)) while a minimum of 1 is required.
kearnz commented 5 years ago

Hi - is your input data a pandas DataFrame? If so, are there any columns that are fully missing? Right now the package supports pandas DataFrames only (as it needs to detect column names under the hood). You should also remove any columns that have no data at all, as they cannot be imputed.

Please let me know if any of this helps. If it does not, please provide more information as to what your dataset is.

Thanks!

kearnz commented 5 years ago

Closed because no further conversation. Feel free to reopen if issues persist.

Mariand012 commented 4 years ago

Hi, I'm also getting a similar error when I'm using either the SingleImputer() or the MultipleImputer() and trying to impute a pandas DataFrame.

Here's my code

imp = MultipleImputer()
imp.fit_transform(data_aided)

and here's the error i'm getting

`--------------------------------------------------------------------------- ValueError Traceback (most recent call last)

in 2 3 # fit transform returns a generator by default, calculating each imputation method lazily ----> 4 imp.fit_transform(data_aided) 5 print ("Computing a model took: ", (int((time.time() - start_time)/60)), "m. " ) /usr/local/lib/python3.7/site-packages/autoimpute/imputations/dataframe/multiple_imputer.py in fit_transform(self, X, y) 229 def fit_transform(self, X, y=None): 230 """Convenience method to fit then transform the same dataset.""" --> 231 return self.fit(X, y).transform(X) /usr/local/lib/python3.7/site-packages/autoimpute/utils/checks.py in wrapper(d, *args, **kwargs) 59 err = f"Neither {d_err} nor {a_err} are of type pd.DataFrame" 60 raise TypeError(err) ---> 61 return func(d, *args, **kwargs) 62 return wrapper 63 /usr/local/lib/python3.7/site-packages/autoimpute/utils/checks.py in wrapper(d, *args, **kwargs) 124 125 # return func if no missingness violations detected, then return wrap --> 126 return func(d, *args, **kwargs) 127 return wrapper 128 /usr/local/lib/python3.7/site-packages/autoimpute/utils/checks.py in wrapper(d, *args, **kwargs) 171 err = f"All values missing in column(s) {nc}. Should be removed." 
172 raise ValueError(err) --> 173 return func(d, *args, **kwargs) 174 return wrapper 175 /usr/local/lib/python3.7/site-packages/autoimpute/imputations/dataframe/multiple_imputer.py in fit(self, X, y) 186 visit=self.visit 187 ) --> 188 imputer.fit(X) 189 self.statistics_[i] = imputer 190 /usr/local/lib/python3.7/site-packages/autoimpute/utils/checks.py in wrapper(d, *args, **kwargs) 59 err = f"Neither {d_err} nor {a_err} are of type pd.DataFrame" 60 raise TypeError(err) ---> 61 return func(d, *args, **kwargs) 62 return wrapper 63 /usr/local/lib/python3.7/site-packages/autoimpute/utils/checks.py in wrapper(d, *args, **kwargs) 124 125 # return func if no missingness violations detected, then return wrap --> 126 return func(d, *args, **kwargs) 127 return wrapper 128 /usr/local/lib/python3.7/site-packages/autoimpute/utils/checks.py in wrapper(d, *args, **kwargs) 171 err = f"All values missing in column(s) {nc}. Should be removed." 172 raise ValueError(err) --> 173 return func(d, *args, **kwargs) 174 return wrapper 175 /usr/local/lib/python3.7/site-packages/autoimpute/imputations/dataframe/single_imputer.py in fit(self, X, y) 182 x_ = _one_hot_encode(x_) 183 --> 184 imputer.fit(x_, y_) 185 186 # finally, store imputer for each column as statistics /usr/local/lib/python3.7/site-packages/autoimpute/imputations/series/default.py in fit(self, X, y) 393 def fit(self, X, y): 394 """Defer fit to the DefaultBaseImputer.""" --> 395 super().fit(X, y) 396 return self 397 /usr/local/lib/python3.7/site-packages/autoimpute/imputations/series/default.py in fit(self, X, y) 184 if not y is None: 185 if is_numeric_dtype(y): --> 186 stats = {"param": self.num_imputer.fit(X, y), 187 "strategy": self.num_imputer.strategy} 188 if is_string_dtype(y): /usr/local/lib/python3.7/site-packages/autoimpute/imputations/series/pmm.py in fit(self, X, y) 115 116 # get predictions for the data, which will be used for "closest" vals --> 117 y_pred = self.lm.fit(X, y).predict(X) 118 y_df = DataFrame({"y": 
y, "y_pred": y_pred}) 119 /usr/local/lib/python3.7/site-packages/sklearn/linear_model/_base.py in fit(self, X, y, sample_weight) 490 n_jobs_ = self.n_jobs 491 X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'], --> 492 y_numeric=True, multi_output=True) 493 494 if sample_weight is not None: /usr/local/lib/python3.7/site-packages/sklearn/utils/validation.py in check_X_y(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, warn_on_dtype, estimator) 753 ensure_min_features=ensure_min_features, 754 warn_on_dtype=warn_on_dtype, --> 755 estimator=estimator) 756 if multi_output: 757 y = check_array(y, 'csr', force_all_finite=True, ensure_2d=False, /usr/local/lib/python3.7/site-packages/sklearn/utils/validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator) 584 " minimum of %d is required%s." 585 % (n_samples, array.shape, ensure_min_samples, --> 586 context)) 587 588 if ensure_min_features > 0 and array.ndim == 2: ValueError: Found array with 0 sample(s) (shape=(0, 26)) while a minimum of 1 is required. ` data_aided is a pandas data frame with a shape of (30060, 27), all columns have float values and there is no column that contains only missing values. All of my columns contain some nans though. Let me know if you need any more info. ![Screenshot 2020-01-23 at 16 22 36](https://user-images.githubusercontent.com/16853901/73002881-9a6b5600-3dfc-11ea-8360-3f0090d78b2c.png) Thank you :)