sparks-baird / CrabNet

Predict materials properties using only the composition information!
https://crabnet.readthedocs.io/
MIT License
12 stars 4 forks source link

During `crabnet.fit(...)`: `ValueError: Input contains NaN, infinity or a value too large for dtype('float64')` #32

Status: Closed (sgbaird closed this issue 2 years ago)

sgbaird commented 2 years ago

Reported by @cseeg in an internal Slack discussion while using mat-discover with crabnet==2.0.5:

/usr/local/lib/python3.7/dist-packages/mat_discover/mat_discover_.py in group_cross_val(self, df, umap_random_state, dummy_run)
    989         avg_targ = [
    990             self.single_group_cross_val(X, y, train_index, val_index, i)
--> 991             for i, (train_index, val_index) in enumerate(logo.split(X, y, self.labels))
    992         ]
    993         out = np.array(avg_targ).T

/usr/local/lib/python3.7/dist-packages/mat_discover/mat_discover_.py in <listcomp>(.0)
    989         avg_targ = [
    990             self.single_group_cross_val(X, y, train_index, val_index, i)
--> 991             for i, (train_index, val_index) in enumerate(logo.split(X, y, self.labels))
    992         ]
    993         out = np.array(avg_targ).T

/usr/local/lib/python3.7/dist-packages/mat_discover/mat_discover_.py in single_group_cross_val(self, X, y, train_index, val_index, iter)
   1042 
   1043         self.crabnet_model = CrabNet(**self.crabnet_kwargs)
-> 1044         self.crabnet_model.fit(train_df)
   1045 
   1046         # CrabNet predict output format: (act, pred, formulae, uncert)

/usr/local/lib/python3.7/dist-packages/crabnet/crabnet_.py in fit(self, train_df, val_df, extend_features, data_dir, transfer)
    415                 or epoch == 0
    416             ):
--> 417                 self._losscurve_stats(self.epochs, epoch)
    418 
    419                 if self.losscurve:

/usr/local/lib/python3.7/dist-packages/crabnet/crabnet_.py in _losscurve_stats(self, epochs, epoch)
    759             return_true=True,
    760         )
--> 761         mae_t = mean_absolute_error(true_t, pred_t)
    762         self.loss_curve["train"].append(mae_t)
    763         pred_v, true_v = self.predict(loader=self.data_loader, return_true=True)

/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_regression.py in mean_absolute_error(y_true, y_pred, sample_weight, multioutput)
    190     """
    191     y_type, y_true, y_pred, multioutput = _check_reg_targets(
--> 192         y_true, y_pred, multioutput
    193     )
    194     check_consistent_length(y_true, y_pred, sample_weight)

/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_regression.py in _check_reg_targets(y_true, y_pred, multioutput, dtype)
     93     """
     94     check_consistent_length(y_true, y_pred)
---> 95     y_true = check_array(y_true, ensure_2d=False, dtype=dtype)
     96     y_pred = check_array(y_pred, ensure_2d=False, dtype=dtype)
     97 

/usr/local/lib/python3.7/dist-packages/sklearn/utils/validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator)
    798 
    799         if force_all_finite:
--> 800             _assert_all_finite(array, allow_nan=force_all_finite == "allow-nan")
    801 
    802     if ensure_min_samples > 0:

/usr/local/lib/python3.7/dist-packages/sklearn/utils/validation.py in _assert_all_finite(X, allow_nan, msg_dtype)
    114             raise ValueError(
    115                 msg_err.format(
--> 116                     type_err, msg_dtype if msg_dtype is not None else X.dtype
    117                 )
    118             )

ValueError: Input contains NaN, infinity or a value too large for dtype('float64').