While pandas supports a column that mixes ints with float NaN values, Turi Create (tc) does not treat float("nan") as None (a missing value), and thus XGBoost fails on data converted from pandas. For example, running the following:
import pandas as pd
data = pd.DataFrame({'a':[1,float("nan"),2],'target':[1,0,1]})
import turicreate as tc
sf_data = tc.SFrame(data)
tc.boosted_trees_classifier.create(sf_data, target='target')
This results in the following error:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
/Users/dbicksokn/g3k/lib/python3.6/site-packages/turicreate/extensions.py in __run_class_function(self, fnname, args, kwargs)
275 try:
--> 276 ret = self._tkclass.call_function(fnname, argument_dict)
277 except RuntimeError as exc:
turicreate/cython/cy_model.pyx in turicreate.cython.cy_model.UnityModel.call_function()
turicreate/cython/cy_model.pyx in turicreate.cython.cy_model.UnityModel.call_function()
RuntimeError: Detected inf/nan values in feature(s) 'a'. Cannot proceed with model training.
During handling of the above exception, another exception occurred:
ToolkitError Traceback (most recent call last)
<ipython-input-7-632ac263ea9e> in <module>()
----> 1 tc.boosted_trees_classifier.create(sf_data, target='target')
/Users/dbicksokn/g3k/lib/python3.6/site-packages/turicreate/toolkits/classifier/boosted_trees_classifier.py in create(dataset, target, features, max_iterations, validation_set, class_weights, max_depth, step_size, min_loss_reduction, min_child_weight, row_subsample, column_subsample, verbose, random_seed, metric, **kwargs)
635 verbose = verbose,
636 metric = metric,
--> 637 **kwargs)
638 return BoostedTreesClassifier(model.__proxy__)
/Users/dbicksokn/g3k/lib/python3.6/site-packages/turicreate/toolkits/_supervised_learning.py in create(dataset, target, model_name, features, validation_set, distributed, verbose, seed, **kwargs)
330 model = _turicreate.extensions.__dict__[model_name]()
331 with QuietProgress(verbose):
--> 332 model.train(dataset, target, validation_set, options)
333
334 return SupervisedLearningModel(model, model_name)
/Users/dbicksokn/g3k/lib/python3.6/site-packages/turicreate/extensions.py in <lambda>(*args, **kwargs)
290 elif name in self._functions:
291 # is it a function?
--> 292 ret = lambda *args, **kwargs: self.__run_class_function(name, args, kwargs)
293 ret.__doc__ = "Name: " + name + "\nParameters: " + str(self._functions[name]) + "\n"
294 try:
/Users/dbicksokn/g3k/lib/python3.6/site-packages/turicreate/extensions.py in __run_class_function(self, fnname, args, kwargs)
277 except RuntimeError as exc:
278 # Expose C++ exceptions using ToolkitError.
--> 279 raise _ToolkitError(exc)
280 ret = _wrap_function_return(ret)
281 return ret
ToolkitError: Detected inf/nan values in feature(s) 'a'. Cannot proceed with model training.
=> Boosted trees supports None values but not float("nan").
While pandas supports a column that mixes ints with float NaN values, Turi Create (tc) does not treat float("nan") as None (a missing value), and thus XGBoost fails on data converted from pandas.
This results in the "Detected inf/nan values in feature(s)" error shown in the traceback above.
=> Boosted trees supports None values but not float("nan").