AxeldeRomblay / MLBox

MLBox is a powerful Automated Machine Learning python library.
https://mlbox.readthedocs.io/en/latest/
Other
1.49k stars 274 forks source link

TypeError: 'generator' object has no attribute '__getitem__' #52

Closed jrzaurin closed 6 years ago

jrzaurin commented 6 years ago

Reproducing the code directly from Kaggle on the Porto Seguro insurance dataset:

from mlbox.preprocessing import *
from mlbox.optimisation import *
from mlbox.prediction import *

# In[ ]:

# Input CSV paths and the name of the column to predict.
paths = [
    "data/train.csv",
    "data/test.csv",
]
target_name = "target"

# # Now let MLBox do the job !

# ## ... to read and clean all the files

# In[ ]:

# Reading and preprocessing (dates, ...).
rd = Reader(sep=",")
df = rd.train_test_split(paths, target_name)

# In[ ]:

# Removing non-stable features (like ID, ...).
dft = Drift_thresholder()
df = dft.fit_transform(df)

# ## ... to tune all the hyper-parameters

# In[ ]:

def gini(actual, pred, cmpcol = 0, sortcol = 1):
    assert( len(actual) == len(pred) )
    all = np.asarray(np.c_[ actual, pred, np.arange(len(actual)) ], dtype=np.float)
    all = all[ np.lexsort((all[:,2], -1*all[:,1])) ]
    totalLosses = all[:,0].sum()
    giniSum = all[:,0].cumsum().sum() / totalLosses

    giniSum -= (len(actual) + 1) / 2.
    return giniSum / len(actual)

def gini_normalized(a, p):
    """Return |gini(a, p) / gini(a, a)|.

    `a` holds the actual values and `p` the predictions; the Gini of the
    predictions is scaled by the Gini obtained when the actual values are
    used as their own predictions.
    """
    model_gini = gini(a, p)
    reference_gini = gini(a, a)
    return np.abs(model_gini / reference_gini)

# Optimiser scored with the normalized Gini (greater is better, computed on
# predicted probabilities), using 2 folds.
opt = Optimiser(
    scoring=make_scorer(
        gini_normalized,
        greater_is_better=True,
        needs_proba=True
    ),
    n_folds=2
)

# In[ ]:

# Hyper-parameter search space: each entry gives a "search" type and the
# "space" (candidate values or bounds) it applies to.
space = {
    'est__strategy': {"search": "choice", "space": ["LightGBM"]},
    'est__n_estimators': {"search": "choice", "space": [700]},
    'est__colsample_bytree': {"search": "uniform", "space": [0.77, 0.82]},
    'est__subsample': {"search": "uniform", "space": [0.73, 0.8]},
    'est__max_depth': {"search": "choice", "space": [5, 6, 7]},
    'est__learning_rate': {"search": "uniform", "space": [0.008, 0.02]},
}

# The third positional argument is max_evals.
params = opt.optimise(space, df, 7)

It returns the following error:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-43-801cc68c2cb6> in <module>()
     16         }
     17
---> 18 params = opt.optimise(space, df, 7)

/usr/local/lib/python2.7/site-packages/mlbox/optimisation/optimiser.pyc in optimise(self, space, df, max_evals)
    564                                    space=hyper_space,
    565                                    algo=tpe.suggest,
--> 566                                    max_evals=max_evals)
    567
    568                 # Displaying best_params

/usr/local/lib/python2.7/site-packages/hyperopt/fmin.pyc in fmin(fn, space, algo, max_evals, trials, rstate, allow_trials_fmin, pass_expr_memo_ctrl, catch_eval_exceptions, verbose, return_argmin)
    312
    313     domain = base.Domain(fn, space,
--> 314                          pass_expr_memo_ctrl=pass_expr_memo_ctrl)
    315
    316     rval = FMinIter(algo, domain, trials, max_evals=max_evals,

/usr/local/lib/python2.7/site-packages/hyperopt/base.pyc in __init__(self, fn, expr, workdir, pass_expr_memo_ctrl, name, loss_target)
    784         before = pyll.dfs(self.expr)
    785         # -- raises exception if expr contains cycles
--> 786         pyll.toposort(self.expr)
    787         vh = self.vh = VectorizeHelper(self.expr, self.s_new_ids)
    788         # -- raises exception if v_expr contains cycles

/usr/local/lib/python2.7/site-packages/hyperopt/pyll/base.pyc in toposort(expr)
    713         G.add_edges_from([(n_in, node) for n_in in node.inputs()])
    714     order = nx.topological_sort(G)
--> 715     assert order[-1] == expr
    716     return order
    717

TypeError: 'generator' object has no attribute '__getitem__'

Maybe this is related to the hyperopt version?

Nice library though 👍

jrzaurin commented 6 years ago

Actually, my apologies: this has to do with the networkx version. I downgraded:

pip install networkx==1.11

and off we go!