xgboost - Githubissues

def train_xgboost(train_data_path, feature_path):
    """Fit an XGBoost regressor on per-sample averaged feature arrays.

    The CSV at ``train_data_path`` must provide an ``id`` column and a
    ``cancer`` column.  For every id, the array stored in
    ``feature_path + '<id>.npy'`` is loaded and averaged along axis 0 to
    form that sample's feature row; the ``cancer`` column is the target.

    The data is split 80/20 (stratified on the target, ``random_state=42``)
    and the model is fitted with early stopping monitored on the held-out
    20% using the ``logloss`` metric.

    Args:
        train_data_path: Path to the training CSV.
        feature_path: Prefix prepended verbatim to ``'<id>.npy'``.  It is
            concatenated as a plain string, so a directory must include its
            trailing path separator.

    Returns:
        The fitted ``xgb.XGBRegressor``.
    """
    df = pd.read_csv(train_data_path)
    print(df.head())

    # One feature row per sample: the stored array reduced by a mean over
    # axis 0.  The loop variable is deliberately not called `id`, so the
    # builtin is not shadowed.
    x = np.array([
        np.mean(np.load(feature_path + '%s.npy' % str(sample_id)), axis=0)
        for sample_id in df['id'].tolist()
    ])

    # `Series.as_matrix()` was removed in pandas 1.0; `.values` yields the
    # same ndarray on both old and new pandas releases.
    y = df['cancer'].values

    # NOTE(review): `cross_validation` is resolved from the enclosing module;
    # newer scikit-learn exposes train_test_split under `model_selection` --
    # confirm which module the file imports.
    trn_x, val_x, trn_y, val_y = cross_validation.train_test_split(
        x, y, random_state=42, stratify=y, test_size=0.20)  # 0.20

    clf = xgb.XGBRegressor(max_depth=1,  # 10
                           n_estimators=20500,
                           min_child_weight=15,  # 9
                           learning_rate=0.00750,
                           nthread=8,
                           subsample=0.80,  # 0.8
                           colsample_bytree=0.80,
                           seed=4242)

    # n_estimators is only an upper bound here: training stops once the
    # validation metric has not improved for 300 consecutive rounds.
    # NOTE(review): recent xgboost releases expect `eval_metric` and
    # `early_stopping_rounds` on the constructor rather than on fit() --
    # verify against the installed version.
    clf.fit(trn_x, trn_y,
            eval_set=[(val_x, val_y)],
            verbose=True,
            eval_metric='logloss',  # 'logloss' or 'error'
            early_stopping_rounds=300)

    return clf
yuenshome / yuenshome.github.io

xgboost #25