ysh329 opened this issue 5 years ago:
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split


def train_xgboost(train_data_path, feature_path):
    # Load the label CSV and build the feature matrix by averaging each
    # sample's saved feature array (one .npy file per id) along axis 0.
    df = pd.read_csv(train_data_path)
    print(df.head())
    x = np.array([np.mean(np.load(feature_path + '%s.npy' % str(id)), axis=0)
                  for id in df['id'].tolist()])
    y = df['cancer'].values  # .as_matrix() was removed in newer pandas

    # Stratified 80/20 train/validation split.
    trn_x, val_x, trn_y, val_y = train_test_split(
        x, y, random_state=42, stratify=y, test_size=0.20)

    clf = xgb.XGBRegressor(max_depth=1,          # 10
                           n_estimators=20500,
                           min_child_weight=15,  # 9
                           learning_rate=0.00750,
                           nthread=8,
                           subsample=0.80,
                           colsample_bytree=0.80,
                           seed=4242)

    # Early stopping on validation log loss ('error' is another option).
    # Note: newer XGBoost releases move eval_metric/early_stopping_rounds
    # into the XGBRegressor constructor instead of fit().
    clf.fit(trn_x, trn_y,
            eval_set=[(val_x, val_y)],
            verbose=True,
            eval_metric='logloss',
            early_stopping_rounds=300)
    return clf
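
A minimal usage sketch for the function above; the file names (stage1_labels.csv, features/, new_sample.npy) are hypothetical placeholders, not paths from the original post:

import numpy as np

# Hypothetical paths; replace with the real label CSV and feature directory.
clf = train_xgboost('stage1_labels.csv', 'features/')

# Score a new sample whose features are averaged the same way as in training.
new_x = np.mean(np.load('features/new_sample.npy'), axis=0).reshape(1, -1)
print(clf.predict(new_x))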
Reference: "Derivation of the XGBoost second-order Taylor expansion formula" - Spring Herald - CSDN blog: https://blog.csdn.net/bu2_int/article/details/80278857
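
For context on the linked derivation, this is the standard result from the XGBoost paper, restated here as a sketch: at boosting round t the objective is approximated by a second-order Taylor expansion of the loss around the previous prediction,

\mathcal{L}^{(t)} = \sum_{i=1}^{n} l\big(y_i,\ \hat{y}_i^{(t-1)} + f_t(x_i)\big) + \Omega(f_t)
\approx \sum_{i=1}^{n} \Big[ l\big(y_i, \hat{y}_i^{(t-1)}\big) + g_i f_t(x_i) + \tfrac{1}{2} h_i f_t^2(x_i) \Big] + \Omega(f_t),

where g_i = \partial_{\hat{y}^{(t-1)}} l(y_i, \hat{y}^{(t-1)}) and h_i = \partial^2_{\hat{y}^{(t-1)}} l(y_i, \hat{y}^{(t-1)}) are the first- and second-order gradients of the loss, and \Omega(f_t) = \gamma T + \tfrac{1}{2}\lambda \sum_j w_j^2 penalizes tree complexity.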