Open wyxandsj opened 4 years ago
I used 28 million (2800W) samples to train a model with dask-xgboost, but the status of the training task always shows that no work is being done.
from distributed import Client, progress
from dask.distributed import Client as Client2
import dask.dataframe as dd
import pandas as pd
import dask_xgboost as dxgb

# Input files; each is read as a headerless, comma-separated table.
filenames = ['/data//000000_0', '/data//000001_0']

# Column layout of every input file: the label column first, then the features.
feature = ["A", "B"]
y_name = ["C"]

client2 = Client2("xx.xx.xx.xx:xx")


def data2dataframe(fn):
    """Load one CSV file and split it into feature and label frames.

    The file is read without a header row; its columns are named
    ``y_name + feature`` in that order. Cells equal to ``'NULL'`` are
    treated as missing, missing values are replaced with 0, and every
    column is cast to ``float64``.

    Args:
        fn: Path of the CSV file to read.

    Returns:
        A ``(features, labels)`` tuple of pandas DataFrames holding the
        ``feature`` columns and the ``y_name`` columns respectively.
    """
    df = pd.read_csv(fn, names=y_name + feature, na_values='NULL',
                     header=None, sep=',')
    # Fill with a numeric 0 (not the string "0") so the columns never take a
    # detour through object dtype before the float cast. The frame's columns
    # are exactly y_name + feature, so one astype covers all of them.
    df = df.fillna(0).astype("float64")
    return (df[feature], df[y_name])


# Parse each file in a separate task submitted through the client.
futures2 = client2.map(data2dataframe, filenames)

# Pass the list of futures itself: gather is documented for collections of
# futures (lists, sets, dicts), not for iterators.
# NOTE(review): this pulls every parsed frame into the local process before
# it is re-split by from_pandas below -- confirm that is intended for a
# dataset of this size.
results = client2.gather(futures2)

# Concatenate all parts in one call each instead of growing the frames one
# file at a time, which re-copies the accumulated data on every iteration.
X_trains = pd.concat([features for features, _ in results])
y_trains = pd.concat([labels for _, labels in results])

X_trains = dd.from_pandas(X_trains, npartitions=54)
y_trains = dd.from_pandas(y_trains, npartitions=54)

dd_train = X_trains
dd_train_label = y_trains

params = {'objective': 'binary:logistic',
          'max_depth': 1,
          'eta': 0.01,
          'subsample': 0.5,
          'min_child_weight': 1}

bst = dxgb.train(client2, params, dd_train, dd_train_label,
                 num_boost_round=140)
predictions = dxgb.predict(client2, bst, dd_train)
print(predictions.persist())
Do you have a minimal example? http://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports
I used 28 million (2800W) samples to train a model with dask-xgboost, but the status of the training task always shows that no work is being done.