ray-project / xgboost_ray

Distributed XGBoost on Ray
Apache License 2.0
139 stars 34 forks source link

ray.io hyperparameter tuning throwing StatusCode.RESOURCE_EXHAUSTED #205

Open NaveenVinayakS opened 2 years ago

NaveenVinayakS commented 2 years ago
from xgboost_ray import RayDMatrix
train_set = RayDMatrix(X_train, y_train)
test_set = RayDMatrix(X_test, y_test)

from xgboost_ray import RayDMatrix, RayParams, train

start = time.time()

num_actors = 1 # num of actors
num_cpus_per_actor = 4

ray_params = RayParams(
num_actors=num_actors,
cpus_per_actor=num_cpus_per_actor)

def train_model(config):
#train_x, train_y = load_breast_cancer(return_X_y=True)
#train_set = RayDMatrix(train_x, train_y)

evals_result = {}
bst = train(
params=config,
dtrain=train_set,
evals_result=evals_result,
evals=[(test_set, "eval")],
verbose_eval=False,
ray_params=ray_params)

joblib.dump(bst,"model.pkl") # Best model?

from ray import tune

# Specify the hyperparameter search space.
config = {
"objective": "reg:squaredlogerror",
"eval_metric": ["rmsle"],
"min_child_weight": tune.randint(1, 3),
"max_depth": tune.randint(5, 10),
"gamma": tune.loguniform(0.1, 0.5),
"subsample": tune.loguniform(0.5, 1.0),
"colsample_bytree": tune.loguniform(0.5, 1.0),
"eta": tune.loguniform(0.1, 0.5),
"seed": tune.randint(110, 135)
}

# Make sure to use the `get_tune_resources` method to set the `resources_per_trial`
analysis = tune.run(

tune.with_parameters(train_model),
#train_model,
config=config,
metric="eval-rmsle",
num_samples=4,
mode="min",
resources_per_trial=ray_params.get_tune_resources())

print('Runtime(HH:MM:SS) :', get_runtime(time.time()-start))

print("Best hyperparameters", analysis.best_config)

_InactiveRpcError: <_InactiveRpcError of RPC that terminated with: status = StatusCode.RESOURCE_EXHAUSTED details = "Sent message larger than max (734768174 vs. 536870912)" debug_error_string = "{"created":"@1648431702.877243956","description":"Sent message larger than max (734768174 vs. 536870912)","file":"src/core/ext/filters/message_size/message_size_filter.cc","file_line":264,"grpc_status":8}"

Yard1 commented 2 years ago

Sorry, I cannot run this code. From what I can gather, the issue is that you are passing the dataset incorrectly. You should pass the data as a dataframe/ray dataset with tune.with_parameters and then create the RayDMatrix inside the training function.