Traceback (most recent call last):
File "/data/home/scv7343/.conda/envs/timeSeries/lib/python3.8/site-packages/ray/tune/experiment.py", line 164, in __init__
self._run_identifier = Experiment.register_if_needed(run)
File "/data/home/scv7343/.conda/envs/timeSeries/lib/python3.8/site-packages/ray/tune/experiment.py", line 353, in register_if_needed
register_trainable(name, run_object)
File "/data/home/scv7343/.conda/envs/timeSeries/lib/python3.8/site-packages/ray/tune/registry.py", line 96, in register_trainable
_global_registry.register(TRAINABLE_CLASS, name, trainable)
File "/data/home/scv7343/.conda/envs/timeSeries/lib/python3.8/site-packages/ray/tune/registry.py", line 180, in register
self.flush_values()
File "/data/home/scv7343/.conda/envs/timeSeries/lib/python3.8/site-packages/ray/tune/registry.py", line 202, in flush_values
_internal_kv_put(
File "/data/home/scv7343/.conda/envs/timeSeries/lib/python3.8/site-packages/ray/_private/client_mode_hook.py", line 105, in wrapper
return func(*args, **kwargs)
File "/data/home/scv7343/.conda/envs/timeSeries/lib/python3.8/site-packages/ray/experimental/internal_kv.py", line 88, in _internal_kv_put
return global_gcs_client.internal_kv_put(key, value, overwrite, namespace) == 0
File "/data/home/scv7343/.conda/envs/timeSeries/lib/python3.8/site-packages/ray/_private/gcs_utils.py", line 137, in wrapper
return f(self, *args, **kwargs)
File "/data/home/scv7343/.conda/envs/timeSeries/lib/python3.8/site-packages/ray/_private/gcs_utils.py", line 228, in internal_kv_put
reply = self._kv_stub.InternalKVPut(req)
File "/data/home/scv7343/.conda/envs/timeSeries/lib/python3.8/site-packages/grpc/_channel.py", line 946, in __call__
return _end_unary_response_blocking(state, call, False, None)
File "/data/home/scv7343/.conda/envs/timeSeries/lib/python3.8/site-packages/grpc/_channel.py", line 849, in _end_unary_response_blocking
raise _InactiveRpcError(state)
grpc._channel._InactiveRpcError: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.RESOURCE_EXHAUSTED
details = "Received message larger than max (182421709 vs. 104857600)"
debug_error_string = "{"created":"@1699947194.101433625","description":"Error received from peer ipv4:192.168.11.28:49081","file":"src/core/lib/surface/call.cc","file_line":1074,"grpc_message":"Received message larger than max (182421709 vs. 104857600)","grpc_status":8}"
>
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "../src/run_LSTM.py", line 398, in <module>
result = flaml.tune.run(
File "/data/home/scv7343/.conda/envs/timeSeries/lib/python3.8/site-packages/flaml/tune/tune.py", line 623, in run
analysis = tune.run(
File "/data/home/scv7343/.conda/envs/timeSeries/lib/python3.8/site-packages/ray/tune/tune.py", line 515, in run
experiments[i] = Experiment(
File "/data/home/scv7343/.conda/envs/timeSeries/lib/python3.8/site-packages/ray/tune/experiment.py", line 167, in __init__
raise TuneError(
ray.tune.error.TuneError: The Trainable/training function is too large for grpc resource limit. Check that its definition is not implicitly capturing a large array or other object in scope. Tip: use tune.with_parameters() to put large objects in the Ray object store.
Original exception: Traceback (most recent call last):
File "/data/home/scv7343/.conda/envs/timeSeries/lib/python3.8/site-packages/ray/tune/experiment.py", line 164, in __init__
self._run_identifier = Experiment.register_if_needed(run)
File "/data/home/scv7343/.conda/envs/timeSeries/lib/python3.8/site-packages/ray/tune/experiment.py", line 353, in register_if_needed
register_trainable(name, run_object)
File "/data/home/scv7343/.conda/envs/timeSeries/lib/python3.8/site-packages/ray/tune/registry.py", line 96, in register_trainable
_global_registry.register(TRAINABLE_CLASS, name, trainable)
File "/data/home/scv7343/.conda/envs/timeSeries/lib/python3.8/site-packages/ray/tune/registry.py", line 180, in register
self.flush_values()
File "/data/home/scv7343/.conda/envs/timeSeries/lib/python3.8/site-packages/ray/tune/registry.py", line 202, in flush_values
_internal_kv_put(
File "/data/home/scv7343/.conda/envs/timeSeries/lib/python3.8/site-packages/ray/_private/client_mode_hook.py", line 105, in wrapper
return func(*args, **kwargs)
File "/data/home/scv7343/.conda/envs/timeSeries/lib/python3.8/site-packages/ray/experimental/internal_kv.py", line 88, in _internal_kv_put
return global_gcs_client.internal_kv_put(key, value, overwrite, namespace) == 0
File "/data/home/scv7343/.conda/envs/timeSeries/lib/python3.8/site-packages/ray/_private/gcs_utils.py", line 137, in wrapper
return f(self, *args, **kwargs)
File "/data/home/scv7343/.conda/envs/timeSeries/lib/python3.8/site-packages/ray/_private/gcs_utils.py", line 228, in internal_kv_put
reply = self._kv_stub.InternalKVPut(req)
File "/data/home/scv7343/.conda/envs/timeSeries/lib/python3.8/site-packages/grpc/_channel.py", line 946, in __call__
return _end_unary_response_blocking(state, call, False, None)
File "/data/home/scv7343/.conda/envs/timeSeries/lib/python3.8/site-packages/grpc/_channel.py", line 849, in _end_unary_response_blocking
raise _InactiveRpcError(state)
grpc._channel._InactiveRpcError: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.RESOURCE_EXHAUSTED
details = "Received message larger than max (182421709 vs. 104857600)"
debug_error_string = "{"created":"@1699947194.101433625","description":"Error received from peer ipv4:192.168.11.28:49081","file":"src/core/lib/surface/call.cc","file_line":1074,"grpc_message":"Received message larger than max (182421709 vs. 104857600)","grpc_status":8}"
Code
# Launch the hyperparameter search via flaml.tune.run with the Ray backend
# enabled (use_ray=True).
# NOTE(review): tune.with_parameters() is called with the trainable only and no
# keyword arguments, so no object is handed over through it here. The TuneError
# in the traceback reports the trainable as too large for the gRPC limit;
# presumably `train` captures a large object from its enclosing scope - confirm,
# then pass that object as a keyword argument to with_parameters() and accept
# it as a parameter of `train`.
result = flaml.tune.run(
tune.with_parameters(train),
config=params,
# Minimize the reported "loss" metric.
metric="loss",
mode="min",
low_cost_partial_config={"num_epochs": 1},
max_resource=max_num_epoch,
min_resource=1,
scheduler="asha", # Use asha scheduler to perform early stopping based on intermediate results reported
resources_per_trial={"cpu": 1, "gpu": gpus_per_trial},
local_dir=logs_root,
num_samples=num_samples,
time_budget_s=time_budget_s,
use_ray=True
)
Error
Code
Version
Question
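How can I solve this problem? The training function is already wrapped in `tune.with_parameters(train)`, yet registering it still fails with `RESOURCE_EXHAUSTED` because the message (182421709 bytes) exceeds the gRPC limit (104857600 bytes).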