chamorajg closed this issue 3 years ago.
Hey, the reproduction script isn't runnable; we can't help unless it's self-contained and runs out of the box.
class DQNTorchModel(TorchModelV2, nn.Module):
"""Extension of standard TorchModelV2 to provide dueling-Q functionality.
"""
def __init__(
self,
obs_space,
action_space,
num_outputs,
model_config,
name,
*,
dueling=False,
q_hiddens=(256, ),
dueling_activation="relu",
use_noisy=False,
sigma0=0.5,
# TODO(sven): Move `add_layer_norm` into ModelCatalog as
# generic option, then error if we use ParameterNoise as
# Exploration type and do not have any LayerNorm layers in
# the net.
add_layer_norm=False):
"""Initialize variables of this model.
Extra model kwargs:
dueling (bool): Whether to build the advantage(A)/value(V) heads
for DDQN. If True, Q-values are calculated as:
Q = (A - mean[A]) + V. If False, raw NN output is interpreted
as Q-values.
q_hiddens (List[int]): List of layer-sizes after(!) the
Advantages(A)/Value(V)-split. Hence, each of the A- and V-
branches will have this structure of Dense layers. To define
the NN before this A/V-split, use - as always -
config["model"]["fcnet_hiddens"].
dueling_activation (str): The activation to use for all dueling
layers (A- and V-branch). One of "relu", "tanh", "linear".
use_noisy (bool): use noisy nets
sigma0 (float): initial value of noisy nets
add_layer_norm (bool): Enable layer norm (for param noise).
"""
nn.Module.__init__(self)
super(DQNTorchModel, self).__init__(obs_space, action_space,
num_outputs, model_config, name)
self.dueling = dueling
ins = num_outputs
# Dueling case: Build the shared (advantages and value) fc-network.
self.model = TorchFC(obs_space, action_space,
num_outputs, model_config, name)
advantage_module = nn.Sequential()
value_module = None
if self.dueling:
value_module = nn.Sequential()
for i, n in enumerate(q_hiddens):
advantage_module.add_module("dueling_A_{}".format(i),
nn.Linear(ins, n))
value_module.add_module("dueling_V_{}".format(i),
nn.Linear(ins, n))
# Add activations if necessary.
if dueling_activation == "relu":
advantage_module.add_module("dueling_A_act_{}".format(i),
nn.ReLU())
value_module.add_module("dueling_V_act_{}".format(i),
nn.ReLU())
elif dueling_activation == "tanh":
advantage_module.add_module("dueling_A_act_{}".format(i),
nn.Tanh())
value_module.add_module("dueling_V_act_{}".format(i),
nn.Tanh())
# Add LayerNorm after each Dense.
if add_layer_norm:
advantage_module.add_module("LayerNorm_A_{}".format(i),
nn.LayerNorm(n))
value_module.add_module("LayerNorm_V_{}".format(i),
nn.LayerNorm(n))
ins = n
# Actual Advantages layer (nodes=num-actions) and
# value layer (nodes=1).
advantage_module.add_module("A", nn.Linear(ins, action_space.n))
value_module.add_module("V", nn.Linear(ins, 1))
# Non-dueling:
# Q-value layer (use main module's outputs as Q-values).
else:
pass
self.advantage_module = advantage_module
self.value_module = value_module
def get_advantages_or_q_values(self, model_out):
"""Returns distributional values for Q(s, a) given a state embedding.
Override this in your custom model to customize the Q output head.
Arguments:
model_out (Tensor): embedding from the model layers
Returns:
(action_scores, logits, dist) if num_atoms == 1, otherwise
(action_scores, z, support_logits_per_action, logits, dist)
"""
return self.advantage_module(model_out)
def get_state_value(self, model_out):
"""Returns the state value prediction for the given state embedding."""
return self.value_module(model_out)
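# Illustration only (not used by this script): the dueling combination described in
# the class docstring, Q = (A - mean[A]) + V, would be computed from these two heads
# roughly as follows (shapes assumed: advantages [B, num_actions], value [B, 1]):
#     advantages = self.get_advantages_or_q_values(model_out)
#     value = self.get_state_value(model_out)
#     q_values = value + advantages - advantages.mean(dim=1, keepdim=True)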
def _noisy_layer(self, action_in, out_size, sigma0, non_linear=True):
"""
a common dense layer: y = w^{T}x + b
a noisy layer: y = (w + \\epsilon_w*\\sigma_w)^{T}x +
(b+\\epsilon_b*\\sigma_b)
where \epsilon are random variables sampled from factorized normal
distributions and \\sigma are trainable variables which are expected to
vanish along the training procedure
"""
in_size = int(action_in.shape[1])
epsilon_in = torch.normal(
mean=torch.zeros([in_size]), std=torch.ones([in_size]))
epsilon_out = torch.normal(
mean=torch.zeros([out_size]), std=torch.ones([out_size]))
epsilon_in = self._f_epsilon(epsilon_in)
epsilon_out = self._f_epsilon(epsilon_out)
epsilon_w = torch.matmul(
torch.unsqueeze(epsilon_in, -1),
other=torch.unsqueeze(epsilon_out, 0))
epsilon_b = epsilon_out
sigma_w = torch.Tensor(
data=np.random.uniform(
low=-1.0 / np.sqrt(float(in_size)),
high=1.0 / np.sqrt(float(in_size)),
size=[in_size, out_size]),
dtype=torch.float32,
requires_grad=True)
# TF noise generation can be unreliable on GPU
# If generating the noise on the CPU,
# lowering sigma0 to 0.1 may be helpful
sigma_b = torch.Tensor(
data=np.full(
shape=[out_size], fill_value=sigma0 / np.sqrt(float(in_size))),
requires_grad=True)
w = torch.Tensor(
data=np.full(
shape=[in_size, out_size],
fill_value=6 / np.sqrt(float(in_size) + float(out_size))),
requires_grad=True)
b = torch.Tensor(data=np.zeros([out_size]), requires_grad=True)
action_activation = torch.matmul(action_in, w + sigma_w * epsilon_w) \
+ b + sigma_b * epsilon_b
if not non_linear:
return action_activation
return nn.functional.relu(action_activation)
def _f_epsilon(self, x):
return torch.sign(x) * torch.pow(torch.abs(x), 0.5)
def forward(self, input_dict, state, seq_lens):
input_dict["obs"] = input_dict["obs"].float()
model_out, _ = self.model.forward(input_dict, state, seq_lens)
return model_out, []
def current_weight(agent):
return agent.get_weights()["default_policy"]
if __name__ == "__main__":
ray.init()
video_name = []
for d, ds, fs in os.walk("/data/mouli/predictions/inference_temporal_new"):
for f in fs:
if f.endswith(".csv"):
video_name.append(f[:-4])
save_path = "/data/mouli/weights/ray_weight"
ModelCatalog.register_custom_model(
"my_model", DQNTorchModel)
config = {
"dueling": True,
"double_q": True,
"env": EndoEnv,
# "callbacks": {
# "on_train_result": on_train_result,
# },
"env_config": {
"save_path": save_path,
},
"model": {
"custom_model": "my_model",
},
"output": "/data/mouli/weights/ray_weight",
"hiddens": [256],
"lr": 1e-3,# 1e-4, 1e-6]), # try different lrs
"num_workers": 2, # parallelism
"use_pytorch": True,
"monitor": False,
"num_gpus": 1,
"batch_mode":"complete_episodes",
"rollout_fragment_length": 256,
"train_batch_size": 512,
"log_level": "INFO",
"gamma": 0.2,
"ignore_worker_failures": True,
"no_done_at_end": False,
"collect_metrics_timeout": 30,
}
resource = {
"cpu": 2,
"gpu": 0.1,
}
last_checkpoint = "" # "/data/mouli/weights/ray_weight/checkpoint_145/checkpoint-145"
last_best_checkpoint = ""
for i in range(len(video_name)):
video = video_name[i]
df = pd.read_csv(os.path.join("/data/mouli/predictions/inference_temporal_new", video+".csv"))
config["env_config"]["video_name"] = video
config["env_config"]["df"] = df
config["horizon"] = df.shape[0] - 10
trainer = DQNTrainer(config=config, env=config["env"])
trainer._logdir = "/data/mouli/weights/ray_weight"
weights_before_load = current_weight(trainer)
if i > 0 or last_checkpoint != "":
if last_best_checkpoint != "":
trainer.restore(last_best_checkpoint)
else:
trainer.restore(last_checkpoint)
weights_after_load = current_weight(trainer)
# print(check(weights_before_load, weights_after_load, false=True))
for j in range(1000):
try:
result = trainer.train()
except:
print("-" * 40, "\nSocket Error\n","-" * 40)
print(last_checkpoint,"\n","-" * 40)
ray.shutdown()
ray.init()
trainer.restore(last_checkpoint)
continue
print(pretty_print(result))
if result["episode_reward_mean"] > 7:
last_best_checkpoint = trainer.save(checkpoint_dir="/data/mouli/weights/ray_weight")
# torch.save(trainer.get_policy().model.model.state_dict(), os.path.join("/data/mouli/weights/ray_weight","best_model_weights.tar"))
# if config["dueling"]:
# torch.save(trainer.get_policy().model.value_module.state_dict(), os.path.join("/data/mouli/weights/ray_weight","best_value_weights.tar"))
# torch.save(trainer.get_policy().model.advantage_module.state_dict(), os.path.join("/data/mouli/weights/ray_weight","best_advantage_weights.tar"))
break
if j % 5 == 0:
last_checkpoint = trainer.save(checkpoint_dir="/data/mouli/weights/ray_weight")
# torch.save(trainer.get_policy().model.model.state_dict(), os.path.join("/data/mouli/weights/ray_weight","model_weights.tar"))
# if config["dueling"]:
# torch.save(trainer.get_policy().model.value_module.state_dict(), os.path.join("/data/mouli/weights/ray_weight","value_weights.tar"))
# torch.save(trainer.get_policy().model.advantage_module.state_dict(), os.path.join("/data/mouli/weights/ray_weight","advantage_weights.tar"))
ray.shutdown()
Here is the script I used. The environment is a sample csv based environment.
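EndoEnv itself is not included in the script above; it is a custom environment driven by a per-video CSV. As a rough sketch of what such a DataFrame-backed env could look like (the class name, observation layout, and reward logic below are hypothetical; only the env_config keys "df", "video_name", and "save_path" come from the script):

import gym
import numpy as np
from gym.spaces import Discrete, Box

class CSVEnv(gym.Env):
    """Hypothetical stand-in for EndoEnv: steps through the rows of a DataFrame."""
    def __init__(self, config):
        self.df = config["df"]                  # pandas DataFrame, one row per timestep
        self.video_name = config["video_name"]  # kept only for bookkeeping
        self.save_path = config["save_path"]
        self.cur_row = 0
        self.action_space = Discrete(2)
        # One CSV row, cast to float32, is the observation.
        self.observation_space = Box(
            -np.inf, np.inf, shape=(self.df.shape[1], ), dtype=np.float32)
    def reset(self):
        self.cur_row = 0
        return self.df.iloc[self.cur_row].to_numpy(dtype=np.float32)
    def step(self, action):
        self.cur_row += 1
        done = self.cur_row >= len(self.df) - 1
        reward = 1.0 if action == 1 else -0.1   # placeholder reward
        obs = self.df.iloc[self.cur_row].to_numpy(dtype=np.float32)
        return obs, reward, done, {}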
That script seems to depend on external data. Can you reproduce without external dependencies? It has to be a script that can run by itself on a new machine.
Okay, I will try to reproduce that without external dependencies and get back to you. Is this some kind of proxy connection issue?
class DQNTorchModel(TorchModelV2, nn.Module):
"""Extension of standard TorchModelV2 to provide dueling-Q functionality.
"""
def __init__(
self,
obs_space,
action_space,
num_outputs,
model_config,
name,
*,
dueling=False,
q_hiddens=(256, ),
dueling_activation="relu",
use_noisy=False,
sigma0=0.5,
# TODO(sven): Move `add_layer_norm` into ModelCatalog as
# generic option, then error if we use ParameterNoise as
# Exploration type and do not have any LayerNorm layers in
# the net.
add_layer_norm=False):
"""Initialize variables of this model.
Extra model kwargs:
dueling (bool): Whether to build the advantage(A)/value(V) heads
for DDQN. If True, Q-values are calculated as:
Q = (A - mean[A]) + V. If False, raw NN output is interpreted
as Q-values.
q_hiddens (List[int]): List of layer-sizes after(!) the
Advantages(A)/Value(V)-split. Hence, each of the A- and V-
branches will have this structure of Dense layers. To define
the NN before this A/V-split, use - as always -
config["model"]["fcnet_hiddens"].
dueling_activation (str): The activation to use for all dueling
layers (A- and V-branch). One of "relu", "tanh", "linear".
use_noisy (bool): use noisy nets
sigma0 (float): initial value of noisy nets
add_layer_norm (bool): Enable layer norm (for param noise).
"""
nn.Module.__init__(self)
super(DQNTorchModel, self).__init__(obs_space, action_space,
num_outputs, model_config, name)
self.dueling = dueling
ins = num_outputs
# Dueling case: Build the shared (advantages and value) fc-network.
self.model = TorchFC(obs_space, action_space,
num_outputs, model_config, name)
advantage_module = nn.Sequential()
value_module = None
if self.dueling:
value_module = nn.Sequential()
for i, n in enumerate(q_hiddens):
advantage_module.add_module("dueling_A_{}".format(i),
nn.Linear(ins, n))
value_module.add_module("dueling_V_{}".format(i),
nn.Linear(ins, n))
# Add activations if necessary.
if dueling_activation == "relu":
advantage_module.add_module("dueling_A_act_{}".format(i),
nn.ReLU())
value_module.add_module("dueling_V_act_{}".format(i),
nn.ReLU())
elif dueling_activation == "tanh":
advantage_module.add_module("dueling_A_act_{}".format(i),
nn.Tanh())
value_module.add_module("dueling_V_act_{}".format(i),
nn.Tanh())
# Add LayerNorm after each Dense.
if add_layer_norm:
advantage_module.add_module("LayerNorm_A_{}".format(i),
nn.LayerNorm(n))
value_module.add_module("LayerNorm_V_{}".format(i),
nn.LayerNorm(n))
ins = n
# Actual Advantages layer (nodes=num-actions) and
# value layer (nodes=1).
advantage_module.add_module("A", nn.Linear(ins, action_space.n))
value_module.add_module("V", nn.Linear(ins, 1))
# Non-dueling:
# Q-value layer (use main module's outputs as Q-values).
else:
pass
self.advantage_module = advantage_module
self.value_module = value_module
def get_advantages_or_q_values(self, model_out):
"""Returns distributional values for Q(s, a) given a state embedding.
Override this in your custom model to customize the Q output head.
Arguments:
model_out (Tensor): embedding from the model layers
Returns:
(action_scores, logits, dist) if num_atoms == 1, otherwise
(action_scores, z, support_logits_per_action, logits, dist)
"""
return self.advantage_module(model_out)
def get_state_value(self, model_out):
"""Returns the state value prediction for the given state embedding."""
return self.value_module(model_out)
def _noisy_layer(self, action_in, out_size, sigma0, non_linear=True):
"""
a common dense layer: y = w^{T}x + b
a noisy layer: y = (w + \\epsilon_w*\\sigma_w)^{T}x +
(b+\\epsilon_b*\\sigma_b)
where \epsilon are random variables sampled from factorized normal
distributions and \\sigma are trainable variables which are expected to
vanish along the training procedure
"""
in_size = int(action_in.shape[1])
epsilon_in = torch.normal(
mean=torch.zeros([in_size]), std=torch.ones([in_size]))
epsilon_out = torch.normal(
mean=torch.zeros([out_size]), std=torch.ones([out_size]))
epsilon_in = self._f_epsilon(epsilon_in)
epsilon_out = self._f_epsilon(epsilon_out)
epsilon_w = torch.matmul(
torch.unsqueeze(epsilon_in, -1),
other=torch.unsqueeze(epsilon_out, 0))
epsilon_b = epsilon_out
sigma_w = torch.Tensor(
data=np.random.uniform(
low=-1.0 / np.sqrt(float(in_size)),
high=1.0 / np.sqrt(float(in_size)),
size=[in_size, out_size]),
dtype=torch.float32,
requires_grad=True)
# TF noise generation can be unreliable on GPU
# If generating the noise on the CPU,
# lowering sigma0 to 0.1 may be helpful
sigma_b = torch.Tensor(
data=np.full(
shape=[out_size], fill_value=sigma0 / np.sqrt(float(in_size))),
requires_grad=True)
w = torch.Tensor(
data=np.full(
shape=[in_size, out_size],
fill_value=6 / np.sqrt(float(in_size) + float(out_size))),
requires_grad=True)
b = torch.Tensor(data=np.zeros([out_size]), requires_grad=True)
action_activation = torch.matmul(action_in, w + sigma_w * epsilon_w) \
+ b + sigma_b * epsilon_b
if not non_linear:
return action_activation
return nn.functional.relu(action_activation)
def _f_epsilon(self, x):
return torch.sign(x) * torch.pow(torch.abs(x), 0.5)
def forward(self, input_dict, state, seq_lens):
input_dict["obs"] = input_dict["obs"].float()
model_out, _ = self.model.forward(input_dict, state, seq_lens)
return model_out, []
def current_weight(agent):
return agent.get_weights()["default_policy"]
class SimpleCorridor(gym.Env):
"""Example of a custom env in which you have to walk down a corridor.
You can configure the length of the corridor via the env config."""
def __init__(self, config):
self.end_pos = config["corridor_length"]
self.cur_pos = 0
self.action_space = Discrete(2)
self.observation_space = Box(
0.0, self.end_pos, shape=(1, ), dtype=np.float32)
def reset(self):
self.cur_pos = float(0)
return [self.cur_pos]
def step(self, action):
assert action in [0, 1], action
if action == 0 and self.cur_pos > 0:
self.cur_pos -= float(1)
elif action == 1:
self.cur_pos += float(1)
done = self.cur_pos >= self.end_pos
return [self.cur_pos], 1.0 if done else -0.1, done, {}
if __name__ == "__main__":
args = parser.parse_args()
ray.init()
ModelCatalog.register_custom_model(
"my_model", DQNTorchModel)
config = {
"dueling": True,
"double_q": True,
"env": SimpleCorridor,
"env_config": {
"corridor_length": 5,
},
"model": {
"custom_model": "my_model",
},
"output": "/data/mouli/weights/ray_weight",
"hiddens": [256],
"lr": 1e-3,# 1e-4, 1e-6]), # try different lrs
"num_workers": 2, # parallelism
"use_pytorch": True,
"monitor": False,
"num_gpus": 1,
"batch_mode":"complete_episodes",
"rollout_fragment_length": 256,
"train_batch_size": 512,
"log_level": "INFO",
"gamma": 0.2,
"ignore_worker_failures": True,
"no_done_at_end": False,
"collect_metrics_timeout": 30,
}
resource = {
"cpu": 2,
"gpu": 0.1,
}
stop = {
"training_iteration": args.stop_iters,
# "timesteps_total": args.stop_timesteps,
"episode_reward_mean": args.stop_reward,
}
results = tune.run(args.run, config=config, stop=stop)
ray.shutdown()
I have followed the same custom-environment example, except that I tweaked it to run indefinitely.
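To sanity-check the environment on its own, independent of RLlib, here is a quick random-rollout sketch against the SimpleCorridor class above (nothing in it beyond what the script already defines):

env = SimpleCorridor({"corridor_length": 5})
obs = env.reset()
done, total_reward, steps = False, 0.0, 0
while not done and steps < 100:
    # Random policy: sample an action and step the env.
    obs, reward, done, _ = env.step(env.action_space.sample())
    total_reward += reward
    steps += 1
print("steps:", steps, "total reward:", total_reward)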
Thanks. One more thing: can you make sure the script runs? For example, I get:
Traceback (most recent call last):
File "test.py", line 2, in <module>
class DQNTorchModel(TorchModelV2, nn.Module):
NameError: name 'TorchModelV2' is not defined
import argparse
import gym
from gym.spaces import Discrete, Box
import ray
from ray import tune
from ray.tune import grid_search
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.models.torch.fcnet import FullyConnectedNetwork as TorchFC
from ray.rllib.utils.framework import try_import_tf, try_import_torch
# from ray.rllib.utils.test_utils import check_learning_achieved
from ray.rllib.agents.dqn.dqn import DQNTrainer
from ray.tune.logger import pretty_print
from ray.rllib.utils.test_utils import check
tf = try_import_tf()
torch, nn = try_import_torch()
parser = argparse.ArgumentParser()
parser.add_argument("--run", type=str, default="DQN")
parser.add_argument("--torch", action="store_true")
parser.add_argument("--as-test", action="store_true")
parser.add_argument("--stop-iters", type=int, default=500000)
parser.add_argument("--stop-timesteps", type=int, default=10000000)
parser.add_argument("--stop-reward", type=float, default=500)
torch.multiprocessing.set_sharing_strategy('file_system')
After multiple failed attempts, the error seems to come from a time limit on the IO socket, which gets closed after a certain amount of time. There must be some variable limiting the socket's lifetime. @ericl
I get the same error with Tune.
It's working for me on up to 50 iterations:
== Status ==
Memory usage on this node: 6.8/15.4 GiB
Using FIFO scheduling algorithm.
Resources requested: 3/8 CPUs, 0/0 GPUs, 0.0/6.69 GiB heap, 0.0/2.29 GiB objects
Result logdir: /home/eric/ray_results/DQN
Number of trials: 1 (1 RUNNING)
+--------------------------------+----------+---------------------+--------+------------------+--------+----------+
| Trial name | status | loc | iter | total time (s) | ts | reward |
|--------------------------------+----------+---------------------+--------+------------------+--------+----------|
| DQN_SimpleCorridor_4af7b_00000 | RUNNING | 192.168.5.121:23706 | 51 | 157.748 | 209540 | -132.302 |
+--------------------------------+----------+---------------------+--------+------------------+--------+----------+
I'm guessing it may be some issue with the configuration of that machine, such as the amount of memory available. If low on memory, torch could crash and throw an error similar to the one above.
With the SimpleCorridor env, run it a bit longer until it breaks. It was breaking at 10 iterations with the custom environment I was using, so it is dependent on time rather than the number of iterations; after a certain time it breaks. By the way, I am using an AWS EC2 instance with 32 GB RAM and a 16 GB GPU, so there should not be much of a memory issue with the system.
== Status ==
Memory usage on this node: 13.0/30.9 GiB
Using FIFO scheduling algorithm.
Resources requested: 2/8 CPUs, 0.1/1 GPUs, 0.0/16.6 GiB heap, 0.0/5.71 GiB objects
Result logdir: /home/ubuntu/ray_results/train
Number of trials: 1 (1 RUNNING)
+---------------------+----------+--------------------+--------+------------------+-------+----------+
| Trial name | status | loc | iter | total time (s) | ts | reward |
|---------------------+----------+--------------------+--------+------------------+-------+----------|
| train_EndoEnv_00000 | RUNNING | 172.31.8.135:15882 | 9 | 8974.08 | 92394 | -12644.8 |
+---------------------+----------+--------------------+--------+------------------+-------+----------+
== Status ==
Memory usage on this node: 11.9/30.9 GiB
Using FIFO scheduling algorithm.
Resources requested: 2/8 CPUs, 0.1/1 GPUs, 0.0/16.6 GiB heap, 0.0/5.71 GiB objects
Result logdir: /home/ubuntu/ray_results/train
Number of trials: 1 (1 RUNNING)
+---------------------+----------+--------------------+--------+------------------+--------+----------+
| Trial name | status | loc | iter | total time (s) | ts | reward |
|---------------------+----------+--------------------+--------+------------------+--------+----------|
| train_EndoEnv_00000 | RUNNING | 172.31.8.135:15882 | 12 | 14533.3 | 123192 | -9587.5 |
+---------------------+----------+--------------------+--------+------------------+--------+----------+
This was the last iteration log before breaking. This was on my custom environment.
(pid=raylet) E0713 11:15:30.381047 15860 node_manager.cc:3537] Failed to send get core worker stats request: IOError: 14: Connection reset by peer
(pid=raylet) E0713 11:15:30.566490 15860 node_manager.cc:3537] Failed to send get core worker stats request: IOError: 14: failed to connect to all addresses
Does the actor die after the collect-metrics timeout? I have set it to 30, so does it raise an error because of that? The node_manager line above is what raises the error; after tracing it, this is where it got me. Is this because of some metrics timeout, or some other issue? (A sketch of loosening that timeout follows the trace below.)
(pid=15876) *** Aborted at 1594638925 (unix time) try "date -d @1594638925" if you are using GNU date ***
(pid=15876) PC: @ 0x0 (unknown)
(pid=15876) *** SIGSEGV (@0x0) received by PID 15876 (TID 0x7fb8c72bf740) from PID 0; stack trace: ***
(pid=15876) @ 0x7fb8c6eb88a0 (unknown)
(pid=15876) @ 0x7fb63f105340 THRefcountedMapAllocator::initializeAlloc()
(pid=15876) @ 0x7fb63f108bc5 THRefcountedMapAllocator::THRefcountedMapAllocator()
(pid=15876) @ 0x7fb8ac3ccdb0 THManagedMapAllocator::THManagedMapAllocator()
(pid=15876) @ 0x7fb8ac3cce30 THManagedMapAllocator::makeDataPtr()
(pid=15876) @ 0x7fb681da8b80 THPLongStorage_newSharedFilename()
(pid=15876) @ 0x55ce239e9ad1 _PyCFunction_FastCallDict
(pid=15876) @ 0x55ce23a7967c call_function
(pid=15876) @ 0x55ce23a9bcba _PyEval_EvalFrameDefault
(pid=15876) @ 0x55ce23a74459 PyEval_EvalCodeEx
(pid=15876) @ 0x55ce23a75264 function_call
(pid=15876) @ 0x55ce239e999e PyObject_Call
(pid=15875) *** Aborted at 1594638926 (unix time) try "date -d @1594638926" if you are using GNU date ***
(pid=15876) @ 0x7fb8c3d85731 load
(pid=15875) PC: @ 0x0 (unknown)
(pid=15875) *** SIGSEGV (@0x0) received by PID 15875 (TID 0x7fc73d755740) from PID 0; stack trace: ***
(pid=15876) @ 0x7fb8c3d87712 _pickle_loads
(pid=15876) @ 0x55ce239e9c20 _PyCFunction_FastCallDict
(pid=15876) @ 0x55ce23a7967c call_function
(pid=15876) @ 0x55ce23a9bcba _PyEval_EvalFrameDefault
(pid=15876) @ 0x55ce23a72a94 _PyEval_EvalCodeWithName
(pid=15876) @ 0x55ce23a73941 fast_function
(pid=15876) @ 0x55ce23a79755 call_function
(pid=15876) @ 0x55ce23a9ca7a _PyEval_EvalFrameDefault
(pid=15876) @ 0x55ce23a72a94 _PyEval_EvalCodeWithName
(pid=15876) @ 0x55ce23a73941 fast_function
(pid=15876) @ 0x55ce23a79755 call_function
(pid=15876) @ 0x55ce23a9bcba _PyEval_EvalFrameDefault
(pid=15876) @ 0x55ce23a7370b fast_function
(pid=15876) @ 0x55ce23a79755 call_function
(pid=15876) @ 0x55ce23a9bcba _PyEval_EvalFrameDefault
(pid=15876) @ 0x55ce23a73d7b _PyFunction_FastCallDict
(pid=15876) @ 0x55ce239e9f5f _PyObject_FastCallDict
(pid=15876) @ 0x55ce239eea03 _PyObject_Call_Prepend
(pid=15876) @ 0x55ce239e9d7b _PyObject_FastCallDict
(pid=15875) @ 0x7fc73d34e8a0 (unknown)
(pid=15875) @ 0x7fc4b3105340 THRefcountedMapAllocator::initializeAlloc()
(pid=15875) @ 0x7fc4b3108bc5 THRefcountedMapAllocator::THRefcountedMapAllocator()
(pid=15875) @ 0x7fc721860db0 THManagedMapAllocator::THManagedMapAllocator()
(pid=15875) @ 0x7fc721860e30 THManagedMapAllocator::makeDataPtr()
(pid=15875) @ 0x7fc720f86b80 THPLongStorage_newSharedFilename()
(pid=15875) @ 0x55cbba861ad1 _PyCFunction_FastCallDict
(pid=15875) @ 0x55cbba8f167c call_function
(pid=15875) @ 0x55cbba913cba _PyEval_EvalFrameDefault
(pid=15875) @ 0x55cbba8ec459 PyEval_EvalCodeEx
(pid=15875) @ 0x55cbba8ed264 function_call
(pid=15875) @ 0x55cbba86199e PyObject_Call
(pid=15875) @ 0x7fc73a21b731 load
(pid=15875) @ 0x7fc73a21d712 _pickle_loads
(pid=15875) @ 0x55cbba861c20 _PyCFunction_FastCallDict
(pid=15875) @ 0x55cbba8f167c call_function
(pid=15875) @ 0x55cbba913cba _PyEval_EvalFrameDefault
(pid=15875) @ 0x55cbba8eaa94 _PyEval_EvalCodeWithName
(pid=15875) @ 0x55cbba8eb941 fast_function
(pid=15875) @ 0x55cbba8f1755 call_function
(pid=15875) @ 0x55cbba914a7a _PyEval_EvalFrameDefault
(pid=15875) @ 0x55cbba8eaa94 _PyEval_EvalCodeWithName
(pid=15875) @ 0x55cbba8eb941 fast_function
(pid=15875) @ 0x55cbba8f1755 call_function
(pid=15875) @ 0x55cbba913cba _PyEval_EvalFrameDefault
(pid=15875) @ 0x55cbba8eb70b fast_function
(pid=15875) @ 0x55cbba8f1755 call_function
(pid=15875) @ 0x55cbba913cba _PyEval_EvalFrameDefault
(pid=15875) @ 0x55cbba8ebd7b _PyFunction_FastCallDict
(pid=15875) @ 0x55cbba861f5f _PyObject_FastCallDict
(pid=15875) @ 0x55cbba866a03 _PyObject_Call_Prepend
(pid=15875) @ 0x55cbba861d7b _PyObject_FastCallDict
(pid=15882) E0713 11:15:30.381654 15982 task_manager.cc:306] Task failed: IOError: 14: Socket closed: Type=ACTOR_TASK, Language=PYTHON, function_descriptor={type=PythonFunctionDescriptor, module_name=ray.rllib.evaluation.rollout_worker, class_name=RolloutWorker, function_name=par_iter_next, function_hash=}, task_id=e8651da9e26803b1156ccce50100, job_id=0100, num_args=0, num_returns=2, actor_task_spec={actor_id=156ccce50100, actor_caller_id=ffffffffffffffff45b95b1c0100, actor_counter=38}
(pid=15882) 2020-07-13 11:15:30,382 ERROR trainer.py:488 -- Error in train call, attempting to recover
(pid=15882) Traceback (most recent call last):
(pid=15882) File "/home/ubuntu/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 484, in train
(pid=15882) result = Trainable.train(self)
(pid=15882) File "/home/ubuntu/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/ray/tune/trainable.py", line 261, in train
(pid=15882) result = self._train()
(pid=15882) File "/home/ubuntu/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/ray/rllib/agents/trainer_template.py", line 142, in _train
(pid=15882) return self._train_exec_impl()
(pid=15882) File "/home/ubuntu/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/ray/rllib/agents/trainer_template.py", line 177, in _train_exec_impl
(pid=15882) res = next(self.train_exec_impl)
(pid=15882) File "/home/ubuntu/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/ray/util/iter.py", line 634, in __next__
(pid=15882) return next(self.built_iterator)
(pid=15882) File "/home/ubuntu/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/ray/util/iter.py", line 644, in apply_foreach
(pid=15882) for item in it:
(pid=15882) File "/home/ubuntu/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/ray/util/iter.py", line 685, in apply_filter
(pid=15882) for item in it:
(pid=15882) File "/home/ubuntu/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/ray/util/iter.py", line 685, in apply_filter
(pid=15882) for item in it:
(pid=15882) File "/home/ubuntu/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/ray/util/iter.py", line 644, in apply_foreach
(pid=15882) for item in it:
(pid=15882) File "/home/ubuntu/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/ray/util/iter.py", line 685, in apply_filter
(pid=15882) for item in it:
(pid=15882) File "/home/ubuntu/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/ray/util/iter.py", line 888, in build_union
(pid=15882) item = next(it)
(pid=15882) File "/home/ubuntu/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/ray/util/iter.py", line 634, in __next__
(pid=15882) return next(self.built_iterator)
(pid=15882) File "/home/ubuntu/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/ray/util/iter.py", line 644, in apply_foreach
(pid=15882) for item in it:
(pid=15882) File "/home/ubuntu/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/ray/util/iter.py", line 644, in apply_foreach
(pid=15882) for item in it:
(pid=15882) File "/home/ubuntu/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/ray/util/iter.py", line 644, in apply_foreach
(pid=15882) for item in it:
(pid=15882) File "/home/ubuntu/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/ray/util/iter.py", line 396, in base_iterator
(pid=15882) yield ray.get(futures, timeout=timeout)
(pid=15882) File "/home/ubuntu/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/ray/worker.py", line 1517, in get
(pid=15882) raise value
(pid=15882) ray.exceptions.RayActorError: The actor died unexpectedly before finishing this task.
(pid=15882) 2020-07-13 11:15:30,383 INFO trainer.py:968 -- Health checking all workers...
(pid=15882) E0713 11:15:30.383968 15982 task_manager.cc:306] Task failed: IOError: 14: failed to connect to all addresses: Type=ACTOR_TASK, Language=PYTHON, function_descriptor={type=PythonFunctionDescriptor, module_name=ray.rllib.evaluation.rollout_worker, class_name=RolloutWorker, function_name=sample_with_count, function_hash=}, task_id=d695763a22fb4d0c156ccce50100, job_id=0100, num_args=0, num_returns=3, actor_task_spec={actor_id=156ccce50100, actor_caller_id=ffffffffffffffff45b95b1c0100, actor_counter=39}
(pid=raylet) E0713 11:15:30.381047 15860 node_manager.cc:3537] Failed to send get core worker stats request: IOError: 14: Connection reset by peer
(pid=raylet) E0713 11:15:30.566490 15860 node_manager.cc:3537] Failed to send get core worker stats request: IOError: 14: failed to connect to all addresses
(pid=15882) E0713 11:15:31.656106 15982 task_manager.cc:306] Task failed: IOError: 14: Connection reset by peer: Type=ACTOR_TASK, Language=PYTHON, function_descriptor={type=PythonFunctionDescriptor, module_name=ray.rllib.evaluation.rollout_worker, class_name=RolloutWorker, function_name=par_iter_next, function_hash=}, task_id=4ad6f4424b7be803ad04959e0100, job_id=0100, num_args=0, num_returns=2, actor_task_spec={actor_id=ad04959e0100, actor_caller_id=ffffffffffffffff45b95b1c0100, actor_counter=38}
(pid=15882) E0713 11:15:31.656229 15982 task_manager.cc:306] Task failed: IOError: 14: Connection reset by peer: Type=ACTOR_TASK, Language=PYTHON, function_descriptor={type=PythonFunctionDescriptor, module_name=ray.rllib.evaluation.rollout_worker, class_name=RolloutWorker, function_name=sample_with_count, function_hash=}, task_id=e18e08bc394e9510ad04959e0100, job_id=0100, num_args=0, num_returns=3, actor_task_spec={actor_id=ad04959e0100, actor_caller_id=ffffffffffffffff45b95b1c0100, actor_counter=39}
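For reference, a minimal sketch of loosening that metrics timeout (assuming collect_metrics_timeout is the key controlling it in this Ray version, as it appears in the config above; ignore_worker_failures is already set there):

# Sketch only: relax the 30s metrics-collection timeout set in the config above.
config["collect_metrics_timeout"] = 180   # loosen from 30s back toward the library default
config["ignore_worker_failures"] = True   # already True above; lets training continue past a crashed rollout worker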
I cannot reproduce the crash on my laptop; it runs forever without error.
What if you try num_cpus: 0? Maybe it is a GPU only issue?
Sorry, try num_gpus: 0?
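Concretely, the suggestion is a one-line change to the config above (sketch):

config["num_gpus"] = 0   # train on CPU only, to rule out a GPU-related crash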
On Mon, Jul 13, 2020, 9:49 AM Eric Liang ekhliang@gmail.com wrote:
I cannot reproduce the crash on my laptop, it runs forever without error.
What if you try num_cpus: 0? Maybe it is a GPU only issue?
On Mon, Jul 13, 2020, 4:29 AM chandramoulirajagopalan < notifications@github.com> wrote:
== Status == Memory usage on this node: 13.0/30.9 GiB Using FIFO scheduling algorithm. Resources requested: 2/8 CPUs, 0.1/1 GPUs, 0.0/16.6 GiB heap, 0.0/5.71 GiB objects Result logdir: /home/ubuntu/ray_results/train Number of trials: 1 (1 RUNNING) +---------------------+----------+--------------------+--------+------------------+-------+----------+ | Trial name | status | loc | iter | total time (s) | ts | reward | |---------------------+----------+--------------------+--------+------------------+-------+----------| | train_EndoEnv_00000 | RUNNING | 172.31.8.135:15882 | 9 | 8974.08 | 92394 | -12644.8 | +---------------------+----------+--------------------+--------+------------------+-------+----------+ = Status == Memory usage on this node: 11.9/30.9 GiB Using FIFO scheduling algorithm. Resources requested: 2/8 CPUs, 0.1/1 GPUs, 0.0/16.6 GiB heap, 0.0/5.71 GiB objects Result logdir: /home/ubuntu/ray_results/train Number of trials: 1 (1 RUNNING) +---------------------+----------+--------------------+--------+------------------+--------+----------+ | Trial name | status | loc | iter | total time (s) | ts | reward | |---------------------+----------+--------------------+--------+------------------+--------+----------| | train_EndoEnv_00000 | RUNNING | 172.31.8.135:15882 | 12 | 14533.3 | 123192 | -9587.5 | +---------------------+----------+--------------------+--------+------------------+--------+----------+
This was the last iteration log before breaking. This was on my custom environment.
(pid=raylet) E0713 11:15:30.381047 15860 node_manager.cc:3537] Failed to send get core worker stats request: IOError: 14: Connection reset by peer (pid=raylet) E0713 11:15:30.566490 15860 node_manager.cc:3537] Failed to send get core worker stats request: IOError: 14: failed to connect to all addresses
Does the actor die after certain time of update ?
(pid=15876) Aborted at 1594638925 (unix time) try "date -d @1594638925" if you are using GNU date
(pid=15876) PC: @ 0x0 (unknown)
(pid=15876) SIGSEGV (@0x0) received by PID 15876 (TID 0x7fb8c72bf740) from PID 0; stack trace:
(pid=15876) @ 0x7fb8c6eb88a0 (unknown)
(pid=15876) @ 0x7fb63f105340 THRefcountedMapAllocator::initializeAlloc()
(pid=15876) @ 0x7fb63f108bc5 THRefcountedMapAllocator::THRefcountedMapAllocator()
(pid=15876) @ 0x7fb8ac3ccdb0 THManagedMapAllocator::THManagedMapAllocator()
(pid=15876) @ 0x7fb8ac3cce30 THManagedMapAllocator::makeDataPtr()
(pid=15876) @ 0x7fb681da8b80 THPLongStorage_newSharedFilename()
(pid=15876) @ 0x55ce239e9ad1 _PyCFunction_FastCallDict
(pid=15876) @ 0x55ce23a7967c call_function
(pid=15876) @ 0x55ce23a9bcba _PyEval_EvalFrameDefault
(pid=15876) @ 0x55ce23a74459 PyEval_EvalCodeEx
(pid=15876) @ 0x55ce23a75264 function_call
(pid=15876) @ 0x55ce239e999e PyObject_Call
(pid=15875) Aborted at 1594638926 (unix time) try "date -d @1594638926" if you are using GNU date
(pid=15876) @ 0x7fb8c3d85731 load
(pid=15875) PC: @ 0x0 (unknown)
(pid=15875) SIGSEGV (@0x0) received by PID 15875 (TID 0x7fc73d755740) from PID 0; stack trace:
(pid=15876) @ 0x7fb8c3d87712 _pickle_loads
(pid=15876) @ 0x55ce239e9c20 _PyCFunction_FastCallDict
(pid=15876) @ 0x55ce23a7967c call_function
(pid=15876) @ 0x55ce23a9bcba _PyEval_EvalFrameDefault
(pid=15876) @ 0x55ce23a72a94 _PyEval_EvalCodeWithName
(pid=15876) @ 0x55ce23a73941 fast_function
(pid=15876) @ 0x55ce23a79755 call_function
(pid=15876) @ 0x55ce23a9ca7a _PyEval_EvalFrameDefault
(pid=15876) @ 0x55ce23a72a94 _PyEval_EvalCodeWithName
(pid=15876) @ 0x55ce23a73941 fast_function
(pid=15876) @ 0x55ce23a79755 call_function
(pid=15876) @ 0x55ce23a9bcba _PyEval_EvalFrameDefault
(pid=15876) @ 0x55ce23a7370b fast_function
(pid=15876) @ 0x55ce23a79755 call_function
(pid=15876) @ 0x55ce23a9bcba _PyEval_EvalFrameDefault
(pid=15876) @ 0x55ce23a73d7b _PyFunction_FastCallDict
(pid=15876) @ 0x55ce239e9f5f _PyObject_FastCallDict
(pid=15876) @ 0x55ce239eea03 _PyObject_Call_Prepend
(pid=15876) @ 0x55ce239e9d7b _PyObject_FastCallDict
(pid=15875) @ 0x7fc73d34e8a0 (unknown)
(pid=15875) @ 0x7fc4b3105340 THRefcountedMapAllocator::initializeAlloc()
(pid=15875) @ 0x7fc4b3108bc5 THRefcountedMapAllocator::THRefcountedMapAllocator()
(pid=15875) @ 0x7fc721860db0 THManagedMapAllocator::THManagedMapAllocator()
(pid=15875) @ 0x7fc721860e30 THManagedMapAllocator::makeDataPtr()
(pid=15875) @ 0x7fc720f86b80 THPLongStorage_newSharedFilename()
(pid=15875) @ 0x55cbba861ad1 _PyCFunction_FastCallDict
(pid=15875) @ 0x55cbba8f167c call_function
(pid=15875) @ 0x55cbba913cba _PyEval_EvalFrameDefault
(pid=15875) @ 0x55cbba8ec459 PyEval_EvalCodeEx
(pid=15875) @ 0x55cbba8ed264 function_call
(pid=15875) @ 0x55cbba86199e PyObject_Call
(pid=15875) @ 0x7fc73a21b731 load
(pid=15875) @ 0x7fc73a21d712 _pickle_loads
(pid=15875) @ 0x55cbba861c20 _PyCFunction_FastCallDict
(pid=15875) @ 0x55cbba8f167c call_function
(pid=15875) @ 0x55cbba913cba _PyEval_EvalFrameDefault
(pid=15875) @ 0x55cbba8eaa94 _PyEval_EvalCodeWithName
(pid=15875) @ 0x55cbba8eb941 fast_function
(pid=15875) @ 0x55cbba8f1755 call_function
(pid=15875) @ 0x55cbba914a7a _PyEval_EvalFrameDefault
(pid=15875) @ 0x55cbba8eaa94 _PyEval_EvalCodeWithName
(pid=15875) @ 0x55cbba8eb941 fast_function
(pid=15875) @ 0x55cbba8f1755 call_function
(pid=15875) @ 0x55cbba913cba _PyEval_EvalFrameDefault
(pid=15875) @ 0x55cbba8eb70b fast_function
(pid=15875) @ 0x55cbba8f1755 call_function
(pid=15875) @ 0x55cbba913cba _PyEval_EvalFrameDefault
(pid=15875) @ 0x55cbba8ebd7b _PyFunction_FastCallDict
(pid=15875) @ 0x55cbba861f5f _PyObject_FastCallDict
(pid=15875) @ 0x55cbba866a03 _PyObject_Call_Prepend
(pid=15875) @ 0x55cbba861d7b _PyObject_FastCallDict
(pid=15882) E0713 11:15:30.381654 15982 task_manager.cc:306] Task failed: IOError: 14: Socket closed: Type=ACTOR_TASK, Language=PYTHON, function_descriptor={type=PythonFunctionDescriptor, module_name=ray.rllib.evaluation.rollout_worker, class_name=RolloutWorker, function_name=par_iter_next, function_hash=}, task_id=e8651da9e26803b1156ccce50100, job_id=0100, num_args=0, num_returns=2, actor_task_spec={actor_id=156ccce50100, actor_caller_id=ffffffffffffffff45b95b1c0100, actor_counter=38}
(pid=15882) 2020-07-13 11:15:30,382 ERROR trainer.py:488 -- Error in train call, attempting to recover
(pid=15882) Traceback (most recent call last):
(pid=15882)   File "/home/ubuntu/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 484, in train
(pid=15882)     result = Trainable.train(self)
(pid=15882)   File "/home/ubuntu/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/ray/tune/trainable.py", line 261, in train
(pid=15882)     result = self._train()
(pid=15882)   File "/home/ubuntu/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/ray/rllib/agents/trainer_template.py", line 142, in _train
(pid=15882)     return self._train_exec_impl()
(pid=15882)   File "/home/ubuntu/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/ray/rllib/agents/trainer_template.py", line 177, in _train_exec_impl
(pid=15882)     res = next(self.train_exec_impl)
(pid=15882)   File "/home/ubuntu/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/ray/util/iter.py", line 634, in next
(pid=15882)     return next(self.built_iterator)
(pid=15882)   File "/home/ubuntu/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/ray/util/iter.py", line 644, in apply_foreach
(pid=15882)     for item in it:
(pid=15882)   File "/home/ubuntu/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/ray/util/iter.py", line 685, in apply_filter
(pid=15882)     for item in it:
(pid=15882)   File "/home/ubuntu/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/ray/util/iter.py", line 685, in apply_filter
(pid=15882)     for item in it:
(pid=15882)   File "/home/ubuntu/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/ray/util/iter.py", line 644, in apply_foreach
(pid=15882)     for item in it:
(pid=15882)   File "/home/ubuntu/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/ray/util/iter.py", line 685, in apply_filter
(pid=15882)     for item in it:
(pid=15882)   File "/home/ubuntu/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/ray/util/iter.py", line 888, in build_union
(pid=15882)     item = next(it)
(pid=15882)   File "/home/ubuntu/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/ray/util/iter.py", line 634, in next
(pid=15882)     return next(self.built_iterator)
(pid=15882)   File "/home/ubuntu/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/ray/util/iter.py", line 644, in apply_foreach
(pid=15882)     for item in it:
(pid=15882)   File "/home/ubuntu/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/ray/util/iter.py", line 644, in apply_foreach
(pid=15882)     for item in it:
(pid=15882)   File "/home/ubuntu/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/ray/util/iter.py", line 644, in apply_foreach
(pid=15882)     for item in it:
(pid=15882)   File "/home/ubuntu/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/ray/util/iter.py", line 396, in base_iterator
(pid=15882)     yield ray.get(futures, timeout=timeout)
(pid=15882)   File "/home/ubuntu/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/ray/worker.py", line 1517, in get
(pid=15882)     raise value
(pid=15882) ray.exceptions.RayActorError: The actor died unexpectedly before finishing this task.
(pid=15882) 2020-07-13 11:15:30,383 INFO trainer.py:968 -- Health checking all workers...
(pid=15882) E0713 11:15:30.383968 15982 task_manager.cc:306] Task failed: IOError: 14: failed to connect to all addresses: Type=ACTOR_TASK, Language=PYTHON, function_descriptor={type=PythonFunctionDescriptor, module_name=ray.rllib.evaluation.rollout_worker, class_name=RolloutWorker, function_name=sample_with_count, function_hash=}, task_id=d695763a22fb4d0c156ccce50100, job_id=0100, num_args=0, num_returns=3, actor_task_spec={actor_id=156ccce50100, actor_caller_id=ffffffffffffffff45b95b1c0100, actor_counter=39}
(pid=raylet) E0713 11:15:30.381047 15860 node_manager.cc:3537] Failed to send get core worker stats request: IOError: 14: Connection reset by peer
(pid=raylet) E0713 11:15:30.566490 15860 node_manager.cc:3537] Failed to send get core worker stats request: IOError: 14: failed to connect to all addresses
(pid=15882) E0713 11:15:31.656106 15982 task_manager.cc:306] Task failed: IOError: 14: Connection reset by peer: Type=ACTOR_TASK, Language=PYTHON, function_descriptor={type=PythonFunctionDescriptor, module_name=ray.rllib.evaluation.rollout_worker, class_name=RolloutWorker, function_name=par_iter_next, function_hash=}, task_id=4ad6f4424b7be803ad04959e0100, job_id=0100, num_args=0, num_returns=2, actor_task_spec={actor_id=ad04959e0100, actor_caller_id=ffffffffffffffff45b95b1c0100, actor_counter=38}
(pid=15882) E0713 11:15:31.656229 15982 task_manager.cc:306] Task failed: IOError: 14: Connection reset by peer: Type=ACTOR_TASK, Language=PYTHON, function_descriptor={type=PythonFunctionDescriptor, module_name=ray.rllib.evaluation.rollout_worker, class_name=RolloutWorker, function_name=sample_with_count, function_hash=}, task_id=e18e08bc394e9510ad04959e0100, job_id=0100, num_args=0, num_returns=3, actor_task_spec={actor_id=ad04959e0100, actor_caller_id=ffffffffffffffff45b95b1c0100, actor_counter=39}
Yeah, I will try that with num_gpus: 0.
It breaks even with num_gpus: 0. Is there any command that needs to be run before starting the RLlib model? The connection gets closed or reset after a roughly constant amount of time. Is there any other issue that could be causing this failure, @ericl? Is there any configuration on my system that could generate this error?
Hi! When I train an agent with
for _ in range(10):
    trainer.train()
how many time steps are taken in one iteration? In offline RL, does an iteration mean going through all the episodes that exist in the dataset? If anyone can help me, I would appreciate it very much!
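As a point of reference (not an authoritative answer): one trainer.train() call corresponds to one reporting iteration, and for DQN-style trainers the timesteps_per_iteration config sets a minimum number of sampled timesteps per iteration, as far as I understand. Below is a minimal sketch for checking this empirically by diffing the cumulative timesteps_total counter in the result dict; the CartPole-v0 env and the config values are illustrative assumptions, not taken from this thread.

# Hedged sketch: observe how many steps each trainer.train() call consumed.
# Env and config values below are placeholders, not from this issue.
import ray
from ray.rllib.agents import dqn

ray.init()
config = dqn.DEFAULT_CONFIG.copy()
config["num_workers"] = 1
config["num_gpus"] = 0
config["use_pytorch"] = True  # Ray 0.8.x flag; newer releases use config["framework"] = "torch"

trainer = dqn.DQNTrainer(env="CartPole-v0", config=config)
last_total = 0
for i in range(10):
    result = trainer.train()
    # Each result carries the cumulative step counter, so the difference
    # between consecutive iterations is the number of steps this iteration.
    steps_this_iter = result["timesteps_total"] - last_total
    last_total = result["timesteps_total"]
    print("iteration", i, "steps this iteration:", steps_this_iter)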
Hi, I'm a bot from the Ray team :)
To help human contributors to focus on more relevant issues, I will automatically add the stale label to issues that have had no activity for more than 4 months.
If there is no further activity in the next 14 days, the issue will be closed!
You can always ask for help on our discussion forum or Ray's public slack channel.
Hi again! The issue will be closed because there has been no more activity in the 14 days since the last message.
Please feel free to reopen or open a new issue if you'd still like it to be addressed.
Again, you can always ask for help on our discussion forum or Ray's public slack channel.
Thanks again for opening the issue!
Does the trainer automatically die after a certain amount of time?
Ray version and other system information: Ray 0.8.5, Python 3.6.0, PyTorch version (not given), Ubuntu 16.04.
Reproduction:
If we cannot run your script, we cannot fix your issue.
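For completeness, here is a hedged skeleton of what a self-contained reproduction script could look like; the environment, config values, and stopping criterion are placeholders rather than the reporter's actual setup.

# Hypothetical reproduction skeleton -- the env and config values below are
# placeholders, not the original reporter's settings.
import ray
from ray import tune

ray.init()
tune.run(
    "DQN",
    stop={"training_iteration": 100},
    config={
        "env": "CartPole-v0",
        "num_workers": 2,
        "num_gpus": 0,
        "use_pytorch": True,  # Ray 0.8.x flag; later versions use "framework": "torch"
    },
)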