AI4Finance-Foundation / FinRL


FinRL_Ensemble_StockTrading error for NIFTY_50 #384

Open · Soumadip-Saha opened this issue 2 years ago

Soumadip-Saha commented 2 years ago

I tried to fix the NaN-value error by following this reference, and the preprocessing now completes correctly, but I then get the following error during training. Can anyone please help me out?

```
ValueError                                Traceback (most recent call last)
<ipython-input> in <module>()
      2     PPO_model_kwargs,
      3     DDPG_model_kwargs,
----> 4     timesteps_dict)

9 frames

/usr/local/lib/python3.7/dist-packages/finrl/drl_agents/stablebaselines3/models.py in run_ensemble_strategy(self, A2C_model_kwargs, PPO_model_kwargs, DDPG_model_kwargs, timesteps_dict)
    468                 tb_log_name="a2c_{}".format(i),
    469                 iter_num=i,
--> 470                 total_timesteps=timesteps_dict["a2c"],
    471             )  # 100_000
    472

/usr/local/lib/python3.7/dist-packages/finrl/drl_agents/stablebaselines3/models.py in train_model(model, model_name, tb_log_name, iter_num, total_timesteps)
    202             total_timesteps=total_timesteps,
    203             tb_log_name=tb_log_name,
--> 204             callback=TensorboardCallback(),
    205         )
    206         model.save(

/usr/local/lib/python3.7/dist-packages/stable_baselines3/a2c/a2c.py in learn(self, total_timesteps, callback, log_interval, eval_env, eval_freq, n_eval_episodes, tb_log_name, eval_log_path, reset_num_timesteps)
    199             tb_log_name=tb_log_name,
    200             eval_log_path=eval_log_path,
--> 201             reset_num_timesteps=reset_num_timesteps,
    202         )

/usr/local/lib/python3.7/dist-packages/stable_baselines3/common/on_policy_algorithm.py in learn(self, total_timesteps, callback, log_interval, eval_env, eval_freq, n_eval_episodes, tb_log_name, eval_log_path, reset_num_timesteps)
    235         while self.num_timesteps < total_timesteps:
    236
--> 237             continue_training = self.collect_rollouts(self.env, callback, self.rollout_buffer, n_rollout_steps=self.n_steps)
    238
    239             if continue_training is False:

/usr/local/lib/python3.7/dist-packages/stable_baselines3/common/on_policy_algorithm.py in collect_rollouts(self, env, callback, rollout_buffer, n_rollout_steps)
    167                 # Convert to pytorch tensor or to TensorDict
    168                 obs_tensor = obs_as_tensor(self._last_obs, self.device)
--> 169                 actions, values, log_probs = self.policy.forward(obs_tensor)
    170             actions = actions.cpu().numpy()
    171

/usr/local/lib/python3.7/dist-packages/stable_baselines3/common/policies.py in forward(self, obs, deterministic)
    588         # Evaluate the values for the given observations
    589         values = self.value_net(latent_vf)
--> 590         distribution = self._get_action_dist_from_latent(latent_pi)
    591         actions = distribution.get_actions(deterministic=deterministic)
    592         log_prob = distribution.log_prob(actions)

/usr/local/lib/python3.7/dist-packages/stable_baselines3/common/policies.py in _get_action_dist_from_latent(self, latent_pi)
    603
    604         if isinstance(self.action_dist, DiagGaussianDistribution):
--> 605             return self.action_dist.proba_distribution(mean_actions, self.log_std)
    606         elif isinstance(self.action_dist, CategoricalDistribution):
    607             # Here mean_actions are the logits before the softmax

/usr/local/lib/python3.7/dist-packages/stable_baselines3/common/distributions.py in proba_distribution(self, mean_actions, log_std)
    150         """
    151         action_std = th.ones_like(mean_actions) * log_std.exp()
--> 152         self.distribution = Normal(mean_actions, action_std)
    153         return self
    154

/usr/local/lib/python3.7/dist-packages/torch/distributions/normal.py in __init__(self, loc, scale, validate_args)
     48         else:
     49             batch_shape = self.loc.size()
---> 50         super(Normal, self).__init__(batch_shape, validate_args=validate_args)
     51
     52     def expand(self, batch_shape, _instance=None):

/usr/local/lib/python3.7/dist-packages/torch/distributions/distribution.py in __init__(self, batch_shape, event_shape, validate_args)
     54         if not valid.all():
     55             raise ValueError(
---> 56                 f"Expected parameter {param} "
     57                 f"({type(value).__name__} of shape {tuple(value.shape)}) "
     58                 f"of distribution {repr(self)} "

ValueError: Expected parameter loc (Tensor of shape (1, 47)) of distribution Normal(loc: torch.Size([1, 47]), scale: torch.Size([1, 47])) to satisfy the constraint Real(), but found invalid values:
tensor([[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
         nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
         nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
         nan, nan, nan, nan, nan]])
```
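
For reference, the NaNs only surface here once they reach the policy network, so it is worth confirming first whether the preprocessed data itself is clean. A minimal check along these lines, where `processed` is just a placeholder for the DataFrame returned by the preprocessing step:

```python
import numpy as np
import pandas as pd

def report_bad_values(df: pd.DataFrame) -> None:
    """List the columns that still contain NaN or infinite values."""
    numeric = df.select_dtypes(include=[np.number])
    nan_cols = numeric.columns[numeric.isna().any()].tolist()
    inf_cols = numeric.columns[np.isinf(numeric).any()].tolist()
    print("columns with NaN:", nan_cols)
    print("columns with inf:", inf_cols)

# `processed` is a placeholder for the preprocessed FinRL DataFrame:
# report_bad_values(processed)
```

If both lists come back empty, the NaNs are being produced during training rather than by the data.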
rayrui312 commented 2 years ago

Thanks for reporting it. We will check it.

Lan911 commented 2 years ago

I have the same issue.

```
Using cuda device
------------------------------------
| time/              |             |
|    fps             | 49          |
|    iterations      | 1           |
|    time_elapsed    | 41          |
|    total_timesteps | 2048        |
| train/             |             |
|    reward          | -10553.424  |
------------------------------------

Traceback (most recent call last):
  File "/home/FinRL-master/test_traing_and_testing.py", line 93, in <module>
    total_timesteps=50000)
  File "/home/FinRL-master/finrl/drl_agents/stablebaselines3/models.py", line 105, in train_model
    callback=TensorboardCallback(),
  File "/opt/conda/envs/FinRL/lib/python3.7/site-packages/stable_baselines3/ppo/ppo.py", line 308, in learn
    reset_num_timesteps=reset_num_timesteps,
  File "/opt/conda/envs/FinRL/lib/python3.7/site-packages/stable_baselines3/common/on_policy_algorithm.py", line 257, in learn
    self.train()
  File "/opt/conda/envs/FinRL/lib/python3.7/site-packages/stable_baselines3/ppo/ppo.py", line 199, in train
    values, log_prob, entropy = self.policy.evaluate_actions(rollout_data.observations, actions)
  File "/opt/conda/envs/FinRL/lib/python3.7/site-packages/stable_baselines3/common/policies.py", line 643, in evaluate_actions
    distribution = self._get_action_dist_from_latent(latent_pi)
  File "/opt/conda/envs/FinRL/lib/python3.7/site-packages/stable_baselines3/common/policies.py", line 605, in _get_action_dist_from_latent
    return self.action_dist.proba_distribution(mean_actions, self.log_std)
  File "/opt/conda/envs/FinRL/lib/python3.7/site-packages/stable_baselines3/common/distributions.py", line 152, in proba_distribution
    self.distribution = Normal(mean_actions, action_std)
  File "/opt/conda/envs/FinRL/lib/python3.7/site-packages/torch/distributions/normal.py", line 50, in __init__
    super(Normal, self).__init__(batch_shape, validate_args=validate_args)
  File "/opt/conda/envs/FinRL/lib/python3.7/site-packages/torch/distributions/distribution.py", line 56, in __init__
    f"Expected parameter {param} "
ValueError: Expected parameter loc (Tensor of shape (128, 41)) of distribution Normal(loc: torch.Size([128, 41]), scale: torch.Size([128, 41])) to satisfy the constraint Real(), but found invalid values:
tensor([[nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan],
        ...,
        [nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan]], device='cuda:0', grad_fn=<...>)
```
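
In case it helps with debugging: stable-baselines3 ships a `VecCheckNan` wrapper that raises as soon as a NaN or inf shows up in an observation, action, or reward, which points at the step that produced it instead of failing later inside the policy. A minimal sketch, assuming the training env is the usual `DummyVecEnv` (the `env_train` name is illustrative):

```python
from stable_baselines3.common.vec_env import VecCheckNan

# Wrap the vectorized training env so the first NaN/inf raises
# immediately, with a message naming where it appeared.
env_train = VecCheckNan(env_train, raise_exception=True)
```

If the data checks out and the NaNs only appear after some training steps, an overly large learning rate can also drive the network to NaN, so lowering it is worth a try.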

Soumadip-Saha commented 2 years ago

> Thanks for reporting it. We will check it.

Is there any update regarding the issue?

ashmitsharma commented 1 year ago

@XiaoYangLiu-FinRL @rayrui312 @Soumadip-Saha @Lan911 @ndronen I am also facing the same issue. Is there any update regarding it?
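
In case it helps while waiting: a data-side cleanup that is often suggested for this class of error is to scrub the preprocessed frame before building the environment, since technical indicators can emit NaN for the first rows of each ticker and inf on zero denominators. A sketch, assuming the usual FinRL `date`/`tic` columns and a DataFrame named `processed` (names illustrative):

```python
import numpy as np

# `processed` stands in for the preprocessed FinRL DataFrame.
# Turn +/-inf into NaN first, then fill the remaining gaps within
# each ticker from neighbouring rows.
processed = processed.replace([np.inf, -np.inf], np.nan)
processed = (
    processed.sort_values(["date", "tic"])
    .groupby("tic", group_keys=False)
    .apply(lambda g: g.ffill().bfill())
)
```

This does not address NaNs produced during training itself, but it rules out the data as the source.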