AI4Finance-Foundation / FinRL

FinRL: Financial Reinforcement Learning. 🔥
https://ai4finance.org
MIT License

FinRL - RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn #576

Open · khanhphan1311 opened this issue 2 years ago

khanhphan1311 commented 2 years ago

I tried FinRL and got this error: RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

Here's the code:

from finrl.finrl_meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.agents.stablebaselines3.models import DRLAgent

env_kwargs = {
    "stock_dim": stock_dimension,
    "hmax": 10000,
    "initial_amount": 1000000,
    "num_stock_shares": [0] * stock_dimension,
    "buy_cost_pct": [1e-3] * stock_dimension,
    "sell_cost_pct": [1e-3] * stock_dimension,
    "reward_scaling": 1e-4,
    "state_space": state_space,
    "action_space": stock_dimension,
    "tech_indicator_list": indicators,
    "print_verbosity": 1,
}

e_train_gym = StockTradingEnv(df=train, **env_kwargs)
env_train, _ = e_train_gym.get_sb_env()

agent = DRLAgent(env=env_train)
DDPG_PARAMS = {
    "batch_size": 256,
    "buffer_size": 50000,
    "learning_rate": 0.0005,
    "action_noise": "normal",
}
model_ddpg = agent.get_model("ddpg", model_kwargs=DDPG_PARAMS)
trained_ddpg = agent.train_model(model=model_ddpg,
                                 tb_log_name='ddpg',
                                 total_timesteps=10000)

The error:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
Input In [228], in <cell line: 1>()
----> 1 trained_ddpg = agent.train_model(model=model_ddpg, 
      2                               tb_log_name='ddpg',
      3                               total_timesteps=10000)

File D:\00_PYTHON_PROJECT\_1_Practice\finrl_clone\FinRL\finrl\agents\stablebaselines3\models.py:100, in DRLAgent.train_model(self, model, tb_log_name, total_timesteps)
     99 def train_model(self, model, tb_log_name, total_timesteps=5000):
--> 100     model = model.learn(
    101         total_timesteps=total_timesteps,
    102         tb_log_name=tb_log_name,
    103         callback=TensorboardCallback(),
    104     )
    105     return model

File D:\1. APPS\Anaconda\envs\finrl\lib\site-packages\stable_baselines3\ddpg\ddpg.py:130, in DDPG.learn(self, total_timesteps, callback, log_interval, eval_env, eval_freq, n_eval_episodes, tb_log_name, eval_log_path, reset_num_timesteps)
    117 def learn(
    118     self,
    119     total_timesteps: int,
   (...)
    127     reset_num_timesteps: bool = True,
    128 ) -> OffPolicyAlgorithm:
--> 130     return super(DDPG, self).learn(
    131         total_timesteps=total_timesteps,
    132         callback=callback,
    133         log_interval=log_interval,
    134         eval_env=eval_env,
    135         eval_freq=eval_freq,
    136         n_eval_episodes=n_eval_episodes,
    137         tb_log_name=tb_log_name,
    138         eval_log_path=eval_log_path,
    139         reset_num_timesteps=reset_num_timesteps,
    140     )

File D:\1. APPS\Anaconda\envs\finrl\lib\site-packages\stable_baselines3\td3\td3.py:205, in TD3.learn(self, total_timesteps, callback, log_interval, eval_env, eval_freq, n_eval_episodes, tb_log_name, eval_log_path, reset_num_timesteps)
    192 def learn(
    193     self,
    194     total_timesteps: int,
   (...)
    202     reset_num_timesteps: bool = True,
    203 ) -> OffPolicyAlgorithm:
--> 205     return super(TD3, self).learn(
    206         total_timesteps=total_timesteps,
    207         callback=callback,
    208         log_interval=log_interval,
    209         eval_env=eval_env,
    210         eval_freq=eval_freq,
    211         n_eval_episodes=n_eval_episodes,
    212         tb_log_name=tb_log_name,
    213         eval_log_path=eval_log_path,
    214         reset_num_timesteps=reset_num_timesteps,
    215     )

File D:\1. APPS\Anaconda\envs\finrl\lib\site-packages\stable_baselines3\common\off_policy_algorithm.py:366, in OffPolicyAlgorithm.learn(self, total_timesteps, callback, log_interval, eval_env, eval_freq, n_eval_episodes, tb_log_name, eval_log_path, reset_num_timesteps)
    364         # Special case when the user passes `gradient_steps=0`
    365         if gradient_steps > 0:
--> 366             self.train(batch_size=self.batch_size, gradient_steps=gradient_steps)
    368 callback.on_training_end()
    370 return self

File D:\1. APPS\Anaconda\envs\finrl\lib\site-packages\stable_baselines3\td3\td3.py:170, in TD3.train(self, gradient_steps, batch_size)
    168 # Optimize the critics
    169 self.critic.optimizer.zero_grad()
--> 170 critic_loss.backward()
    171 self.critic.optimizer.step()
    173 # Delayed policy updates

File D:\1. APPS\Anaconda\envs\finrl\lib\site-packages\torch\_tensor.py:363, in Tensor.backward(self, gradient, retain_graph, create_graph, inputs)
    354 if has_torch_function_unary(self):
    355     return handle_torch_function(
    356         Tensor.backward,
    357         (self,),
   (...)
    361         create_graph=create_graph,
    362         inputs=inputs)
--> 363 torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)

File D:\1. APPS\Anaconda\envs\finrl\lib\site-packages\torch\autograd\__init__.py:173, in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)
    168     retain_graph = create_graph
    170 # The reason we repeat same the comment below is that
    171 # some Python versions print out the first line of a multi-line function
    172 # calls in the traceback and some print out the last line
--> 173 Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
    174     tensors, grad_tensors_, retain_graph, create_graph, inputs,
    175     allow_unreachable=True, accumulate_grad=True)

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn
Athe-kunal commented 2 years ago

Hi @khanhphan1311, I can see here that the DDPG_PARAMS key action_noise should actually be noise_type. Can you pass noise_type instead of action_noise? Please let me know if that fixes it or if you are still facing errors. A sketch of the change is below.
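
For reference, a minimal sketch of the parameter dict with the suggested key rename, keeping everything else from the snippet above unchanged. Whether the expected key is noise_type or action_noise may depend on the FinRL version installed, so treat this as the thing to try rather than a confirmed fix:

DDPG_PARAMS = {
    "batch_size": 256,
    "buffer_size": 50000,
    "learning_rate": 0.0005,
    "noise_type": "normal",  # renamed from "action_noise" per the suggestion above
}

model_ddpg = agent.get_model("ddpg", model_kwargs=DDPG_PARAMS)
trained_ddpg = agent.train_model(model=model_ddpg,
                                 tb_log_name='ddpg',
                                 total_timesteps=10000)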