AI4Finance-Foundation / FinRL

FinRL: Financial Reinforcement Learning. 🔥

Why don't the actions change at all after episode 3? #1156

Open maga110 opened 8 months ago

maga110 commented 8 months ago

Describe the bug

I want to record the action logs during training and check the transaction process, but I found that the action logs (buy or sell) do not change at all after episode 3.

To Reproduce

In order to record the action logs, I added a patch:

--- env_stocktrading.py 2024-01-16 10:10:50
+++ my_env_stocktrading.py  2024-01-16 10:11:03
@@ -74,6 +74,7 @@ class StockTradingEnv(gym.Env):
         self.iteration = iteration
         # initalize state
         self.state = self._initiate_state()
+        self.action_logs = []

         # initialize reward
         self.reward = 0
@@ -117,6 +118,9 @@ class StockTradingEnv(gym.Env):
                         * sell_num_shares
                         * (1 - self.sell_cost_pct[index])
                     )
+                    op_log = f"{self.data.iloc[index]['date']} sell {self.data.iloc[index]['tic']} {sell_num_shares} {self.data.iloc[index]['close']}, cost {sell_amount}"
+                    self.action_logs.append(op_log)
+
                     # update balance
                     self.state[0] += sell_amount

@@ -187,6 +191,9 @@ class StockTradingEnv(gym.Env):
                     * buy_num_shares
                     * (1 + self.buy_cost_pct[index])
                 )
+                op_log = f"{self.data.iloc[index]['date']} buy {self.data.iloc[index]['tic']} {buy_num_shares} {self.data.iloc[index]['close']}, cost {buy_amount}"
+                self.action_logs.append(op_log)
+
                 self.state[0] -= buy_amount

                 self.state[index + self.stock_dim + 1] += buy_num_shares
@@ -220,6 +227,12 @@ class StockTradingEnv(gym.Env):
     def step(self, actions):
         self.terminal = self.day >= len(self.df.index.unique()) - 1
         if self.terminal:
+            act_log_fn = f'./results/episode_{self.episode}_actions.log'
+            with open(act_log_fn, 'w') as f:
+                tmp = '\n'.join(self.action_logs)
+                f.write(tmp)
+            self.action_logs = []
+
             # print(f"Episode: {self.episode}")
             if self.make_plots:
                 self._make_plot()

Checking the files under ./results/episode_{self.episode}_actions.log after training completed, I found that the agent's actions do not change at all after episode 3:

MD5 (episode_10_actions.log) = 9ef06f3bb23b879346aa8db04dff93a6
MD5 (episode_11_actions.log) = 9ef06f3bb23b879346aa8db04dff93a6
MD5 (episode_12_actions.log) = 9ef06f3bb23b879346aa8db04dff93a6
MD5 (episode_13_actions.log) = 9ef06f3bb23b879346aa8db04dff93a6
MD5 (episode_14_actions.log) = 9ef06f3bb23b879346aa8db04dff93a6
MD5 (episode_15_actions.log) = 9ef06f3bb23b879346aa8db04dff93a6
MD5 (episode_16_actions.log) = 9ef06f3bb23b879346aa8db04dff93a6
MD5 (episode_17_actions.log) = 9ef06f3bb23b879346aa8db04dff93a6
MD5 (episode_18_actions.log) = 9ef06f3bb23b879346aa8db04dff93a6
MD5 (episode_19_actions.log) = 9ef06f3bb23b879346aa8db04dff93a6
MD5 (episode_2_actions.log) = ff7ef83f4b43d9fe443bfea9da1961f3
MD5 (episode_3_actions.log) = 94ef84d7b36636a4a4284aa72708456f
MD5 (episode_4_actions.log) = 9ef06f3bb23b879346aa8db04dff93a6
MD5 (episode_5_actions.log) = 9ef06f3bb23b879346aa8db04dff93a6
MD5 (episode_6_actions.log) = 9ef06f3bb23b879346aa8db04dff93a6
MD5 (episode_7_actions.log) = 9ef06f3bb23b879346aa8db04dff93a6
MD5 (episode_8_actions.log) = 9ef06f3bb23b879346aa8db04dff93a6
MD5 (episode_9_actions.log) = 9ef06f3bb23b879346aa8db04dff93a6
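
For reference, the same check can be scripted; below is a minimal Python sketch, assuming the logs live under ./results/ (the output above came from the command-line md5 tool):

import glob
import hashlib

# Hash every per-episode action log; identical digests mean identical logs.
for fn in sorted(glob.glob("./results/episode_*_actions.log")):
    with open(fn, "rb") as f:
        digest = hashlib.md5(f.read()).hexdigest()
    print(f"MD5 ({fn}) = {digest}")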

Expected behavior

The agent should keep learning during training, so the transaction logs should differ across episodes.

maga110 commented 8 months ago

The training code I used:

import os
import pandas as pd

# To actually record the action logs, swap in the patched env:
# from my_env_stocktrading import StockTradingEnv
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.agents.stablebaselines3.models import DRLAgent
from stable_baselines3.common.logger import configure
from finrl import config_tickers
from finrl.main import check_and_make_directories
from finrl.config import INDICATORS, TRAINED_MODEL_DIR, RESULTS_DIR

# ensure ./results exists for the logger and the per-episode action logs
check_and_make_directories([TRAINED_MODEL_DIR, RESULTS_DIR])

fn = 'train_data.csv'
# fn = 'my_train.csv'
train = pd.read_csv(fn, dtype={'tic': str})
print(train)

# If you are not using the data generated in part 1 of this tutorial, make sure
# it has the columns and index in a form that can be turned into the environment;
# then you can comment out and skip the following two lines.
train = train.set_index(train.columns[0])
train.index.names = ['']

print(train)

stock_dimension = len(train.tic.unique())
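# State vector layout in FinRL's StockTradingEnv: 1 entry for the cash balance,
# stock_dimension entries for close prices, stock_dimension entries for share
# holdings, plus one entry per technical indicator per stock.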
state_space = 1 + 2*stock_dimension + len(INDICATORS)*stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

buy_cost_list = sell_cost_list = [0.001] * stock_dimension
num_stock_shares = [0] * stock_dimension

env_kwargs = {
    "hmax": 100,                            # max shares per single trade
    "initial_amount": 1000000,              # starting cash balance
    "num_stock_shares": num_stock_shares,   # initial holdings per stock
    "buy_cost_pct": buy_cost_list,          # transaction cost on buys
    "sell_cost_pct": sell_cost_list,        # transaction cost on sells
    "state_space": state_space,
    "stock_dim": stock_dimension,
    "tech_indicator_list": INDICATORS,
    "action_space": stock_dimension,        # one action per stock
    "reward_scaling": 1e-4,
    "model_name": "ddpg",
    "mode": f"{stock_dimension}stocks",
    "make_plots": True,
}

e_train_gym = StockTradingEnv(df = train, **env_kwargs)

env_train, _ = e_train_gym.get_sb_env()
print(type(env_train))

# Set the corresponding values to 'True' for the algorithms that you want to use
if_using_a2c = True
if_using_ddpg = True
if_using_ppo = False
if_using_td3 = False
if_using_sac = False

agent = DRLAgent(env = env_train)
model_ddpg = agent.get_model("ddpg")

if if_using_ddpg:
    # set up logger
    tmp_path = RESULTS_DIR + '/ddpg'
    new_logger_ddpg = configure(tmp_path, ["stdout", "csv", "tensorboard"])
    # Set new logger
    model_ddpg.set_logger(new_logger_ddpg)

trained_ddpg = None
if if_using_ddpg:
    trained_ddpg = agent.train_model(model=model_ddpg,
                                     tb_log_name='ddpg',
                                     total_timesteps=50000)
    trained_ddpg.save(TRAINED_MODEL_DIR + "/agent_ddpg")
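
As a cross-check that doesn't require patching the environment, the actions can also be recorded with a stable-baselines3 callback. Below is a minimal sketch, not FinRL's own API: ActionLoggerCallback and the save path results/train_actions.npy are made-up names, and it assumes self.locals["actions"] is populated during SB3's rollout collection (which holds for off-policy algorithms like DDPG):

import numpy as np
from stable_baselines3.common.callbacks import BaseCallback

class ActionLoggerCallback(BaseCallback):
    """Record the action taken at every training step."""
    def __init__(self, verbose=0):
        super().__init__(verbose)
        self.actions = []

    def _on_step(self) -> bool:
        # self.locals exposes the rollout-collection local variables;
        # "actions" is the batch of actions just sent to the vectorized env.
        self.actions.append(np.array(self.locals["actions"]).copy())
        return True  # returning False would stop training early

# Hypothetical usage, calling SB3's learn() directly since
# DRLAgent.train_model may not expose a callback argument:
# action_logger = ActionLoggerCallback()
# model_ddpg.learn(total_timesteps=50000, tb_log_name='ddpg',
#                  callback=action_logger)
# np.save('results/train_actions.npy', np.concatenate(action_logger.actions))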