Open maga110 opened 8 months ago
use code:
import os
import pandas as pd
# from my_env_stocktrading import StockTradingEnv
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.agents.stablebaselines3.models import DRLAgent
from stable_baselines3.common.logger import configure
from finrl import config_tickers
from finrl.main import check_and_make_directories
from finrl.config import INDICATORS, TRAINED_MODEL_DIR, RESULTS_DIR
check_and_make_directories([TRAINED_MODEL_DIR])
fn = 'train_data.csv'
# fn = 'my_train.csv'
train = pd.read_csv(fn, dtype={'tic': str})
print(train)
# If you are not using the data generated from part 1 of this tutorial, make sure
# it has the columns and index in the form that could be make into the environment.
# Then you can comment and skip the following two lines.
train = train.set_index(train.columns[0])
train.index.names = ['']
print(train)
stock_dimension = len(train.tic.unique())
state_space = 1 + 2*stock_dimension + len(INDICATORS)*stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")
buy_cost_list = sell_cost_list = [0.001] * stock_dimension
num_stock_shares = [0] * stock_dimension
env_kwargs = {
"hmax": 100,
"initial_amount": 1000000,
"num_stock_shares": num_stock_shares,
"buy_cost_pct": buy_cost_list,
"sell_cost_pct": sell_cost_list,
"state_space": state_space,
"stock_dim": stock_dimension,
"tech_indicator_list": INDICATORS,
"action_space": stock_dimension,
"reward_scaling": 1e-4,
"model_name": "ddpg",
"mode": f"{stock_dimension}stocks",
"make_plots": True,
}
e_train_gym = StockTradingEnv(df = train, **env_kwargs)
env_train, _ = e_train_gym.get_sb_env()
print(type(env_train))
# Set the corresponding values to 'True' for the algorithms that you want to use
if_using_a2c = True
if_using_ddpg = True
if_using_ppo = False
if_using_td3 = False
if_using_sac = False
agent = DRLAgent(env = env_train)
model_ddpg = agent.get_model("ddpg")
if if_using_ddpg:
# set up logger
tmp_path = RESULTS_DIR + '/ddpg'
new_logger_ddpg = configure(tmp_path, ["stdout", "csv", "tensorboard"])
# Set new logger
model_ddpg.set_logger(new_logger_ddpg)
trained_ddpg = agent.train_model(model=model_ddpg,
tb_log_name='ddpg',
total_timesteps=50000) if if_using_ddpg else None
trained_ddpg.save(TRAINED_MODEL_DIR + "/agent_ddpg") if if_using_ddpg else None
Describe the bug
i want to record training action logs, and check transaction process, but i found action logs(buy or sell) not any change after episode 3.
To Reproduce
in order to record action logs, i add a patch:
check the files under ./results/episode_{self.episode}_actions.log after training is complete, i found agent actions not any changs after episode 3:
Expected behavior
The agent should continuously learn during the training process and generate different transaction logs.