AI4Finance-Foundation / FinRL

FinRL: Financial Reinforcement Learning. 🔥
https://ai4finance.org

ValueError: cannot copy sequence with size 840 to array axis with dimension 831 #137

Closed Youbadawy closed 3 years ago

Youbadawy commented 3 years ago

For some reason, when I work on crypto data and try to run the following code:

env_train, _ = e_train_gym.get_sb_env()

I get this error:

ValueError: cannot copy sequence with size 840 to array axis with dimension 831

I remember solving this before, but I do not recall exactly how.

The environment is set up as shown below:

stock_dimension = len(train.tic.unique())
# state = [cash] + [close price per stock] + [shares held per stock]
#         + [one value per technical indicator per stock]
state_space = 1 + 2*stock_dimension + len(config["TECHNICAL_INDICATORS_LIST"])*stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

env_kwargs = {
    "hmax": 100,
    "initial_amount": 1000000,
    "buy_cost_pct": 0.001,
    "sell_cost_pct": 0.001,
    "state_space": state_space,
    "stock_dim": stock_dimension,
    "tech_indicator_list": config["TECHNICAL_INDICATORS_LIST"],
    "action_space": stock_dimension,
    "reward_scaling": 1e-4,
}
e_train_gym = StockTradingEnv(df = train, **env_kwargs)

Thanks,

Tarun-Khilani commented 3 years ago

@Youbadawy I believe the cause is a time series mismatch. Can you share the snippet of code between fetching the data and building this state space?

Youbadawy commented 3 years ago

@Tarun-Khilani you are absolutely right. The issue is a mismatch between my observation_space = (-inf, inf, (state_space,)) and my initial state, which ends up with a length of 673 due to the environment's state equation.

I am using the env_stocktrading example for multiple stocks, as shown below:

class StockTradingEnv(gym.Env):
    """A stock trading environment for OpenAI gym"""
    metadata = {'render.modes': ['human']}

    def __init__(self, 
                df, 
                stock_dim,
                hmax,                
                initial_amount,
                buy_cost_pct,
                sell_cost_pct,
                reward_scaling,
                state_space,
                action_space,
                tech_indicator_list,
                turbulence_threshold=None,
                make_plots = False, 
                print_verbosity = 10,
                day = 0, 
                initial=True,
                previous_state=[],
                model_name = '',
                mode='',
                iteration=''):
        self.day = day
        self.df = df
        self.stock_dim = stock_dim
        self.hmax = hmax
        self.initial_amount = initial_amount
        self.buy_cost_pct = buy_cost_pct
        self.sell_cost_pct = sell_cost_pct
        self.reward_scaling = reward_scaling
        self.state_space = state_space
        self.action_space = action_space
        self.tech_indicator_list = tech_indicator_list
        self.action_space = spaces.Box(low = -1, high = 1,shape = (self.action_space,)) 
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape = (self.state_space,))
        self.data = self.df.loc[self.day,:]
        self.terminal = False     
        self.make_plots = make_plots
        self.print_verbosity = print_verbosity
        self.turbulence_threshold = turbulence_threshold
        self.initial = initial
        self.previous_state = previous_state
        self.model_name=model_name
        self.mode=mode 
        self.iteration=iteration
        # initialize state
        self.state = self._initiate_state()

        # initialize reward
        self.reward = 0
        self.turbulence = 0
        self.cost = 0
        self.trades = 0
        self.episode = 0
        # memorize all the total balance change
        self.asset_memory = [self.initial_amount]
        self.rewards_memory = []
        self.actions_memory=[]
        self.date_memory=[self._get_date()]
        #self.reset()
        self._seed()

    def reset(self):  
        #initiate state
        self.state = self._initiate_state()

        if self.initial:
            self.asset_memory = [self.initial_amount]
        else:
            previous_total_asset = self.previous_state[0]+ \
            sum(np.array(self.state[1:(self.stock_dim+1)])*np.array(self.previous_state[(self.stock_dim+1):(self.stock_dim*2+1)]))
            self.asset_memory = [previous_total_asset]

        self.day = 0
        self.data = self.df.loc[self.day,:]
        self.turbulence = 0
        self.cost = 0
        self.trades = 0
        self.terminal = False 
        # self.iteration=self.iteration
        self.rewards_memory = []
        self.actions_memory=[]
        self.date_memory=[self._get_date()]

        self.episode+=1

        return self.state

    def _initiate_state(self):
        if self.initial:
            # For Initial State
            if len(self.df.tic.unique())>1:
                # for multiple stock
                state = [self.initial_amount] + \
                         self.data.close.values.tolist() + \
                         [0]*self.stock_dim  + \
                         sum([self.data[tech].values.tolist() for tech in self.tech_indicator_list ], [])
            else:
                # for single stock
                state = [self.initial_amount] + \
                        [self.data.close] + \
                        [0]*self.stock_dim  + \
                        sum([[self.data[tech]] for tech in self.tech_indicator_list ], [])
        else:
            #Using Previous State
            if len(self.df.tic.unique())>1:
                # for multiple stock
                state = [self.previous_state[0]] + \
                         self.data.close.values.tolist() + \
                         self.previous_state[(self.stock_dim+1):(self.stock_dim*2+1)]  + \
                         sum([self.data[tech].values.tolist() for tech in self.tech_indicator_list ], [])
            else:
                # for single stock
                state = [self.previous_state[0]] + \
                        [self.data.close] + \
                        self.previous_state[(self.stock_dim+1):(self.stock_dim*2+1)]  + \
                        sum([[self.data[tech]] for tech in self.tech_indicator_list ], [])
        return state

    def get_sb_env(self):
        e = DummyVecEnv([lambda: self])
        obs = e.reset()
        return e, obs

It is in this part of the code that I am trying to figure out why the state is defined like this, as a series of lists concatenated together:

            state = [self.initial_amount] + \
                      self.data.close.values.tolist() + \
                      [0]*self.stock_dim  + \
                      sum([self.data[tech].values.tolist() for tech in self.tech_indicator_list ], [])

The state definition above is the one with length 673. What I am currently trying to figure out is: do we define the order in which these variables are laid out? How does the agent know what each entry in the list corresponds to? And why does my state_space give a different length than the 673?

All these questions are meant to further explain how we define our environment and how the agent interacts with it. As much as I would like to fix the issue, I am seeking understanding, so to speak!

For readability I only kept the methods in question, but if you would like to see the rest of the class, it is the same as the one in the repository!
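
To sanity-check the length mismatch, here is a quick diagnostic sketch (my own, not FinRL API) that assumes the train, config, and e_train_gym objects from the first snippet:

```python
# Compare the configured state_space with the state the env actually builds.
stock_dim = len(train.tic.unique())
expected = 1 + 2 * stock_dim + len(config["TECHNICAL_INDICATORS_LIST"]) * stock_dim
actual = len(e_train_gym._initiate_state())
print(f"expected state length: {expected}, actual: {actual}")
# If they differ, day 0 of the frame does not contain stock_dim values per
# block (e.g. some tickers have no row at the first timestamp).
```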

Youbadawy commented 3 years ago

@Tarun-Khilani

So, to answer my own question about where the variables in the state are identified: the step function extracts each piece of information by slicing the state list, as shown below. In my opinion this seems like an error-prone way of defining a state, but I am not an expert in the subject, so I don't know the norms in the field.

    def step(self, actions):
        self.terminal = self.day >= len(self.df.index.unique())-1
        if self.terminal:
            # print(f"Episode: {self.episode}")
            if self.make_plots:
                self._make_plot()            
            end_total_asset = self.state[0]+ \
                sum(np.array(self.state[1:(self.stock_dim+1)])*np.array(self.state[(self.stock_dim+1):(self.stock_dim*2+1)]))
            df_total_value = pd.DataFrame(self.asset_memory)
            tot_reward = self.state[0]+sum(np.array(self.state[1:(self.stock_dim+1)])*np.array(self.state[(self.stock_dim+1):(self.stock_dim*2+1)]))- self.initial_amount 
            df_total_value.columns = ['account_value']
            df_total_value['date'] = self.date_memory
            df_total_value['daily_return']=df_total_value['account_value'].pct_change(1)
            if df_total_value['daily_return'].std() !=0:
                sharpe = (252**0.5)*df_total_value['daily_return'].mean()/ \
                      df_total_value['daily_return'].std()
            df_rewards = pd.DataFrame(self.rewards_memory)
            df_rewards.columns = ['account_rewards']
            df_rewards['date'] = self.date_memory[:-1]
            if self.episode % self.print_verbosity == 0:
                print(f"day: {self.day}, episode: {self.episode}")
                print(f"begin_total_asset: {self.asset_memory[0]:0.2f}")
                print(f"end_total_asset: {end_total_asset:0.2f}")
                print(f"total_reward: {tot_reward:0.2f}")
                print(f"total_cost: {self.cost:0.2f}")
                print(f"total_trades: {self.trades}")
                if df_total_value['daily_return'].std() != 0:
                    print(f"Sharpe: {sharpe:0.3f}")
                print("=================================")

            if (self.model_name!='') and (self.mode!=''):
                df_actions = self.save_action_memory()
                df_actions.to_csv('results/actions_{}_{}_{}.csv'.format(self.mode,self.model_name, self.iteration))
                df_total_value.to_csv('results/account_value_{}_{}_{}.csv'.format(self.mode,self.model_name, self.iteration),index=False)
                df_rewards.to_csv('results/account_rewards_{}_{}_{}.csv'.format(self.mode,self.model_name, self.iteration),index=False)
                plt.plot(self.asset_memory,'r')
                plt.savefig('results/account_value_{}_{}_{}.png'.format(self.mode,self.model_name, self.iteration))
                plt.close()

            # Add outputs to logger interface
            logger.record("environment/portfolio_value", end_total_asset)
            logger.record("environment/total_reward", tot_reward)
            logger.record("environment/total_reward_pct", (tot_reward / (end_total_asset - tot_reward)) * 100)
            logger.record("environment/total_cost", self.cost)
            logger.record("environment/total_trades", self.trades)

            return self.state, self.reward, self.terminal, {}

        else:

            actions = actions * self.hmax  # the policy outputs actions scaled to [-1, 1]
            actions = (actions.astype(int))  # convert to integers because we can't trade fractions of shares
            if self.turbulence_threshold is not None:
                if self.turbulence>=self.turbulence_threshold:
                    actions=np.array([-self.hmax]*self.stock_dim)
            begin_total_asset = self.state[0]+ \
            sum(np.array(self.state[1:(self.stock_dim+1)])*np.array(self.state[(self.stock_dim+1):(self.stock_dim*2+1)]))
            #print("begin_total_asset:{}".format(begin_total_asset))

            argsort_actions = np.argsort(actions)

            sell_index = argsort_actions[:np.where(actions < 0)[0].shape[0]]
            buy_index = argsort_actions[::-1][:np.where(actions > 0)[0].shape[0]]

            for index in sell_index:
                # print(f"Num shares before: {self.state[index+self.stock_dim+1]}")
                # print(f'take sell action before : {actions[index]}')
                actions[index] = self._sell_stock(index, actions[index]) * (-1)
                # print(f'take sell action after : {actions[index]}')
                # print(f"Num shares after: {self.state[index+self.stock_dim+1]}")

            for index in buy_index:
                # print('take buy action: {}'.format(actions[index]))
                actions[index] = self._buy_stock(index, actions[index])

            self.actions_memory.append(actions)

            self.day += 1
            self.data = self.df.loc[self.day,:]    
            if self.turbulence_threshold is not None:     
                self.turbulence = self.data['turbulence'].values[0]
            self.state =  self._update_state()

            end_total_asset = self.state[0]+ \
            sum(np.array(self.state[1:(self.stock_dim+1)])*np.array(self.state[(self.stock_dim+1):(self.stock_dim*2+1)]))
            self.asset_memory.append(end_total_asset)
            self.date_memory.append(self._get_date())
            self.reward = end_total_asset - begin_total_asset            
            self.rewards_memory.append(self.reward)
            self.reward = self.reward*self.reward_scaling

        return self.state, self.reward, self.terminal, {}
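
For reference, here is how that flat state vector is laid out. The helper below is my own illustrative sketch (unpack_state is not part of FinRL), matching the multi-stock _initiate_state above:

```python
import numpy as np

def unpack_state(state, stock_dim, tech_indicator_list):
    """Split FinRL's flat state list into named pieces (illustrative sketch)."""
    cash = state[0]
    prices = np.array(state[1 : stock_dim + 1])
    holdings = np.array(state[stock_dim + 1 : 2 * stock_dim + 1])
    # Each technical indicator contributes one block of stock_dim values,
    # in the order of tech_indicator_list.
    base = 2 * stock_dim + 1
    tech = {
        name: np.array(state[base + i * stock_dim : base + (i + 1) * stock_dim])
        for i, name in enumerate(tech_indicator_list)
    }
    return cash, prices, holdings, tech
```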

Youbadawy commented 3 years ago

@Tarun-Khilani

So I pinpointed the main issue: it is due to the first "day" (in my case, the first 5 minutes), for which I don't have data for the full list of stocks I inputted.

My initial state tries to acquire the close prices of all 78 of my stocks, while it only receives 66.

In __init__, self.data = self.df.loc[self.day,:] takes into account which day we are on, and since some of the stocks are missing on my first day, it causes the error!

Thanks for the help @Tarun-Khilani. I believe I shall go on to create a new environment for a faster timeframe like 5 minutes; do you have any advice?
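
For anyone hitting this, a quick check (my own sketch, assuming a long-format frame with 'date' and 'tic' columns like FinRL's training DataFrame) to find the timestamps that are missing tickers:

```python
# Timestamps at which some tickers have no row.
n_tics = df["tic"].nunique()
counts = df.groupby("date")["tic"].nunique()
print(counts[counts < n_tics])
```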

Tarun-Khilani commented 3 years ago

@Youbadawy Yeah, so I believe you can do some pre-processing to keep only the commonly available data: if a timestamp is missing data for some stocks, drop it for all stocks, rather than building a new environment. A minimal sketch of that pre-processing is below.
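
The sketch assumes the same long-format df with 'date' and 'tic' columns:

```python
# Keep only the timestamps at which every ticker has a row.
counts = df.groupby("date")["tic"].nunique()
complete_dates = counts[counts == df["tic"].nunique()].index
df = df[df["date"].isin(complete_dates)].sort_values(["date", "tic"]).reset_index(drop=True)
```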

Youbadawy commented 3 years ago

@Tarun-Khilani Absolutely. It was an issue where I dropped all rows with NA features; if I fill them with 0 instead, it works as well.

But wouldn't the increase in zeros affect training and what the agent learns,

like a fraud detection system trained on a mostly non-fraud dataset that just ends up predicting non-fraud every time?

Tarun-Khilani commented 3 years ago

@Youbadawy That is why it would be better to drop those timestamps for the other stocks as well, or to fill the gaps with the previous value rather than 0.
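
A sketch of the forward-fill approach (assuming a long-format frame with 'date', 'tic', and 'close' columns; other value columns would be handled the same way):

```python
import pandas as pd

# Pivot to a date-by-ticker matrix, carry last known values forward,
# drop leading rows that have no prior value, then melt back to long format.
prices = df.pivot(index="date", columns="tic", values="close")
prices = prices.ffill().dropna()
df_filled = prices.reset_index().melt(id_vars="date", var_name="tic", value_name="close")
```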

Youbadawy commented 3 years ago

Or a KNN imputation would definitely do the trick! ;)
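
For completeness, a sketch of that idea with scikit-learn's KNNImputer on a date-by-ticker close-price matrix (illustrative only; n_neighbors is a guess, and note that imputing across dates can leak future information):

```python
import pandas as pd
from sklearn.impute import KNNImputer

prices = df.pivot(index="date", columns="tic", values="close")
imputed = pd.DataFrame(
    KNNImputer(n_neighbors=5).fit_transform(prices),
    index=prices.index,
    columns=prices.columns,
)
```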

CodingWookie commented 3 years ago

I got a similar error:

Successfully added technical indicators
Successfully added turbulence index
Traceback (most recent call last):
  File "d:\Dropbox\AIML EDUCATION\FinRL-Library-master\FinRL-Library-master\main.py", line 51, in <module>
    main()
  File "d:\Dropbox\AIML EDUCATION\FinRL-Library-master\FinRL-Library-master\main.py", line 38, in main
    finrl.autotrain.training.train_one()
  File "d:\Dropbox\AIML EDUCATION\FinRL-Library-master\FinRL-Library-master\finrl\autotrain\training.py", line 66, in train_one
    env_train, _ = e_train_gym.get_sb_env()
  File "d:\Dropbox\AIML EDUCATION\FinRL-Library-master\FinRL-Library-master\finrl\env\env_stocktrading.py", line 379, in get_sb_env
    obs = e.reset()
  File "C:\Users\Denis\AppData\Roaming\Python\Python37\site-packages\stable_baselines3\common\vec_env\dummy_vec_env.py", line 62, in reset
    self._save_obs(env_idx, obs)
  File "C:\Users\Denis\AppData\Roaming\Python\Python37\site-packages\stable_baselines3\common\vec_env\dummy_vec_env.py", line 92, in _save_obs
    self.buf_obs[key][env_idx] = obs
ValueError: cannot copy sequence with size 283 to array axis with dimension 301

What do I do to fix this?

Youbadawy commented 3 years ago

Hello @CodingWookie, you need to verify the input data. Assert the following:

1) no NA values, 2) no duplicated date values

This is a preprocessing error, so reprocess your input DataFrame. See the sketch below.
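
A sketch of those two checks (assuming the long-format frame with 'date' and 'tic' columns):

```python
assert not df.isna().any().any(), "NA values present -- impute or drop them first"
assert not df.duplicated(subset=["date", "tic"]).any(), "duplicated (date, tic) rows"
```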

CodingWookie commented 3 years ago

> Hello @CodingWookie, you need to verify the input data. Assert the following:
>
> 1) no NA values, 2) no duplicated date values
>
> This is a preprocessing error, so reprocess your input DataFrame.

Does this have anything to do with the posts above? I just ran the main.py file... Do I have to update the config.py file to account for ticker updates, or do I just need to include a statement to ignore NA values?

Youbadawy commented 3 years ago

It would be best if you showcase the error with some snippets of the DataFrame, the code you ran, and the error output!


CodingWookie commented 3 years ago

(finrl) PS D:\Dropbox\AIMLEDUCATION\FinRL-Library-master\FinRL-Library-master> python main.py --mode=train

C:\Users\Denis\anaconda3\envs\finrl\lib\site-packages\pyfolio\pos.py:28: UserWarning: Module "zipline.assets" not found; multipliers will not be applied to position notionals.
==============Start Fetching Data===========
[*100%***]  1 of 1 completed   (repeated 30 times, one line per ticker)
Shape of DataFrame: (156457, 8)
==============Start Feature Engineering===========
Successfully added technical indicators
Successfully added turbulence index
Traceback (most recent call last):
  File "main.py", line 51, in <module>
    main()
  File "main.py", line 38, in main
    finrl.autotrain.training.train_one()
  File "D:\Dropbox\AIMLEDUCATION\FinRL-Library-master\FinRL-Library-master\finrl\autotrain\training.py", line 66, in train_one
    env_train, _ = e_train_gym.get_sb_env()
  File "D:\Dropbox\AIMLEDUCATION\FinRL-Library-master\FinRL-Library-master\finrl\env\env_stocktrading.py", line 379, in get_sb_env
    obs = e.reset()
  File "C:\Users\Denis\anaconda3\envs\finrl\lib\site-packages\stable_baselines3\common\vec_env\dummy_vec_env.py", line 62, in reset
    self._save_obs(env_idx, obs)
  File "C:\Users\Denis\anaconda3\envs\finrl\lib\site-packages\stable_baselines3\common\vec_env\dummy_vec_env.py", line 92, in _save_obs
    self.buf_obs[key][env_idx] = obs
ValueError: cannot copy sequence with size 292 to array axis with dimension 301

C:\Users\Denis\anaconda3\envs\finrl\lib\site-packages\pyfolio\pos.py:28: UserWarning: Module "zipline.assets" not found; mutltipliers will not be applied to position notionals. ' to position notionals.' ==============Start Fetching Data=========== [*100%***] 1 of 1 completed [*100%***] 1 of 1 completed [*100%***] 1 of 1 completed [*100%***] 1 of 1 completed [*100%***] 1 of 1 completed [*100%***] 1 of 1 completed [*100%***] 1 of 1 completed [*100%***] 1 of 1 completed [*100%***] 1 of 1 completed [*100%***] 1 of 1 completed [*100%***] 1 of 1 completed [*100%***] 1 of 1 completed [*100%***] 1 of 1 completed [*100%***] 1 of 1 completed [*100%***] 1 of 1 completed [*100%***] 1 of 1 completed [*100%***] 1 of 1 completed [*100%***] 1 of 1 completed [*100%***] 1 of 1 completed [*100%***] 1 of 1 completed [*100%***] 1 of 1 completed [*100%***] 1 of 1 completed [*100%***] 1 of 1 completed [*100%***] 1 of 1 completed [*100%***] 1 of 1 completed [*100%***] 1 of 1 completed [*100%***] 1 of 1 completed [*100%***] 1 of 1 completed [*100%***] 1 of 1 completed [*100%***] 1 of 1 completed Shape of DataFrame: (156457, 8) ==============Start Feature Engineering=========== Successfully added technical indicators Successfully added turbulence index Traceback (most recent call last): File "main.py", line 51, in main() File "main.py", line 38, in main finrl.autotrain.training.train_one() File "D:\Dropbox\AIMLEDUCATION\FinRL-Library-master\FinRL-Library-master\finrl\autotrain\training.py", line 66, in train_one envtrain, = e_train_gym.get_sb_env() File "D:\Dropbox\AIMLEDUCATION\FinRL-Library-master\FinRL-Library-master\finrl\env\env_stocktrading.py", line 379, in get_sb_env obs = e.reset() File "C:\Users\Denis\anaconda3\envs\finrl\lib\site-packages\stable_baselines3\common\vec_env\dummy_vec_env.py", line 62, in reset self._save_obs(env_idx, obs) File "C:\Users\Denis\anaconda3\envs\finrl\lib\site-packages\stable_baselines3\common\vec_env\dummy_vec_env.py", line 92, in _save_obs self.buf_obs[key][env_idx] = obs ValueError: cannot copy sequence with size 292 to array axis with dimension 301 (finrl) PS D:\Dropbox\AIMLEDUCATION\FinRL-Library-master\FinRL-Library-master> `