
Use btgym custom environment #146

Open okvlam opened 3 years ago

okvlam commented 3 years ago

Referring to https://github.com/Kismuz/btgym/issues/108, I have been trying to use btgym as a standalone custom environment with Stable Baselines. However, BTgymEnv fails the environment check in check_env, and I get the following error:

Reference: https://stable-baselines3.readthedocs.io/en/master/guide/custom_env.html

/home/PycharmProjects/btgym/venv/lib/python3.7/site-packages/stable_baselines3/common/env_checker.py:53: UserWarning: The observation space is a Dict but the environment is not a gym.GoalEnv (cf https://github.com/openai/gym/blob/master/gym/core.py), this is currently not supported by Stable Baselines (cf https://github.com/hill-a/stable-baselines/issues/133), you will need to use a custom policy. 
  "The observation space is a Dict but the environment is not a gym.GoalEnv "
/home/PycharmProjects/btgym/venv/lib/python3.7/site-packages/stable_baselines3/common/env_checker.py:70: UserWarning: The action space is not based off a numpy array. Typically this means it's either a Dict or Tuple space. This type of action space is currently not supported by Stable Baselines 3. You should try to flatten the action using a wrapper.
  "The action space is not based off a numpy array. Typically this means it's either a Dict or Tuple space. "
Traceback (most recent call last):
  File "/home/PycharmProjects/btgym/dev/SB3_testing.py", line 160, in <module>
    check_env(env, warn=True)
  File "/home/PycharmProjects/btgym/venv/lib/python3.7/site-packages/stable_baselines3/common/env_checker.py", line 237, in check_env
    _check_returned_values(env, observation_space, action_space)
  File "/home/PycharmProjects/btgym/venv/lib/python3.7/site-packages/stable_baselines3/common/env_checker.py", line 130, in _check_returned_values
    assert isinstance(info, dict), "The `info` returned by `step()` must be a python dictionary"
AssertionError: The `info` returned by `step()` must be a python dictionary

May I ask if anyone has a solution for this issue?

Steps to reproduce:

import sys
sys.path.insert(0, '../../../..')

import IPython.display as Display
import PIL.Image as Image
import numpy as np
import backtrader as bt
import random

from gym import spaces
from btgym import BTgymEnv, BTgymBaseStrategy
from btgym.datafeed.derivative import BTgymDataset2

def show_rendered_image(rgb_array):
    """Convert a numpy RGB array to a PIL Image and display it."""
    Display.display(Image.fromarray(rgb_array))

def render_all_modes(env):
    """Render and display the current state in every registered mode."""
    for mode in env.metadata['render.modes']:
        print('[{}] mode:'.format(mode))
        show_rendered_image(env.render(mode))

def take_some_steps(env, some_steps):
    """Take up to `some_steps` random actions; stop early if the episode ends."""
    for step in range(some_steps):
        rnd_action = env.action_space.sample()
        o, r, d, i = env.step(rnd_action)
        if d:
            print('Episode finished.')
            break
    print(step + 1, 'actions made.\n')

def under_the_hood(env):
    """Print selected environment attributes and all parameter dictionaries."""
    for attr in ['dataset', 'strategy', 'engine', 'renderer', 'network_address']:
        print('\nEnv.{}: {}'.format(attr, getattr(env, attr)))

    for params_name, params_dict in env.params.items():
        print('\nParameters [{}]: '.format(params_name))
        for key, value in params_dict.items():
            print('{} : {}'.format(key, value))

class MyStrategy(BTgymBaseStrategy):

    def get_price_gradients_state(self):
        """Squash raw price gradients into (0, 1) with a sigmoid."""
        sigmoid = lambda x: 1 / (1 + np.exp(-x))
        T = 1.2e+4  # scaling factor applied to gradients before squashing
        X = self.raw_state
        dX = np.gradient(X)[0]
        return sigmoid(dX * T)

    def get_reward(self):
        """Log-return of current broker value over starting cash."""
        return float(np.log(self.stats.broker.value[0] / self.env.broker.startingcash))

MyDataset = BTgymDataset2(
    filename=r'/home/PycharmProjects/btgym/examples/data/DAT_ASCII_EURUSD_M1_2016.csv',
    start_weekdays=[0, 1],
    episode_duration={'days': 2, 'hours': 23, 'minutes': 55},
)

MyCerebro = bt.Cerebro()

MyCerebro.addstrategy(
    MyStrategy,
    state_shape={
        'raw': spaces.Box(low=-10, high=10, shape=(4,4)),
        'price_gradients': spaces.Box(low=0, high=1, shape=(4,4))
    },
    drawdown_call=99,
    skip_frame=5,
)

MyCerebro.broker.setcash(1000.0)
MyCerebro.broker.setcommission(commission=0.002)
MyCerebro.addsizer(bt.sizers.SizerFix, stake=20)
MyCerebro.addanalyzer(bt.analyzers.DrawDown)

env = BTgymEnv(
    dataset=MyDataset,
    episode_duration={'days': 0, 'hours': 5, 'minutes': 55}, # ignored!
    engine=MyCerebro,
    strategy='NotUsed',  # ignored!
    state_shape=(9, 99), # ignored!
    start_cash=1.0,  # ignored!
    render_modes=['episode', 'human', 'price_gradients'],
    render_state_as_image=True,
    render_ylabel='Price Gradient',
    render_size_human=(10,4),
    render_size_state=(10,4),
    render_plotstyle='ggplot',
    verbose=0,
)

under_the_hood(env)
env.reset()
take_some_steps(env, 100)
render_all_modes(env)

print('-------------------------checking spec-------------------------------')
print("Observation space:", env.observation_space)
print("Shape:", env.observation_space.shape)
print("Action space:", env.action_space)
obs = env.reset()
action = env.action_space.sample()
obs, reward, done, info = env.step(action)
print(reward, done, info)

################################################## If the following 2 lines are commented out, the code runs fine.
from stable_baselines3.common.env_checker import check_env
check_env(env, warn=True)

env.close()