opendilab / DI-engine

OpenDILab Decision AI Engine. The Most Comprehensive Reinforcement Learning Framework B.P.
https://di-engine-docs.readthedocs.io
Apache License 2.0
2.82k stars 352 forks source link

NameNotFound: Environment stocks doesn't exist. #710

Closed prowgrammmer closed 8 months ago

prowgrammmer commented 11 months ago

I've modified cartpole_c51_deploy.py for deployment of trading model as follows:

import gym
import torch
from easydict import EasyDict
from ding.config import compile_config
from ding.envs import DingEnvWrapper
from ding.policy import single_env_forward_wrapper, DQNPolicy
from ding.model import DQN
# from dizoo.classic_control.cartpole.config.cartpole_c51_config import cartpole_c51_config, cartpole_c51_create_config
from dizoo.gym_anytrading.envs.stocks_env import StocksEnv

# Hyper-parameters for the stocks DQN experiment, split into an `env` section and a `policy` section.
stocks_dqn_config = dict(
    exp_name='stocks_dqn_seed0',
    env=dict(
        # NOTE(review): the comment originally here described a shared-memory option
        # (only effective with a 'subprocess' env manager), but no such key is set in this dict.
        # Env number respectively for collector and evaluator.
        collector_env_num=8,
        evaluator_env_num=8,
        env_id='stocks-v0',
        n_evaluator_episode=8,
        stop_value=2,
        # Episode length; 253 is described as one trading year.
        eps_length=253,
        # associated with the feature length.
        window_size=20,
        # the path to save result image.
        save_path='./fig/',
        # the raw data file name
        stocks_data_filename='STOCKS_GOOGL',
        # the stocks range percentage used by train/test.
        # if one of them is None, train & test set will use all data by default.
        train_range=0.8,
        test_range=-0.2,
    ),
    policy=dict(
        # Whether to use cuda for network.
        cuda=True,
        # Keyword arguments for the network; main() unpacks this sub-dict into DQN(**cfg.policy.model).
        model=dict(
            obs_shape=62,
            action_shape=5,
            encoder_hidden_size_list=[128],
            head_layer_num=1,
            # Whether to use dueling head.
            dueling=True,
        ),
        # Reward's future discount factor, aka. gamma.
        discount_factor=0.99,
        # How many steps in td error.
        nstep=5,
        # learn_mode config
        learn=dict(
            update_per_collect=10,
            batch_size=64,
            learning_rate=0.001,
            # Frequency of target network update.
            target_update_freq=100,
            ignore_done=True,
        ),
        # collect_mode config
        collect=dict(
            # You can use either "n_sample" or "n_episode" in collector.collect.
            # Get "n_sample" samples per collect.
            n_sample=64,
            # Cut trajectories into pieces with length "unroll_len".
            unroll_len=1,
        ),
        # command_mode config
        other=dict(
            # Epsilon greedy with decay.
            eps=dict(
                # Decay type. Support ['exp', 'linear'].
                type='exp',
                start=0.95,
                end=0.1,
                decay=50000,
            ),
            replay_buffer=dict(replay_buffer_size=100000, )
        ),
    ),
)
# Wrap in EasyDict and expose it as `main_config`, the name passed to main() at the bottom of the script.
stocks_dqn_config = EasyDict(stocks_dqn_config)
main_config = stocks_dqn_config

# Component selection for the experiment: each `type` names a component and `import_names`
# lists module paths (presumably the modules that register that type — TODO confirm).
stocks_dqn_create_config = dict(
    env=dict(
        type='stocks-v0',
        import_names=['dizoo.gym_anytrading.envs.stocks_env'],
    ),
    env_manager=dict(type='base'),
    policy=dict(
        type='dqn',
    ),
    evaluator=dict(
        type='trading_interaction',
        import_names=['dizoo.gym_anytrading.worker'],
        ),
)
# Wrap in EasyDict and expose it as `create_config`, the name passed to main().
stocks_dqn_create_config = EasyDict(stocks_dqn_create_config)
create_config = stocks_dqn_create_config

def main(main_config: EasyDict, create_config: EasyDict, ckpt_path: str):
    """Play one episode with a DQN policy restored from a checkpoint and print the return.

    Args:
        main_config: Experiment config; its ``exp_name`` is overwritten here
            before it is compiled.
        create_config: Create-config passed to ``compile_config`` as ``create_cfg``.
        ckpt_path: File read with ``torch.load``; its ``'model'`` entry is loaded
            into the network as the state dict.
    """
    main_config.exp_name = 'stocks_dqn_deploy'
    compiled = compile_config(main_config, create_cfg=create_config, auto=True)
    # NOTE: this is the call reported below to raise ``NameNotFound``.
    env = DingEnvWrapper(gym.make('stocks-v0'), EasyDict(env_wrapper='default'))

    # Build the network from the compiled model section and restore its weights on CPU.
    net = DQN(**compiled.policy.model)
    checkpoint = torch.load(ckpt_path, map_location='cpu')
    net.load_state_dict(checkpoint['model'])

    # Use the policy's eval mode, wrapped so it can be called on a single observation.
    eval_policy = DQNPolicy(compiled.policy, model=net).eval_mode
    forward_fn = single_env_forward_wrapper(eval_policy.forward)

    # Roll out until the env reports the episode is done, summing the rewards.
    observation = env.reset()
    episode_return = 0.
    finished = False
    while not finished:
        chosen = forward_fn(observation)
        observation, reward, finished, _ = env.step(chosen)
        episode_return += reward
    print(f'Deploy is finished, final epsiode return is: {episode_return}')

if __name__ == "__main__":
    # Script entry point: run the deploy with the module-level configs and a checkpoint path.
    main(
        main_config,
        create_config,
        '/home/user/anaconda3/envs/tradingg2/lib/python3.10/site-packages/dizoo/gym_anytrading/config/stocks_dqn_seed0/ckpt/ckpt_best.pth.tar',
    )

However I get the following error:

NameNotFound: Environment stocks doesn't exist.

Cloud-Pku commented 11 months ago

Can you provide more error logs? I can run the code above normally.

prowgrammmer commented 11 months ago

I have just tried running it on a different machine but I'm experiencing the same issue. Here's the full error:

Traceback (most recent call last):

  File "C:\Users\user\anaconda3\envs\py37\lib\site-packages\spyder_kernels\py3compat.py", line 356, in compat_exec
    exec(code, globals, locals)

  File "c:\users\user\documents\python scripts\rl_trading\trading_deploy.py", line 124, in <module>
    main(main_config, create_config, '/home/user/anaconda3/envs/tradingg2/lib/python3.10/site-packages/dizoo/gym_anytrading/config/stocks_dqn_seed0/ckpt/ckpt_best.pth.tar')

  File "c:\users\user\documents\python scripts\rl_trading\trading_deploy.py", line 105, in main
    env = DingEnvWrapper(gym.make('stocks-v0'), EasyDict(env_wrapper='default'))

  File "C:\Users\user\anaconda3\envs\py37\lib\site-packages\gym\envs\registration.py", line 607, in make
    _check_version_exists(ns, name, version)

  File "C:\Users\user\anaconda3\envs\py37\lib\site-packages\gym\envs\registration.py", line 234, in _check_version_exists
    _check_name_exists(ns, name)

  File "C:\Users\user\anaconda3\envs\py37\lib\site-packages\gym\envs\registration.py", line 213, in _check_name_exists
    f"Environment {name} doesn't exist{namespace_msg}. {suggestion_msg}"
Cloud-Pku commented 11 months ago

Instead of using gym.make, you can create an environment instance directly, as follows:

    from dizoo.gym_anytrading.envs import StocksEnv
    env = StocksEnv(cfg)
    env = DingEnvWrapper(env, EasyDict(env_wrapper='default'))

And there is a useful case at "DI-engine/dizoo/gym_anytrading/envs/test_stocks_env.py".

prowgrammmer commented 11 months ago

When creating an environment as you said I now get the following error:

Traceback (most recent call last):

  File "C:\Users\user\anaconda3\envs\py37\lib\site-packages\spyder_kernels\py3compat.py", line 356, in compat_exec
    exec(code, globals, locals)

  File "c:\users\user\documents\python scripts\rl_trading\trading_deploy.py", line 130, in <module>
    main(main_config, create_config, 'C:/Users/user/anaconda3/envs/py37/Lib/site-packages/dizoo/gym_anytrading/config/stocks_dqn_seed0_230823_120603/ckpt/ckpt_best.pth.tar')

  File "c:\users\user\documents\python scripts\rl_trading\trading_deploy.py", line 109, in main
    env = StocksEnv(cfg)

  File "C:\Users\user\anaconda3\envs\py37\lib\site-packages\dizoo\gym_anytrading\envs\stocks_env.py", line 16, in __init__
    super().__init__(cfg)

  File "C:\Users\user\anaconda3\envs\py37\lib\site-packages\dizoo\gym_anytrading\envs\trading_env.py", line 90, in __init__
    self._env_id = cfg.env_id

AttributeError: 'EasyDict' object has no attribute 'env_id'

Also how is test_stocks_env.py supposed to be used? Is it just a unit test?

Cloud-Pku commented 11 months ago

You need to configure the parameters in cfg. There is an example in test_stocks_env.py: [image]. You can find the meaning of each parameter in "DI-engine/dizoo/gym_anytrading/config/stocks_dqn_config.py": [image]

prowgrammmer commented 11 months ago

Ok now I get the following error:

Traceback (most recent call last):

  File "C:\Users\user\anaconda3\envs\py37\lib\site-packages\spyder_kernels\py3compat.py", line 356, in compat_exec
    exec(code, globals, locals)

  File "c:\users\user\documents\python scripts\rl_trading\trading_deploy.py", line 131, in <module>
    main(main_config, create_config, 'C:/Users/user/anaconda3/envs/py37/Lib/site-packages/dizoo/gym_anytrading/config/stocks_dqn_seed0_230823_120603/ckpt/ckpt_best.pth.tar')

  File "c:\users\user\documents\python scripts\rl_trading\trading_deploy.py", line 111, in main
    env = DingEnvWrapper(env, EasyDict(env_wrapper='default'))

  File "C:\Users\user\anaconda3\envs\py37\lib\site-packages\ding\envs\env\ding_env_wrapper.py", line 45, in __init__
    self._action_space.seed(0)  # default seed

AttributeError: 'NoneType' object has no attribute 'seed'
Cloud-Pku commented 10 months ago

It looks like StocksEnv doesn't fit DingEnvWrapper. However, there is no need to use DingEnvWrapper in your pipeline unless you want to change the inputs/outputs exchanged between the agent and the environment. You can just write: env = StocksEnv(your_env_cfg)

SUSHANTH009 commented 8 months ago

I've modified cartpole_c51_deploy.py for deployment of trading model as follows:

import gym
import torch
from easydict import EasyDict
from ding.config import compile_config
from ding.envs import DingEnvWrapper
from ding.policy import single_env_forward_wrapper, DQNPolicy
from ding.model import DQN
# from dizoo.classic_control.cartpole.config.cartpole_c51_config import cartpole_c51_config, cartpole_c51_create_config
from dizoo.gym_anytrading.envs.stocks_env import StocksEnv

# Hyper-parameters for the stocks DQN experiment, split into an `env` section and a `policy` section.
stocks_dqn_config = dict(
    exp_name='stocks_dqn_seed0',
    env=dict(
        # NOTE(review): the comment originally here described a shared-memory option
        # (only effective with a 'subprocess' env manager), but no such key is set in this dict.
        # Env number respectively for collector and evaluator.
        collector_env_num=8,
        evaluator_env_num=8,
        env_id='stocks-v0',
        n_evaluator_episode=8,
        stop_value=2,
        # Episode length; 253 is described as one trading year.
        eps_length=253,
        # associated with the feature length.
        window_size=20,
        # the path to save result image.
        save_path='./fig/',
        # the raw data file name
        stocks_data_filename='STOCKS_GOOGL',
        # the stocks range percentage used by train/test.
        # if one of them is None, train & test set will use all data by default.
        train_range=0.8,
        test_range=-0.2,
    ),
    policy=dict(
        # Whether to use cuda for network.
        cuda=True,
        # Keyword arguments for the network; main() unpacks this sub-dict into DQN(**cfg.policy.model).
        model=dict(
            obs_shape=62,
            action_shape=5,
            encoder_hidden_size_list=[128],
            head_layer_num=1,
            # Whether to use dueling head.
            dueling=True,
        ),
        # Reward's future discount factor, aka. gamma.
        discount_factor=0.99,
        # How many steps in td error.
        nstep=5,
        # learn_mode config
        learn=dict(
            update_per_collect=10,
            batch_size=64,
            learning_rate=0.001,
            # Frequency of target network update.
            target_update_freq=100,
            ignore_done=True,
        ),
        # collect_mode config
        collect=dict(
            # You can use either "n_sample" or "n_episode" in collector.collect.
            # Get "n_sample" samples per collect.
            n_sample=64,
            # Cut trajectories into pieces with length "unroll_len".
            unroll_len=1,
        ),
        # command_mode config
        other=dict(
            # Epsilon greedy with decay.
            eps=dict(
                # Decay type. Support ['exp', 'linear'].
                type='exp',
                start=0.95,
                end=0.1,
                decay=50000,
            ),
            replay_buffer=dict(replay_buffer_size=100000, )
        ),
    ),
)
# Wrap in EasyDict and expose it as `main_config`, the name passed to main() at the bottom of the script.
stocks_dqn_config = EasyDict(stocks_dqn_config)
main_config = stocks_dqn_config

# Component selection for the experiment: each `type` names a component and `import_names`
# lists module paths (presumably the modules that register that type — TODO confirm).
stocks_dqn_create_config = dict(
    env=dict(
        type='stocks-v0',
        import_names=['dizoo.gym_anytrading.envs.stocks_env'],
    ),
    env_manager=dict(type='base'),
    policy=dict(
        type='dqn',
    ),
    evaluator=dict(
        type='trading_interaction',
        import_names=['dizoo.gym_anytrading.worker'],
        ),
)
# Wrap in EasyDict and expose it as `create_config`, the name passed to main().
stocks_dqn_create_config = EasyDict(stocks_dqn_create_config)
create_config = stocks_dqn_create_config

def main(main_config: EasyDict, create_config: EasyDict, ckpt_path: str):
    """Play one episode with a DQN policy restored from a checkpoint and print the return.

    Args:
        main_config: Experiment config; its ``exp_name`` is overwritten here
            before it is compiled.
        create_config: Create-config passed to ``compile_config`` as ``create_cfg``.
        ckpt_path: File read with ``torch.load``; its ``'model'`` entry is loaded
            into the network as the state dict.
    """
    main_config.exp_name = 'stocks_dqn_deploy'
    compiled = compile_config(main_config, create_cfg=create_config, auto=True)
    # NOTE: this is the call reported below to raise ``NameNotFound``.
    env = DingEnvWrapper(gym.make('stocks-v0'), EasyDict(env_wrapper='default'))

    # Build the network from the compiled model section and restore its weights on CPU.
    net = DQN(**compiled.policy.model)
    checkpoint = torch.load(ckpt_path, map_location='cpu')
    net.load_state_dict(checkpoint['model'])

    # Use the policy's eval mode, wrapped so it can be called on a single observation.
    eval_policy = DQNPolicy(compiled.policy, model=net).eval_mode
    forward_fn = single_env_forward_wrapper(eval_policy.forward)

    # Roll out until the env reports the episode is done, summing the rewards.
    observation = env.reset()
    episode_return = 0.
    finished = False
    while not finished:
        chosen = forward_fn(observation)
        observation, reward, finished, _ = env.step(chosen)
        episode_return += reward
    print(f'Deploy is finished, final epsiode return is: {episode_return}')

if __name__ == "__main__":
    # Script entry point: run the deploy with the module-level configs and a checkpoint path.
    main(
        main_config,
        create_config,
        '/home/user/anaconda3/envs/tradingg2/lib/python3.10/site-packages/dizoo/gym_anytrading/config/stocks_dqn_seed0/ckpt/ckpt_best.pth.tar',
    )

However I get the following error:

NameNotFound: Environment stocks doesn't exist.

Can you please share the final code to run?

Cloud-Pku commented 8 months ago

Hi, this is the code I run


import torch
from easydict import EasyDict
from ding.config import compile_config
from ding.policy import single_env_forward_wrapper, DQNPolicy
from ding.model import DQN
from dizoo.gym_anytrading.envs import StocksEnv

# Hyper-parameters for the stocks DQN experiment, split into an `env` section and a `policy` section.
stocks_dqn_config = dict(
    exp_name='stocks_dqn_seed0',
    env=dict(
        # NOTE(review): the comment originally here described a shared-memory option
        # (only effective with a 'subprocess' env manager), but no such key is set in this dict.
        # Env number respectively for collector and evaluator.
        collector_env_num=8,
        evaluator_env_num=8,
        env_id='stocks-v0',
        n_evaluator_episode=8,
        stop_value=2,
        # NOTE(review): the original comment here read "one trading year", but the value
        # set in this script is 1 — confirm the intended episode length.
        eps_length=1,
        # associated with the feature length.
        window_size=20,
        # the path to save result image.
        save_path='./fig/',
        # the raw data file name
        stocks_data_filename='STOCKS_GOOGL',
        # the stocks range percentage used by train/test.
        # if one of them is None, train & test set will use all data by default.
        train_range=None,
        test_range=None,
    ),
    policy=dict(
        # Whether to use cuda for network.
        cuda=True,
        # Keyword arguments for the network; main() unpacks this sub-dict into DQN(**cfg.policy.model).
        model=dict(
            obs_shape=62,
            action_shape=5,
            encoder_hidden_size_list=[128],
            head_layer_num=1,
            # Whether to use dueling head.
            dueling=True,
        ),
        # Reward's future discount factor, aka. gamma.
        discount_factor=0.99,
        # How many steps in td error.
        nstep=5,
        # learn_mode config
        learn=dict(
            update_per_collect=10,
            batch_size=64,
            learning_rate=0.001,
            # Frequency of target network update.
            target_update_freq=100,
            ignore_done=True,
        ),
        # collect_mode config
        collect=dict(
            # You can use either "n_sample" or "n_episode" in collector.collect.
            # Get "n_sample" samples per collect.
            n_sample=64,
            # Cut trajectories into pieces with length "unroll_len".
            unroll_len=1,
        ),
        # command_mode config
        other=dict(
            # Epsilon greedy with decay.
            eps=dict(
                # Decay type. Support ['exp', 'linear'].
                type='exp',
                start=0.95,
                end=0.1,
                decay=50000,
            ),
            replay_buffer=dict(replay_buffer_size=100000, )
        ),
    ),
)
# Keep a reference to the plain `env` sub-dict before wrapping; main() builds the StocksEnv from it.
env_config = stocks_dqn_config["env"]
# Wrap in EasyDict and expose it as `main_config`, the name passed to main() at the bottom of the script.
stocks_dqn_config = EasyDict(stocks_dqn_config)
main_config = stocks_dqn_config

# Component selection for the experiment: each `type` names a component and `import_names`
# lists module paths (presumably the modules that register that type — TODO confirm).
stocks_dqn_create_config = dict(
    env=dict(
        type='stocks-v0',
        import_names=['dizoo.gym_anytrading.envs.stocks_env'],
    ),
    env_manager=dict(type='base'),
    policy=dict(
        type='dqn',
    ),
    evaluator=dict(
        type='trading_interaction',
        import_names=['dizoo.gym_anytrading.worker'],
        ),
)
# Wrap in EasyDict and expose it as `create_config`, the name passed to main().
stocks_dqn_create_config = EasyDict(stocks_dqn_create_config)
create_config = stocks_dqn_create_config

def main(main_config: EasyDict, create_config: EasyDict, ckpt_path: str):
    """Play one episode of ``StocksEnv`` with a DQN policy and print the return.

    Args:
        main_config: Experiment config; its ``exp_name`` is overwritten here
            before it is compiled.
        create_config: Create-config passed to ``compile_config`` as ``create_cfg``.
        ckpt_path: Currently unused, because the checkpoint-loading lines below
            are commented out.
    """
    main_config.exp_name = 'stocks_dqn_deploy'
    compiled = compile_config(main_config, create_cfg=create_config, auto=True)

    # The env is constructed directly from the module-level ``env_config``
    # instead of going through gym.make / DingEnvWrapper.
    # env = DingEnvWrapper(gym.make('stocks-v0'), EasyDict(env_wrapper='default'))
    env = StocksEnv(EasyDict(env_config))

    net = DQN(**compiled.policy.model)
    # Checkpoint loading is disabled in this script:
    # checkpoint = torch.load(ckpt_path, map_location='cpu')
    # net.load_state_dict(checkpoint['model'])

    # Use the policy's eval mode, wrapped so it can be called on a single observation.
    eval_policy = DQNPolicy(compiled.policy, model=net).eval_mode
    forward_fn = single_env_forward_wrapper(eval_policy.forward)

    # Roll out until the env reports the episode is done, printing each action
    # and summing the rewards.
    observation = env.reset()
    episode_return = 0.
    steps = 0
    finished = False
    while not finished:
        steps += 1
        chosen = forward_fn(observation)
        print(chosen)
        observation, reward, finished, _ = env.step(chosen)
        # print(observation, reward, finished, _)
        episode_return += reward
    print(f'Deploy is finished, final epsiode return is: {episode_return}')
    # print(steps)

if __name__ == "__main__":
    # Script entry point: run the deploy with the module-level configs and a checkpoint path.
    main(
        main_config,
        create_config,
        'dizoo/gym_anytrading/config/stocks_dqn_seed0_230910_115331/ckpt/ckpt_best.pth.tar',
    )
SUSHANTH009 commented 8 months ago

'dizoo/gym_anytrading/config/stocks_dqn_seed0_230910_115331/ckpt/ckpt_best.pth.tar' I don't have this file

Cloud-Pku commented 8 months ago

The `ckpt_path` parameter has no effect in this script because the lines that load the checkpoint are commented out in the main function. It is meant to be the path where your trained model is saved.