def breakout(config, params):
  action_repeat = params.get('action_repeat', REPEATE)
  max_length = EPISODE_LEN // action_repeat
  state_components = ['reward']
  env_ctor = functools.partial(
      _dm_control_env_gym_atari, action_repeat, max_length, 'Breakout-v0')
  return Task('breakout', env_ctor, max_length, state_components)

def car_race(config, params):
  action_repeat = params.get('action_repeat', REPEATE)
  max_length = EPISODE_LEN // action_repeat
  state_components = ['reward']
  env_ctor = functools.partial(
      _dm_control_env_gym_atari, action_repeat, max_length, 'CarRacing-v0')
  return Task('car_race', env_ctor, max_length, state_components)
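These task functions assume they sit alongside PlaNet's own task definitions, where functools, control, and the Task tuple are already in scope, plus a few module-level constants that are not shown. A minimal sketch of those constants; the values here are assumptions chosen only for illustration:

import cv2
import gym
import numpy as np

REPEATE = 4          # default action repeat when params does not override it
EPISODE_LEN = 1000   # episode length in raw environment steps
IMG_SIZE = (64, 64)  # (width, height) target for cv2.resize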
class DeepMindWrapper_gym_atari(object):
  """Wraps a Gym environment into an interface for downstream processing."""

  metadata = {'render.modes': ['rgb_array']}
  reward_range = (-np.inf, np.inf)

  def __init__(self, env, render_size, camera_id=0):
    self._env = env
    self._render_size = render_size
    self._camera_id = camera_id
    self.observation_space = gym.spaces.Dict(
        {'state': gym.spaces.Box(low=-1, high=1, shape=(1,))})
    self.action_space = gym.spaces.Box(low=-1, high=1, shape=(2,))

  def __getattr__(self, name):
    return getattr(self._env, name)

  def discrete_action(self, a):
    # Threshold a continuous scalar in [-1, 1] into one of four discrete actions.
    condition_list = [a > 0.5, a > 0.0, a > -0.5, True]
    choice_list = [0, 1, 2, 3]
    return np.select(condition_list, choice_list)

  def step(self, action):
    self._env.render()
    # Split the two continuous action dimensions into CarRacing's
    # (steer, throttle, brake) triple.
    throttle = float(np.clip(action[1], 0, 1))
    brake = float(np.abs(np.clip(action[1], -1, 0)))
    steer = float(np.clip(action[0], -1, 1))
    action = (steer, throttle, brake)
    s_img, reward, done, info = self._env.step(action)
    self.img = cv2.resize(s_img, IMG_SIZE, interpolation=cv2.INTER_AREA)
    obs = {'state': np.array([0.0])}
    return obs, reward, False, {}  # Done is always False; LimitDuration ends episodes.
  def reset(self):
    s_img = self._env.reset()  # Old Gym's reset() returns only the observation.
    self.img = cv2.resize(s_img, IMG_SIZE, interpolation=cv2.INTER_AREA)
    return {'state': np.array([0.0])}
  def render(self, *args, **kwargs):
    if kwargs.get('mode', 'rgb_array') != 'rgb_array':
      raise ValueError("Only render mode 'rgb_array' is supported.")
    del args  # Unused.
    del kwargs  # Unused.
    return self.img
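As written, step sends CarRacing's continuous (steer, throttle, brake) tuple, so discrete_action is only reachable from a Breakout-specific variant of the wrapper. A sketch of how such a variant might look; BreakoutWrapper is a hypothetical name and not part of the original code:

class BreakoutWrapper(DeepMindWrapper_gym_atari):
  """Hypothetical variant that maps one continuous dimension to 4 buttons."""

  def __init__(self, env, render_size, camera_id=0):
    super(BreakoutWrapper, self).__init__(env, render_size, camera_id)
    self.action_space = gym.spaces.Box(low=-1, high=1, shape=(1,))

  def step(self, action):
    # Threshold the single continuous dimension into a discrete button index.
    discrete = int(self.discrete_action(float(action[0])))
    s_img, reward, done, info = self._env.step(discrete)
    self.img = cv2.resize(s_img, IMG_SIZE, interpolation=cv2.INTER_AREA)
    return {'state': np.array([0.0])}, reward, False, {}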
def _dm_control_env_gym_atari(action_repeat, max_length, env_name):
  import gym

  def env_ctor():
    env = gym.make(env_name)  # e.g. 'Breakout-v0' or 'CarRacing-v0'.
    env = env.env  # Remove the TimeLimit wrapper.
    env = DeepMindWrapper_gym_atari(env, IMG_SIZE)
    env = control.wrappers.ActionRepeat(env, action_repeat)
    env = control.wrappers.LimitDuration(env, max_length)
    env = control.wrappers.PixelObservations(env, IMG_SIZE, np.uint8, 'image')
    env = control.wrappers.ConvertTo32Bit(env)
    return env

  env = control.wrappers.ExternalProcess(env_ctor)
  return env
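A quick way to sanity-check the wrapper stack outside of training is to build the environment directly and step it with random actions. This loop is only a sketch; it assumes the Task fields shown above and that the external-process wrapper proxies the standard Gym step/reset interface and spaces:

task = car_race(config=None, params={'action_repeat': 4})
env = task.env_ctor()  # Spawns the environment in a separate process.
obs = env.reset()
for _ in range(10):
  action = env.action_space.sample()
  obs, reward, done, info = env.step(action)
  print(obs['image'].shape, reward, done)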
This is my code for the tasks. Everything works fine, but when I use the latest version of PlaNet, which has CarRacing as a default task, the agent behaves strangely.
Thanks for your message! It's great to see that you got CarRacing to work. The existing environment wasn't tested well, since I just started to play around with it. The tested environments are the dm_control tasks from our paper.
If you like, it would be great if you would clean up your code a bit and then create a pull request to add it to the repository. This way, other people could easily train PlaNet on CarRacing and Breakout. Let me know if I can help with it.
I tried to run the default CarRacing task and followed the process in the README exactly, but the result is strange. May I ask why? With my own wrapper for CarRacing everything seems fine, so maybe I'm misunderstanding some details of the latest code... (Ubuntu 16.04, Python 3.5, TensorFlow 1.12.0)