openai/gym

A toolkit for developing and comparing reinforcement learning algorithms.
https://www.gymlibrary.dev

Parallel versions of classic control environments don't work! #165

Closed · darshanhegde closed 8 years ago

darshanhegde commented 8 years ago

I have implemented a synchronous parallel DQN and want to test it on the classic control environments, using rgb_array observations (images) instead of the Box(n,) state vectors (joint angles etc.). My algorithm requires a parallel version of these environments: k environments that take a list of k actions and return k (observation, reward, done, info) tuples. I'm using the env.render() method to get the rgb_array observation I need. My implementation looks something like this:

import gym
from multiprocessing import Process, Pipe

class EnvWorker(Process):

    def __init__(self, env_name, pipe, name=None):
        Process.__init__(self, name=name)
        self.env = gym.make(env_name)
        self.pipe = pipe
        self.name = name
        print "Environment initialized. ", self.name

    def run(self):
        while True:
            # Receive an action, step the environment, and reply with the
            # rendered frame as the observation (plus reward and done flag).
            action = self.pipe.recv()
            _, reward, done, _ = self.env.step(action)
            observation = self.env.render(mode="rgb_array")
            self.pipe.send((observation, reward, done))
            if done:
                print("Done with an episode for %s" % self.name)
                self.env.reset()

class ParallelEnvironment(object):

    def __init__(self, env_name, num_env):
        assert num_env > 0, "Number of environments must be positive."
        self.num_env = num_env
        self.workers = []
        self.pipes = []
        self.sample_env = gym.make(env_name)
        # One worker process and one pipe endpoint per environment.
        for env_idx in range(num_env):
            p_start, p_end = Pipe()
            env_worker = EnvWorker(env_name, p_end, name=str(env_idx))
            env_worker.start()
            self.workers.append(env_worker)
            self.pipes.append(p_start)

    def step(self, actions):
        # Send all k actions first, then collect all k results, so the
        # environments step concurrently rather than one after another.
        observations, rewards, dones = [], [], []
        for idx in range(self.num_env):
            self.pipes[idx].send(actions[idx])
        for idx in range(self.num_env):
            observation, reward, done = self.pipes[idx].recv()
            observations.append(observation)
            rewards.append(reward)
            dones.append(done)
        return observations, rewards, dones

    def get_action_space(self):
        return self.sample_env.action_space

    def __del__(self):
        """
        Terminate all spawned processes.
        """
        for worker in self.workers:
            worker.terminate()
            worker.join()

# Works fine if I use an Atari environment
num_envs = 4
p_env = ParallelEnvironment("Breakout-v0", num_envs)
action_space = p_env.get_action_space()
for i in range(100):
    actions = [action_space.sample() for _ in range(num_envs)]
    obs, rwds, dones = p_env.step(actions)
    print(obs[0].shape)

# Just hangs and throws the following error if I use a classic control environment
num_envs = 4
p_env = ParallelEnvironment("MountainCar-v0", num_envs)
action_space = p_env.get_action_space()
for i in range(100):
    actions = [action_space.sample() for _ in range(num_envs)]
    obs, rwds, dones = p_env.step(actions)
    print(obs[0].shape)

Here is the error it throws:

XIO:  fatal IO error 11 (Resource temporarily unavailable) on X server ":0"
      after 141 requests (141 known processed) with 0 events remaining.
XIO:  fatal IO error 11 (Resource temporarily unavailable) on X server ":0"
      after 141 requests (141 known processed) with 0 events remaining.
XIO:  fatal IO error 11 (Resource temporarily unavailable) on X server ":0"
      after 141 requests (141 known processed) with 0 events remaining.
XIO:  fatal IO error 11 (Resource temporarily unavailable) on X server ":0"
      after 142 requests (142 known processed) with 2 events remaining.

Can someone suggest a better way of doing this?

mthrok commented 8 years ago

I ran your code on my MacBook Pro and it worked.

My environment is gym version 0.1.3, installed with Anaconda pip + git.

darshanhegde commented 8 years ago

Thanks. That is really strange! I was running this inside a Docker container; if I run it on my local machine (Ubuntu 14.04) with gym version 0.1.3, it actually works!
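
If the failure is specific to the Docker container, one plausible explanation (an assumption; the thread never confirms it) is that the container lacks a usable X display: the classic control environments render through pyglet, which needs one, while Atari frames come from ALE and do not. A common workaround is to start a virtual framebuffer before creating any environment, for example with the pyvirtualdisplay package (a wrapper around Xvfb); a minimal sketch, assuming pyvirtualdisplay is installed in the container:

from pyvirtualdisplay import Display

# Start a headless X display (Xvfb) before any env.render() call,
# so pyglet has a display to connect to inside the container.
display = Display(visible=0, size=(1400, 900))
display.start()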

empyreanx commented 6 years ago

Did you ever resolve this when using the Docker container?
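
For anyone who hits this later: a pattern that commonly avoids XIO errors with forked renderers (a sketch, under the assumption that the crash comes from display state set up in the parent being inherited across fork()) is to create the environment inside run() rather than __init__, since __init__ executes in the parent process and run() in the child:

import gym
from multiprocessing import Process, Pipe

class EnvWorker(Process):

    def __init__(self, env_name, pipe, name=None):
        Process.__init__(self, name=name)
        # Store only cheap, picklable arguments here: __init__ runs in
        # the parent process, so anything created here is inherited by
        # every forked child.
        self.env_name = env_name
        self.pipe = pipe

    def run(self):
        # run() executes in the child, so the environment and any X
        # connection its renderer opens belong to this process alone.
        env = gym.make(self.env_name)
        env.reset()
        while True:
            action = self.pipe.recv()
            _, reward, done, _ = env.step(action)
            observation = env.render(mode="rgb_array")
            self.pipe.send((observation, reward, done))
            if done:
                env.reset()

On Python 3.4+, calling multiprocessing.set_start_method("spawn") before starting the workers is another way to ensure no display state is shared, at the cost of slower worker startup.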