Closed icompute386 closed 5 years ago
Hi!
In function iterate_batches
, tensor obs_v
is still on CPU, you need to copy it to GPU backend with
obs_v = torch.FloatTensor([obs]).to(device)
Hi Shmuma, that did the trick. I was anticipating a performance boost, but didn't see an improvement. Should that be expected here?
Chris
import random import gym import gym.spaces import argparse from collections import namedtuple import numpy as np from tensorboardX import SummaryWriter
import torch import torch.nn as nn import torch.optim as optim
# Hyperparameters for the cross-entropy method on FrozenLake.
HIDDEN_SIZE = 128   # width of the policy network's hidden layer
BATCH_SIZE = 100    # episodes collected per training batch
PERCENTILE = 30     # reward percentile used to pick elite episodes
GAMMA = 0.9         # per-step discount factor for episode rewards
class DiscreteOneHotWrapper(gym.ObservationWrapper):
    """One-hot encodes a Discrete observation space.

    FrozenLake reports its state as a plain integer; the policy network
    needs a fixed-size float vector, so the wrapper advertises a
    Box(0, 1) space of length n and one-hot encodes each observation.
    """

    def __init__(self, env):
        # The pasted code had `init` / `self.init(env)` — the markdown
        # rendering stripped the dunder underscores; restored here.
        super(DiscreteOneHotWrapper, self).__init__(env)
        assert isinstance(env.observation_space, gym.spaces.Discrete)
        self.observation_space = gym.spaces.Box(
            0.0, 1.0, (env.observation_space.n, ), dtype=np.float32)

    def observation(self, observation):
        # `low` is the all-zeros vector of the Box space; copy it and
        # switch on the bit for the current discrete state.
        res = np.copy(self.observation_space.low)
        res[observation] = 1.0
        return res
class Net(nn.Module):
    """One-hidden-layer MLP mapping observations to raw action scores.

    The output is unnormalized logits: training uses CrossEntropyLoss
    (which applies log-softmax internally) and action sampling applies
    an explicit Softmax in iterate_batches.
    """

    def __init__(self, obs_size, hidden_size, n_actions):
        # Mangled `init` restored to the proper `__init__` dunder.
        super(Net, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(obs_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, n_actions),
        )

    def forward(self, x):
        return self.net(x)
# Experience records: an Episode keeps its total undiscounted reward and
# the ordered (observation, action) pairs that produced it.
Episode = namedtuple('Episode', field_names=['reward', 'steps'])
EpisodeStep = namedtuple('EpisodeStep', field_names=['observation', 'action'])
def iterate_batches(env, net, batch_size, device=None):
    """Play episodes with the current policy and yield them in batches.

    Runs `env` forever, sampling actions from the softmax of `net`'s
    output, and yields a list of `batch_size` finished Episodes every
    time one fills up.

    Args:
        env: gym-style environment (reset()/step() protocol).
        net: policy network mapping a batch of observations to logits.
        batch_size: number of complete episodes per yielded batch.
        device: torch device for the observation tensor.  Defaults to the
            device the net's parameters live on — this fixes the original
            CPU/CUDA mismatch without relying on a global `device`.

    Yields:
        list[Episode]: batch of finished episodes.
    """
    if device is None:
        device = next(net.parameters()).device
    batch = []
    episode_reward = 0.0
    episode_steps = []
    obs = env.reset()
    sm = nn.Softmax(dim=1)
    while True:
        # Inference runs on `device`; probabilities come back to the CPU
        # so numpy can sample from them.
        obs_v = torch.FloatTensor([obs]).to(device)
        act_probs_v = sm(net(obs_v)).cpu()
        act_probs = act_probs_v.data.numpy()[0]
        action = np.random.choice(len(act_probs), p=act_probs)
        # Mangled paste (`next_obs, reward, isdone, =`) restored: step()
        # returns the (obs, reward, done, info) 4-tuple, and the flag is
        # read as `is_done` below.
        next_obs, reward, is_done, _ = env.step(action)
        episode_reward += reward
        episode_steps.append(EpisodeStep(observation=obs, action=action))
        if is_done:
            batch.append(Episode(reward=episode_reward, steps=episode_steps))
            episode_reward = 0.0
            episode_steps = []
            next_obs = env.reset()
            if len(batch) == batch_size:
                yield batch
                batch = []
        obs = next_obs
def filter_batch(batch, percentile):
    """Keep only elite episodes whose discounted reward beats the bound.

    The bound is the given percentile of the length-discounted episode
    rewards.  Comparison is strict (`>`), so episodes exactly at the
    bound are dropped.

    Returns:
        (elite_batch, train_obs, train_act, reward_bound) where the two
        training lists are the flattened observations and actions of the
        elite episodes.
    """
    disc_rewards = [ep.reward * (GAMMA ** len(ep.steps)) for ep in batch]
    reward_bound = np.percentile(disc_rewards, percentile)

    train_obs = []
    train_act = []
    elite_batch = []
    for episode, disc_reward in zip(batch, disc_rewards):
        if disc_reward > reward_bound:
            for step in episode.steps:
                train_obs.append(step.observation)
                train_act.append(step.action)
            elite_batch.append(episode)
    return elite_batch, train_obs, train_act, reward_bound
if __name__ == "__main__":
    # Mangled paste restored: `name == "main"` -> `__name__ == "__main__"`.
    parser = argparse.ArgumentParser()
    parser.add_argument("--cuda", default=False, action='store_true',
                        help="Enable cuda computation")
    args = parser.parse_args()
    device = torch.device("cuda" if args.cuda else "cpu")

    random.seed(12345)
    env = DiscreteOneHotWrapper(gym.make("FrozenLake-v0"))
    # env = gym.wrappers.Monitor(env, directory="mon", force=True)
    obs_size = env.observation_space.shape[0]
    n_actions = env.action_space.n

    net = Net(obs_size, HIDDEN_SIZE, n_actions).to(device)
    objective = nn.CrossEntropyLoss()
    optimizer = optim.Adam(params=net.parameters(), lr=0.001)
    writer = SummaryWriter(comment="-frozenlake-tweaked")

    full_batch = []
    for iter_no, batch in enumerate(iterate_batches(env, net, BATCH_SIZE)):
        reward_mean = float(np.mean(list(map(lambda s: s.reward, batch))))
        # Elites are accumulated across iterations: filter the union of the
        # retained elites and the fresh batch.
        full_batch, obs, acts, reward_bound = filter_batch(
            full_batch + batch, PERCENTILE)
        if not full_batch:
            continue
        # Build training tensors with explicit dtypes on the net's device
        # (replaces the dead commented-out FloatTensor/LongTensor lines).
        obs_v = torch.FloatTensor(obs).to(device)
        acts_v = torch.LongTensor(acts).to(device)
        full_batch = full_batch[-500:]  # cap the elite buffer
        optimizer.zero_grad()
        action_scores_v = net(obs_v)
        loss_v = objective(action_scores_v, acts_v)
        loss_v.backward()
        optimizer.step()
        print("%d: loss=%.3f, reward_mean=%.3f, reward_bound=%.3f, batch=%d" % (
            iter_no, loss_v.item(), reward_mean, reward_bound, len(full_batch)))
        writer.add_scalar("loss", loss_v.item(), iter_no)
        writer.add_scalar("reward_mean", reward_mean, iter_no)
        writer.add_scalar("reward_bound", reward_bound, iter_no)
        if reward_mean > 0.8:
            print("Solved!")
            break
    writer.close()
No, the net is too small to benefit from gpu parallelization
ср, 6 мар. 2019 г., 20:53 icompute386 notifications@github.com:
Hi Shmuma, that did the trick. I was anticipating a performance boost, but didn't see an improvement. Should that be expected here?
Chris
#!/usr/bin/env python3
import random import gym import gym.spaces import argparse from collections import namedtuple import numpy as np from tensorboardX import SummaryWriter
import torch import torch.nn as nn import torch.optim as optim
HIDDEN_SIZE = 128 BATCH_SIZE = 100 PERCENTILE = 30 GAMMA = 0.9
class DiscreteOneHotWrapper(gym.ObservationWrapper): def init(self, env): super(DiscreteOneHotWrapper, self).init(env) assert isinstance(env.observation_space, gym.spaces.Discrete) self.observation_space = gym.spaces.Box(0.0, 1.0, (env.observation_space.n, ), dtype=np.float32)
def observation(self, observation): res = np.copy(self.observation_space.low) res[observation] = 1.0 return res
class Net(nn.Module): def init(self, obs_size, hidden_size, n_actions): super(Net, self).init() self.net = nn.Sequential( nn.Linear(obs_size, hidden_size), nn.ReLU(), nn.Linear(hidden_size, n_actions) )
def forward(self, x): return self.net(x)
Episode = namedtuple('Episode', field_names=['reward', 'steps']) EpisodeStep = namedtuple('EpisodeStep', field_names=['observation', 'action'])
def iterate_batches(env, net, batch_size): batch = [] episode_reward = 0.0 episode_steps = [] obs = env.reset() sm = nn.Softmax(dim=1) while True: obs_v = torch.FloatTensor([obs]).to(device) act_probs_v = sm(net(obs_v)).cpu() act_probs = act_probs_v.data.numpy()[0] action = np.random.choice(len(act_probs), p=act_probs) next_obs, reward, isdone, = env.step(action) episode_reward += reward episode_steps.append(EpisodeStep(observation=obs, action=action)) if is_done: batch.append(Episode(reward=episode_reward, steps=episode_steps)) episode_reward = 0.0 episode_steps = [] next_obs = env.reset() if len(batch) == batch_size: yield batch batch = [] obs = next_obs
def filter_batch(batch, percentile): disc_rewards = list(map(lambda s: s.reward * (GAMMA ** len(s.steps)), batch)) reward_bound = np.percentile(disc_rewards, percentile)
train_obs = [] train_act = [] elite_batch = [] for example, discounted_reward in zip(batch, disc_rewards): if discounted_reward > reward_bound: train_obs.extend(map(lambda step: step.observation, example.steps)) train_act.extend(map(lambda step: step.action, example.steps)) elite_batch.append(example)
return elite_batch, train_obs, train_act, reward_bound
if name == "main": parser = argparse.ArgumentParser() parser.add_argument("--cuda", default=False, action='store_true', help="Enable cuda computation") args = parser.parse_args() device = torch.device("cuda" if args.cuda else "cpu")
random.seed(12345) env = DiscreteOneHotWrapper(gym.make("FrozenLake-v0"))
# env = gym.wrappers.Monitor(env, directory="mon", force=True)
obs_size = env.observation_space.shape[0] n_actions = env.action_space.n
net = Net(obs_size, HIDDEN_SIZE, n_actions).to(device) objective = nn.CrossEntropyLoss() optimizer = optim.Adam(params=net.parameters(), lr=0.001) writer = SummaryWriter(comment="-frozenlake-tweaked")
full_batch = [] for iter_no, batch in enumerate(iterate_batches(env, net, BATCH_SIZE)): reward_mean = float(np.mean(list(map(lambda s: s.reward, batch)))) full_batch, obs, acts, reward_bound = filter_batch(full_batch + batch, PERCENTILE) if not full_batch: continue
obs_v = torch.FloatTensor(obs)#, device=device)
#acts_v = torch.LongTensor(acts)#, device=device) obs_v = torch.tensor(obs).to(device) acts_v = torch.tensor(acts).to(device) full_batch = full_batch[-500:] optimizer.zero_grad() action_scores_v = net(obs_v) loss_v = objective(action_scores_v, acts_v) loss_v.backward() optimizer.step() print("%d: loss=%.3f, reward_mean=%.3f, reward_bound=%.3f, batch=%d" % ( iter_no, loss_v.item(), reward_mean, reward_bound, len(full_batch))) writer.add_scalar("loss", loss_v.item(), iter_no) writer.add_scalar("reward_mean", reward_mean, iter_no) writer.add_scalar("reward_bound", reward_bound, iter_no) if reward_mean > 0.8: print("Solved!") break
writer.close()
— You are receiving this because you modified the open/close state. Reply to this email directly, view it on GitHub https://github.com/PacktPublishing/Deep-Reinforcement-Learning-Hands-On/issues/35#issuecomment-470208448, or mute the thread https://github.com/notifications/unsubscribe-auth/AAECaj6mqJOmKXrVWcYmTmcHpCD3ePeJks5vUAChgaJpZM4bfjq7 .
Hi, I've got a question regarding the code in chapter 3 of (Deep Reinforcement Learning Hands-On). Can you explain how to make this run on the GPU? I've tried to implement this myself but the code crashes.
Crashed with the error:
(python36) c:\Anaconda\Deep-Reinforcement-Learning-Hands-On-master\Chapter04>python 03_frozenlake_tweaked.py --cuda Traceback (most recent call last): File "03_frozenlake_tweaked.py", line 109, in &lt;module&gt;
for iter_no, batch in enumerate(iterate_batches(env, net, BATCH_SIZE)):
File "03_frozenlake_tweaked.py", line 58, in iterate_batches
act_probs_v = sm(net(obs_v))
File "C:\Anaconda\envs\python36\lib\site-packages\torch\nn\modules\module.py", line 489, in call
result = self.forward(*input, **kwargs)
File "03_frozenlake_tweaked.py", line 43, in forward
return self.net(x)
File "C:\Anaconda\envs\python36\lib\site-packages\torch\nn\modules\module.py", line 489, in call
result = self.forward(*input, **kwargs)
File "C:\Anaconda\envs\python36\lib\site-packages\torch\nn\modules\container.py", line 92, in forward
input = module(input)
File "C:\Anaconda\envs\python36\lib\site-packages\torch\nn\modules\module.py", line 489, in call
result = self.forward(*input, **kwargs)
File "C:\Anaconda\envs\python36\lib\site-packages\torch\nn\modules\linear.py", line 67, in forward
return F.linear(input, self.weight, self.bias)
File "C:\Anaconda\envs\python36\lib\site-packages\torch\nn\functional.py", line 1352, in linear
ret = torch.addmm(torch.jit._unwrap_optional(bias), input, weight.t())
RuntimeError: Expected object of backend CUDA but got backend CPU for argument #4 'mat1'
Added/Made the following changes to: 03_frozenlake_tweaked.py
#!/usr/bin/env python3
import random import gym import gym.spaces import argparse from collections import namedtuple import numpy as np from tensorboardX import SummaryWriter
import torch import torch.nn as nn import torch.optim as optim
HIDDEN_SIZE = 128 BATCH_SIZE = 100 PERCENTILE = 30 GAMMA = 0.9
class DiscreteOneHotWrapper(gym.ObservationWrapper): def init(self, env): super(DiscreteOneHotWrapper, self).init(env) assert isinstance(env.observation_space, gym.spaces.Discrete) self.observation_space = gym.spaces.Box(0.0, 1.0, (env.observation_space.n, ), dtype=np.float32)
class Net(nn.Module): def init(self, obs_size, hidden_size, n_actions): super(Net, self).init() self.net = nn.Sequential( nn.Linear(obs_size, hidden_size), nn.ReLU(), nn.Linear(hidden_size, n_actions) )
Episode = namedtuple('Episode', field_names=['reward', 'steps']) EpisodeStep = namedtuple('EpisodeStep', field_names=['observation', 'action'])
def iterate_batches(env, net, batch_size): batch = [] episode_reward = 0.0 episode_steps = [] obs = env.reset() sm = nn.Softmax(dim=1) while True: obs_v = torch.FloatTensor([obs]) act_probs_v = sm(net(obs_v)) act_probs = act_probs_v.data.numpy()[0] action = np.random.choice(len(act_probs), p=act_probs) next_obs, reward, isdone, = env.step(action) episode_reward += reward episode_steps.append(EpisodeStep(observation=obs, action=action)) if is_done: batch.append(Episode(reward=episode_reward, steps=episode_steps)) episode_reward = 0.0 episode_steps = [] next_obs = env.reset() if len(batch) == batch_size: yield batch batch = [] obs = next_obs
def filter_batch(batch, percentile): disc_rewards = list(map(lambda s: s.reward * (GAMMA ** len(s.steps)), batch)) reward_bound = np.percentile(disc_rewards, percentile)
if name == "main": parser = argparse.ArgumentParser() parser.add_argument("--cuda", default=False, action='store_true', help="Enable cuda computation") args = parser.parse_args() device = torch.device("cuda" if args.cuda else "cpu")