Hi,
Thanks for reading this post.
Currently, I am trying to create my own network for reinforcement learning. To this end, I have adapted the Q-network from
Playing Atari with Deep Reinforcement Learning
Volodymyr Mnih, Koray Kavukcuoglu, David Silver, Alex Graves, Ioannis Antonoglou, Daan Wierstra, Martin Riedmiller
and
Mnih, Volodymyr, et al. "Human-level control through deep reinforcement learning." Nature 518.7540 (2015): 529-533.
When Theano tries to compile the functions for the loss and the Q-values,
self._train = theano.function([], [loss, q_vals], updates=updates, givens=givens_train)
self._q_vals = theano.function([], q_vals, givens=givens_q_val)
it keeps returning:
UnusedInputError: theano.function was asked to create a function computing outputs given certain inputs, but the provided input variable at index 0 is not part of the computational graph needed to compute the outputs: <CudaNdarrayType(float32, 4D)>.
To make this error into a warning, you can pass the parameter on_unused_input='warn' to theano.function. To disable it completely, use on_unused_input='ignore'.
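As far as I understand, this error means that one of the variables handed to theano.function is not needed to compute the requested outputs. A minimal, made-up sketch (not my actual code) that raises the same exception with an explicit input list:

import theano
import theano.tensor as T

x = T.matrix('x')   # used in the graph below
y = T.matrix('y')   # never used below

out = T.sum(x ** 2)

# Raises UnusedInputError, because `out` can be computed without `y`:
f = theano.function([x, y], out)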
I have debugged the code many times, but I cannot understand why the inputs (supplied through givens) end up not being part of the function's computational graph.
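For comparison, my understanding of givens is that they simply substitute a symbolic variable with another variable (here a shared one) before compilation, which works as long as the substituted variable actually appears in the graph. A small sketch of that working case, again with made-up names:

import numpy as np
import theano
import theano.tensor as T

states = T.matrix('states')
states_shared = theano.shared(
    np.ones((4, 3), dtype=theano.config.floatX))

q = T.sum(states * 2.0)

# `states` appears in the graph of `q`, so the substitution works and
# no explicit inputs are needed.
f = theano.function([], q, givens={states: states_shared})
print(f())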
Many thanks in advance for your explanation.
Here is my full source code for the network:
"""
import lasagne
import numpy as np
import theano
import theano.tensor as T
from updates import deepmind_rmsprop
import logging
q_vals = lasagne.layers.get_output(self.l_out, states / input_scale)
# massage/unpack states into the right form for the multi-input network
q_vals = lasagne.layers.get_output(self.l_out, {'l_in': imgs,
                                                'l_loc1': locs,
                                                'l_his': hiss,
                                                'l_dis': sds})
if self.freeze_interval > 0:
    next_q_vals = lasagne.layers.get_output(self.next_l_out,
                                            {'l_in': next_imgs,
                                             'l_loc1': next_locs,
                                             'l_his': next_hiss,
                                             'l_dis': next_sds})
else:
    next_q_vals = lasagne.layers.get_output(self.l_out,
                                            {'l_in': next_imgs,
                                             'l_loc1': next_locs,
                                             'l_his': next_hiss,
                                             'l_dis': next_sds})
next_q_vals = theano.gradient.disconnected_grad(next_q_vals)
target = (rewards +
          (T.ones_like(terminals) - terminals) *
          self.discount * T.max(next_q_vals, axis=1, keepdims=True))
diff = target - q_vals[T.arange(batch_size),
                       actions.reshape((-1,))].reshape((-1, 1))
if self.clip_delta > 0:
    # If we simply take the squared clipped diff as our loss,
    # then the gradient will be zero whenever the diff exceeds
    # the clip bounds. To avoid this, we extend the loss
    # linearly past the clip point to keep the gradient constant
    # in that regime.
    #
    # This is equivalent to declaring d loss/d q_vals to be
    # equal to the clipped diff, then backpropagating from
    # there, which is what the DeepMind implementation does.
    quadratic_part = T.minimum(abs(diff), self.clip_delta)
    linear_part = abs(diff) - quadratic_part
    loss = 0.5 * quadratic_part ** 2 + self.clip_delta * linear_part
else:
    loss = 0.5 * diff ** 2
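# A concrete sanity check of the clipped loss above, with made-up numbers:
# for clip_delta = 1.0 and |diff| = 3.0, quadratic_part = 1.0 and
# linear_part = 2.0, so loss = 0.5 * 1.0**2 + 1.0 * 2.0 = 2.5, and the
# gradient d loss / d |diff| stays at clip_delta = 1.0 past the clip point.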
if batch_accumulator == 'sum':
    loss = T.sum(loss)
elif batch_accumulator == 'mean':
    loss = T.mean(loss)
else:
    raise ValueError("Bad accumulator: {}".format(batch_accumulator))
params = lasagne.layers.helper.get_all_params(self.l_out)
givens_train = {
class DeepQLearner:
    """
    Deep Q-learning network using Lasagne.
    """

    def __init__(self, width_img, height_img, width_loc, height_loc,
                 width_his, height_his, target_dis_size, num_actions,
                 num_frames, discount, learning_rate, rho, rms_epsilon,
                 momentum, clip_delta, freeze_interval, batch_size,
                 network_type, update_rule, batch_accumulator, rng,
                 input_scale=8.0):
        target_distribution = T.tensor('target_distribution')
        next_target_distribution = T.tensor('next_target_distribution')

        self.states_shared = theano.shared(
            np.zeros((batch_size, num_frames, input_height, input_width),
                     dtype=theano.config.floatX))
        self.next_states_shared = theano.shared(
            np.zeros((batch_size, num_frames, input_height, input_width),
                     dtype=theano.config.floatX))

        q_vals = lasagne.layers.get_output(self.l_out, states / input_scale)
        next_q_vals = lasagne.layers.get_output(self.next_l_out,
                                                next_states / input_scale)
        # entries of the givens dictionaries (mapping the symbolic batch
        # variables to the shared variables):
        states: self.states_shared,
        next_states: self.next_states_shared,
        states: self.states_shared,
        next_states: self.next_states_shared,
        rewards: self.rewards_shared,
        actions: self.actions_shared,
        terminals: self.terminals_shared
        # the shared variables are filled with batch data before the
        # compiled functions are called:
        self.states_shared.set_value(states)
        self.next_states_shared.set_value(next_states)

        states = np.zeros((self.batch_size, self.num_frames, self.input_height,
                           self.input_width), dtype=theano.config.floatX)
        states[0, ...] = state
        self.states_shared.set_value(states)
def main():
    net = DeepQLearner(84, 84, 16, 4, .99, .00025, .95, .95, 10000,
                       32, 'nature_cuda')


if __name__ == '__main__':
    main()