openai / baselines

OpenAI Baselines: high-quality implementations of reinforcement learning algorithms
MIT License
15.71k stars 4.87k forks source link

ValueError: Variable ppo2_model/pi/c1/w/Adam/ already exists #548

Open williamjqk opened 6 years ago

williamjqk commented 6 years ago

When I attempt to train a ppo2 model in a Jupyter notebook, I get the following error.

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-5-1aa62686dc55> in <module>()
     12     model.save(save_path)
     13 
---> 14 model, _ = train(args, extra_args)
     15 
     16 if args.save_path is not None and rank == 0:

~/mywork/baselines/baselines/run.py in train(args, extra_args)
     69         seed=seed,
     70         total_timesteps=total_timesteps,
---> 71         **alg_kwargs
     72     )
     73 

~/mywork/baselines/baselines/ppo2/ppo2.py in learn(network, env, total_timesteps, seed, nsteps, ent_coef, lr, vf_coef, max_grad_norm, gamma, lam, log_interval, nminibatches, noptepochs, cliprange, save_interval, load_path, **network_kwargs)
    232         with open(osp.join(logger.get_dir(), 'make_model.pkl'), 'wb') as fh:
    233             fh.write(cloudpickle.dumps(make_model))
--> 234     model = make_model()
    235     if load_path is not None:
    236         model.load(load_path)

~/mywork/baselines/baselines/ppo2/ppo2.py in <lambda>()
    227     make_model = lambda : Model(policy=policy, ob_space=ob_space, ac_space=ac_space, nbatch_act=nenvs, nbatch_train=nbatch_train,
    228                     nsteps=nsteps, ent_coef=ent_coef, vf_coef=vf_coef,
--> 229                     max_grad_norm=max_grad_norm)
    230     if save_interval and logger.get_dir():
    231         import cloudpickle

~/mywork/baselines/baselines/ppo2/ppo2.py in __init__(self, policy, ob_space, ac_space, nbatch_act, nbatch_train, nsteps, ent_coef, vf_coef, max_grad_norm)
     58         grads_and_var = list(zip(grads, var))
     59 
---> 60         _train = trainer.apply_gradients(grads_and_var)
     61 
     62         def train(lr, cliprange, obs, returns, masks, actions, values, neglogpacs, states=None):

~/.opt/anaconda/lib/python3.6/site-packages/tensorflow/python/training/optimizer.py in apply_gradients(self, grads_and_vars, global_step, name)
    583                        ([str(v) for _, _, v in converted_grads_and_vars],))
    584     with ops.init_scope():
--> 585       self._create_slots(var_list)
    586     update_ops = []
    587     with ops.name_scope(name, self._name) as name:

~/.opt/anaconda/lib/python3.6/site-packages/tensorflow/python/training/adam.py in _create_slots(self, var_list)
    125     # Create slots for the first and second moments.
    126     for v in var_list:
--> 127       self._zeros_slot(v, "m", self._name)
    128       self._zeros_slot(v, "v", self._name)
    129 

~/.opt/anaconda/lib/python3.6/site-packages/tensorflow/python/training/optimizer.py in _zeros_slot(self, var, slot_name, op_name)
   1128     named_slots = self._slot_dict(slot_name)
   1129     if _var_key(var) not in named_slots:
-> 1130       new_slot_variable = slot_creator.create_zeros_slot(var, op_name)
   1131       self._restore_slot_variable(
   1132           slot_name=slot_name, variable=var,

~/.opt/anaconda/lib/python3.6/site-packages/tensorflow/python/training/slot_creator.py in create_zeros_slot(primary, name, dtype, colocate_with_primary)
    179     return create_slot_with_initializer(
    180         primary, initializer, slot_shape, dtype, name,
--> 181         colocate_with_primary=colocate_with_primary)
    182   else:
    183     if isinstance(primary, variables.Variable):

~/.opt/anaconda/lib/python3.6/site-packages/tensorflow/python/training/slot_creator.py in create_slot_with_initializer(primary, initializer, shape, dtype, name, colocate_with_primary)
    153       with distribution_strategy.colocate_vars_with(primary):
    154         return _create_slot_var(primary, initializer, "", validate_shape, shape,
--> 155                                 dtype)
    156     else:
    157       return _create_slot_var(primary, initializer, "", validate_shape, shape,

~/.opt/anaconda/lib/python3.6/site-packages/tensorflow/python/training/slot_creator.py in _create_slot_var(primary, val, scope, validate_shape, shape, dtype)
     63       use_resource=resource_variable_ops.is_resource_variable(primary),
     64       shape=shape, dtype=dtype,
---> 65       validate_shape=validate_shape)
     66   variable_scope.get_variable_scope().set_partitioner(current_partitioner)
     67 

~/.opt/anaconda/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py in get_variable(name, shape, dtype, initializer, regularizer, trainable, collections, caching_device, partitioner, validate_shape, use_resource, custom_getter, constraint)
   1326       partitioner=partitioner, validate_shape=validate_shape,
   1327       use_resource=use_resource, custom_getter=custom_getter,
-> 1328       constraint=constraint)
   1329 get_variable_or_local_docstring = (
   1330     """%s

~/.opt/anaconda/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py in get_variable(self, var_store, name, shape, dtype, initializer, regularizer, reuse, trainable, collections, caching_device, partitioner, validate_shape, use_resource, custom_getter, constraint)
   1088           partitioner=partitioner, validate_shape=validate_shape,
   1089           use_resource=use_resource, custom_getter=custom_getter,
-> 1090           constraint=constraint)
   1091 
   1092   def _get_partitioned_variable(self,

~/.opt/anaconda/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py in get_variable(self, name, shape, dtype, initializer, regularizer, reuse, trainable, collections, caching_device, partitioner, validate_shape, use_resource, custom_getter, constraint)
    433           caching_device=caching_device, partitioner=partitioner,
    434           validate_shape=validate_shape, use_resource=use_resource,
--> 435           constraint=constraint)
    436 
    437   def _get_partitioned_variable(

~/.opt/anaconda/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py in _true_getter(name, shape, dtype, initializer, regularizer, reuse, trainable, collections, caching_device, partitioner, validate_shape, use_resource, constraint)
    402           trainable=trainable, collections=collections,
    403           caching_device=caching_device, validate_shape=validate_shape,
--> 404           use_resource=use_resource, constraint=constraint)
    405 
    406     if custom_getter is not None:

~/.opt/anaconda/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py in _get_single_variable(self, name, shape, dtype, initializer, regularizer, partition_info, reuse, trainable, collections, caching_device, validate_shape, use_resource, constraint)
    741                          "reuse=tf.AUTO_REUSE in VarScope? "
    742                          "Originally defined at:\n\n%s" % (
--> 743                              name, "".join(traceback.format_list(tb))))
    744       found_var = self._vars[name]
    745       if not shape.is_compatible_with(found_var.get_shape()):

ValueError: Variable ppo2_model/pi/c1/w/Adam/ already exists, disallowed. Did you mean to set reuse=True or reuse=tf.AUTO_REUSE in VarScope? Originally defined at:

  File "/home/tom/mywork/baselines/baselines/ppo2/ppo2.py", line 60, in __init__
    _train = trainer.apply_gradients(grads_and_var)
  File "/home/tom/mywork/baselines/baselines/ppo2/ppo2.py", line 229, in <lambda>
    max_grad_norm=max_grad_norm)
  File "/home/tom/mywork/baselines/baselines/ppo2/ppo2.py", line 234, in learn
    model = make_model()
williamjqk commented 6 years ago

The discussion in #547 explains this: the model-building graph operations are not idempotent, so re-running the training cell in the same TensorFlow session tries to recreate variables (e.g. the Adam optimizer slots under `ppo2_model/pi/c1/w/`) that already exist. Restarting the kernel, resetting the default graph, or constructing the model inside a fresh variable scope with `reuse=tf.AUTO_REUSE` avoids the error.