Closed fastalgo closed 4 years ago
It is worth noting that your code worked well for MNIST training with LeNet.
Hi, the problem is caused by optimizer methods that are not implemented, so it is not a bug but simply a consequence of missing functionality:
def _apply_sparse(self, grad, var):
raise NotImplementedError("Sparse gradient updates are not supported.")
def _resource_apply_dense(self, grad, handle):
raise NotImplementedError("Resource apply dense not implemented.")
def _resource_apply_sparse(self, grad, handle, indices):
raise NotImplementedError("Resource apply sparce not implemented.")
This means that ResNet requires some operations to work (sparse updates and, as the traceback below shows, resource-variable dense updates), which I did not implement for my demos.
Additionally, from my observation the AdaptiveNormalizedSGD
does not work very well for more complex tasks. It tends to decrease the learning rate
too fast, and the learning stops.
**Hi, I want to use your solver for CIFAR-10 training with ResNet-32. I used the official TensorFlow code (https://github.com/tensorflow/models/tree/master/official/resnet).
I only changed one line of code in https://github.com/tensorflow/models/blob/master/official/resnet/resnet_run_loop.py:**
#optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=momentum)
optimizer = tf_opt.AdaptiveNormalizedSGD(lr=0.1, norm_type='std')
but I got the following errors:
Traceback (most recent call last): File "cifar10_main.py", line 260, in
absl_app.run(main)
File "/home/user/tensorflow3/lib/python3.5/site-packages/absl/app.py", line 274, in run
_run_main(main, argv)
File "/home/user/tensorflow3/lib/python3.5/site-packages/absl/app.py", line 238, in _run_main
sys.exit(main(argv))
File "cifar10_main.py", line 254, in main
run_cifar(flags.FLAGS)
File "/usr/lib/python3.5/contextlib.py", line 77, in exit
self.gen.throw(type, value, traceback)
File "/home/user/models_base/models/official/utils/logs/logger.py", line 100, in benchmark_context
yield
File "cifar10_main.py", line 254, in main
run_cifar(flags.FLAGS)
File "cifar10_main.py", line 249, in run_cifar
shape=[_HEIGHT, _WIDTH, _NUM_CHANNELS])
File "/home/user/models_base/models/official/resnet/resnet_run_loop.py", line 415, in resnet_main
max_steps=flags_obj.max_train_steps)
File "/home/user/tensorflow3/lib/python3.5/site-packages/tensorflow/python/estimator/estimator.py", line 363, in train
loss = self._train_model(input_fn, hooks, saving_listeners)
File "/home/user/tensorflow3/lib/python3.5/site-packages/tensorflow/python/estimator/estimator.py", line 841, in _train_model
return self._train_model_distributed(input_fn, hooks, saving_listeners)
File "/home/user/tensorflow3/lib/python3.5/site-packages/tensorflow/python/estimator/estimator.py", line 977, in _train_model_distributed
saving_listeners)
File "/usr/lib/python3.5/contextlib.py", line 77, in exit
self.gen.throw(type, value, traceback)
File "/home/user/tensorflow3/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 5265, in get_controller
yield g
File "/usr/lib/python3.5/contextlib.py", line 77, in exit
self.gen.throw(type, value, traceback)
File "/home/user/tensorflow3/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 5060, in get_controller
yield default
File "/home/user/tensorflow3/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 5265, in get_controller
yield g
File "/home/user/tensorflow3/lib/python3.5/site-packages/tensorflow/python/estimator/estimator.py", line 977, in _train_model_distributed
saving_listeners)
File "/home/user/tensorflow3/lib/python3.5/site-packages/tensorflow/python/training/distribute.py", line 304, in exit
self._var_creator_scope.exit(exception_type, exception_value, traceback)
File "/usr/lib/python3.5/contextlib.py", line 77, in exit
self.gen.throw(type, value, traceback)
File "/home/user/tensorflow3/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 2283, in variable_creator_scope
yield
File "/usr/lib/python3.5/contextlib.py", line 77, in exit
self.gen.throw(type, value, traceback)
File "/home/user/tensorflow3/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 2939, in _variable_creator_scope
yield
File "/home/user/tensorflow3/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 2283, in variable_creator_scope
yield
File "/home/user/tensorflow3/lib/python3.5/site-packages/tensorflow/python/estimator/estimator.py", line 884, in _train_model_distributed
self.config)
File "/home/user/tensorflow3/lib/python3.5/site-packages/tensorflow/python/training/distribute.py", line 756, in call_for_each_tower
return self._call_for_each_tower(fn, *args, kwargs)
File "/home/user/tensorflow3/lib/python3.5/site-packages/tensorflow/contrib/distribute/python/one_device_strategy.py", line 78, in _call_for_each_tower
return fn(*args, *kwargs)
File "/usr/lib/python3.5/contextlib.py", line 77, in exit
self.gen.throw(type, value, traceback)
File "/home/user/tensorflow3/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 4338, in device
yield
File "/home/user/tensorflow3/lib/python3.5/site-packages/tensorflow/contrib/distribute/python/one_device_strategy.py", line 78, in _call_for_each_tower
return fn(args, kwargs)
File "/home/user/tensorflow3/lib/python3.5/site-packages/tensorflow/python/estimator/estimator.py", line 831, in _call_model_fn
model_fn_results = self._model_fn(features=features, kwargs)
File "cifar10_main.py", line 224, in cifar10_model_fn
dtype=params['dtype']
File "/home/user/models_base/models/official/resnet/resnet_run_loop.py", line 296, in resnet_model_fn
minimize_op = optimizer.minimize(loss, global_step)
File "/home/user/tensorflow3/lib/python3.5/site-packages/tensorflow/python/training/optimizer.py", line 424, in minimize
name=name)
File "/home/user/tensorflow3/lib/python3.5/site-packages/tensorflow/python/training/optimizer.py", line 572, in apply_gradients
self._distributed_apply, grads_and_vars, global_step, name)
File "/home/user/tensorflow3/lib/python3.5/site-packages/tensorflow/python/training/distribute.py", line 1045, in merge_call
return self._merge_call(merge_fn, *args, *kwargs)
File "/home/user/tensorflow3/lib/python3.5/site-packages/tensorflow/python/training/distribute.py", line 1052, in _merge_call
return merge_fn(self._distribution_strategy, args, kwargs)
File "/home/user/tensorflow3/lib/python3.5/site-packages/tensorflow/python/training/optimizer.py", line 729, in _distributed_apply
return apply_updates
File "/home/user/tensorflow3/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 5991, in exit
self._name_scope.exit(type_arg, value_arg, traceback_arg)
File "/usr/lib/python3.5/contextlib.py", line 77, in exit
self.gen.throw(type, value, traceback)
File "/home/user/tensorflow3/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 4115, in name_scope
yield "" if new_stack is None else new_stack + "/"
File "/home/user/tensorflow3/lib/python3.5/site-packages/tensorflow/python/training/optimizer.py", line 702, in _distributed_apply
for grad, var in grads_and_vars
File "/home/user/tensorflow3/lib/python3.5/site-packages/tensorflow/python/training/optimizer.py", line 703, in
for op in distribution.unwrap(distribution.update(var, update, grad))
File "/home/user/tensorflow3/lib/python3.5/site-packages/tensorflow/python/training/distribute.py", line 838, in update
return self._update(var, fn, *args, kwargs)
File "/home/user/tensorflow3/lib/python3.5/site-packages/tensorflow/contrib/distribute/python/one_device_strategy.py", line 99, in _update
return fn(var, *args, *kwargs)
File "/usr/lib/python3.5/contextlib.py", line 77, in exit
self.gen.throw(type, value, traceback)
File "/home/user/tensorflow3/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 4338, in device
yield
File "/home/user/tensorflow3/lib/python3.5/site-packages/tensorflow/contrib/distribute/python/one_device_strategy.py", line 99, in _update
return fn(var, args, kwargs)
File "/home/user/tensorflow3/lib/python3.5/site-packages/tensorflow/python/training/optimizer.py", line 695, in update
return p.update_op(self, g)
File "/home/user/tensorflow3/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 5991, in exit
self._name_scope.exit(type_arg, value_arg, traceback_arg)
File "/usr/lib/python3.5/contextlib.py", line 77, in exit
self.gen.throw(type, value, traceback)
File "/home/user/tensorflow3/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 4115, in name_scope
yield "" if new_stack is None else new_stack + "/"
File "/home/user/tensorflow3/lib/python3.5/site-packages/tensorflow/python/training/optimizer.py", line 695, in update
return p.update_op(self, g)
File "/usr/lib/python3.5/contextlib.py", line 77, in exit
self.gen.throw(type, value, traceback)
File "/home/user/tensorflow3/lib/python3.5/site-packages/tensorflow/python/eager/context.py", line 514, in device_policy
yield
File "/home/user/tensorflow3/lib/python3.5/site-packages/tensorflow/python/training/optimizer.py", line 695, in update
return p.update_op(self, g)
File "/home/user/tensorflow3/lib/python3.5/site-packages/tensorflow/python/training/optimizer.py", line 165, in update_op
update_op = optimizer._resource_apply_dense(g, self._v)
File "/home/user/deep-learning-notes/max-normed-optimizer/src/tf_optimizer.py", line 350, in _resource_apply_dense
raise NotImplementedError("Resource apply dense not implemented.")
NotImplementedError: Resource apply dense not implemented.