Closed sounansu closed 7 years ago
This project is using its own snapshot of tensorpack, which is not up-to-date with the upstream tensorpack you would've installed with pip.
Thank you Yuxin!
I changed my PYTHONPATH from tensorpack to ternarynet in order to use ternarynet's own tensorpack.
I also changed some code for TensorFlow 1.1.
Finally, I encountered another traceback:
Traceback (most recent call last):
File "./tw-cifar10-resnet.py", line 220, in <module>
SyncMultiGPUTrainer(config).train()
File "/home/sounansu/anaconda3/ternarynet/tensorpack/train/multigpu.py", line 80, in train
self.config.optimizer.apply_gradients(grads, get_global_step_var()),
File "/home/sounansu/anaconda3/envs/ternary/lib/python3.6/site-packages/tensorflow/python/training/optimizer.py", line 446, in apply_gradients
self._create_slots([_get_variable_for(v) for v in var_list])
File "/home/sounansu/anaconda3/envs/ternary/lib/python3.6/site-packages/tensorflow/python/training/momentum.py", line 63, in _create_slots
self._zeros_slot(v, "momentum", self._name)
File "/home/sounansu/anaconda3/envs/ternary/lib/python3.6/site-packages/tensorflow/python/training/optimizer.py", line 766, in _zeros_slot
named_slots[_var_key(var)] = slot_creator.create_zeros_slot(var, op_name)
File "/home/sounansu/anaconda3/envs/ternary/lib/python3.6/site-packages/tensorflow/python/training/slot_creator.py", line 174, in create_zeros_slot
colocate_with_primary=colocate_with_primary)
File "/home/sounansu/anaconda3/envs/ternary/lib/python3.6/site-packages/tensorflow/python/training/slot_creator.py", line 146, in create_slot_with_initializer
dtype)
File "/home/sounansu/anaconda3/envs/ternary/lib/python3.6/site-packages/tensorflow/python/training/slot_creator.py", line 66, in _create_slot_var
validate_shape=validate_shape)
File "/home/sounansu/anaconda3/envs/ternary/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py", line 1049, in get_variable
use_resource=use_resource, custom_getter=custom_getter)
File "/home/sounansu/anaconda3/envs/ternary/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py", line 948, in get_variable
use_resource=use_resource, custom_getter=custom_getter)
File "/home/sounansu/anaconda3/envs/ternary/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py", line 356, in get_variable
validate_shape=validate_shape, use_resource=use_resource)
File "/home/sounansu/anaconda3/envs/ternary/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py", line 341, in _true_getter
use_resource=use_resource)
File "/home/sounansu/anaconda3/envs/ternary/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py", line 671, in _get_single_variable
"VarScope?" % name)
ValueError: Variable conv0/W/Momentum/ does not exist, or was not created with tf.get_variable(). Did you mean to set reuse=None in VarScope?
Perhaps the TensorFlow optimizer APIs were changed, but SyncMultiGPUTrainer in ternary/tensorpack was not changed. Is that true?
This old version of tensorpack seems to be for TF 0.8, and as you may know there are a lot of incompatible changes since TF1.0. So old tensorpack + new tensorflow
probably won't work.
Upgrading to new tensorpack API is not too hard. There is a doc about it. I've done the following changes to ternarynet to at least make it runnable with latest tensorpack + latest tensorflow:
diff --git i/examples/Ternary-Net/ternary.py w/examples/Ternary-Net/ternary.py
index a1db7f5..ac2d937 100644
--- i/examples/Ternary-Net/ternary.py
+++ w/examples/Ternary-Net/ternary.py
@@ -36,20 +36,19 @@ def tw_ternarize(x, thre):
w_p = tf.get_variable('Wp', collections=[tf.GraphKeys.VARIABLES, 'positives'], initializer=1.0)
w_n = tf.get_variable('Wn', collections=[tf.GraphKeys.VARIABLES, 'negatives'], initializer=1.0)
- tf.scalar_summary(w_p.name, w_p)
- tf.scalar_summary(w_n.name, w_n)
+ tf.summary.scalar(w_p.name, w_p)
+ tf.summary.scalar(w_n.name, w_n)
mask = tf.ones(shape)
- mask_p = tf.select(x > thre_x, tf.ones(shape) * w_p, mask)
- mask_np = tf.select(x < -thre_x, tf.ones(shape) * w_n, mask_p)
- mask_z = tf.select((x < thre_x) & (x > - thre_x), tf.zeros(shape), mask)
+ mask_p = tf.where(x > thre_x, tf.ones(shape) * w_p, mask)
+ mask_np = tf.where(x < -thre_x, tf.ones(shape) * w_n, mask_p)
+ mask_z = tf.where((x < thre_x) & (x > - thre_x), tf.zeros(shape), mask)
with G.gradient_override_map({"Sign": "Identity", "Mul": "Add"}):
w = tf.sign(x) * tf.stop_gradient(mask_z)
w = w * mask_np
- tf.histogram_summary(w.name, w)
return w
diff --git i/examples/Ternary-Net/tw-cifar10-resnet.py w/examples/Ternary-Net/tw-cifar10-resnet.py
index 8bda57d..d2fe302 100755
--- i/examples/Ternary-Net/tw-cifar10-resnet.py
+++ w/examples/Ternary-Net/tw-cifar10-resnet.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
-# File: cifar10-resnet.py
+# File: tw-cifar10-resnet.py
# Author: Yuxin Wu <ppwwyyxx@gmail.com>
import numpy as np
@@ -34,9 +34,9 @@ class Model(ModelDesc):
super(Model, self).__init__()
self.n = n
- def _get_input_vars(self):
- return [InputVar(tf.float32, [None, 32, 32, 3], 'input'),
- InputVar(tf.int32, [None], 'label')
+ def _get_inputs(self):
+ return [InputDesc(tf.float32, [None, 32, 32, 3], 'input'),
+ InputDesc(tf.int32, [None], 'label')
]
def _build_graph(self, input_vars):
@@ -112,7 +112,8 @@ class Model(ModelDesc):
tf.get_variable = old_get_variable
prob = tf.nn.softmax(logits, name='output')
- cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label)
+ cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
+ labels=label)
cost = tf.reduce_mean(cost, name='cross_entropy_loss')
wrong = prediction_incorrect(logits, label)
@@ -123,14 +124,20 @@ class Model(ModelDesc):
# weight decay on all W of fc layers
wd_w = tf.train.exponential_decay(0.0002, get_global_step_var(),
480000, 0.2, True)
- wd_cost = tf.mul(wd_w, regularize_cost('.*/W', tf.nn.l2_loss), name='wd_cost')
+ wd_cost = tf.multiply(wd_w, regularize_cost('.*/W', tf.nn.l2_loss), name='wd_cost')
add_moving_summary(cost, wd_cost)
add_param_summary([('.*/W', ['histogram'])]) # monitor W
- writer = tf.train.SummaryWriter('log', graph=tf.get_default_graph())
self.cost = tf.add_n([cost, wd_cost], name='cost')
+ def _get_optimizer(self):
+ lr = tf.get_variable('learning_rate', initializer=0.1, trainable=False)
+ tf.summary.scalar('learning_rate', lr)
+
+ optimizer = tf.train.MomentumOptimizer(lr, 0.9)
+ return optimizer
+
def get_data(train_or_test):
isTrain = train_or_test == 'train'
ds = dataset.Cifar10(train_or_test)
@@ -176,26 +183,20 @@ def get_config(nsize):
sess_config = get_default_sess_config(0.9)
get_global_step_var()
- lr = tf.Variable(0.1, trainable=False, name='learning_rate')
- tf.scalar_summary('learning_rate', lr)
-
- optimizer = tf.train.MomentumOptimizer(lr, 0.9)
return TrainConfig(
- dataset=dataset_train,
- optimizer=optimizer,
- callbacks=Callbacks([
- StatPrinter(),
+ dataflow=dataset_train,
+ callbacks=[
ModelSaver(),
InferenceRunner(dataset_test,
[ScalarStats('cost'), ClassificationError()]),
# PruneRunner(),
ScheduledHyperParamSetter('learning_rate',
[(1, 0.1), (82, 0.01), (123, 0.001), (150, 0.0001)])
- ]),
+ ],
session_config=sess_config,
model=Model(n=nsize),
- step_per_epoch=step_per_epoch,
+ steps_per_epoch=step_per_epoch,
max_epoch=400,
)
Thank you very much!
I changed my files the same way as you did.
And I installed the newest tensorpack with this command:
pip install -U git+https://github.com/ppwwyyxx/tensorpack.git
$ export PYTHONPATH=
$ python
Python 3.6.1 |Continuum Analytics, Inc.| (default, May 11 2017, 13:09:58)
[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import tensorpack
>>> print(tensorpack.__version__)
0.1.9
>>>
But another traceback occurred.
$ python tw-cifar10-resnet.py --gpus 0
Traceback (most recent call last):
File "tw-cifar10-resnet.py", line 219, in <module>
SyncMultiGPUTrainer(config).train()
File "/home/sounansu/anaconda3/envs/ternary/lib/python3.6/site-packages/tensorpack/train/base.py", line 93, in train
self.setup()
File "/home/sounansu/anaconda3/envs/ternary/lib/python3.6/site-packages/tensorpack/train/base.py", line 108, in setup
self._setup() # subclass will setup the graph
File "/home/sounansu/anaconda3/envs/ternary/lib/python3.6/site-packages/tensorpack/train/multigpu.py", line 180, in _setup
self.config.tower, lambda: self._get_cost_and_grad()[1], devices)
File "/home/sounansu/anaconda3/envs/ternary/lib/python3.6/site-packages/tensorpack/train/multigpu.py", line 79, in build_on_multi_tower
ret.append(func())
File "/home/sounansu/anaconda3/envs/ternary/lib/python3.6/site-packages/tensorpack/train/multigpu.py", line 180, in <lambda>
self.config.tower, lambda: self._get_cost_and_grad()[1], devices)
File "/home/sounansu/anaconda3/envs/ternary/lib/python3.6/site-packages/tensorpack/train/feedfree.py", line 67, in _get_cost_and_grad
opt = self.model.get_optimizer()
File "/home/sounansu/anaconda3/envs/ternary/lib/python3.6/site-packages/tensorpack/models/model_desc.py", line 143, in get_optimizer
return self._get_optimizer()
File "/home/sounansu/anaconda3/envs/ternary/lib/python3.6/site-packages/tensorpack/models/model_desc.py", line 146, in _get_optimizer
raise NotImplementedError()
NotImplementedError
I changed the files as below.
diff --git a/examples/Ternary-Net/ternary.py b/examples/Ternary-Net/ternary.py
index a1db7f5..ac2d937 100644
--- a/examples/Ternary-Net/ternary.py
+++ b/examples/Ternary-Net/ternary.py
@@ -36,20 +36,19 @@ def tw_ternarize(x, thre):
w_p = tf.get_variable('Wp', collections=[tf.GraphKeys.VARIABLES, 'positives'], initializer=1.0)
w_n = tf.get_variable('Wn', collections=[tf.GraphKeys.VARIABLES, 'negatives'], initializer=1.0)
- tf.scalar_summary(w_p.name, w_p)
- tf.scalar_summary(w_n.name, w_n)
+ tf.summary.scalar(w_p.name, w_p)
+ tf.summary.scalar(w_n.name, w_n)
mask = tf.ones(shape)
- mask_p = tf.select(x > thre_x, tf.ones(shape) * w_p, mask)
- mask_np = tf.select(x < -thre_x, tf.ones(shape) * w_n, mask_p)
- mask_z = tf.select((x < thre_x) & (x > - thre_x), tf.zeros(shape), mask)
+ mask_p = tf.where(x > thre_x, tf.ones(shape) * w_p, mask)
+ mask_np = tf.where(x < -thre_x, tf.ones(shape) * w_n, mask_p)
+ mask_z = tf.where((x < thre_x) & (x > - thre_x), tf.zeros(shape), mask)
with G.gradient_override_map({"Sign": "Identity", "Mul": "Add"}):
w = tf.sign(x) * tf.stop_gradient(mask_z)
w = w * mask_np
- tf.histogram_summary(w.name, w)
return w
diff --git a/examples/Ternary-Net/tw-cifar10-resnet.py b/examples/Ternary-Net/tw-cifar10-resnet.py
index 8bda57d..7423598 100755
--- a/examples/Ternary-Net/tw-cifar10-resnet.py
+++ b/examples/Ternary-Net/tw-cifar10-resnet.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
-# File: cifar10-resnet.py
+# File: tw-cifar10-resnet.py
# Author: Yuxin Wu <ppwwyyxx@gmail.com>
import numpy as np
@@ -34,9 +34,9 @@ class Model(ModelDesc):
super(Model, self).__init__()
self.n = n
- def _get_input_vars(self):
- return [InputVar(tf.float32, [None, 32, 32, 3], 'input'),
- InputVar(tf.int32, [None], 'label')
+ def _get_inputs(self):
+ return [InputDesc(tf.float32, [None, 32, 32, 3], 'input'),
+ InputDesc(tf.int32, [None], 'label')
]
def _build_graph(self, input_vars):
@@ -112,7 +112,7 @@ class Model(ModelDesc):
tf.get_variable = old_get_variable
prob = tf.nn.softmax(logits, name='output')
- cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label)
+ cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
cost = tf.reduce_mean(cost, name='cross_entropy_loss')
wrong = prediction_incorrect(logits, label)
@@ -123,14 +123,20 @@ class Model(ModelDesc):
# weight decay on all W of fc layers
wd_w = tf.train.exponential_decay(0.0002, get_global_step_var(),
480000, 0.2, True)
- wd_cost = tf.mul(wd_w, regularize_cost('.*/W', tf.nn.l2_loss), name='wd_cost')
+ wd_cost = tf.multiply(wd_w, regularize_cost('.*/W', tf.nn.l2_loss), name='wd_cost')
add_moving_summary(cost, wd_cost)
add_param_summary([('.*/W', ['histogram'])]) # monitor W
- writer = tf.train.SummaryWriter('log', graph=tf.get_default_graph())
self.cost = tf.add_n([cost, wd_cost], name='cost')
+ def _get_optimizer(self):
+ lr = tf.get_variable('learning_rate', initializer=0.1, trainable=False)
+ tf.summary.scalar('learning_rate', lr)
+
+ optimizer = tf.train.MomentumOptimizer(lr, 0.9)
+ return optimizer
+
def get_data(train_or_test):
isTrain = train_or_test == 'train'
ds = dataset.Cifar10(train_or_test)
@@ -176,26 +182,20 @@ def get_config(nsize):
sess_config = get_default_sess_config(0.9)
get_global_step_var()
- lr = tf.Variable(0.1, trainable=False, name='learning_rate')
- tf.scalar_summary('learning_rate', lr)
-
- optimizer = tf.train.MomentumOptimizer(lr, 0.9)
return TrainConfig(
- dataset=dataset_train,
- optimizer=optimizer,
- callbacks=Callbacks([
- StatPrinter(),
+ dataflow=dataset_train,
+ callbacks=[
ModelSaver(),
InferenceRunner(dataset_test,
[ScalarStats('cost'), ClassificationError()]),
# PruneRunner(),
ScheduledHyperParamSetter('learning_rate',
[(1, 0.1), (82, 0.01), (123, 0.001), (150, 0.0001)])
- ]),
+ ],
session_config=sess_config,
model=Model(n=nsize),
- step_per_epoch=step_per_epoch,
+ steps_per_epoch=step_per_epoch,
max_epoch=400,
)
Your diff is different from mine.
Your _get_optimizer
function is wrongly indented.
Ohhhhh.
I missed the indentation.
Thank you very much!
Now it works fine!
Again, thank you!
Ahhh, who on earth is the owner of this repo?!
Hi Chenzhuo!
I tried to train cifar10 by
But a traceback occurred.
My environment was as below. By conda install:
by pip install