Closed. ilkarman closed this issue 6 years ago.
Hi ilkarman~
Seems to complain about finding biases:
I turned off biases with slim.arg_scope. Try it like this, please:
with slim.arg_scope(densenet.densenet_arg_scope()):
    logits, endpoints = densenet.densenet121(
        X,
        num_classes=1000,
        is_training=True,
        reuse=None)
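For reference, the arg_scope turns off the conv biases roughly like this (an illustrative sketch only, not the repo's exact code; see nets/densenet.py for the real scope):

import tensorflow.contrib.slim as slim

# Illustrative sketch: when biases_initializer is None, slim.conv2d creates no
# bias variables at all, which is why the checkpoint contains no biases to restore.
def example_arg_scope(weight_decay=1e-4):
    with slim.arg_scope([slim.conv2d],
                        weights_regularizer=slim.l2_regularizer(weight_decay),
                        biases_initializer=None) as scope:
        return scope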
Also was curious if there was a reason that shape is channels-last, since I thought channels-first is faster for cuDNN training?
You're right, but I just used slim's default data format, NHWC. If you want to use NCHW, you can do it simply by adding a data_format argument to slim.arg_scope.
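For example, something like this (a sketch only; I'm assuming densenet_arg_scope exposes a data_format parameter, please check nets/densenet.py, and the input X then has to be channels-first):

with slim.arg_scope(densenet.densenet_arg_scope(data_format='NCHW')):
    logits, endpoints = densenet.densenet121(
        X,                 # would need shape [None, CHANNELS, HEIGHT, WIDTH]
        num_classes=1000,
        is_training=True,
        reuse=None)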
Thanks~
Thanks, that seems to have done the trick!
I noticed that I wasn't able to pass num_classes=None, however. I want to add a fully-connected layer which I later train with a sigmoid cross-entropy loss, and I think the code currently defaults to a softmax activation.
Wanted to ask if this approach looks ok:
import os
import tarfile
import tensorflow as tf
import tensorflow.contrib.slim as slim
from nets import densenet  # from pudae/tensorflow-densenet
# (WIDTH, HEIGHT, CHANNELS, CLASSES and LR are constants defined earlier in my notebook)

# Extract checkpoint
CHKPT_DIR = 'tfdensenet/'
if not os.path.isdir(CHKPT_DIR):
    with tarfile.open("tf-densenet121.tar.gz") as t:
        t.extractall(CHKPT_DIR)

# Load variables into model (without this nothing is restored)
tf.train.get_or_create_global_step()

# Place-holders
X = tf.placeholder(tf.float32, shape=[None, WIDTH, HEIGHT, CHANNELS])
y = tf.placeholder(tf.float32, shape=[None, CLASSES])

# Import symbol
dense_args = densenet.densenet_arg_scope()
print(dense_args)  # Add NCHW later
with slim.arg_scope(dense_args):
    logits, _ = densenet.densenet121(X, num_classes=CLASSES, is_training=True, reuse=None)

# Collect variables to restore from checkpoint (skip the 1000-class ImageNet head)
variables_to_restore = slim.get_variables_to_restore(exclude=['densenet121/logits', 'predictions'])
#print(variables_to_restore)
model_path = os.path.join(CHKPT_DIR, "tf-densenet121.ckpt")
print(model_path)
init_fn = slim.assign_from_checkpoint_fn(model_path, variables_to_restore)

# Reshape logits to (None, CLASSES) since my label is (None, CLASSES)
sym = tf.reshape(logits, shape=[-1, CLASSES])

# Loss
loss_fn = tf.nn.sigmoid_cross_entropy_with_logits(logits=sym, labels=y)
loss = tf.reduce_mean(loss_fn)
optimizer = tf.train.AdamOptimizer(LR, beta1=0.9, beta2=0.999)
training_op = optimizer.minimize(loss)

print("Loading pre-trained weights")
sess = tf.Session()
init_fn(sess)  # Load from checkpoint
# Initialise uninitialised vars (FC layer & Adam)
init_uninitialized(sess)  # helper defined in my earlier resnet code below
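After that I was planning to train roughly like this (just a sketch; gen_batches, train_X/train_y, EPOCHS and BATCHSIZE are my own helpers/constants, not from the repo):

# Hypothetical training loop: gen_batches is an assumed helper that yields
# (batch_x, batch_y) numpy arrays matching the placeholder shapes.
for epoch in range(EPOCHS):
    for batch_x, batch_y in gen_batches(train_X, train_y, BATCHSIZE):
        _, batch_loss = sess.run([training_op, loss],
                                 feed_dict={X: batch_x, y: batch_y})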
Previously with resnet50 I was able to do it like so:
# (assumes resnet_v1 from tensorflow.contrib.slim.nets, plus the same imports as above
#  and the constants CHKPOINT, CLASSES, LR, WIDTH, HEIGHT, CHANNELS, PRETRAINED_WEIGHTS)
def get_symbol(model_name, in_tensor, chkpoint=CHKPOINT, out_features=CLASSES):
    if model_name == 'resnet50':
        # Load variables into model (without this nothing is restored)
        tf.train.get_or_create_global_step()
        # Import symbol (num_classes=None so no ImageNet head is created)
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            base_model, _ = resnet_v1.resnet_v1_50(in_tensor, None, is_training=True)
        # Collect variables to restore from checkpoint
        variables_to_restore = slim.get_variables_to_restore()
        #print(variables_to_restore)
        init_fn = slim.assign_from_checkpoint_fn(chkpoint, variables_to_restore)
        # Attach extra layers
        fc = tf.layers.dense(base_model, out_features, name='output')
        # Activation function will be included in loss
        sym = tf.reshape(fc, shape=[-1, out_features])
    elif model_name == 'densenet121':
        raise ValueError("Densenet is not yet implemented")
        # TODO: https://github.com/pudae/tensorflow-densenet/issues/10
    else:
        raise ValueError("Unknown model-name")
    return sym, init_fn

def init_symbol(sym, out_tensor, lr=LR):
    loss_fn = tf.nn.sigmoid_cross_entropy_with_logits(logits=sym, labels=out_tensor)
    loss = tf.reduce_mean(loss_fn)
    optimizer = tf.train.AdamOptimizer(lr, beta1=0.9, beta2=0.999)
    training_op = optimizer.minimize(loss)
    return training_op, loss

def init_uninitialized(sess):
    # Initialise only variables that aren't already set, so restored weights are kept
    global_vars = tf.global_variables()
    is_not_initialized = sess.run([tf.is_variable_initialized(var) for var in global_vars])
    not_initialized_vars = [v for (v, f) in zip(global_vars, is_not_initialized) if not f]
    if len(not_initialized_vars):
        #print("Initialising: ", not_initialized_vars)
        sess.run(tf.variables_initializer(not_initialized_vars))

# Place-holders
X = tf.placeholder(tf.float32, shape=[None, WIDTH, HEIGHT, CHANNELS])
y = tf.placeholder(tf.float32, shape=[None, CLASSES])
# Create symbol
sym, init_fn = get_symbol(model_name='resnet50', in_tensor=X)
# Create training operation
model, loss = init_symbol(sym=sym, out_tensor=y)
# Launch session and load model from checkpoint
sess = tf.Session()
# Temp
if PRETRAINED_WEIGHTS:
    print("Loading pre-trained weights")
    init_fn(sess)  # Load from checkpoint
# Initialise uninitialised vars (FC layer & Adam)
init_uninitialized(sess)
Edit: When I later want to use this model for scoring (having trained it on my data-set), would it be possible to pass a training flag via feed_dict (so that dropout and batch-norm behave correctly), or would the process be a bit more complicated?
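e.g. I was thinking of building the graph with something like this, but I'm not sure whether the slim batch-norm/dropout layers used here actually accept a tensor for is_training:

# Sketch only: a boolean placeholder defaulting to False (scoring mode); whether
# densenet121 accepts a tensor for is_training is my assumption, not confirmed.
is_training = tf.placeholder_with_default(False, shape=(), name='is_training')
with slim.arg_scope(densenet.densenet_arg_scope()):
    logits, _ = densenet.densenet121(X, num_classes=CLASSES,
                                     is_training=is_training, reuse=None)
# Training step: sess.run(training_op, feed_dict={X: bx, y: by, is_training: True})
# Scoring:       sess.run(logits, feed_dict={X: bx})  # is_training falls back to False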
Thanks very much! Ilia
Hi Pudae, thanks a lot for this super-neat implementation. I am trying to load the model (without the pre-processing functions, etc.) so that I can chop off the last FC layer and attach my own (to re-train on my data-set). However, I was having a bit of an issue loading the model without any of the functions attached:
Seems to complain about finding biases:
However, the end-points seem ok:
('densenet121/dense_block4', <tf.Tensor 'densenet121/dense_block4/conv_block16/concat:0' shape=(?, 7, 7, 1024) dtype=float32>),
('densenet121/logits', <tf.Tensor 'densenet121/logits/Relu:0' shape=(?, 1, 1, 1000) dtype=float32>),
('predictions', <tf.Tensor 'densenet121/predictions/Reshape_1:0' shape=(?, 1, 1, 1000) dtype=float32>)])
The idea is that I can just extract 'densenet121/dense_block4/conv_block16/concat:0' and add my own head, so that the final end-points become: ('densenet121/logits', <tf.Tensor 'densenet121/logits/Relu:0' shape=(?, 1, 1, 16) dtype=float32>), ('predictions', <tf.Tensor 'densenet121/predictions/Reshape_1:0' shape=(?, 1, 1, 16) dtype=float32>)
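Roughly something like this (just a sketch of what I mean; X is my input placeholder and 16 is my number of classes):

# Rough idea only: take the last dense-block endpoint, global-average-pool it,
# and attach my own 16-way classifier on top.
_, end_points = densenet.densenet121(X, num_classes=1000, is_training=True)
features = end_points['densenet121/dense_block4']   # shape (?, 7, 7, 1024)
pooled = tf.reduce_mean(features, axis=[1, 2])       # global average pool -> (?, 1024)
my_logits = tf.layers.dense(pooled, 16, name='my_logits')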
Also was curious if there was a reason that shape is channels-last, since I thought channels-first is faster for cuDNN training?
Thanks