DrSleep / tensorflow-deeplab-resnet

DeepLab-ResNet rebuilt in TensorFlow
MIT License
1.25k stars 431 forks source link

[use the npy2ckpt.py to transfer my own resnet50 pre-train model] #182

Open y-kl8 opened 6 years ago

y-kl8 commented 6 years ago

I want to use npy2ckpt.py to convert my own pre-trained ResNet-50 model:

The layer names in my pre-trained ResNet-50 model are: bn4c_branch2c

bn5b_branch2b

res3d_branch2b

res2b_branch2b

res2c_branch2a

bn4b_branch2a

res4b_branch2a

bn4a_branch2c

bn4e_branch2a

res3b_branch2c

bn3d_branch2c

res5a_branch1

res4f_branch2b

bn4d_branch2b

res5a_branch2c

bn3c_branch2c

res3c_branch2a

bn5b_branch2a

res4a_branch2a

res4a_branch1

res4a_branch2b

res4a_branch2c

res4e_branch2c

bn5b_branch2c

res4c_branch2c

bn5a_branch2b

bn4b_branch2c

res4d_branch2c

bn3d_branch2b

bn4a_branch1

res5c_branch2b

bn5a_branch2c

fc1000

bn3a_branch2b

conv1

res5a_branch2b

res2b_branch2c

res2a_branch1

res5b_branch2c

bn5a_branch2a

res5a_branch2a

bn3b_branch2c

bn4d_branch2c

bn5c_branch2a

res3b_branch2a

bn4f_branch2c

bn3c_branch2b

res3c_branch2c

bn3a_branch2c

bn2b_branch2b

res4e_branch2b

res5c_branch2c

res2a_branch2a

bn5c_branch2b

bn4a_branch2a

res2a_branch2b

res4b_branch2b

bn3b_branch2b

res2c_branch2c

res4c_branch2a ........... So I changed tensorflow-deeplab-resnet/deeplab_resnet/model.py as follows:

from kaffe.tensorflow import Network import tensorflow as tf

class DeepLabResNetModel(Network):
    def setup(self, is_training, num_classes):
        '''Assemble the network: stem, four residual stages, classifier head.

        Args:
          is_training: forwarded to every batch normalisation layer; decides
                       whether the running mean and variance are updated.
                       With a small batch size it is better to keep the
                       statistics of the pre-trained model frozen.
          num_classes: number of classes to predict (including background).
        '''
        def bottleneck(source, block, width, depth, stride=1, rate=None, project=False):
            '''Add one 1x1 -> 3x3 -> 1x1 residual unit and return its output layer name.

            source:  name of the layer feeding both the shortcut and the residual branch.
            block:   unit identifier such as '3a'; layers are named 'res<block>_*' / 'bn<block>_*'.
            width:   number of filters of the first 1x1 and of the 3x3 convolution.
            depth:   number of filters of the last 1x1 convolution (and of the shortcut).
            stride:  stride of the first 1x1 convolution and of the shortcut convolution.
            rate:    dilation rate of the 3x3 convolution; None selects a plain convolution.
            project: when true, the shortcut goes through its own 1x1 conv + batch norm
                     ('branch1'); otherwise `source` itself is added to the residual branch.
            '''
            res, bn = 'res' + block, 'bn' + block

            shortcut = source
            if project:
                shortcut = bn + '_branch1'
                (self.feed(source)
                     .conv(1, 1, depth, stride, stride, biased=False, relu=False, name=res + '_branch1')
                     .batch_normalization(is_training=is_training, activation_fn=None, name=shortcut))

            reduced = (self.feed(source)
                           .conv(1, 1, width, stride, stride, biased=False, relu=False, name=res + '_branch2a')
                           .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name=bn + '_branch2a'))

            if rate is None:
                spatial = reduced.conv(3, 3, width, 1, 1, biased=False, relu=False, name=res + '_branch2b')
            else:
                spatial = reduced.atrous_conv(3, 3, width, rate, padding='SAME', biased=False, relu=False, name=res + '_branch2b')

            (spatial.batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name=bn + '_branch2b')
                    .conv(1, 1, depth, 1, 1, biased=False, relu=False, name=res + '_branch2c')
                    .batch_normalization(is_training=is_training, activation_fn=None, name=bn + '_branch2c'))

            # Shortcut is fed first, residual branch second, then summed and rectified.
            output = res + '_relu'
            (self.feed(shortcut, bn + '_branch2c')
                 .add(name=res)
                 .relu(name=output))
            return output

        # Stem: 7x7 stride-2 convolution, batch norm with ReLU, 3x3 stride-2 max pooling.
        (self.feed('data')
             .conv(7, 7, 64, 2, 2, biased=False, relu=False, name='conv1')
             .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn_conv1')
             .max_pool(3, 3, 2, 2, name='pool1'))

        # (stage, unit letters, width, depth, stride of the first unit, dilation rate)
        stages = (
            ('2', 'abc', 64, 256, 1, None),
            ('3', 'abcd', 128, 512, 2, None),
            ('4', 'abcdef', 256, 1024, 1, 2),
            ('5', 'abc', 512, 2048, 1, 4),
        )

        tail = 'pool1'
        for stage, letters, width, depth, first_stride, rate in stages:
            for letter in letters:
                # Only the first unit of a stage projects its shortcut and may be strided.
                opens_stage = letter == 'a'
                tail = bottleneck(tail, stage + letter, width, depth,
                                  stride=first_stride if opens_stage else 1,
                                  rate=rate,
                                  project=opens_stage)

        # Classifier head: four dilated 3x3 convolutions over the last feature map,
        # one per rate, whose outputs are summed into 'fc1_voc12'.
        heads = []
        for index, rate in enumerate((6, 12, 18, 24)):
            head = 'fc1_voc12_c%d' % index
            (self.feed(tail)
                 .atrous_conv(3, 3, num_classes, rate, padding='SAME', relu=False, name=head))
            heads.append(head)

        (self.feed(*heads)
             .add(name='fc1_voc12'))

When I run npy2ckpt.py, I get the following error:

2018-05-24 15:03:19.815301: W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE4.1 instructions, but these are available on your machine and could speed up CPU computations. 2018-05-24 15:03:19.815332: W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE4.2 instructions, but these are available on your machine and could speed up CPU computations. 2018-05-24 15:03:19.815339: W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use AVX instructions, but these are available on your machine and could speed up CPU computations. 2018-05-24 15:03:19.815344: W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use AVX2 instructions, but these are available on your machine and could speed up CPU computations. 2018-05-24 15:03:19.815349: W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use FMA instructions, but these are available on your machine and could speed up CPU computations. 2018-05-24 15:03:21.625233: I tensorflow/core/common_runtime/gpu/gpu_device.cc:940] Found device 0 with properties: name: TITAN V major: 7 minor: 0 memoryClockRate (GHz) 1.455 pciBusID 0000:04:00.0 Total memory: 11.78GiB Free memory: 11.36GiB 2018-05-24 15:03:21.859880: W tensorflow/stream_executor/cuda/cuda_driver.cc:523] A non-primary context 0x4c00e90 exists before initializing the StreamExecutor. We haven't verified StreamExecutor works with that. 
2018-05-24 15:03:21.860587: I tensorflow/core/common_runtime/gpu/gpu_device.cc:940] Found device 1 with properties: name: GeForce GTX 1080 Ti major: 6 minor: 1 memoryClockRate (GHz) 1.62 pciBusID 0000:03:00.0 Total memory: 10.91GiB Free memory: 10.75GiB 2018-05-24 15:03:21.860614: I tensorflow/core/common_runtime/gpu/gpu_device.cc:832] Peer access not supported between device ordinals 0 and 1 2018-05-24 15:03:21.860622: I tensorflow/core/common_runtime/gpu/gpu_device.cc:832] Peer access not supported between device ordinals 1 and 0 2018-05-24 15:03:21.860631: I tensorflow/core/common_runtime/gpu/gpu_device.cc:961] DMA: 0 1 2018-05-24 15:03:21.860638: I tensorflow/core/common_runtime/gpu/gpu_device.cc:971] 0: Y N 2018-05-24 15:03:21.860660: I tensorflow/core/common_runtime/gpu/gpu_device.cc:971] 1: N Y 2018-05-24 15:03:21.860670: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1030] Creating TensorFlow device (/gpu:0) -> (device: 0, name: TITAN V, pci bus id: 0000:04:00.0) 2018-05-24 15:03:21.860677: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1030] Creating TensorFlow device (/gpu:1) -> (device: 1, name: GeForce GTX 1080 Ti, pci bus id: 0000:03:00.0) Traceback (most recent call last): File "npy2ckpt.py", line 69, in main() File "npy2ckpt.py", line 62, in main net.load(args.npy_path, sess) File "/home/yangyuchao/zip/tensorflow-deeplab-resnet-master/kaffe/tensorflow/network.py", line 66, in load session.run(var.assign(data)) File "/home/yangyuchao/tensorflow/local/lib/python2.7/site-packages/tensorflow/python/ops/variables.py", line 516, in assign return state_ops.assign(self._variable, value, use_locking=use_locking) File "/home/yangyuchao/tensorflow/local/lib/python2.7/site-packages/tensorflow/python/ops/state_ops.py", line 271, in assign validate_shape=validate_shape) File "/home/yangyuchao/tensorflow/local/lib/python2.7/site-packages/tensorflow/python/ops/gen_state_ops.py", line 45, in assign use_locking=use_locking, name=name) File 
"/home/yangyuchao/tensorflow/local/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 767, in apply_op op_def=op_def) File "/home/yangyuchao/tensorflow/local/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2508, in create_op set_shapes_for_outputs(ret) File "/home/yangyuchao/tensorflow/local/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1873, in set_shapes_for_outputs shapes = shape_func(op) File "/home/yangyuchao/tensorflow/local/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1823, in call_with_requiring return call_cpp_shape_fn(op, require_shape_fn=True) File "/home/yangyuchao/tensorflow/local/lib/python2.7/site-packages/tensorflow/python/framework/common_shapes.py", line 610, in call_cpp_shape_fn debug_python_shape_fn, require_shape_fn) File "/home/yangyuchao/tensorflow/local/lib/python2.7/site-packages/tensorflow/python/framework/common_shapes.py", line 676, in _call_cpp_shape_fn_impl raise ValueError(err.message) ValueError: Dimension 0 in both shapes must be equal, but are 2048 and 64 for 'bn5a_branch1_1/Assign' (op: 'Assign') with input shapes: [2048], [64].

DrSleep commented 6 years ago

It looks like the 'bn5a_branch1' parameters in your .npy file have 64 features, while your model definition expects 2048:

  .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='res5a_branch1')
  .batch_normalization(is_training=is_training, activation_fn=None, name='bn5a_branch1