DetectionTeamUCAS / FPN_Tensorflow

This is a tensorflow re-implementation of Feature Pyramid Networks for Object Detection.
https://github.com/DetectionTeamUCAS/FPN_Tensorflow
MIT License

Could I ask about MobileNet + FPN? #81

Closed rw1995 closed 5 years ago

rw1995 commented 5 years ago

Hi, I'd like to swap the ResNet part of the code for MobileNet. So, following the ResNet code, I made some simple changes to mobilenetv2.py under networks. Here is what I did:

```python
# -*- coding: utf-8 -*-
from __future__ import absolute_import, print_function, division

import tensorflow.contrib.slim as slim
import tensorflow as tf

from libs.configs import cfgs
from libs.networks.mobilenet import mobilenet_v2
from libs.networks.mobilenet.mobilenet import training_scope
from libs.networks.mobilenet.mobilenet_v2 import op
from libs.networks.mobilenet.mobilenet_v2 import ops

expand_input = ops.expand_input_by_factor

V2_BASE_DEF_1 = dict(
    defaults={
        # Note: these parameters of batch norm affect the architecture,
        # that's why they are here and not in training_scope.
        (slim.batch_norm,): {'center': True, 'scale': True},
        (slim.conv2d, slim.fully_connected, slim.separable_conv2d): {
            'normalizer_fn': slim.batch_norm, 'activation_fn': tf.nn.relu6
        },
        (ops.expanded_conv,): {
            'expansion_size': expand_input(6),
            'split_expansion': 1,
            'normalizer_fn': slim.batch_norm,
            'residual': True
        },
        (slim.conv2d, slim.separable_conv2d): {'padding': 'SAME'}
    },
    spec=[
        op(slim.conv2d, stride=2, num_outputs=32, kernel_size=[3, 3]),
        op(ops.expanded_conv,
           expansion_size=expand_input(1, divisible_by=1),
           num_outputs=16, scope='expanded_conv'),
        op(ops.expanded_conv, stride=2, num_outputs=24, scope='expanded_conv_1'),
        op(ops.expanded_conv, stride=1, num_outputs=24, scope='expanded_conv_2')
    ],
)

V2_BASE_DEF_2 = dict(
    defaults={
        # Note: these parameters of batch norm affect the architecture,
        # that's why they are here and not in training_scope.
        (slim.batch_norm,): {'center': True, 'scale': True},
        (slim.conv2d, slim.fully_connected, slim.separable_conv2d): {
            'normalizer_fn': slim.batch_norm, 'activation_fn': tf.nn.relu6
        },
        (ops.expanded_conv,): {
            'expansion_size': expand_input(6),
            'split_expansion': 1,
            'normalizer_fn': slim.batch_norm,
            'residual': True
        },
        (slim.conv2d, slim.separable_conv2d): {'padding': 'SAME'}
    },
    spec=[
        op(ops.expanded_conv, stride=2, num_outputs=32, scope='expanded_conv_3'),
        op(ops.expanded_conv, stride=1, num_outputs=32, scope='expanded_conv_4'),
        op(ops.expanded_conv, stride=1, num_outputs=32, scope='expanded_conv_5')
    ],
)

V2_BASE_DEF_3 = dict(
    defaults={
        # Note: these parameters of batch norm affect the architecture,
        # that's why they are here and not in training_scope.
        (slim.batch_norm,): {'center': True, 'scale': True},
        (slim.conv2d, slim.fully_connected, slim.separable_conv2d): {
            'normalizer_fn': slim.batch_norm, 'activation_fn': tf.nn.relu6
        },
        (ops.expanded_conv,): {
            'expansion_size': expand_input(6),
            'split_expansion': 1,
            'normalizer_fn': slim.batch_norm,
            'residual': True
        },
        (slim.conv2d, slim.separable_conv2d): {'padding': 'SAME'}
    },
    spec=[
        op(ops.expanded_conv, stride=2, num_outputs=64, scope='expanded_conv_6'),
        op(ops.expanded_conv, stride=1, num_outputs=64, scope='expanded_conv_7'),
        op(ops.expanded_conv, stride=1, num_outputs=64, scope='expanded_conv_8'),
        op(ops.expanded_conv, stride=1, num_outputs=64, scope='expanded_conv_9')
    ],
)

V2_BASE_DEF_4 = dict(
    defaults={
        # Note: these parameters of batch norm affect the architecture,
        # that's why they are here and not in training_scope.
        (slim.batch_norm,): {'center': True, 'scale': True},
        (slim.conv2d, slim.fully_connected, slim.separable_conv2d): {
            'normalizer_fn': slim.batch_norm, 'activation_fn': tf.nn.relu6
        },
        (ops.expanded_conv,): {
            'expansion_size': expand_input(6),
            'split_expansion': 1,
            'normalizer_fn': slim.batch_norm,
            'residual': True
        },
        (slim.conv2d, slim.separable_conv2d): {'padding': 'SAME'}
    },
    spec=[
        op(ops.expanded_conv, stride=1, num_outputs=96, scope='expanded_conv_10'),
        op(ops.expanded_conv, stride=1, num_outputs=96, scope='expanded_conv_11'),
        op(ops.expanded_conv, stride=1, num_outputs=96, scope='expanded_conv_12')
    ],
)

V2_HEAD_DEF = dict(
    defaults={
        # Note: these parameters of batch norm affect the architecture,
        # that's why they are here and not in training_scope.
        (slim.batch_norm,): {'center': True, 'scale': True},
        (slim.conv2d, slim.fully_connected, slim.separable_conv2d): {
            'normalizer_fn': slim.batch_norm, 'activation_fn': tf.nn.relu6
        },
        (ops.expanded_conv,): {
            'expansion_size': expand_input(6),
            'split_expansion': 1,
            'normalizer_fn': slim.batch_norm,
            'residual': True
        },
        (slim.conv2d, slim.separable_conv2d): {'padding': 'SAME'}
    },
    spec=[
        op(ops.expanded_conv, stride=2, num_outputs=160, scope='expanded_conv_13'),
        op(ops.expanded_conv, stride=1, num_outputs=160, scope='expanded_conv_14'),
        op(ops.expanded_conv, stride=1, num_outputs=160, scope='expanded_conv_15'),
        op(ops.expanded_conv, stride=1, num_outputs=320, scope='expanded_conv_16'),
        op(slim.conv2d, stride=1, kernel_size=[1, 1], num_outputs=1280, scope='Conv_1')
    ],
)


def mobilenetv2_scope(is_training=True,
                      trainable=True,
                      weight_decay=0.00004,
                      stddev=0.09,
                      dropout_keep_prob=0.8,
                      bn_decay=0.997):
    """Defines the MobileNet training scope. By default we do not use BN.

    Rewrite of the original scope.
    """
    batch_norm_params = {
        'is_training': False,
        'trainable': False,
        'decay': bn_decay,
    }
    with slim.arg_scope(training_scope(is_training=is_training, weight_decay=weight_decay)):
        with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.separable_conv2d],
                            trainable=trainable):
            with slim.arg_scope([slim.batch_norm], **batch_norm_params) as sc:
                return sc

def fusion_two_layer(C_i, P_j, scope):
    '''
    i = j + 1
    :param C_i: shape is [1, h, w, c]
    :param P_j: shape is [1, h/2, w/2, 256]
    :return: P_i
    '''
    with tf.variable_scope(scope):
        level_name = scope.split('_')[1]
        h, w = tf.shape(C_i)[1], tf.shape(C_i)[2]
        upsample_p = tf.image.resize_bilinear(P_j,
                                              size=[h, w],
                                              name='upsample_' + level_name)
        reduce_dim_c = slim.conv2d(C_i,
                                   num_outputs=256,
                                   kernel_size=[1, 1], stride=1,
                                   scope='reduce_dim_' + level_name)
        add_f = 0.5 * upsample_p + 0.5 * reduce_dim_c
        return add_f

# Build the network according to the V2_DEF configs

def mobilenetv2_base(img_batch, is_training=True):
    with slim.arg_scope(mobilenetv2_scope(is_training=is_training, trainable=True)):
        print("11111111111111111111111111111111232323")  # this line is executed
        C2, endpoints_1 = mobilenet_v2.mobilenet_base(
            input_tensor=img_batch, num_classes=None, is_training=False,
            depth_multiplier=1.0, scope='MobilenetV2',
            conv_defs=V2_BASE_DEF_1, finegrain_classification_mode=False)
        print("1111111111111111111111111111111111111")
        C3, endpoints_2 = mobilenet_v2.mobilenet_base(
            input_tensor=C2, num_classes=None, is_training=False,
            depth_multiplier=1.0, scope='MobilenetV2',
            conv_defs=V2_BASE_DEF_2, finegrain_classification_mode=False)
        C4, endpoints_3 = mobilenet_v2.mobilenet_base(
            input_tensor=C3, num_classes=None, is_training=False,
            depth_multiplier=1.0, scope='MobilenetV2',
            conv_defs=V2_BASE_DEF_3, finegrain_classification_mode=False)
        C5, endpoints_4 = mobilenet_v2.mobilenet_base(
            input_tensor=C4, num_classes=None, is_training=False,
            depth_multiplier=1.0, scope='MobilenetV2',
            conv_defs=V2_BASE_DEF_4, finegrain_classification_mode=False)
        feature_dict = {'C2': endpoints_1['{}'.format('mobilenetv2')],
                        'C3': endpoints_2['{}'.format('mobilenetv2')],
                        'C4': endpoints_3['{}'.format('mobilenetv2')],
                        'C5': endpoints_4['{}'.format('mobilenetv2')],
                        }

    pyramid_dict = {}
    with tf.variable_scope('build_pyramid'):
        with slim.arg_scope([slim.conv2d], weights_regularizer=slim.l2_regularizer(cfgs.WEIGHT_DECAY),
                            activation_fn=None, normalizer_fn=None):
            P5 = slim.conv2d(C5,
                             num_outputs=256,
                             kernel_size=[1, 1],
                             stride=1, scope='build_P5')
            if "P6" in cfgs.LEVLES:
                P6 = slim.max_pool2d(P5, kernel_size=[1, 1], stride=2, scope='build_P6')
                pyramid_dict['P6'] = P6

            pyramid_dict['P5'] = P5
            for level in range(4, 1, -1):  # build [P4, P3, P2]
                # the [] holds the key; the right-hand side is the value assigned to it
                pyramid_dict['P%d' % level] = fusion_two_layer(C_i=feature_dict["C%d" % level],
                                                               P_j=pyramid_dict["P%d" % (level + 1)],
                                                               scope='build_P%d' % level)
            for level in range(4, 1, -1):
                pyramid_dict['P%d' % level] = slim.conv2d(pyramid_dict['P%d' % level],
                                                          num_outputs=256, kernel_size=[3, 3], padding="SAME",
                                                          stride=1, scope="fuse_P%d" % level)
            # for level in range(5, 1, -1):
            #     add_heatmap(
            #     pyramid_dict['P%d' % level], name='Layer%d/P%d_heat' % (level, level))

    return [pyramid_dict[level_name] for level_name in cfgs.LEVLES]

def mobilenetv2_head(inputs, is_training=True):
    with slim.arg_scope(mobilenetv2_scope(is_training=is_training, trainable=True)):
        net, _ = mobilenet_v2.mobilenet(
            input_tensor=inputs, num_classes=None, is_training=False,
            depth_multiplier=1.0, scope='MobilenetV2',
            conv_defs=V2_HEAD_DEF, finegrain_classification_mode=False)

        net = tf.squeeze(net, [1, 2])
        return net
```

In theory there shouldn't be anything wrong with this =-=, but it errors at run time:

```
Traceback (most recent call last):
  File "train.py", line 186, in <module>
    train()
  File "train.py", line 48, in train
    gtboxes_batch=gtboxes_and_label)
  File "../libs/networks/build_whole_network.py", line 376, in build_whole_detection_network
    P_list = self.build_base_network(input_img_batch)  # [P2, P3, P4, P5, P6]
  File "../libs/networks/build_whole_network.py", line 37, in build_base_network
    return mobilenet_v2.mobilenetv2_base(input_img_batch, is_training=self.is_training)
  File "../libs/networks/mobilenet_v2.py", line 183, in mobilenetv2_base
    finegrain_classification_mode=False)
  File "/home/rw/anaconda3/envs/tf1.10/lib/python3.5/site-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 183, in func_with_args
    return func(*args, **current_args)
  File "../libs/networks/mobilenet/mobilenet_v2.py", line 162, in mobilenet_base
    base_only=True, **kwargs)
  File "/home/rw/anaconda3/envs/tf1.10/lib/python3.5/site-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 183, in func_with_args
    return func(*args, **current_args)
  File "../libs/networks/mobilenet/mobilenet_v2.py", line 154, in mobilenet
    **kwargs)
  File "/home/rw/anaconda3/envs/tf1.10/lib/python3.5/site-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 183, in func_with_args
    return func(*args, **current_args)
  File "../libs/networks/mobilenet/mobilenet.py", line 325, in mobilenet
    net, end_points = mobilenet_base(inputs, scope=scope, **mobilenet_args)
  File "/home/rw/anaconda3/envs/tf1.10/lib/python3.5/site-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 183, in func_with_args
    return func(*args, **current_args)
  File "../libs/networks/mobilenet/mobilenet.py", line 198, in mobilenet_base
    _set_arg_scope_defaults(conv_defs_defaults), \
  File "/home/rw/anaconda3/envs/tf1.10/lib/python3.5/contextlib.py", line 59, in __enter__
    return next(self.gen)
  File "../libs/networks/mobilenet/mobilenet.py", line 90, in _set_arg_scope_defaults
    func, default_arg = items[0]
TypeError: 'dict_items' object does not support indexing
```

The error says a dict object does not support indexing? I really can't figure this out; I'd appreciate some pointers.

rw1995 commented 5 years ago

The code is too long =-= and doesn't display well above, so briefly: 1. I split V2_BASE_DEF into V2_BASE_DEF_1, V2_BASE_DEF_2, V2_BASE_DEF_3 and V2_BASE_DEF_4; 2. then, following the ResNet code, I replaced feature_to_crop with C2, C3, C4, C5 one by one; 3. I copied your fusion code fusion_two_layer, and the rest follows the same pattern. Then it errored out. In any case, thanks a lot for writing this code!

yangxue0827 commented 5 years ago

https://github.com/DetectionTeamUCAS/FPN_Tensorflow/blob/master/libs/networks/mobilenet/mobilenet.py
Try inserting a line at line 90: `items = list(items)` @rw1995
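For reference, a minimal standalone sketch of what that fix addresses (toy values, not the repo's data): in Python 3, `dict.items()` returns a view that cannot be indexed, so the `items[0]` at line 90 fails until the view is converted to a list.

```python
# Minimal illustration of the error and the suggested fix (toy values only).
defaults = {('conv2d',): {'padding': 'SAME'}}

items = defaults.items()
# items[0]  # TypeError in Python 3: 'dict_items' object does not support indexing

items = list(items)           # the line suggested above, inserted before the indexing
func, default_arg = items[0]  # now indexing works
print(func, default_arg)      # ('conv2d',) {'padding': 'SAME'}
```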

rw1995 commented 5 years ago

Hi, the `items=list(items)` change works, but a new problem appeared, this time at the format call:

```
Traceback (most recent call last):
  File "train.py", line 186, in <module>
    train()
  File "train.py", line 48, in train
    gtboxes_batch=gtboxes_and_label)
  File "../libs/networks/build_whole_network.py", line 376, in build_whole_detection_network
    P_list = self.build_base_network(input_img_batch)  # [P2, P3, P4, P5, P6]
  File "../libs/networks/build_whole_network.py", line 37, in build_base_network
    return mobilenet_v2.mobilenetv2_base(input_img_batch, is_training=self.is_training)
  File "../libs/networks/mobilenet_v2.py", line 212, in mobilenetv2_base
    feature_dict={'C2':endpoints_1['{}/expanded_conv_2'.format('MobilenetV2_2')],
KeyError: 'MobilenetV2_2/expanded_conv_2'
```

I can roughly tell the problem is at the format call, so I printed C2:

```
//C2
Tensor("MobilenetV2/expanded_conv_1/output:0", shape=(1, ?, ?, 24), dtype=float32)

//end_point2
{'layer_2/depthwise_output': <tf.Tensor 'MobilenetV2/expanded_conv/depthwise_output:0' shape=(1, ?, ?, 32) dtype=float32>, 'layer_1': <tf.Tensor 'MobilenetV2/Conv/Relu6:0' shape=(1, ?, ?, 32) dtype=float32>, 'layer_3/expansion_output': <tf.Tensor 'MobilenetV2/expanded_conv_1/expansion_output:0' shape=(1, ?, ?, 96) dtype=float32>, 'layer_3/output': <tf.Tensor 'MobilenetV2/expanded_conv_1/output:0' shape=(1, ?, ?, 24) dtype=float32>, 'layer_2': <tf.Tensor 'MobilenetV2/expanded_conv/output:0' shape=(1, ?, ?, 16) dtype=float32>, 'layer_2/output': <tf.Tensor 'MobilenetV2/expanded_conv/output:0' shape=(1, ?, ?, 16) dtype=float32>, 'layer_3/depthwise_output': <tf.Tensor 'MobilenetV2/expanded_conv_1/depthwise_output:0' shape=(1, ?, ?, 96) dtype=float32>, 'layer_3': <tf.Tensor 'MobilenetV2/expanded_conv_1/output:0' shape=(1, ?, ?, 24) dtype=float32>}
```

Then I compared it with ResNet's C2:

```
//C2
Tensor("resnet_v1_50_1/block1/unit_3/bottleneck_v1/Relu:0", shape=(1, ?, ?, 256), dtype=float32)

//end_point_2
OrderedDict([('resnet_v1_50/block1/unit_1/bottleneck_v1/shortcut', <tf.Tensor 'resnet_v1_50_1/block1/unit_1/bottleneck_v1/shortcut/BatchNorm/FusedBatchNorm:0' shape=(1, ?, ?, 256) dtype=float32>), ('resnet_v1_50/block1/unit_1/bottleneck_v1/conv1', <tf.Tensor 'resnet_v1_50_1/block1/unit_1/bottleneck_v1/conv1/Relu:0' shape=(1, ?, ?, 64) dtype=float32>), ('resnet_v1_50/block1/unit_1/bottleneck_v1/conv2', <tf.Tensor 'resnet_v1_50_1/block1/unit_1/bottleneck_v1/conv2/Relu:0' shape=(1, ?, ?, 64) dtype=float32>), ('resnet_v1_50/block1/unit_1/bottleneck_v1/conv3', <tf.Tensor 'resnet_v1_50_1/block1/unit_1/bottleneck_v1/conv3/BatchNorm/FusedBatchNorm:0' shape=(1, ?, ?, 256) dtype=float32>), ('resnet_v1_50/block1/unit_1/bottleneck_v1', <tf.Tensor 'resnet_v1_50_1/block1/unit_1/bottleneck_v1/Relu:0' shape=(1, ?, ?, 256) dtype=float32>), ('resnet_v1_50/block1/unit_2/bottleneck_v1/conv1', <tf.Tensor 'resnet_v1_50_1/block1/unit_2/bottleneck_v1/conv1/Relu:0' shape=(1, ?, ?, 64) dtype=float32>), ('resnet_v1_50/block1/unit_2/bottleneck_v1/conv2', <tf.Tensor 'resnet_v1_50_1/block1/unit_2/bottleneck_v1/conv2/Relu:0' shape=(1, ?, ?, 64) dtype=float32>), ('resnet_v1_50/block1/unit_2/bottleneck_v1/conv3', <tf.Tensor 'resnet_v1_50_1/block1/unit_2/bottleneck_v1/conv3/BatchNorm/FusedBatchNorm:0' shape=(1, ?, ?, 256) dtype=float32>), ('resnet_v1_50/block1/unit_2/bottleneck_v1', <tf.Tensor 'resnet_v1_50_1/block1/unit_2/bottleneck_v1/Relu:0' shape=(1, ?, ?, 256) dtype=float32>), ('resnet_v1_50/block1/unit_3/bottleneck_v1/conv1', <tf.Tensor 'resnet_v1_50_1/block1/unit_3/bottleneck_v1/conv1/Relu:0' shape=(1, ?, ?, 64) dtype=float32>), ('resnet_v1_50/block1/unit_3/bottleneck_v1/conv2', <tf.Tensor 'resnet_v1_50_1/block1/unit_3/bottleneck_v1/conv2/Relu:0' shape=(1, ?, ?, 64) dtype=float32>), ('resnet_v1_50/block1/unit_3/bottleneck_v1/conv3', <tf.Tensor 'resnet_v1_50_1/block1/unit_3/bottleneck_v1/conv3/BatchNorm/FusedBatchNorm:0' shape=(1, ?, ?, 256) dtype=float32>), ('resnet_v1_50/block1/unit_3/bottleneck_v1', <tf.Tensor 'resnet_v1_50_1/block1/unit_3/bottleneck_v1/Relu:0' shape=(1, ?, ?, 256) dtype=float32>), ('resnet_v1_50/block1', <tf.Tensor 'resnet_v1_50_1/block1/unit_3/bottleneck_v1/Relu:0' shape=(1, ?, ?, 256) dtype=float32>)])
```

In ResNet, names like resnet_v1_50_1/block1/unit_3/bottleneck_v1/Relu:0 follow a clear pattern, but in MobileNet there is not only MobilenetV2/expanded_conv_1/ but also MobilenetV2/expanded_conv. Was it wrong in the first place to split V2_BASE_DEF directly into V2_BASE_DEF_1/2/3/4? I'd appreciate your advice.
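As a side note, rather than guessing the scope pattern, the available keys can simply be listed from the endpoint dict returned by mobilenet_base (a small debugging sketch; `endpoints_1` is assumed to be the dict whose dump is printed above):

```python
# Small debugging sketch: print every endpoint key with its tensor name and
# static shape, so the right 'C2'..'C5' entry can be picked by eye.
for key in sorted(endpoints_1):
    tensor = endpoints_1[key]
    print(key, '->', tensor.name, tensor.get_shape().as_list())
```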

rw1995 commented 5 years ago

Could you tell me why the format expression is written like this: 'C2': end_points_C2['{}/block1/unit_2/bottleneck_v1'.format(scope_name)]? Why that particular pattern?

yangxue0827 commented 5 years ago

I wrote it that way following the ResNet scope-naming pattern; it may not apply to MobileNet. You can simply write each key out one by one. @rw1995
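For illustration, writing the keys out one by one might look like the sketch below; the 'layer_*/output' names are placeholders in the style of the endpoint dump shown earlier and would have to be checked against the actual end_points of each split definition:

```python
# Hypothetical sketch: index the endpoint dicts by explicit keys instead of a
# formatted scope pattern. The keys below are placeholders; read the real ones
# off the end_points of each V2_BASE_DEF_* stage before using them.
feature_dict = {
    'C2': endpoints_1['layer_3/output'],
    'C3': endpoints_2['layer_3/output'],
    'C4': endpoints_3['layer_4/output'],
    'C5': endpoints_4['layer_3/output'],
}
```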

rw1995 commented 5 years ago

OK, I may have to set this aside for a while, since my fundamentals are too weak, hahaha.

rw1995 commented 5 years ago

Thanks again for your guidance. Many thanks!

rw1995 commented 5 years ago

Hi, I re-edited mobilenetv2, and in build_whole_network.py, right after img_shape = tf.shape(input_img_batch), I added a print(input_img_batch) to check whether P2-P6 are actually there. They are. The print output:

```
[<tf.Tensor 'mobilenetv2/build_pyramid/fuse_P2/BiasAdd:0' shape=(1, ?, ?, 256) dtype=float32>, <tf.Tensor 'mobilenetv2/build_pyramid/fuse_P3/BiasAdd:0' shape=(1, ?, ?, 256) dtype=float32>, <tf.Tensor 'mobilenetv2/build_pyramid/fuse_P4/BiasAdd:0' shape=(1, ?, ?, 256) dtype=float32>, <tf.Tensor 'mobilenetv2/build_pyramid/build_P5/BiasAdd:0' shape=(1, ?, ?, 256) dtype=float32>, <tf.Tensor 'mobilenetv2/build_pyramid/build_P6/MaxPool:0' shape=(1, ?, ?, 256) dtype=float32>]
```

But it still errors, which is maddening. The traceback:

```
Traceback (most recent call last):
  File "train.py", line 186, in <module>
    train()
  File "train.py", line 48, in train
    gtboxes_batch=gtboxes_and_label)
  File "../libs/networks/build_whole_network.py", line 529, in build_whole_detection_network
    img_shape=img_shape)
  File "/home/rw/anaconda3/envs/tf1.10/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 431, in __iter__
    "Tensor objects are not iterable when eager execution is not "
TypeError: Tensor objects are not iterable when eager execution is not enabled. To iterate over this tensor use tf.map_fn.
```

Is this a problem with iterating over the image? Does my rewritten mobilenetv2 + FPN even touch that? Does it have anything to do with my code? I'm completely puzzled. Also, since everything was rewritten, I set the pretrained ckpt to None and the restorer to None.
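For what it's worth, this TypeError is what graph-mode TF 1.x raises whenever a plain Tensor ends up somewhere Python tries to iterate or unpack it; a minimal, repo-independent illustration:

```python
# Minimal illustration (unrelated to the repo's code): iterating or unpacking a
# graph-mode Tensor triggers Tensor.__iter__, which raises this exact TypeError.
import tensorflow as tf  # TF 1.x, eager execution not enabled

t = tf.constant([1, 2, 3])
try:
    a, b, c = t  # unpacking calls iter(t)
except TypeError as e:
    print(e)  # Tensor objects are not iterable when eager execution is not enabled...
```

In other words, the traceback points at line 529 of build_whole_detection_network, where a single Tensor is being treated as an iterable; that is a hint about where to look rather than a problem with the pyramid outputs themselves.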

rw1995 commented 5 years ago

I only merged the MobileNetV2 model code and the FPN code and replaced the original, so the whole mobilenetv2 file now contains just the model and no longer calls any other .py files (except cfgs.py).

rw1995 commented 5 years ago

Solved for now and training is running, though the loss is on the high side. If the results turn out well, I'll post the code.

rw1995 commented 5 years ago

The result only reaches 65, a big gap compared with resnet_v1_50.

yangxue0827 commented 5 years ago

My results with Faster R-CNN + MobileNetV2 weren't great either. Would you mind pasting your mobilenet + FPN script here as a txt file? @rw1995

rw1995 commented 5 years ago

> My results with Faster R-CNN + MobileNetV2 weren't great either. Would you mind pasting your mobilenet + FPN script here as a txt file? @rw1995

Of course, if that helps you. Please give me a bit of time to write up a detailed explanation...

yangxue0827 commented 5 years ago

Or re-clone this repo, add the mobilenet code with the necessary English comments, and open a pull request; after it passes review I'll merge it. @rw1995