CharlesShang / TFFRCNN

FastER RCNN built on tensorflow
MIT License

Using Resnet50 for FRCN #7

Closed: miraclebiu closed this issue 7 years ago

miraclebiu commented 7 years ago

Sorry to bother you again. I have tried to replace VGG with ResNet50, so I added two layers: an add layer and a batch_normalization layer:

@layer
def add(self, input, name):
    # Element-wise sum of the two fed inputs (the residual shortcut).
    return tf.add(input[0], input[1])

@layer
def batch_normalization(self, input, name, relu=True, is_training=False):
    # Thin wrapper around tf.contrib.layers.batch_norm, optionally
    # followed by a ReLU.
    temp_layer = tf.contrib.layers.batch_norm(
        input, scale=True, center=True, is_training=is_training, scope=name)
    return tf.nn.relu(temp_layer) if relu else temp_layer

The model file was converted with the caffe-tensorflow project (you can search for it on GitHub), and I converted the Caffe BN layer's data to moving_average and so on, but the result is really bad: mAP is about 55%, and I don't know the reason. Maybe the model was not converted correctly?
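For reference, the mapping I used was roughly this (just a sketch; it assumes the standard Caffe BatchNorm layer followed by a Scale layer, and the names are illustrative, not the converter's actual output):

import numpy as np

def caffe_bn_to_tf(bn_blobs, scale_blobs):
    # bn_blobs:    [mean, variance, scale_factor] from Caffe's BatchNorm
    # scale_blobs: [gamma, beta] from the following Scale layer
    mean, var, factor = bn_blobs
    # Caffe stores unnormalized running statistics; divide by the
    # accumulated scale factor to recover the true moving averages.
    s = 0.0 if factor[0] == 0 else 1.0 / factor[0]
    return {
        'moving_mean': mean * s,      # -> batch_norm moving_mean
        'moving_variance': var * s,   # -> batch_norm moving_variance
        'gamma': scale_blobs[0],      # -> scale (scale=True)
        'beta': scale_blobs[1],       # -> center (center=True)
    }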

Have you tried replacing VGG with ResNet50?

Below is resnet50_train.py:

class Resnet50_train(Network):
    def __init__(self, trainable=True):
        self.inputs = []
        self.data = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='data')
        self.im_info = tf.placeholder(tf.float32, shape=[None, 3], name='im_info')
        self.gt_boxes = tf.placeholder(tf.float32, shape=[None, 5], name='gt_boxes')
        self.gt_ishard = tf.placeholder(tf.int32, shape=[None], name='gt_ishard')
        self.dontcare_areas = tf.placeholder(tf.float32, shape=[None, 4], name='dontcare_areas')
        self.keep_prob = tf.placeholder(tf.float32)
        self.layers = dict({'data': self.data, 'im_info': self.im_info, 'gt_boxes': self.gt_boxes,
                            'gt_ishard': self.gt_ishard, 'dontcare_areas': self.dontcare_areas})
        self.trainable = trainable
        self.setup()

def setup(self):

    n_classes = cfg.NCLASSES
    # anchor_scales = [8, 16, 32]
    anchor_scales = cfg.ANCHOR_SCALES
    _feat_stride = [16, ]
    (self.feed('data')
         .conv(7, 7, 64, 2, 2, relu=False, name='conv1')
         .batch_normalization(relu=True, name='bn_conv1', is_training=False)
         .max_pool(3, 3, 2, 2, padding='VALID',name='pool1')
         .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2a_branch1')
         .batch_normalization(name='bn2a_branch1',is_training=False,relu=False))

    (self.feed('pool1')
         .conv(1, 1, 64, 1, 1, biased=False, relu=False, name='res2a_branch2a')
         .batch_normalization(relu=True, name='bn2a_branch2a',is_training=False)
         .conv(3, 3, 64, 1, 1, biased=False, relu=False, name='res2a_branch2b')
         .batch_normalization(relu=True, name='bn2a_branch2b',is_training=False)
         .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2a_branch2c')
         .batch_normalization(name='bn2a_branch2c',is_training=False,relu=False))

    (self.feed('bn2a_branch1', 
               'bn2a_branch2c')
         .add(name='res2a')
         .relu(name='res2a_relu')
         .conv(1, 1, 64, 1, 1, biased=False, relu=False, name='res2b_branch2a')
         .batch_normalization(relu=True, name='bn2b_branch2a',is_training=False)
         .conv(3, 3, 64, 1, 1, biased=False, relu=False, name='res2b_branch2b')
         .batch_normalization(relu=True, name='bn2b_branch2b',is_training=False)
         .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2b_branch2c')
         .batch_normalization(name='bn2b_branch2c',is_training=False,relu=False))

    (self.feed('res2a_relu', 
               'bn2b_branch2c')
         .add(name='res2b')
         .relu(name='res2b_relu')
         .conv(1, 1, 64, 1, 1, biased=False, relu=False, name='res2c_branch2a')
         .batch_normalization(relu=True, name='bn2c_branch2a',is_training=False)
         .conv(3, 3, 64, 1, 1, biased=False, relu=False, name='res2c_branch2b')
         .batch_normalization(relu=True, name='bn2c_branch2b',is_training=False)
         .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2c_branch2c')
         .batch_normalization(name='bn2c_branch2c',is_training=False,relu=False))

    (self.feed('res2b_relu', 
               'bn2c_branch2c')
         .add(name='res2c')
         .relu(name='res2c_relu')
         .conv(1, 1, 512, 2, 2, biased=False, relu=False, name='res3a_branch1', padding='VALID')
         .batch_normalization(name='bn3a_branch1',is_training=False,relu=False))

    (self.feed('res2c_relu')
         .conv(1, 1, 128, 2, 2, biased=False, relu=False, name='res3a_branch2a', padding='VALID')
         .batch_normalization(relu=True, name='bn3a_branch2a',is_training=False)
         .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3a_branch2b')
         .batch_normalization(relu=True, name='bn3a_branch2b',is_training=False)
         .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res3a_branch2c')
         .batch_normalization(name='bn3a_branch2c',is_training=False,relu=False))

    (self.feed('bn3a_branch1', 
               'bn3a_branch2c')
         .add(name='res3a')
         .relu(name='res3a_relu')
         .conv(1, 1, 128, 1, 1, biased=False, relu=False, name='res3b_branch2a')
         .batch_normalization(relu=True, name='bn3b_branch2a',is_training=False)
         .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3b_branch2b')
         .batch_normalization(relu=True, name='bn3b_branch2b',is_training=False)
         .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res3b_branch2c')
         .batch_normalization(name='bn3b_branch2c',is_training=False,relu=False))

    (self.feed('res3a_relu', 
               'bn3b_branch2c')
         .add(name='res3b')
         .relu(name='res3b_relu')
         .conv(1, 1, 128, 1, 1, biased=False, relu=False, name='res3c_branch2a')
         .batch_normalization(relu=True, name='bn3c_branch2a',is_training=False)
         .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3c_branch2b')
         .batch_normalization(relu=True, name='bn3c_branch2b',is_training=False)
         .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res3c_branch2c')
         .batch_normalization(name='bn3c_branch2c',is_training=False,relu=False))

    (self.feed('res3b_relu', 
               'bn3c_branch2c')
         .add(name='res3c')
         .relu(name='res3c_relu')
         .conv(1, 1, 128, 1, 1, biased=False, relu=False, name='res3d_branch2a')
         .batch_normalization(relu=True, name='bn3d_branch2a',is_training=False)
         .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3d_branch2b')
         .batch_normalization(relu=True, name='bn3d_branch2b',is_training=False)
         .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res3d_branch2c')
         .batch_normalization(name='bn3d_branch2c',is_training=False,relu=False))

    (self.feed('res3c_relu', 
               'bn3d_branch2c')
         .add(name='res3d')
         .relu(name='res3d_relu')
         .conv(1, 1, 1024, 2, 2, biased=False, relu=False, name='res4a_branch1', padding='VALID')
         .batch_normalization(name='bn4a_branch1',is_training=False,relu=False))

    (self.feed('res3d_relu')
         .conv(1, 1, 256, 2, 2, biased=False, relu=False, name='res4a_branch2a', padding='VALID')
         .batch_normalization(relu=True, name='bn4a_branch2a',is_training=False)
         .conv(3, 3, 256, 1, 1, biased=False, relu=False, name='res4a_branch2b')
         .batch_normalization(relu=True, name='bn4a_branch2b',is_training=False)
         .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4a_branch2c')
         .batch_normalization(name='bn4a_branch2c',is_training=False,relu=False))

    (self.feed('bn4a_branch1', 
               'bn4a_branch2c')
         .add(name='res4a')
         .relu(name='res4a_relu')
         .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b_branch2a')
         .batch_normalization(relu=True, name='bn4b_branch2a',is_training=False)
         .conv(3, 3, 256, 1, 1, biased=False, relu=False, name='res4b_branch2b')
         .batch_normalization(relu=True, name='bn4b_branch2b',is_training=False)
         .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b_branch2c')
         .batch_normalization(name='bn4b_branch2c',is_training=False,relu=False))

    (self.feed('res4a_relu', 
               'bn4b_branch2c')
         .add(name='res4b')
         .relu(name='res4b_relu')
         .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4c_branch2a')
         .batch_normalization(relu=True, name='bn4c_branch2a',is_training=False)
         .conv(3, 3, 256, 1, 1, biased=False, relu=False, name='res4c_branch2b')
         .batch_normalization(relu=True, name='bn4c_branch2b',is_training=False)
         .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4c_branch2c')
         .batch_normalization(name='bn4c_branch2c',is_training=False,relu=False))

    (self.feed('res4b_relu', 
               'bn4c_branch2c')
         .add(name='res4c')
         .relu(name='res4c_relu')
         .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4d_branch2a')
         .batch_normalization(relu=True, name='bn4d_branch2a',is_training=False)
         .conv(3, 3, 256, 1, 1, biased=False, relu=False, name='res4d_branch2b')
         .batch_normalization(relu=True, name='bn4d_branch2b',is_training=False)
         .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4d_branch2c')
         .batch_normalization(name='bn4d_branch2c',is_training=False,relu=False))

    (self.feed('res4c_relu', 
               'bn4d_branch2c')
         .add(name='res4d')
         .relu(name='res4d_relu')
         .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4e_branch2a')
         .batch_normalization(relu=True, name='bn4e_branch2a',is_training=False)
         .conv(3, 3, 256, 1, 1, biased=False, relu=False, name='res4e_branch2b')
         .batch_normalization(relu=True, name='bn4e_branch2b',is_training=False)
         .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4e_branch2c')
         .batch_normalization(name='bn4e_branch2c',is_training=False,relu=False))

    (self.feed('res4d_relu', 
               'bn4e_branch2c')
         .add(name='res4e')
         .relu(name='res4e_relu')
         .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4f_branch2a')
         .batch_normalization(relu=True, name='bn4f_branch2a',is_training=False)
         .conv(3, 3, 256, 1, 1, biased=False, relu=False, name='res4f_branch2b')
         .batch_normalization(relu=True, name='bn4f_branch2b',is_training=False)
         .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4f_branch2c')
         .batch_normalization(name='bn4f_branch2c',is_training=False,relu=False))

    (self.feed('res4e_relu', 
               'bn4f_branch2c')
         .add(name='res4f')
         .relu(name='res4f_relu'))

    #========= RPN ============
    (self.feed('res4f_relu')
         .conv(3,3,512,1,1,name='rpn_conv/3x3')
         .conv(1,1,len(anchor_scales)*3*2 ,1 , 1, padding='VALID', relu = False, name='rpn_cls_score'))

    (self.feed('rpn_cls_score', 'gt_boxes', 'gt_ishard', 'dontcare_areas', 'im_info')
         .anchor_target_layer(_feat_stride, anchor_scales, name = 'rpn-data' ))
    # Loss of rpn_cls & rpn_boxes

    (self.feed('rpn_conv/3x3')
         .conv(1,1,len(anchor_scales)*3*4, 1, 1, padding='VALID', relu = False, name='rpn_bbox_pred'))

    #========= RoI Proposal ============
    (self.feed('rpn_cls_score')
         .spatial_reshape_layer(2, name = 'rpn_cls_score_reshape')
         .spatial_softmax(name='rpn_cls_prob'))

    (self.feed('rpn_cls_prob')
         .spatial_reshape_layer(len(anchor_scales)*3*2, name = 'rpn_cls_prob_reshape'))

    (self.feed('rpn_cls_prob_reshape','rpn_bbox_pred','im_info')
         .proposal_layer(_feat_stride, anchor_scales, 'TRAIN',name = 'rpn_rois'))

    (self.feed('rpn_rois','gt_boxes', 'gt_ishard', 'dontcare_areas')
         .proposal_target_layer(n_classes,name = 'roi-data'))

    #========= RCNN ============        
    (self.feed('res4f_relu','roi-data')
         .roi_pool(7,7,1.0/16,name='res5a_branch2a_roipooling')
         .conv(1, 1, 512, 2, 2, biased=False, relu=False, name='res5a_branch2a', padding='VALID')
         .batch_normalization(relu=True, name='bn5a_branch2a',is_training=False)
         .conv(3, 3, 512, 1, 1, biased=False, relu=False, name='res5a_branch2b')
         .batch_normalization(relu=True, name='bn5a_branch2b',is_training=False)
         .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='res5a_branch2c')
         .batch_normalization(name='bn5a_branch2c',is_training=False,relu=False))

    (self.feed('res5a_branch2a_roipooling')
         .conv(1,1,2048,2,2,biased=False, relu=False, name='res5a_branch1', padding='VALID')
         .batch_normalization(name='bn5a_branch1',is_training=False,relu=False))

    (self.feed('bn5a_branch2c','bn5a_branch1')
         .add(name='res5a')
         .relu(name='res5a_relu')
         .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res5b_branch2a')
         .batch_normalization(relu=True, name='bn5b_branch2a',is_training=False)
         .conv(3, 3, 512, 1, 1, biased=False, relu=False, name='res5b_branch2b')
         .batch_normalization(relu=True, name='bn5b_branch2b',is_training=False)
         .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='res5b_branch2c')
         .batch_normalization(name='bn5b_branch2c',is_training=False,relu=False))
    #pdb.set_trace()
    (self.feed('res5a_relu', 
               'bn5b_branch2c')
         .add(name='res5b')
         .relu(name='res5b_relu')
         .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res5c_branch2a')
         .batch_normalization(relu=True, name='bn5c_branch2a',is_training=False)
         .conv(3, 3, 512, 1, 1, biased=False, relu=False, name='res5c_branch2b')
         .batch_normalization(relu=True, name='bn5c_branch2b',is_training=False)
         .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='res5c_branch2c')
         .batch_normalization(name='bn5c_branch2c',is_training=False,relu=False))
    #pdb.set_trace()
    (self.feed('res5b_relu',
               'bn5c_branch2c')
         .add(name='res5c')
         .relu(name='res5c_relu')
         .fc(n_classes, relu=False, name='cls_score')
         .softmax(name='cls_prob'))

    (self.feed('res5c_relu')
         .fc(n_classes*4, relu=False, name='bbox_pred'))
CharlesShang commented 7 years ago

@miraclebiu Thank you for this work. ResNet has batch_norm layers that keep a moving_avg and moving_var during each iteration, and that only works for mini_batch > 1 (see the original paper). In our case the batch size is 1, so the moving_avg and moving_var are meaningless.

To make better use of ResNet, I think you should keep the bn layers fixed, skipping the re-computation of mean and var during training.
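A minimal sketch of what I mean, based on your wrapper above (trainable=False is the extra piece, which also freezes gamma and beta):

@layer
def batch_normalization(self, input, name, relu=True, is_training=False):
    # is_training=False: use the stored moving mean/var, not batch stats.
    # trainable=False: keep gamma/beta out of the optimizer as well.
    out = tf.contrib.layers.batch_norm(
        input, scale=True, center=True,
        is_training=is_training, trainable=False, scope=name)
    return tf.nn.relu(out) if relu else out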

CharlesShang commented 7 years ago

Looking forward to your latest result.

CharlesShang commented 7 years ago

I read your code again and found you've already disabled the updating of bn by passing is_training=False, so I'm not sure about the cause. Some suggestions:

  1. Try a different learning rate.
  2. Disable updating of the resnet layers and fine-tune only the new layers, like rpn_xx and the fc layers.
  3. Add a histogram summary of each layer to make sure there is no large difference in magnitude between the resnet layers and the new ones (see the sketch below).
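
For point 3, something like this (a sketch using the TF 0.11-era summary ops; newer releases renamed them to tf.summary.histogram / tf.summary.merge_all):

# Record a histogram for every trainable variable, then merge them.
for var in tf.trainable_variables():
    tf.histogram_summary(var.op.name, var)
summary_op = tf.merge_all_summaries()
# In the training loop: writer.add_summary(sess.run(summary_op), step)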

There is a ResNet implementation at https://github.com/ry/tensorflow-resnet, along with pretrained models.

miraclebiu commented 7 years ago

I will try, and if I find the error I will tell you.

miraclebiu commented 7 years ago

This evening I ported my code from Faster-rcnn_TF to your project and trained ResNet50 for about 130k iterations; the test mAP is about 70%. Maybe there are errors in Faster-rcnn_TF that I didn't find. I had assumed that since this project can train VGG well it could train ResNet50 well too, but I think I was wrong. Besides, I also have the psroi_pooling code from R-FCN implemented in TensorFlow; maybe I can send it to you.

miraclebiu commented 7 years ago

I'm new to TensorFlow and FRCN, so I think I should read your code carefully and learn how to use it.

CharlesShang commented 7 years ago

@miraclebiu Congratulations! Very glad you made it. You can open a pull request to send your awesome code to this repo; check this link: https://help.github.com/articles/about-pull-requests/

And there are some things in the original code that are very confusing, like the inconsistency of axis ordering: in some places the rpn score is ordered (1, H, W, A, 2) and in others (1, H, W, 2, A).
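A toy illustration of why that bites (not code from the repo; A is the anchor count and the H, W values are hypothetical):

import tensorflow as tf

A = 9  # anchors per location
score = tf.placeholder(tf.float32, [1, 38, 50, A * 2])
# The same flat channel buffer regrouped two ways:
anchor_major = tf.reshape(score, [1, 38, 50, A, 2])  # (1, H, W, A, 2)
class_major = tf.reshape(score, [1, 38, 50, 2, A])   # (1, H, W, 2, A)
# These are NOT transposes of each other: a reshape regroups memory, it
# does not permute axes, so mixing the two conventions silently scrambles
# which score belongs to which anchor/class.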

miraclebiu commented 7 years ago

@CharlesShang I wrote an email to your gmail.
I don't know how to open a pull request since I'm a newcomer to GitHub; all I know is how to download code. Besides, the TensorFlow machine and my computer are not the same machine, so I need to copy the files over. I therefore created a repository, TFFRCN_Resnet50; you can see its readme.md. I only changed factory.py, __init__.py, and networks.py in ./lib/networks and added resnet50_train.py and resnet50_test.py in the same folder. Thank you for your work on this project! The repository is at https://github.com/miraclebiu/TFFRCN_resnet50

miraclebiu commented 7 years ago

I run this on TensorFlow 0.11; you can check what changed in 0.12 at https://github.com/tensorflow/tensorflow/releases

CharlesShang commented 7 years ago

@miraclebiu I merged your contributions into the resnet branch. After a simple test, I'll merge it into master.
Looking forward to your resnet101 models. Thanks again.

CharlesShang commented 7 years ago

@miraclebiu Added your code. I notice there is a psroi_pooling module; have you tested it?
I think we can build R-FCN together.

miraclebiu commented 7 years ago

@CharlesShang I have tested the forward pass but not the backward pass. I found the code on GitHub, compared it against the Caffe code, and made some modifications; I think it is right. Besides, I think anyone who wants to build R-FCN should also write OHEM code, which I think is very important for R-FCN; that's just my opinion. Google has submitted a paper to arXiv covering many detectors, "Speed/accuracy trade-offs for modern convolutional object detectors"; I don't know whether they use OHEM for R-FCN. I wish they would release their code.
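If it helps, the backward pass can be checked numerically against finite differences. This is only a sketch: the import path, the op's signature, and the shapes are all assumptions to adapt to the actual module:

import numpy as np
import tensorflow as tf
from psroi_pooling_op import psroi_pool  # hypothetical import path

with tf.Session():
    # 2 classes, 7x7 position-sensitive groups -> 2*7*7 = 98 channels.
    feat = tf.placeholder(tf.float32, [1, 14, 14, 98])
    rois = tf.constant([[0, 0, 0, 223, 223]], tf.float32)  # [batch, x1, y1, x2, y2]
    out = psroi_pool(feat, rois, output_dim=2, group_size=7,
                     spatial_scale=1.0 / 16)
    # Compare the op's analytic gradient against a numeric one.
    err = tf.test.compute_gradient_error(feat, [1, 14, 14, 98],
                                         out, [1, 7, 7, 2])
    print('max gradient error:', err)  # should be tiny if backward is right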

miraclebiu commented 7 years ago

@CharlesShang, maybe you can use tf.nn.top_k or tf.nn.in_top_k to sort the losses and then use tf.gather. Since the network has four losses that are summed, I don't know which one should get the OHEM treatment.
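Something like this, maybe (only a sketch of the top_k idea; per_roi_loss and labels stand for whichever per-RoI loss and targets get mined):

import tensorflow as tf

def ohem_select(per_roi_loss, labels, num_hard=128):
    # Keep only the num_hard RoIs with the largest loss.
    topk_loss, topk_idx = tf.nn.top_k(per_roi_loss, k=num_hard)
    # Reuse the indices to pull out the matching labels (or boxes, etc.).
    hard_labels = tf.gather(labels, topk_idx)
    # Average so the loss scale stays comparable across batches.
    return tf.reduce_mean(topk_loss), hard_labels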

CharlesShang commented 7 years ago

@miraclebiu, online hard example mining (OHEM), data augmentation, etc., are very important for detection. I'll add these functions in a few days. A simple way is to check out the R-FCN code and find out how they use OHEM :-). Thank you for the information about the Google paper.

miraclebiu commented 7 years ago

@CharlesShang the current mAP of the Resnet101 model is 70.4% after 150k iterations; maybe it needs more iterations. The converted Resnet50 and Resnet101 models have only a small gap of about 0.6% top-5 error, so there is no big difference between using Resnet50 and Resnet101, but the Resnet101 model is about twice the size of Resnet50.

CharlesShang commented 7 years ago

I think that's the best mAP we can get. Maybe the bottleneck is training data; the original paper (Faster R-CNN + ResNet101) reported an mAP of 0.738 on 07, using 07+12 for training.

jwnsu commented 7 years ago

Today I tried Resnet101 in CharlesShang/TFFRCNN (pulled the latest code from the master branch as of today) and saw the following error during training:

Traceback (most recent call last):
  File "./faster_rcnn/train_net.py", line 109, in <module>
    restore=bool(int(args.restore)))
  File "./faster_rcnn/../lib/fast_rcnn/train.py", line 396, in train_net
    sw.train_model(sess, max_iters, restore=restore)
  File "./faster_rcnn/../lib/fast_rcnn/train.py", line 108, in train_model
    self.net.build_loss(ohem=cfg.TRAIN.OHEM)
  File "./faster_rcnn/../lib/networks/network.py", line 446, in build_loss
    cls_score = self.get_output('cls_score') # (R, C+1)
  File "./faster_rcnn/../lib/networks/network.py", line 80, in get_output
    raise KeyError('Unknown layer name fed: %s'%layer)
KeyError: 'Unknown layer name fed: cls_score'


This happens right after "Solving ..." and the printout of the network layer names.

Going back to Minyue's fork, it works fine there.

Another question: I saw you are working on OHEM. Is it working in the master branch? The debug branch has more OHEM changes.

Thanks

Dennis
