CharlesShang / TFFRCNN

FastER RCNN built on tensorflow
MIT License
874 stars 418 forks source link

Using Resnet50 for FRCN #7

Closed miraclebiu closed 7 years ago

miraclebiu commented 7 years ago

Sorry to bother you again. I have tried to replace VGG with ResNet50, so I added two layers, an add layer and a batch_normalization layer:

def add(self,input,name):
    """Return the element-wise sum of the first two entries of `input`.

    Args:
        input: indexable collection whose items 0 and 1 are the tensors to add.
        name: layer name; accepted but not used inside this body.

    Returns:
        The result of `tf.add` applied to `input[0]` and `input[1]`.
    """
    lhs = input[0]
    rhs = input[1]
    return tf.add(lhs, rhs)

def batch_normalization(self,input,name,relu=True,is_training=False):
    """Apply batch normalization to `input`, optionally followed by a ReLU.

    Args:
        input: tensor to normalize.
        name: variable scope handed to `tf.contrib.layers.batch_norm`.
        relu: when true, the normalized tensor is passed through `tf.nn.relu`.
        is_training: forwarded unchanged to `batch_norm`.

    Returns:
        The batch-normalized tensor, rectified when `relu` is true.
    """
    # Compute the normalization once so both branches share it; the pasted
    # version referenced an unassigned `temp_layer` and never reached the
    # non-ReLU return.
    normalized = tf.contrib.layers.batch_norm(
        input, scale=True, center=True, is_training=is_training, scope=name)
    if relu:
        return tf.nn.relu(normalized)
    return normalized

The model file was converted with the caffe-tensorflow project on GitHub (you can search for it there), and I converted the Caffe BN layers' data into the moving average and so on. But the result is really bad: the mAP is about 55%, and I don't know the reason. Maybe the model was not converted correctly?

Have you tried replacing VGG with ResNet50?

below is the

class Resnet50_train(Network):
    """Training-time ResNet-50 network definition built on `Network`.

    NOTE(review): reconstructed from a paste that was collapsed onto a single
    line. `__init__` had lost its underscores, and the `self.data` attribute
    is inferred from the placeholder's name='data' and the 'data' entry of
    the layer dict -- confirm against the repository.
    """

    def __init__(self, trainable=True):
        """Create the input placeholders, register them as layers, and call `setup()`.

        Args:
            trainable: stored on the instance as `self.trainable`.
        """
        self.inputs = []
        self.data = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='data')
        self.im_info = tf.placeholder(tf.float32, shape=[None, 3], name='im_info')
        self.gt_boxes = tf.placeholder(tf.float32, shape=[None, 5], name='gt_boxes')
        self.gt_ishard = tf.placeholder(tf.int32, shape=[None], name='gt_ishard')
        self.dontcare_areas = tf.placeholder(tf.float32, shape=[None, 4], name='dontcare_areas')
        self.keep_prob = tf.placeholder(tf.float32)
        # Placeholders are exposed under their layer names.
        self.layers = dict({'data': self.data, 'im_info': self.im_info, 'gt_boxes': self.gt_boxes,
                            'gt_ishard': self.gt_ishard, 'dontcare_areas': self.dontcare_areas})
        self.trainable = trainable
        self.setup()

def setup(self):
    """Describe the ResNet-50 based detection network as chained layer calls.

    Reads the class count and anchor scales from `cfg`, fixes the feature
    stride to 16, and then lists the conv / batch-norm layers by name,
    followed by the RPN, RoI proposal and RCNN head layers.

    NOTE(review): as pasted, most chains begin directly with `.conv(...)`;
    the expressions that open them (only two `self.feed(...)` calls are
    visible) and some layer lines appear to be missing, so this snippet is
    not runnable as shown. Layer names presumably mirror the converted Caffe
    model mentioned in the thread -- TODO confirm.
    """

    n_classes = cfg.NCLASSES
    # anchor_scales = [8, 16, 32]
    anchor_scales = cfg.ANCHOR_SCALES
    _feat_stride = [16, ]
    # conv1 / pool1 followed by the res2a-res2c layers.
         .conv(7, 7, 64, 2, 2, relu=False, name='conv1')
         .batch_normalization(relu=True, name='bn_conv1', is_training=False)
         .max_pool(3, 3, 2, 2, padding='VALID',name='pool1')
         .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2a_branch1')

         .conv(1, 1, 64, 1, 1, biased=False, relu=False, name='res2a_branch2a')
         .batch_normalization(relu=True, name='bn2a_branch2a',is_training=False)
         .conv(3, 3, 64, 1, 1, biased=False, relu=False, name='res2a_branch2b')
         .batch_normalization(relu=True, name='bn2a_branch2b',is_training=False)
         .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2a_branch2c')

         .conv(1, 1, 64, 1, 1, biased=False, relu=False, name='res2b_branch2a')
         .batch_normalization(relu=True, name='bn2b_branch2a',is_training=False)
         .conv(3, 3, 64, 1, 1, biased=False, relu=False, name='res2b_branch2b')
         .batch_normalization(relu=True, name='bn2b_branch2b',is_training=False)
         .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2b_branch2c')

         .conv(1, 1, 64, 1, 1, biased=False, relu=False, name='res2c_branch2a')
         .batch_normalization(relu=True, name='bn2c_branch2a',is_training=False)
         .conv(3, 3, 64, 1, 1, biased=False, relu=False, name='res2c_branch2b')
         .batch_normalization(relu=True, name='bn2c_branch2b',is_training=False)
         .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2c_branch2c')

    # res3a-res3d layers.
         .conv(1, 1, 512, 2, 2, biased=False, relu=False, name='res3a_branch1', padding='VALID')

         .conv(1, 1, 128, 2, 2, biased=False, relu=False, name='res3a_branch2a', padding='VALID')
         .batch_normalization(relu=True, name='bn3a_branch2a',is_training=False)
         .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3a_branch2b')
         .batch_normalization(relu=True, name='bn3a_branch2b',is_training=False)
         .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res3a_branch2c')

         .conv(1, 1, 128, 1, 1, biased=False, relu=False, name='res3b_branch2a')
         .batch_normalization(relu=True, name='bn3b_branch2a',is_training=False)
         .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3b_branch2b')
         .batch_normalization(relu=True, name='bn3b_branch2b',is_training=False)
         .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res3b_branch2c')

         .conv(1, 1, 128, 1, 1, biased=False, relu=False, name='res3c_branch2a')
         .batch_normalization(relu=True, name='bn3c_branch2a',is_training=False)
         .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3c_branch2b')
         .batch_normalization(relu=True, name='bn3c_branch2b',is_training=False)
         .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res3c_branch2c')

         .conv(1, 1, 128, 1, 1, biased=False, relu=False, name='res3d_branch2a')
         .batch_normalization(relu=True, name='bn3d_branch2a',is_training=False)
         .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3d_branch2b')
         .batch_normalization(relu=True, name='bn3d_branch2b',is_training=False)
         .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res3d_branch2c')

    # res4a-res4f layers.
         .conv(1, 1, 1024, 2, 2, biased=False, relu=False, name='res4a_branch1', padding='VALID')

         .conv(1, 1, 256, 2, 2, biased=False, relu=False, name='res4a_branch2a', padding='VALID')
         .batch_normalization(relu=True, name='bn4a_branch2a',is_training=False)
         .conv(3, 3, 256, 1, 1, biased=False, relu=False, name='res4a_branch2b')
         .batch_normalization(relu=True, name='bn4a_branch2b',is_training=False)
         .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4a_branch2c')

         .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b_branch2a')
         .batch_normalization(relu=True, name='bn4b_branch2a',is_training=False)
         .conv(3, 3, 256, 1, 1, biased=False, relu=False, name='res4b_branch2b')
         .batch_normalization(relu=True, name='bn4b_branch2b',is_training=False)
         .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b_branch2c')

         .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4c_branch2a')
         .batch_normalization(relu=True, name='bn4c_branch2a',is_training=False)
         .conv(3, 3, 256, 1, 1, biased=False, relu=False, name='res4c_branch2b')
         .batch_normalization(relu=True, name='bn4c_branch2b',is_training=False)
         .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4c_branch2c')

         .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4d_branch2a')
         .batch_normalization(relu=True, name='bn4d_branch2a',is_training=False)
         .conv(3, 3, 256, 1, 1, biased=False, relu=False, name='res4d_branch2b')
         .batch_normalization(relu=True, name='bn4d_branch2b',is_training=False)
         .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4d_branch2c')

         .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4e_branch2a')
         .batch_normalization(relu=True, name='bn4e_branch2a',is_training=False)
         .conv(3, 3, 256, 1, 1, biased=False, relu=False, name='res4e_branch2b')
         .batch_normalization(relu=True, name='bn4e_branch2b',is_training=False)
         .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4e_branch2c')

         .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4f_branch2a')
         .batch_normalization(relu=True, name='bn4f_branch2a',is_training=False)
         .conv(3, 3, 256, 1, 1, biased=False, relu=False, name='res4f_branch2b')
         .batch_normalization(relu=True, name='bn4f_branch2b',is_training=False)
         .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4f_branch2c')


    #========= RPN ============
         .conv(1,1,len(anchor_scales)*3*2 ,1 , 1, padding='VALID', relu = False, name='rpn_cls_score'))

    (self.feed('rpn_cls_score', 'gt_boxes', 'gt_ishard', 'dontcare_areas', 'im_info')
         .anchor_target_layer(_feat_stride, anchor_scales, name = 'rpn-data' ))
    # Loss of rpn_cls & rpn_boxes

         .conv(1,1,len(anchor_scales)*3*4, 1, 1, padding='VALID', relu = False, name='rpn_bbox_pred'))

    #========= RoI Proposal ============
         .spatial_reshape_layer(2, name = 'rpn_cls_score_reshape')

         .spatial_reshape_layer(len(anchor_scales)*3*2, name = 'rpn_cls_prob_reshape'))

         .proposal_layer(_feat_stride, anchor_scales, 'TRAIN',name = 'rpn_rois'))

    (self.feed('rpn_rois','gt_boxes', 'gt_ishard', 'dontcare_areas')
         .proposal_target_layer(n_classes,name = 'roi-data'))

    #========= RCNN ============        
    # res5a-res5c layers followed by the cls_score / bbox_pred fc layers.
         .conv(1, 1, 512, 2, 2, biased=False, relu=False, name='res5a_branch2a', padding='VALID')
         .batch_normalization(relu=True, name='bn5a_branch2a',is_training=False)
         .conv(3, 3, 512, 1, 1, biased=False, relu=False, name='res5a_branch2b')
         .batch_normalization(relu=True, name='bn5a_branch2b',is_training=False)
         .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='res5a_branch2c')

         .conv(1,1,2048,2,2,biased=False, relu=False, name='res5a_branch1', padding='VALID')

         .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res5b_branch2a')
         .batch_normalization(relu=True, name='bn5b_branch2a',is_training=False)
         .conv(3, 3, 512, 1, 1, biased=False, relu=False, name='res5b_branch2b')
         .batch_normalization(relu=True, name='bn5b_branch2b',is_training=False)
         .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='res5b_branch2c')
         .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res5c_branch2a')
         .batch_normalization(relu=True, name='bn5c_branch2a',is_training=False)
         .conv(3, 3, 512, 1, 1, biased=False, relu=False, name='res5c_branch2b')
         .batch_normalization(relu=True, name='bn5c_branch2b',is_training=False)
         .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='res5c_branch2c')
         .fc(n_classes, relu=False, name='cls_score')

         .fc(n_classes*4, relu=False, name='bbox_pred'))
CharlesShang commented 7 years ago

@miraclebiu Thank you for this work. ResNet has batch_norm layers that keep a moving_avg and moving_var updated at each iteration. This only works for mini-batches larger than 1 (see the original paper). In our case the batch size is 1, so the moving_avg and moving_var are meaningless.

To better use ResNet, I think you should keep the bn layer fixed, skipping re-computing mean and var during training.

CharlesShang commented 7 years ago

Looking forward to your latest results.

CharlesShang commented 7 years ago

I read your code again and found that you've already disabled the updating of BN by passing is_training=False. So I'm not sure about the cause. Some suggestions:

  1. Try a different learning rate.
  2. Disable updating of resnet layers, only fine-tuning new layers, like rpn_xx, fc layers.
  3. Add a histogram summary of each layer to make sure there is no large difference in magnitude between the resnet and new layers

There is a ResNet implementation, and also pretrained models.

miraclebiu commented 7 years ago

I will try and if I can find the error, I will tell you

miraclebiu commented 7 years ago

This evening I moved my code from Faster-RCNN_TF to your project and trained ResNet50 for about 130k iterations; the test mAP is about 70%. Maybe there is an error in Faster-RCNN_TF that I didn't find. Previously I assumed that since that project can train VGG well, it could train ResNet50 well too, but I think I was wrong. Besides, I also have code for the PSRoI pooling from R-FCN, implemented in TensorFlow; maybe I can send it to you.

miraclebiu commented 7 years ago

I'm new to TensorFlow and FRCN, so I think I should read your code carefully and learn how to use it.

CharlesShang commented 7 years ago

@miraclebiu Congratulations! Very glad you made it. You can open a pull request to send your awesome code to this repo. Check this link.

Also, some things in the original code are very confusing, such as the inconsistent axis ordering: in some places the rpn_score is ordered as (1, H, W, A, 2) and in others as (1, H, W, 2, A).

miraclebiu commented 7 years ago

@CharlesShang I write an email to your gmail.
I don't know how to pull request the file since I'm a newcomer about github, all I know is how to download the code.Besides, the tensorflow machine and my computer is not the same machine , I need to copy the file to this computer, so I create the reposity about TFFRCN_Resnet50,you can see the, I only change the,, in ./lib/networks and add resnet50_train and in the same folder. Thank you for your work on this project! the website is

miraclebiu commented 7 years ago

I ran this on TensorFlow 0.11; you can check what changed in 0.12.

CharlesShang commented 7 years ago

@miraclebiu I merge your contributions to resnet branch. After a simple test, I'll finally merge it to the master.
Looking forward to your resnet101 models. Thanks again.

CharlesShang commented 7 years ago

@miraclebiu Added your code. Notice there is psroi_pooling, have you tested this module?
I think we can build R-FCN together.

miraclebiu commented 7 years ago

@CharlesShang I have tested the forward pass but not the backward pass. I found it on GitHub, compared it with the Caffe code and made some modifications; I think it is right. Besides, I think anyone who wants to build R-FCN should write code for OHEM, which I think is very important for R-FCN. That is just my opinion. Google has submitted a paper to arXiv covering many detectors, "Speed/accuracy trade-offs for modern convolutional object detectors"; I don't know whether they use OHEM for R-FCN. I hope they release their code.

miraclebiu commented 7 years ago

@CharlesShang, maybe you can use tf.nn.top_k or tf.nn.in_top_k to sort the losses and then use tf.gather. Since the network sums four losses, I don't know which of them OHEM should be applied to.

CharlesShang commented 7 years ago

@miraclebiu, online hard example mining (OHEM), data augmentation, etc., are very important for detection. I'll add these functions in a few days. A simple way is to check out the R-FCN code and find out how they use OHEM :-). Thank you for the information about that Google paper.

miraclebiu commented 7 years ago

@CharlesShang The current mAP of the ResNet101 model is 70.4% after 150k iterations; maybe it needs more iterations. The converted ResNet50 and ResNet101 models differ by only about 0.6% in top-5 error, so there is no big difference between using ResNet50 and ResNet101, but the ResNet101 model is about twice as big as the ResNet50 model.

CharlesShang commented 7 years ago

I think it's the best mAP we can get. Maybe the bottleneck is the training data: the original paper (Faster R-CNN + ResNet101) reported an mAP of 0.738 on 07 using 07+12 for training.

jwnsu commented 7 years ago

Today I tried ResNet101 in CharlesShang/TFFRCNN (pulled the latest code from the master branch as of today) and saw the following error in training:

Traceback (most recent call last):

File "./faster_rcnn/", line 109, in


File "./faster_rcnn/../lib/fast_rcnn/", line 396, in train_net

sw.train_model(sess, max_iters, restore=restore)

File "./faster_rcnn/../lib/fast_rcnn/", line 108, in train_model

File "./faster_rcnn/../lib/networks/", line 446, in build_loss

cls_score = self.get_output('cls_score') # (R, C+1)

File "./faster_rcnn/../lib/networks/", line 80, in get_output

raise KeyError('Unknown layer name fed: %s'%layer)

KeyError: 'Unknown layer name fed: cls_score'

This happens right after "Solving ...." and print of network layer names.

Went back to Minyue's fork, it works fine there.

Another question: I saw you are working on OHEM. Is it working in the master branch? The debug branch has more OHEM changes.



On Mon, Dec 5, 2016 at 11:41 PM, Charles Shang wrote:

I think it's the best mAP we can get. Maybe the bottleneck is the training data: the original paper (Faster R-CNN + ResNet101) reported an mAP of 0.738 on 07 using 07+12 for training.

— You are receiving this because you commented. Reply to this email directly, view it on GitHub, or mute the thread .