afantideng / R-FCN-PSROIAlign

A Caffe implementation of PSROI-Align
http://blog.leanote.com/post/afanti.deng@gmail.com/Position-Sensitive

Loss keeps increasing #3

Open DoubleWJX opened 6 years ago

DoubleWJX commented 6 years ago

After I swapped ps_roi_pooling for ps_roi_align, the loss starts growing wildly after a few batches. Besides switching to ps_roi_align, is there anything else I need to do? Thanks.

[loss](url)

HXY-95 commented 6 years ago

Hello, I ran into this problem too. How did you solve it?

DoubleWJX commented 6 years ago

@YanHengxu I mainly rewrote the backward pass (and may have changed some other code as well), referring to caffe2's roi_align. In my experiments so far, ps_roi_align does not improve over ps_roi_pooling and is even slightly worse, but training does converge now. Could you take a look? There may still be problems:

// --------------------------------------------------------
// R-FCN
// Written by Afanti (afanti.deng@gmail.com)
// --------------------------------------------------------

#include <cfloat>
#include <algorithm>
#include <string>
#include <vector>

include "caffe/layers/psroi_align_layer.hpp"

include "caffe/util/gpu_util.cuh"

using std::max; using std::min;

namespace caffe {

template <typename Dtype>
__device__ void bilinear_interpolate(
    const Dtype* bottom_data, const int height, const int width,
    Dtype h, Dtype w, Dtype& val) {

// deal with cases that inverse elements are out of feature map boundary
if (h < -0.5 || h > height - 0.5 || w < -0.5 || w > width - 0.5){
  val = Dtype(0);
  return;
}

if (h <= 0) h = 0;
if (w <= 0) w = 0;        

int h_high;             // h_high is the smallest integer greater than h
int w_high;             // w_high is the smallest integer greater than w
int h_low = (int) h;    // h_low  is the largest integer not greater than h
int w_low = (int) w;    // w_low  is the largest integer not greater than w

if (w_low >= width - 1) {
  w_high = w_low = width - 1;
  w = (Dtype) w_low;
} else 
  w_high = w_low + 1;

if (h_low >= height - 1) {
  h_high = h_low = height - 1;
  h = (Dtype) h_low;
} else 
  h_high = h_low + 1;

Dtype l_dh = h - h_low, l_dw = w - w_low;
Dtype h_dh = 1. - l_dh, h_dw = 1. - l_dw;

// bilinear interpolation
Dtype u1 = bottom_data[h_low * width + w_low];
Dtype u2 = bottom_data[h_low * width + w_high];
Dtype u3 = bottom_data[h_high * width + w_low];
Dtype u4 = bottom_data[h_high * width + w_high];
Dtype w1 = h_dh * h_dw, w2 = h_dh * l_dw, w3 = l_dh * h_dw, w4 = l_dh * l_dw;

val = (w1 * u1 + w2 * u2 + w3 * u3 + w4 * u4);

}

template <typename Dtype>
__global__ void PSROIAlignForward(
    const int nthreads,
    const Dtype* bottom_data,
    const Dtype spatial_scale,
    const int channels,
    const int height, const int width,
    const int pooled_height, const int pooled_width,
    const Dtype* bottom_rois,
    const int output_dim,   // number of output channels
    const int group_size,   // the k in k*k*(C+1)
    Dtype* top_data,
    int* mapping_channel,
    const int sample_num) {
  CUDA_KERNEL_LOOP(index, nthreads) {
    // The output is in order (n, ctop, ph, pw)
    int pw = index % pooled_width;
    int ph = (index / pooled_width) % pooled_height;
    int ctop = (index / pooled_width / pooled_height) % output_dim;
    int n = index / pooled_width / pooled_height / output_dim;

  // [start, end) interval for spatial sampling
  bottom_rois += n * 5;
  int roi_batch_ind = bottom_rois[0];
  Dtype roi_start_w = bottom_rois[1] * spatial_scale;
  Dtype roi_start_h = bottom_rois[2] * spatial_scale;
  Dtype roi_end_w  = (bottom_rois[3] + 1.) * spatial_scale;
  Dtype roi_end_h  = (bottom_rois[4] + 1.) * spatial_scale;

  // Force too small ROIs to be 1x1
  Dtype roi_width = max(roi_end_w - roi_start_w, 0.1);  // avoid 0
  Dtype roi_height = max(roi_end_h - roi_start_h, 0.1);

  // Compute w and h at bottom
  Dtype bin_size_h = roi_height / static_cast<Dtype>(pooled_height);
  Dtype bin_size_w = roi_width  / static_cast<Dtype>(pooled_width);

  // start and end coordinates (floating point) of this output bin on the pre-pooling feature map
  Dtype hstart = static_cast<Dtype>(ph) * bin_size_h;
  Dtype wstart = static_cast<Dtype>(pw) * bin_size_w;
  Dtype hend   = static_cast<Dtype>(ph + 1.) * bin_size_h;
  Dtype wend   = static_cast<Dtype>(pw + 1.) * bin_size_w;

  // Add roi offsets and clip to input boundaries
  hstart = min(max(hstart + roi_start_h, Dtype(0)), Dtype(height));
  hend = min(max(hend + roi_start_h, Dtype(0)), Dtype(height));
  wstart = min(max(wstart + roi_start_w, Dtype(0)), Dtype(width));
  wend = min(max(wend + roi_start_w, Dtype(0)), Dtype(width));
  bool is_empty = (hend <= hstart) || (wend <= wstart);

  int gw = pw;
  int gh = ph;
  int c = (ctop*group_size + gh)*group_size + gw;  // position-sensitive channel for output channel ctop and bin (gh, gw)

  // spacing between sampling points on the pre-pooling feature map (floating point, along h and w)
  Dtype sample_h = bin_size_h / (sample_num + 1.);
  Dtype sample_w = bin_size_w / (sample_num + 1.);
  Dtype val = 0;
  bottom_data += (roi_batch_ind * channels + c) * height * width;
  Dtype out_sum = 0.0;
  for (int i = 1; i <= sample_num; ++i) {
      for (int j = 1; j <= sample_num; ++j) {
          Dtype cur_h = hstart + i * sample_h;
          Dtype cur_w = wstart + j * sample_w;
          if (cur_h >= hend || cur_w >= wend) continue;
          bilinear_interpolate(bottom_data, height, width, cur_h, cur_w, val);
          out_sum += val; 
      }
  }
  // Dtype bin_area = (hend - hstart) * (wend - wstart);
  top_data[index] = is_empty ? 0. : out_sum / static_cast<Dtype>(sample_num * sample_num);
  mapping_channel[index] = c;
}

}

template <typename Dtype>
void PSROIAlignLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->gpu_data();
  const Dtype* bottom_rois = bottom[1]->gpu_data();
  Dtype* top_data = top[0]->mutable_gpu_data();
  int* mapping_channel_ptr = mapping_channel_.mutable_gpu_data();
  int count = top[0]->count();
  caffe_gpu_set(count, Dtype(0), top_data);
  caffe_gpu_set(count, -1, mapping_channel_ptr);
  // NOLINT_NEXT_LINE(whitespace/operators)
  PSROIAlignForward<Dtype> << <CAFFE_GET_BLOCKS(count),
    CAFFE_CUDA_NUM_THREADS >> >(count, bottom_data, spatial_scale_,
    channels_, height_, width_, pooled_height_, pooled_width_,
    bottom_rois, output_dim_, group_size_, top_data,
    mapping_channel_ptr, sample_num_);
  CUDA_POST_KERNEL_CHECK;
}

template <typename Dtype>
__device__ void bilinear_interpolate_gradient(
    const int height, const int width, Dtype h, Dtype w,
    Dtype& w1, Dtype& w2, Dtype& w3, Dtype& w4,
    int& w_low, int& w_high, int& h_low, int& h_high) {

// deal with cases that inverse elements are out of feature map boundary
if (h < -0.5 || h > height - 0.5 || w < -0.5 || w > width - 0.5){
  w1 = w2 = w3 = w4 = Dtype(0);
  w_low = w_high = h_low = h_high = -1;
  return;
}

if (h <= 0) h = 0;
if (w <= 0) w = 0;        

h_low = (int) h;    // h_low  is the largest integer not greater than h
w_low = (int) w;    // w_low  is the largest integer not greater than w

if (w_low >= width - 1) {
  w_low = w_high = width - 1;
  w = (Dtype) w_low;
} else 
  w_high = w_low + 1;

if (h_low >= height - 1) {
  h_high = h_low = height - 1;
  h = (Dtype) h_low;
} else 
  h_high = h_low + 1;

Dtype l_dh = h - h_low;
Dtype l_dw = w - w_low;
Dtype h_dh = 1. - l_dh, h_dw = 1. - l_dw;

// bilinear interpolation weights

w1 = h_dh * h_dw, w2 = h_dh * l_dw, w3 = l_dh * h_dw, w4 = l_dh * l_dw;

}

template <typename Dtype>
__global__ void PSROIAlignBackwardAtomic(
    const int nthreads,
    const Dtype* top_diff,
    const int* mapping_channel,
    const int num_rois,
    const Dtype spatial_scale,
    const int channels,
    const int height, const int width,
    const int pooled_height, const int pooled_width,
    const int output_dim,
    Dtype* bottom_diff,
    const Dtype* bottom_rois,
    const int sample_num) {
  // iterate over every element of the pooled output
  CUDA_KERNEL_LOOP(index, nthreads) {
    // The output is in order (n, ctop, ph, pw)
    int pw = index % pooled_width;
    int ph = (index / pooled_width) % pooled_height;
    int n = index / pooled_width / pooled_height / output_dim;

  // ---------------- compute the region on the input feature map covered by this pooled cell ----------------
  // [start, end) interval for spatial sampling
  bottom_rois += n * 5;
  int roi_batch_ind = bottom_rois[0];
  Dtype roi_start_w = bottom_rois[1] * spatial_scale;
  Dtype roi_start_h = bottom_rois[2] * spatial_scale;
  Dtype roi_end_w = (bottom_rois[3] + 1.) * spatial_scale;
  Dtype roi_end_h = (bottom_rois[4] + 1.) * spatial_scale;

  // Force too small ROIs to be 1x1
  Dtype roi_width = max(roi_end_w - roi_start_w, 0.1);  // avoid 0
  Dtype roi_height = max(roi_end_h - roi_start_h, 0.1);

  // Compute w and h at bottom
  Dtype bin_size_h = roi_height / static_cast<Dtype>(pooled_height);
  Dtype bin_size_w = roi_width / static_cast<Dtype>(pooled_width);

  // start and end coordinates (floating point) of this output bin on the pre-pooling feature map
  Dtype hstart = static_cast<Dtype>(ph) * bin_size_h;
  Dtype wstart = static_cast<Dtype>(pw) * bin_size_w;
  Dtype hend   = static_cast<Dtype>(ph + 1.) * bin_size_h;
  Dtype wend   = static_cast<Dtype>(pw + 1.) * bin_size_w;

  // Add roi offsets and clip to input boundaries
  hstart = min(max(hstart + roi_start_h, Dtype(0)), Dtype(height));
  hend = min(max(hend + roi_start_h, Dtype(0)), Dtype(height));
  wstart = min(max(wstart + roi_start_w, Dtype(0)), Dtype(width));
  wend = min(max(wend + roi_start_w, Dtype(0)), Dtype(width));
  bool is_empty = (hend <= hstart) || (wend <= wstart);

  // -------------------------------------------------------------------------------------

  // Compute c at bottom
  int c = mapping_channel[index];
  Dtype* offset_bottom_diff = bottom_diff +
    (roi_batch_ind * channels + c) * height * width;
  Dtype diff_val = is_empty ? 0. : top_diff[index] / (sample_num * sample_num);

  Dtype sample_h = bin_size_h / (sample_num + 1.);
  Dtype sample_w = bin_size_w / (sample_num + 1.);
  //
  for (int i = 1; i <= sample_num; ++i) {
    for (int j = 1; j <= sample_num; ++j) {
      Dtype cur_h = hstart + i * sample_h;
      Dtype cur_w = wstart + j * sample_w;

      if (cur_h >= hend || cur_w >= wend) continue;

      Dtype w1, w2, w3, w4;
      int x_low, x_high, y_low, y_high;

      bilinear_interpolate_gradient(
              height, width, cur_h, cur_w,
              w1, w2, w3, w4,
              x_low, x_high, y_low, y_high    
              );

      Dtype g1 = diff_val * w1;
      Dtype g2 = diff_val * w2;
      Dtype g3 = diff_val * w3;
      Dtype g4 = diff_val * w4;

      if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) {
        caffe_gpu_atomic_add(g1, offset_bottom_diff + y_low * width + x_low);
        caffe_gpu_atomic_add(g2, offset_bottom_diff + y_low * width + x_high);
        caffe_gpu_atomic_add(g3, offset_bottom_diff + y_high * width + x_low);
        caffe_gpu_atomic_add(g4, offset_bottom_diff + y_high * width + x_high);
      }
    }
  }

  /*
  for (int h = hstart; h < hend; ++h) {
    for (int w = wstart; w < wend; ++w) {
      for(int i = 0; i < sample_num * sample_num; ++i){
          Dtype d_h = abs(sample_pos_diff[2*i + 0] - h);
          Dtype d_w = abs(sample_pos_diff[2*i + 1] - w);
          if(d_h < 1 && d_w < 1){
                int bottom_index = h*width + w;
                caffe_gpu_atomic_add((1 - d_h)*(1 - d_w)*diff_val, offset_bottom_diff + bottom_index);
          }
      }
    }
  }
  */
}

}

template <typename Dtype>
void PSROIAlignLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (!propagate_down[0]) {
    return;
  }

const Dtype* bottom_rois = bottom[1]->gpu_data();
const Dtype* top_diff = top[0]->gpu_diff();
Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
const int bottom_count = bottom[0]->count();
const int* mapping_channel_ptr = mapping_channel_.gpu_data();
caffe_gpu_set(bottom[1]->count(), Dtype(0), bottom[1]->mutable_gpu_diff());
caffe_gpu_set(bottom_count, Dtype(0), bottom_diff);
const int count = top[0]->count();
// NOLINT_NEXT_LINE(whitespace/operators)
PSROIAlignBackwardAtomic<Dtype> << <CAFFE_GET_BLOCKS(count),
  CAFFE_CUDA_NUM_THREADS >> >(count, top_diff, mapping_channel_ptr,
  top[0]->num(), spatial_scale_, channels_, height_, width_,
  pooled_height_, pooled_width_, output_dim_, bottom_diff,
  bottom_rois, sample_num_);
CUDA_POST_KERNEL_CHECK;

}

INSTANTIATE_LAYER_GPU_FUNCS(PSROIAlignLayer);

} // namespace caffe
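
For reference, here is a minimal standalone CPU sketch (not from this repo; the toy feature map, helper names, and the values in main are made up for illustration) of the per-bin computation that PSROIAlignForward performs: sample_num x sample_num bilinear samples averaged over each bin. It can be compiled on its own and compared against the GPU output, or used as the forward pass of a numerical gradient check.

// Minimal standalone CPU sketch (illustration only, not part of the repo).
#include <cstdio>
#include <vector>

// Bilinear lookup with the same boundary clamping as bilinear_interpolate above.
static float BilinearAt(const std::vector<float>& feat, int height, int width,
                        float h, float w) {
  if (h < -0.5f || h > height - 0.5f || w < -0.5f || w > width - 0.5f) return 0.f;
  if (h <= 0.f) h = 0.f;
  if (w <= 0.f) w = 0.f;
  int h_low = static_cast<int>(h);
  int w_low = static_cast<int>(w);
  int h_high, w_high;
  if (h_low >= height - 1) { h_high = h_low = height - 1; h = static_cast<float>(h_low); }
  else h_high = h_low + 1;
  if (w_low >= width - 1) { w_high = w_low = width - 1; w = static_cast<float>(w_low); }
  else w_high = w_low + 1;
  float l_dh = h - h_low, l_dw = w - w_low;
  float h_dh = 1.f - l_dh, h_dw = 1.f - l_dw;
  return h_dh * h_dw * feat[h_low * width + w_low] +
         h_dh * l_dw * feat[h_low * width + w_high] +
         l_dh * h_dw * feat[h_high * width + w_low] +
         l_dh * l_dw * feat[h_high * width + w_high];
}

// Average of sample_num * sample_num bilinear samples inside one bin
// [hstart, hend) x [wstart, wend), mirroring the sampling grid in PSROIAlignForward.
static float BinAverage(const std::vector<float>& feat, int height, int width,
                        float hstart, float hend, float wstart, float wend,
                        int sample_num) {
  float sample_h = (hend - hstart) / (sample_num + 1.f);
  float sample_w = (wend - wstart) / (sample_num + 1.f);
  float sum = 0.f;
  for (int i = 1; i <= sample_num; ++i)
    for (int j = 1; j <= sample_num; ++j)
      sum += BilinearAt(feat, height, width,
                        hstart + i * sample_h, wstart + j * sample_w);
  return sum / static_cast<float>(sample_num * sample_num);
}

int main() {
  // 4x4 toy feature map whose value at (y, x) is simply y.
  const int H = 4, W = 4;
  std::vector<float> feat(H * W);
  for (int y = 0; y < H; ++y)
    for (int x = 0; x < W; ++x) feat[y * W + x] = static_cast<float>(y);
  // A bin covering rows/cols [0.5, 2.5) with 2x2 samples: expected average is 1.5.
  std::printf("bin average = %f\n",
              BinAverage(feat, H, W, 0.5f, 2.5f, 0.5f, 2.5f, 2));
  return 0;
}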

HXY-95 commented 6 years ago

Thank you for the code, I'll take a look at the backward pass.

blueyi commented 5 years ago

(Quoting @DoubleWJX's reply and code above in full.)

Hi there, is the implementation here based on this FACE++ one: light_head_rcnn?

In their implementation, the bilinear interpolation seems to handle integer coordinate points incorrectly, and it looks like you have a correction for that here. Is that right? Thanks @DoubleWJX
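
For context on the integer-coordinate question: in the code quoted above, bilinear_interpolate clamps the low/high indices at the map boundary and lets the fractional weight fall to zero at exact integer coordinates, so the interpolation degenerates to a plain lookup there. Below is a distilled 1-D sketch of just that index selection (my paraphrase, not code from this repo or from light_head_rcnn; the helper name LowHigh is made up).

// 1-D index selection for bilinear sampling at coordinate x in a map of size `size`.
// Paraphrase of the clamping in bilinear_interpolate above; illustration only.
#include <cstdio>

static void LowHigh(float x, int size, int* low, int* high, float* frac) {
  *low = static_cast<int>(x);               // floor for non-negative x
  if (*low >= size - 1) {                   // at or beyond the last pixel: clamp both taps
    *low = size - 1;
    *high = size - 1;
    *frac = 0.f;                            // all weight goes to the single clamped pixel
  } else {
    *high = *low + 1;                       // interior point: taps at floor(x) and floor(x)+1
    *frac = x - static_cast<float>(*low);   // 0 at exact integers, so the high tap gets zero weight
  }
}

int main() {
  int low, high;
  float frac;
  LowHigh(2.0f, 5, &low, &high, &frac);  // exact integer inside the map
  std::printf("x=2.0: low=%d high=%d frac=%.1f\n", low, high, frac);
  LowHigh(4.0f, 5, &low, &high, &frac);  // the last valid coordinate
  std::printf("x=4.0: low=%d high=%d frac=%.1f\n", low, high, frac);
  return 0;
}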