lmb-freiburg / flownet2

FlowNet 2.0: Evolution of Optical Flow Estimation with Deep Networks
https://lmb.informatik.uni-freiburg.de/Publications/2017/IMKDB17/

add a new layer and then train flownet #58

Closed zmlshiwo closed 7 years ago

zmlshiwo commented 7 years ago

I added a new layer to FlowNetS: a median filter for the optical flow. My code is below, but there are problems during training. After roughly 5000+ iterations the loss value becomes very large, and then loss = -nan. If I print the output of this layer, I can see that its values also become very large. I don't know where the error is.

Here is my layer, median.cu:

#include <vector>

#include "caffe/layers/median_layer.hpp"

#define FW_THREADS 16
#define FW_TILE_X FW_THREADS
#define FW_TILE_C FW_THREADS

namespace caffe {

template <typename Dtype>
__global__ void MedianForward(const Dtype* in_flow, Dtype* out_flow,
    int imgHeight, int imgWidth, int imgNum, int imgChannel) {
  // pixel coordinates of this thread within the image
  int idx = blockDim.x * blockIdx.x + threadIdx.x;  // width
  int idy = blockDim.y * blockIdx.y + threadIdx.y;  // height
  // row stride, padded up to a multiple of 4
  int lineByteOut = (imgWidth * 8 / 8 + 3) / 4 * 4;

  // window buffer: a 3x3 neighbourhood holds 9 values
  float value[9];

  // first pass: copy the input through to the output
  for (int n = 0; n < imgNum; n++) {
    __syncthreads();
    for (int c = 0; c < imgChannel; c++) {
      __syncthreads();
      if (idx >= 0 && idy >= 0 && idx < imgWidth && idy < imgHeight) {
        int ch_off = (n * imgChannel + c) * imgHeight;
        out_flow[(ch_off + idy) * lineByteOut + idx] =
            in_flow[(ch_off + idy) * lineByteOut + idx];
      }
      __syncthreads();
    }
    __syncthreads();
  }
  __syncthreads();

  // second pass: replace interior pixels with the 3x3 median
  if (idx > 0 && idx < imgWidth - 1 && idy > 0 && idy < imgHeight - 1) {
    for (int n = 0; n < imgNum; n++) {
      for (int c = 0; c < imgChannel; c++) {
        int ch_off = (n * imgChannel + c) * imgHeight;
        // gather the 3x3 neighbourhood
        for (int i = -1; i < 2; i++) {
          for (int j = -1; j < 2; j++) {
            value[(i + 1) * 3 + (j + 1)] =
                in_flow[(ch_off + idy + i) * lineByteOut + idx + j];
          }
        }
        // bubble sort the 9 values; the median is the middle element
        float temp;
        for (int s = 0; s < 9; s++) {
          for (int t = 0; t < 9 - s - 1; t++) {
            if (value[t] > value[t + 1]) {
              temp = value[t];
              value[t] = value[t + 1];
              value[t + 1] = temp;
            }
          }
        }
        out_flow[(ch_off + idy) * lineByteOut + idx] = value[4];
      }
    }
  }

  // __syncthreads();
}

template <typename Dtype>
void MedianLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {

  int width = top[0]->width();
  int height = top[0]->height();
  int channels = top[0]->channels();
  int num = top[0]->num();

  const Dtype* in_flow_data = bottom[0]->gpu_data();
  Dtype* out_flow_data = top[0]->mutable_gpu_data();
  const int count = bottom[0]->count();

  size_t size = num * channels * width * height * sizeof(Dtype);

  // clear the output blob before the kernel writes into it
  cudaMemset(out_flow_data, 0, size);

  dim3 threadsPerBlock(16, 16);
  dim3 blocksPerGrid((width + 15) / 16, (height + 15) / 16);

  MedianForward<Dtype><<<blocksPerGrid, threadsPerBlock>>>(
      in_flow_data, out_flow_data, height, width, num, channels);
  cudaDeviceSynchronize();

  CUDA_POST_KERNEL_CHECK;

}

template <typename Dtype>
__global__ void MedianBackward(const int n, const Dtype* in_diff,
    const Dtype* out_data, Dtype* out_diff) {
  CUDA_KERNEL_LOOP(index, n) {
    // backward pass simply copies the gradient through unchanged
    // Dtype sinx = out_data[index];
    out_diff[index] = in_diff[index];
  }
}

template <typename Dtype>
void MedianLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[0]) {
    const Dtype* bottom_data = bottom[0]->gpu_data();
    const Dtype* top_diff = top[0]->gpu_diff();
    Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
    const int count = bottom[0]->count();
    // NOLINT_NEXT_LINE(whitespace/operators)
    MedianBackward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
        count, top_diff, bottom_data, bottom_diff);
    CUDA_POST_KERNEL_CHECK;
  }
}

INSTANTIATE_LAYER_GPU_FUNCS(MedianLayer);

}  // namespace caffe
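For reference, a minimal standalone sketch of a 3x3 median filter over a dense NCHW float tensor is shown below. It is independent of Caffe: the Median3x3 kernel name, the plain float layout, and the tiny host driver are assumptions for illustration only, not the layer above. The point it illustrates is that a 3x3 window holds 9 samples and that the row stride of a dense blob is simply its width.

// Minimal sketch: 3x3 median filter over a dense (N, C, H, W) float tensor.
// Border pixels are copied through unchanged.
#include <cstdio>
#include <cuda_runtime.h>

__global__ void Median3x3(const float* in, float* out,
                          int num, int channels, int height, int width) {
  int x = blockIdx.x * blockDim.x + threadIdx.x;  // column
  int y = blockIdx.y * blockDim.y + threadIdx.y;  // row
  if (x >= width || y >= height) return;

  for (int n = 0; n < num; ++n) {
    for (int c = 0; c < channels; ++c) {
      const float* src = in  + (n * channels + c) * height * width;
      float*       dst = out + (n * channels + c) * height * width;
      if (x == 0 || y == 0 || x == width - 1 || y == height - 1) {
        dst[y * width + x] = src[y * width + x];  // pass the border through
        continue;
      }
      float v[9];  // 3x3 window -> 9 samples
      int k = 0;
      for (int dy = -1; dy <= 1; ++dy)
        for (int dx = -1; dx <= 1; ++dx)
          v[k++] = src[(y + dy) * width + (x + dx)];
      // insertion sort of the 9 samples; the median is v[4]
      for (int i = 1; i < 9; ++i) {
        float key = v[i];
        int j = i - 1;
        while (j >= 0 && v[j] > key) { v[j + 1] = v[j]; --j; }
        v[j + 1] = key;
      }
      dst[y * width + x] = v[4];
    }
  }
}

int main() {
  const int num = 1, channels = 2, height = 8, width = 8;
  const int count = num * channels * height * width;
  float *d_in, *d_out;
  cudaMalloc(&d_in, count * sizeof(float));
  cudaMalloc(&d_out, count * sizeof(float));
  cudaMemset(d_in, 0, count * sizeof(float));

  dim3 block(16, 16);
  dim3 grid((width + block.x - 1) / block.x, (height + block.y - 1) / block.y);
  Median3x3<<<grid, block>>>(d_in, d_out, num, channels, height, width);
  cudaDeviceSynchronize();
  printf("kernel finished: %s\n", cudaGetErrorString(cudaGetLastError()));

  cudaFree(d_in);
  cudaFree(d_out);
  return 0;
}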

Here is the relevant part of my train.prototxt:

layer {
  name: "median1"
  type: "Median"
  bottom: "predict_flow5"
  top: "predict_flow5_median"
}
layer {
  name: "ReLU20"
  type: "ReLU"
  bottom: "predict_flow5_median"
  top: "predict_flow5_median"
  relu_param {
    negative_slope: 0.1
  }
}
layer {
  name: "Downsample2"
  type: "Downsample"
  bottom: "scaled_flow_gt"
  bottom: "predict_flow5_median"
  top: "blob28"
}
layer {
  name: "flow_loss5"
  type: "L1Loss"
  bottom: "predict_flow5_median"
  bottom: "blob28"
  top: "flow_loss5"
  loss_weight: 0.08
  l1_loss_param {
    l2_perlocation: true
  }
}

nikolausmayer commented 7 years ago

I'm sorry, but we will not debug your code. Please take this to StackOverflow or to a programming forum.

Nikolaus