I added a new layer to FlowNetS — a median filter for the optical flow. My code is below, but there is a problem during training: after about 5000 iterations the loss value becomes very large, and then loss = -nan. Dumping this layer's output data shows that its values blow up. I don't know where the error is.
Here is my layer, median.cu:
#include <vector>
// #include <...>  // NOTE(review): a second angle-bracket include was lost to markdown mangling; restore it from the original file

#include "caffe/layers/median_layer.hpp"

#define FW_THREADS 16
#define FW_TILE_X FW_THREADS
#define FW_TILE_C FW_THREADS
namespace caffe {

// Forward median-filter kernel for optical-flow maps.
//
// BUG / NOTE(review): as posted, this kernel only computes thread indices
// and never writes a single element of out_flow -- the median computation
// itself is either missing or was lost when the code was pasted. If
// out_flow is never written, the top blob contains uninitialized device
// memory, which would exactly explain the huge output values and the
// eventual loss = -nan reported after ~5000 iterations.
template <typename Dtype>
__global__ void MedianForward(const Dtype* in_flow, Dtype* out_flow,
    int imgHeight, int imgWidth, int imgNum, int imgChannel) {
  // Pixel coordinates handled by this thread.
  int idx = blockDim.x * blockIdx.x + threadIdx.x;  // width (column)
  int idy = blockDim.y * blockIdx.y + threadIdx.y;  // height (row)
  // NOTE(review): the pasted expression read "(imgWidth8/8 + 3)/44"; the
  // '*' characters were almost certainly eaten by markdown. The usual
  // 4-byte row-alignment formula is assumed here -- confirm against the
  // original source:
  int lineByteOut = (imgWidth * 8 / 8 + 3) / 4 * 4;
  // __syncthreads();
}
template <typename Dtype>
void MedianLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  // BUG (likely cause of the -nan loss): the posted Forward_gpu body was
  // completely empty, so top[0] was never written and every downstream
  // layer (ReLU20, Downsample2, flow_loss5) consumed uninitialized device
  // memory. Until the MedianForward kernel is finished, fall back to an
  // identity copy -- this is consistent with the identity gradient that
  // Backward_gpu already implements, so the layer becomes a well-defined
  // no-op instead of producing garbage.
  const int count = bottom[0]->count();
  caffe_copy(count, bottom[0]->gpu_data(), top[0]->mutable_gpu_data());
}

template <typename Dtype>
__global__ void MedianBackward(const int n, const Dtype* in_diff,
    const Dtype* out_data, Dtype* out_diff) {
  // Identity gradient: pass the top diff straight through to the bottom.
  // NOTE(review): for a true median filter the gradient should flow only
  // to the input element that was selected as the median in the forward
  // pass; a straight pass-through is an approximation of that.
  CUDA_KERNEL_LOOP(index, n) {
    // Dtype sinx = out_data[index];
    out_diff[index] = in_diff[index];
  }
}
template <typename Dtype>
void MedianLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[0]) {
    const Dtype* bottom_data = bottom[0]->gpu_data();
    const Dtype* top_diff = top[0]->gpu_diff();
    Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
    const int count = bottom[0]->count();
    // Launch one thread per element; MedianBackward copies top_diff into
    // bottom_diff unchanged (identity gradient).
    // NOLINT_NEXT_LINE(whitespace/operators)
    MedianBackward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
        count, top_diff, bottom_data, bottom_diff);
    CUDA_POST_KERNEL_CHECK;
  }
}

INSTANTIATE_LAYER_GPU_FUNCS(MedianLayer);

}  // namespace caffe
Here is the relevant part of my train.prototxt:
layer {
  name: "median1"
  type: "Median"
  bottom: "predict_flow5"
  top: "predict_flow5_median"
}
layer {
  name: "ReLU20"
  type: "ReLU"
  bottom: "predict_flow5_median"
  top: "predict_flow5_median"
  relu_param { negative_slope: 0.1 }
}
layer {
  name: "Downsample2"
  type: "Downsample"
  bottom: "scaled_flow_gt"
  bottom: "predict_flow5_median"
  top: "blob28"
}
layer {
  name: "flow_loss5"
  type: "L1Loss"
  bottom: "predict_flow5_median"
  bottom: "blob28"
  top: "flow_loss5"
  loss_weight: 0.08
  l1_loss_param { l2_perlocation: true }
}