Closed pierrelzw closed 1 year ago
// Gathers camera feature vectors into a channel-major output volume.
//
// Each thread handles one of the `num_valid` output positions (`tid`). For every camera
// whose flag valid_index[icamera * num_valid + tid] equals 1, the thread copies
// kNumChannels values from camera_feature at pixel (x, y) = (valid_x[...], valid_y[...])
// into output_feature[c * num_valid + tid].
//
// If more than one camera is flagged for the same position, the copy from the highest
// camera index is the one that remains, because every copy writes the same output slots.
// Positions with no flagged camera are not written at all.
//
// Layouts implied by the indexing below:
//   camera_feature                  : [num_camera][kNumChannels][feat_height][feat_width]
//   valid_index / valid_x / valid_y : [num_camera][num_valid]
//   output_feature                  : [kNumChannels][num_valid]
//
// NOTE(review): `cuda_linear_index` is defined outside this snippet; it is presumably the
// flat global thread index - confirm against its definition.
// NOTE(review): valid_x / valid_y are not range-checked here; callers must guarantee they
// lie inside the feat_width x feat_height plane.
static __global__ void compute_volum_kernel(int num_valid, const half* camera_feature, const float* valid_index,
                                            const int64_t* valid_y, const int64_t* valid_x, int num_camera,
                                            int feat_height, int feat_width, half* output_feature) {
  // Number of feature channels per camera. The kernel is specialised for this value:
  // it is both the channel loop bound and part of the per-camera stride.
  constexpr int kNumChannels = 64;

  int tid = cuda_linear_index;
  if (tid >= num_valid) return;

  // Strides are computed once, in 64-bit, so large feature maps cannot overflow `int`.
  const int64_t plane_size    = static_cast<int64_t>(feat_height) * feat_width;
  const int64_t camera_stride = kNumChannels * plane_size;

  for (int icamera = 0; icamera < num_camera; ++icamera) {
    const int64_t index = static_cast<int64_t>(icamera) * num_valid + tid;

    // Float literal avoids promoting the comparison to double precision.
    if (valid_index[index] != 1.0f) continue;

    const int64_t x = valid_x[index];
    const int64_t y = valid_y[index];

    // Channel 0 of the selected pixel; successive channels are plane_size apart.
    const half* src = camera_feature + icamera * camera_stride + feat_width * y + x;

    for (int c = 0; c < kNumChannels; ++c) {
      output_feature[static_cast<int64_t>(c) * num_valid + tid] = src[c * plane_size];
    }
  }
}
Why is the value 64 hard-coded in this kernel (the `c < 64` loop bound and the `icamera*64*feat_height*feat_width` offset)?