Open jzhoulon opened 1 year ago
Hi, when I try to compile the splitrpc_server module, it reports the error: identifier "cudaDeviceSynchronize" is undefined in device code. It seems that the device code (kernel) calls a host runtime function. Is this intended? It does not seem right.
error: identifier "cudaDeviceSynchronize" is undefined in device code
__device__ __forceinline__ void cdp_internal(float *inp, float *out) { float* Parameter_96_0 = inp; d_Multiply_2260_0 = out; // kernel_entry_init // name=blockfusion_kernel_2326 BlockFusionKernel_float_int32_t_int32_t_int32_t_int32_t_float_float_float_float_float_float_float_float_float_int32_t_int32_t_float_cuda_Slice_Slice_Slice_Slice_Slice_Slice_Slice_Slice_Concat_Concat_Broadcast_0 <<<dim3(38, 1, 1), dim3(512, 1, 1), 0, 0>>>(Parameter_96_0, d_Constant_97_0, d_Constant_98_0, d_Constant_101_0, d_Constant_102_0, d_Constant_104_0, d_Slice_195_0, d_Slice_197_0, d_Slice_199_0, d_Slice_201_0, d_Slice_203_0, d_Slice_205_0, d_Slice_207_0, d_Slice_209_0, d_Concat_193_0, d_Concat_194_0, d_Broadcast_212_0); cudaDeviceSynchronize(); //BlockFusionKernel_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_cuda_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_2_Call(dim3(256, 1, 1), dim3(128, 1, 1), 0, 0, d_Reshape_196_0, d_Constant_6_0, d_Constant_3_0, d_Constant_9_0, d_Constant_0_0, d_Reshape_198_0, d_Reshape_200_0, d_Reshape_202_0, d_Reshape_204_0, d_Reshape_206_0, d_Reshape_208_0, d_Reshape_210_0, d_Dot_215_0, d_Dot_214_0, d_Dot_216_0, d_Dot_213_0, d_Dot_217_0, d_Dot_218_0, d_Dot_220_0, d_Dot_219_0, d_Dot_221_0, d_Dot_224_0, d_Dot_223_0, d_Dot_222_0, d_Dot_227_0, d_Dot_226_0, d_Dot_228_0, d_Dot_225_0, d_Dot_232_0, d_Dot_230_0, d_Dot_231_0, d_Dot_229_0, d_Dot_233_0, d_Dot_234_0, d_Dot_235_0, d_Dot_236_0, d_Dot_237_0, d_Dot_238_0, d_Dot_239_0, d_Dot_240_0, d_Dot_241_0, d_Dot_242_0, d_Dot_243_0, d_Dot_244_0); 
BlockFusionKernel_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_cuda_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_2 <<<dim3(256, 1, 1), dim3(128, 1, 1), 0, 0>>>(d_Reshape_196_0, d_Constant_6_0, d_Constant_3_0, d_Constant_9_0, d_Constant_0_0, d_Reshape_198_0, d_Reshape_200_0, d_Reshape_202_0, d_Reshape_204_0, d_Reshape_206_0, d_Reshape_208_0, d_Reshape_210_0, d_Dot_215_0, d_Dot_214_0, d_Dot_216_0, d_Dot_213_0, d_Dot_217_0, d_Dot_218_0, d_Dot_220_0, d_Dot_219_0, d_Dot_221_0, d_Dot_224_0, d_Dot_223_0, d_Dot_222_0, d_Dot_227_0, d_Dot_226_0, d_Dot_228_0, d_Dot_225_0, d_Dot_232_0, d_Dot_230_0, d_Dot_231_0, d_Dot_229_0, d_Dot_233_0, d_Dot_234_0, d_Dot_235_0, d_Dot_236_0, d_Dot_237_0, d_Dot_238_0, d_Dot_239_0, d_Dot_240_0, d_Dot_241_0, d_Dot_242_0, d_Dot_243_0, d_Dot_244_0); cudaDeviceSynchronize();
@minus-one, could you please have a look? Thanks very much.
Hi, when I try to compile the splitrpc_server module, it reports:
error: identifier "cudaDeviceSynchronize" is undefined in device code
It seems that the device code (kernel) calls a host runtime function. Is this intended? It does not seem right.