minus-one / splitrpc

A {Control + Data} Path Splitting RPC Stack for ML Inference Serving
MIT License
2 stars 0 forks source link

Compilation failure of splitrpc_server #3

Open jzhoulon opened 1 year ago

jzhoulon commented 1 year ago

Hi, when I try to compile the splitrpc_server module, it reports the error: identifier "cudaDeviceSynchronize" is undefined in device code. It seems that the device code (a kernel) calls a host runtime function. Is this intended? It does not seem right.

__device__ __forceinline__
void cdp_internal(float *inp, float *out)
{
  float* Parameter_96_0 = inp;
  d_Multiply_2260_0 = out;
  // kernel_entry_init
  // name=blockfusion_kernel_2326
  BlockFusionKernel_float_int32_t_int32_t_int32_t_int32_t_float_float_float_float_float_float_float_float_float_int32_t_int32_t_float_cuda_Slice_Slice_Slice_Slice_Slice_Slice_Slice_Slice_Concat_Concat_Broadcast_0
    <<<dim3(38, 1, 1), dim3(512, 1, 1), 0, 0>>>(Parameter_96_0, d_Constant_97_0, d_Constant_98_0, d_Constant_101_0, d_Constant_102_0, d_Constant_104_0, d_Slice_195_0, d_Slice_197_0, d_Slice_199_0, d_Slice_201_0, d_Slice_203_0, d_Slice_205_0, d_Slice_207_0, d_Slice_209_0, d_Concat_193_0, d_Concat_194_0, d_Broadcast_212_0);
  cudaDeviceSynchronize();
  //BlockFusionKernel_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_cuda_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_2_Call(dim3(256, 1, 1), dim3(128, 1, 1), 0, 0, d_Reshape_196_0, d_Constant_6_0, d_Constant_3_0, d_Constant_9_0, d_Constant_0_0, d_Reshape_198_0, d_Reshape_200_0, d_Reshape_202_0, d_Reshape_204_0, d_Reshape_206_0, d_Reshape_208_0, d_Reshape_210_0, d_Dot_215_0, d_Dot_214_0, d_Dot_216_0, d_Dot_213_0, d_Dot_217_0, d_Dot_218_0, d_Dot_220_0, d_Dot_219_0, d_Dot_221_0, d_Dot_224_0, d_Dot_223_0, d_Dot_222_0, d_Dot_227_0, d_Dot_226_0, d_Dot_228_0, d_Dot_225_0, d_Dot_232_0, d_Dot_230_0, d_Dot_231_0, d_Dot_229_0, d_Dot_233_0, d_Dot_234_0, d_Dot_235_0, d_Dot_236_0, d_Dot_237_0, d_Dot_238_0, d_Dot_239_0, d_Dot_240_0, d_Dot_241_0, d_Dot_242_0, d_Dot_243_0, d_Dot_244_0);
  BlockFusionKernel_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_float_cuda_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_Dot_2
    <<<dim3(256, 1, 1), dim3(128, 1, 1), 0, 0>>>(d_Reshape_196_0, d_Constant_6_0, d_Constant_3_0, d_Constant_9_0, d_Constant_0_0, d_Reshape_198_0, d_Reshape_200_0, d_Reshape_202_0, d_Reshape_204_0, d_Reshape_206_0, d_Reshape_208_0, d_Reshape_210_0, d_Dot_215_0, d_Dot_214_0, d_Dot_216_0, d_Dot_213_0, d_Dot_217_0, d_Dot_218_0, d_Dot_220_0, d_Dot_219_0, d_Dot_221_0, d_Dot_224_0, d_Dot_223_0, d_Dot_222_0, d_Dot_227_0, d_Dot_226_0, d_Dot_228_0, d_Dot_225_0, d_Dot_232_0, d_Dot_230_0, d_Dot_231_0, d_Dot_229_0, d_Dot_233_0, d_Dot_234_0, d_Dot_235_0, d_Dot_236_0, d_Dot_237_0, d_Dot_238_0, d_Dot_239_0, d_Dot_240_0, d_Dot_241_0, d_Dot_242_0, d_Dot_243_0, d_Dot_244_0);
  cudaDeviceSynchronize();
jzhoulon commented 1 year ago

@minus-one, could you have a look? Thanks very much.