Open zjuncd opened 2 years ago
基础Docker镜像: paddlepaddle/paddle:1.8.0-gpu-cuda10.0-cudnn7
# make.sh
#
# Builds pointnet_lib.so, a PaddlePaddle custom-op shared library:
#   1. compiles every <op>.cu listed in OPS to <op>.cu.o with nvcc,
#   2. links each <op>.cc and <op>.cu.o into pointnet_lib.so with g++,
#      against libpaddle_framework and libcudart,
#   3. deletes the intermediate *.cu.o objects.
#
# Requires python (with paddle importable), nvcc and g++ on PATH, and must be
# run from the directory that holds the operator sources.

# Stop at the first failing command (and on unset variables) so a failed nvcc
# step cannot fall through to the link and cleanup steps.
set -eu

# Ask the installed paddle package where its headers and libraries live.
include_dir=$(python -c 'import paddle; print(paddle.sysconfig.get_include())')
lib_dir=$(python -c 'import paddle; print(paddle.sysconfig.get_lib())')

printf '%s\n' "$include_dir"
printf '%s\n' "$lib_dir"

OPS='farthest_point_sampling_op gather_point_op group_points_op query_ball_op three_interp_op three_nn_op'

# Originally this define was passed to the g++ step only. It is now given to
# nvcc as well so that every object in the library is compiled with the same
# libstdc++ ABI setting.
# NOTE(review): presumably this must match the ABI libpaddle_framework was
# built with -- confirm against the installed Paddle build.
abi_flag='-D_GLIBCXX_USE_CXX11_ABI=0'

# Collect the link inputs in the positional parameters while compiling, so the
# operator list is written down only once (in OPS).
set --
# OPS is deliberately unquoted: it is a space-separated list of operator names.
for op in $OPS; do
  nvcc "${op}.cu" -c -o "${op}.cu.o" -ccbin cc \
    -DPADDLE_WITH_CUDA -DEIGEN_USE_GPU -DPADDLE_USE_DSO -DPADDLE_WITH_MKLDNN \
    -Xcompiler -fPIC -std=c++11 -w --expt-relaxed-constexpr -O0 -g -DNVCC \
    "$abi_flag" \
    -I "${include_dir}/third_party/" \
    -I "${include_dir}"
  set -- "$@" "${op}.cc" "${op}.cu.o"
done

g++ "$@" -o pointnet_lib.so \
  -DPADDLE_WITH_MKLDNN -shared -fPIC -std=c++11 -O0 -g \
  -I "${include_dir}/third_party/" \
  -I "${include_dir}" \
  -L "${lib_dir}" \
  -L /usr/local/cuda/lib64 -lpaddle_framework -lcudart \
  "$abi_flag"

# Remove the intermediate CUDA objects; `--` keeps odd file names from being
# parsed as options.
rm -- *.cu.o
2. 运行 make.sh 编译通过
3. 执行测试
```bash
export CUDA_VISIBLE_DEVICES=0
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`python -c 'import paddle; print(paddle.sysconfig.get_lib())'`
export PYTHONPATH=$PYTHONPATH:`pwd`
python tests/test_three_nn_op.py
```
报错信息如下:
W0916 10:55:57.921620 376 init.cc:216] Warning: PaddlePaddle catches a failure signal, it may not work properly W0916 10:55:57.921661 376 init.cc:218] You could check whether you killed PaddlePaddle thread/process accidentally or report the case to PaddlePaddle W0916 10:55:57.921666 376 init.cc:221] The detail failure signal is: W0916 10:55:57.921671 376 init.cc:224] *** Aborted at 1631789757 (unix time) try "date -d @1631789757" if you are using GNU date *** W0916 10:55:57.923035 376 init.cc:224] PC: @ 0x0 (unknown) W0916 10:55:57.923224 376 init.cc:224] *** SIGFPE (@0x7f1e088ed83b) received by PID 376 (TID 0x7f1e0e650700) from PID 143579195; stack trace: *** W0916 10:55:57.924304 376 init.cc:224] @ 0x7f1e0e22e390 (unknown) W0916 10:55:57.925643 376 init.cc:224] @ 0x7f1e088ed83b std::__detail::_Mod_range_hashing::operator()() W0916 10:55:57.926782 376 init.cc:224] @ 0x7f1e089104b2 std::__detail::_Hash_code_base<>::_M_bucket_index() W0916 10:55:57.927825 376 init.cc:224] @ 0x7f1e0890f5d0 std::_Hashtable<>::_M_bucket_index() W0916 10:55:57.928773 376 init.cc:224] @ 0x7f1e089119bb std::__detail::_Map_base<>::operator[]() W0916 10:55:57.929697 376 init.cc:224] @ 0x7f1e08910992 std::unordered_map<>::operator[]() W0916 10:55:57.930344 376 init.cc:224] @ 0x7f1e0890fe46 _ZN6paddle9framework19RegisterKernelClassINS_8platform9CUDAPlaceEfZNKS0_24OpKernelRegistrarFunctorIS3_Lb0ELm0EINS_9operators33FarthestPointSamplingOpCUDAKernelIfEENS6_IdEEEEclEPKcSB_iEUlRKNS0_16ExecutionContextEE_EEvSB_SB_iT1_ W0916 10:55:57.931105 376 init.cc:224] @ 0x7f1e0890f2e4 paddle::framework::OpKernelRegistrarFunctor<>::operator()() W0916 10:55:57.931761 376 init.cc:224] @ 0x7f1e0890e799 _ZN6paddle9framework17OpKernelRegistrarINS_8platform9CUDAPlaceEJNS_9operators33FarthestPointSamplingOpCUDAKernelIfEENS5_IdEEEEC2EPKcSA_i W0916 10:55:57.932322 376 init.cc:224] @ 0x7f1e0890af49 __static_initialization_and_destruction_0() W0916 10:55:57.932822 376 init.cc:224] @ 0x7f1e0890af77 
_GLOBAL__sub_I_tmpxft_000000df_00000000_5_farthest_point_sampling_op.cudafe1.cpp W0916 10:55:57.933336 376 init.cc:224] @ 0x7f1e0e44a6ca (unknown) W0916 10:55:57.933843 376 init.cc:224] @ 0x7f1e0e44a7db (unknown) W0916 10:55:57.934345 376 init.cc:224] @ 0x7f1e0e44f8f2 (unknown) W0916 10:55:57.934846 376 init.cc:224] @ 0x7f1e0e44a574 (unknown) W0916 10:55:57.935348 376 init.cc:224] @ 0x7f1e0e44edb9 (unknown) W0916 10:55:57.935863 376 init.cc:224] @ 0x7f1e0dc4ff09 (unknown) W0916 10:55:57.936401 376 init.cc:224] @ 0x7f1e0e44a574 (unknown) W0916 10:55:57.936937 376 init.cc:224] @ 0x7f1e0dc50571 (unknown) W0916 10:55:57.937688 376 init.cc:224] @ 0x7f1e0dc4ffa1 dlopen W0916 10:55:57.945061 376 init.cc:224] @ 0x7f1da08da6d3 paddle::platform::dynload::GetOpDsoHandle() W0916 10:55:57.950942 376 init.cc:224] @ 0x7f1d9cfbe71d paddle::framework::LoadOpLib() W0916 10:55:57.953336 376 init.cc:224] @ 0x7f1d9d0239ed _ZZN8pybind1112cpp_function10initializeIRPFvRKSsEvIS3_EINS_4nameENS_5scopeENS_7siblingEEEEvOT_PFT0_DpT1_EDpRKT2_ENUlRNS_6detail13function_callEE1_4_FUNESN_ W0916 10:55:57.955534 376 init.cc:224] @ 0x7f1d9d048b39 pybind11::cpp_function::dispatcher() W0916 10:55:57.955688 376 init.cc:224] @ 0x4bc9ba PyEval_EvalFrameEx W0916 10:55:57.955794 376 init.cc:224] @ 0x4ba036 PyEval_EvalCodeEx W0916 10:55:57.955926 376 init.cc:224] @ 0x4c237b PyEval_EvalFrameEx W0916 10:55:57.956028 376 init.cc:224] @ 0x4ba036 PyEval_EvalCodeEx W0916 10:55:57.956147 376 init.cc:224] @ 0x4b9d26 PyEval_EvalCode W0916 10:55:57.956218 376 init.cc:224] @ 0x4b9c5f PyImport_ExecCodeModuleEx W0916 10:55:57.956341 376 init.cc:224] @ 0x4b2f86 (unknown) W0916 10:55:57.956454 376 init.cc:224] @ 0x4a4d21 (unknown) Floating point exception (core dumped)
g++ 和 cmake 的版本需要与编译 Paddle 时所用的版本一致;从报错信息来看,应该是版本不一致导致 ABI 不匹配。
基础Docker镜像: paddlepaddle/paddle:1.8.0-gpu-cuda10.0-cudnn7
# Excerpt of make.sh: compile each CUDA operator with nvcc, link everything
# into pointnet_lib.so with g++, then delete the intermediate objects.
# This excerpt does not assign include_dir or lib_dir; both must already be
# set, which the ${var:?} guards below enforce.

# Stop at the first failing command so a failed compile cannot reach the link
# and cleanup steps.
set -eu

printf '%s\n' "${include_dir:?include_dir must be set to the paddle include directory}"
printf '%s\n' "${lib_dir:?lib_dir must be set to the paddle library directory}"

OPS='farthest_point_sampling_op gather_point_op group_points_op query_ball_op three_interp_op three_nn_op'

# Originally only the g++ step received this define; nvcc now gets it too so
# all objects in the library share one libstdc++ ABI setting.
# NOTE(review): presumably this must match the ABI of libpaddle_framework --
# confirm against the installed Paddle build.
abi_flag='-D_GLIBCXX_USE_CXX11_ABI=0'

# Accumulate the link inputs in the positional parameters while compiling.
set --
# OPS is deliberately unquoted: it is a space-separated list of operator names.
for op in $OPS; do
  nvcc "${op}.cu" -c -o "${op}.cu.o" -ccbin cc \
    -DPADDLE_WITH_CUDA -DEIGEN_USE_GPU -DPADDLE_USE_DSO -DPADDLE_WITH_MKLDNN \
    -Xcompiler -fPIC -std=c++11 -w --expt-relaxed-constexpr -O0 -g -DNVCC \
    "$abi_flag" \
    -I "${include_dir}/third_party/" \
    -I "${include_dir}"
  set -- "$@" "${op}.cc" "${op}.cu.o"
done

g++ "$@" -o pointnet_lib.so \
  -DPADDLE_WITH_MKLDNN -shared -fPIC -std=c++11 -O0 -g \
  -I "${include_dir}/third_party/" \
  -I "${include_dir}" \
  -L "${lib_dir}" \
  -L /usr/local/cuda/lib64 -lpaddle_framework -lcudart \
  "$abi_flag"

# Remove the intermediate CUDA objects.
rm -- *.cu.o
报错信息如下: