Open TJKlein opened 4 years ago
Hi @TJKlein, what are the native and Python stack traces? Install `python3-dbg`, then run `gdb --pid <your pid>`, and finally `bt`.
Hi,
I am using Python 3.6 on Ubuntu 16.06. Thanks for looking into it.
Here is the stack trace:
#0 0x00007ffe057d5b39 in clock_gettime ()
#1 0x00007f8857aad876 in __GI___clock_gettime (clock_id=4, tp=0x7ffe057c5fc0) at ../sysdeps/unix/clock_gettime.c:115
#2 0x00007f883754301e in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#3 0x00007f88375fefc7 in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#4 0x00007f8837620c34 in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#5 0x00007f883751c6ed in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#6 0x00007f883742988e in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#7 0x00007f883742c1f8 in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#8 0x00007f88375a7f8b in cuMemcpyDtoH_v2 () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#9 0x00007f884ad0a8ca in cudart::driverHelper::memcpyDispatch(void*, void const*, unsigned long, cudaMemcpyKind, bool) () from /home/ubuntu/kmcuda/src/libKMCUDA.so
#10 0x00007f884aceb176 in cudart::cudaApiMemcpy(void*, void const*, unsigned long, cudaMemcpyKind) () from /home/ubuntu/kmcuda/src/libKMCUDA.so
#11 0x00007f884ad0d7b8 in cudaMemcpy () from /home/ubuntu/kmcuda/src/libKMCUDA.so
#12 0x00007f884acb5115 in kmeans_cuda_afkmc2_calc_q (h_samples_size=13000, h_features_size=2, firstc=6373, metric=kmcudaDistanceMetricL2, devs=..., fp16x2=0, verbosity=2, samples=...,
d_q=0x7ffe057c73b0, h_q=0x5642d52b7030) at /home/ubuntu/kmcuda/src/kmeans.cu:857
#13 0x00007f884ac99007 in kmeans_init_centroids (method=kmcudaInitMethodAFKMC2, init_params=0x7ffe057c7554, samples_size=13000, features_size=2, clusters_size=200, metric=kmcudaDistanceMetricL2, seed=3,
devs=..., device_ptrs=-1, fp16x2=0, verbosity=2, host_centroids=0x5642d5178380, samples=..., dists=0x7ffe057c73b0, aux=0x7ffe057c73d0, centroids=0x7ffe057c7390)
at /home/ubuntu/kmcuda/src/kmcuda.cc:361
#14 0x00007f884ac9ad56 in kmeans_cuda (init=kmcudaInitMethodAFKMC2, init_params=0x7ffe057c7554, tolerance=0.0500000007, yinyang_t=0, metric=kmcudaDistanceMetricL2, samples_size=13000, features_size=2,
clusters_size=200, seed=3, device=0, device_ptrs=-1, fp16x2=0, verbosity=2, samples=0x5642d4817f00, centroids=0x5642d5178380, assignments=0x5642d52aa500, average_distance=0x0)
at /home/ubuntu/kmcuda/src/kmcuda.cc:482
Python Exception <class 'gdb.error'> There is no member named ob_item.:
Python Exception <class 'gdb.error'> There is no member named ma_keys.:
#15 0x00007f884acafdd5 in py_kmeans_cuda (self=<module at remote 0x7f8854cdd868>, args=, kwargs=) at /home/ubuntu/kmcuda/src/python.cc:358
#16 0x00005642d25939e4 in _PyCFunction_FastCallDict ()
#17 0x00005642d25c1654 in _PyCFunction_FastCallKeywords ()
#18 0x00005642d2620dfc in call_function ()
#19 0x00005642d2646715 in _PyEval_EvalFrameDefault ()
#20 0x00005642d261af8b in fast_function ()
#21 0x00005642d2620ed5 in call_function ()
#22 0x00005642d264594a in _PyEval_EvalFrameDefault ()
#23 0x00005642d261a206 in _PyEval_EvalCodeWithName ()
#24 0x00005642d261b897 in _PyFunction_FastCallDict ()
#25 0x00005642d2593daf in _PyObject_FastCallDict ()
#26 0x00005642d2598a73 in _PyObject_Call_Prepend ()
#27 0x00005642d25937ee in PyObject_Call ()
#28 0x00005642d264710b in _PyEval_EvalFrameDefault ()
#29 0x00005642d261a206 in _PyEval_EvalCodeWithName ()
#30 0x00005642d261b67c in _PyFunction_FastCallDict ()
#31 0x00005642d2593daf in _PyObject_FastCallDict ()
#32 0x00005642d2598a73 in _PyObject_Call_Prepend ()
#33 0x00005642d25937ee in PyObject_Call ()
#34 0x00005642d25ef897 in slot_tp_call ()
#35 0x00005642d2593bcb in _PyObject_FastCallDict ()
#36 0x00005642d2620f4e in call_function ()
#37 0x00005642d264594a in _PyEval_EvalFrameDefault ()
#38 0x00005642d261a206 in _PyEval_EvalCodeWithName ()
#39 0x00005642d261b897 in _PyFunction_FastCallDict ()
#40 0x00005642d2593daf in _PyObject_FastCallDict ()
#41 0x00005642d2598a73 in _PyObject_Call_Prepend ()
#42 0x00005642d25937ee in PyObject_Call ()
#43 0x00005642d264710b in _PyEval_EvalFrameDefault ()
#44 0x00005642d261a206 in _PyEval_EvalCodeWithName ()
#45 0x00005642d261b67c in _PyFunction_FastCallDict ()
#46 0x00005642d2593daf in _PyObject_FastCallDict ()
#47 0x00005642d2598a73 in _PyObject_Call_Prepend ()
#48 0x00005642d25937ee in PyObject_Call ()
#49 0x00005642d25ef897 in slot_tp_call ()
#50 0x00005642d2593bcb in _PyObject_FastCallDict ()
#51 0x00005642d2620f4e in call_function ()
#52 0x00005642d264594a in _PyEval_EvalFrameDefault ()
#53 0x00005642d261a206 in _PyEval_EvalCodeWithName ()
#54 0x00005642d261b897 in _PyFunction_FastCallDict ()
#55 0x00005642d2593daf in _PyObject_FastCallDict ()
#56 0x00005642d2598a73 in _PyObject_Call_Prepend ()
#57 0x00005642d25937ee in PyObject_Call ()
#58 0x00005642d264710b in _PyEval_EvalFrameDefault ()
#59 0x00005642d261a206 in _PyEval_EvalCodeWithName ()
#60 0x00005642d261b67c in _PyFunction_FastCallDict ()
#61 0x00005642d2593daf in _PyObject_FastCallDict ()
#62 0x00005642d2598a73 in _PyObject_Call_Prepend ()
#63 0x00005642d25937ee in PyObject_Call ()
#64 0x00005642d25ef897 in slot_tp_call ()
#65 0x00005642d2593bcb in _PyObject_FastCallDict ()
#66 0x00005642d2620f4e in call_function ()
#67 0x00005642d264594a in _PyEval_EvalFrameDefault ()
#68 0x00005642d261af8b in fast_function ()
#69 0x00005642d2620ed5 in call_function ()
#70 0x00005642d264594a in _PyEval_EvalFrameDefault ()
#71 0x00005642d261af8b in fast_function ()
#72 0x00005642d2620ed5 in call_function ()
#73 0x00005642d264594a in _PyEval_EvalFrameDefault ()
#74 0x00005642d261a7d1 in _PyEval_EvalCodeWithName ()
#75 0x00005642d261b67c in _PyFunction_FastCallDict ()
#76 0x00005642d2593daf in _PyObject_FastCallDict ()
#77 0x00005642d2598a73 in _PyObject_Call_Prepend ()
#78 0x00005642d25937ee in PyObject_Call ()
#79 0x00005642d25eeefb in slot_tp_init ()
#80 0x00005642d2621137 in type_call ()
#81 0x00005642d2593bcb in _PyObject_FastCallDict ()
#82 0x00005642d2620f4e in call_function ()
#83 0x00005642d264594a in _PyEval_EvalFrameDefault ()
#84 0x00005642d261bcb9 in PyEval_EvalCodeEx ()
#85 0x00005642d261ca4c in PyEval_EvalCode ()
#86 0x00005642d2698c44 in run_mod ()
#87 0x00005642d2699041 in PyRun_FileExFlags ()
#88 0x00005642d2699244 in PyRun_SimpleFileExFlags ()
#89 0x00005642d269cd24 in Py_Main ()
#90 0x00005642d256475e in main ()
Hi,
I can't get kmcuda running on a V100 (CUDA_ARCH=70) with CUDA 10.0/10.1. It always freezes, both when running `python test.py` and during regular use.
Here is some output from the test: