Closed yizeyi18 closed 7 months ago
Dear @yizeyi18,
thanks for informing us about this. This has been fixed in the meantime with commit a033a253, so the upstream master_pre_stage branch will work fine for you.
Dear @yizeyi18,
thanks for informing us about this. This has been fixed in the meantime with commit a033a253, so the upstream master_pre_stage branch will work fine for you.
Is there any difference between the master_pre_stage and master branches? They appear to be identical.
With the
--enable-gpu-streams=amd
configure flag, validate_complex_double_eigenvectors_2stage_default_kernel_gpu_analytic_default.sh --- or rather, every 2-stage GPU kernel test --- fails with "Invalid DeviceId less than 0" from the HIP runtime. The backtrace looks like this:
Backtrace from running ABACUS
```shell :0:/tmp/yizeyi18/spack-stage/spack-stage-hip-5.7.0-qjrvxutegybi3rhpu4mctpd2temd7krn/spack-src/clr/hipamd/src/hip_fatbin.hpp:50 : 614193288254 us: [pid:509108 tid:0x155546b83cc0] Invalid DeviceId less than 0 Thread 1 "abacus" received signal SIGABRT, Aborted. __pthread_kill_implementation (no_tid=0, signo=6, threadid=23456002882752) at ./nptl/pthread_kill.c:44 44 ./nptl/pthread_kill.c: 没有那个文件或目录. (gdb) bt #0 __pthread_kill_implementation (no_tid=0, signo=6, threadid=23456002882752) at ./nptl/pthread_kill.c:44 #1 __pthread_kill_internal (signo=6, threadid=23456002882752) at ./nptl/pthread_kill.c:78 #2 __GI___pthread_kill (threadid=23456002882752, signo=signo@entry=6) at ./nptl/pthread_kill.c:89 #3 0x0000155551842476 in __GI_raise (sig=sig@entry=6) at ../sysdeps/posix/raise.c:26 #4 0x00001555518287f3 in __GI_abort () at ./stdlib/abort.c:79 #5 0x00001555033018e6 in hip::FatBinaryInfo::DeviceIdCheck (device_id=device_id@entry=-1, this=)
at /tmp/yizeyi18/spack-stage/spack-stage-hip-5.7.0-qjrvxutegybi3rhpu4mctpd2temd7krn/spack-src/clr/hipamd/src/hip_fatbin.hpp:50
#6 hip::FatBinaryInfo::BuildProgram (this=, device_id=device_id@entry=-1)
at /tmp/yizeyi18/spack-stage/spack-stage-hip-5.7.0-qjrvxutegybi3rhpu4mctpd2temd7krn/spack-src/clr/hipamd/src/hip_fatbin.cpp:335
#7 0x000015550330576e in hip::Function::getStatFunc (this=0x5555564bffc0, hfunc=hfunc@entry=0x7fffffff7b18, deviceId=deviceId@entry=-1)
at /tmp/yizeyi18/spack-stage/spack-stage-hip-5.7.0-qjrvxutegybi3rhpu4mctpd2temd7krn/spack-src/clr/hipamd/src/hip_global.cpp:132
#8 0x00001555032be473 in hip::StatCO::getStatFunc (this=0x555555e25cd0, hfunc=hfunc@entry=0x7fffffff7b18,
hostFunction=hostFunction@entry=0x155555435378 *, HIP_vector_type*, int)>,
deviceId=deviceId@entry=-1) at /tmp/yizeyi18/spack-stage/spack-stage-hip-5.7.0-qjrvxutegybi3rhpu4mctpd2temd7krn/spack-src/clr/hipamd/src/hip_code_object.cpp:848
#9 0x000015550344b54c in PlatformState::getStatFunc (this=, hfunc=hfunc@entry=0x7fffffff7b18,
hostFunction=hostFunction@entry=0x155555435378 *, HIP_vector_type*, int)>,
deviceId=deviceId@entry=-1) at /tmp/yizeyi18/spack-stage/spack-stage-hip-5.7.0-qjrvxutegybi3rhpu4mctpd2temd7krn/spack-src/clr/hipamd/src/hip_platform.cpp:858
#10 0x000015550344b5a8 in ihipLaunchKernel (hostFunction=0x155555435378 *, HIP_vector_type*, int)>,
gridDim=..., blockDim=..., args=0x7fffffff8080, sharedMemBytes=0, stream=0x1555036aa180 , startEvent=0x0, stopEvent=0x0, flags=0)
at /tmp/yizeyi18/spack-stage/spack-stage-hip-5.7.0-qjrvxutegybi3rhpu4mctpd2temd7krn/spack-src/clr/hipamd/src/hip_platform.cpp:568
#11 0x0000155503421cb2 in hipLaunchKernel_common (hostFunction=,
hostFunction@entry=0x155555435378 *, HIP_vector_type*, int)>, gridDim=..., blockDim=...,
args=, args@entry=0x7fffffff8080, sharedMemBytes=, stream=)
at /tmp/yizeyi18/spack-stage/spack-stage-hip-5.7.0-qjrvxutegybi3rhpu4mctpd2temd7krn/spack-src/clr/hipamd/src/hip_module.cpp:672
#12 0x000015550342c333 in hipLaunchKernel (hostFunction=0x155555435378 *, HIP_vector_type*, int)>,
gridDim=..., blockDim=..., args=, sharedMemBytes=, stream=)
at /tmp/yizeyi18/spack-stage/spack-stage-hip-5.7.0-qjrvxutegybi3rhpu4mctpd2temd7krn/spack-src/clr/hipamd/src/hip_module.cpp:679
#13 0x00001555552391c9 in launch_my_unpack_c_hip_kernel_complex_double (row_count=, n_offset=, max_idx=, stripe_width=, a_dim2=,
stripe_count=, l_nev=, row_group_dev=, a_dev=, my_stream=) at ../src/GPU/ROCm/hipUtils_template.cpp:356
#14 0x000015555514838c in gpu_c_kernel::launch_my_unpack_gpu_kernel_complex_double (row_count=, n_offset=0, max_idx=615,
stripe_width=1024, a_dim2=2880, stripe_count=, l_nev=, row_group_dev=, a_dev=,
my_stream=) at ../src/elpa2/GPU/interface_c_gpu_kernel.F90:376
#15 0x0000155555149534 in pack_unpack_gpu::unpack_row_group_complex_gpu_double (obj=..., row_group_dev=23447635755008, a_dev=23437523288064, stripe_count=1, stripe_width=1024, last_stripe_width=615,
a_dim2=2880, l_nev=615, rows=, n_offset=0, row_count=64, wantdebug=.FALSE., allcomputeongpu=.FALSE.,
my_stream=93825182226512) at ../src/elpa2/pack_unpack_gpu.F90:362
#16 0x000015555514a84d in pack_unpack_gpu::unpack_and_prepare_row_group_complex_gpu_double (obj=..., row_group=,
row_group_dev=23447635755008, a_dev=23437523288064, stripe_count=1, stripe_width=1024, last_stripe_width=615, a_dim2=2880, l_nev=615, row_group_size=64, nblk=64, unpack_idx=64, next_unpack_idx=65,
force=.FALSE., wantdebug=.FALSE., allcomputeongpu=.FALSE., my_stream=93825182226512) at ../src/elpa2/pack_unpack_gpu.F90:429
#17 0x00001555550e5c21 in elpa2_compute::trans_ev_tridi_to_band_complex_double (obj=..., na=2816, nev=615, nblk=64, nbw=64, q=..., ldq=2816, matrixcols=2816,
hh_trans=, my_pe=0, mpi_comm_rows=14, mpi_comm_cols=15, wantdebug=.FALSE., usegpu=.TRUE., max_threads_in=1,
success=.TRUE., kernel=23) at ../src/elpa2/elpa2_trans_ev_tridi_to_band_template.F90:1140
#18 0x000015555519af5e in elpa2_impl::elpa_solve_evp_complex_2stage_a_h_a_double_impl (obj=..., aextern=..., evextern=..., qextern=...) at ../src/elpa2/elpa2_template.F90:1403
#19 0x000015555504f64d in elpa_impl::elpa_eigenvectors_a_h_a_dc (self=..., a=..., ev=..., q=..., error=32767) at ../src/elpa_impl_math_solvers_template.F90:126
#20 0x0000155555058d0b in elpa_impl::elpa_eigenvectors_a_h_a_dc_c (handle=, a_p=, ev_p=, q_p=, error=32767)
at ../src/elpa_impl_math_solvers_template.F90:333
#21 0x0000555555aed081 in ELPA_Solver::eigenvector(std::complex*, double*, std::complex*) ()
#22 0x0000555555aee25f in ELPA_Solver::generalized_eigenvector(std::complex*, std::complex*, int&, double*, std::complex*) ()
#23 0x0000555555817aac in hsolver::DiagoElpa >::diag(hamilt::Hamilt, psi::DEVICE_CPU>*, psi::Psi, psi::DEVICE_CPU>&, double*) ()
#24 0x000055555580fd51 in hsolver::HSolverLCAO, psi::DEVICE_CPU>::hamiltSolvePsiK(hamilt::Hamilt, psi::DEVICE_CPU>*, psi::Psi, psi::DEVICE_CPU>&, double*) ()
--Type <RET> for more, q to quit, c to continue without paging--
#25 0x000055555581174c in hsolver::HSolverLCAO, psi::DEVICE_CPU>::solveTemplate(hamilt::Hamilt, psi::DEVICE_CPU>*, psi::Psi, psi::DEVICE_CPU>&, elecstate::ElecState*, std::__cxx11::basic_string, std::allocator >, bool) ()
#26 0x0000555555811f85 in hsolver::HSolverLCAO, psi::DEVICE_CPU>::solve(hamilt::Hamilt, psi::DEVICE_CPU>*, psi::Psi, psi::DEVICE_CPU>&, elecstate::ElecState*, std::__cxx11::basic_string, std::allocator >, bool) ()
#27 0x000055555592741c in ModuleESolver::ESolver_KS_LCAO, std::complex >::hamilt2density(int, int, double) ()
#28 0x00005555558e4e23 in ModuleESolver::ESolver_KS, psi::DEVICE_CPU>::Run(int, UnitCell&) ()
#29 0x00005555557bf514 in Relax_Driver::relax_driver(ModuleESolver::ESolver*) ()
#30 0x00005555557d1c99 in Driver::driver_run() ()
#31 0x00005555557d0ee5 in Driver::atomic_world() ()
#32 0x00005555557d1702 in Driver::init() ()
#33 0x00005555555ad1e4 in main ()
```