Closed cwsmith closed 1 year ago
scrabble - Nvidia Quadro P1000
gcc/10.1.0 mpich/3.3.2 cuda/11.4 cmake/3.20.0
pumipic | master | cf87720
omegah | scorec/master | 85c8284
kokkos | n/a | 3.7.01
cabana | master | 452a278
engpar | master | 57b3f57
test_structures_small test_structures_small_4
This stack appears identical to the one in https://github.com/SCOREC/pumi-pic/issues/84.
$gdb --args ./test_structure small_ptcls_e5_p25_r0 ...
Padded Cells <Tot %> 167 86.979%
Empty Elements <Tot %> 0 0.000%
testRebuild cabm, rank 0
rebuildNoChanges cabm, rank 0
Thread 1 "test_structure" hit Breakpoint 1, Kokkos::Impl::host_abort (message=0x2809a70 "cudaDeviceSynchronize() error( cudaErrorIllegalAddress): an illegal memory access was encountered /space/cwsmith/pumipicDev/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.cpp:161") at /space/cwsmith/pumipicDev/kokkos/core/src/impl/Kokkos_Error.cpp:80
80 std::cerr << message;
(ins)(gdb) where
#0 Kokkos::Impl::host_abort (message=0x2809a70 "cudaDeviceSynchronize() error( cudaErrorIllegalAddress): an illegal memory access was encountered /space/cwsmith/pumipicDev/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.cpp:161") at /space/cwsmith/pumipicDev/kokkos/core/src/impl/Kokkos_Error.cpp:80
#1 0x00000000017729d1 in Kokkos::abort (message=<optimized out>) at /space/cwsmith/pumipicDev/kokkos/core/src/impl/Kokkos_Error.hpp:232
#2 Kokkos::Impl::cuda_internal_error_abort (e=cudaErrorIllegalAddress, name=<optimized out>, file=0x17e8888 "/space/cwsmith/pumipicDev/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.cpp", line=161) at /space/cwsmith/pumipicDev/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.cpp:205
#3 0x0000000001772aaa in Kokkos::Impl::cuda_internal_safe_call (line=161, file=0x17e8888 "/space/cwsmith/pumipicDev/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.cpp", name=0x17e8f61 "cudaDeviceSynchronize()", e=<optimized out>) at /space/cwsmith/pumipicDev/kokkos/core/src/Cuda/Kokkos_Cuda_Error.hpp:94
#4 operator() (__closure=<optimized out>) at /space/cwsmith/pumipicDev/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.cpp:161
#5 Kokkos::Tools::Experimental::Impl::profile_fence_event<Kokkos::Cuda, Kokkos::Impl::cuda_device_synchronize(const string&)::<lambda()> >(const std::string &, const struct {...} &, Kokkos::Tools::Experimental::SpecialSynchronizationCases) (name=..., func=..., reason=Kokkos::Tools::Experimental::GlobalDeviceSynchronization) at /space/cwsmith/pumipicDev/kokkos/core/src/impl/Kokkos_Profiling.hpp:236
#6 0x0000000001775241 in Kokkos::Impl::cuda_device_synchronize (name=...) at /space/cwsmith/pumipicDev/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.cpp:971
#7 Kokkos::Cuda::impl_static_fence (name=...) at /space/cwsmith/pumipicDev/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.cpp:971
#8 Kokkos::Impl::ExecSpaceDerived<Kokkos::Cuda>::static_fence (this=<optimized out>, label=...) at /space/cwsmith/pumipicDev/kokkos/core/src/impl/Kokkos_ExecSpaceManager.hpp:125
#9 0x000000000174c97d in Kokkos::Impl::ExecSpaceManager::static_fence (name=..., this=0x20dbe80 <Kokkos::Impl::ExecSpaceManager::get_instance()::space_initializer>) at /space/cwsmith/pumipicDev/kokkos/core/src/impl/Kokkos_Core.cpp:224
#10 (anonymous namespace)::fence_internal (name=...) at /space/cwsmith/pumipicDev/kokkos/core/src/impl/Kokkos_Core.cpp:732
#11 Kokkos::fence (name=...) at /space/cwsmith/pumipicDev/kokkos/core/src/impl/Kokkos_Core.cpp:1109
#12 0x00000000013de255 in void Kokkos::deep_copy<int, Kokkos::LayoutLeft, Kokkos::Device<Kokkos::Cuda, Kokkos::CudaSpace>, Kokkos::MemoryTraits<0u> >(Kokkos::ViewTraits<int, Kokkos::LayoutLeft, Kokkos::Device<Kokkos::Cuda, Kokkos::CudaSpace>, Kokkos::MemoryTraits<0u> >::non_const_value_type&, Kokkos::View<int, Kokkos::LayoutLeft, Kokkos::Device<Kokkos::Cuda, Kokkos::CudaSpace>, Kokkos::MemoryTraits<0u> > const&, std::enable_if<std::is_same<Kokkos::ViewTraits<int, Kokkos::LayoutLeft, Kokkos::Device<Kokkos::Cuda, Kokkos::CudaSpace>, Kokkos::MemoryTraits<0u> >::specialize, void>::value, void>::type*) () at /opt/scorec/spack/v0154_2/install/linux-rhel7-x86_64/gcc-6.5.0/gcc-10.1.0-tf5jjaditemasrbsl7tz6pnqa6duqwkg/include/c++/10.1.0/bits/stl_tree.h:211
#13 0x000000000134473f in int pumipic::getLastValue<int, Kokkos::Device<Kokkos::Cuda, Kokkos::CudaSpace> >(Kokkos::View<int*, Kokkos::Device<Kokkos::Cuda, Kokkos::CudaSpace> >) () at /opt/scorec/spack/v0154_2/install/linux-rhel7-x86_64/gcc-6.5.0/gcc-10.1.0-tf5jjaditemasrbsl7tz6pnqa6duqwkg/include/c++/10.1.0/bits/stl_tree.h:211
#14 0x000000000130984d in rebuildNoChanges(char const*, pumipic::ParticleStructure<pumipic::MemberTypes<int, double [3], short, int>, Kokkos::CudaSpace>*) () at /opt/scorec/spack/v0154_2/install/linux-rhel7-x86_64/gcc-6.5.0/gcc-10.1.0-tf5jjaditemasrbsl7tz6pnqa6duqwkg/include/c++/10.1.0/bits/stl_tree.h:211
#15 0x00000000013074d2 in testRebuild(char const*, pumipic::ParticleStructure<pumipic::MemberTypes<int, double [3], short, int>, Kokkos::CudaSpace>*) () at /opt/scorec/spack/v0154_2/install/linux-rhel7-x86_64/gcc-6.5.0/gcc-10.1.0-tf5jjaditemasrbsl7tz6pnqa6duqwkg/include/c++/10.1.0/bits/stl_tree.h:211
#16 0x0000000001306703 in main () at /opt/scorec/spack/v0154_2/install/linux-rhel7-x86_64/gcc-6.5.0/gcc-10.1.0-tf5jjaditemasrbsl7tz6pnqa6duqwkg/include/c++/10.1.0/bits/stl_tree.h:211
(ins)(gdb)
It seems like this has been resolved.
environment
scrabble - Nvidia Quadro P1000
versions
pumipic | master | cf87720
omegah | scorec/master | 85c8284
kokkos | n/a | 3.7.01
cabana | master | 452a278
engpar | master | 57b3f57
failing tests
test_structures_small
stack
This stack appears identical to the one in https://github.com/SCOREC/pumi-pic/issues/84.