Closed AlexanderSinn closed 7 months ago
Comparison with FBPIC:
The non-zero means of sy and sz in FBPIC are caused by random fluctuations of the Gaussian beam. In HiPACE++ these fluctuations were suppressed with beam.do_symmetrize = true.
beam.do_symmetrize = true
electron_beam.do_spin_tracking = true
electron_beam.initial_spin = 1 0 0
electron_beam.spin_anom = 0.00115965218128
Add spin tracking from https://github.com/fbpic/fbpic/pull/672.
This PR fixes #1027 and is based on #1069, #1068, #1067 and #1066.
Register usage for the beam pusher in development (the high local memory usage is caused by the kernels that use the parser for external fields):
--- 176 registers, 0 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads, function name: void amrex::launch_global<(int)256, std::enable_if<amrex::MaybeDeviceRunnable<T3, void>::value, void>::type amrex::ParallelFor<(int)256, int, std::enable_if<std::is_integral<T2>::value||std::is_same<T2, amrex::Box>::value, bool>::type amrex::detail::ParallelFor_helper2<(int)256, int, AdvanceBeamParticlesSlice(BeamParticleContainer &, const Fields &, const amrex::Vector<amrex::Geometry, std::allocator<amrex::Geometry>> &, int, int)::[lambda(int, T1, T2) (instance 1)], std::integral_constant<int, (int)3>, std::integral_constant<int, (int)0>>(const T2 &, T3 &&, amrex::TypeList<T4...>, const std::array<int, sizeof...(T4)> &)::[lambda(int) (instance 1)], void>(const amrex::Gpu::KernelInfo &, T2, T3 &&)::[lambda() (instance 1)]>(T2) --- 128 registers, 424 bytes stack frame, 256 bytes spill stores, 256 bytes spill loads, function name: void amrex::launch_global<(int)256, std::enable_if<amrex::MaybeDeviceRunnable<T3, void>::value, void>::type amrex::ParallelFor<(int)256, int, std::enable_if<std::is_integral<T2>::value||std::is_same<T2, amrex::Box>::value, bool>::type amrex::detail::ParallelFor_helper2<(int)256, int, AdvanceBeamParticlesSlice(BeamParticleContainer &, const Fields &, const amrex::Vector<amrex::Geometry, std::allocator<amrex::Geometry>> &, int, int)::[lambda(int, T1, T2) (instance 1)], std::integral_constant<int, (int)3>, std::integral_constant<int, (int)1>>(const T2 &, T3 &&, amrex::TypeList<T4...>, const std::array<int, sizeof...(T4)> &)::[lambda(int) (instance 1)], void>(const amrex::Gpu::KernelInfo &, T2, T3 &&)::[lambda() (instance 1)]>(T2) --- 128 registers, 240 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads, function name: void amrex::launch_global<(int)256, std::enable_if<amrex::MaybeDeviceRunnable<T3, void>::value, void>::type amrex::ParallelFor<(int)256, int, std::enable_if<std::is_integral<T2>::value||std::is_same<T2, amrex::Box>::value, 
bool>::type amrex::detail::ParallelFor_helper2<(int)256, int, AdvanceBeamParticlesSlice(BeamParticleContainer &, const Fields &, const amrex::Vector<amrex::Geometry, std::allocator<amrex::Geometry>> &, int, int)::[lambda(int, T1, T2) (instance 1)], std::integral_constant<int, (int)2>, std::integral_constant<int, (int)1>>(const T2 &, T3 &&, amrex::TypeList<T4...>, const std::array<int, sizeof...(T4)> &)::[lambda(int) (instance 1)], void>(const amrex::Gpu::KernelInfo &, T2, T3 &&)::[lambda() (instance 1)]>(T2) --- 128 registers, 0 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads, function name: void amrex::launch_global<(int)256, std::enable_if<amrex::MaybeDeviceRunnable<T3, void>::value, void>::type amrex::ParallelFor<(int)256, int, std::enable_if<std::is_integral<T2>::value||std::is_same<T2, amrex::Box>::value, bool>::type amrex::detail::ParallelFor_helper2<(int)256, int, AdvanceBeamParticlesSlice(BeamParticleContainer &, const Fields &, const amrex::Vector<amrex::Geometry, std::allocator<amrex::Geometry>> &, int, int)::[lambda(int, T1, T2) (instance 1)], std::integral_constant<int, (int)2>, std::integral_constant<int, (int)0>>(const T2 &, T3 &&, amrex::TypeList<T4...>, const std::array<int, sizeof...(T4)> &)::[lambda(int) (instance 1)], void>(const amrex::Gpu::KernelInfo &, T2, T3 &&)::[lambda() (instance 1)]>(T2) --- 128 registers, 0 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads, function name: void amrex::launch_global<(int)256, std::enable_if<amrex::MaybeDeviceRunnable<T3, void>::value, void>::type amrex::ParallelFor<(int)256, int, std::enable_if<std::is_integral<T2>::value||std::is_same<T2, amrex::Box>::value, bool>::type amrex::detail::ParallelFor_helper2<(int)256, int, AdvanceBeamParticlesSlice(BeamParticleContainer &, const Fields &, const amrex::Vector<amrex::Geometry, std::allocator<amrex::Geometry>> &, int, int)::[lambda(int, T1, T2) (instance 1)], std::integral_constant<int, (int)1>, std::integral_constant<int, (int)0>>(const 
T2 &, T3 &&, amrex::TypeList<T4...>, const std::array<int, sizeof...(T4)> &)::[lambda(int) (instance 1)], void>(const amrex::Gpu::KernelInfo &, T2, T3 &&)::[lambda() (instance 1)]>(T2) --- 126 registers, 240 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads, function name: void amrex::launch_global<(int)256, std::enable_if<amrex::MaybeDeviceRunnable<T3, void>::value, void>::type amrex::ParallelFor<(int)256, int, std::enable_if<std::is_integral<T2>::value||std::is_same<T2, amrex::Box>::value, bool>::type amrex::detail::ParallelFor_helper2<(int)256, int, AdvanceBeamParticlesSlice(BeamParticleContainer &, const Fields &, const amrex::Vector<amrex::Geometry, std::allocator<amrex::Geometry>> &, int, int)::[lambda(int, T1, T2) (instance 1)], std::integral_constant<int, (int)1>, std::integral_constant<int, (int)1>>(const T2 &, T3 &&, amrex::TypeList<T4...>, const std::array<int, sizeof...(T4)> &)::[lambda(int) (instance 1)], void>(const amrex::Gpu::KernelInfo &, T2, T3 &&)::[lambda() (instance 1)]>(T2) --- 109 registers, 240 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads, function name: void amrex::launch_global<(int)256, std::enable_if<amrex::MaybeDeviceRunnable<T3, void>::value, void>::type amrex::ParallelFor<(int)256, int, std::enable_if<std::is_integral<T2>::value||std::is_same<T2, amrex::Box>::value, bool>::type amrex::detail::ParallelFor_helper2<(int)256, int, AdvanceBeamParticlesSlice(BeamParticleContainer &, const Fields &, const amrex::Vector<amrex::Geometry, std::allocator<amrex::Geometry>> &, int, int)::[lambda(int, T1, T2) (instance 1)], std::integral_constant<int, (int)0>, std::integral_constant<int, (int)1>>(const T2 &, T3 &&, amrex::TypeList<T4...>, const std::array<int, sizeof...(T4)> &)::[lambda(int) (instance 1)], void>(const amrex::Gpu::KernelInfo &, T2, T3 &&)::[lambda() (instance 1)]>(T2) --- 102 registers, 0 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads, function name: void amrex::launch_global<(int)256, 
std::enable_if<amrex::MaybeDeviceRunnable<T3, void>::value, void>::type amrex::ParallelFor<(int)256, int, std::enable_if<std::is_integral<T2>::value||std::is_same<T2, amrex::Box>::value, bool>::type amrex::detail::ParallelFor_helper2<(int)256, int, AdvanceBeamParticlesSlice(BeamParticleContainer &, const Fields &, const amrex::Vector<amrex::Geometry, std::allocator<amrex::Geometry>> &, int, int)::[lambda(int, T1, T2) (instance 1)], std::integral_constant<int, (int)0>, std::integral_constant<int, (int)0>>(const T2 &, T3 &&, amrex::TypeList<T4...>, const std::array<int, sizeof...(T4)> &)::[lambda(int) (instance 1)], void>(const amrex::Gpu::KernelInfo &, T2, T3 &&)::[lambda() (instance 1)]>(T2)
PR:
--- 176 registers, 240 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads, function name: void amrex::launch_global<(int)256, std::enable_if<amrex::MaybeDeviceRunnable<T3, void>::value, void>::type amrex::ParallelFor<(int)256, int, std::enable_if<std::is_integral<T2>::value||std::is_same<T2, amrex::Box>::value, bool>::type amrex::detail::ParallelFor_helper2<(int)256, int, AdvanceBeamParticlesSlice(BeamParticleContainer &, const Fields &, const amrex::Vector<amrex::Geometry, std::allocator<amrex::Geometry>> &, int, int)::[lambda(int, T1, T2) (instance 1)], std::integral_constant<int, (int)3>, std::integral_constant<int, (int)1>>(const T2 &, T3 &&, amrex::TypeList<T4...>, const std::array<int, sizeof...(T4)> &)::[lambda(int) (instance 1)], void>(const amrex::Gpu::KernelInfo &, T2, T3 &&)::[lambda() (instance 1)]>(T2) --- 174 registers, 0 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads, function name: void amrex::launch_global<(int)256, std::enable_if<amrex::MaybeDeviceRunnable<T3, void>::value, void>::type amrex::ParallelFor<(int)256, int, std::enable_if<std::is_integral<T2>::value||std::is_same<T2, amrex::Box>::value, bool>::type amrex::detail::ParallelFor_helper2<(int)256, int, AdvanceBeamParticlesSlice(BeamParticleContainer &, const Fields &, const amrex::Vector<amrex::Geometry, std::allocator<amrex::Geometry>> &, int, int)::[lambda(int, T1, T2) (instance 1)], std::integral_constant<int, (int)3>, std::integral_constant<int, (int)0>>(const T2 &, T3 &&, amrex::TypeList<T4...>, const std::array<int, sizeof...(T4)> &)::[lambda(int) (instance 1)], void>(const amrex::Gpu::KernelInfo &, T2, T3 &&)::[lambda() (instance 1)]>(T2) --- 128 registers, 0 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads, function name: void amrex::launch_global<(int)256, std::enable_if<amrex::MaybeDeviceRunnable<T3, void>::value, void>::type amrex::ParallelFor<(int)256, int, std::enable_if<std::is_integral<T2>::value||std::is_same<T2, amrex::Box>::value, 
bool>::type amrex::detail::ParallelFor_helper2<(int)256, int, AdvanceBeamParticlesSlice(BeamParticleContainer &, const Fields &, const amrex::Vector<amrex::Geometry, std::allocator<amrex::Geometry>> &, int, int)::[lambda(int, T1, T2) (instance 1)], std::integral_constant<int, (int)2>, std::integral_constant<int, (int)0>>(const T2 &, T3 &&, amrex::TypeList<T4...>, const std::array<int, sizeof...(T4)> &)::[lambda(int) (instance 1)], void>(const amrex::Gpu::KernelInfo &, T2, T3 &&)::[lambda() (instance 1)]>(T2) --- 128 registers, 0 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads, function name: void amrex::launch_global<(int)256, std::enable_if<amrex::MaybeDeviceRunnable<T3, void>::value, void>::type amrex::ParallelFor<(int)256, int, std::enable_if<std::is_integral<T2>::value||std::is_same<T2, amrex::Box>::value, bool>::type amrex::detail::ParallelFor_helper2<(int)256, int, AdvanceBeamParticlesSlice(BeamParticleContainer &, const Fields &, const amrex::Vector<amrex::Geometry, std::allocator<amrex::Geometry>> &, int, int)::[lambda(int, T1, T2) (instance 1)], std::integral_constant<int, (int)1>, std::integral_constant<int, (int)0>>(const T2 &, T3 &&, amrex::TypeList<T4...>, const std::array<int, sizeof...(T4)> &)::[lambda(int) (instance 1)], void>(const amrex::Gpu::KernelInfo &, T2, T3 &&)::[lambda() (instance 1)]>(T2) --- 127 registers, 240 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads, function name: void amrex::launch_global<(int)256, std::enable_if<amrex::MaybeDeviceRunnable<T3, void>::value, void>::type amrex::ParallelFor<(int)256, int, std::enable_if<std::is_integral<T2>::value||std::is_same<T2, amrex::Box>::value, bool>::type amrex::detail::ParallelFor_helper2<(int)256, int, AdvanceBeamParticlesSlice(BeamParticleContainer &, const Fields &, const amrex::Vector<amrex::Geometry, std::allocator<amrex::Geometry>> &, int, int)::[lambda(int, T1, T2) (instance 1)], std::integral_constant<int, (int)2>, std::integral_constant<int, 
(int)1>>(const T2 &, T3 &&, amrex::TypeList<T4...>, const std::array<int, sizeof...(T4)> &)::[lambda(int) (instance 1)], void>(const amrex::Gpu::KernelInfo &, T2, T3 &&)::[lambda() (instance 1)]>(T2) --- 127 registers, 240 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads, function name: void amrex::launch_global<(int)256, std::enable_if<amrex::MaybeDeviceRunnable<T3, void>::value, void>::type amrex::ParallelFor<(int)256, int, std::enable_if<std::is_integral<T2>::value||std::is_same<T2, amrex::Box>::value, bool>::type amrex::detail::ParallelFor_helper2<(int)256, int, AdvanceBeamParticlesSlice(BeamParticleContainer &, const Fields &, const amrex::Vector<amrex::Geometry, std::allocator<amrex::Geometry>> &, int, int)::[lambda(int, T1, T2) (instance 1)], std::integral_constant<int, (int)1>, std::integral_constant<int, (int)1>>(const T2 &, T3 &&, amrex::TypeList<T4...>, const std::array<int, sizeof...(T4)> &)::[lambda(int) (instance 1)], void>(const amrex::Gpu::KernelInfo &, T2, T3 &&)::[lambda() (instance 1)]>(T2) --- 127 registers, 240 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads, function name: void amrex::launch_global<(int)256, std::enable_if<amrex::MaybeDeviceRunnable<T3, void>::value, void>::type amrex::ParallelFor<(int)256, int, std::enable_if<std::is_integral<T2>::value||std::is_same<T2, amrex::Box>::value, bool>::type amrex::detail::ParallelFor_helper2<(int)256, int, AdvanceBeamParticlesSlice(BeamParticleContainer &, const Fields &, const amrex::Vector<amrex::Geometry, std::allocator<amrex::Geometry>> &, int, int)::[lambda(int, T1, T2) (instance 1)], std::integral_constant<int, (int)0>, std::integral_constant<int, (int)1>>(const T2 &, T3 &&, amrex::TypeList<T4...>, const std::array<int, sizeof...(T4)> &)::[lambda(int) (instance 1)], void>(const amrex::Gpu::KernelInfo &, T2, T3 &&)::[lambda() (instance 1)]>(T2) --- 112 registers, 0 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads, function name: void 
amrex::launch_global<(int)256, std::enable_if<amrex::MaybeDeviceRunnable<T3, void>::value, void>::type amrex::ParallelFor<(int)256, int, std::enable_if<std::is_integral<T2>::value||std::is_same<T2, amrex::Box>::value, bool>::type amrex::detail::ParallelFor_helper2<(int)256, int, AdvanceBeamParticlesSlice(BeamParticleContainer &, const Fields &, const amrex::Vector<amrex::Geometry, std::allocator<amrex::Geometry>> &, int, int)::[lambda(int, T1, T2) (instance 1)], std::integral_constant<int, (int)0>, std::integral_constant<int, (int)0>>(const T2 &, T3 &&, amrex::TypeList<T4...>, const std::array<int, sizeof...(T4)> &)::[lambda(int) (instance 1)], void>(const amrex::Gpu::KernelInfo &, T2, T3 &&)::[lambda() (instance 1)]>(T2)
const
I added the register usage for GPU to the PR description.
Comparison with FBPIC:
The non-zero means of sy and sz in FBPIC are caused by random fluctuations of the Gaussian beam. In HiPACE++ these fluctuations were suppressed with beam.do_symmetrize = true.
Add spin tracking from https://github.com/fbpic/fbpic/pull/672.
This PR fixes #1027 and is based on #1069, #1068, #1067 and #1066.
Register usage for the beam pusher in development (the high local memory usage is caused by the kernels that use the parser for external fields):
PR:
const
isconst
)