Hi-PACE / hipace

Highly efficient Plasma Accelerator Emulation, quasistatic particle-in-cell code
https://hipace.readthedocs.io
Other
54 stars 15 forks source link

Add Beam Spin tracking #1071

Closed AlexanderSinn closed 8 months ago

AlexanderSinn commented 9 months ago

Comparison with FBPIC: image

The non-zero sy and sz mean in FBPIC are caused by the random fluctuations of the Gaussian beam. In HiPACE++ this was suppressed with beam.do_symmetrize = true.

electron_beam.do_spin_tracking = true
electron_beam.initial_spin = 1 0 0
electron_beam.spin_anom = 0.00115965218128

Add spin tracking from https://github.com/fbpic/fbpic/pull/672.

This PR fixes #1027 and is based on #1069, #1068, #1067 and #1066.

Register usage for the beam pusher in development, the high local memory usage is caused by the kernels that use the parser for external fields.

--- 176 registers, 0 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads, function name:
void amrex::launch_global<(int)256, std::enable_if<amrex::MaybeDeviceRunnable<T3, void>::value, void>::type amrex::ParallelFor<(int)256, int, std::enable_if<std::is_integral<T2>::value||std::is_same<T2, amrex::Box>::value, bool>::type amrex::detail::ParallelFor_helper2<(int)256, int, AdvanceBeamParticlesSlice(BeamParticleContainer &, const Fields &, const amrex::Vector<amrex::Geometry, std::allocator<amrex::Geometry>> &, int, int)::[lambda(int, T1, T2) (instance 1)], std::integral_constant<int, (int)3>, std::integral_constant<int, (int)0>>(const T2 &, T3 &&, amrex::TypeList<T4...>, const std::array<int, sizeof...(T4)> &)::[lambda(int) (instance 1)], void>(const amrex::Gpu::KernelInfo &, T2, T3 &&)::[lambda() (instance 1)]>(T2)
--- 128 registers, 424 bytes stack frame, 256 bytes spill stores, 256 bytes spill loads, function name:
void amrex::launch_global<(int)256, std::enable_if<amrex::MaybeDeviceRunnable<T3, void>::value, void>::type amrex::ParallelFor<(int)256, int, std::enable_if<std::is_integral<T2>::value||std::is_same<T2, amrex::Box>::value, bool>::type amrex::detail::ParallelFor_helper2<(int)256, int, AdvanceBeamParticlesSlice(BeamParticleContainer &, const Fields &, const amrex::Vector<amrex::Geometry, std::allocator<amrex::Geometry>> &, int, int)::[lambda(int, T1, T2) (instance 1)], std::integral_constant<int, (int)3>, std::integral_constant<int, (int)1>>(const T2 &, T3 &&, amrex::TypeList<T4...>, const std::array<int, sizeof...(T4)> &)::[lambda(int) (instance 1)], void>(const amrex::Gpu::KernelInfo &, T2, T3 &&)::[lambda() (instance 1)]>(T2)
--- 128 registers, 240 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads, function name:
void amrex::launch_global<(int)256, std::enable_if<amrex::MaybeDeviceRunnable<T3, void>::value, void>::type amrex::ParallelFor<(int)256, int, std::enable_if<std::is_integral<T2>::value||std::is_same<T2, amrex::Box>::value, bool>::type amrex::detail::ParallelFor_helper2<(int)256, int, AdvanceBeamParticlesSlice(BeamParticleContainer &, const Fields &, const amrex::Vector<amrex::Geometry, std::allocator<amrex::Geometry>> &, int, int)::[lambda(int, T1, T2) (instance 1)], std::integral_constant<int, (int)2>, std::integral_constant<int, (int)1>>(const T2 &, T3 &&, amrex::TypeList<T4...>, const std::array<int, sizeof...(T4)> &)::[lambda(int) (instance 1)], void>(const amrex::Gpu::KernelInfo &, T2, T3 &&)::[lambda() (instance 1)]>(T2)
--- 128 registers, 0 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads, function name:
void amrex::launch_global<(int)256, std::enable_if<amrex::MaybeDeviceRunnable<T3, void>::value, void>::type amrex::ParallelFor<(int)256, int, std::enable_if<std::is_integral<T2>::value||std::is_same<T2, amrex::Box>::value, bool>::type amrex::detail::ParallelFor_helper2<(int)256, int, AdvanceBeamParticlesSlice(BeamParticleContainer &, const Fields &, const amrex::Vector<amrex::Geometry, std::allocator<amrex::Geometry>> &, int, int)::[lambda(int, T1, T2) (instance 1)], std::integral_constant<int, (int)2>, std::integral_constant<int, (int)0>>(const T2 &, T3 &&, amrex::TypeList<T4...>, const std::array<int, sizeof...(T4)> &)::[lambda(int) (instance 1)], void>(const amrex::Gpu::KernelInfo &, T2, T3 &&)::[lambda() (instance 1)]>(T2)
--- 128 registers, 0 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads, function name:
void amrex::launch_global<(int)256, std::enable_if<amrex::MaybeDeviceRunnable<T3, void>::value, void>::type amrex::ParallelFor<(int)256, int, std::enable_if<std::is_integral<T2>::value||std::is_same<T2, amrex::Box>::value, bool>::type amrex::detail::ParallelFor_helper2<(int)256, int, AdvanceBeamParticlesSlice(BeamParticleContainer &, const Fields &, const amrex::Vector<amrex::Geometry, std::allocator<amrex::Geometry>> &, int, int)::[lambda(int, T1, T2) (instance 1)], std::integral_constant<int, (int)1>, std::integral_constant<int, (int)0>>(const T2 &, T3 &&, amrex::TypeList<T4...>, const std::array<int, sizeof...(T4)> &)::[lambda(int) (instance 1)], void>(const amrex::Gpu::KernelInfo &, T2, T3 &&)::[lambda() (instance 1)]>(T2)
--- 126 registers, 240 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads, function name:
void amrex::launch_global<(int)256, std::enable_if<amrex::MaybeDeviceRunnable<T3, void>::value, void>::type amrex::ParallelFor<(int)256, int, std::enable_if<std::is_integral<T2>::value||std::is_same<T2, amrex::Box>::value, bool>::type amrex::detail::ParallelFor_helper2<(int)256, int, AdvanceBeamParticlesSlice(BeamParticleContainer &, const Fields &, const amrex::Vector<amrex::Geometry, std::allocator<amrex::Geometry>> &, int, int)::[lambda(int, T1, T2) (instance 1)], std::integral_constant<int, (int)1>, std::integral_constant<int, (int)1>>(const T2 &, T3 &&, amrex::TypeList<T4...>, const std::array<int, sizeof...(T4)> &)::[lambda(int) (instance 1)], void>(const amrex::Gpu::KernelInfo &, T2, T3 &&)::[lambda() (instance 1)]>(T2)
--- 109 registers, 240 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads, function name:
void amrex::launch_global<(int)256, std::enable_if<amrex::MaybeDeviceRunnable<T3, void>::value, void>::type amrex::ParallelFor<(int)256, int, std::enable_if<std::is_integral<T2>::value||std::is_same<T2, amrex::Box>::value, bool>::type amrex::detail::ParallelFor_helper2<(int)256, int, AdvanceBeamParticlesSlice(BeamParticleContainer &, const Fields &, const amrex::Vector<amrex::Geometry, std::allocator<amrex::Geometry>> &, int, int)::[lambda(int, T1, T2) (instance 1)], std::integral_constant<int, (int)0>, std::integral_constant<int, (int)1>>(const T2 &, T3 &&, amrex::TypeList<T4...>, const std::array<int, sizeof...(T4)> &)::[lambda(int) (instance 1)], void>(const amrex::Gpu::KernelInfo &, T2, T3 &&)::[lambda() (instance 1)]>(T2)
--- 102 registers, 0 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads, function name:
void amrex::launch_global<(int)256, std::enable_if<amrex::MaybeDeviceRunnable<T3, void>::value, void>::type amrex::ParallelFor<(int)256, int, std::enable_if<std::is_integral<T2>::value||std::is_same<T2, amrex::Box>::value, bool>::type amrex::detail::ParallelFor_helper2<(int)256, int, AdvanceBeamParticlesSlice(BeamParticleContainer &, const Fields &, const amrex::Vector<amrex::Geometry, std::allocator<amrex::Geometry>> &, int, int)::[lambda(int, T1, T2) (instance 1)], std::integral_constant<int, (int)0>, std::integral_constant<int, (int)0>>(const T2 &, T3 &&, amrex::TypeList<T4...>, const std::array<int, sizeof...(T4)> &)::[lambda(int) (instance 1)], void>(const amrex::Gpu::KernelInfo &, T2, T3 &&)::[lambda() (instance 1)]>(T2)

PR:

--- 176 registers, 240 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads, function name:
void amrex::launch_global<(int)256, std::enable_if<amrex::MaybeDeviceRunnable<T3, void>::value, void>::type amrex::ParallelFor<(int)256, int, std::enable_if<std::is_integral<T2>::value||std::is_same<T2, amrex::Box>::value, bool>::type amrex::detail::ParallelFor_helper2<(int)256, int, AdvanceBeamParticlesSlice(BeamParticleContainer &, const Fields &, const amrex::Vector<amrex::Geometry, std::allocator<amrex::Geometry>> &, int, int)::[lambda(int, T1, T2) (instance 1)], std::integral_constant<int, (int)3>, std::integral_constant<int, (int)1>>(const T2 &, T3 &&, amrex::TypeList<T4...>, const std::array<int, sizeof...(T4)> &)::[lambda(int) (instance 1)], void>(const amrex::Gpu::KernelInfo &, T2, T3 &&)::[lambda() (instance 1)]>(T2)
--- 174 registers, 0 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads, function name:
void amrex::launch_global<(int)256, std::enable_if<amrex::MaybeDeviceRunnable<T3, void>::value, void>::type amrex::ParallelFor<(int)256, int, std::enable_if<std::is_integral<T2>::value||std::is_same<T2, amrex::Box>::value, bool>::type amrex::detail::ParallelFor_helper2<(int)256, int, AdvanceBeamParticlesSlice(BeamParticleContainer &, const Fields &, const amrex::Vector<amrex::Geometry, std::allocator<amrex::Geometry>> &, int, int)::[lambda(int, T1, T2) (instance 1)], std::integral_constant<int, (int)3>, std::integral_constant<int, (int)0>>(const T2 &, T3 &&, amrex::TypeList<T4...>, const std::array<int, sizeof...(T4)> &)::[lambda(int) (instance 1)], void>(const amrex::Gpu::KernelInfo &, T2, T3 &&)::[lambda() (instance 1)]>(T2)
--- 128 registers, 0 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads, function name:
void amrex::launch_global<(int)256, std::enable_if<amrex::MaybeDeviceRunnable<T3, void>::value, void>::type amrex::ParallelFor<(int)256, int, std::enable_if<std::is_integral<T2>::value||std::is_same<T2, amrex::Box>::value, bool>::type amrex::detail::ParallelFor_helper2<(int)256, int, AdvanceBeamParticlesSlice(BeamParticleContainer &, const Fields &, const amrex::Vector<amrex::Geometry, std::allocator<amrex::Geometry>> &, int, int)::[lambda(int, T1, T2) (instance 1)], std::integral_constant<int, (int)2>, std::integral_constant<int, (int)0>>(const T2 &, T3 &&, amrex::TypeList<T4...>, const std::array<int, sizeof...(T4)> &)::[lambda(int) (instance 1)], void>(const amrex::Gpu::KernelInfo &, T2, T3 &&)::[lambda() (instance 1)]>(T2)
--- 128 registers, 0 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads, function name:
void amrex::launch_global<(int)256, std::enable_if<amrex::MaybeDeviceRunnable<T3, void>::value, void>::type amrex::ParallelFor<(int)256, int, std::enable_if<std::is_integral<T2>::value||std::is_same<T2, amrex::Box>::value, bool>::type amrex::detail::ParallelFor_helper2<(int)256, int, AdvanceBeamParticlesSlice(BeamParticleContainer &, const Fields &, const amrex::Vector<amrex::Geometry, std::allocator<amrex::Geometry>> &, int, int)::[lambda(int, T1, T2) (instance 1)], std::integral_constant<int, (int)1>, std::integral_constant<int, (int)0>>(const T2 &, T3 &&, amrex::TypeList<T4...>, const std::array<int, sizeof...(T4)> &)::[lambda(int) (instance 1)], void>(const amrex::Gpu::KernelInfo &, T2, T3 &&)::[lambda() (instance 1)]>(T2)
--- 127 registers, 240 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads, function name:
void amrex::launch_global<(int)256, std::enable_if<amrex::MaybeDeviceRunnable<T3, void>::value, void>::type amrex::ParallelFor<(int)256, int, std::enable_if<std::is_integral<T2>::value||std::is_same<T2, amrex::Box>::value, bool>::type amrex::detail::ParallelFor_helper2<(int)256, int, AdvanceBeamParticlesSlice(BeamParticleContainer &, const Fields &, const amrex::Vector<amrex::Geometry, std::allocator<amrex::Geometry>> &, int, int)::[lambda(int, T1, T2) (instance 1)], std::integral_constant<int, (int)2>, std::integral_constant<int, (int)1>>(const T2 &, T3 &&, amrex::TypeList<T4...>, const std::array<int, sizeof...(T4)> &)::[lambda(int) (instance 1)], void>(const amrex::Gpu::KernelInfo &, T2, T3 &&)::[lambda() (instance 1)]>(T2)
--- 127 registers, 240 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads, function name:
void amrex::launch_global<(int)256, std::enable_if<amrex::MaybeDeviceRunnable<T3, void>::value, void>::type amrex::ParallelFor<(int)256, int, std::enable_if<std::is_integral<T2>::value||std::is_same<T2, amrex::Box>::value, bool>::type amrex::detail::ParallelFor_helper2<(int)256, int, AdvanceBeamParticlesSlice(BeamParticleContainer &, const Fields &, const amrex::Vector<amrex::Geometry, std::allocator<amrex::Geometry>> &, int, int)::[lambda(int, T1, T2) (instance 1)], std::integral_constant<int, (int)1>, std::integral_constant<int, (int)1>>(const T2 &, T3 &&, amrex::TypeList<T4...>, const std::array<int, sizeof...(T4)> &)::[lambda(int) (instance 1)], void>(const amrex::Gpu::KernelInfo &, T2, T3 &&)::[lambda() (instance 1)]>(T2)
--- 127 registers, 240 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads, function name:
void amrex::launch_global<(int)256, std::enable_if<amrex::MaybeDeviceRunnable<T3, void>::value, void>::type amrex::ParallelFor<(int)256, int, std::enable_if<std::is_integral<T2>::value||std::is_same<T2, amrex::Box>::value, bool>::type amrex::detail::ParallelFor_helper2<(int)256, int, AdvanceBeamParticlesSlice(BeamParticleContainer &, const Fields &, const amrex::Vector<amrex::Geometry, std::allocator<amrex::Geometry>> &, int, int)::[lambda(int, T1, T2) (instance 1)], std::integral_constant<int, (int)0>, std::integral_constant<int, (int)1>>(const T2 &, T3 &&, amrex::TypeList<T4...>, const std::array<int, sizeof...(T4)> &)::[lambda(int) (instance 1)], void>(const amrex::Gpu::KernelInfo &, T2, T3 &&)::[lambda() (instance 1)]>(T2)
--- 112 registers, 0 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads, function name:
void amrex::launch_global<(int)256, std::enable_if<amrex::MaybeDeviceRunnable<T3, void>::value, void>::type amrex::ParallelFor<(int)256, int, std::enable_if<std::is_integral<T2>::value||std::is_same<T2, amrex::Box>::value, bool>::type amrex::detail::ParallelFor_helper2<(int)256, int, AdvanceBeamParticlesSlice(BeamParticleContainer &, const Fields &, const amrex::Vector<amrex::Geometry, std::allocator<amrex::Geometry>> &, int, int)::[lambda(int, T1, T2) (instance 1)], std::integral_constant<int, (int)0>, std::integral_constant<int, (int)0>>(const T2 &, T3 &&, amrex::TypeList<T4...>, const std::array<int, sizeof...(T4)> &)::[lambda(int) (instance 1)], void>(const amrex::Gpu::KernelInfo &, T2, T3 &&)::[lambda() (instance 1)]>(T2)
AlexanderSinn commented 8 months ago

I added the register usage for GPU to the PR description.