henry2004y / TestParticle.jl

Test particle tracing in electromagnetic field
https://henry2004y.github.io/TestParticle.jl/dev/
MIT License
14 stars 3 forks source link

GC time too high for `trace_trajectory` #127

Closed henry2004y closed 5 months ago

henry2004y commented 5 months ago

When I ran the Boris pusher with multithreading, I saw no speedup (it was actually slower), mostly because of a drastic increase in GC time. Here is a demo using ChunkSplitters.jl:

"""
    trace_trajectory_boris(param; trajectories=1, tspan=(0.0, 1.0), dt=0.0044,
       savestepinterval=1, prob_func=prob_func_thermal)

Serial driver: wrap `param` in a `BorisMethod`, build a `TraceProblem` from it, and
return whatever a single `trace_trajectory` call produces.

# Keywords
- `trajectories::Int`: forwarded to `trace_trajectory`.
- `tspan`: forwarded to `TraceProblem`.
- `dt::Float64`: forwarded to `TraceProblem`.
- `savestepinterval::Int`: forwarded to `trace_trajectory`.
- `prob_func::Function`: forwarded to `TraceProblem`.

`isoutofdomain` is not a parameter; it is looked up in the enclosing scope and must be
defined there before this function is called.
"""
function trace_trajectory_boris(param; trajectories::Int=1, tspan=(0.0, 1.0),
   dt::Float64=0.0044, savestepinterval::Int=1, prob_func::Function=prob_func_thermal)
   # Six-component placeholder state (particle position and velocity, to be modified).
   u0 = zeros(6)
   problem = TraceProblem(u0, tspan, dt, BorisMethod(param); prob_func=prob_func)

   return trace_trajectory(problem;
      savestepinterval=savestepinterval,
      isoutofdomain=isoutofdomain,
      trajectories=trajectories)
end

"""
    trace_trajectory_boris_parallel(param; trajectories=1, tspan=(0.0, 1.0),
       dt=0.0044, savestepinterval=1, prob_func=prob_func_thermal,
       nchunks=Threads.nthreads())

Multithreaded driver: split `1:trajectories` with `chunks(1:trajectories, nchunks)` and,
inside a `Threads.@threads` loop, run one `trace_trajectory` call per chunk. Each call's
result is stored into the matching index range of a preallocated
`Vector{TestParticle.TraceSolution}`, which is returned.

# Keywords
- `trajectories::Int`: length of the returned vector and of the range being chunked.
- `tspan`: forwarded to `TraceProblem`.
- `dt::Float64`: forwarded to `TraceProblem`.
- `savestepinterval::Int`: forwarded to every `trace_trajectory` call.
- `prob_func::Function`: forwarded to `TraceProblem`.
- `nchunks::Int`: second argument to `chunks`; defaults to `Threads.nthreads()`.

`isoutofdomain` is not a parameter; it is looked up in the enclosing scope and must be
defined there before this function is called.
"""
function trace_trajectory_boris_parallel(param; trajectories::Int=1,
   tspan=(0.0, 1.0), dt::Float64=0.0044, savestepinterval::Int=1,
   prob_func::Function=prob_func_thermal, nchunks::Int=Threads.nthreads())
   # Six-component placeholder state (particle position and velocity, to be modified).
   u0 = zeros(6)
   # One problem object is built up front and handed to every chunk.
   problem = TraceProblem(u0, tspan, dt, BorisMethod(param); prob_func=prob_func)
   results = Vector{TestParticle.TraceSolution}(undef, trajectories)

   # `chunks` is expected to yield (index range, chunk index) pairs; only the range is
   # needed here. Each per-chunk call is asked for exactly `length(idxs)` trajectories.
   Threads.@threads for (idxs, chunkid) in chunks(1:trajectories, nchunks)
      results[idxs] = trace_trajectory(problem;
         savestepinterval=savestepinterval,
         isoutofdomain=isoutofdomain,
         trajectories=length(idxs))
   end

   return results
end

When running with 1 thread:

serial:
  7.975629 seconds (875.33 k allocations: 4.548 GiB, 5.98% gc time, 1.50% compilation time)
parallel:
  8.162882 seconds (765.45 k allocations: 4.542 GiB, 7.38% gc time, 1.41% compilation time)

When running with 2 threads:

serial:
  9.158695 seconds (1.47 M allocations: 4.585 GiB, 8.48% gc time, 5.49% compilation time)
parallel:
 15.864802 seconds (845.64 k allocations: 4.529 GiB, 65.82% gc time, 1.24% compilation time)

Maybe reducing the allocations inside trace_trajectory would help?

henry2004y commented 5 months ago

After #128, the GC performance improves quite a bit:

D:\Research\MHD-AEPIC>julia -t 1 trace_cleanup.jl
[ Info: Number of threads: 1
[ Info: Number of particles: 100000
[ Info: Tracing trajectories...
  7.650034 seconds (1.04 M allocations: 999.937 MiB, 4.10% gc time, 7.33% compilation time)
  7.661742 seconds (546.69 k allocations: 974.943 MiB, 6.56% gc time, 2.32% compilation time)

>julia -t 2 trace_cleanup.jl
[ Info: Number of threads: 2
[ Info: Number of particles: 100000
[ Info: Tracing trajectories...
  7.297970 seconds (1.04 M allocations: 1003.599 MiB, 3.94% gc time, 6.74% compilation time)
  4.980314 seconds (546.75 k allocations: 973.270 MiB, 4.75% gc time, 3.98% compilation time)

>julia -t 4 trace_cleanup.jl
[ Info: Number of threads: 4
[ Info: Number of particles: 100000
[ Info: Tracing trajectories...
  7.424457 seconds (1.04 M allocations: 1003.394 MiB, 2.67% gc time, 6.58% compilation time)
  3.157897 seconds (544.65 k allocations: 813.760 MiB, 4.77% gc time, 9.59% compilation time)