mabarnes / moment_kinetics

Other
2 stars 4 forks source link

Segmentation fault in anyv_region() shared memory setup #210

Closed mrhardman closed 2 months ago

mrhardman commented 2 months ago

Running the following command

 mpirun -n 16 julia --project -O3 --check-bounds=yes -e 'include("moment_kinetics/test/fokker_planck_time_evolution_tests.jl")'

(i.e., testing moment_kinetics on 16 cores)

gives the following error message (to see it, set debug_level = 2 and comment out the quiet output command in the test):

BoundsError on process 14:
BoundsError: attempt to access 7-element Vector{Int64} at index [8]
Stacktrace:
  [1] getindex
    @ ./essentials.jl:13 [inlined]
  [2] get_local_range(sub_block_rank::Int64, sub_block_size::Int64, dim_size::Int64)
    @ moment_kinetics.looping /excalibur/moment_kinetics_newfkpl/moment_kinetics/src/looping.jl:193
  [3] #15
    @ ./array.jl:0 [inlined]
  [4] iterate
    @ ./generator.jl:47 [inlined]
  [5] collect_to!(dest::AbstractArray{T}, itr::Any, offs::Any, st::Any) where T
    @ Base ./array.jl:892 [inlined]
  [6] collect_to_with_first!(dest::Vector{UnitRange{Int64}}, v1::UnitRange{Int64}, itr::Base.Generator{Base.Iterators.Zip{Tuple{Vector{Int64}, Vector{Int64}, Base.Generator{NTuple{5, Symbol}, moment_kinetics.looping.var"#24#26"{@Kwargs{s::Int64, sn::Int64, r::Int64, z::Int64, vperp::Int64, vpa::Int64, vzeta::Int64, vr::Int64, vz::Int64}}}}}, moment_kinetics.looping.var"#15#17"}, st::Tuple{Int64, Int64, Int64})
    @ Base ./array.jl:870
  [7] collect(itr::Base.Generator{Base.Iterators.Zip{Tuple{Vector{Int64}, Vector{Int64}, Base.Generator{NTuple{5, Symbol}, moment_kinetics.looping.var"#24#26"{@Kwargs{s::Int64, sn::Int64, r::Int64, z::Int64, vperp::Int64, vpa::Int64, vzeta::Int64, vr::Int64, vz::Int64}}}}}, moment_kinetics.looping.var"#15#17"})
    @ Base ./array.jl:844
  [8] get_ranges_from_split(block_rank::Int64, effective_block_size::Int64, split::Vector{Int64}, dim_sizes_list::Base.Generator{NTuple{5, Symbol}, moment_kinetics.looping.var"#24#26"{@Kwargs{s::Int64, sn::Int64, r::Int64, z::Int64, vperp::Int64, vpa::Int64, vzeta::Int64, vr::Int64, vz::Int64}}})
    @ moment_kinetics.looping /excalibur/moment_kinetics_newfkpl/moment_kinetics/src/looping.jl:284
  [9] get_anyv_ranges(block_rank::Int64, split::Vector{Int64}, anyv_dims::Tuple{Symbol, Symbol, Symbol}, dim_sizes::@Kwargs{s::Int64, sn::Int64, r::Int64, z::Int64, vperp::Int64, vpa::Int64, vzeta::Int64, vr::Int64, vz::Int64})
    @ moment_kinetics.looping /excalibur/moment_kinetics_newfkpl/moment_kinetics/src/looping.jl:386
 [10] setup_loop_ranges!(block_rank::Int64, block_size::Int64; dim_sizes::@Kwargs{s::Int64, sn::Int64, r::Int64, z::Int64, vperp::Int64, vpa::Int64, vzeta::Int64, vr::Int64, vz::Int64})
    @ moment_kinetics.looping /excalibur/moment_kinetics_newfkpl/moment_kinetics/src/looping.jl:483
 [11] setup_loop_ranges!
    @ /excalibur/moment_kinetics_newfkpl/moment_kinetics/src/looping.jl:437 [inlined]
 [12] setup_moment_kinetics(input_dict::Dict{Any, Any}; restart::Bool, restart_time_index::Int64, debug_loop_type::Nothing, debug_loop_parallel_dims::Nothing)
    @ moment_kinetics //excalibur/moment_kinetics_newfkpl/moment_kinetics/src/moment_kinetics.jl:234
 [13] setup_moment_kinetics
    @ /excalibur/moment_kinetics_newfkpl/moment_kinetics/src/moment_kinetics.jl:207 [inlined]
 [14] run_moment_kinetics(to::Nothing, input_dict::Dict{Any, Any}; restart::Bool, restart_time_index::Int64)
    @ moment_kinetics //excalibur/moment_kinetics_newfkpl/moment_kinetics/src/moment_kinetics.jl:121

This error now appears to be present when running on 2, 4, 8, and 12 cores.