After adding and using 4 processors, ClusterManagers fails when it's time to remove the procs. Below is an MWE:
using ClusterManagers
using Distributed
# Number of worker processes to request.
N_JOBS = 4
# Add N_JOBS workers via ClusterManagers' SGE launcher, submitting to the "single.q" queue.
addprocs_sge(N_JOBS; queue="single.q")
@everywhere using Dates
# test(id, info; delay=10)
#
# Dummy job defined on every process: wait `delay` seconds, then report back.
#
# Arguments:
#   id    - identifier echoed back to the caller (the driver passes the worker's PID).
#   info  - arbitrary payload echoed back unchanged.
#   delay - seconds to sleep before returning; defaults to 10, which is the
#           value that used to be hard-coded, so existing two-argument calls
#           behave exactly as before.
#
# Returns the tuple `(id, second(now()), info)`, where the middle element is the
# seconds component of the local clock read after the sleep has finished.
@everywhere function test(id, info; delay=10)
    # Simulate a long-running computation.
    sleep(delay)
    return id, second(now()), info
end
# One pending remote result per worker, filled in by the launch loop below.
futures = []
#works
for pid in workers()
    # Fetch inline: a blocking round-trip that prints the worker's OS process id
    # together with the seconds component of the local clock.
    println("start: ", fetch(@spawnat pid getpid()), " ", second(now()))
    # Fetch later: start the job without waiting and keep the handle around.
    pending = @spawnat pid test(getpid(), "pull results")
    push!(futures, pending)
end
# works
for pending in futures
    # Blocks until the corresponding job has finished, then splats its result tuple.
    println("end: ", fetch(pending)...)
end
# Remove all workers again.
rmprocs(workers())
The error is the following:
ERROR: LoadError: MethodError: no method matching get(::Base.Process)
Closest candidates are:
get(::IO, ::Any, ::Any) at show.jl:339
get(::Base.EnvDict, ::AbstractString, ::Any) at env.jl:80
get(::Base.TTY, ::Symbol, ::Any) at ttyhascolor.jl:27
...
Stacktrace:
[1] kill(::SGEManager, ::Int64, ::WorkerConfig) at /home/alequa/.julia/packages/ClusterManagers/Mq0H0/src/qsub.jl:119
[2] _rmprocs(::Array{Int64,1}, ::Int64) at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.5/Distributed/src/cluster.jl:1035
[3] rmprocs(::Array{Int64,1}; waitfor::Int64) at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.5/Distributed/src/cluster.jl:1018
[4] rmprocs(::Array{Int64,1}) at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.5/Distributed/src/cluster.jl:1010
[5] top-level scope at /home/alequa/Documents/Research/phd_project/simulations/tripod_network/cluster/sge_test.jl:29
[6] include(::Function, ::Module, ::String) at ./Base.jl:380
[7] include(::Module, ::String) at ./Base.jl:368
[8] exec_options(::Base.JLOptions) at ./client.jl:296
[9] _start() at ./client.jl:506
in expression starting at /home/alequa/Documents/Research/phd_project/simulations/tripod_network/cluster/sge_test.jl:29
┌ Warning: Forcibly interrupting busy workers
│ exception =
│ MethodError: no method matching get(::Base.Process)
│ Closest candidates are:
│ get(::IO, ::Any, ::Any) at show.jl:339
│ get(::Base.EnvDict, ::AbstractString, ::Any) at env.jl:80
│ get(::Base.TTY, ::Symbol, ::Any) at ttyhascolor.jl:27
│ ...
└ @ Distributed /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.5/Distributed/src/cluster.jl:1234
┌ Error: Unable to terminate all workers
│ exception =
│ MethodError: no method matching get(::Base.Process)
│ Closest candidates are:
│ get(::IO, ::Any, ::Any) at show.jl:339
│ get(::Base.EnvDict, ::AbstractString, ::Any) at env.jl:80
│ get(::Base.TTY, ::Symbol, ::Any) at ttyhascolor.jl:27
│ ...
│ Stacktrace:
│ [1] kill(::SGEManager, ::Int64, ::WorkerConfig) at /home/alequa/.julia/packages/ClusterManagers/Mq0H0/src/qsub.jl:119
│ [2] _rmprocs(::Array{Int64,1}, ::Float64) at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.5/Distributed/src/cluster.jl:1035
│ [3] rmprocs(::Array{Int64,1}; waitfor::Float64) at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.5/Distributed/src/cluster.jl:1018
│ [4] terminate_all_workers() at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.5/Distributed/src/cluster.jl:1238
│ [5] _atexit() at ./initdefs.jl:316
│ [6] exit at ./initdefs.jl:28 [inlined]
│ [7] exec_options(::Base.JLOptions) at ./client.jl:300
│ [8] _start() at ./client.jl:506
└ @ Distributed /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.5/Distributed/src/cluster.jl:1240
Thanks for your help
PS.
It gives a similar error if I end the program without running rmprocs:
┌ Warning: Forcibly interrupting busy workers
│ exception =
│ MethodError: no method matching get(::Base.Process)
│ Closest candidates are:
│ get(::IO, ::Any, ::Any) at show.jl:339
│ get(::Base.EnvDict, ::AbstractString, ::Any) at env.jl:80
│ get(::Base.TTY, ::Symbol, ::Any) at ttyhascolor.jl:27
│ ...
└ @ Distributed /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.5/Distributed/src/cluster.jl:1234
┌ Error: Unable to terminate all workers
│ exception =
│ MethodError: no method matching get(::Base.Process)
│ Closest candidates are:
│ get(::IO, ::Any, ::Any) at show.jl:339
│ get(::Base.EnvDict, ::AbstractString, ::Any) at env.jl:80
│ get(::Base.TTY, ::Symbol, ::Any) at ttyhascolor.jl:27
│ ...
│ Stacktrace:
│ [1] kill(::SGEManager, ::Int64, ::WorkerConfig) at /home/alequa/.julia/packages/ClusterManagers/Mq0H0/src/qsub.jl:119
│ [2] _rmprocs(::Array{Int64,1}, ::Float64) at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.5/Distributed/src/cluster.jl:1035
│ [3] rmprocs(::Array{Int64,1}; waitfor::Float64) at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.5/Distributed/src/cluster.jl:1018
│ [4] terminate_all_workers() at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.5/Distributed/src/cluster.jl:1238
│ [5] _atexit() at ./initdefs.jl:316
└ @ Distributed /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.5/Distributed/src/cluster.jl:1240
Hi,
After adding and using 4 processors, ClusterManagers fails when it's time to remove the procs. Below is an MWE:
The error is the following:
Thanks for your help
PS.
It gives a similar error if I end the program without running rmprocs: