Closed giordano closed 7 years ago
@afniedermayer what do you think about this? See for example the test at https://github.com/giordano/Cuba.jl/pull/11/files#diff-fce720c43af3c52c862fd7451c7374b8R79 how a vectorization function would be called.
The first version you had might be faster, because it avoids type instability, whereas this version might be type unstable. (The type of integrand seems to depend on the value of nvec. I'm not entirely sure, I'll check and benchmark once I'm at my office computer again.)
Good point about possible type-instability, but doesn't seem to be the case to me:
julia> @code_warntype cuhre((x,f) -> f[1] = x[1])
Variables:
#self#::Cuba.#cuhre
integrand::##12#13
Body:
begin
return $(Expr(:invoke, MethodInstance for #cuhre#1(::Int64, ::Float64, ::Float64, ::Int64, ::Int64, ::Int64, ::Int64, ::String, ::Ptr{Void}, ::Cuba.#cuhre, ::##12#13, ::Int64, ::Int64), :(Cuba.#cuhre#1), :(Cuba.NVEC), :(Cuba.RELTOL), :(Cuba.ABSTOL), :(Cuba.FLAGS), :(Cuba.MINEVALS), :(Cuba.MAXEVALS), :(Cuba.KEY), :(Cuba.STATEFILE), :(Cuba.SPIN), :(#self#), :(integrand), 1, 1))::Cuba.Integral
end::Cuba.Integral
I also used @code_warntype
on dointegrate
directly, but the output is completely clean from instabilities:
julia> function foo{T}(integrand::T, ndim::Integer=1, ncomp::Integer=1;
nvec::Integer=Cuba.NVEC, reltol::Real=Cuba.RELTOL, abstol::Real=Cuba.ABSTOL,
flags::Integer=Cuba.FLAGS, minevals::Real=Cuba.MINEVALS,
maxevals::Real=Cuba.MAXEVALS, key::Integer=Cuba.KEY,
statefile::AbstractString=Cuba.STATEFILE, spin::Ptr{Void}=Cuba.SPIN)
# Cuhre requires "ndim" to be at least 2, even for an integral over a one
# dimensional domain. Instead, we don't prevent users from setting wrong
# "ndim" values like 0 or negative ones.
ndim == 1 && (ndim = 2)
return (Cuba.Cuhre(integrand, ndim, ncomp, Int64(nvec), Cdouble(reltol),
Cdouble(abstol), flags, trunc(Int64, minevals),
trunc(Int64, maxevals), key, String(statefile), spin))
end
foo (generic function with 3 methods)
julia> integrand = foo((x,f) -> f[1] = x[1] + cos(x[2]) - exp(x[3]), 3)
Cuba.Cuhre{##2#3}(#2, 3, 1, 1, 0.0001, 1.0e-12, 0, 0, 1000000, 0, "", Ptr{Void} @0x0000000000000000)
julia> @code_warntype Cuba.dointegrate(integrand)
Variables:
#self#::Cuba.#dointegrate
x::Cuba.Cuhre{##2#3}
integrand::Ptr{Void}
integral::Array{Float64,1}
error::Array{Float64,1}
prob::Array{Float64,1}
neval::Base.RefValue{Int64}
fail::Base.RefValue{Int32}
nregions::Base.RefValue{Int32}
#temp#@_10::Ptr{Int32}
#temp#@_11::Ptr{Int64}
#temp#@_12::Ptr{Int32}
Body:
begin
NewvarNode(:(integrand::Ptr{Void}))
NewvarNode(:(integral::Array{Float64,1}))
NewvarNode(:(error::Array{Float64,1}))
NewvarNode(:(prob::Array{Float64,1}))
NewvarNode(:(neval::Base.RefValue{Int64}))
NewvarNode(:(fail::Base.RefValue{Int32}))
NewvarNode(:(nregions::Base.RefValue{Int32}))
unless ((Core.getfield)(x::Cuba.Cuhre{##2#3}, :nvec)::Int64 === 1)::Bool goto 12
#= line 162 =#
integrand::Ptr{Void} = $(Expr(:foreigncall, :(:jl_function_ptr), Ptr{Void}, svec(Any, Any, Any), :(Cuba.generic_integrand!), 0, :(Cuba.Cint), 0, :((Core.tuple)(Ref{Int32}, Ptr{Float64}, Ref{Int32}, Ptr{Float64}, Ref{##2#3})::NTuple{5,DataType}), 0))
goto 15
12:
#= line 164 =#
integrand::Ptr{Void} = $(Expr(:foreigncall, :(:jl_function_ptr), Ptr{Void}, svec(Any, Any, Any), :(Cuba.generic_integrand!), 0, :(Cuba.Cint), 0, :((Core.tuple)(Ref{Int32}, Ptr{Float64}, Ref{Int32}, Ptr{Float64}, Ref{##2#3}, Ref{Int32})::NTuple{6,DataType}), 0))
15:
#= line 166 =#
integral::Array{Float64,1} = $(Expr(:foreigncall, :(:jl_alloc_array_1d), Array{Float64,1}, svec(Any, Int64), Array{Float64,1}, 0, :((Core.getfield)(x, :ncomp)::Int64), 0))
#= line 167 =#
error::Array{Float64,1} = $(Expr(:foreigncall, :(:jl_alloc_array_1d), Array{Float64,1}, svec(Any, Int64), Array{Float64,1}, 0, :((Core.getfield)(x, :ncomp)::Int64), 0))
#= line 168 =#
prob::Array{Float64,1} = $(Expr(:foreigncall, :(:jl_alloc_array_1d), Array{Float64,1}, svec(Any, Int64), Array{Float64,1}, 0, :((Core.getfield)(x, :ncomp)::Int64), 0))
#= line 169 =#
neval::Base.RefValue{Int64} = $(Expr(:new, Base.RefValue{Int64}, 0))
#= line 170 =#
fail::Base.RefValue{Int32} = $(Expr(:new, Base.RefValue{Int32}, :((Base.checked_trunc_sint)(Int32, 0)::Int32)))
#= line 171 =#
nregions::Base.RefValue{Int32} = $(Expr(:new, Base.RefValue{Int32}, :((Base.checked_trunc_sint)(Int32, 0)::Int32)))
#= line 172 =#
$(Expr(:inbounds, false))
# meta: location /home/mose/.julia/v0.7/Cuba/src/cuhre.jl dointegrate! 40
SSAValue(3) = (Base.checked_trunc_sint)(Int32, (Core.getfield)(x::Cuba.Cuhre{##2#3}, :ndim)::Int64)::Int32
SSAValue(4) = (Base.checked_trunc_sint)(Int32, (Core.getfield)(x::Cuba.Cuhre{##2#3}, :ncomp)::Int64)::Int32
SSAValue(6) = (Core.getfield)(x::Cuba.Cuhre{##2#3}, :nvec)::Int64
SSAValue(7) = (Core.getfield)(x::Cuba.Cuhre{##2#3}, :reltol)::Float64
SSAValue(8) = (Core.getfield)(x::Cuba.Cuhre{##2#3}, :abstol)::Float64
SSAValue(9) = (Base.checked_trunc_sint)(Int32, (Core.getfield)(x::Cuba.Cuhre{##2#3}, :flags)::Int64)::Int32
SSAValue(10) = (Core.getfield)(x::Cuba.Cuhre{##2#3}, :minevals)::Int64
SSAValue(11) = (Core.getfield)(x::Cuba.Cuhre{##2#3}, :maxevals)::Int64
SSAValue(12) = (Base.checked_trunc_sint)(Int32, (Core.getfield)(x::Cuba.Cuhre{##2#3}, :key)::Int64)::Int32
SSAValue(13) = (Core.getfield)(x::Cuba.Cuhre{##2#3}, :statefile)::String
SSAValue(14) = (Core.getfield)(x::Cuba.Cuhre{##2#3}, :spin)::Ptr{Void}
# meta: location pointer.jl unsafe_convert 38
SSAValue(25) = $(Expr(:foreigncall, :(:jl_value_ptr), Ptr{Void}, svec(Any), SSAValue(13), 0))
SSAValue(24) = (Core.sizeof)(Base.Int)::Int64
# meta: pop location
# meta: location refpointer.jl unsafe_convert 57
SSAValue(23) = $(Expr(:foreigncall, :(:jl_value_ptr), Ptr{Void}, svec(Any), :(nregions), 0))
#temp#@_12::Ptr{Int32} = (Base.bitcast)(Ptr{Int32}, SSAValue(23))
goto 51
#= line 59 =#
51:
# meta: pop location
# meta: location refpointer.jl unsafe_convert 57
SSAValue(22) = $(Expr(:foreigncall, :(:jl_value_ptr), Ptr{Void}, svec(Any), :(neval), 0))
#temp#@_11::Ptr{Int64} = (Base.bitcast)(Ptr{Int64}, SSAValue(22))
goto 58
#= line 59 =#
58:
# meta: pop location
# meta: location refpointer.jl unsafe_convert 57
SSAValue(21) = $(Expr(:foreigncall, :(:jl_value_ptr), Ptr{Void}, svec(Any), :(fail), 0))
#temp#@_10::Ptr{Int32} = (Base.bitcast)(Ptr{Int32}, SSAValue(21))
goto 65
#= line 59 =#
65:
# meta: pop location
# meta: pop location
$(Expr(:inbounds, :pop))
$(Expr(:foreigncall, (:llCuhre, "/home/mose/.julia/v0.7/Cuba/src/../deps/libcuba"), Float64, svec(Int32, Int32, Ptr{Void}, Any, Int64, Float64, Float64, Int32, Int64, Int64, Int32, Ptr{Int8}, Ptr{Void}, Ptr{Int32}, Ptr{Int64}, Ptr{Int32}, Ptr{Float64}, Ptr{Float64}, Ptr{Float64}), SSAValue(3), 0, SSAValue(4), 0, :(integrand), 0, :($(QuoteNode(#2))), 0, SSAValue(6), 0, SSAValue(7), 0, SSAValue(8), 0, SSAValue(9), 0, SSAValue(10), 0, SSAValue(11), 0, SSAValue(12), 0, :((Base.bitcast)(Ptr{Int8}, (Base.bitcast)(Ptr{Void}, (Base.add_int)((Base.bitcast)(UInt64, SSAValue(25)), (Base.bitcast)(UInt64, SSAValue(24)))::UInt64))), SSAValue(13), SSAValue(14), 0, :(#temp#@_12), :(nregions), :(#temp#@_11), :(neval), :(#temp#@_10), :(fail), :($(Expr(:foreigncall, :(:jl_array_ptr), Ptr{Float64}, svec(Any), :(integral), 0))), :(integral), :($(Expr(:foreigncall, :(:jl_array_ptr), Ptr{Float64}, svec(Any), :(error), 0))), :(error), :($(Expr(:foreigncall, :(:jl_array_ptr), Ptr{Float64}, svec(Any), :(prob), 0))), :(prob)))
#= line 173 =#
return $(Expr(:new, :(Cuba.Integral), :(integral), :(error), :(prob), :((Core.getfield)(neval, :x)::Int64), :((Core.getfield)(fail, :x)::Int32), :((Core.getfield)(nregions, :x)::Int32)))
end::Cuba.Integral
Anyway, I'll wait for your analysis, I have no rush ;-) Thanks!
I also checked, you're right, there is no code instability, because both integrand_ptr
and integrand_ptr_nvec
return type Ptr{Void}
. Both implementations seem to run at the same speed.
Having nvec
is great, because now one can run parallel code. A simple benchmark comparison I made (on my laptop with two physical cores in a VM, I would expect a much larger speedup on a workstation with 12 cores) I got a speedup of a factor 1.3 from multithreading:
julia> using Cuba
julia> using BenchmarkTools
cons
julia> const ndim = 10
10
julia> function fun_vec(x,f)
f[1,:] = 1.0
for j=1:size(x,2)
for i=1:ndim
f[1, j] *= cos(x[i, j])
end
end
end
fun_vec (generic function with 1 method)
julia> @benchmark divonne(fun_vec, ndim, nvec=1000)
BenchmarkTools.Trial:
memory estimate: 720.41 KiB
allocs estimate: 15367
--------------
minimum time: 17.774 ms (0.00% GC)
median time: 17.967 ms (0.00% GC)
mean time: 18.175 ms (0.23% GC)
maximum time: 25.778 ms (0.00% GC)
--------------
samples: 275
evals/sample: 1
julia> function fun_par(x,f)
f[1,:] = 1.0
Threads.@threads for j=1:size(x,2)
for i=1:ndim
f[1, j] *= cos(x[i, j])
end
end
end
fun_par (generic function with 1 method)
julia> @benchmark divonne(fun_par, ndim, nvec=1000)
BenchmarkTools.Trial:
memory estimate: 864.22 KiB
allocs estimate: 18435
--------------
minimum time: 13.515 ms (0.00% GC)
median time: 13.767 ms (0.00% GC)
mean time: 15.331 ms (0.42% GC)
maximum time: 50.003 ms (0.00% GC)
--------------
samples: 326
evals/sample: 1
I added an example to the documentation, based on the function you showed above ;-)
You can view the example in the documentation at https://cubajl.readthedocs.io/en/vectorization/#vectorized-function
Thanks!
That's great, thank! :-)
nvec
was not actually used ingeneric_integrand!
. Fix #10.