Add support for vectorization

giordano commented 7 years ago

nvec was not actually used in generic_integrand!. Fix #10.

giordano commented 7 years ago

@afniedermayer what do you think about this? See for example the test at https://github.com/giordano/Cuba.jl/pull/11/files#diff-fce720c43af3c52c862fd7451c7374b8R79 how a vectorization function would be called.

afniedermayer commented 7 years ago

The first version you had might be faster, because it avoids type instability, whereas this version might be type unstable. (The type of integrand seems to depend on the value of nvec. I'm not entirely sure, I'll check and benchmark once I'm at my office computer again.)

giordano commented 7 years ago

Good point about possible type-instability, but doesn't seem to be the case to me:

julia> @code_warntype cuhre((x,f) -> f[1] = x[1])
Variables:
  #self#::Cuba.#cuhre
  integrand::##12#13

Body:
  begin
      return $(Expr(:invoke, MethodInstance for #cuhre#1(::Int64, ::Float64, ::Float64, ::Int64, ::Int64, ::Int64, ::Int64, ::String, ::Ptr{Void}, ::Cuba.#cuhre, ::##12#13, ::Int64, ::Int64), :(Cuba.#cuhre#1), :(Cuba.NVEC), :(Cuba.RELTOL), :(Cuba.ABSTOL), :(Cuba.FLAGS), :(Cuba.MINEVALS), :(Cuba.MAXEVALS), :(Cuba.KEY), :(Cuba.STATEFILE), :(Cuba.SPIN), :(#self#), :(integrand), 1, 1))::Cuba.Integral
  end::Cuba.Integral

I also used @code_warntype on dointegrate directly, but the output is completely clean from instabilities:

julia> function foo{T}(integrand::T, ndim::Integer=1, ncomp::Integer=1;
                         nvec::Integer=Cuba.NVEC, reltol::Real=Cuba.RELTOL, abstol::Real=Cuba.ABSTOL,
                         flags::Integer=Cuba.FLAGS, minevals::Real=Cuba.MINEVALS,
                         maxevals::Real=Cuba.MAXEVALS, key::Integer=Cuba.KEY,
                         statefile::AbstractString=Cuba.STATEFILE, spin::Ptr{Void}=Cuba.SPIN)
           # Cuhre requires "ndim" to be at least 2, even for an integral over a one
           # dimensional domain.  Instead, we don't prevent users from setting wrong
           # "ndim" values like 0 or negative ones.
           ndim == 1 && (ndim = 2)
           return (Cuba.Cuhre(integrand, ndim, ncomp, Int64(nvec), Cdouble(reltol),
                                    Cdouble(abstol), flags, trunc(Int64, minevals),
                                    trunc(Int64, maxevals), key, String(statefile), spin))
       end
foo (generic function with 3 methods)

julia> integrand = foo((x,f) -> f[1] = x[1] + cos(x[2]) - exp(x[3]), 3)
Cuba.Cuhre{##2#3}(#2, 3, 1, 1, 0.0001, 1.0e-12, 0, 0, 1000000, 0, "", Ptr{Void} @0x0000000000000000)

julia> @code_warntype Cuba.dointegrate(integrand)
Variables:
  #self#::Cuba.#dointegrate
  x::Cuba.Cuhre{##2#3}
  integrand::Ptr{Void}
  integral::Array{Float64,1}
  error::Array{Float64,1}
  prob::Array{Float64,1}
  neval::Base.RefValue{Int64}
  fail::Base.RefValue{Int32}
  nregions::Base.RefValue{Int32}
  #temp#@_10::Ptr{Int32}
  #temp#@_11::Ptr{Int64}
  #temp#@_12::Ptr{Int32}

Body:
  begin
      NewvarNode(:(integrand::Ptr{Void}))
      NewvarNode(:(integral::Array{Float64,1}))
      NewvarNode(:(error::Array{Float64,1}))
      NewvarNode(:(prob::Array{Float64,1}))
      NewvarNode(:(neval::Base.RefValue{Int64}))
      NewvarNode(:(fail::Base.RefValue{Int32}))
      NewvarNode(:(nregions::Base.RefValue{Int32}))
      unless ((Core.getfield)(x::Cuba.Cuhre{##2#3}, :nvec)::Int64 === 1)::Bool goto 12
      #= line 162 =#
      integrand::Ptr{Void} = $(Expr(:foreigncall, :(:jl_function_ptr), Ptr{Void}, svec(Any, Any, Any), :(Cuba.generic_integrand!), 0, :(Cuba.Cint), 0, :((Core.tuple)(Ref{Int32}, Ptr{Float64}, Ref{Int32}, Ptr{Float64}, Ref{##2#3})::NTuple{5,DataType}), 0))
      goto 15
      12: 
      #= line 164 =#
      integrand::Ptr{Void} = $(Expr(:foreigncall, :(:jl_function_ptr), Ptr{Void}, svec(Any, Any, Any), :(Cuba.generic_integrand!), 0, :(Cuba.Cint), 0, :((Core.tuple)(Ref{Int32}, Ptr{Float64}, Ref{Int32}, Ptr{Float64}, Ref{##2#3}, Ref{Int32})::NTuple{6,DataType}), 0))
      15: 
      #= line 166 =#
      integral::Array{Float64,1} = $(Expr(:foreigncall, :(:jl_alloc_array_1d), Array{Float64,1}, svec(Any, Int64), Array{Float64,1}, 0, :((Core.getfield)(x, :ncomp)::Int64), 0))
      #= line 167 =#
      error::Array{Float64,1} = $(Expr(:foreigncall, :(:jl_alloc_array_1d), Array{Float64,1}, svec(Any, Int64), Array{Float64,1}, 0, :((Core.getfield)(x, :ncomp)::Int64), 0))
      #= line 168 =#
      prob::Array{Float64,1} = $(Expr(:foreigncall, :(:jl_alloc_array_1d), Array{Float64,1}, svec(Any, Int64), Array{Float64,1}, 0, :((Core.getfield)(x, :ncomp)::Int64), 0))
      #= line 169 =#
      neval::Base.RefValue{Int64} = $(Expr(:new, Base.RefValue{Int64}, 0))
      #= line 170 =#
      fail::Base.RefValue{Int32} = $(Expr(:new, Base.RefValue{Int32}, :((Base.checked_trunc_sint)(Int32, 0)::Int32)))
      #= line 171 =#
      nregions::Base.RefValue{Int32} = $(Expr(:new, Base.RefValue{Int32}, :((Base.checked_trunc_sint)(Int32, 0)::Int32)))
      #= line 172 =#
      $(Expr(:inbounds, false))
      # meta: location /home/mose/.julia/v0.7/Cuba/src/cuhre.jl dointegrate! 40
      SSAValue(3) = (Base.checked_trunc_sint)(Int32, (Core.getfield)(x::Cuba.Cuhre{##2#3}, :ndim)::Int64)::Int32
      SSAValue(4) = (Base.checked_trunc_sint)(Int32, (Core.getfield)(x::Cuba.Cuhre{##2#3}, :ncomp)::Int64)::Int32
      SSAValue(6) = (Core.getfield)(x::Cuba.Cuhre{##2#3}, :nvec)::Int64
      SSAValue(7) = (Core.getfield)(x::Cuba.Cuhre{##2#3}, :reltol)::Float64
      SSAValue(8) = (Core.getfield)(x::Cuba.Cuhre{##2#3}, :abstol)::Float64
      SSAValue(9) = (Base.checked_trunc_sint)(Int32, (Core.getfield)(x::Cuba.Cuhre{##2#3}, :flags)::Int64)::Int32
      SSAValue(10) = (Core.getfield)(x::Cuba.Cuhre{##2#3}, :minevals)::Int64
      SSAValue(11) = (Core.getfield)(x::Cuba.Cuhre{##2#3}, :maxevals)::Int64
      SSAValue(12) = (Base.checked_trunc_sint)(Int32, (Core.getfield)(x::Cuba.Cuhre{##2#3}, :key)::Int64)::Int32
      SSAValue(13) = (Core.getfield)(x::Cuba.Cuhre{##2#3}, :statefile)::String
      SSAValue(14) = (Core.getfield)(x::Cuba.Cuhre{##2#3}, :spin)::Ptr{Void}
      # meta: location pointer.jl unsafe_convert 38
      SSAValue(25) = $(Expr(:foreigncall, :(:jl_value_ptr), Ptr{Void}, svec(Any), SSAValue(13), 0))
      SSAValue(24) = (Core.sizeof)(Base.Int)::Int64
      # meta: pop location
      # meta: location refpointer.jl unsafe_convert 57
      SSAValue(23) = $(Expr(:foreigncall, :(:jl_value_ptr), Ptr{Void}, svec(Any), :(nregions), 0))
      #temp#@_12::Ptr{Int32} = (Base.bitcast)(Ptr{Int32}, SSAValue(23))
      goto 51
      #= line 59 =#
      51: 
      # meta: pop location
      # meta: location refpointer.jl unsafe_convert 57
      SSAValue(22) = $(Expr(:foreigncall, :(:jl_value_ptr), Ptr{Void}, svec(Any), :(neval), 0))
      #temp#@_11::Ptr{Int64} = (Base.bitcast)(Ptr{Int64}, SSAValue(22))
      goto 58
      #= line 59 =#
      58: 
      # meta: pop location
      # meta: location refpointer.jl unsafe_convert 57
      SSAValue(21) = $(Expr(:foreigncall, :(:jl_value_ptr), Ptr{Void}, svec(Any), :(fail), 0))
      #temp#@_10::Ptr{Int32} = (Base.bitcast)(Ptr{Int32}, SSAValue(21))
      goto 65
      #= line 59 =#
      65: 
      # meta: pop location
      # meta: pop location
      $(Expr(:inbounds, :pop))
      $(Expr(:foreigncall, (:llCuhre, "/home/mose/.julia/v0.7/Cuba/src/../deps/libcuba"), Float64, svec(Int32, Int32, Ptr{Void}, Any, Int64, Float64, Float64, Int32, Int64, Int64, Int32, Ptr{Int8}, Ptr{Void}, Ptr{Int32}, Ptr{Int64}, Ptr{Int32}, Ptr{Float64}, Ptr{Float64}, Ptr{Float64}), SSAValue(3), 0, SSAValue(4), 0, :(integrand), 0, :($(QuoteNode(#2))), 0, SSAValue(6), 0, SSAValue(7), 0, SSAValue(8), 0, SSAValue(9), 0, SSAValue(10), 0, SSAValue(11), 0, SSAValue(12), 0, :((Base.bitcast)(Ptr{Int8}, (Base.bitcast)(Ptr{Void}, (Base.add_int)((Base.bitcast)(UInt64, SSAValue(25)), (Base.bitcast)(UInt64, SSAValue(24)))::UInt64))), SSAValue(13), SSAValue(14), 0, :(#temp#@_12), :(nregions), :(#temp#@_11), :(neval), :(#temp#@_10), :(fail), :($(Expr(:foreigncall, :(:jl_array_ptr), Ptr{Float64}, svec(Any), :(integral), 0))), :(integral), :($(Expr(:foreigncall, :(:jl_array_ptr), Ptr{Float64}, svec(Any), :(error), 0))), :(error), :($(Expr(:foreigncall, :(:jl_array_ptr), Ptr{Float64}, svec(Any), :(prob), 0))), :(prob)))
      #= line 173 =#
      return $(Expr(:new, :(Cuba.Integral), :(integral), :(error), :(prob), :((Core.getfield)(neval, :x)::Int64), :((Core.getfield)(fail, :x)::Int32), :((Core.getfield)(nregions, :x)::Int32)))
  end::Cuba.Integral

Anyway, I'll wait for your analysis, I have no rush ;-) Thanks!

afniedermayer commented 7 years ago

I also checked, you're right, there is no code instability, because both integrand_ptr and integrand_ptr_nvec return type Ptr{Void}. Both implementations seem to run at the same speed.

Having nvec is great, because now one can run parallel code. A simple benchmark comparison I made (on my laptop with two physical cores in a VM, I would expect a much larger speedup on a workstation with 12 cores) I got a speedup of a factor 1.3 from multithreading:

julia> using Cuba

julia> using BenchmarkTools
cons
julia> const ndim = 10
10

julia> function fun_vec(x,f)
         f[1,:] = 1.0
         for j=1:size(x,2)
           for i=1:ndim
             f[1, j] *= cos(x[i, j])
           end
         end
       end
fun_vec (generic function with 1 method)

julia> @benchmark divonne(fun_vec, ndim, nvec=1000)
BenchmarkTools.Trial: 
  memory estimate:  720.41 KiB
  allocs estimate:  15367
  --------------
  minimum time:     17.774 ms (0.00% GC)
  median time:      17.967 ms (0.00% GC)
  mean time:        18.175 ms (0.23% GC)
  maximum time:     25.778 ms (0.00% GC)
  --------------
  samples:          275
  evals/sample:     1

julia> function fun_par(x,f)
         f[1,:] = 1.0
         Threads.@threads for j=1:size(x,2)
           for i=1:ndim
             f[1, j] *= cos(x[i, j])
           end
         end
       end
fun_par (generic function with 1 method)

julia> @benchmark divonne(fun_par, ndim, nvec=1000)
BenchmarkTools.Trial: 
  memory estimate:  864.22 KiB
  allocs estimate:  18435
  --------------
  minimum time:     13.515 ms (0.00% GC)
  median time:      13.767 ms (0.00% GC)
  mean time:        15.331 ms (0.42% GC)
  maximum time:     50.003 ms (0.00% GC)
  --------------
  samples:          326
  evals/sample:     1

giordano commented 7 years ago

I added an example to the documentation, based on the function you showed above ;-)

You can view the example in the documentation at https://cubajl.readthedocs.io/en/vectorization/#vectorized-function

Thanks!

afniedermayer commented 7 years ago

That's great, thank! :-)

giordano / Cuba.jl

Add support for vectorization #11