This PR has the auto unrolling macros which can now be used for any function. It also adds the field unroll to SampledBoxMap.
macro generate_unrolled(f, N)
quote
if hasmethod($(esc(f)), (AbstractVector, AbstractVector))
function unrolled_function(u)
@assert (n = length(u)) % $N == 0
du = similar(u)
@inbounds for i in 0 : $N : n - $N
$(esc(f))(
@view( du[i+1:i+$N] ),
u[i+1:i+$N]
)
end
return du
end
else
function unrolled_function(u)
@assert (n = length(u)) % $N == 0
du = similar(u)
@inbounds for i in 0 : $N : n - $N
du[i+1:i+$N] .= $(esc(f))(u[i+1:i+$N])
end
return du
end
end
unrolled_function
end
end
macro generate_simd(f, N)
quote
@inbounds @muladd function simd_function(u::Vector{SVector{$N,T}}; simd=Int(pick_vector_width(T))) where T
n = length(u)
m, r = divrem(n, simd)
@assert r == 0
u_new = reshape(permutedims(reshape(reinterpret(T, u), ($N, n))), ($N * n,))
idx = VecRange{simd}(1)
u_tmp = Vector{Vec{simd,T}}(
reshape(
[u_new[idx + simd * i + n * j]
for j in 0 : $N - 1, i in 0 : m - 1],
($N * m,)
)
)
u_tmp .= $(esc(f))(u_tmp)
for i in 0 : m - 1, j in 0 : $N - 1
u_new[idx + simd * i + n * j] = u_tmp[$N * i + j + 1]
end
u .= reinterpret(SVector{$N,T}, reshape(permutedims(reshape(u_new, (n, $N))), ($N * n,)))
return u
end
simd_function
end
end
Current challenges:
In order to see a good performance gain, one needs to supply @generate_unrolled with an in situ function
For the rk4 integrator: one wants to use @generate_unrolled on the vector field, and then @generate_simd on the integrator. This means we can't just automatically unroll every SampledBoxMap call the same way.
To get around problem 2, I overloaded rk4_flow_map to accept the signature (Function, Int) which autogenerates the correct unrolled function.
function rk4_flow_map(f, N::Int; τ=0.01, steps=20)
unrolled_f = @generate_unrolled f N
rk4_f(u) = rk4_flow_map(unrolled_f, u; τ=τ, steps=steps)
simd_f = @generate_simd rk4_f N
return simd_f
end
But it means that there have to be if/else statements within the SampledBoxMap constructor to check if the function has already been unrolled. A rather inelegant solution to this is
struct SampledBoxMap{N,T,B} <: BoxMap
map::Function
domain::Box{N,T}
domain_points::Function
image_points::Function
unroll::Val{B}
function SampledBoxMap(map, domain, domain_points, image_points, unroll)
new(map, domain, domain_points, image_points, unroll)
end
function SampledBoxMap(map, domain::Box{N,T}, domain_points, image_points, ::Val{:cpu}) where {N,T}
if hasmethod(map, (AbstractVector,), (:simd,))
return new(map, domain, domain_points, image_points, Val{:cpu}())
else
unrolled_map = @generate_unrolled map N
simd_map = @generate_simd unrolled_map N
return new(simd_map, domain, domain_points, image_points, Val{:cpu}())
end
end
end
This PR has the auto unrolling macros which can now be used for any function. It also adds the field
unroll
toSampledBoxMap
.Current challenges:
@generate_unrolled
with an in situ function@generate_unrolled
on the vector field, and then@generate_simd
on the integrator. This means we can't just automatically unroll everySampledBoxMap
call the same way.To get around problem 2, I overloaded
rk4_flow_map
to accept the signature(Function, Int)
which autogenerates the correct unrolled function.But it means that there have to be if/else statements within the
SampledBoxMap
constructor to check if the function has already been unrolled. A rather inelegant solution to this is