function optimized_time_step(u::Matrix{T}) where {T}
n, m = size(u)
error = T(0.0) # not an optimization, but makes it work for all element types
@inbounds for j = 2:m-1
for i = 2:n-1 # @simd ivdep, but why is it writing into u[j,i]? That's not the time step.
temp = u[j, i];
u[j, i] = ( T(4) *(u[j-1, i] + u[j+1, i] +
u[j, i-1] + u[j, i+1]) +
u[j-1, i-1] + u[j+1, i+1] +
u[j+1, i-1] + u[j-1, i+1]) / T(20)
difference = u[j, i] - temp
error += difference*difference;
end
end
return sqrt(error)
end
is probably a better code, though this is a case where most people would do:
using LoopVectorization
function optimized_time_step(u::Matrix{T}) where {T}
n, m = size(u)
error = T(0.0) # not an optimization, but makes it work for all element types
@turbo for j = 2:m-1 # @tturbo for threading
for i = 2:n-1
temp = u[j, i];
u[j, i] = ( T(4) *(u[j-1, i] + u[j+1, i] +
u[j, i-1] + u[j, i+1]) +
u[j-1, i-1] + u[j+1, i+1] +
u[j+1, i-1] + u[j-1, i+1]) / T(20)
difference = u[j, i] - temp
error += difference*difference;
end
end
return sqrt(error)
end
This code is row major:
https://github.com/JulesKouatchou/basic_language_comparison/blob/master/Julia/test_laplace_jacobi_4.jl#L40-L55
is probably a better code, though this is a case where most people would do: