allocations... - Githubissues

j-fu commented 4 years ago

Hi, in your diagonal preconditioner ldiv method, you solve the preconditioning problem as follows:

@inline function ldiv!(y, C::DiagonalPreconditioner, b)
    @inbounds @simd for i in 1:length(C.D)
        y[i,:] .= view(b, i, :) ./ C.D[i]
    end
    return y
end

Though this way of writing the code is very universal, it creates tons of allocations because for every view, an view object is created and an allocation is perfomed. Consequently, this implementation does not scale for large problems. You might benchmark the allocations in your inner loop with the help of @time.

Jürgen

GregVernon commented 4 years ago

I made a similar issue over on IterativeSolvers, but it looks like its the same issue as yours @j-fu, so I closed it there and have pasted the MWE as an excerpt to support your issue.

import BenchmarkTools
import IterativeSolvers
import IncompleteLU
import Preconditioners
import MatrixDepot

# Create a Wathen matrix
NX = 100
NY = 100
A = MatrixDepot.wathen(NX,NY)
b = rand(size(A,1))

# Precompute Preconditioners
PL_ilu = IncompleteLU.ilu(A,τ=1e-2)
PL_jac = Preconditioners.DiagonalPreconditioner(A)  # <===== This one
PL_amg_1 = Preconditioners.AMGPreconditioner{Preconditioners.RugeStuben}(A)
PL_amg_2 = Preconditioners.AMGPreconditioner{Preconditioners.SmoothedAggregation}(A)

# Benchmark the solvers
println("Direct Solver (\\)")
BenchmarkTools.@btime $A\$b

println("Iterative Solver (cg)")
BenchmarkTools.@btime IterativeSolvers.cg($A,$b,tol=1e-5);

println("Iterative Solver (pcg-ilu)")
BenchmarkTools.@btime IterativeSolvers.cg($A,$b,tol=1e-5,Pl=$PL_ilu);

println("Iterative Solver (pcg-jac)")
BenchmarkTools.@btime IterativeSolvers.cg($A,$b,tol=1e-5,Pl=$PL_jac); # <===== This one

println("Iterative Solver (pcg-amg:rs)")
BenchmarkTools.@btime IterativeSolvers.cg($A,$b,tol=1e-5,Pl=$PL_amg_1);

println("Iterative Solver (pcg-amg:sa)")
BenchmarkTools.@btime IterativeSolvers.cg($A,$b,tol=1e-5,Pl=$PL_amg_2);

Results in

Direct Solver (\)
          149.337 ms (53 allocations: 51.89 MiB)
Iterative Solver (cg)
          212.769 ms (19 allocations: 1.16 MiB)
Iterative Solver (pcg-ilu)
          5.610 ms (20 allocations: 1.16 MiB)
Iterative Solver (pcg-jac)
          149.278 ms (4560170 allocations: 209.91 MiB) # <===== This one
Iterative Solver (pcg-amg:rs)
          65.514 ms (29 allocations: 1.16 MiB)
Iterative Solver (pcg-amg:sa)
          50.865 ms (29 allocations: 1.16 MiB)

j-fu commented 4 years ago

Hi, thanks for the MWE! I needed a Jacobi preconditioner for teaching, so I rolled my own, it sits in ExtendableSparse.jl (the preconditioning stuff there is still alpha). I updated the MWE for this:

import BenchmarkTools
import IterativeSolvers
import IncompleteLU
import ExtendableSparse
import Preconditioners
import MatrixDepot

function mwe()

    # Create a Wathen matrix
    NX = 100
    NY = 100
    A = MatrixDepot.wathen(NX,NY)
    b = rand(size(A,1))

    # Precompute Preconditioners
    PL_ilu = IncompleteLU.ilu(A,τ=1e-2)
    PL_jac = Preconditioners.DiagonalPreconditioner(A)  # <===== This one
    PL_amg_1 = Preconditioners.AMGPreconditioner{Preconditioners.RugeStuben}(A)
    PL_amg_2 = Preconditioners.AMGPreconditioner{Preconditioners.SmoothedAggregation}(A)

    PL_xilu0=ExtendableSparse.ILU0Preconditioner(A)
    PL_xjac=ExtendableSparse.JacobiPreconditioner(A)

    # Benchmark the solvers
    println("Direct Solver (\\):")
    BenchmarkTools.@btime $A\$b

    println("Iterative Solver (cg):")
    x,history=BenchmarkTools.@btime IterativeSolvers.cg($A,$b,tol=1e-5,log=true)
    @show history

    println("Iterative Solver (pcg-ilu):")
    x,history=BenchmarkTools.@btime IterativeSolvers.cg($A,$b,tol=1e-5,Pl=$PL_ilu,log=true);
    @show history

    println("Iterative Solver (pcg-jac):")
    x,history=BenchmarkTools.@btime IterativeSolvers.cg($A,$b,tol=1e-5,Pl=$PL_jac,log=true); # <===== This one
    @show history

    println("Iterative Solver (pcg-amg:rs):")
    x,history=BenchmarkTools.@btime IterativeSolvers.cg($A,$b,tol=1e-5,Pl=$PL_amg_1,log=true);
    @show history

    println("Iterative Solver (pcg-amg:sa):")
    x,history=BenchmarkTools.@btime IterativeSolvers.cg($A,$b,tol=1e-5,Pl=$PL_amg_2,log=true);
    @show history

    println("Iterative Solver (pcg-xjac):")
    x,history=BenchmarkTools.@btime IterativeSolvers.cg($A,$b,tol=1e-5,Pl=$PL_xjac,log=true); 
    @show history

    println("Iterative Solver (pcg-xilu0):")
    x,history=BenchmarkTools.@btime IterativeSolvers.cg($A,$b,tol=1e-5,Pl=$PL_xilu0,log=true); 
    @show history

end
mwe();

The result is here:

Direct Solver (\):
  36.659 ms (53 allocations: 51.89 MiB)
Iterative Solver (cg):
  79.671 ms (774 allocations: 1.40 MiB)
history = Converged after 250 iterations.
Iterative Solver (pcg-ilu):
  3.935 ms (31 allocations: 1.39 MiB)
history = Converged after 2 iterations.
Iterative Solver (pcg-jac):
  99.862 ms (4560250 allocations: 210.14 MiB)
history = Converged after 25 iterations.
Iterative Solver (pcg-amg:rs):
  30.875 ms (61 allocations: 1.39 MiB)
history = Converged after 9 iterations.
Iterative Solver (pcg-amg:sa):
  25.883 ms (61 allocations: 1.39 MiB)
history = Converged after 9 iterations.
Iterative Solver (pcg-xjac):
  8.492 ms (100 allocations: 1.39 MiB)
history = Converged after 25 iterations.
Iterative Solver (pcg-xilu0):
  10.573 ms (64 allocations: 1.39 MiB)
history = Converged after 13 iterations.

We see that both variants of Jacobi use 25 iterations, and the allocations make the Jacobi in Preconditioners.jl using 10x more time.

mohamed82008 commented 4 years ago

Thanks for reporting this and sorry for the late response. Could you please make a PR changing the broadcasting to a loop and benchmark?

j-fu commented 4 years ago

Hi, ok, I can get my hands on it ~~tuesday night or wednesday~~ friday.

JuliaLinearAlgebra / Preconditioners.jl

allocations... #12