exanauts / CUDSS.jl

MIT License
18 stars 1 forks source link

Unable to reuse the analysis if we only store one triangle for symmetric factorizations #10

Closed amontoison closed 6 months ago

amontoison commented 9 months ago
using CUDA, CUDA.CUSPARSE
using CUDSS
using LinearAlgebra
using SparseArrays
using Test

println(CUDSS.CUDSS_INSTALLATION)

function cudss_generic()
  n = 100
  p = 5
  @testset "precision = $T" for T in (Float32, Float64, ComplexF32, ComplexF64)
    R = real(T)
    @testset "Unsymmetric -- Non-Hermitian" begin
      A_cpu = sprand(T, n, n, 0.02) + I
      b_cpu = rand(T, n)

      A_gpu = CuSparseMatrixCSR(A_cpu)
      b_gpu = CuVector(b_cpu)

      @testset "ldiv!" begin
        x_cpu = zeros(T, n)
        x_gpu = CuVector(x_cpu)

        solver = lu(A_gpu)
        ldiv!(x_gpu, solver, b_gpu)
        r_gpu = b_gpu - A_gpu * x_gpu
        @test norm(r_gpu) ≤ √eps(R)

        A_gpu2 = rand(T) * A_gpu
        lu!(solver, A_gpu2)
        x_gpu .= b_gpu
        ldiv!(solver, x_gpu)
        r_gpu2 = b_gpu - A_gpu2 * x_gpu
        @test norm(r_gpu2) ≤ √eps(R)
      end

      @testset "\\" begin
        solver = lu(A_gpu)
        x_gpu = solver \ b_gpu
        r_gpu = b_gpu - A_gpu * x_gpu
        @test norm(r_gpu) ≤ √eps(R)

        A_gpu2 = rand(T) * A_gpu
        lu!(solver, A_gpu2)
        x_gpu = solver \ b_gpu
        r_gpu2 = b_gpu - A_gpu2 * x_gpu
        @test norm(r_gpu2) ≤ √eps(R)
      end
    end

    @testset "Symmetric -- Hermitian" begin
          @testset "view = $view" for view in ('F', 'L', 'U')
        A_cpu = sprand(T, n, n, 0.01) + I
        A_cpu = A_cpu + A_cpu'
        B_cpu = rand(T, n, p)

        (view == 'L') && (A_gpu = CuSparseMatrixCSR(A_cpu |> tril))
        (view == 'U') && (A_gpu = CuSparseMatrixCSR(A_cpu |> triu))
        (view == 'F') && (A_gpu = CuSparseMatrixCSR(A_cpu))
        B_gpu = CuMatrix(B_cpu)

        @testset "ldiv!" begin
          X_cpu = zeros(T, n, p)
          X_gpu = CuMatrix(X_cpu)

          solver = ldlt(A_gpu; view)
          ldiv!(X_gpu, solver, B_gpu)
          R_gpu = B_gpu - A_gpu * X_gpu
          @test norm(R_gpu) ≤ √eps(R)

          c = rand(R)
          A_cpu2 = c * A_cpu
          A_gpu2 = c * A_gpu

          ldlt!(solver, A_gpu2)
          X_gpu .= B_gpu
          ldiv!(solver, X_gpu)
          R_gpu2 = B_gpu - CuSparseMatrixCSR(A_cpu2) * X_gpu
          @test norm(R_gpu2) ≤ √eps(R)
        end

        @testset "\\" begin
          solver = ldlt(A_gpu; view)
          X_gpu = solver \ B_gpu
          R_gpu = B_gpu - A_gpu * X_gpu
          @test norm(R_gpu) ≤ √eps(R)

          c = rand(R)
          A_cpu2 = c * A_cpu
          A_gpu2 = c * A_gpu

          ldlt!(solver, A_gpu2)
          X_gpu = solver \ B_gpu
          R_gpu2 = B_gpu - CuSparseMatrixCSR(A_cpu2) * X_gpu
          @test norm(R_gpu2) ≤ √eps(R)
        end
      end
   end

      @testset "SPD -- HPD" begin
    @testset "view = $view" for view in ('F', 'L', 'U')
        A_cpu = sprand(T, n, n, 0.01)
        A_cpu = A_cpu * A_cpu' + I
        B_cpu = rand(T, n, p)

        (view == 'L') && (A_gpu = CuSparseMatrixCSR(A_cpu |> tril))
        (view == 'U') && (A_gpu = CuSparseMatrixCSR(A_cpu |> triu))
        (view == 'F') && (A_gpu = CuSparseMatrixCSR(A_cpu))
        B_gpu = CuMatrix(B_cpu)

        @testset "ldiv!" begin
          X_cpu = zeros(T, n, p)
          X_gpu = CuMatrix(X_cpu)

          solver = cholesky(A_gpu; view)
          ldiv!(X_gpu, solver, B_gpu)
          R_gpu = B_gpu - A_gpu * X_gpu
          @test norm(R_gpu) ≤ √eps(R)

          c = rand(R)
          A_cpu2 = c * A_cpu
          A_gpu2 = c * A_gpu

          cholesky!(solver, A_gpu2)
          X_gpu .= B_gpu
          ldiv!(solver, X_gpu)
          R_gpu2 = B_gpu - CuSparseMatrixCSR(A_cpu2) * X_gpu
          @test norm(R_gpu2) ≤ √eps(R)
        end

        @testset "\\" begin
          solver = cholesky(A_gpu; view)
          X_gpu = solver \ B_gpu
          R_gpu = B_gpu - A_gpu * X_gpu
          @test norm(R_gpu) ≤ √eps(R)

          c = rand(R)
          A_cpu2 = c * A_cpu
          A_gpu2 = c * A_gpu

          cholesky!(solver, A_gpu2)
          X_gpu = solver \ B_gpu
          R_gpu2 = B_gpu - CuSparseMatrixCSR(A_cpu2) * X_gpu
          @test norm(R_gpu2) ≤ √eps(R)
        end
      end
    end
  end
end

cudss_generic()