Closed amontoison closed 6 months ago
using CUDA, CUDA.CUSPARSE using CUDSS using LinearAlgebra using SparseArrays using Test println(CUDSS.CUDSS_INSTALLATION) function cudss_generic() n = 100 p = 5 @testset "precision = $T" for T in (Float32, Float64, ComplexF32, ComplexF64) R = real(T) @testset "Unsymmetric -- Non-Hermitian" begin A_cpu = sprand(T, n, n, 0.02) + I b_cpu = rand(T, n) A_gpu = CuSparseMatrixCSR(A_cpu) b_gpu = CuVector(b_cpu) @testset "ldiv!" begin x_cpu = zeros(T, n) x_gpu = CuVector(x_cpu) solver = lu(A_gpu) ldiv!(x_gpu, solver, b_gpu) r_gpu = b_gpu - A_gpu * x_gpu @test norm(r_gpu) ≤ √eps(R) A_gpu2 = rand(T) * A_gpu lu!(solver, A_gpu2) x_gpu .= b_gpu ldiv!(solver, x_gpu) r_gpu2 = b_gpu - A_gpu2 * x_gpu @test norm(r_gpu2) ≤ √eps(R) end @testset "\\" begin solver = lu(A_gpu) x_gpu = solver \ b_gpu r_gpu = b_gpu - A_gpu * x_gpu @test norm(r_gpu) ≤ √eps(R) A_gpu2 = rand(T) * A_gpu lu!(solver, A_gpu2) x_gpu = solver \ b_gpu r_gpu2 = b_gpu - A_gpu2 * x_gpu @test norm(r_gpu2) ≤ √eps(R) end end @testset "Symmetric -- Hermitian" begin @testset "view = $view" for view in ('F', 'L', 'U') A_cpu = sprand(T, n, n, 0.01) + I A_cpu = A_cpu + A_cpu' B_cpu = rand(T, n, p) (view == 'L') && (A_gpu = CuSparseMatrixCSR(A_cpu |> tril)) (view == 'U') && (A_gpu = CuSparseMatrixCSR(A_cpu |> triu)) (view == 'F') && (A_gpu = CuSparseMatrixCSR(A_cpu)) B_gpu = CuMatrix(B_cpu) @testset "ldiv!" begin X_cpu = zeros(T, n, p) X_gpu = CuMatrix(X_cpu) solver = ldlt(A_gpu; view) ldiv!(X_gpu, solver, B_gpu) R_gpu = B_gpu - A_gpu * X_gpu @test norm(R_gpu) ≤ √eps(R) c = rand(R) A_cpu2 = c * A_cpu A_gpu2 = c * A_gpu ldlt!(solver, A_gpu2) X_gpu .= B_gpu ldiv!(solver, X_gpu) R_gpu2 = B_gpu - CuSparseMatrixCSR(A_cpu2) * X_gpu @test norm(R_gpu2) ≤ √eps(R) end @testset "\\" begin solver = ldlt(A_gpu; view) X_gpu = solver \ B_gpu R_gpu = B_gpu - A_gpu * X_gpu @test norm(R_gpu) ≤ √eps(R) c = rand(R) A_cpu2 = c * A_cpu A_gpu2 = c * A_gpu ldlt!(solver, A_gpu2) X_gpu = solver \ B_gpu R_gpu2 = B_gpu - CuSparseMatrixCSR(A_cpu2) * X_gpu @test norm(R_gpu2) ≤ √eps(R) end end end @testset "SPD -- HPD" begin @testset "view = $view" for view in ('F', 'L', 'U') A_cpu = sprand(T, n, n, 0.01) A_cpu = A_cpu * A_cpu' + I B_cpu = rand(T, n, p) (view == 'L') && (A_gpu = CuSparseMatrixCSR(A_cpu |> tril)) (view == 'U') && (A_gpu = CuSparseMatrixCSR(A_cpu |> triu)) (view == 'F') && (A_gpu = CuSparseMatrixCSR(A_cpu)) B_gpu = CuMatrix(B_cpu) @testset "ldiv!" begin X_cpu = zeros(T, n, p) X_gpu = CuMatrix(X_cpu) solver = cholesky(A_gpu; view) ldiv!(X_gpu, solver, B_gpu) R_gpu = B_gpu - A_gpu * X_gpu @test norm(R_gpu) ≤ √eps(R) c = rand(R) A_cpu2 = c * A_cpu A_gpu2 = c * A_gpu cholesky!(solver, A_gpu2) X_gpu .= B_gpu ldiv!(solver, X_gpu) R_gpu2 = B_gpu - CuSparseMatrixCSR(A_cpu2) * X_gpu @test norm(R_gpu2) ≤ √eps(R) end @testset "\\" begin solver = cholesky(A_gpu; view) X_gpu = solver \ B_gpu R_gpu = B_gpu - A_gpu * X_gpu @test norm(R_gpu) ≤ √eps(R) c = rand(R) A_cpu2 = c * A_cpu A_gpu2 = c * A_gpu cholesky!(solver, A_gpu2) X_gpu = solver \ B_gpu R_gpu2 = B_gpu - CuSparseMatrixCSR(A_cpu2) * X_gpu @test norm(R_gpu2) ≤ √eps(R) end end end end end cudss_generic()