exanauts / CUDSS.jl

MIT License
18 stars 1 forks source link

Error with small matrices #45

Closed amontoison closed 2 months ago

amontoison commented 3 months ago
using CUDA, CUDA.CUSPARSE
using CUDSS
using SparseArrays, LinearAlgebra

using Random
Random.seed!(666)

function example1(T::DataType, n::Int)
    A_cpu = sprand(T, n, n, 1.0) + I
    x_cpu = zeros(T, n)
    b_cpu = rand(T, n)

    A_gpu = CuSparseMatrixCSR(A_cpu)
    x_gpu = CuVector(x_cpu)
    b_gpu = CuVector(b_cpu)

    solver = CudssSolver(A_gpu, "G", 'F')

    cudss("analysis", solver, x_gpu, b_gpu)
    cudss("factorization", solver, x_gpu, b_gpu)
    cudss("solve", solver, x_gpu, b_gpu)

    r_gpu = b_gpu - A_gpu * x_gpu
    norm(r_gpu)
end

function example2(T::DataType, n::Int)
    A_cpu = sprand(T, n, n, 1.0) + I
    A_cpu = A_cpu + A_cpu' + I
    x_cpu = zeros(T, n)
    b_cpu = rand(T, n)

    A_gpu = CuSparseMatrixCSR(A_cpu |> tril)
    x_gpu = CuVector(x_cpu)
    b_gpu = CuVector(b_cpu)

    structure = T <: Real ? "S" : "H"
    solver = CudssSolver(A_gpu, structure, 'L')

    cudss("analysis", solver, x_gpu, b_gpu)
    cudss("factorization", solver, x_gpu, b_gpu)
    cudss("solve", solver, x_gpu, b_gpu)

    r_gpu = b_gpu - CuSparseMatrixCSR(A_cpu) * x_gpu
    norm(r_gpu)
end

function example3(T::DataType, n::Int)
    A_cpu = sprand(T, n, n, 1.0)
    A_cpu = A_cpu * A_cpu' + I
    x_cpu = zeros(T, n)
    b_cpu = rand(T, n)

    A_gpu = CuSparseMatrixCSR(A_cpu |> triu)
    x_gpu = CuVector(x_cpu)
    b_gpu = CuVector(b_cpu)

    structure = T <: Real ? "SPD" : "HPD"
    solver = CudssSolver(A_gpu, structure, 'U')

    cudss("analysis", solver, x_gpu, b_gpu)
    cudss("factorization", solver, x_gpu, b_gpu)
    cudss("solve", solver, x_gpu, b_gpu)

    r_gpu = b_gpu - CuSparseMatrixCSR(A_cpu) * x_gpu
    norm(r_gpu)
end

for n in (10, 5, 2, 1)
    println("Size of the linear systems: $n")

    for T in (Float32, Float64, ComplexF32, ComplexF64)
        println("Precision: $T")

        r1 = example1(T, n)
        println("Residual norm for example1: $r1")

        r2 = example2(T, n)
        println("Residual norm for example2: $r2")

        r3 = example3(T, n)
        println("Residual norm for example3: $r3")

        println()
    end
end
ERROR: CUDSSError: an internal operation failed (code 7, CUDSS_STATUS_INTERNAL_ERROR)
amontoison commented 3 months ago

It was working with CUDSS v0.1.0 but not anymore with CUDSS v0.2.1.

amontoison commented 2 months ago

NVIDIA solved this issue with the release v0.3.0.