AMGX crashed due to out of memory [Issue]

Hello!

One of my AMGX cases crashed with a CUDA "out of memory" error.

Backtrace

See details below:

Caught amgx exception: Cuda failure: 'out of memory'
 at: /home/test/software/amgx/src/solvers/dense_lu_solver.cu:632
Stack trace:
 /home/test/software/cu_amgx/lib/libamgxsh.so : void amgx::dense_lu_solver::DenseLUSolver<amgx::TemplateConfig<(AMGX_MemorySpace)1, (AMGX_VecPrecision)1, (AMGX_MatPrecision)1, (AMGX_IndPrecision)2> >::allocMem<float, int>(float*&, int, bool)+0x333
 /home/test/software/cu_amgx/lib/libamgxsh.so : amgx::dense_lu_solver::DenseLUSolver<amgx::TemplateConfig<(AMGX_MemorySpace)1, (AMGX_VecPrecision)1, (AMGX_MatPrecision)1, (AMGX_IndPrecision)2> >::solver_setup(bool)+0x361
 /home/test/software/cu_amgx/lib/libamgxsh.so : amgx::Solver<amgx::TemplateConfig<(AMGX_MemorySpace)1, (AMGX_VecPrecision)1, (AMGX_MatPrecision)1, (AMGX_IndPrecision)2> >::setup(amgx::Operator<amgx::TemplateConfig<(AMGX_MemorySpace)1, (AMGX_VecPrecision)1, (AMGX_MatPrecision)1, (AMGX_IndPrecision)2> >&, bool)+0x108
 /home/test/software/cu_amgx/lib/libamgxsh.so : void amgx::AMG_Setup<(AMGX_VecPrecision)1, (AMGX_MatPrecision)1, (AMGX_IndPrecision)2>::setup<amgx::TemplateConfig<(AMGX_MemorySpace)1, (AMGX_VecPrecision)1, (AMGX_MatPrecision)1, (AMGX_IndPrecision)2>, (AMGX_MemorySpace)1, (AMGX_MemorySpace)0>(amgx::AMG<(AMGX_VecPrecision)1, (AMGX_MatPrecision)1, (AMGX_IndPrecision)2>*, amgx::Matrix<amgx::TemplateConfig<(AMGX_MemorySpace)1, (AMGX_VecPrecision)1, (AMGX_MatPrecision)1, (AMGX_IndPrecision)2> >&)+0x114
 /home/test/software/cu_amgx/lib/libamgxsh.so : amgx::AMG<(AMGX_VecPrecision)1, (AMGX_MatPrecision)1, (AMGX_IndPrecision)2>::setup(amgx::Matrix<amgx::TemplateConfig<(AMGX_MemorySpace)1, (AMGX_VecPrecision)1, (AMGX_MatPrecision)1, (AMGX_IndPrecision)2> >&)+0xeb
 /home/test/software/cu_amgx/lib/libamgxsh.so : amgx::AlgebraicMultigrid_Solver<amgx::TemplateConfig<(AMGX_MemorySpace)1, (AMGX_VecPrecision)1, (AMGX_MatPrecision)1, (AMGX_IndPrecision)2> >::solver_setup(bool)+0x67
 /home/test/software/cu_amgx/lib/libamgxsh.so : amgx::Solver<amgx::TemplateConfig<(AMGX_MemorySpace)1, (AMGX_VecPrecision)1, (AMGX_MatPrecision)1, (AMGX_IndPrecision)2> >::setup(amgx::Operator<amgx::TemplateConfig<(AMGX_MemorySpace)1, (AMGX_VecPrecision)1, (AMGX_MatPrecision)1, (AMGX_IndPrecision)2> >&, bool)+0x108
 /home/test/software/cu_amgx/lib/libamgxsh.so : amgx::FGMRES_Solver<amgx::TemplateConfig<(AMGX_MemorySpace)1, (AMGX_VecPrecision)1, (AMGX_MatPrecision)1, (AMGX_IndPrecision)2> >::solver_setup(bool)+0x2e6
 /home/test/software/cu_amgx/lib/libamgxsh.so : amgx::Solver<amgx::TemplateConfig<(AMGX_MemorySpace)1, (AMGX_VecPrecision)1, (AMGX_MatPrecision)1, (AMGX_IndPrecision)2> >::setup(amgx::Operator<amgx::TemplateConfig<(AMGX_MemorySpace)1, (AMGX_VecPrecision)1, (AMGX_MatPrecision)1, (AMGX_IndPrecision)2> >&, bool)+0x108
 /home/test/software/cu_amgx/lib/libamgxsh.so : amgx::Solver<amgx::TemplateConfig<(AMGX_MemorySpace)1, (AMGX_VecPrecision)1, (AMGX_MatPrecision)1, (AMGX_IndPrecision)2> >::setup_no_throw(amgx::Operator<amgx::TemplateConfig<(AMGX_MemorySpace)1, (AMGX_VecPrecision)1, (AMGX_MatPrecision)1, (AMGX_IndPrecision)2> >&, bool)+0x80
 /home/test/software/cu_amgx/lib/libamgxsh.so : amgx::AMG_Solver<amgx::TemplateConfig<(AMGX_MemorySpace)1, (AMGX_VecPrecision)1, (AMGX_MatPrecision)1, (AMGX_IndPrecision)2> >::setup(amgx::Matrix<amgx::TemplateConfig<(AMGX_MemorySpace)1, (AMGX_VecPrecision)1, (AMGX_MatPrecision)1, (AMGX_IndPrecision)2> >&)+0x53
 /home/test/software/cu_amgx/lib/libamgxsh.so : amgx::AMGX_ERROR amgx::(anonymous namespace)::set_solver_with_shared<(AMGX_Mode)8465, amgx::AMG_Solver, amgx::Matrix>(AMGX_solver_handle_struct*, AMGX_matrix_handle_struct*, amgx::Resources*, amgx::AMGX_ERROR (amgx::AMG_Solver<amgx::TemplateMode<(AMGX_Mode)8465>::Type>::*)(std::shared_ptr<amgx::Matrix<amgx::TemplateMode<(AMGX_Mode)8465>::Type> >))+0x3eb
 /home/test/software/cu_amgx/lib/libamgxsh.so : AMGX_solver_setup()+0x282
 /home/test/workspace/mylib/bin/../lib/libmysolverlib.so : MySolver::solve()+0x8e4
 mysolverbin : main()+0x34a6
 /lib/x86_64-linux-gnu/libc.so.6 : __libc_start_main()+0xf3
 mysolverbin : ()+0x27b4e

The program only solves a single linear system Ax = b, where A is a square, single-precision (float) matrix stored in CSR format with:

nnz = 373760

rows = columns = 51200

I have read your paper in SISC and noticed that the matrices solved there can have a far larger nnz than mine.

Additionally, your GPU (K40) has identical memory capacity to mine (RTX 4070 Ti).

Config

Would you mind giving some advice on whether I need to set the memory pool size manually?

My configuration string only includes the solver:

    const char* config_string = R"(
        {
            "config_version": 2, 
            "solver": {
                "preconditioner": {
                    "error_scaling": 0, 
                    "print_grid_stats": 0, 
                    "max_uncolored_percentage": 0.05, 
                    "algorithm": "AGGREGATION", 
                    "solver": "AMG", 
                    "smoother": "MULTICOLOR_DILU", 
                    "presweeps": 0, 
                    "selector": "SIZE_2", 
                    "coarse_solver": "DENSE_LU_SOLVER", 
                    "max_iters": 1, 
                    "postsweeps": 2, 
                    "min_coarse_rows": 2, 
                    "relaxation_factor": 0.75, 
                    "scope": "amg", 
                    "max_levels": 1, 
                    "matrix_coloring_scheme": "PARALLEL_GREEDY", 
                    "cycle": "V"
                }, 
                "use_scalar_norm": 1, 
                "solver": "FGMRES", 
                "print_solve_stats": 0, 
                "obtain_timings": 0, 
                "max_iters": 10, 
                "monitor_residual": 1, 
                "gmres_n_restart": 10, 
                "convergence": "RELATIVE_INI_CORE", 
                "scope": "main", 
                "tolerance": 1e-10, 
                "norm": "L2"
            }
        }
    )";

Platform

NO_MPI, NO_OPENMP

Ubuntu 20.04, GCC 9.4.0, CUDA 12.2, GPU: RTX 4070 Ti (sm_89)

AMGX: the latest commit

NVIDIA / AMGX