IDA inconsistent results on MacOS

Hello,

Current Behavior:

As an example, when simulating this simple model using SUNDIALS 7.0.0

#include <iostream>
#include <ida/ida.h>
#include <sundials/sundials_nvector.h>
#include <sunmatrix/sunmatrix_sparse.h>
#include <sunlinsol/sunlinsol_klu.h>
#include <nvector/nvector_serial.h>

int evalResFun(sunrealtype tres, N_Vector yy, N_Vector yp, N_Vector rr, void* user_data) {
    double* yy_ptr = N_VGetArrayPointer(yy);
    double* yp_ptr = N_VGetArrayPointer(yp);
    double* rr_ptr = N_VGetArrayPointer(rr);

    double omega1 = yy_ptr[0];
    double omega2 = yy_ptr[1];
    double phi1 = yy_ptr[2];
    double phi2 = yy_ptr[3];
    double omega1p = yp_ptr[0];
    double omega2p = yp_ptr[1];
    double phi1p = yp_ptr[2];
    double phi2p = yp_ptr[3];

    rr_ptr[0] = (omega1 - phi1p);
    rr_ptr[1] = ((0.4 * omega1p) - ((11 * (phi2 - phi1)) + (0.2 * (phi2p - phi1p))));
    rr_ptr[2] = (omega2 - phi2p);
    rr_ptr[3] = (omega2p - ((((11 * (phi1 - phi2)) + (0.2 * (phi1p - phi2p))) - (5 * phi2)) - phi2p));

    return 0;
}

int evalJac(sunrealtype t, sunrealtype cj, N_Vector yy, N_Vector yp, N_Vector r, SUNMatrix j, void* user_data, N_Vector tmp1, N_Vector tmp2, N_Vector tmp3) {
    int* ap_ptr = SUNSparseMatrix_IndexPointers(j);
    int* ai_ptr = SUNSparseMatrix_IndexValues(j);
    double* ax_ptr = SUNSparseMatrix_Data(j);

    SUNMatZero(j);

    ax_ptr[0] = 1 + cj * 0;
    ax_ptr[1] = 0 + cj * -1;
    ax_ptr[2] = 0 + cj * 0.4;
    ax_ptr[3] = 11 + cj * 0.2;
    ax_ptr[4] = -11 + cj * -0.2;
    ax_ptr[5] = 1 + cj * 0;
    ax_ptr[6] = 0 + cj * -1;
    ax_ptr[7] = 0 + cj * 1;
    ax_ptr[8] = -11 + cj * -0.2;
    ax_ptr[9] = 16 + cj * 1.2;

    ap_ptr[0] = 0;
    ap_ptr[1] = 2;
    ap_ptr[2] = 5;
    ap_ptr[3] = 7;
    ap_ptr[4] = 10;

    ai_ptr[0] = 0;
    ai_ptr[1] = 2;
    ai_ptr[2] = 0;
    ai_ptr[3] = 2;
    ai_ptr[4] = 3;
    ai_ptr[5] = 1;
    ai_ptr[6] = 3;
    ai_ptr[7] = 1;
    ai_ptr[8] = 2;
    ai_ptr[9] = 3;

    return 0;
}

int evalRootFun(sunrealtype t, N_Vector y, N_Vector yp, sunrealtype *gout, void *user_data) {
    return 0;
}

int main() {
    double t_start = 0;
    double t_end = 5.0;
    double step = 0.1;
    int n = 4;
    int nnz = 10;

    SUNContext sunCtx;
    SUNContext_Create(SUN_COMM_NULL, &sunCtx);

    double *y0_ptr = new double[n];
    y0_ptr[0] = 0.0;
    y0_ptr[1] = 0.0;
    y0_ptr[2] = 0.0;
    y0_ptr[3] = 1.0;

    double *yp0_ptr = new double[n];
    yp0_ptr[0] = 27.5;
    yp0_ptr[1] = -16.0;
    yp0_ptr[2] = 0.0;
    yp0_ptr[3] = 0.0;

    N_Vector y0 = N_VMake_Serial(n, y0_ptr, sunCtx);
    N_Vector yp0 = N_VMake_Serial(n, yp0_ptr, sunCtx);

    SUNMatrix j = SUNSparseMatrix(n, n, nnz, CSR_MAT, sunCtx);

    SUNLinearSolver ls = SUNLinSol_KLU(y0, j, sunCtx);
    void* idaMem = IDACreate(sunCtx);

    IDAInit(idaMem, evalResFun, t_start, y0, yp0);

    double rtol = 1.0e-4;
    double atol = 1.0e-8;
    IDASStolerances(idaMem, rtol, atol);

    IDASetLinearSolver(idaMem, ls, j);

    IDASetJacFn(idaMem, evalJac);

    int nrtfn = 0;
    IDARootInit(idaMem, nrtfn, evalRootFun);

    N_Vector yret = N_VNew_Serial(n, sunCtx);
    N_VConst(0, yret);
    N_Vector ypret = N_VNew_Serial(n, sunCtx);
    N_VConst(0, ypret);
    for (double t = t_start + step; t <= t_end; t += step) {
        double tret;
        IDASolve(idaMem, t, &tret, yret, ypret, IDA_NORMAL);
        std::cout << "t=" << t << std::endl;
        N_VPrint_Serial(yret);
        N_VPrint_Serial(ypret);
    }
}

At the last time step (5s), I get on Linux and Windows exactly the same values for yret and ypret:

 t=5                         
 -1.1768448081950666e-01     
 -1.4030561658710816e-01     
 -1.8051392641026923e-01     
 -7.9751443704232775e-02     

 2.7596546355924620e+00      
 -5.6479629226880101e-01     
 -1.1767876764306312e-01     
 -1.4030818408793722e-01

While on MacOS, I get quite different values (and difference is growing when running more and more steps):

 t=5                        
 -1.1768573689544144e-01    
 -1.4030646958802137e-01    
 -1.8051704733017529e-01    
 -7.9750007283730884e-02    

 2.7597717495834471e+00     
 -5.6484970611817487e-01    
 -1.1768140567696388e-01    
 -1.4030831226341678e-01

Expected Behavior:

I would expect to get consistent results on Linux, Windows and MacOS.

Steps To Reproduce:

Just compile this code and link with IDA and KLU linear solver libraries.

Environment:

I got this results using SUNDIALS 7.0.0 and GitHub action Linux, MacOS and Windows latest workers (ubuntu-latest, windows-latest, macos-latest). I also observe the same issue on my MacBook Pro (Darwin mbp-de-xxx 23.5.0 Darwin Kernel Version 23.5.0: Wed May 1 20:12:58 PDT 2024; root:xnu-10063.121.3~5/RELEASE_ARM64_T6000 arm64).

LLNL / sundials