egraphs-good / eggcc


Memory leak? or just need to limit eqsat #477

Open · glenn-sun opened 5 months ago

glenn-sun commented 5 months ago

When I run `cargo run --release path/to/benchmark.bril` on the benchmark below, eggcc consumes 72 GB of RAM before being killed by macOS (this is a 16 GB machine, so I assume that figure includes virtual memory). I'm not sure whether this is a memory leak or just a big benchmark that legitimately needs a lot of memory, but 72 GB sounds excessive.

(I wish GitHub would allow attaching code files.)

```
## This benchmark is a translation of 2mm.c from Polybench.
## 2mm computes D := alpha * A * B * C + beta * D
## for some procedurally generated matrices A, B, C, D
##
## The int version replaces floating point division in
## generating A, B, C, D with integer division.

@main {
    # constants
    # matrix size - corresponds to Polybench SMALL_DATASET
    # (STANDARD_DATASET = 1024 takes a long time to run)

    N: int = const 128;

    zero: int = const 0;
    one: int = const 1;
    two: int = const 2;
    three: int = const 3;

    # initialize arrays
    A: ptr<int> = call @matrix_new N;
    B: ptr<int> = call @matrix_new N;
    C: ptr<int> = call @matrix_new N;
    D: ptr<int> = call @matrix_new N;
    alpha: int = const 32412;
    beta: int = const 2123;

    i: int = const 0;
.init_A_i:
    j: int = const 0;
    cond: bool = lt i N;
    br cond .init_A_j .init_A_i_done;
.init_A_j:
    cond: bool = lt j N;
    br cond .init_A_body .init_A_j_done;
.init_A_body:
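    # A[i][j] = (i * j) / N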
    val: int = mul i j;
    val: int = div val N;
    call @matrix_set A i j N val;
    j: int = add j one;
    jmp .init_A_j;
.init_A_j_done:
    i: int = add i one;
    jmp .init_A_i;
.init_A_i_done:

    i: int = const 0;
.init_B_i:
    j: int = const 0;
    cond: bool = lt i N;
    br cond .init_B_j .init_B_i_done;
.init_B_j:
    cond: bool = lt j N;
    br cond .init_B_body .init_B_j_done;
.init_B_body:
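    # B[i][j] = (i * (j + 1)) / N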
    val: int = add j one;
    val: int = mul i val;
    val: int = div val N;
    call @matrix_set B i j N val;
    j: int = add j one;
    jmp .init_B_j;
.init_B_j_done:
    i: int = add i one;
    jmp .init_B_i;
.init_B_i_done:

    i: int = const 0;
.init_C_i:
    j: int = const 0;
    cond: bool = lt i N;
    br cond .init_C_j .init_C_i_done;
.init_C_j:
    cond: bool = lt j N;
    br cond .init_C_body .init_C_j_done;
.init_C_body:
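    # C[i][j] = (i * (j + 3)) / N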
    val: int = add j three;
    val: int = mul i val;
    val: int = div val N;
    call @matrix_set C i j N val;
    j: int = add j one;
    jmp .init_C_j;
.init_C_j_done:
    i: int = add i one;
    jmp .init_C_i;
.init_C_i_done:

    i: int = const 0;
.init_D_i:
    j: int = const 0;
    cond: bool = lt i N;
    br cond .init_D_j .init_D_i_done;
.init_D_j:
    cond: bool = lt j N;
    br cond .init_D_body .init_D_j_done;
.init_D_body:
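    # D[i][j] = (i * (j + 2)) / N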
    val: int = add j two;
    val: int = mul i val;
    val: int = div val N;
    call @matrix_set D i j N val;
    j: int = add j one;
    jmp .init_D_j;
.init_D_j_done:
    i: int = add i one;
    jmp .init_D_i;
.init_D_i_done:

    # main computation
    # computes D := alpha * A * B * C + beta * D

    # first compute tmp := alpha * A * B 
    tmp: ptr<int> = call @matrix_new N;

    i: int = const 0;
.part1_i:
    j: int = const 0;
    cond: bool = lt i N;
    br cond .part1_j .part1_i_done;
.part1_j:
    k: int = const 0;
    cond: bool = lt j N;
    br cond .part1_j_body .part1_j_done;
.part1_j_body:
    call @matrix_set tmp i j N zero;
    jmp .part1_k;
.part1_k:
    cond: bool = lt k N;
    br cond .part1_k_body .part1_k_done;
.part1_k_body:
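    # tmp[i][j] += alpha * A[i][k] * B[k][j]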
    Aik: int = call @matrix_get A i k N;
    Bkj: int = call @matrix_get B k j N;
    incr: int = mul alpha Aik;
    incr: int = mul incr Bkj;
    call @matrix_incr tmp i j N incr;
    k: int = add k one;
    jmp .part1_k;
.part1_k_done:
    j: int = add j one;
    jmp .part1_j;
.part1_j_done:
    i: int = add i one;
    jmp .part1_i;
.part1_i_done:

    # now compute D := tmp * C + beta * D

    i: int = const 0;
.part2_i:
    j: int = const 0;
    cond: bool = lt i N;
    br cond .part2_j .part2_i_done;
.part2_j:
    k: int = const 0;
    cond: bool = lt j N;
    br cond .part2_j_body .part2_j_done;
.part2_j_body:
    call @matrix_scale D i j N beta;
    jmp .part2_k;
.part2_k:
    cond: bool = lt k N;
    br cond .part2_k_body .part2_k_done;
.part2_k_body:
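    # D[i][j] += tmp[i][k] * C[k][j]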
    tmpik: int = call @matrix_get tmp i k N;
    Ckj: int = call @matrix_get C k j N;
    incr: int = mul tmpik Ckj;
    call @matrix_incr D i j N incr;
    k: int = add k one;
    jmp .part2_k;
.part2_k_done:
    j: int = add j one;
    jmp .part2_j;
.part2_j_done:
    i: int = add i one;
    jmp .part2_i;
.part2_i_done:

    call @matrix_print D N;

    free A;
    free B;
    free C;
    free D;
    free tmp;
}

@matrix_new(N: int): ptr<int> {
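    # allocate an N x N matrix as a flat buffer of N * N ints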
    sq: int = mul N N;
    ptr: ptr<int> = alloc sq;
    ret ptr;
}

@matrix_loc(mtx: ptr<int>, row: int, col: int, N: int): ptr<int> {
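    # pointer to mtx[row][col] in row-major layout: offset = row * N + col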
    row_offset: int = mul row N;
    offset: int = add row_offset col;
    new_ptr: ptr<int> = ptradd mtx offset;
    ret new_ptr;
}

@matrix_get(mtx: ptr<int>, row: int, col: int, N: int): int {
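    # load the value at mtx[row][col]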
    ptr: ptr<int> = call @matrix_loc mtx row col N;
    val: int = load ptr;
    ret val;
}

@matrix_set(mtx: ptr<int>, row: int, col: int, N: int, val: int) {
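    # store val at mtx[row][col]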
    ptr: ptr<int> = call @matrix_loc mtx row col N;
    store ptr val;
}

@matrix_incr(mtx: ptr<int>, row: int, col: int, N: int, incr: int) {
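    # mtx[row][col] += incr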
    ptr: ptr<int> = call @matrix_loc mtx row col N;
    val: int = load ptr;
    new_val: int = add val incr;
    store ptr new_val;
}

@matrix_scale(mtx: ptr<int>, row: int, col: int, N: int, scale: int) {
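    # mtx[row][col] *= scale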
    ptr: ptr<int> = call @matrix_loc mtx row col N;
    val: int = load ptr;
    new_val: int = mul val scale;
    store ptr new_val;
}

@matrix_print(mtx: ptr<int>, N: int) {
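    # print all N * N entries of mtx in row-major order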
    i: int = const 0;
    one: int = const 1;
    sq: int = mul N N;
.while:
    cond: bool = lt i sq;
    br cond .body .done;
.body:
    mtx_loc: ptr<int> = ptradd mtx i;
    val: int = load mtx_loc;
    print val;
    i: int = add i one;
    jmp .while;
.done:
    nop;
}
```
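
For reference, here is a rough C sketch of the computation the benchmark translates, so the Bril version is easier to check at a glance. This is a minimal sketch following the structure of the Bril code above rather than the actual Polybench source; the `kernel_2mm` name and the `main` driver are illustrative, and `long long` stands in for Bril's 64-bit integers.

```c
#include <stdio.h>
#include <stdlib.h>

#define N 128 /* matches SMALL_DATASET above */

/* D := alpha * A * B * C + beta * D, computed in two passes:
   tmp := alpha * A * B, then D := tmp * C + beta * D. */
static void kernel_2mm(long long alpha, long long beta, long long *tmp,
                       const long long *A, const long long *B,
                       const long long *C, long long *D) {
    for (int i = 0; i < N; i++)
        for (int j = 0; j < N; j++) {
            tmp[i * N + j] = 0;
            for (int k = 0; k < N; k++)
                tmp[i * N + j] += alpha * A[i * N + k] * B[k * N + j];
        }
    for (int i = 0; i < N; i++)
        for (int j = 0; j < N; j++) {
            D[i * N + j] *= beta;
            for (int k = 0; k < N; k++)
                D[i * N + j] += tmp[i * N + k] * C[k * N + j];
        }
}

int main(void) {
    long long *A = malloc(N * N * sizeof *A), *B = malloc(N * N * sizeof *B);
    long long *C = malloc(N * N * sizeof *C), *D = malloc(N * N * sizeof *D);
    long long *tmp = malloc(N * N * sizeof *tmp);
    /* same procedural initialization (with integer division) as above */
    for (int i = 0; i < N; i++)
        for (int j = 0; j < N; j++) {
            A[i * N + j] = (long long)i * j / N;
            B[i * N + j] = (long long)i * (j + 1) / N;
            C[i * N + j] = (long long)i * (j + 3) / N;
            D[i * N + j] = (long long)i * (j + 2) / N;
        }
    kernel_2mm(32412, 2123, tmp, A, B, C, D);
    for (int i = 0; i < N * N; i++) /* print D, like @matrix_print */
        printf("%lld\n", D[i]);
    free(A); free(B); free(C); free(D); free(tmp);
    return 0;
}
```
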
oflatt commented 5 months ago

This works on `main` now; please add it in the next benchmarks PR!