# This computes the sum of an identity matrix. The goal is to measure ultimate
# write and read performance, without the media (either memory or disk) being a
# bottleneck.
import iarray as ia
from iarray.udf import jit, Array, float64
from time import time
# N = 20_000 # around 3 GB
# N = 80_000 # around 50 GB
N = 200_000 # around 300 GB # it works!
# N = 400_000 # around 1.2 TB # it never ends!
size = N * N * 8 / 2**30 # size in GB
@jit
def eye(out: Array(float64, 2)) -> int:
    # `out` is one window (block) of the output array; window_start gives
    # the global coordinates of the window's top-left corner.
    n = out.window_shape[0]
    m = out.window_shape[1]
    row_start = out.window_start[0]
    col_start = out.window_start[1]
    for i in range(n):
        for j in range(m):
            # Put a 1 on the global diagonal, 0 everywhere else
            if row_start + i == col_start + j:
                out[i, j] = 1
            else:
                out[i, j] = 0
    return 0
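# For intuition, a plain-NumPy sketch of what the UDF fills in for a single
# window. `eye_window` is a hypothetical helper (not part of the iarray API)
# that builds the same block from a window's global offset and shape.
import numpy as np

def eye_window(window_start, window_shape):
    rows = np.arange(window_shape[0]) + window_start[0]
    cols = np.arange(window_shape[1]) + window_start[1]
    # 1.0 where the global row index equals the global column index
    return (rows[:, None] == cols[None, :]).astype(np.float64)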
if True:  # set to False (and uncomment ia.open() below) to reuse an on-disk array
    ia.set_config_defaults(favor=ia.Favor.BALANCE)
    t0 = time()
    # expr = ia.expr_from_udf(eye, [], shape=(N, N), mode='w', urlpath="reduce-eye.iarr")
    expr = ia.expr_from_udf(eye, [], shape=(N, N))  # in-memory
    iax = expr.eval()
    t = time() - t0
    print(f"time for storing array: {t:.3f}s ({size / t:.3g} GB/s)")
# iax = ia.open("reduce-eye.iarr")

t0 = time()
total = iax.sum(axis=(1, 0))
# total = iax.sum()
t = time() - t0
print(f"time for reducing array: {t:.3f}s ({size / t:.3g} GB/s)")
print("Total sum:", total)
# As the data is highly compressible, this takes no more than 100 MB of RAM,
# so one does not have to be afraid of running out of memory when executing this :-)