Open martaiborra opened 2 years ago
While benchmarking, we noticed that the matrix multiplication of two transposed arrays is really inefficient, as can be seen in the plots below. In-memory:
On-disk:
We should somehow improve this performance.
The code to reproduce it is:
# Reproduction script: build two operands that are stored transposed, transpose
# them back right before the product, and multiply them with ia.matmul.
import os

import iarray as ia
import numpy as np


def iarray_matmul(a, b):
    """Return the product of *a* and *b* as computed by ``ia.matmul``."""
    return ia.matmul(a, b)


def iarray_transpose(array):
    """Return ``array.transpose()``."""
    return array.transpose()


ia.set_config_defaults(dtype=np.float64)

func = "transpose"
# shape[0] x shape[1] is the left operand, shape[1] x shape[2] the right one.
shape = (100_000, 25000, 1000)
amshape = (shape[0], shape[1])
bmshape = (shape[1], shape[2])

# Obtain optimal chunk and block shapes
mparams = ia.matmul_params(amshape, bmshape)
amchunks, amblocks, bmchunks, bmblocks = mparams

if func == "transpose":
    # Both operands are created with their two dimensions swapped, so the
    # chunk and block shapes must be swapped as well.
    amshape = (shape[1], shape[0])
    bmshape = (shape[2], shape[1])
    # The shapes are 1-D sequences: ``np.array(seq).transpose()`` returns them
    # unchanged, so reverse them explicitly instead.
    amchunks = tuple(reversed(amchunks))
    amblocks = tuple(reversed(amblocks))
    bmchunks = tuple(reversed(bmchunks))
    bmblocks = tuple(reversed(bmblocks))

filename = func + "arr-gemm.iarr"
# ``vmtouch -e`` evicts the file's pages from the OS cache.
cmd = 'vmtouch -e ' + filename

# NOTE(review): the snippet lost its indentation; the extent of this ``if``
# body is reconstructed (create the file only when it is missing) -- confirm.
if not os.path.exists(filename):
    ia.set_config_defaults(btune=False)
    am = ia.random.normal(amshape, 3, 2, chunks=amchunks, blocks=amblocks,
                          urlpath=filename, fp_mantissa_bits=20)

os.system(cmd)
am = ia.load(filename)

w = np.ones(bmshape)
bm = ia.numpy2iarray(w, chunks=bmchunks, blocks=bmblocks)
print(bm.info)

# Transpose both operands back and multiply them.
a_opt = iarray_transpose(am)
b_opt = iarray_transpose(bm)
res = iarray_matmul(a_opt, b_opt)
While benchmarking, we noticed that the matrix multiplication of two transposed arrays is really inefficient, as can be seen in the plots below. In-memory:
On-disk:
We should somehow improve this performance.
The code to reproduce it is: