The `np.*_t` types don't have to be modified, since they are registered type identifiers for ndarrays in Cython. Only the deprecated dtype references `np.int` and `np.float` have been changed to `'int'` and `'float'` respectively. The following is a simple test script I used to ensure correctness.
import numpy as np
import scipy.sparse as sp
from xclib.utils.sparse import topk, rank
def obtain_topk(preds_, k, threshold=0):
    """
    Definition:
        Inefficient reference function to obtain the top-k entries of every
        row of a sparse matrix, padding with zeros when a row stores fewer
        than k elements.
    Implementation:
        Obtains top-k indices and values for every row and dumps them into
        row and column ndarrays to create a COO matrix, which is converted
        to CSR before returning.
    Arguments:
        preds_: scipy sparse matrix of scores (anything exposing `.shape`
            and `.tocsr()`).
        k: number of entries to keep per row.
        threshold: kept values strictly below this are set to zero and
            therefore removed from the result.
    Returns:
        CSR matrix with the same shape as `preds_` holding at most k stored
        (non-zero) values per row.
    Notes:
        Padding entries are (column 0, value 0). Because padding is done
        before sorting, a padded zero outranks any stored negative value.
        Duplicate (row, 0) padding entries are summed by the COO -> CSR
        conversion, which is harmless since they are all zero, and they are
        dropped by `eliminate_zeros()`.
    """
    num_rows, num_cols = preds_.shape
    # Read rows straight from the CSR buffers instead of slicing the matrix
    # (`preds_[idx]`) twice per iteration.
    csr = preds_.tocsr()
    indptr = csr.indptr

    coo_rows = np.repeat(np.arange(num_rows), k).astype(np.int64)
    coo_cols = np.zeros(coo_rows.shape, dtype=np.int64)
    coo_data = np.zeros(coo_rows.shape, dtype=np.float32)

    # Plain `range`: the original called `trange` without importing it.
    for idx in range(num_rows):
        start, stop = indptr[idx], indptr[idx + 1]
        data = csr.data[start:stop]
        indices = csr.indices[start:stop]

        # Pad short (including empty) rows up to k entries.
        missing = k - len(data)
        if missing > 0:
            data = np.concatenate(
                [data, np.zeros(missing, dtype=np.float32)]
            )
            indices = np.concatenate(
                [indices, np.zeros(missing, dtype=np.int64)]
            )

        # Positions of the k largest values, in descending order.
        order = np.argsort(data)[::-1][:k]
        coo_cols[idx * k: (idx + 1) * k] = indices[order]
        coo_data[idx * k: (idx + 1) * k] = data[order]

    coo_data[coo_data < threshold] = 0
    topk_preds = sp.coo_matrix(
        (coo_data, (coo_rows, coo_cols)),
        shape=(num_rows, num_cols)
    ).tocsr()
    topk_preds.eliminate_zeros()
    return topk_preds
# Placeholder: replace with the path to a .npz sparse matrix saved with
# scipy.sparse.save_npz. (The original line had no value and did not parse.)
path = "path/to/matrix.npz"  # Path to .npz sparse matrix
x = sp.load_npz(path)

# Reference result from the pure-NumPy implementation above.
x_mine = obtain_topk(x, 5, threshold=-20)
# Library result using the Cython code path.
x_lib_nb = topk(rank(-x), k=5, pad_ind=0, pad_val=0, use_cython=True)

# Print, for the first row, the value and column index picked by each
# implementation side by side.
# NOTE(review): x_mine[0].indices comes from a CSR row, so it is presumably
# ordered by column rather than by score -- confirm the pairing is intended.
for i_mine, i_lib in zip(x_mine[0].indices, x_lib_nb[0]):
    print(x[0, i_mine], x[0, i_lib], i_mine, i_lib)
PR for #34.
The `np.*_t` types don't have to be modified, since they are registered type identifiers for ndarrays in Cython. Only the deprecated dtype references `np.int` and `np.float` have been changed to `'int'` and `'float'` respectively. The following is a simple test script I used to ensure correctness. This returns identical values.