Lightweight, portable, flexible distributed/mobile deep learning with a dynamic, mutation-aware dataflow dependency scheduler; for Python, R, Julia, Scala, Go, JavaScript and more.
import time

import d2l
import mxnet as mx
import numpy as np
from mxnet import npx
mx.npx.set_np()
class benchmark:
    """Context manager that measures and prints the wall-clock time of its body.

    Parameters
    ----------
    description : str
        A %-format string with exactly one float placeholder; it is filled
        with the elapsed time in seconds when the block exits.

    Example
    -------
    >>> with benchmark('took %.4f sec'):
    ...     do_work()
    took 0.0123 sec
    """

    def __init__(self, description='Done in %.4f sec'):
        self.description = description

    def __enter__(self):
        # time.perf_counter() is a monotonic high-resolution clock,
        # the stdlib-recommended choice for interval timing.
        self.start = time.perf_counter()
        return self

    def __exit__(self, *args):
        # Print the elapsed time.  Returning None (implicitly) means any
        # exception raised inside the with-block still propagates.
        print(self.description % (time.perf_counter() - self.start))
# Synthetic workload: 1000 batches of (64, 10) prediction scores and
# 1000 matching batches of 64 integer class labels drawn from [0, 9].
predicts = [mx.nd.random_uniform(0.0, 1.0, (64, 10)) for _ in range(1000)]
labels = [mx.nd.random_uniform(0.0, 9, (64,)).round().astype('int64')
          for _ in range(1000)]
# Materialize plain NumPy copies up front for the pure-NumPy baseline,
# so conversion cost is excluded from the timed section.
ps = [batch.asnumpy() for batch in predicts]
ls = [batch.asnumpy() for batch in labels]
# Baseline: count correct predictions with plain NumPy, which executes
# every operation eagerly and synchronously.
with benchmark('numpy: %.4f sec'):
    correct = 0
    for scores, target in zip(ps, ls):
        predicted = scores.argmax(axis=-1)
        correct += (predicted == target).sum()
    npx.waitall()  # no MXNet work is pending here; kept so both timed sections are identical
print(correct)
# Same computation through mxnet.numpy, where operators are queued on
# MXNet's asynchronous engine instead of running eagerly.
mx_p = [batch.as_np_ndarray() for batch in predicts]
mx_l = [batch.as_np_ndarray() for batch in labels]
with benchmark('mxnet.numpy: %.4f sec'):
    correct_mx = 0
    for scores, target in zip(mx_p, mx_l):
        correct_mx += (scores.argmax(axis=-1) == target).sum()
    npx.waitall()  # block until all queued MXNet operations have finished
print(correct_mx)
For example, we get:

    numpy: 0.0141 sec        6402
    mxnet.numpy: 0.2624 sec  6402

Both versions compute the same count of correct predictions (6402), yet the mxnet.numpy version runs noticeably slower than plain NumPy here.