apache / tvm

Open deep learning compiler stack for cpu, gpu and specialized accelerators
https://tvm.apache.org/
Apache License 2.0

topi.nn.conv2d_transpose_nchw and te.gradient behave strangely for kernel size (1,1) #8087

Closed JiaruiWang-Jill closed 1 month ago

JiaruiWang-Jill commented 3 years ago

@hzfan and I found that topi.nn.conv2d_transpose_nchw and te.gradient behave strangely when combined and the kernel's (height, width) = (1, 1).

For instance,

X = te.placeholder((8, 3, 32, 32), name="X")
W = te.placeholder((3, 16, 1, 1), name="W")
R = topi.nn.conv2d_transpose_nchw(X, W, (2,2), (1,1), "float32", (1,1))
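
For reference, with these arguments the output spatial size can be sanity-checked with the usual transposed-convolution size formula (a minimal sketch, assuming the standard computation used by topi.nn.conv2d_transpose_nchw; the variable names are just illustrative):

# Assumed size formula for conv2d_transpose, per spatial dimension:
#   out = (in - 1) * stride - 2 * padding + kernel + output_padding
in_size, stride, padding, kernel, output_padding = 32, 2, 1, 1, 1
out_size = (in_size - 1) * stride - 2 * padding + kernel + output_padding
print(out_size)  # 62, so R should have shape (8, 16, 62, 62)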

In our testing, the result shows:

38% of elements differ, first 10 of wrong positions: [(0, 1, 11, 30), (0, 1, 12, 4), (0, 1, 12, 8), (0, 1, 12, 12), (0, 1, 12, 16), (0, 1, 12, 20), (0, 1, 15, 2), (0, 1, 15, 3), (0, 1, 15, 6), (0, 1, 15, 7)]
distance > atol*sqrt(n) + rtol*grad_norm
distance 275.2369689941406 > 0.01*156.76734353812338 + 0.1*1753.1251220703125
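
The failure is reported by tvm.testing.check_numerical_grads, which compares the gradients produced by te.gradient against numerical (finite-difference) gradients of the summed output. Plugging the reported numbers back into the printed criterion (a back-of-the-envelope check, not TVM code) shows how far off the analytic gradient is:

# Values copied from the failure message above.
sqrt_n = 156.76734353812338     # sqrt(24576) = sqrt(8 * 3 * 32 * 32), which appears to be the element count of X's gradient
grad_norm = 1753.1251220703125  # gradient norm, as printed
distance = 275.2369689941406    # distance between analytic and numerical gradients, as printed
atol, rtol = 0.01, 0.1          # the tolerances shown in the message
print(distance > atol * sqrt_n + rtol * grad_norm)  # True: ~275.2 > ~176.9, so the check fails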

Please see the following lines for the full Python test.


import numpy as np
import random
import tvm
from tvm import topi
from tvm import te
from tvm.testing import assert_allclose
from tvm.topi.utils import get_const_tuple

random.seed(2)
# Also seed NumPy's RNG, since the test data below is generated with np.random.
np.random.seed(2)

# check_grad function refers to https://github.com/apache/tvm/blob/main/tests/python/unittest/test_te_autodiff.py

def check_grad(
    out, inputs, args=[], data_range=(-10, 10), desired_grads=None, assert_no_jacobian=True
):
    inputs = inputs if isinstance(inputs, list) else [inputs]

    def check_device(device, host="llvm"):
        dev = tvm.device(device, 0)
        if not tvm.testing.device_enabled(host):
            return

        sout = te.create_schedule(out.op)
        mout = tvm.build(sout, [out] + inputs + args)
        out_shape = get_const_tuple(out.shape)

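        # Populate the inputs and extra args with random data in [l, h).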
        l, h = data_range
        input_data = [
            tvm.nd.array(
                np.random.uniform(l, h, size=get_const_tuple(input.shape)).astype(input.dtype)
            )
            for input in inputs
        ]
        arg_vals = [
            tvm.nd.array(np.random.uniform(l, h, size=get_const_tuple(arg.shape)).astype(arg.dtype))
            for arg in args
        ]

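        # Differentiate `out` with respect to each input; `head` (all ones) is the adjoint of `out`.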
        ones = topi.full_like(out, 1.0)
        grads = te.gradient(out, inputs, head=ones)
        grad_sched = te.create_schedule([grad.op for grad in grads])
        mgrad = tvm.build(grad_sched, list(grads) + inputs + args)
        if assert_no_jacobian:
            lowered_ir = str(tvm.lower(grad_sched, list(grads) + inputs + args, simple_mode=True))
            assert "jacobian" not in lowered_ir, lowered_ir

        grad_data = [tvm.nd.empty(get_const_tuple(i.shape), g.dtype) for i, g in zip(inputs, grads)]

        mgrad(*grad_data, *input_data, *arg_vals)
        g_res = [g.asnumpy() for g in grad_data]

        if desired_grads:
            assert isinstance(desired_grads, list)
            for actual, desired in zip(g_res, desired_grads):
                assert_allclose(actual, desired, rtol=0.1, atol=1e-2)
        else:

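            # No reference gradients supplied: compare against numerical (finite-difference)
            # gradients of the summed output.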
            def forward(*in_data):
                out_data = tvm.nd.empty(out_shape, out.dtype)
                mout(out_data, *[tvm.nd.array(d) for d in list(in_data)])
                return out_data.asnumpy().sum()

            tvm.testing.check_numerical_grads(
                forward, [d.asnumpy() for d in input_data + arg_vals], g_res
            )

    check_device("cpu")

X = te.placeholder((8, 3, 32, 32), name="X")
W = te.placeholder((3, 16, 1, 1), name="W")
R = topi.nn.conv2d_transpose_nchw(X, W, (2,2), (1,1), "float32", (1,1))
check_grad(R, [X, W])

JiaruiWang-Jill commented 3 years ago

@yzhliu fyi

tqchen commented 3 years ago

ping @yzhliu @hzfan do you have more info on this? cc @altanh

kk2049 commented 3 years ago

Hey, I'm facing a similar issue. Have you guys fixed the problem?