apache / tvm

Open deep learning compiler stack for CPU, GPU and specialized accelerators
https://tvm.apache.org/
Apache License 2.0

[ONNX][OP] Support GEMM #1231

Closed: rajh619 closed this issue 6 years ago

rajh619 commented 6 years ago

nnvm.compiler.build fails for ONNX models downloaded from the ONNX model zoo.

The problem seems to be in the FC layers, which contain the Gemm operation.

Tried the VGG19 and ResNet50 ONNX models from the ONNX model zoo. (Couldn't test "bvlc_alexnet" properly because LRN operator support, #1157, is not closed yet.)

Both models fail to compile at the Gemm operation.
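
For context, ONNX Gemm computes Y = alpha * A' * B' + beta * C (with A' = A^T if transA, B' = B^T if transB) and expects A to be 2-D, so a 4-D feature map such as VGG19's (1, 512, 7, 7) pool5 output has to be flattened to (1, 25088) before the matmul. A minimal NumPy sketch of these semantics, using the fc6 shapes from the graph dump below (zero-filled arrays, purely illustrative):

import numpy as np

# ONNX Gemm semantics: Y = alpha * (A @ B.T) + beta * C when transB = 1.
# Gemm expects A to be 2-D, so the (1, 512, 7, 7) pool5 output must be
# flattened to (1, 25088) first.
alpha, beta = 1.0, 1.0
A = np.zeros((1, 512, 7, 7), dtype=np.float32).reshape(1, -1)  # (1, 25088)
B = np.zeros((4096, 25088), dtype=np.float32)                  # fc6_w_0
C = np.zeros((4096,), dtype=np.float32)                        # fc6_b_0, broadcast over rows
Y = alpha * (A @ B.T) + beta * C
print(Y.shape)  # (1, 4096)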

Test script:

import numpy as np
import nnvm
import nnvm.compiler
import onnx

onnx_path = 'onnx_ip/resnet50_model.onnx'

# Load the ONNX model.
onnx_model = onnx.load(onnx_path)

# Validate the model and preview its IR.
onnx.checker.check_model(onnx_model)
print(onnx.helper.printable_graph(onnx_model.graph))

# Import the model into NNVM.
sym, params = nnvm.frontend.from_onnx(onnx_model)
print("debug symbols:", sym.debug_str())
print("\n loading in nnvm done ")

# Dummy input matching the model's expected NCHW shape.
x_in = np.zeros((1, 3, 224, 224), dtype=np.float32)

input_x = sym.list_input_names()[0]
input_dict = {input_x: x_in.shape}

# Compile for CPU; this is where the build fails.
target = 'llvm'
graph, lib, params = nnvm.compiler.build(sym, target, input_dict, params=params)

print("\n compiling in nnvm done ")

VGG19: the logs follow.

Graph IR:


graph vgg19 (
  %data_0[FLOAT, 1x3x224x224]
) initializers (
  %conv1_1_w_0[FLOAT, 64x3x3x3]
  %conv1_1_b_0[FLOAT, 64]
  %conv1_2_w_0[FLOAT, 64x64x3x3]
  %conv1_2_b_0[FLOAT, 64]
  %conv2_1_w_0[FLOAT, 128x64x3x3]
  %conv2_1_b_0[FLOAT, 128]
  %conv2_2_w_0[FLOAT, 128x128x3x3]
  %conv2_2_b_0[FLOAT, 128]
  %conv3_1_w_0[FLOAT, 256x128x3x3]
  %conv3_1_b_0[FLOAT, 256]
  %conv3_2_w_0[FLOAT, 256x256x3x3]
  %conv3_2_b_0[FLOAT, 256]
  %conv3_3_w_0[FLOAT, 256x256x3x3]
  %conv3_3_b_0[FLOAT, 256]
  %conv3_4_w_0[FLOAT, 256x256x3x3]
  %conv3_4_b_0[FLOAT, 256]
  %conv4_1_w_0[FLOAT, 512x256x3x3]
  %conv4_1_b_0[FLOAT, 512]
  %conv4_2_w_0[FLOAT, 512x512x3x3]
  %conv4_2_b_0[FLOAT, 512]
  %conv4_3_w_0[FLOAT, 512x512x3x3]
  %conv4_3_b_0[FLOAT, 512]
  %conv4_4_w_0[FLOAT, 512x512x3x3]
  %conv4_4_b_0[FLOAT, 512]
  %conv5_1_w_0[FLOAT, 512x512x3x3]
  %conv5_1_b_0[FLOAT, 512]
  %conv5_2_w_0[FLOAT, 512x512x3x3]
  %conv5_2_b_0[FLOAT, 512]
  %conv5_3_w_0[FLOAT, 512x512x3x3]
  %conv5_3_b_0[FLOAT, 512]
  %conv5_4_w_0[FLOAT, 512x512x3x3]
  %conv5_4_b_0[FLOAT, 512]
  %fc6_w_0[FLOAT, 4096x25088]
  %fc6_b_0[FLOAT, 4096]
  %fc7_w_0[FLOAT, 4096x4096]
  %fc7_b_0[FLOAT, 4096]
  %fc8_w_0[FLOAT, 1000x4096]
  %fc8_b_0[FLOAT, 1000]
) {
  %conv1_1_1 = Conv[kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [1, 1]](%data_0, %conv1_1_w_0, %conv1_1_b_0)
  %conv1_1_2 = Relu(%conv1_1_1)
  %conv1_2_1 = Conv[kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [1, 1]](%conv1_1_2, %conv1_2_w_0, %conv1_2_b_0)
  %conv1_2_2 = Relu(%conv1_2_1)
  %pool1_1 = MaxPool[kernel_shape = [2, 2], pads = [0, 0, 0, 0], strides = [2, 2]](%conv1_2_2)
  %conv2_1_1 = Conv[kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [1, 1]](%pool1_1, %conv2_1_w_0, %conv2_1_b_0)
  %conv2_1_2 = Relu(%conv2_1_1)
  %conv2_2_1 = Conv[kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [1, 1]](%conv2_1_2, %conv2_2_w_0, %conv2_2_b_0)
  %conv2_2_2 = Relu(%conv2_2_1)
  %pool2_1 = MaxPool[kernel_shape = [2, 2], pads = [0, 0, 0, 0], strides = [2, 2]](%conv2_2_2)
  %conv3_1_1 = Conv[kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [1, 1]](%pool2_1, %conv3_1_w_0, %conv3_1_b_0)
  %conv3_1_2 = Relu(%conv3_1_1)
  %conv3_2_1 = Conv[kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [1, 1]](%conv3_1_2, %conv3_2_w_0, %conv3_2_b_0)
  %conv3_2_2 = Relu(%conv3_2_1)
  %conv3_3_1 = Conv[kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [1, 1]](%conv3_2_2, %conv3_3_w_0, %conv3_3_b_0)
  %conv3_3_2 = Relu(%conv3_3_1)
  %conv3_4_1 = Conv[kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [1, 1]](%conv3_3_2, %conv3_4_w_0, %conv3_4_b_0)
  %conv3_4_2 = Relu(%conv3_4_1)
  %pool3_1 = MaxPool[kernel_shape = [2, 2], pads = [0, 0, 0, 0], strides = [2, 2]](%conv3_4_2)
  %conv4_1_1 = Conv[kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [1, 1]](%pool3_1, %conv4_1_w_0, %conv4_1_b_0)
  %conv4_1_2 = Relu(%conv4_1_1)
  %conv4_2_1 = Conv[kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [1, 1]](%conv4_1_2, %conv4_2_w_0, %conv4_2_b_0)
  %conv4_2_2 = Relu(%conv4_2_1)
  %conv4_3_1 = Conv[kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [1, 1]](%conv4_2_2, %conv4_3_w_0, %conv4_3_b_0)
  %conv4_3_2 = Relu(%conv4_3_1)
  %conv4_4_1 = Conv[kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [1, 1]](%conv4_3_2, %conv4_4_w_0, %conv4_4_b_0)
  %conv4_4_2 = Relu(%conv4_4_1)
  %pool4_1 = MaxPool[kernel_shape = [2, 2], pads = [0, 0, 0, 0], strides = [2, 2]](%conv4_4_2)
  %conv5_1_1 = Conv[kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [1, 1]](%pool4_1, %conv5_1_w_0, %conv5_1_b_0)
  %conv5_1_2 = Relu(%conv5_1_1)
  %conv5_2_1 = Conv[kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [1, 1]](%conv5_1_2, %conv5_2_w_0, %conv5_2_b_0)
  %conv5_2_2 = Relu(%conv5_2_1)
  %conv5_3_1 = Conv[kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [1, 1]](%conv5_2_2, %conv5_3_w_0, %conv5_3_b_0)
  %conv5_3_2 = Relu(%conv5_3_1)
  %conv5_4_1 = Conv[kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [1, 1]](%conv5_3_2, %conv5_4_w_0, %conv5_4_b_0)
  %conv5_4_2 = Relu(%conv5_4_1)
  %pool5_1 = MaxPool[kernel_shape = [2, 2], pads = [0, 0, 0, 0], strides = [2, 2]](%conv5_4_2)
  %fc6_1 = Gemm[broadcast = 1, transB = 1](%pool5_1, %fc6_w_0, %fc6_b_0)
  %fc6_2 = Relu(%fc6_1)
  %fc6_3, %_fc6_mask_1 = Dropout[is_test = 1, ratio = 0.5](%fc6_2)
  %fc7_1 = Gemm[broadcast = 1, transB = 1](%fc6_3, %fc7_w_0, %fc7_b_0)
  %fc7_2 = Relu(%fc7_1)
  %fc7_3, %_fc7_mask_1 = Dropout[is_test = 1, ratio = 0.5](%fc7_2)
  %fc8_1 = Gemm[broadcast = 1, transB = 1](%fc7_3, %fc8_w_0, %fc8_b_0)
  %prob_1 = Softmax(%fc8_1)
  return %prob_1
}

Graph symbols:

output[0]=softmax0(0)
Variable:data_0
Variable:conv1_1_w_0
Variable:conv1_1_b_0
--------------------
Op:conv2d, Name=conv2d0
Inputs:
        arg[0]=data_0(0) version=0
        arg[1]=conv1_1_w_0(0) version=0
        arg[2]=conv1_1_b_0(0) version=0
Attrs:
        channels=64
        kernel_size=(3, 3)
        padding=(1, 1)
        strides=(1, 1)
        use_bias=True
--------------------
Op:relu, Name=relu0
Inputs:
        arg[0]=conv2d0(0)
Variable:conv1_2_w_0
Variable:conv1_2_b_0
--------------------
Op:conv2d, Name=conv2d1
Inputs:
        arg[0]=relu0(0)
        arg[1]=conv1_2_w_0(0) version=0
        arg[2]=conv1_2_b_0(0) version=0
Attrs:
        channels=64
        kernel_size=(3, 3)
        padding=(1, 1)
        strides=(1, 1)
        use_bias=True
--------------------
Op:relu, Name=relu1
Inputs:
        arg[0]=conv2d1(0)
--------------------
Op:max_pool2d, Name=max_pool2d0
Inputs:
        arg[0]=relu1(0)
Attrs:
        ceil_mode=False
        padding=(0, 0)
        pool_size=(2, 2)
        strides=(2, 2)
Variable:conv2_1_w_0
Variable:conv2_1_b_0
--------------------
Op:conv2d, Name=conv2d2
Inputs:
        arg[0]=max_pool2d0(0)
        arg[1]=conv2_1_w_0(0) version=0
        arg[2]=conv2_1_b_0(0) version=0
Attrs:
        channels=128
        kernel_size=(3, 3)
        padding=(1, 1)
        strides=(1, 1)
        use_bias=True
--------------------
Op:relu, Name=relu2
Inputs:
        arg[0]=conv2d2(0)
Variable:conv2_2_w_0
Variable:conv2_2_b_0
--------------------
Op:conv2d, Name=conv2d3
Inputs:
        arg[0]=relu2(0)
        arg[1]=conv2_2_w_0(0) version=0
        arg[2]=conv2_2_b_0(0) version=0
Attrs:
        channels=128
        kernel_size=(3, 3)
        padding=(1, 1)
        strides=(1, 1)
        use_bias=True
--------------------
Op:relu, Name=relu3
Inputs:
        arg[0]=conv2d3(0)
--------------------
Op:max_pool2d, Name=max_pool2d1
Inputs:
        arg[0]=relu3(0)
Attrs:
        ceil_mode=False
        padding=(0, 0)
        pool_size=(2, 2)
        strides=(2, 2)
Variable:conv3_1_w_0
Variable:conv3_1_b_0
--------------------
Op:conv2d, Name=conv2d4
Inputs:
        arg[0]=max_pool2d1(0)
        arg[1]=conv3_1_w_0(0) version=0
        arg[2]=conv3_1_b_0(0) version=0
Attrs:
        channels=256
        kernel_size=(3, 3)
        padding=(1, 1)
        strides=(1, 1)
        use_bias=True
--------------------
Op:relu, Name=relu4
Inputs:
        arg[0]=conv2d4(0)
Variable:conv3_2_w_0
Variable:conv3_2_b_0
--------------------
Op:conv2d, Name=conv2d5
Inputs:
        arg[0]=relu4(0)
        arg[1]=conv3_2_w_0(0) version=0
        arg[2]=conv3_2_b_0(0) version=0
Attrs:
        channels=256
        kernel_size=(3, 3)
        padding=(1, 1)
        strides=(1, 1)
        use_bias=True
--------------------
Op:relu, Name=relu5
Inputs:
        arg[0]=conv2d5(0)
Variable:conv3_3_w_0
Variable:conv3_3_b_0
--------------------
Op:conv2d, Name=conv2d6
Inputs:
        arg[0]=relu5(0)
        arg[1]=conv3_3_w_0(0) version=0
        arg[2]=conv3_3_b_0(0) version=0
Attrs:
        channels=256
        kernel_size=(3, 3)
        padding=(1, 1)
        strides=(1, 1)
        use_bias=True
--------------------
Op:relu, Name=relu6
Inputs:
        arg[0]=conv2d6(0)
Variable:conv3_4_w_0
Variable:conv3_4_b_0
--------------------
Op:conv2d, Name=conv2d7
Inputs:
        arg[0]=relu6(0)
        arg[1]=conv3_4_w_0(0) version=0
        arg[2]=conv3_4_b_0(0) version=0
Attrs:
        channels=256
        kernel_size=(3, 3)
        padding=(1, 1)
        strides=(1, 1)
        use_bias=True
--------------------
Op:relu, Name=relu7
Inputs:
        arg[0]=conv2d7(0)
--------------------
Op:max_pool2d, Name=max_pool2d2
Inputs:
        arg[0]=relu7(0)
Attrs:
        ceil_mode=False
        padding=(0, 0)
        pool_size=(2, 2)
        strides=(2, 2)
Variable:conv4_1_w_0
Variable:conv4_1_b_0
--------------------
Op:conv2d, Name=conv2d8
Inputs:
        arg[0]=max_pool2d2(0)
        arg[1]=conv4_1_w_0(0) version=0
        arg[2]=conv4_1_b_0(0) version=0
Attrs:
        channels=512
        kernel_size=(3, 3)
        padding=(1, 1)
        strides=(1, 1)
        use_bias=True
--------------------
Op:relu, Name=relu8
Inputs:
        arg[0]=conv2d8(0)
Variable:conv4_2_w_0
Variable:conv4_2_b_0
--------------------
Op:conv2d, Name=conv2d9
Inputs:
        arg[0]=relu8(0)
        arg[1]=conv4_2_w_0(0) version=0
        arg[2]=conv4_2_b_0(0) version=0
Attrs:
        channels=512
        kernel_size=(3, 3)
        padding=(1, 1)
        strides=(1, 1)
        use_bias=True
--------------------
Op:relu, Name=relu9
Inputs:
        arg[0]=conv2d9(0)
Variable:conv4_3_w_0
Variable:conv4_3_b_0
--------------------
Op:conv2d, Name=conv2d10
Inputs:
        arg[0]=relu9(0)
        arg[1]=conv4_3_w_0(0) version=0
        arg[2]=conv4_3_b_0(0) version=0
Attrs:
        channels=512
        kernel_size=(3, 3)
        padding=(1, 1)
        strides=(1, 1)
        use_bias=True
--------------------
Op:relu, Name=relu10
Inputs:
        arg[0]=conv2d10(0)
Variable:conv4_4_w_0
Variable:conv4_4_b_0
--------------------
Op:conv2d, Name=conv2d11
Inputs:
        arg[0]=relu10(0)
        arg[1]=conv4_4_w_0(0) version=0
        arg[2]=conv4_4_b_0(0) version=0
Attrs:
        channels=512
        kernel_size=(3, 3)
        padding=(1, 1)
        strides=(1, 1)
        use_bias=True
--------------------
Op:relu, Name=relu11
Inputs:
        arg[0]=conv2d11(0)
--------------------
Op:max_pool2d, Name=max_pool2d3
Inputs:
        arg[0]=relu11(0)
Attrs:
        ceil_mode=False
        padding=(0, 0)
        pool_size=(2, 2)
        strides=(2, 2)
Variable:conv5_1_w_0
Variable:conv5_1_b_0
--------------------
Op:conv2d, Name=conv2d12
Inputs:
        arg[0]=max_pool2d3(0)
        arg[1]=conv5_1_w_0(0) version=0
        arg[2]=conv5_1_b_0(0) version=0
Attrs:
        channels=512
        kernel_size=(3, 3)
        padding=(1, 1)
        strides=(1, 1)
        use_bias=True
--------------------
Op:relu, Name=relu12
Inputs:
        arg[0]=conv2d12(0)
Variable:conv5_2_w_0
Variable:conv5_2_b_0
--------------------
Op:conv2d, Name=conv2d13
Inputs:
        arg[0]=relu12(0)
        arg[1]=conv5_2_w_0(0) version=0
        arg[2]=conv5_2_b_0(0) version=0
Attrs:
        channels=512
        kernel_size=(3, 3)
        padding=(1, 1)
        strides=(1, 1)
        use_bias=True
--------------------
Op:relu, Name=relu13
Inputs:
        arg[0]=conv2d13(0)
Variable:conv5_3_w_0
Variable:conv5_3_b_0
--------------------
Op:conv2d, Name=conv2d14
Inputs:
        arg[0]=relu13(0)
        arg[1]=conv5_3_w_0(0) version=0
        arg[2]=conv5_3_b_0(0) version=0
Attrs:
        channels=512
        kernel_size=(3, 3)
        padding=(1, 1)
        strides=(1, 1)
        use_bias=True
--------------------
Op:relu, Name=relu14
Inputs:
        arg[0]=conv2d14(0)
Variable:conv5_4_w_0
Variable:conv5_4_b_0
--------------------
Op:conv2d, Name=conv2d15
Inputs:
        arg[0]=relu14(0)
        arg[1]=conv5_4_w_0(0) version=0
        arg[2]=conv5_4_b_0(0) version=0
Attrs:
        channels=512
        kernel_size=(3, 3)
        padding=(1, 1)
        strides=(1, 1)
        use_bias=True
--------------------
Op:relu, Name=relu15
Inputs:
        arg[0]=conv2d15(0)
--------------------
Op:max_pool2d, Name=max_pool2d4
Inputs:
        arg[0]=relu15(0)
Attrs:
        ceil_mode=False
        padding=(0, 0)
        pool_size=(2, 2)
        strides=(2, 2)
--------------------
Op:__mul_scalar__, Name=__mul_scalar__0
Inputs:
        arg[0]=max_pool2d4(0)
Attrs:
        scalar=1.0
Variable:fc6_w_0
Variable:fc6_b_0
--------------------
Op:__mul_scalar__, Name=__mul_scalar__1
Inputs:
        arg[0]=fc6_b_0(0) version=0
Attrs:
        scalar=1.0
--------------------
Op:dense, Name=dense0
Inputs:
        arg[0]=__mul_scalar__0(0)
        arg[1]=fc6_w_0(0) version=0
        arg[2]=__mul_scalar__1(0)
Attrs:
        units=4096
--------------------
Op:relu, Name=relu16
Inputs:
        arg[0]=dense0(0)
--------------------
Op:dropout, Name=dropout0
Inputs:
        arg[0]=relu16(0)
Attrs:
        rate=0.5
--------------------
Op:__mul_scalar__, Name=__mul_scalar__2
Inputs:
        arg[0]=dropout0(0)
Attrs:
        scalar=1.0
Variable:fc7_w_0
Variable:fc7_b_0
--------------------
Op:__mul_scalar__, Name=__mul_scalar__3
Inputs:
        arg[0]=fc7_b_0(0) version=0
Attrs:
        scalar=1.0
--------------------
Op:dense, Name=dense1
Inputs:
        arg[0]=__mul_scalar__2(0)
        arg[1]=fc7_w_0(0) version=0
        arg[2]=__mul_scalar__3(0)
Attrs:
        units=4096
--------------------
Op:relu, Name=relu17
Inputs:
        arg[0]=dense1(0)
--------------------
Op:dropout, Name=dropout1
Inputs:
        arg[0]=relu17(0)
Attrs:
        rate=0.5
--------------------
Op:__mul_scalar__, Name=__mul_scalar__4
Inputs:
        arg[0]=dropout1(0)
Attrs:
        scalar=1.0
Variable:fc8_w_0
Variable:fc8_b_0
--------------------
Op:__mul_scalar__, Name=__mul_scalar__5
Inputs:
        arg[0]=fc8_b_0(0) version=0
Attrs:
        scalar=1.0
--------------------
Op:dense, Name=dense2
Inputs:
        arg[0]=__mul_scalar__4(0)
        arg[1]=fc8_w_0(0) version=0
        arg[2]=__mul_scalar__5(0)
Attrs:
        units=1000
--------------------
Op:softmax, Name=softmax0
Inputs:
        arg[0]=dense2(0)

VGG19 error log:

Traceback (most recent call last):
  File "C:\Users\rg\Documents\Visual Studio 2015\Projects\NNVM_TVM\nnvm_tvm\nnvm_tvm\src\nnvm_compile_onnx.py", line 31, in <module>
    graph, lib, params = nnvm.compiler.build(sym, target, input_dict, params=params)
  File "C:\TVM\v0_3\tvm\nnvm\python\nnvm\compiler\build_module.py", line 261, in build
    ishape, _ = graph_util.infer_shape(graph, **shape)
  File "C:\TVM\v0_3\tvm\nnvm\python\nnvm\compiler\graph_util.py", line 31, in infer_shape
    graph = graph.apply("InferShape")
  File "C:\TVM\v0_3\tvm\nnvm\python\nnvm\graph.py", line 234, in apply
    check_call(_LIB.NNGraphApplyPasses(self.handle, npass, cpass, ctypes.byref(ghandle)))
  File "C:\TVM\v0_3\tvm\nnvm\python\nnvm\_base.py", line 75, in check_call
    raise NNVMError(py_str(_LIB.NNGetLastError()))
nnvm._base.NNVMError: Error in operator dense0: [17:48:45] C:\TVM\v0_3\tvm\nnvm\src\top\nn\nn.cc:58: Operator dense(units=4096, name=dense0) expects weight's shape to be [4096,7], but got [4096,25088].
Press any key to continue . . .
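
The expected shape [4096, 7] shows where the import goes wrong: dense infers its input features from the trailing axis only, and since the 4-D pool5 output (1, 512, 7, 7) is never flattened, dense0 sees 7 input features instead of 512 * 7 * 7 = 25088. (The __mul_scalar__ scalar=1.0 nodes in the symbol dump above are the Gemm alpha/beta multiplies, which import fine; only the flatten is missing.) Below is a minimal sketch of the kind of converter the frontend needs, assuming nnvm.symbol's flatten/transpose/dense operators; the function name and arguments are illustrative, not the actual frontend code:

import nnvm.symbol as _sym

def gemm_to_dense(inputs, attr, channels):
    """Illustrative Gemm -> dense lowering (hypothetical helper).

    inputs = [A, B, C]; channels = the number of output units,
    i.e. rows of B when transB = 1 (4096 for VGG19's fc6).
    """
    alpha = float(attr.get('alpha', 1.0))
    beta = float(attr.get('beta', 1.0))
    if int(attr.get('transA', 0)):
        inputs[0] = _sym.transpose(inputs[0], axes=(1, 0))
    if not int(attr.get('transB', 0)):
        # dense expects the (units, in_features) layout, i.e. B^T.
        inputs[1] = _sym.transpose(inputs[1], axes=(1, 0))
    # The missing step: collapse (N, C, H, W) -> (N, C*H*W) so dense
    # sees 25088 input features rather than the trailing 7.
    inputs[0] = _sym.flatten(inputs[0])
    return _sym.dense(alpha * inputs[0], inputs[1],
                      beta * inputs[2], units=channels)

The ResNet50 failure below is the same issue with the un-flattened global-pool output (1, 2048, 1, 1): its trailing axis is 1, hence the expected [1000, 1] weight shape.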

ResNet50 error log:

Traceback (most recent call last):
  File "C:\Users\rg\Documents\Visual Studio 2015\Projects\NNVM_TVM\nnvm_tvm\nnvm_tvm\src\nnvm_compile_onnx.py", line 31, in <module>
    graph, lib, params = nnvm.compiler.build(sym, target, input_dict, params=params)
  File "C:\TVM\v0_3\tvm\nnvm\python\nnvm\compiler\build_module.py", line 261, in build
    ishape, _ = graph_util.infer_shape(graph, **shape)
  File "C:\TVM\v0_3\tvm\nnvm\python\nnvm\compiler\graph_util.py", line 31, in infer_shape
    graph = graph.apply("InferShape")
  File "C:\TVM\v0_3\tvm\nnvm\python\nnvm\graph.py", line 234, in apply
    check_call(_LIB.NNGraphApplyPasses(self.handle, npass, cpass, ctypes.byref(ghandle)))
  File "C:\TVM\v0_3\tvm\nnvm\python\nnvm\_base.py", line 75, in check_call
    raise NNVMError(py_str(_LIB.NNGetLastError()))
nnvm._base.NNVMError: Error in operator dense0: [18:24:49] C:\TVM\v0_3\tvm\nnvm\src\top\nn\nn.cc:58: Operator dense(units=1000, name=dense0) expects weight's shape to be [1000,1], but got [1000,2048].
Press any key to continue . . .
tqchen commented 6 years ago

Should be fixed by #1241.