dmlc / mxnet-memonger

Sublinear memory optimization for deep learning, reduce GPU memory cost to train deeper nets
Apache License 2.0
308 stars 63 forks source link

it seems doesn't work on googlenet #8

Open iezsf opened 7 years ago

iezsf commented 7 years ago

# my example_googlenet.py as follows:

"""References:

Szegedy, Christian, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed, Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, and Andrew Rabinovich. "Going deeper with convolutions." arXiv preprint arXiv:1409.4842 (2014).

""" import sys sys.path.append('../../python/') import math import mxnet as mx import memonger

def Conv(data, num_filter, kernel=(1, 1), stride=(1, 1), pad=(0, 0), name=None, suffix=''): conv = mx.sym.Convolution(data=data, num_filter=num_filter, kernel=kernel, stride=stride, pad=pad, no_bias=True, name='%s%s_conv2d' %(name, suffix)) bn = mx.sym.BatchNorm(data=conv, name='%s%s_batchnorm' %(name, suffix), fix_gamma=True) act = mx.sym.Activation(data=bn, act_type='relu', name='%s%s_relu' %(name, suffix)) return act

def Inception7A(data, num_1x1, num_3x3_red, num_3x3_1, num_3x3_2, num_5x5_red, num_5x5, pool, proj, name): tower_1x1 = Conv(data, num_1x1, name=('%s_conv' % name)) tower_5x5 = Conv(data, num_5x5_red, name=('%s_tower' % name), suffix='_conv') tower_5x5 = Conv(tower_5x5, num_5x5, kernel=(5, 5), pad=(2, 2), name=('%s_tower' % name), suffix='_conv_1') tower_3x3 = Conv(data, num_3x3_red, name=('%s_tower_1' % name), suffix='_conv') tower_3x3 = Conv(tower_3x3, num_3x3_1, kernel=(3, 3), pad=(1, 1), name=('%s_tower_1' % name), suffix='_conv_1') tower_3x3 = Conv(tower_3x3, num_3x3_2, kernel=(3, 3), pad=(1, 1), name=('%s_tower_1' % name), suffix='_conv_2') pooling = mx.sym.Pooling(data=data, kernel=(3, 3), stride=(1, 1), pad=(1, 1), pool_type=pool, name=('%spool%s_pool' % (pool, name))) cproj = Conv(pooling, proj, name=('%s_tower_2' % name), suffix='_conv') concat = mx.sym.Concat(*[tower_1x1, tower_5x5, tower_3x3, cproj], name='chconcat%s_chconcat' % name) return concat

First Downsample

def Inception7B(data, num_3x3, num_d3x3_red, num_d3x3_1, num_d3x3_2, pool, name): tower_3x3 = Conv(data, num_3x3, kernel=(3, 3), pad=(0, 0), stride=(2, 2), name=('%s_conv' % name)) tower_d3x3 = Conv(data, num_d3x3_red, name=('%s_tower' % name), suffix='_conv') tower_d3x3 = Conv(tower_d3x3, num_d3x3_1, kernel=(3, 3), pad=(1, 1), stride=(1, 1), name=('%s_tower' % name), suffix='_conv_1') tower_d3x3 = Conv(tower_d3x3, num_d3x3_2, kernel=(3, 3), pad=(0, 0), stride=(2, 2), name=('%s_tower' % name), suffix='_conv_2') pooling = mx.symbol.Pooling(data=data, kernel=(3, 3), stride=(2, 2), pad=(0,0), pool_type="max", name=('maxpool%s_pool' % name)) concat = mx.sym.Concat(*[tower_3x3, tower_d3x3, pooling], name='chconcat%s_chconcat' % name) return concat

def Inception7C(data, num_1x1, num_d7_red, num_d7_1, num_d7_2, num_q7_red, num_q7_1, num_q7_2, num_q7_3, num_q7_4, pool, proj, name): tower_1x1 = Conv(data=data, num_filter=num_1x1, kernel=(1, 1), name=('%s_conv' % name)) tower_d7 = Conv(data=data, num_filter=num_d7_red, name=('%s_tower' % name), suffix='_conv') tower_d7 = Conv(data=tower_d7, num_filter=num_d7_1, kernel=(1, 7), pad=(0, 3), name=('%s_tower' % name), suffix='_conv_1') tower_d7 = Conv(data=tower_d7, num_filter=num_d7_2, kernel=(7, 1), pad=(3, 0), name=('%s_tower' % name), suffix='_conv_2') tower_q7 = Conv(data=data, num_filter=num_q7_red, name=('%s_tower_1' % name), suffix='_conv') tower_q7 = Conv(data=tower_q7, num_filter=num_q7_1, kernel=(7, 1), pad=(3, 0), name=('%s_tower_1' % name), suffix='_conv_1') tower_q7 = Conv(data=tower_q7, num_filter=num_q7_2, kernel=(1, 7), pad=(0, 3), name=('%s_tower_1' % name), suffix='_conv_2') tower_q7 = Conv(data=tower_q7, num_filter=num_q7_3, kernel=(7, 1), pad=(3, 0), name=('%s_tower_1' % name), suffix='_conv_3') tower_q7 = Conv(data=tower_q7, num_filter=num_q7_4, kernel=(1, 7), pad=(0, 3), name=('%s_tower_1' % name), suffix='_conv_4') pooling = mx.sym.Pooling(data=data, kernel=(3, 3), stride=(1, 1), pad=(1, 1), pool_type=pool, name=('%spool%s_pool' % (pool, name))) cproj = Conv(data=pooling, num_filter=proj, kernel=(1, 1), name=('%s_tower_2' % name), suffix='_conv')

concat

concat = mx.sym.Concat(*[tower_1x1, tower_d7, tower_q7, cproj], name='ch_concat_%s_chconcat' % name)
return concat

def Inception7D(data, num_3x3_red, num_3x3, num_d7_3x3_red, num_d7_1, num_d7_2, num_d7_3x3, pool, name): tower_3x3 = Conv(data=data, num_filter=num_3x3_red, name=('%s_tower' % name), suffix='_conv') tower_3x3 = Conv(data=tower_3x3, num_filter=num_3x3, kernel=(3, 3), pad=(0,0), stride=(2, 2), name=('%s_tower' % name), suffix='_conv_1') tower_d7_3x3 = Conv(data=data, num_filter=num_d7_3x3_red, name=('%s_tower_1' % name), suffix='_conv') tower_d7_3x3 = Conv(data=tower_d7_3x3, num_filter=num_d7_1, kernel=(1, 7), pad=(0, 3), name=('%s_tower_1' % name), suffix='_conv_1') tower_d7_3x3 = Conv(data=tower_d7_3x3, num_filter=num_d7_2, kernel=(7, 1), pad=(3, 0), name=('%s_tower_1' % name), suffix='_conv_2') tower_d7_3x3 = Conv(data=tower_d7_3x3, num_filter=num_d7_3x3, kernel=(3, 3), stride=(2, 2), name=('%s_tower_1' % name), suffix='_conv_3') pooling = mx.sym.Pooling(data=data, kernel=(3, 3), stride=(2, 2), pool_type=pool, name=('%spool%s_pool' % (pool, name)))

concat

concat = mx.sym.Concat(*[tower_3x3, tower_d7_3x3, pooling], name='ch_concat_%s_chconcat' % name)
return concat

def Inception7E(data, num_1x1, num_d3_red, num_d3_1, num_d3_2, num_3x3_d3_red, num_3x3, num_3x3_d3_1, num_3x3_d3_2, pool, proj, name): tower_1x1 = Conv(data=data, num_filter=num_1x1, kernel=(1, 1), name=('%s_conv' % name)) tower_d3 = Conv(data=data, num_filter=num_d3_red, name=('%s_tower' % name), suffix='_conv') tower_d3_a = Conv(data=tower_d3, num_filter=num_d3_1, kernel=(1, 3), pad=(0, 1), name=('%s_tower' % name), suffix='_mixed_conv') tower_d3_b = Conv(data=tower_d3, num_filter=num_d3_2, kernel=(3, 1), pad=(1, 0), name=('%s_tower' % name), suffix='_mixed_conv_1') tower_3x3_d3 = Conv(data=data, num_filter=num_3x3_d3_red, name=('%s_tower_1' % name), suffix='_conv') tower_3x3_d3 = Conv(data=tower_3x3_d3, num_filter=num_3x3, kernel=(3, 3), pad=(1, 1), name=('%s_tower_1' % name), suffix='_conv_1') tower_3x3_d3_a = Conv(data=tower_3x3_d3, num_filter=num_3x3_d3_1, kernel=(1, 3), pad=(0, 1), name=('%s_tower_1' % name), suffix='_mixed_conv') tower_3x3_d3_b = Conv(data=tower_3x3_d3, num_filter=num_3x3_d3_2, kernel=(3, 1), pad=(1, 0), name=('%s_tower_1' % name), suffix='_mixed_conv_1') pooling = mx.sym.Pooling(data=data, kernel=(3, 3), stride=(1, 1), pad=(1, 1), pool_type=pool, name=('%spool%s_pool' % (pool, name))) cproj = Conv(data=pooling, num_filter=proj, kernel=(1, 1), name=('%s_tower_2' % name), suffix='_conv')

concat

concat = mx.sym.Concat(*[tower_1x1, tower_d3_a, tower_d3_b, tower_3x3_d3_a, tower_3x3_d3_b, cproj], name='ch_concat_%s_chconcat' % name)
return concat

In[49]:

def get_symbol(num_classes=1000, **kwargs): data = mx.symbol.Variable(name="data")

stage 1

conv = Conv(data, 32, kernel=(3, 3), stride=(2, 2), name="conv")
conv_1 = Conv(conv, 32, kernel=(3, 3), name="conv_1")
conv_2 = Conv(conv_1, 64, kernel=(3, 3), pad=(1, 1), name="conv_2")
pool = mx.sym.Pooling(data=conv_2, kernel=(3, 3), stride=(2, 2), pool_type="max", name="pool")
# stage 2
conv_3 = Conv(pool, 80, kernel=(1, 1), name="conv_3")
conv_4 = Conv(conv_3, 192, kernel=(3, 3), name="conv_4")
pool1 = mx.sym.Pooling(data=conv_4, kernel=(3, 3), stride=(2, 2), pool_type="max", name="pool1")
# stage 3
in3a = Inception7A(pool1, 64,
                   64, 96, 96,
                   48, 64,
                   "avg", 32, "mixed")
in3b = Inception7A(in3a, 64,
                   64, 96, 96,
                   48, 64,
                   "avg", 64, "mixed_1")
in3c = Inception7A(in3b, 64,
                   64, 96, 96,
                   48, 64,
                   "avg", 64, "mixed_2")
in3d = Inception7B(in3c, 384,
                   64, 96, 96,
                   "max", "mixed_3")
# stage 4
in4a = Inception7C(in3d, 192,
                   128, 128, 192,
                   128, 128, 128, 128, 192,
                   "avg", 192, "mixed_4")
in4b = Inception7C(in4a, 192,
                   160, 160, 192,
                   160, 160, 160, 160, 192,
                   "avg", 192, "mixed_5")
in4c = Inception7C(in4b, 192,
                   160, 160, 192,
                   160, 160, 160, 160, 192,
                   "avg", 192, "mixed_6")
in4d = Inception7C(in4c, 192,
                   192, 192, 192,
                   192, 192, 192, 192, 192,
                   "avg", 192, "mixed_7")
in4e = Inception7D(in4d, 192, 320,
                   192, 192, 192, 192,
                   "max", "mixed_8")
# stage 5
in5a = Inception7E(in4e, 320,
                   384, 384, 384,
                   448, 384, 384, 384,
                   "avg", 192, "mixed_9")
in5b = Inception7E(in5a, 320,
                   384, 384, 384,
                   448, 384, 384, 384,
                   "max", 192, "mixed_10")
# pool
pool = mx.sym.Pooling(data=in5b, kernel=(8, 8), stride=(1, 1), pool_type="avg", name="global_pool")
flatten = mx.sym.Flatten(data=pool, name="flatten")
fc1 = mx.symbol.FullyConnected(data=flatten, num_hidden=num_classes, name='fc1')
softmax = mx.symbol.SoftmaxOutput(data=fc1, name='softmax')
return softmax

batch_size = 32 image_shape = [3, 224, 224] num_classes = 1000

net = get_symbol(num_classes) print net dshape = (32, 3, 227, 227)

net_mem_planned = memonger.search_plan(net, data=dshape) old_cost = memonger.get_cost(net, data=dshape) new_cost = memonger.get_cost(net_mem_planned, data=dshape)

print('Old feature map cost=%d MB' % old_cost) print('New feature map cost=%d MB' % new_cost)


errors below:

/home/user/anaconda2/bin/python /home/user/workspace/mxnet-std/example/mxnet-memonger/example_googlenet.py

[11:30:47] /home/user/workspace/mxnet-std/dmlc-core/include/dmlc/logging.h:304: [11:30:47] src/operator/./pooling-inl.h:196: Check failed: param_.kernel[0] <= dshape[2] + 2 * param_.pad[0] kernel size (8) exceeds input (5 padded to 5) Stack trace returned 10 entries: [bt] (0) /home/user/workspace/mxnet-std/python/mxnet/../../lib/libmxnet.so(_ZN4dmlc15LogMessageFatalD1Ev+0x3c) [0x7f738b405c2c] [bt] (1) /home/user/workspace/mxnet-std/python/mxnet/../../lib/libmxnet.so(_ZNK5mxnet2op11PoolingProp10InferShapeEPSt6vectorIN4nnvm6TShapeESaIS4_EES7_S7_+0x93b) [0x7f738c35610b] [bt] (2) /home/user/workspace/mxnet-std/python/mxnet/../../lib/libmxnet.so(+0x149f387) [0x7f738c181387] [bt] (3) /home/user/workspace/mxnet-std/python/mxnet/../../lib/libmxnet.so(+0x266f6a1) [0x7f738d3516a1] [bt] (4) /home/user/workspace/mxnet-std/python/mxnet/../../lib/libmxnet.so(+0x2670eb2) [0x7f738d352eb2] [bt] (5) /home/user/workspace/mxnet-std/python/mxnet/../../lib/libmxnet.so(+0x26717f6) [0x7f738d3537f6] [bt] (6) /home/user/workspace/mxnet-std/python/mxnet/../../lib/libmxnet.so(_ZN4nnvm11ApplyPassesENS_5GraphERKSt6vectorISsSaISsEE+0x518) [0x7f738d33c6c8] [bt] (7) /home/user/workspace/mxnet-std/python/mxnet/../../lib/libmxnet.so(_ZN4nnvm9ApplyPassENS_5GraphERKSs+0x8e) [0x7f738c015f5e] [bt] (8) /home/user/workspace/mxnet-std/python/mxnet/../../lib/libmxnet.so(_ZN4nnvm4pass10InferShapeENS_5GraphESt6vectorINS_6TShapeESaIS3_EESs+0x24e) [0x7f738c0187ae] [bt] (9) /home/user/workspace/mxnet-std/python/mxnet/../../lib/libmxnet.so(MXSymbolInferShape+0x156f) [0x7f738c01119f] Traceback (most recent call last): File "/home/user/workspace/mxnet-std/example/mxnet-memonger/example_googlenet.py", line 187, in net_mem_planned = memonger.search_plan(net, data=dshape) File "/home/user/workspace/mxnet-std/example/mxnet-memonger/memonger.py", line 140, in search_plan sym = make_mirror_plan(sym, threshold=threshold, plan_info=info, **kwargs) File "/home/user/workspace/mxnet-std/example/mxnet-memonger/memonger.py", line 62, in make_mirror_plan _, out_shapes, _ = internals.infer_shape(**kwargs) File "../../python/mxnet/symbol.py", line 932, in infer_shape res = self._infer_shape_impl(False, *args, **kwargs) File "../../python/mxnet/symbol.py", line 1058, in _infer_shape_impl ctypes.byref(complete))) File "../../python/mxnet/base.py", line 85, in check_call raise MXNetError(py_str(_LIB.MXGetLastError())) mxnet.base.MXNetError: Error in operator global_pool: [11:30:47] src/operator/./pooling-inl.h:196: Check failed: param_.kernel[0] <= dshape[2] + 2 * param_.pad[0] kernel size (8) exceeds input (5 padded to 5) Stack trace returned 10 entries: [bt] (0) /home/user/workspace/mxnet-std/python/mxnet/../../lib/libmxnet.so(_ZN4dmlc15LogMessageFatalD1Ev+0x3c) [0x7f738b405c2c] [bt] (1) /home/user/workspace/mxnet-std/python/mxnet/../../lib/libmxnet.so(_ZNK5mxnet2op11PoolingProp10InferShapeEPSt6vectorIN4nnvm6TShapeESaIS4_EES7_S7_+0x93b) [0x7f738c35610b] [bt] (2) /home/user/workspace/mxnet-std/python/mxnet/../../lib/libmxnet.so(+0x149f387) [0x7f738c181387] [bt] (3) /home/user/workspace/mxnet-std/python/mxnet/../../lib/libmxnet.so(+0x266f6a1) [0x7f738d3516a1] [bt] (4) /home/user/workspace/mxnet-std/python/mxnet/../../lib/libmxnet.so(+0x2670eb2) [0x7f738d352eb2] [bt] (5) /home/user/workspace/mxnet-std/python/mxnet/../../lib/libmxnet.so(+0x26717f6) [0x7f738d3537f6] [bt] (6) /home/user/workspace/mxnet-std/python/mxnet/../../lib/libmxnet.so(_ZN4nnvm11ApplyPassesENS_5GraphERKSt6vectorISsSaISsEE+0x518) [0x7f738d33c6c8] [bt] (7) /home/user/workspace/mxnet-std/python/mxnet/../../lib/libmxnet.so(_ZN4nnvm9ApplyPassENS_5GraphERKSs+0x8e) [0x7f738c015f5e] [bt] (8) /home/user/workspace/mxnet-std/python/mxnet/../../lib/libmxnet.so(_ZN4nnvm4pass10InferShapeENS_5GraphESt6vectorINS_6TShapeESaIS3_EESs+0x24e) [0x7f738c0187ae] [bt] (9) /home/user/workspace/mxnet-std/python/mxnet/../../lib/libmxnet.so(MXSymbolInferShape+0x156f) [0x7f738c01119f] infer_shape error. Arguments: data: (32, 3, 227, 227) Process finished with exit code 1 ---------------------------------------------------------------------------------------- I really appreciate it for who can solve this
iezsf commented 7 years ago

when i set the kernel size = 5*5 int line 171. it worked why ?

iezsf commented 7 years ago

new problem : when it worked , i modified the "batch_size" in line 178, the result in terminal doesn't change.

iezsf commented 7 years ago

@mli @pluskid @piiswrong @javelinjs @CodingCat