
NNgen: A Fully-Customizable Hardware Synthesis Compiler for Deep Neural Network

Slice operator requires large RAM #26

Open RyusukeYamano opened 4 years ago

RyusukeYamano commented 4 years ago

yolov3-tiny cannot be embedded on the Ultra96 because the model requires a large RAM for the slice operator near the YOLO layer.
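For reference, the two slice_ operators in the log below request input RAMs of 348816 and 87204 entries. Those numbers equal the product of each operator's act_shape times the 2-word alignment, so the slice_ input buffer apparently has to hold the entire aligned YOLO-head tensor on chip. Below is a minimal sketch of that arithmetic (my own estimate, not NNgen code; slice_input_ram_entries is a hypothetical helper):

```python
# Back-of-the-envelope estimate (not NNgen code) of the slice_ input-RAM sizes
# printed in the verbose log below. Assumption: the requested entry count is
# the product of the operator's act_shape multiplied by the word alignment
# (2 words for int16 in this build).

def slice_input_ram_entries(act_shape, word_alignment=2):
    # Hypothetical helper used only for this estimate.
    entries = 1
    for dim in act_shape:
        entries *= dim
    return entries * word_alignment

# The two slice_ operators of the yolov3-tiny heads (26x26x3 and 13x13x3 anchors):
print(slice_input_ram_entries([2028, 86]))  # -> 348816, matches the log
print(slice_input_ram_entries([507, 86]))   # -> 87204,  matches the log
```

At 16 bits per entry, the larger buffer alone is about 680 KiB, and it appears to be rounded up to the 524288-entry (2^19) RAMs that show up in the [RAM (spec: num)] summary near the end of the log.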

Verbose log when executing example/yolov3-tiny:

<class 'nngen.operator.conv2d.conv2d'> ======================================================
Conv_0
input_rams [(16, 1112), (16, 1112), (16, 1112), (16, 1112), (16, 1112), (16, 1112), (16, 1112), (16, 1112), (16, 1112), (16, 16), (16, 16), (16, 16), (16, 16), (16, 16), (16, 16), (16, 16), (16, 16), (16, 16), (32, 16), (16, 16)]
output_rams [(16, 1664)]
temp_rams []
<class 'nngen.operator.pool_serial.max_pool_serial'> ======================================================
MaxPool_3
input_rams [(16, 26624)]
output_rams [(16, 6656)]
temp_rams []
<class 'nngen.operator.conv2d.conv2d'> ======================================================
Conv_4
input_rams [(16, 2240), (16, 2240), (16, 2240), (16, 2240), (16, 2240), (16, 2240), (16, 2240), (16, 2240), (16, 2240), (16, 64), (16, 64), (16, 64), (16, 64), (16, 64), (16, 64), (16, 64), (16, 64), (16, 64), (32, 32), (16, 32)]
output_rams [(16, 832)]
temp_rams []
<class 'nngen.operator.pool_serial.max_pool_serial'> ======================================================
MaxPool_7
input_rams [(16, 26624)]
output_rams [(16, 6656)]
temp_rams []
<class 'nngen.operator.conv2d.conv2d'> ======================================================
Conv_8
input_rams [(16, 2240), (16, 2240), (16, 2240), (16, 2240), (16, 2240), (16, 2240), (16, 2240), (16, 2240), (16, 2240), (16, 128), (16, 128), (16, 128), (16, 128), (16, 128), (16, 128), (16, 128), (16, 128), (16, 128), (32, 64), (16, 64)]
output_rams [(16, 416)]
temp_rams []
<class 'nngen.operator.pool_serial.max_pool_serial'> ======================================================
MaxPool_11
input_rams [(16, 26624)]
output_rams [(16, 6656)]
temp_rams []
<class 'nngen.operator.conv2d.conv2d'> ======================================================
Conv_12
input_rams [(16, 2304), (16, 2304), (16, 2304), (16, 2304), (16, 2304), (16, 2304), (16, 2304), (16, 2304), (16, 2304), (16, 256), (16, 256), (16, 256), (16, 256), (16, 256), (16, 256), (16, 256), (16, 256), (16, 256), (32, 128), (16, 128)]
output_rams [(16, 208)]
temp_rams []
<class 'nngen.operator.pool_serial.max_pool_serial'> ======================================================
MaxPool_15
input_rams [(16, 26624)]
output_rams [(16, 6656)]
temp_rams []
<class 'nngen.operator.conv2d.conv2d'> ======================================================
Conv_16
input_rams [(16, 2304), (16, 2304), (16, 2304), (16, 2304), (16, 2304), (16, 2304), (16, 2304), (16, 2304), (16, 2304), (16, 512), (16, 512), (16, 512), (16, 512), (16, 512), (16, 512), (16, 512), (16, 512), (16, 512), (32, 256), (16, 256)]
output_rams [(16, 104)]
temp_rams []
<class 'nngen.operator.pool_serial.max_pool_serial'> ======================================================
MaxPool_19
input_rams [(16, 26624)]
output_rams [(16, 6656)]
temp_rams []
<class 'nngen.operator.conv2d.conv2d'> ======================================================
Conv_20
input_rams [(16, 2560), (16, 2560), (16, 2560), (16, 2560), (16, 2560), (16, 2560), (16, 2560), (16, 2560), (16, 2560), (16, 1024), (16, 1024), (16, 1024), (16, 1024), (16, 1024), (16, 1024), (16, 1024), (16, 1024), (16, 1024), (32, 512), (16, 512)]
output_rams [(16, 52)]
temp_rams []
<class 'nngen.operator.pad.pad'> ======================================================
Pad_23
input_rams [(16, 13312)]
output_rams [(16, 14336)]
temp_rams []
<class 'nngen.operator.pool.max_pool'> ======================================================
MaxPool_24
input_rams [(16, 7168), (16, 7168), (16, 7168), (16, 7168)]
output_rams [(16, 13312)]
temp_rams []
<class 'nngen.operator.conv2d.conv2d'> ======================================================
Conv_25
input_rams [(16, 5120), (16, 5120), (16, 5120), (16, 5120), (16, 5120), (16, 5120), (16, 5120), (16, 5120), (16, 5120), (16, 2048), (16, 2048), (16, 2048), (16, 2048), (16, 2048), (16, 2048), (16, 2048), (16, 2048), (16, 2048), (32, 1024), (16, 1024)]
output_rams [(16, 52)]
temp_rams []
<class 'nngen.operator.conv2d.conv2d'> ======================================================
Conv_28
input_rams [(16, 26624), (16, 4096), (32, 256), (16, 256)]
output_rams [(16, 52)]
temp_rams []
<class 'nngen.operator.conv2d.conv2d'> ======================================================
Conv_55
input_rams [(16, 6656), (16, 1024), (32, 128), (16, 128)]
output_rams [(16, 52)]
temp_rams []
<class 'nngen.operator.upsampling2d.upsampling2d'> ======================================================
Upsample_59
input_rams [(16, 256)]
output_rams [(16, 256)]
temp_rams []
<class 'nngen.operator.normalize.scaled_concat'> ======================================================
Concat_60
input_rams [(16, 256)]
output_rams [(16, 384)]
temp_rams []
<class 'nngen.operator.conv2d.conv2d'> ======================================================
Conv_61
input_rams [(16, 6912), (16, 6912), (16, 6912), (16, 6912), (16, 6912), (16, 6912), (16, 6912), (16, 6912), (16, 6912), (16, 1536), (16, 1536), (16, 1536), (16, 1536), (16, 1536), (16, 1536), (16, 1536), (16, 1536), (16, 1536), (32, 256), (16, 256)]
output_rams [(16, 104)]
temp_rams []
<class 'nngen.operator.conv2d.conv2d'> ======================================================
Conv_64
input_rams [(16, 13312), (16, 1024), (32, 255), (16, 2)]
output_rams [(16, 104)]
temp_rams []
<class 'nngen.operator.basic._lazy_reshape'> ======================================================
Reshape_66
input_rams [(16, 255)]
output_rams [(16, 255)]
temp_rams []
<class 'nngen.operator.basic.transpose'> ======================================================
Transpose_67
input_rams [(16, 85)]
output_rams [(16, 85)]
temp_rams []
<class 'nngen.operator.basic._lazy_reshape'> ======================================================
Reshape_69
input_rams ()
output_rams ()
temp_rams ()
<class 'nngen.operator.slice_.slice_'> ======================================================
None
act_shape [2028, 86]
self.begins (0, 4)
self.ends (2028, 5)
self.strides (1, 1)
348816
348816
input_rams [(16, 348816)]
output_rams [(16, 8112)]
temp_rams []
<class 'nngen.operator.slice_.slice_'> ======================================================
None
act_shape [2028, 86]
self.begins (0, 5)
self.ends (2028, 85)
self.strides (1, 1)
348816
348816
input_rams [(16, 348816)]
output_rams [(16, 324480)]
temp_rams []
<class 'nngen.operator.basic.multiply'> ======================================================
Mul_82
input_rams [(16, 160), (16, 4)]
output_rams [(16, 160)]
temp_rams []
<class 'nngen.operator.conv2d.conv2d'> ======================================================
Conv_31
input_rams [(16, 2560), (16, 2560), (16, 2560), (16, 2560), (16, 2560), (16, 2560), (16, 2560), (16, 2560), (16, 2560), (16, 1024), (16, 1024), (16, 1024), (16, 1024), (16, 1024), (16, 1024), (16, 1024), (16, 1024), (16, 1024), (32, 512), (16, 512)]
output_rams [(16, 52)]
temp_rams []
<class 'nngen.operator.conv2d.conv2d'> ======================================================
Conv_34
input_rams [(16, 13312), (16, 2048), (32, 255), (16, 2)]
output_rams [(16, 52)]
temp_rams []
<class 'nngen.operator.basic._lazy_reshape'> ======================================================
Reshape_36
input_rams [(16, 255)]
output_rams [(16, 255)]
temp_rams []
<class 'nngen.operator.basic.transpose'> ======================================================
Transpose_37
input_rams [(16, 85)]
output_rams [(16, 85)]
temp_rams []
<class 'nngen.operator.basic._lazy_reshape'> ======================================================
Reshape_39
input_rams ()
output_rams ()
temp_rams ()
<class 'nngen.operator.slice_.slice_'> ======================================================
None
act_shape [507, 86]
self.begins (0, 4)
self.ends (507, 5)
self.strides (1, 1)
87204
87204
input_rams [(16, 87204)]
output_rams [(16, 2028)]
temp_rams []
<class 'nngen.operator.slice_.slice_'> ======================================================
None
act_shape [507, 86]
self.begins (0, 5)
self.ends (507, 85)
self.strides (1, 1)
87204
87204
input_rams [(16, 87204)]
output_rams [(16, 81120)]
temp_rams []
<class 'nngen.operator.basic.multiply'> ======================================================
Mul_52
input_rams [(16, 160), (16, 4)]
output_rams [(16, 160)]
temp_rams []
<class 'nngen.operator.normalize.scaled_concat'> ======================================================
Concat_85
input_rams [(16, 80)]
output_rams [(16, 80)]
temp_rams []
act_shape [2028, 86]
self.begins (0, 5)
self.ends (2028, 85)
self.strides (1, 1)
348816
348816
act_shape [507, 86]
self.begins (0, 4)
self.ends (507, 5)
self.strides (1, 1)
87204
87204
act_shape [507, 86]
self.begins (0, 5)
self.ends (507, 85)
self.strides (1, 1)
87204
87204
NNgen: Neural Network Accelerator Generator (version 1.2.0)
[IP-XACT]
  Output: yolov3tiny
[Configuration]
(AXI Master Interface)
  Data width   : 32
  Address width: 32
(AXI Slave Interface)
  Data width   : 32
  Address width: 32
[Schedule Table]
(Stage 0)
(Stage 1)
  <conv2d Conv_0 dtype:int16 shape:(1, 416, 416, 16) strides:(1, 1, 1, 1) padding:(1, 1, 1, 1) bias:(16,) scale:(16,) cshamt_out:13 act_func:leaky_relu_214748368_31 sum_dtype:int64 concur_och:2 stationary:filter default_addr:19502656 g_index:0 l_index:1 word_alignment:2 aligned_shape:(1, 416, 416, 16) layout:('N', 'H', 'W', 'C') onnx_layout:('N', 'C', 'H', 'W') scale_factor:434.483748>
  | <placeholder act dtype:int16 shape:(1, 416, 416, 3) default_addr:405632 g_index:2 word_alignment:2 aligned_shape:(1, 416, 416, 4) layout:('N', 'H', 'W', 'C') onnx_layout:('N', 'C', 'H', 'W') scale_factor:128.000000>
  | <variable module_list.0.Conv2d.weight dtype:int16 shape:(16, 3, 3, 3) default_addr:1790080 g_index:3 word_alignment:2 aligned_shape:(16, 3, 3, 4) layout:('O', 'H', 'W', 'I') onnx_layout:('O', 'I', 'H', 'W') scale_factor:13903.479945>
  | <variable onnx_Conv_0_conv.bias dtype:int32 shape:(16,) default_addr:1790080 g_index:3 word_alignment:1 aligned_shape:(16,) scale_factor:1779645.432922>
  | <variable onnx_Conv_0_conv.scale dtype:int16 shape:(16,) default_addr:1790080 g_index:3 word_alignment:2 aligned_shape:(16,) scale_factor:2.000000>
(Stage 2)
  <max_pool_serial MaxPool_3 dtype:int16 shape:(1, 208, 208, 16) ksize:(1, 2, 2, 1) strides:(1, 2, 2, 1) padding:(0, 0, 0, 0) no_reuse default_addr:25040448 g_index:0 l_index:2 word_alignment:2 aligned_shape:(1, 208, 208, 16) layout:('N', 'H', 'W', 'C') onnx_layout:('N', 'C', 'H', 'W') scale_factor:434.483748>
  | <conv2d Conv_0 dtype:int16 shape:(1, 416, 416, 16) strides:(1, 1, 1, 1) padding:(1, 1, 1, 1) bias:(16,) scale:(16,) cshamt_out:13 act_func:leaky_relu_214748368_31 sum_dtype:int64 concur_och:2 stationary:filter default_addr:19502656 g_index:0 l_index:1 word_alignment:2 aligned_shape:(1, 416, 416, 16) layout:('N', 'H', 'W', 'C') onnx_layout:('N', 'C', 'H', 'W') scale_factor:434.483748>
(Stage 3)
  <conv2d Conv_4 dtype:int16 shape:(1, 208, 208, 32) strides:(1, 1, 1, 1) padding:(1, 1, 1, 1) bias:(32,) scale:(32,) cshamt_out:19 act_func:leaky_relu_214748368_31 sum_dtype:int64 concur_och:4 stationary:filter default_addr:26424896 g_index:0 l_index:3 word_alignment:2 aligned_shape:(1, 208, 208, 32) layout:('N', 'H', 'W', 'C') onnx_layout:('N', 'C', 'H', 'W') scale_factor:845.394497>
  | <max_pool_serial MaxPool_3 dtype:int16 shape:(1, 208, 208, 16) ksize:(1, 2, 2, 1) strides:(1, 2, 2, 1) padding:(0, 0, 0, 0) no_reuse default_addr:25040448 g_index:0 l_index:2 word_alignment:2 aligned_shape:(1, 208, 208, 16) layout:('N', 'H', 'W', 'C') onnx_layout:('N', 'C', 'H', 'W') scale_factor:434.483748>
  | <variable module_list.2.Conv2d.weight dtype:int16 shape:(32, 3, 3, 16) default_addr:1790080 g_index:3 word_alignment:2 aligned_shape:(32, 3, 3, 16) layout:('O', 'H', 'W', 'I') onnx_layout:('O', 'I', 'H', 'W') scale_factor:15939.541460>
  | <variable onnx_Conv_4_conv.bias dtype:int32 shape:(32,) default_addr:1790080 g_index:3 word_alignment:1 aligned_shape:(32,) scale_factor:6925471.719068>
  | <variable onnx_Conv_4_conv.scale dtype:int16 shape:(32,) default_addr:1790080 g_index:3 word_alignment:2 aligned_shape:(32,) scale_factor:64.000000>
(Stage 4)
  <max_pool_serial MaxPool_7 dtype:int16 shape:(1, 104, 104, 32) ksize:(1, 2, 2, 1) strides:(1, 2, 2, 1) padding:(0, 0, 0, 0) no_reuse default_addr:29193792 g_index:0 l_index:4 word_alignment:2 aligned_shape:(1, 104, 104, 32) layout:('N', 'H', 'W', 'C') onnx_layout:('N', 'C', 'H', 'W') scale_factor:845.394497>
  | <conv2d Conv_4 dtype:int16 shape:(1, 208, 208, 32) strides:(1, 1, 1, 1) padding:(1, 1, 1, 1) bias:(32,) scale:(32,) cshamt_out:19 act_func:leaky_relu_214748368_31 sum_dtype:int64 concur_och:4 stationary:filter default_addr:26424896 g_index:0 l_index:3 word_alignment:2 aligned_shape:(1, 208, 208, 32) layout:('N', 'H', 'W', 'C') onnx_layout:('N', 'C', 'H', 'W') scale_factor:845.394497>
(Stage 5)
  <conv2d Conv_8 dtype:int16 shape:(1, 104, 104, 64) strides:(1, 1, 1, 1) padding:(1, 1, 1, 1) bias:(64,) scale:(64,) cshamt_out:19 act_func:leaky_relu_214748368_31 sum_dtype:int64 concur_och:8 stationary:filter default_addr:29886016 g_index:0 l_index:5 word_alignment:2 aligned_shape:(1, 104, 104, 64) layout:('N', 'H', 'W', 'C') onnx_layout:('N', 'C', 'H', 'W') scale_factor:1575.078006>
  | <max_pool_serial MaxPool_7 dtype:int16 shape:(1, 104, 104, 32) ksize:(1, 2, 2, 1) strides:(1, 2, 2, 1) padding:(0, 0, 0, 0) no_reuse default_addr:29193792 g_index:0 l_index:4 word_alignment:2 aligned_shape:(1, 104, 104, 32) layout:('N', 'H', 'W', 'C') onnx_layout:('N', 'C', 'H', 'W') scale_factor:845.394497>
  | <variable module_list.4.Conv2d.weight dtype:int16 shape:(64, 3, 3, 32) default_addr:1790080 g_index:3 word_alignment:2 aligned_shape:(64, 3, 3, 32) layout:('O', 'H', 'W', 'I') onnx_layout:('O', 'I', 'H', 'W') scale_factor:15262.743102>
  | <variable onnx_Conv_8_conv.bias dtype:int32 shape:(64,) default_addr:1790080 g_index:3 word_alignment:1 aligned_shape:(64,) scale_factor:12903039.026557>
  | <variable onnx_Conv_8_conv.scale dtype:int16 shape:(64,) default_addr:1790080 g_index:3 word_alignment:2 aligned_shape:(64,) scale_factor:64.000000>
(Stage 6)
  <max_pool_serial MaxPool_11 dtype:int16 shape:(1, 52, 52, 64) ksize:(1, 2, 2, 1) strides:(1, 2, 2, 1) padding:(0, 0, 0, 0) no_reuse default_addr:31270464 g_index:0 l_index:6 word_alignment:2 aligned_shape:(1, 52, 52, 64) layout:('N', 'H', 'W', 'C') onnx_layout:('N', 'C', 'H', 'W') scale_factor:1575.078006>
  | <conv2d Conv_8 dtype:int16 shape:(1, 104, 104, 64) strides:(1, 1, 1, 1) padding:(1, 1, 1, 1) bias:(64,) scale:(64,) cshamt_out:19 act_func:leaky_relu_214748368_31 sum_dtype:int64 concur_och:8 stationary:filter default_addr:29886016 g_index:0 l_index:5 word_alignment:2 aligned_shape:(1, 104, 104, 64) layout:('N', 'H', 'W', 'C') onnx_layout:('N', 'C', 'H', 'W') scale_factor:1575.078006>
(Stage 7)
  <conv2d Conv_12 dtype:int16 shape:(1, 52, 52, 128) strides:(1, 1, 1, 1) padding:(1, 1, 1, 1) bias:(128,) scale:(128,) cshamt_out:21 act_func:leaky_relu_214748368_31 sum_dtype:int64 concur_och:16 stationary:filter default_addr:31616576 g_index:0 l_index:7 word_alignment:2 aligned_shape:(1, 52, 52, 128) layout:('N', 'H', 'W', 'C') onnx_layout:('N', 'C', 'H', 'W') scale_factor:1093.161615>
  | <max_pool_serial MaxPool_11 dtype:int16 shape:(1, 52, 52, 64) ksize:(1, 2, 2, 1) strides:(1, 2, 2, 1) padding:(0, 0, 0, 0) no_reuse default_addr:31270464 g_index:0 l_index:6 word_alignment:2 aligned_shape:(1, 52, 52, 64) layout:('N', 'H', 'W', 'C') onnx_layout:('N', 'C', 'H', 'W') scale_factor:1575.078006>
  | <variable module_list.6.Conv2d.weight dtype:int16 shape:(128, 3, 3, 64) default_addr:1790080 g_index:3 word_alignment:2 aligned_shape:(128, 3, 3, 64) layout:('O', 'H', 'W', 'I') onnx_layout:('O', 'I', 'H', 'W') scale_factor:22742.187791>
  | <variable onnx_Conv_12_conv.bias dtype:int32 shape:(128,) default_addr:1790080 g_index:3 word_alignment:1 aligned_shape:(128,) scale_factor:35820719.801639>
  | <variable onnx_Conv_12_conv.scale dtype:int16 shape:(128,) default_addr:1790080 g_index:3 word_alignment:2 aligned_shape:(128,) scale_factor:64.000000>
(Stage 8)
  <max_pool_serial MaxPool_15 dtype:int16 shape:(1, 26, 26, 128) ksize:(1, 2, 2, 1) strides:(1, 2, 2, 1) padding:(0, 0, 0, 0) no_reuse default_addr:32308800 g_index:0 l_index:8 word_alignment:2 aligned_shape:(1, 26, 26, 128) layout:('N', 'H', 'W', 'C') onnx_layout:('N', 'C', 'H', 'W') scale_factor:1093.161615>
  | <conv2d Conv_12 dtype:int16 shape:(1, 52, 52, 128) strides:(1, 1, 1, 1) padding:(1, 1, 1, 1) bias:(128,) scale:(128,) cshamt_out:21 act_func:leaky_relu_214748368_31 sum_dtype:int64 concur_och:16 stationary:filter default_addr:31616576 g_index:0 l_index:7 word_alignment:2 aligned_shape:(1, 52, 52, 128) layout:('N', 'H', 'W', 'C') onnx_layout:('N', 'C', 'H', 'W') scale_factor:1093.161615>
(Stage 9)
  <conv2d Conv_16 dtype:int16 shape:(1, 26, 26, 256) strides:(1, 1, 1, 1) padding:(1, 1, 1, 1) bias:(256,) scale:(256,) cshamt_out:19 act_func:leaky_relu_214748368_31 sum_dtype:int64 concur_och:8 stationary:filter default_addr:32481856 g_index:0 l_index:9 word_alignment:2 aligned_shape:(1, 26, 26, 256) layout:('N', 'H', 'W', 'C') onnx_layout:('N', 'C', 'H', 'W') scale_factor:2745.399286>
  | <max_pool_serial MaxPool_15 dtype:int16 shape:(1, 26, 26, 128) ksize:(1, 2, 2, 1) strides:(1, 2, 2, 1) padding:(0, 0, 0, 0) no_reuse default_addr:32308800 g_index:0 l_index:8 word_alignment:2 aligned_shape:(1, 26, 26, 128) layout:('N', 'H', 'W', 'C') onnx_layout:('N', 'C', 'H', 'W') scale_factor:1093.161615>
  | <variable module_list.8.Conv2d.weight dtype:int16 shape:(256, 3, 3, 128) default_addr:1790080 g_index:3 word_alignment:2 aligned_shape:(256, 3, 3, 128) layout:('O', 'H', 'W', 'I') onnx_layout:('O', 'I', 'H', 'W') scale_factor:20573.637640>
  | <variable onnx_Conv_16_conv.bias dtype:int32 shape:(256,) default_addr:1790080 g_index:3 word_alignment:1 aligned_shape:(256,) scale_factor:22490310.949332>
  | <variable onnx_Conv_16_conv.scale dtype:int16 shape:(256,) default_addr:1790080 g_index:3 word_alignment:2 aligned_shape:(256,) scale_factor:64.000000>
(Stage 10)
  <max_pool_serial MaxPool_19 dtype:int16 shape:(1, 13, 13, 256) ksize:(1, 2, 2, 1) strides:(1, 2, 2, 1) padding:(0, 0, 0, 0) no_reuse default_addr:32827968 g_index:0 l_index:10 word_alignment:2 aligned_shape:(1, 13, 13, 256) layout:('N', 'H', 'W', 'C') onnx_layout:('N', 'C', 'H', 'W') scale_factor:2745.399286>
  | <conv2d Conv_16 dtype:int16 shape:(1, 26, 26, 256) strides:(1, 1, 1, 1) padding:(1, 1, 1, 1) bias:(256,) scale:(256,) cshamt_out:19 act_func:leaky_relu_214748368_31 sum_dtype:int64 concur_och:8 stationary:filter default_addr:32481856 g_index:0 l_index:9 word_alignment:2 aligned_shape:(1, 26, 26, 256) layout:('N', 'H', 'W', 'C') onnx_layout:('N', 'C', 'H', 'W') scale_factor:2745.399286>
(Stage 11)
  <conv2d Conv_20 dtype:int16 shape:(1, 13, 13, 512) strides:(1, 1, 1, 1) padding:(1, 1, 1, 1) bias:(512,) scale:(512,) cshamt_out:20 act_func:leaky_relu_214748368_31 sum_dtype:int64 concur_och:4 stationary:filter keep_input default_addr:32914496 g_index:0 l_index:11 word_alignment:2 aligned_shape:(1, 13, 13, 512) layout:('N', 'H', 'W', 'C') onnx_layout:('N', 'C', 'H', 'W') scale_factor:7199.522558>
  | <max_pool_serial MaxPool_19 dtype:int16 shape:(1, 13, 13, 256) ksize:(1, 2, 2, 1) strides:(1, 2, 2, 1) padding:(0, 0, 0, 0) no_reuse default_addr:32827968 g_index:0 l_index:10 word_alignment:2 aligned_shape:(1, 13, 13, 256) layout:('N', 'H', 'W', 'C') onnx_layout:('N', 'C', 'H', 'W') scale_factor:2745.399286>
  | <variable module_list.10.Conv2d.weight dtype:int16 shape:(512, 3, 3, 256) default_addr:1790080 g_index:3 word_alignment:2 aligned_shape:(512, 3, 3, 256) layout:('O', 'H', 'W', 'I') onnx_layout:('O', 'I', 'H', 'W') scale_factor:42965.326831>
  | <variable onnx_Conv_20_conv.bias dtype:int32 shape:(512,) default_addr:1790080 g_index:3 word_alignment:1 aligned_shape:(512,) scale_factor:117956977.595654>
  | <variable onnx_Conv_20_conv.scale dtype:int16 shape:(512,) default_addr:1790080 g_index:3 word_alignment:2 aligned_shape:(512,) scale_factor:64.000000>
(Stage 12)
  <pad Pad_23 dtype:int16 shape:(1, 14, 14, 512) padding:(0, 1, 0, 1) default_addr:33087552 g_index:0 l_index:12 word_alignment:2 aligned_shape:(1, 14, 14, 512) layout:('N', 'H', 'W', 'C') onnx_layout:('N', 'C', 'H', 'W') scale_factor:7199.522558>
  | <conv2d Conv_20 dtype:int16 shape:(1, 13, 13, 512) strides:(1, 1, 1, 1) padding:(1, 1, 1, 1) bias:(512,) scale:(512,) cshamt_out:20 act_func:leaky_relu_214748368_31 sum_dtype:int64 concur_och:4 stationary:filter keep_input default_addr:32914496 g_index:0 l_index:11 word_alignment:2 aligned_shape:(1, 13, 13, 512) layout:('N', 'H', 'W', 'C') onnx_layout:('N', 'C', 'H', 'W') scale_factor:7199.522558>
(Stage 13)
  <max_pool MaxPool_24 dtype:int16 shape:(1, 13, 13, 512) ksize:(1, 2, 2, 1) strides:(1, 1, 1, 1) padding:(0, 0, 0, 0) default_addr:33288256 g_index:0 l_index:13 word_alignment:2 aligned_shape:(1, 13, 13, 512) layout:('N', 'H', 'W', 'C') onnx_layout:('N', 'C', 'H', 'W') scale_factor:7199.522558>
  | <pad Pad_23 dtype:int16 shape:(1, 14, 14, 512) padding:(0, 1, 0, 1) default_addr:33087552 g_index:0 l_index:12 word_alignment:2 aligned_shape:(1, 14, 14, 512) layout:('N', 'H', 'W', 'C') onnx_layout:('N', 'C', 'H', 'W') scale_factor:7199.522558>
(Stage 14)
  <conv2d Conv_25 dtype:int16 shape:(1, 13, 13, 1024) strides:(1, 1, 1, 1) padding:(1, 1, 1, 1) bias:(1024,) scale:(1024,) cshamt_out:22 act_func:leaky_relu_214748368_31 sum_dtype:int64 concur_och:2 stationary:filter default_addr:33461312 g_index:0 l_index:14 word_alignment:2 aligned_shape:(1, 13, 13, 1024) layout:('N', 'H', 'W', 'C') onnx_layout:('N', 'C', 'H', 'W') scale_factor:1523.915455>
  | <max_pool MaxPool_24 dtype:int16 shape:(1, 13, 13, 512) ksize:(1, 2, 2, 1) strides:(1, 1, 1, 1) padding:(0, 0, 0, 0) default_addr:33288256 g_index:0 l_index:13 word_alignment:2 aligned_shape:(1, 13, 13, 512) layout:('N', 'H', 'W', 'C') onnx_layout:('N', 'C', 'H', 'W') scale_factor:7199.522558>
  | <variable module_list.12.Conv2d.weight dtype:int16 shape:(1024, 3, 3, 512) default_addr:1790080 g_index:3 word_alignment:2 aligned_shape:(1024, 3, 3, 512) layout:('O', 'H', 'W', 'I') onnx_layout:('O', 'I', 'H', 'W') scale_factor:55487.747957>
  | <variable onnx_Conv_25_conv.bias dtype:int32 shape:(1024,) default_addr:1790080 g_index:3 word_alignment:1 aligned_shape:(1024,) scale_factor:399485293.128401>
  | <variable onnx_Conv_25_conv.scale dtype:int16 shape:(1024,) default_addr:1790080 g_index:3 word_alignment:2 aligned_shape:(1024,) scale_factor:16.000000>
(Stage 15)
  <conv2d Conv_28 dtype:int16 shape:(1, 13, 13, 256) strides:(1, 1, 1, 1) padding:(0, 0, 0, 0) bias:(256,) scale:(256,) cshamt_out:20 act_func:leaky_relu_214748368_31 sum_dtype:int64 concur_och:4 stationary:filter keep_input default_addr:33807424 g_index:0 l_index:15 word_alignment:2 aligned_shape:(1, 13, 13, 256) layout:('N', 'H', 'W', 'C') onnx_layout:('N', 'C', 'H', 'W') scale_factor:7965.936386>
  | <conv2d Conv_25 dtype:int16 shape:(1, 13, 13, 1024) strides:(1, 1, 1, 1) padding:(1, 1, 1, 1) bias:(1024,) scale:(1024,) cshamt_out:22 act_func:leaky_relu_214748368_31 sum_dtype:int64 concur_och:2 stationary:filter default_addr:33461312 g_index:0 l_index:14 word_alignment:2 aligned_shape:(1, 13, 13, 1024) layout:('N', 'H', 'W', 'C') onnx_layout:('N', 'C', 'H', 'W') scale_factor:1523.915455>
  | <variable module_list.13.Conv2d.weight dtype:int16 shape:(256, 1, 1, 1024) default_addr:1790080 g_index:3 word_alignment:2 aligned_shape:(256, 1, 1, 1024) layout:('O', 'H', 'W', 'I') onnx_layout:('O', 'I', 'H', 'W') scale_factor:42821.897134>
  | <variable onnx_Conv_28_conv.bias dtype:int32 shape:(256,) default_addr:1790080 g_index:3 word_alignment:1 aligned_shape:(256,) scale_factor:65256950.870254>
  | <variable onnx_Conv_28_conv.scale dtype:int16 shape:(256,) default_addr:1790080 g_index:3 word_alignment:2 aligned_shape:(256,) scale_factor:128.000000>
(Stage 16)
  <conv2d Conv_55 dtype:int16 shape:(1, 13, 13, 128) strides:(1, 1, 1, 1) padding:(0, 0, 0, 0) bias:(128,) scale:(128,) cshamt_out:20 act_func:leaky_relu_214748368_31 sum_dtype:int64 concur_och:16 stationary:filter keep_input default_addr:34411200 g_index:0 l_index:22 word_alignment:2 aligned_shape:(1, 13, 13, 128) layout:('N', 'H', 'W', 'C') onnx_layout:('N', 'C', 'H', 'W') scale_factor:6841.001736>
  | <conv2d Conv_28 dtype:int16 shape:(1, 13, 13, 256) strides:(1, 1, 1, 1) padding:(0, 0, 0, 0) bias:(256,) scale:(256,) cshamt_out:20 act_func:leaky_relu_214748368_31 sum_dtype:int64 concur_och:4 stationary:filter keep_input default_addr:33807424 g_index:0 l_index:15 word_alignment:2 aligned_shape:(1, 13, 13, 256) layout:('N', 'H', 'W', 'C') onnx_layout:('N', 'C', 'H', 'W') scale_factor:7965.936386>
  | <variable module_list.18.Conv2d.weight dtype:int16 shape:(128, 1, 1, 256) default_addr:1790080 g_index:3 word_alignment:2 aligned_shape:(128, 1, 1, 256) layout:('O', 'H', 'W', 'I') onnx_layout:('O', 'I', 'H', 'W') scale_factor:56281.128556>
  | <variable onnx_Conv_55_conv.bias dtype:int32 shape:(128,) default_addr:1790080 g_index:3 word_alignment:1 aligned_shape:(128,) scale_factor:448331889.781508>
  | <variable onnx_Conv_55_conv.scale dtype:int16 shape:(128,) default_addr:1790080 g_index:3 word_alignment:2 aligned_shape:(128,) scale_factor:16.000000>
(Stage 17)
  <upsampling2d Upsample_59 dtype:int16 shape:(1, 26, 26, 128) default_addr:34454464 g_index:0 l_index:23 word_alignment:2 aligned_shape:(1, 26, 26, 128) layout:('N', 'H', 'W', 'C') onnx_layout:('N', 'C', 'H', 'W') scale_factor:6841.001736>
  | <conv2d Conv_55 dtype:int16 shape:(1, 13, 13, 128) strides:(1, 1, 1, 1) padding:(0, 0, 0, 0) bias:(128,) scale:(128,) cshamt_out:20 act_func:leaky_relu_214748368_31 sum_dtype:int64 concur_och:16 stationary:filter keep_input default_addr:34411200 g_index:0 l_index:22 word_alignment:2 aligned_shape:(1, 13, 13, 128) layout:('N', 'H', 'W', 'C') onnx_layout:('N', 'C', 'H', 'W') scale_factor:6841.001736>
(Stage 18)
  <scaled_concat Concat_60 dtype:int16 shape:(1, 26, 26, 384) buffered scales:(128, 319) shamt:8 default_addr:34627520 g_index:0 l_index:24 word_alignment:2 aligned_shape:(1, 26, 26, 384) layout:('N', 'H', 'W', 'C') onnx_layout:('N', 'C', 'H', 'W') scale_factor:3420.500868>
  | <upsampling2d Upsample_59 dtype:int16 shape:(1, 26, 26, 128) default_addr:34454464 g_index:0 l_index:23 word_alignment:2 aligned_shape:(1, 26, 26, 128) layout:('N', 'H', 'W', 'C') onnx_layout:('N', 'C', 'H', 'W') scale_factor:6841.001736>
  | <conv2d Conv_16 dtype:int16 shape:(1, 26, 26, 256) strides:(1, 1, 1, 1) padding:(1, 1, 1, 1) bias:(256,) scale:(256,) cshamt_out:19 act_func:leaky_relu_214748368_31 sum_dtype:int64 concur_och:8 stationary:filter default_addr:32481856 g_index:0 l_index:9 word_alignment:2 aligned_shape:(1, 26, 26, 256) layout:('N', 'H', 'W', 'C') onnx_layout:('N', 'C', 'H', 'W') scale_factor:2745.399286>
(Stage 19)
  <conv2d Conv_61 dtype:int16 shape:(1, 26, 26, 256) strides:(1, 1, 1, 1) padding:(1, 1, 1, 1) bias:(256,) scale:(256,) cshamt_out:22 act_func:leaky_relu_214748368_31 sum_dtype:int64 concur_och:2 stationary:filter default_addr:35146688 g_index:0 l_index:25 word_alignment:2 aligned_shape:(1, 26, 26, 256) layout:('N', 'H', 'W', 'C') onnx_layout:('N', 'C', 'H', 'W') scale_factor:4418.436110>
  | <scaled_concat Concat_60 dtype:int16 shape:(1, 26, 26, 384) buffered scales:(128, 319) shamt:8 default_addr:34627520 g_index:0 l_index:24 word_alignment:2 aligned_shape:(1, 26, 26, 384) layout:('N', 'H', 'W', 'C') onnx_layout:('N', 'C', 'H', 'W') scale_factor:3420.500868>
  | <variable module_list.21.Conv2d.weight dtype:int16 shape:(256, 3, 3, 384) default_addr:1790080 g_index:3 word_alignment:2 aligned_shape:(256, 3, 3, 384) layout:('O', 'H', 'W', 'I') onnx_layout:('O', 'I', 'H', 'W') scale_factor:84656.206817>
  | <variable onnx_Conv_61_conv.bias dtype:int32 shape:(256,) default_addr:1790080 g_index:3 word_alignment:1 aligned_shape:(256,) scale_factor:289566628.905870>
  | <variable onnx_Conv_61_conv.scale dtype:int16 shape:(256,) default_addr:1790080 g_index:3 word_alignment:2 aligned_shape:(256,) scale_factor:64.000000>
(Stage 20)
  <conv2d Conv_64 dtype:int16 shape:(1, 26, 26, 255) strides:(1, 1, 1, 1) padding:(0, 0, 0, 0) bias:(255,) scale:(1,) cshamt_out:17 sum_dtype:int64 concur_och:16 stationary:filter keep_input default_addr:35492800 g_index:0 l_index:26 word_alignment:2 aligned_shape:(1, 26, 26, 256) layout:('N', 'H', 'W', 'C') onnx_layout:('N', 'C', 'H', 'W') scale_factor:1457.059043>
  | <conv2d Conv_61 dtype:int16 shape:(1, 26, 26, 256) strides:(1, 1, 1, 1) padding:(1, 1, 1, 1) bias:(256,) scale:(256,) cshamt_out:22 act_func:leaky_relu_214748368_31 sum_dtype:int64 concur_och:2 stationary:filter default_addr:35146688 g_index:0 l_index:25 word_alignment:2 aligned_shape:(1, 26, 26, 256) layout:('N', 'H', 'W', 'C') onnx_layout:('N', 'C', 'H', 'W') scale_factor:4418.436110>
  | <variable module_list.22.Conv2d.weight dtype:int16 shape:(255, 1, 1, 256) default_addr:1790080 g_index:3 word_alignment:2 aligned_shape:(255, 1, 1, 256) layout:('O', 'H', 'W', 'I') onnx_layout:('O', 'I', 'H', 'W') scale_factor:43223.357344>
  | <variable module_list.22.Conv2d.bias dtype:int32 shape:(255,) default_addr:1790080 g_index:3 word_alignment:1 aligned_shape:(255,) scale_factor:190979642.883883>
  | <variable onnx_Conv_64_conv.scale dtype:int16 shape:(1,) default_addr:1790080 g_index:3 word_alignment:2 aligned_shape:(2,) scale_factor:1.000000>
(Stage 21)
  <_lazy_reshape Reshape_66 dtype:int16 shape:(1, 26, 26, 3, 85) default_addr:35838912 g_index:0 l_index:27 word_alignment:2 aligned_shape:(1, 26, 26, 3, 86) layout:('N', 'H', 'W', 'X0', 'X1') onnx_layout:('N', 'X0', 'X1', 'H', 'W') scale_factor:1457.059043>
  | <conv2d Conv_64 dtype:int16 shape:(1, 26, 26, 255) strides:(1, 1, 1, 1) padding:(0, 0, 0, 0) bias:(255,) scale:(1,) cshamt_out:17 sum_dtype:int64 concur_och:16 stationary:filter keep_input default_addr:35492800 g_index:0 l_index:26 word_alignment:2 aligned_shape:(1, 26, 26, 256) layout:('N', 'H', 'W', 'C') onnx_layout:('N', 'C', 'H', 'W') scale_factor:1457.059043>
(Stage 22)
  <transpose Transpose_67 dtype:int16 shape:(1, 3, 26, 26, 85) perm:(0, 3, 1, 2, 4) onnx_perm:(0, 1, 3, 4, 2) default_addr:36187776 g_index:0 l_index:28 word_alignment:2 aligned_shape:(1, 3, 26, 26, 86) layout:('N', 'X0', 'H', 'W', 'X1') onnx_layout:('N', 'X0', 'H', 'W', 'X1') scale_factor:1457.059043>
  | <_lazy_reshape Reshape_66 dtype:int16 shape:(1, 26, 26, 3, 85) default_addr:35838912 g_index:0 l_index:27 word_alignment:2 aligned_shape:(1, 26, 26, 3, 86) layout:('N', 'H', 'W', 'X0', 'X1') onnx_layout:('N', 'X0', 'X1', 'H', 'W') scale_factor:1457.059043>
(Stage 23)
  <_lazy_reshape Reshape_69 dtype:int16 shape:(2028, 85) alias_of:Transpose_67 default_addr:36187776 g_index:0 l_index:28 word_alignment:2 aligned_shape:(2028, 86) onnx_layout:('X0', 'X1') scale_factor:1457.059043>
  | <transpose Transpose_67 dtype:int16 shape:(1, 3, 26, 26, 85) perm:(0, 3, 1, 2, 4) onnx_perm:(0, 1, 3, 4, 2) default_addr:36187776 g_index:0 l_index:28 word_alignment:2 aligned_shape:(1, 3, 26, 26, 86) layout:('N', 'X0', 'H', 'W', 'X1') onnx_layout:('N', 'X0', 'H', 'W', 'X1') scale_factor:1457.059043>
(Stage 24)
  <slice_ None dtype:int16 shape:(2028, 1) begins:(0, 4) ends:(2028, 5) strides:(1, 1)  default_addr:36861120 g_index:0 l_index:30 word_alignment:2 aligned_shape:(2028, 2) onnx_layout:('X0', 'X1') scale_factor:1457.059043>
  | <_lazy_reshape Reshape_69 dtype:int16 shape:(2028, 85) alias_of:Transpose_67 default_addr:36187776 g_index:0 l_index:28 word_alignment:2 aligned_shape:(2028, 86) onnx_layout:('X0', 'X1') scale_factor:1457.059043>
(Stage 25)
  <slice_ None dtype:int16 shape:(2028, 80) begins:(0, 5) ends:(2028, 85) strides:(1, 1)  default_addr:36536640 g_index:0 l_index:29 word_alignment:2 aligned_shape:(2028, 80) onnx_layout:('X0', 'X1') scale_factor:1457.059043>
  | <_lazy_reshape Reshape_69 dtype:int16 shape:(2028, 85) alias_of:Transpose_67 default_addr:36187776 g_index:0 l_index:28 word_alignment:2 aligned_shape:(2028, 86) onnx_layout:('X0', 'X1') scale_factor:1457.059043>
(Stage 26)
  <multiply Mul_82 dtype:int16 shape:(2028, 80) default_addr:36950400 g_index:0 l_index:32 word_alignment:2 aligned_shape:(2028, 80) onnx_layout:('X0', 'X1') scale_factor:31130.000000>
  | <sigmoid Sigmoid_79 dtype:int16 shape:(2028, 80) chained default_addr:0 word_alignment:2 aligned_shape:(2028, 80) onnx_layout:('X0', 'X1') scale_factor:31130.000000>
  | | <slice_ None dtype:int16 shape:(2028, 80) begins:(0, 5) ends:(2028, 85) strides:(1, 1)  default_addr:36536640 g_index:0 l_index:29 word_alignment:2 aligned_shape:(2028, 80) onnx_layout:('X0', 'X1') scale_factor:1457.059043>
  | <sigmoid Sigmoid_81 dtype:int16 shape:(2028, 1) chained default_addr:0 word_alignment:2 aligned_shape:(2028, 2) onnx_layout:('X0', 'X1') scale_factor:31130.000000>
  | | <slice_ None dtype:int16 shape:(2028, 1) begins:(0, 4) ends:(2028, 5) strides:(1, 1)  default_addr:36861120 g_index:0 l_index:30 word_alignment:2 aligned_shape:(2028, 2) onnx_layout:('X0', 'X1') scale_factor:1457.059043>
(Stage 27)
  <conv2d Conv_31 dtype:int16 shape:(1, 13, 13, 512) strides:(1, 1, 1, 1) padding:(1, 1, 1, 1) bias:(512,) scale:(512,) cshamt_out:21 act_func:leaky_relu_214748368_31 sum_dtype:int64 concur_och:4 stationary:filter keep_input default_addr:33893952 g_index:0 l_index:16 word_alignment:2 aligned_shape:(1, 13, 13, 512) layout:('N', 'H', 'W', 'C') onnx_layout:('N', 'C', 'H', 'W') scale_factor:4674.111764>
  | <conv2d Conv_28 dtype:int16 shape:(1, 13, 13, 256) strides:(1, 1, 1, 1) padding:(0, 0, 0, 0) bias:(256,) scale:(256,) cshamt_out:20 act_func:leaky_relu_214748368_31 sum_dtype:int64 concur_och:4 stationary:filter keep_input default_addr:33807424 g_index:0 l_index:15 word_alignment:2 aligned_shape:(1, 13, 13, 256) layout:('N', 'H', 'W', 'C') onnx_layout:('N', 'C', 'H', 'W') scale_factor:7965.936386>
  | <variable module_list.14.Conv2d.weight dtype:int16 shape:(512, 3, 3, 256) default_addr:1790080 g_index:3 word_alignment:2 aligned_shape:(512, 3, 3, 256) layout:('O', 'H', 'W', 'I') onnx_layout:('O', 'I', 'H', 'W') scale_factor:76908.118207>
  | <variable onnx_Conv_31_conv.bias dtype:int32 shape:(512,) default_addr:1790080 g_index:3 word_alignment:1 aligned_shape:(512,) scale_factor:612645177.166219>
  | <variable onnx_Conv_31_conv.scale dtype:int16 shape:(512,) default_addr:1790080 g_index:3 word_alignment:2 aligned_shape:(512,) scale_factor:16.000000>
(Stage 28)
  <conv2d Conv_34 dtype:int16 shape:(1, 13, 13, 255) strides:(1, 1, 1, 1) padding:(0, 0, 0, 0) bias:(255,) scale:(1,) cshamt_out:17 sum_dtype:int64 concur_och:8 stationary:filter keep_input default_addr:34067008 g_index:0 l_index:17 word_alignment:2 aligned_shape:(1, 13, 13, 256) layout:('N', 'H', 'W', 'C') onnx_layout:('N', 'C', 'H', 'W') scale_factor:1117.906799>
  | <conv2d Conv_31 dtype:int16 shape:(1, 13, 13, 512) strides:(1, 1, 1, 1) padding:(1, 1, 1, 1) bias:(512,) scale:(512,) cshamt_out:21 act_func:leaky_relu_214748368_31 sum_dtype:int64 concur_och:4 stationary:filter keep_input default_addr:33893952 g_index:0 l_index:16 word_alignment:2 aligned_shape:(1, 13, 13, 512) layout:('N', 'H', 'W', 'C') onnx_layout:('N', 'C', 'H', 'W') scale_factor:4674.111764>
  | <variable module_list.15.Conv2d.weight dtype:int16 shape:(255, 1, 1, 512) default_addr:1790080 g_index:3 word_alignment:2 aligned_shape:(255, 1, 1, 512) layout:('O', 'H', 'W', 'I') onnx_layout:('O', 'I', 'H', 'W') scale_factor:31348.475905>
  | <variable module_list.15.Conv2d.bias dtype:int32 shape:(255,) default_addr:1790080 g_index:3 word_alignment:1 aligned_shape:(255,) scale_factor:146526280.017658>
  | <variable onnx_Conv_34_conv.scale dtype:int16 shape:(1,) default_addr:1790080 g_index:3 word_alignment:2 aligned_shape:(2,) scale_factor:1.000000>
(Stage 29)
  <_lazy_reshape Reshape_36 dtype:int16 shape:(1, 13, 13, 3, 85) default_addr:34153536 g_index:0 l_index:18 word_alignment:2 aligned_shape:(1, 13, 13, 3, 86) layout:('N', 'H', 'W', 'X0', 'X1') onnx_layout:('N', 'X0', 'X1', 'H', 'W') scale_factor:1117.906799>
  | <conv2d Conv_34 dtype:int16 shape:(1, 13, 13, 255) strides:(1, 1, 1, 1) padding:(0, 0, 0, 0) bias:(255,) scale:(1,) cshamt_out:17 sum_dtype:int64 concur_och:8 stationary:filter keep_input default_addr:34067008 g_index:0 l_index:17 word_alignment:2 aligned_shape:(1, 13, 13, 256) layout:('N', 'H', 'W', 'C') onnx_layout:('N', 'C', 'H', 'W') scale_factor:1117.906799>
(Stage 30)
  <transpose Transpose_37 dtype:int16 shape:(1, 3, 13, 13, 85) perm:(0, 3, 1, 2, 4) onnx_perm:(0, 1, 3, 4, 2) default_addr:34240768 g_index:0 l_index:19 word_alignment:2 aligned_shape:(1, 3, 13, 13, 86) layout:('N', 'X0', 'H', 'W', 'X1') onnx_layout:('N', 'X0', 'H', 'W', 'X1') scale_factor:1117.906799>
  | <_lazy_reshape Reshape_36 dtype:int16 shape:(1, 13, 13, 3, 85) default_addr:34153536 g_index:0 l_index:18 word_alignment:2 aligned_shape:(1, 13, 13, 3, 86) layout:('N', 'H', 'W', 'X0', 'X1') onnx_layout:('N', 'X0', 'X1', 'H', 'W') scale_factor:1117.906799>
(Stage 31)
  <_lazy_reshape Reshape_39 dtype:int16 shape:(507, 85) alias_of:Transpose_37 default_addr:34240768 g_index:0 l_index:19 word_alignment:2 aligned_shape:(507, 86) onnx_layout:('X0', 'X1') scale_factor:1117.906799>
  | <transpose Transpose_37 dtype:int16 shape:(1, 3, 13, 13, 85) perm:(0, 3, 1, 2, 4) onnx_perm:(0, 1, 3, 4, 2) default_addr:34240768 g_index:0 l_index:19 word_alignment:2 aligned_shape:(1, 3, 13, 13, 86) layout:('N', 'X0', 'H', 'W', 'X1') onnx_layout:('N', 'X0', 'H', 'W', 'X1') scale_factor:1117.906799>
(Stage 32)
  <slice_ None dtype:int16 shape:(507, 1) begins:(0, 4) ends:(507, 5) strides:(1, 1)  default_addr:34409152 g_index:0 l_index:21 word_alignment:2 aligned_shape:(507, 2) onnx_layout:('X0', 'X1') scale_factor:1117.906799>
  | <_lazy_reshape Reshape_39 dtype:int16 shape:(507, 85) alias_of:Transpose_37 default_addr:34240768 g_index:0 l_index:19 word_alignment:2 aligned_shape:(507, 86) onnx_layout:('X0', 'X1') scale_factor:1117.906799>
(Stage 33)
  <slice_ None dtype:int16 shape:(507, 80) begins:(0, 5) ends:(507, 85) strides:(1, 1)  default_addr:34328000 g_index:0 l_index:20 word_alignment:2 aligned_shape:(507, 80) onnx_layout:('X0', 'X1') scale_factor:1117.906799>
  | <_lazy_reshape Reshape_39 dtype:int16 shape:(507, 85) alias_of:Transpose_37 default_addr:34240768 g_index:0 l_index:19 word_alignment:2 aligned_shape:(507, 86) onnx_layout:('X0', 'X1') scale_factor:1117.906799>
(Stage 34)
  <multiply Mul_52 dtype:int16 shape:(507, 80) default_addr:36869248 g_index:0 l_index:31 word_alignment:2 aligned_shape:(507, 80) onnx_layout:('X0', 'X1') scale_factor:31130.000000>
  | <sigmoid Sigmoid_49 dtype:int16 shape:(507, 80) chained default_addr:0 word_alignment:2 aligned_shape:(507, 80) onnx_layout:('X0', 'X1') scale_factor:31130.000000>
  | | <slice_ None dtype:int16 shape:(507, 80) begins:(0, 5) ends:(507, 85) strides:(1, 1)  default_addr:34328000 g_index:0 l_index:20 word_alignment:2 aligned_shape:(507, 80) onnx_layout:('X0', 'X1') scale_factor:1117.906799>
  | <sigmoid Sigmoid_51 dtype:int16 shape:(507, 1) chained default_addr:0 word_alignment:2 aligned_shape:(507, 2) onnx_layout:('X0', 'X1') scale_factor:31130.000000>
  | | <slice_ None dtype:int16 shape:(507, 1) begins:(0, 4) ends:(507, 5) strides:(1, 1)  default_addr:34409152 g_index:0 l_index:21 word_alignment:2 aligned_shape:(507, 2) onnx_layout:('X0', 'X1') scale_factor:1117.906799>
(Stage 35)
  <scaled_concat Concat_85 dtype:int16 shape:(2535, 80) buffered scales:(1, 1) shamt:7 default_addr:0 g_index:1 word_alignment:2 aligned_shape:(2535, 80) onnx_layout:('X0', 'X1') scale_factor:243.203125>
  | <multiply Mul_52 dtype:int16 shape:(507, 80) default_addr:36869248 g_index:0 l_index:31 word_alignment:2 aligned_shape:(507, 80) onnx_layout:('X0', 'X1') scale_factor:31130.000000>
  | <multiply Mul_82 dtype:int16 shape:(2028, 80) default_addr:36950400 g_index:0 l_index:32 word_alignment:2 aligned_shape:(2028, 80) onnx_layout:('X0', 'X1') scale_factor:31130.000000>
[RAM (spec: num)]
  32-bit 1024-entry 2-port 1-bank RAM: 1
  16-bit 524288-entry 2-port 2-bank RAM: 2   # <= Huge RAMs are requested.
  16-bit 8192-entry 2-port 2-bank RAM: 9
  16-bit 2048-entry 2-port 2-bank RAM: 10
[Substream (spec: num)]
  ('_max', (16, 0, True, 4)): 1
  ('acc_rshift_round_frac', (64, 0, True, 64, 0, True)): 1
  ('add_tree', (64, 0, True, 1)): 1
  ('add_tree', (64, 0, True, 9)): 1
  ('mul_rshift_round_clip', (64, 0, True, 16, 0, True, 80, 0, True, 16, 0, True, False)): 1
  ('mul_rshift_round_madd', (16, 0, True, 16, 0, True, 32, 0, True)): 9
  ('reduce_max', (16, 0, True)): 1
[Stream (spec: num)]
  (((<class 'nngen.operator.conv2d.conv2d'>, <dtype int16>, <dtype int16>, <dtype int32>, <dtype int16>), <dtype int16>, 1), 3, 3, None, <dtype int64>, 1, 1, 1, 1, 9, 9): 1
  (((<class 'nngen.operator.pool_serial.max_pool_serial'>, <dtype int16>), <dtype int16>, 1), 2, 2, True, 1): 1
  (((<class 'nngen.operator.pad.pad'>, <dtype int16>), <dtype int16>, 1), 1, 1, 1): 1
  (((<class 'nngen.operator.pool.max_pool'>, <dtype int16>), <dtype int16>, 1), 2, 2, 1): 1
  (((<class 'nngen.operator.conv2d.conv2d'>, <dtype int16>, <dtype int16>, <dtype int32>, <dtype int16>), <dtype int16>, 1), 1, 1, None, <dtype int64>, 1, 1, 1, 1, 1, 1): 1
  ((<class 'nngen.operator.upsampling2d.upsampling2d'>, <dtype int16>), <dtype int16>, 1): 1
  ((<class 'nngen.operator.normalize.scaled_concat'>, <dtype int16>, <dtype int16>), <dtype int16>, 1): 1
  (((<class 'nngen.operator.basic._lazy_reshape'>, <dtype int16>), <dtype int16>, 1), False): 1
  ((<class 'nngen.operator.basic.transpose'>, <dtype int16>), <dtype int16>, 1): 1
  (((<class 'nngen.operator.basic._lazy_reshape'>, <dtype int16>), <dtype int16>, 1), True): 1
  (((<class 'nngen.operator.slice_.slice_'>, <dtype int16>), <dtype int16>, 1), 2, 1): 1
  ((<class 'nngen.operator.basic.multiply'>, (((<class 'nngen.operator.sigmoid.sigmoid'>, <dtype int16>), <dtype int16>, 1), 8, 6.0, 0.95), (((<class 'nngen.operator.sigmoid.sigmoid'>, <dtype int16>), <dtype int16>, 1), 8, 6.0, 0.95)), <dtype int16>, 1): 1
[State IDs in main_fsm]
  (3, 4, 'act', 'None')
  (12, 14, 'Conv_0', 'control_conv2d_73')
  (19, 21, 'MaxPool_3', 'control_max_pool_serial_75')
  (29, 31, 'Conv_4', 'control_conv2d_73')
  (36, 38, 'MaxPool_7', 'control_max_pool_serial_75')
  (46, 48, 'Conv_8', 'control_conv2d_73')
  (53, 55, 'MaxPool_11', 'control_max_pool_serial_75')
  (63, 65, 'Conv_12', 'control_conv2d_73')
  (70, 72, 'MaxPool_15', 'control_max_pool_serial_75')
  (80, 82, 'Conv_16', 'control_conv2d_73')
  (87, 89, 'MaxPool_19', 'control_max_pool_serial_75')
  (97, 99, 'Conv_20', 'control_conv2d_73')
  (103, 105, 'Pad_23', 'control_pad_100')
  (109, 111, 'MaxPool_24', 'control_max_pool_101')
  (119, 121, 'Conv_25', 'control_conv2d_73')
  (129, 131, 'Conv_28', 'control_conv2d_108')
  (139, 141, 'Conv_55', 'control_conv2d_108')
  (145, 147, 'Upsample_59', 'control_upsampling2d_128')
  (153, 155, 'Concat_60', 'control_scaled_concat_129')
  (163, 165, 'Conv_61', 'control_conv2d_73')
  (173, 175, 'Conv_64', 'control_conv2d_108')
  (180, 182, 'Reshape_66', 'control__lazy_reshape_136')
  (187, 189, 'Transpose_67', 'control_transpose_137')
  (190, 191, 'Reshape_69', 'None')
  (196, 198, None, 'control_slice__141')
  (203, 205, None, 'control_slice__141')
  (211, 213, 'Mul_82', 'control_multiply_143')
  (221, 223, 'Conv_31', 'control_conv2d_73')
  (231, 233, 'Conv_34', 'control_conv2d_108')
  (238, 240, 'Reshape_36', 'control__lazy_reshape_136')
  (245, 247, 'Transpose_37', 'control_transpose_137')
  (248, 249, 'Reshape_39', 'None')
  (254, 256, None, 'control_slice__141')
  (261, 263, None, 'control_slice__141')
  (269, 271, 'Mul_52', 'control_multiply_143')
  (277, 279, 'Concat_85', 'control_scaled_concat_129')
[Control (name (# states: num))]
  main_fsm (# states: 285)
  control_conv2d_73 (# states: 56)
  control_max_pool_serial_75 (# states: 26)
  control_pad_100 (# states: 20)
  control_max_pool_101 (# states: 28)
  control_conv2d_108 (# states: 40)
  control_upsampling2d_128 (# states: 31)
  control_scaled_concat_129 (# states: 37)
  control__lazy_reshape_136 (# states: 22)
  control_transpose_137 (# states: 22)
  control_slice__141 (# states: 19)
  control_multiply_143 (# states: 44)
[Register Map]
    0 (R ): header0 (default: 0x00000000)
    4 (R ): header1 (default: 0x00000000)
    8 (R ): header2 (default: 0x00000000)
   12 (R ): header3 (default: 0x00000000)
   16 ( W): Start (set '1' to run)
   20 (R ): Busy (returns '1' when running)
   24 ( W): Reset (set '1' to initialize internal logic)
   28 (R ): Opcode from extern objects to SW (returns '0' when idle)
   32 ( W): Resume extern objects (set '1' to resume)
   36 (R ): Interrupt Status Register
   40 ( W): Interrupt Enable Register
   44 ( W): Interrupt Acknowledge Register
   48 (R ): State Counter
   52 ( W): Count Target
   56 ( W): Count Divider
   60 (X): reserved ..
  120 (X): .. reserved
  124 (R ): Address space amount
  128 (RW): Global address offset (default: 0)
  132 (RW): Address of temporal storages (size: 17356KB)
  136 (RW): Address of output (scaled_concat) 'Concat_85' (size: 397KB, dtype: int16, shape: (2535, 80), alignment: 2 words (4 bytes)), aligned shape: (2535, 80)
  140 (RW): Address of placeholder 'act' (size: 1352KB, dtype: int16, shape: (1, 416, 416, 3), alignment: 2 words (4 bytes)), aligned shape: (1, 416, 416, 4)
  144 (RW): Address of variables 'module_list.0.Conv2d.weight', 'onnx_Conv_0_conv.bias', 'onnx_Conv_0_conv.scale', 'module_list.2.Conv2d.weight', 'onnx_Conv_4_conv.bias', 'onnx_Conv_4_conv.scale', 'module_list.4.Conv2d.weight', 'onnx_Conv_8_conv.bias', 'onnx_Conv_8_conv.scale', 'module_list.6.Conv2d.weight', 'onnx_Conv_12_conv.bias', 'onnx_Conv_12_conv.scale', 'module_list.8.Conv2d.weight', 'onnx_Conv_16_conv.bias', 'onnx_Conv_16_conv.scale', 'module_list.10.Conv2d.weight', 'onnx_Conv_20_conv.bias', 'onnx_Conv_20_conv.scale', 'module_list.12.Conv2d.weight', 'onnx_Conv_25_conv.bias', 'onnx_Conv_25_conv.scale', 'module_list.13.Conv2d.weight', 'onnx_Conv_28_conv.bias', 'onnx_Conv_28_conv.scale', 'module_list.14.Conv2d.weight', 'onnx_Conv_31_conv.bias', 'onnx_Conv_31_conv.scale', 'module_list.15.Conv2d.weight', 'module_list.15.Conv2d.bias', 'onnx_Conv_34_conv.scale', 'module_list.18.Conv2d.weight', 'onnx_Conv_55_conv.bias', 'onnx_Conv_55_conv.scale', 'module_list.21.Conv2d.weight', 'onnx_Conv_61_conv.bias', 'onnx_Conv_61_conv.scale', 'module_list.22.Conv2d.weight', 'module_list.22.Conv2d.bias', 'onnx_Conv_64_conv.scale' (size: 17298KB)
[Default Memory Map (start - end)] (entire range: [0 - 37274879], size: 36402KB)
  [       0 -   405631]: output (scaled_concat) 'Concat_85' (size: 397KB, dtype: int16, shape: (2535, 80), alignment: 2 words (4 bytes)), aligned shape: (2535, 80)
  [  405632 -  1790079]: placeholder 'act' (size: 1352KB, dtype: int16, shape: (1, 416, 416, 3), alignment: 2 words (4 bytes)), aligned shape: (1, 416, 416, 4)
  [ 1790080 -  1791231]: variable 'module_list.0.Conv2d.weight' (size: 2KB, dtype: int16, shape: (16, 3, 3, 3), alignment: 2 words (4 bytes)), aligned shape: (16, 3, 3, 4)
  [ 1791232 -  1791295]: variable 'onnx_Conv_0_conv.bias' (size: 64B, dtype: int32, shape: (16,), alignment: 1 words (4 bytes)), aligned shape: (16,)
  [ 1791296 -  1791359]: variable 'onnx_Conv_0_conv.scale' (size: 64B, dtype: int16, shape: (16,), alignment: 2 words (4 bytes)), aligned shape: (16,)
  [ 1791360 -  1800575]: variable 'module_list.2.Conv2d.weight' (size: 9KB, dtype: int16, shape: (32, 3, 3, 16), alignment: 2 words (4 bytes)), aligned shape: (32, 3, 3, 16)
  [ 1800576 -  1800703]: variable 'onnx_Conv_4_conv.bias' (size: 128B, dtype: int32, shape: (32,), alignment: 1 words (4 bytes)), aligned shape: (32,)
  [ 1800704 -  1800767]: variable 'onnx_Conv_4_conv.scale' (size: 64B, dtype: int16, shape: (32,), alignment: 2 words (4 bytes)), aligned shape: (32,)
  [ 1800768 -  1837631]: variable 'module_list.4.Conv2d.weight' (size: 36KB, dtype: int16, shape: (64, 3, 3, 32), alignment: 2 words (4 bytes)), aligned shape: (64, 3, 3, 32)
  [ 1837632 -  1837887]: variable 'onnx_Conv_8_conv.bias' (size: 256B, dtype: int32, shape: (64,), alignment: 1 words (4 bytes)), aligned shape: (64,)
  [ 1837888 -  1838015]: variable 'onnx_Conv_8_conv.scale' (size: 128B, dtype: int16, shape: (64,), alignment: 2 words (4 bytes)), aligned shape: (64,)
  [ 1838016 -  1985471]: variable 'module_list.6.Conv2d.weight' (size: 144KB, dtype: int16, shape: (128, 3, 3, 64), alignment: 2 words (4 bytes)), aligned shape: (128, 3, 3, 64)
  [ 1985472 -  1985983]: variable 'onnx_Conv_12_conv.bias' (size: 512B, dtype: int32, shape: (128,), alignment: 1 words (4 bytes)), aligned shape: (128,)
  [ 1985984 -  1986239]: variable 'onnx_Conv_12_conv.scale' (size: 256B, dtype: int16, shape: (128,), alignment: 2 words (4 bytes)), aligned shape: (128,)
  [ 1986240 -  2576063]: variable 'module_list.8.Conv2d.weight' (size: 576KB, dtype: int16, shape: (256, 3, 3, 128), alignment: 2 words (4 bytes)), aligned shape: (256, 3, 3, 128)
  [ 2576064 -  2577087]: variable 'onnx_Conv_16_conv.bias' (size: 1KB, dtype: int32, shape: (256,), alignment: 1 words (4 bytes)), aligned shape: (256,)
  [ 2577088 -  2577599]: variable 'onnx_Conv_16_conv.scale' (size: 512B, dtype: int16, shape: (256,), alignment: 2 words (4 bytes)), aligned shape: (256,)
  [ 2577600 -  4936895]: variable 'module_list.10.Conv2d.weight' (size: 2304KB, dtype: int16, shape: (512, 3, 3, 256), alignment: 2 words (4 bytes)), aligned shape: (512, 3, 3, 256)
  [ 4936896 -  4938943]: variable 'onnx_Conv_20_conv.bias' (size: 2KB, dtype: int32, shape: (512,), alignment: 1 words (4 bytes)), aligned shape: (512,)
  [ 4938944 -  4939967]: variable 'onnx_Conv_20_conv.scale' (size: 1KB, dtype: int16, shape: (512,), alignment: 2 words (4 bytes)), aligned shape: (512,)
  [ 4939968 - 14377151]: variable 'module_list.12.Conv2d.weight' (size: 9216KB, dtype: int16, shape: (1024, 3, 3, 512), alignment: 2 words (4 bytes)), aligned shape: (1024, 3, 3, 512)
  [14377152 - 14381247]: variable 'onnx_Conv_25_conv.bias' (size: 4KB, dtype: int32, shape: (1024,), alignment: 1 words (4 bytes)), aligned shape: (1024,)
  [14381248 - 14383295]: variable 'onnx_Conv_25_conv.scale' (size: 2KB, dtype: int16, shape: (1024,), alignment: 2 words (4 bytes)), aligned shape: (1024,)
  [14383296 - 14907583]: variable 'module_list.13.Conv2d.weight' (size: 512KB, dtype: int16, shape: (256, 1, 1, 1024), alignment: 2 words (4 bytes)), aligned shape: (256, 1, 1, 1024)
  [14907584 - 14908607]: variable 'onnx_Conv_28_conv.bias' (size: 1KB, dtype: int32, shape: (256,), alignment: 1 words (4 bytes)), aligned shape: (256,)
  [14908608 - 14909119]: variable 'onnx_Conv_28_conv.scale' (size: 512B, dtype: int16, shape: (256,), alignment: 2 words (4 bytes)), aligned shape: (256,)
  [14909120 - 17268415]: variable 'module_list.14.Conv2d.weight' (size: 2304KB, dtype: int16, shape: (512, 3, 3, 256), alignment: 2 words (4 bytes)), aligned shape: (512, 3, 3, 256)
  [17268416 - 17270463]: variable 'onnx_Conv_31_conv.bias' (size: 2KB, dtype: int32, shape: (512,), alignment: 1 words (4 bytes)), aligned shape: (512,)
  [17270464 - 17271487]: variable 'onnx_Conv_31_conv.scale' (size: 1KB, dtype: int16, shape: (512,), alignment: 2 words (4 bytes)), aligned shape: (512,)
  [17271488 - 17532607]: variable 'module_list.15.Conv2d.weight' (size: 255KB, dtype: int16, shape: (255, 1, 1, 512), alignment: 2 words (4 bytes)), aligned shape: (255, 1, 1, 512)
  [17532608 - 17533631]: variable 'module_list.15.Conv2d.bias' (size: 1KB, dtype: int32, shape: (255,), alignment: 1 words (4 bytes)), aligned shape: (255,)
  [17533632 - 17533695]: variable 'onnx_Conv_34_conv.scale' (size: 64B, dtype: int16, shape: (1,), alignment: 2 words (4 bytes)), aligned shape: (2,)
  [17533696 - 17599231]: variable 'module_list.18.Conv2d.weight' (size: 64KB, dtype: int16, shape: (128, 1, 1, 256), alignment: 2 words (4 bytes)), aligned shape: (128, 1, 1, 256)
  [17599232 - 17599743]: variable 'onnx_Conv_55_conv.bias' (size: 512B, dtype: int32, shape: (128,), alignment: 1 words (4 bytes)), aligned shape: (128,)
  [17599744 - 17599999]: variable 'onnx_Conv_55_conv.scale' (size: 256B, dtype: int16, shape: (128,), alignment: 2 words (4 bytes)), aligned shape: (128,)
  [17600000 - 19369471]: variable 'module_list.21.Conv2d.weight' (size: 1728KB, dtype: int16, shape: (256, 3, 3, 384), alignment: 2 words (4 bytes)), aligned shape: (256, 3, 3, 384)
  [19369472 - 19370495]: variable 'onnx_Conv_61_conv.bias' (size: 1KB, dtype: int32, shape: (256,), alignment: 1 words (4 bytes)), aligned shape: (256,)
  [19370496 - 19371007]: variable 'onnx_Conv_61_conv.scale' (size: 512B, dtype: int16, shape: (256,), alignment: 2 words (4 bytes)), aligned shape: (256,)
  [19371008 - 19501567]: variable 'module_list.22.Conv2d.weight' (size: 128KB, dtype: int16, shape: (255, 1, 1, 256), alignment: 2 words (4 bytes)), aligned shape: (255, 1, 1, 256)
  [19501568 - 19502591]: variable 'module_list.22.Conv2d.bias' (size: 1KB, dtype: int32, shape: (255,), alignment: 1 words (4 bytes)), aligned shape: (255,)
  [19502592 - 19502655]: variable 'onnx_Conv_64_conv.scale' (size: 64B, dtype: int16, shape: (1,), alignment: 2 words (4 bytes)), aligned shape: (2,)
  [19502656 - 37274879]: temporal storages (size: 17356KB)
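
For scale, a rough tally of the on-chip RAM requested in the [RAM (spec: num)] summary above comes to roughly 17.5 Mib, well over the ~7.6 Mb of block RAM on the Ultra96's XCZU3EG, and the two 524288-entry RAMs for the slice_ inputs account for almost all of it. This is my own estimate, not NNgen's accounting, and bank/port duplication may change the exact BRAM cost:

```python
# Rough tally (a sketch, not NNgen's accounting) of the RAMs listed in the
# "[RAM (spec: num)]" summary above: width_bits * depth_entries * count.
rams = [
    (32, 1024, 1),
    (16, 524288, 2),  # <= the two slice_ input buffers
    (16, 8192, 9),
    (16, 2048, 10),
]
total_bits = sum(width * depth * count for width, depth, count in rams)
print(f"{total_bits / 2**20:.1f} Mib requested")  # ~17.5 Mib, dominated by the 524288-entry RAMs
```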