Closed leimao closed 3 months ago
Add `--profilingVerbosity=detailed` to the `trtexec` command.
Seems to be working. Thank you.
$ trtexec --onnx=resnet18-v1-7.onnx --fp16 --int8 --sparsity=force --saveEngine=resnet18-v1-7.engine --exportProfile=resnet18-v1-7.txt --exportLayerInfo=resnet18-v1-7.json --profilingVerbosity=detailed
$ cat resnet18-v1-7.json
{"Layers": [{
"Name": "Reformatting CopyNode for Input Tensor 0 to resnetv15_conv0_fwd + resnetv15_batchnorm0_fwd + resnetv15_relu0_fwd + resnetv15_pool0_fwd",
"LayerType": "Reformat",
"Inputs": [
{
"Name": "data",
"Location": "Device",
"Dimensions": [1,3,224,224],
"Format/Datatype": "Row major linear FP32"
}],
"Outputs": [
{
"Name": "Reformatted Input Tensor 0 to resnetv15_conv0_fwd + resnetv15_batchnorm0_fwd + resnetv15_relu0_fwd + resnetv15_pool0_fwd",
"Location": "Device",
"Dimensions": [1,3,224,224],
"Format/Datatype": "Row major Int8 format"
}],
"ParameterType": "Reformat",
"Origin": "REFORMAT",
"TacticValue": "0x00000000000003e8",
"StreamId": 0,
"Metadata": ""
},{
"Name": "resnetv15_conv0_fwd + resnetv15_batchnorm0_fwd + resnetv15_relu0_fwd + resnetv15_pool0_fwd",
"LayerType": "CaskConvActPool",
"Inputs": [
{
"Name": "Reformatted Input Tensor 0 to resnetv15_conv0_fwd + resnetv15_batchnorm0_fwd + resnetv15_relu0_fwd + resnetv15_pool0_fwd",
"Location": "Device",
"Dimensions": [1,3,224,224],
"Format/Datatype": "Row major Int8 format"
}],
"Outputs": [
{
"Name": "resnetv15_pool0_fwd",
"Location": "Device",
"Dimensions": [1,64,56,56],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
}],
"ParameterType": "ConvActPool",
"ConvParameterType": "Convolution",
"ConvKernel": [7,7],
"ConvPaddingMode": "kEXPLICIT_ROUND_DOWN",
"ConvPrePadding": [3,3],
"ConvPostPadding": [3,3],
"ConvStride": [2,2],
"ConvDilation": [1,1],
"ConvOutMaps": 64,
"ConvGroups": 1,
"ConvWeights": {"Type": "Int8", "Count": 9408},
"ConvBias": {"Type": "Float", "Count": 64},
"ConvHasBias": 1,
"ConvHasReLU": 1,
"ConvHasSparseWeights": 0,
"ConvHasDynamicFilter": 0,
"ConvHasDynamicBias": 0,
"ConvHasResidual": 0,
"ConvConvXAsActInputIdx": -1,
"ConvBiasAsActInputIdx": -1,
"ConvResAsActInputIdx": -1,
"ConvActivation": "RELU",
"PoolingParameterType": "Pooling",
"PoolingPoolingType": "MAX",
"PoolingWindowSize": [3,3],
"PoolingPaddingMode": "kEXPLICIT_ROUND_DOWN",
"PoolingPrePadding": [1,1],
"PoolingPostPadding": [1,1],
"PoolingStride": [2,2],
"PoolingBlendFactor": 0,
"PoolingAverageCountExcludesPadding": 1,
"TacticValue": "0xb71c75095873646c",
"StreamId": 0,
"Metadata": "[ONNX Layer: resnetv15_conv0_fwd]\u001e[ONNX Layer: resnetv15_batchnorm0_fwd]\u001e[ONNX Layer: resnetv15_relu0_fwd]\u001e[ONNX Layer: resnetv15_pool0_fwd]"
},{
"Name": "resnetv15_stage1_conv0_fwd + resnetv15_stage1_batchnorm0_fwd + resnetv15_stage1_relu0_fwd",
"LayerType": "CaskConvolution",
"Inputs": [
{
"Name": "resnetv15_pool0_fwd",
"Location": "Device",
"Dimensions": [1,64,56,56],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
}],
"Outputs": [
{
"Name": "resnetv15_stage1_relu0_fwd",
"Location": "Device",
"Dimensions": [1,64,56,56],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
}],
"ParameterType": "Convolution",
"Kernel": [3,3],
"PaddingMode": "kEXPLICIT_ROUND_DOWN",
"PrePadding": [1,1],
"PostPadding": [1,1],
"Stride": [1,1],
"Dilation": [1,1],
"OutMaps": 64,
"Groups": 1,
"Weights": {"Type": "Int8", "Count": 36864},
"Bias": {"Type": "Float", "Count": 64},
"HasBias": 1,
"HasReLU": 1,
"HasSparseWeights": 1,
"HasDynamicFilter": 0,
"HasDynamicBias": 0,
"HasResidual": 0,
"ConvXAsActInputIdx": -1,
"BiasAsActInputIdx": -1,
"ResAsActInputIdx": -1,
"Activation": "RELU",
"TacticName": "sm80_xmma_fprop_avdt_sparse_int8int8_tilesize64x64x64_tapsperload3_threadspercta128_r3s3_u1v1_scalebias_relu",
"TacticValue": "0x9251329244d2a80b",
"StreamId": 0,
"Metadata": "[ONNX Layer: resnetv15_stage1_conv0_fwd]\u001e[ONNX Layer: resnetv15_stage1_batchnorm0_fwd]\u001e[ONNX Layer: resnetv15_stage1_relu0_fwd]"
},{
"Name": "resnetv15_stage1_conv1_fwd + resnetv15_stage1_batchnorm1_fwd + resnetv15_stage1__plus0 + resnetv15_stage1_activation0",
"LayerType": "CaskConvolution",
"Inputs": [
{
"Name": "resnetv15_stage1_relu0_fwd",
"Location": "Device",
"Dimensions": [1,64,56,56],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
},
{
"Name": "resnetv15_pool0_fwd",
"Location": "Device",
"Dimensions": [1,64,56,56],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
}],
"Outputs": [
{
"Name": "resnetv15_stage1_activation0",
"Location": "Device",
"Dimensions": [1,64,56,56],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
}],
"ParameterType": "Convolution",
"Kernel": [3,3],
"PaddingMode": "kEXPLICIT_ROUND_DOWN",
"PrePadding": [1,1],
"PostPadding": [1,1],
"Stride": [1,1],
"Dilation": [1,1],
"OutMaps": 64,
"Groups": 1,
"Weights": {"Type": "Int8", "Count": 36864},
"Bias": {"Type": "Float", "Count": 64},
"HasBias": 1,
"HasReLU": 1,
"HasSparseWeights": 1,
"HasDynamicFilter": 0,
"HasDynamicBias": 0,
"HasResidual": 1,
"ConvXAsActInputIdx": -1,
"BiasAsActInputIdx": -1,
"ResAsActInputIdx": -1,
"Activation": "RELU",
"TacticName": "sm80_xmma_fprop_avdt_sparse_int8int8_tilesize64x128x32_tapsperload3_threadspercta128_r3s3_u1v1_scalebias_relu",
"TacticValue": "0x4c3eb07218824fb7",
"StreamId": 0,
"Metadata": "[ONNX Layer: resnetv15_stage1_conv1_fwd]\u001e[ONNX Layer: resnetv15_stage1_batchnorm1_fwd]\u001e[ONNX Layer: resnetv15_stage1__plus0]\u001e[ONNX Layer: resnetv15_stage1_activation0]"
},{
"Name": "resnetv15_stage1_conv2_fwd + resnetv15_stage1_batchnorm2_fwd + resnetv15_stage1_relu1_fwd",
"LayerType": "CaskConvolution",
"Inputs": [
{
"Name": "resnetv15_stage1_activation0",
"Location": "Device",
"Dimensions": [1,64,56,56],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
}],
"Outputs": [
{
"Name": "resnetv15_stage1_relu1_fwd",
"Location": "Device",
"Dimensions": [1,64,56,56],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
}],
"ParameterType": "Convolution",
"Kernel": [3,3],
"PaddingMode": "kEXPLICIT_ROUND_DOWN",
"PrePadding": [1,1],
"PostPadding": [1,1],
"Stride": [1,1],
"Dilation": [1,1],
"OutMaps": 64,
"Groups": 1,
"Weights": {"Type": "Int8", "Count": 36864},
"Bias": {"Type": "Float", "Count": 64},
"HasBias": 1,
"HasReLU": 1,
"HasSparseWeights": 1,
"HasDynamicFilter": 0,
"HasDynamicBias": 0,
"HasResidual": 0,
"ConvXAsActInputIdx": -1,
"BiasAsActInputIdx": -1,
"ResAsActInputIdx": -1,
"Activation": "RELU",
"TacticName": "sm80_xmma_fprop_avdt_sparse_int8int8_tilesize64x64x64_tapsperload3_threadspercta128_r3s3_u1v1_scalebias_relu",
"TacticValue": "0x9251329244d2a80b",
"StreamId": 0,
"Metadata": "[ONNX Layer: resnetv15_stage1_conv2_fwd]\u001e[ONNX Layer: resnetv15_stage1_batchnorm2_fwd]\u001e[ONNX Layer: resnetv15_stage1_relu1_fwd]"
},{
"Name": "resnetv15_stage1_conv3_fwd + resnetv15_stage1_batchnorm3_fwd + resnetv15_stage1__plus1 + resnetv15_stage1_activation1",
"LayerType": "CaskConvolution",
"Inputs": [
{
"Name": "resnetv15_stage1_relu1_fwd",
"Location": "Device",
"Dimensions": [1,64,56,56],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
},
{
"Name": "resnetv15_stage1_activation0",
"Location": "Device",
"Dimensions": [1,64,56,56],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
}],
"Outputs": [
{
"Name": "resnetv15_stage1_activation1",
"Location": "Device",
"Dimensions": [1,64,56,56],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
}],
"ParameterType": "Convolution",
"Kernel": [3,3],
"PaddingMode": "kEXPLICIT_ROUND_DOWN",
"PrePadding": [1,1],
"PostPadding": [1,1],
"Stride": [1,1],
"Dilation": [1,1],
"OutMaps": 64,
"Groups": 1,
"Weights": {"Type": "Int8", "Count": 36864},
"Bias": {"Type": "Float", "Count": 64},
"HasBias": 1,
"HasReLU": 1,
"HasSparseWeights": 1,
"HasDynamicFilter": 0,
"HasDynamicBias": 0,
"HasResidual": 1,
"ConvXAsActInputIdx": -1,
"BiasAsActInputIdx": -1,
"ResAsActInputIdx": -1,
"Activation": "RELU",
"TacticName": "sm80_xmma_fprop_avdt_sparse_int8int8_tilesize64x128x32_tapsperload3_threadspercta128_r3s3_u1v1_scalebias_relu",
"TacticValue": "0x4c3eb07218824fb7",
"StreamId": 0,
"Metadata": "[ONNX Layer: resnetv15_stage1_conv3_fwd]\u001e[ONNX Layer: resnetv15_stage1_batchnorm3_fwd]\u001e[ONNX Layer: resnetv15_stage1__plus1]\u001e[ONNX Layer: resnetv15_stage1_activation1]"
},{
"Name": "resnetv15_stage2_conv0_fwd + resnetv15_stage2_batchnorm0_fwd + resnetv15_stage2_relu0_fwd",
"LayerType": "CaskConvolution",
"Inputs": [
{
"Name": "resnetv15_stage1_activation1",
"Location": "Device",
"Dimensions": [1,64,56,56],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
}],
"Outputs": [
{
"Name": "resnetv15_stage2_relu0_fwd",
"Location": "Device",
"Dimensions": [1,128,28,28],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
}],
"ParameterType": "Convolution",
"Kernel": [3,3],
"PaddingMode": "kEXPLICIT_ROUND_DOWN",
"PrePadding": [1,1],
"PostPadding": [1,1],
"Stride": [2,2],
"Dilation": [1,1],
"OutMaps": 128,
"Groups": 1,
"Weights": {"Type": "Int8", "Count": 73728},
"Bias": {"Type": "Float", "Count": 128},
"HasBias": 1,
"HasReLU": 1,
"HasSparseWeights": 1,
"HasDynamicFilter": 0,
"HasDynamicBias": 0,
"HasResidual": 0,
"ConvXAsActInputIdx": -1,
"BiasAsActInputIdx": -1,
"ResAsActInputIdx": -1,
"Activation": "RELU",
"TacticName": "sm80_xmma_fprop_avdt_sparse_int8int8_tilesize64x128x32_tapsperload3_threadspercta128_r3s3_u2v2_scalebias_relu",
"TacticValue": "0xc0bbd49d4144045f",
"StreamId": 0,
"Metadata": "[ONNX Layer: resnetv15_stage2_conv0_fwd]\u001e[ONNX Layer: resnetv15_stage2_batchnorm0_fwd]\u001e[ONNX Layer: resnetv15_stage2_relu0_fwd]"
},{
"Name": "resnetv15_stage2_conv1_fwd + resnetv15_stage2_batchnorm1_fwd",
"LayerType": "CaskConvolution",
"Inputs": [
{
"Name": "resnetv15_stage2_relu0_fwd",
"Location": "Device",
"Dimensions": [1,128,28,28],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
}],
"Outputs": [
{
"Name": "resnetv15_stage2_batchnorm1_fwd",
"Location": "Device",
"Dimensions": [1,128,28,28],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
}],
"ParameterType": "Convolution",
"Kernel": [3,3],
"PaddingMode": "kEXPLICIT_ROUND_DOWN",
"PrePadding": [1,1],
"PostPadding": [1,1],
"Stride": [1,1],
"Dilation": [1,1],
"OutMaps": 128,
"Groups": 1,
"Weights": {"Type": "Int8", "Count": 147456},
"Bias": {"Type": "Float", "Count": 128},
"HasBias": 1,
"HasReLU": 0,
"HasSparseWeights": 1,
"HasDynamicFilter": 0,
"HasDynamicBias": 0,
"HasResidual": 0,
"ConvXAsActInputIdx": -1,
"BiasAsActInputIdx": -1,
"ResAsActInputIdx": -1,
"Activation": "NONE",
"TacticName": "sm80_xmma_fprop_implicit_gemm_interleaved_i8i8_i8i32_f32_nchw_vect_c_32kcrs_vect_c_32_nchw_vect_c_32_tilesize32x64x64_stage6_warpsize2x2x1_g1_tensor16x8x32_t1r3s3",
"TacticValue": "0xd3f592fae61c7986",
"StreamId": 0,
"Metadata": "[ONNX Layer: resnetv15_stage2_conv1_fwd]\u001e[ONNX Layer: resnetv15_stage2_batchnorm1_fwd]"
},{
"Name": "resnetv15_stage2_conv2_fwd + resnetv15_stage2_batchnorm2_fwd + resnetv15_stage2__plus0 + resnetv15_stage2_activation0",
"LayerType": "CaskConvolution",
"Inputs": [
{
"Name": "resnetv15_stage1_activation1",
"Location": "Device",
"Dimensions": [1,64,56,56],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
},
{
"Name": "resnetv15_stage2_batchnorm1_fwd",
"Location": "Device",
"Dimensions": [1,128,28,28],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
}],
"Outputs": [
{
"Name": "resnetv15_stage2_activation0",
"Location": "Device",
"Dimensions": [1,128,28,28],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
}],
"ParameterType": "Convolution",
"Kernel": [1,1],
"PaddingMode": "kEXPLICIT_ROUND_DOWN",
"PrePadding": [0,0],
"PostPadding": [0,0],
"Stride": [2,2],
"Dilation": [1,1],
"OutMaps": 128,
"Groups": 1,
"Weights": {"Type": "Int8", "Count": 8192},
"Bias": {"Type": "Float", "Count": 128},
"HasBias": 1,
"HasReLU": 1,
"HasSparseWeights": 1,
"HasDynamicFilter": 0,
"HasDynamicBias": 0,
"HasResidual": 1,
"ConvXAsActInputIdx": -1,
"BiasAsActInputIdx": -1,
"ResAsActInputIdx": -1,
"Activation": "RELU",
"TacticName": "sm80_xmma_fprop_implicit_gemm_interleaved_i8i8_i8i32_f32_nchw_vect_c_32kcrs_vect_c_32_nchw_vect_c_32_tilesize32x64x64_stage6_warpsize2x2x1_g1_tensor16x8x32_t1r1s1",
"TacticValue": "0x596666386c88024b",
"StreamId": 0,
"Metadata": "[ONNX Layer: resnetv15_stage2_conv2_fwd]\u001e[ONNX Layer: resnetv15_stage2_batchnorm2_fwd]\u001e[ONNX Layer: resnetv15_stage2__plus0]\u001e[ONNX Layer: resnetv15_stage2_activation0]"
},{
"Name": "resnetv15_stage2_conv3_fwd + resnetv15_stage2_batchnorm3_fwd + resnetv15_stage2_relu1_fwd",
"LayerType": "CaskConvolution",
"Inputs": [
{
"Name": "resnetv15_stage2_activation0",
"Location": "Device",
"Dimensions": [1,128,28,28],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
}],
"Outputs": [
{
"Name": "resnetv15_stage2_relu1_fwd",
"Location": "Device",
"Dimensions": [1,128,28,28],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
}],
"ParameterType": "Convolution",
"Kernel": [3,3],
"PaddingMode": "kEXPLICIT_ROUND_DOWN",
"PrePadding": [1,1],
"PostPadding": [1,1],
"Stride": [1,1],
"Dilation": [1,1],
"OutMaps": 128,
"Groups": 1,
"Weights": {"Type": "Int8", "Count": 147456},
"Bias": {"Type": "Float", "Count": 128},
"HasBias": 1,
"HasReLU": 1,
"HasSparseWeights": 1,
"HasDynamicFilter": 0,
"HasDynamicBias": 0,
"HasResidual": 0,
"ConvXAsActInputIdx": -1,
"BiasAsActInputIdx": -1,
"ResAsActInputIdx": -1,
"Activation": "RELU",
"TacticName": "sm80_xmma_fprop_avdt_sparse_int8int8_tilesize64x64x64_tapsperload3_threadspercta128_r3s3_u1v1_scalebias_relu",
"TacticValue": "0x9251329244d2a80b",
"StreamId": 0,
"Metadata": "[ONNX Layer: resnetv15_stage2_conv3_fwd]\u001e[ONNX Layer: resnetv15_stage2_batchnorm3_fwd]\u001e[ONNX Layer: resnetv15_stage2_relu1_fwd]"
},{
"Name": "resnetv15_stage2_conv4_fwd + resnetv15_stage2_batchnorm4_fwd + resnetv15_stage2__plus1 + resnetv15_stage2_activation1",
"LayerType": "CaskConvolution",
"Inputs": [
{
"Name": "resnetv15_stage2_relu1_fwd",
"Location": "Device",
"Dimensions": [1,128,28,28],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
},
{
"Name": "resnetv15_stage2_activation0",
"Location": "Device",
"Dimensions": [1,128,28,28],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
}],
"Outputs": [
{
"Name": "resnetv15_stage2_activation1",
"Location": "Device",
"Dimensions": [1,128,28,28],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
}],
"ParameterType": "Convolution",
"Kernel": [3,3],
"PaddingMode": "kEXPLICIT_ROUND_DOWN",
"PrePadding": [1,1],
"PostPadding": [1,1],
"Stride": [1,1],
"Dilation": [1,1],
"OutMaps": 128,
"Groups": 1,
"Weights": {"Type": "Int8", "Count": 147456},
"Bias": {"Type": "Float", "Count": 128},
"HasBias": 1,
"HasReLU": 1,
"HasSparseWeights": 1,
"HasDynamicFilter": 0,
"HasDynamicBias": 0,
"HasResidual": 1,
"ConvXAsActInputIdx": -1,
"BiasAsActInputIdx": -1,
"ResAsActInputIdx": -1,
"Activation": "RELU",
"TacticName": "sm80_xmma_fprop_avdt_sparse_int8int8_tilesize64x64x64_tapsperload3_threadspercta128_r3s3_u1v1_scalebias_relu",
"TacticValue": "0x9251329244d2a80b",
"StreamId": 0,
"Metadata": "[ONNX Layer: resnetv15_stage2_conv4_fwd]\u001e[ONNX Layer: resnetv15_stage2_batchnorm4_fwd]\u001e[ONNX Layer: resnetv15_stage2__plus1]\u001e[ONNX Layer: resnetv15_stage2_activation1]"
},{
"Name": "resnetv15_stage3_conv0_fwd + resnetv15_stage3_batchnorm0_fwd + resnetv15_stage3_relu0_fwd",
"LayerType": "CaskConvolution",
"Inputs": [
{
"Name": "resnetv15_stage2_activation1",
"Location": "Device",
"Dimensions": [1,128,28,28],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
}],
"Outputs": [
{
"Name": "resnetv15_stage3_relu0_fwd",
"Location": "Device",
"Dimensions": [1,256,14,14],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
}],
"ParameterType": "Convolution",
"Kernel": [3,3],
"PaddingMode": "kEXPLICIT_ROUND_DOWN",
"PrePadding": [1,1],
"PostPadding": [1,1],
"Stride": [2,2],
"Dilation": [1,1],
"OutMaps": 256,
"Groups": 1,
"Weights": {"Type": "Int8", "Count": 294912},
"Bias": {"Type": "Float", "Count": 256},
"HasBias": 1,
"HasReLU": 1,
"HasSparseWeights": 1,
"HasDynamicFilter": 0,
"HasDynamicBias": 0,
"HasResidual": 0,
"ConvXAsActInputIdx": -1,
"BiasAsActInputIdx": -1,
"ResAsActInputIdx": -1,
"Activation": "RELU",
"TacticName": "sm80_xmma_fprop_implicit_gemm_interleaved_i8i8_i8i32_f32_nchw_vect_c_32kcrs_vect_c_32_nchw_vect_c_32_tilesize32x32x64_stage6_warpsize2x1x1_g1_tensor16x8x32_t1r3s3",
"TacticValue": "0x4749124f62d8bd23",
"StreamId": 0,
"Metadata": "[ONNX Layer: resnetv15_stage3_conv0_fwd]\u001e[ONNX Layer: resnetv15_stage3_batchnorm0_fwd]\u001e[ONNX Layer: resnetv15_stage3_relu0_fwd]"
},{
"Name": "resnetv15_stage3_conv1_fwd + resnetv15_stage3_batchnorm1_fwd",
"LayerType": "CaskConvolution",
"Inputs": [
{
"Name": "resnetv15_stage3_relu0_fwd",
"Location": "Device",
"Dimensions": [1,256,14,14],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
}],
"Outputs": [
{
"Name": "resnetv15_stage3_batchnorm1_fwd",
"Location": "Device",
"Dimensions": [1,256,14,14],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
}],
"ParameterType": "Convolution",
"Kernel": [3,3],
"PaddingMode": "kEXPLICIT_ROUND_DOWN",
"PrePadding": [1,1],
"PostPadding": [1,1],
"Stride": [1,1],
"Dilation": [1,1],
"OutMaps": 256,
"Groups": 1,
"Weights": {"Type": "Int8", "Count": 589824},
"Bias": {"Type": "Float", "Count": 256},
"HasBias": 1,
"HasReLU": 0,
"HasSparseWeights": 1,
"HasDynamicFilter": 0,
"HasDynamicBias": 0,
"HasResidual": 0,
"ConvXAsActInputIdx": -1,
"BiasAsActInputIdx": -1,
"ResAsActInputIdx": -1,
"Activation": "NONE",
"TacticName": "sm80_xmma_fprop_implicit_gemm_interleaved_i8i8_i8i32_f32_nchw_vect_c_32kcrs_vect_c_32_nchw_vect_c_32_tilesize32x32x64_stage6_warpsize2x1x1_g1_tensor16x8x32_t1r3s3",
"TacticValue": "0x4749124f62d8bd23",
"StreamId": 0,
"Metadata": "[ONNX Layer: resnetv15_stage3_conv1_fwd]\u001e[ONNX Layer: resnetv15_stage3_batchnorm1_fwd]"
},{
"Name": "resnetv15_stage3_conv2_fwd + resnetv15_stage3_batchnorm2_fwd + resnetv15_stage3__plus0 + resnetv15_stage3_activation0",
"LayerType": "CaskConvolution",
"Inputs": [
{
"Name": "resnetv15_stage2_activation1",
"Location": "Device",
"Dimensions": [1,128,28,28],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
},
{
"Name": "resnetv15_stage3_batchnorm1_fwd",
"Location": "Device",
"Dimensions": [1,256,14,14],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
}],
"Outputs": [
{
"Name": "resnetv15_stage3_activation0",
"Location": "Device",
"Dimensions": [1,256,14,14],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
}],
"ParameterType": "Convolution",
"Kernel": [1,1],
"PaddingMode": "kEXPLICIT_ROUND_DOWN",
"PrePadding": [0,0],
"PostPadding": [0,0],
"Stride": [2,2],
"Dilation": [1,1],
"OutMaps": 256,
"Groups": 1,
"Weights": {"Type": "Int8", "Count": 32768},
"Bias": {"Type": "Float", "Count": 256},
"HasBias": 1,
"HasReLU": 1,
"HasSparseWeights": 1,
"HasDynamicFilter": 0,
"HasDynamicBias": 0,
"HasResidual": 1,
"ConvXAsActInputIdx": -1,
"BiasAsActInputIdx": -1,
"ResAsActInputIdx": -1,
"Activation": "RELU",
"TacticName": "sm80_xmma_fprop_implicit_gemm_interleaved_i8i8_i8i32_f32_nchw_vect_c_32kcrs_vect_c_32_nchw_vect_c_32_tilesize32x32x64_stage6_warpsize2x1x1_g1_tensor16x8x32_t1r1s1",
"TacticValue": "0xcddae68de84cc6ee",
"StreamId": 0,
"Metadata": "[ONNX Layer: resnetv15_stage3_conv2_fwd]\u001e[ONNX Layer: resnetv15_stage3_batchnorm2_fwd]\u001e[ONNX Layer: resnetv15_stage3__plus0]\u001e[ONNX Layer: resnetv15_stage3_activation0]"
},{
"Name": "resnetv15_stage3_conv3_fwd + resnetv15_stage3_batchnorm3_fwd + resnetv15_stage3_relu1_fwd",
"LayerType": "CaskConvolution",
"Inputs": [
{
"Name": "resnetv15_stage3_activation0",
"Location": "Device",
"Dimensions": [1,256,14,14],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
}],
"Outputs": [
{
"Name": "resnetv15_stage3_relu1_fwd",
"Location": "Device",
"Dimensions": [1,256,14,14],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
}],
"ParameterType": "Convolution",
"Kernel": [3,3],
"PaddingMode": "kEXPLICIT_ROUND_DOWN",
"PrePadding": [1,1],
"PostPadding": [1,1],
"Stride": [1,1],
"Dilation": [1,1],
"OutMaps": 256,
"Groups": 1,
"Weights": {"Type": "Int8", "Count": 589824},
"Bias": {"Type": "Float", "Count": 256},
"HasBias": 1,
"HasReLU": 1,
"HasSparseWeights": 1,
"HasDynamicFilter": 0,
"HasDynamicBias": 0,
"HasResidual": 0,
"ConvXAsActInputIdx": -1,
"BiasAsActInputIdx": -1,
"ResAsActInputIdx": -1,
"Activation": "RELU",
"TacticName": "sm80_xmma_fprop_avdt_sparse_int8int8_tilesize64x64x64_tapsperload3_threadspercta128_r3s3_u1v1_scalebias_relu",
"TacticValue": "0x9251329244d2a80b",
"StreamId": 0,
"Metadata": "[ONNX Layer: resnetv15_stage3_conv3_fwd]\u001e[ONNX Layer: resnetv15_stage3_batchnorm3_fwd]\u001e[ONNX Layer: resnetv15_stage3_relu1_fwd]"
},{
"Name": "resnetv15_stage3_conv4_fwd + resnetv15_stage3_batchnorm4_fwd + resnetv15_stage3__plus1 + resnetv15_stage3_activation1",
"LayerType": "CaskConvolution",
"Inputs": [
{
"Name": "resnetv15_stage3_relu1_fwd",
"Location": "Device",
"Dimensions": [1,256,14,14],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
},
{
"Name": "resnetv15_stage3_activation0",
"Location": "Device",
"Dimensions": [1,256,14,14],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
}],
"Outputs": [
{
"Name": "resnetv15_stage3_activation1",
"Location": "Device",
"Dimensions": [1,256,14,14],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
}],
"ParameterType": "Convolution",
"Kernel": [3,3],
"PaddingMode": "kEXPLICIT_ROUND_DOWN",
"PrePadding": [1,1],
"PostPadding": [1,1],
"Stride": [1,1],
"Dilation": [1,1],
"OutMaps": 256,
"Groups": 1,
"Weights": {"Type": "Int8", "Count": 589824},
"Bias": {"Type": "Float", "Count": 256},
"HasBias": 1,
"HasReLU": 1,
"HasSparseWeights": 1,
"HasDynamicFilter": 0,
"HasDynamicBias": 0,
"HasResidual": 1,
"ConvXAsActInputIdx": -1,
"BiasAsActInputIdx": -1,
"ResAsActInputIdx": -1,
"Activation": "RELU",
"TacticName": "sm80_xmma_fprop_avdt_sparse_int8int8_tilesize64x64x64_tapsperload3_threadspercta128_r3s3_u1v1_scalebias_relu",
"TacticValue": "0x9251329244d2a80b",
"StreamId": 0,
"Metadata": "[ONNX Layer: resnetv15_stage3_conv4_fwd]\u001e[ONNX Layer: resnetv15_stage3_batchnorm4_fwd]\u001e[ONNX Layer: resnetv15_stage3__plus1]\u001e[ONNX Layer: resnetv15_stage3_activation1]"
},{
"Name": "resnetv15_stage4_conv0_fwd + resnetv15_stage4_batchnorm0_fwd + resnetv15_stage4_relu0_fwd",
"LayerType": "CaskConvolution",
"Inputs": [
{
"Name": "resnetv15_stage3_activation1",
"Location": "Device",
"Dimensions": [1,256,14,14],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
}],
"Outputs": [
{
"Name": "resnetv15_stage4_relu0_fwd",
"Location": "Device",
"Dimensions": [1,512,7,7],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
}],
"ParameterType": "Convolution",
"Kernel": [3,3],
"PaddingMode": "kEXPLICIT_ROUND_DOWN",
"PrePadding": [1,1],
"PostPadding": [1,1],
"Stride": [2,2],
"Dilation": [1,1],
"OutMaps": 512,
"Groups": 1,
"Weights": {"Type": "Int8", "Count": 1179648},
"Bias": {"Type": "Float", "Count": 512},
"HasBias": 1,
"HasReLU": 1,
"HasSparseWeights": 1,
"HasDynamicFilter": 0,
"HasDynamicBias": 0,
"HasResidual": 0,
"ConvXAsActInputIdx": -1,
"BiasAsActInputIdx": -1,
"ResAsActInputIdx": -1,
"Activation": "RELU",
"TacticName": "sm80_xmma_fprop_implicit_gemm_interleaved_i8i8_i8i32_f32_nchw_vect_c_32kcrs_vect_c_32_nchw_vect_c_32_tilesize32x32x64_stage6_warpsize2x1x1_g1_tensor16x8x32_t1r3s3",
"TacticValue": "0x4749124f62d8bd23",
"StreamId": 0,
"Metadata": "[ONNX Layer: resnetv15_stage4_conv0_fwd]\u001e[ONNX Layer: resnetv15_stage4_batchnorm0_fwd]\u001e[ONNX Layer: resnetv15_stage4_relu0_fwd]"
},{
"Name": "resnetv15_stage4_conv1_fwd + resnetv15_stage4_batchnorm1_fwd",
"LayerType": "CaskConvolution",
"Inputs": [
{
"Name": "resnetv15_stage4_relu0_fwd",
"Location": "Device",
"Dimensions": [1,512,7,7],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
}],
"Outputs": [
{
"Name": "resnetv15_stage4_batchnorm1_fwd",
"Location": "Device",
"Dimensions": [1,512,7,7],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
}],
"ParameterType": "Convolution",
"Kernel": [3,3],
"PaddingMode": "kEXPLICIT_ROUND_DOWN",
"PrePadding": [1,1],
"PostPadding": [1,1],
"Stride": [1,1],
"Dilation": [1,1],
"OutMaps": 512,
"Groups": 1,
"Weights": {"Type": "Int8", "Count": 2359296},
"Bias": {"Type": "Float", "Count": 512},
"HasBias": 1,
"HasReLU": 0,
"HasSparseWeights": 1,
"HasDynamicFilter": 0,
"HasDynamicBias": 0,
"HasResidual": 0,
"ConvXAsActInputIdx": -1,
"BiasAsActInputIdx": -1,
"ResAsActInputIdx": -1,
"Activation": "NONE",
"TacticName": "sm80_xmma_fprop_implicit_gemm_interleaved_i8i8_i8i32_f32_nchw_vect_c_32kcrs_vect_c_32_nchw_vect_c_32_tilesize32x32x64_stage6_warpsize2x1x1_g1_tensor16x8x32_t1r3s3",
"TacticValue": "0x4749124f62d8bd23",
"StreamId": 0,
"Metadata": "[ONNX Layer: resnetv15_stage4_conv1_fwd]\u001e[ONNX Layer: resnetv15_stage4_batchnorm1_fwd]"
},{
"Name": "resnetv15_stage4_conv2_fwd + resnetv15_stage4_batchnorm2_fwd + resnetv15_stage4__plus0 + resnetv15_stage4_activation0",
"LayerType": "CaskConvolution",
"Inputs": [
{
"Name": "resnetv15_stage3_activation1",
"Location": "Device",
"Dimensions": [1,256,14,14],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
},
{
"Name": "resnetv15_stage4_batchnorm1_fwd",
"Location": "Device",
"Dimensions": [1,512,7,7],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
}],
"Outputs": [
{
"Name": "resnetv15_stage4_activation0",
"Location": "Device",
"Dimensions": [1,512,7,7],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
}],
"ParameterType": "Convolution",
"Kernel": [1,1],
"PaddingMode": "kEXPLICIT_ROUND_DOWN",
"PrePadding": [0,0],
"PostPadding": [0,0],
"Stride": [2,2],
"Dilation": [1,1],
"OutMaps": 512,
"Groups": 1,
"Weights": {"Type": "Int8", "Count": 131072},
"Bias": {"Type": "Float", "Count": 512},
"HasBias": 1,
"HasReLU": 1,
"HasSparseWeights": 1,
"HasDynamicFilter": 0,
"HasDynamicBias": 0,
"HasResidual": 1,
"ConvXAsActInputIdx": -1,
"BiasAsActInputIdx": -1,
"ResAsActInputIdx": -1,
"Activation": "RELU",
"TacticName": "sm80_xmma_fprop_implicit_gemm_interleaved_i8i8_i8i32_f32_nchw_vect_c_32kcrs_vect_c_32_nchw_vect_c_32_tilesize32x32x64_stage6_warpsize2x1x1_g1_tensor16x8x32_t1r1s1",
"TacticValue": "0xcddae68de84cc6ee",
"StreamId": 0,
"Metadata": "[ONNX Layer: resnetv15_stage4_conv2_fwd]\u001e[ONNX Layer: resnetv15_stage4_batchnorm2_fwd]\u001e[ONNX Layer: resnetv15_stage4__plus0]\u001e[ONNX Layer: resnetv15_stage4_activation0]"
},{
"Name": "resnetv15_stage4_conv3_fwd + resnetv15_stage4_batchnorm3_fwd + resnetv15_stage4_relu1_fwd",
"LayerType": "CaskConvolution",
"Inputs": [
{
"Name": "resnetv15_stage4_activation0",
"Location": "Device",
"Dimensions": [1,512,7,7],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
}],
"Outputs": [
{
"Name": "resnetv15_stage4_relu1_fwd",
"Location": "Device",
"Dimensions": [1,512,7,7],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
}],
"ParameterType": "Convolution",
"Kernel": [3,3],
"PaddingMode": "kEXPLICIT_ROUND_DOWN",
"PrePadding": [1,1],
"PostPadding": [1,1],
"Stride": [1,1],
"Dilation": [1,1],
"OutMaps": 512,
"Groups": 1,
"Weights": {"Type": "Int8", "Count": 2359296},
"Bias": {"Type": "Float", "Count": 512},
"HasBias": 1,
"HasReLU": 1,
"HasSparseWeights": 1,
"HasDynamicFilter": 0,
"HasDynamicBias": 0,
"HasResidual": 0,
"ConvXAsActInputIdx": -1,
"BiasAsActInputIdx": -1,
"ResAsActInputIdx": -1,
"Activation": "RELU",
"TacticName": "sm80_xmma_fprop_avdt_sparse_int8int8_tilesize64x64x64_tapsperload3_threadspercta128_r3s3_u1v1_scalebias_relu",
"TacticValue": "0x9251329244d2a80b",
"StreamId": 0,
"Metadata": "[ONNX Layer: resnetv15_stage4_conv3_fwd]\u001e[ONNX Layer: resnetv15_stage4_batchnorm3_fwd]\u001e[ONNX Layer: resnetv15_stage4_relu1_fwd]"
},{
"Name": "resnetv15_stage4_conv4_fwd + resnetv15_stage4_batchnorm4_fwd + resnetv15_stage4__plus1 + resnetv15_stage4_activation1",
"LayerType": "CaskConvolution",
"Inputs": [
{
"Name": "resnetv15_stage4_relu1_fwd",
"Location": "Device",
"Dimensions": [1,512,7,7],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
},
{
"Name": "resnetv15_stage4_activation0",
"Location": "Device",
"Dimensions": [1,512,7,7],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
}],
"Outputs": [
{
"Name": "resnetv15_stage4_activation1",
"Location": "Device",
"Dimensions": [1,512,7,7],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
}],
"ParameterType": "Convolution",
"Kernel": [3,3],
"PaddingMode": "kEXPLICIT_ROUND_DOWN",
"PrePadding": [1,1],
"PostPadding": [1,1],
"Stride": [1,1],
"Dilation": [1,1],
"OutMaps": 512,
"Groups": 1,
"Weights": {"Type": "Int8", "Count": 2359296},
"Bias": {"Type": "Float", "Count": 512},
"HasBias": 1,
"HasReLU": 1,
"HasSparseWeights": 1,
"HasDynamicFilter": 0,
"HasDynamicBias": 0,
"HasResidual": 1,
"ConvXAsActInputIdx": -1,
"BiasAsActInputIdx": -1,
"ResAsActInputIdx": -1,
"Activation": "RELU",
"TacticName": "sm80_xmma_fprop_avdt_sparse_int8int8_tilesize64x64x64_tapsperload3_threadspercta128_r3s3_u1v1_scalebias_relu",
"TacticValue": "0x9251329244d2a80b",
"StreamId": 0,
"Metadata": "[ONNX Layer: resnetv15_stage4_conv4_fwd]\u001e[ONNX Layer: resnetv15_stage4_batchnorm4_fwd]\u001e[ONNX Layer: resnetv15_stage4__plus1]\u001e[ONNX Layer: resnetv15_stage4_activation1]"
},{
"Name": "resnetv15_pool1_fwd",
"LayerType": "CaskPooling",
"Inputs": [
{
"Name": "resnetv15_stage4_activation1",
"Location": "Device",
"Dimensions": [1,512,7,7],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
}],
"Outputs": [
{
"Name": "resnetv15_pool1_fwd",
"Location": "Device",
"Dimensions": [1,512,1,1],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
}],
"ParameterType": "Pooling",
"PoolingType": "AVERAGE",
"WindowSize": [7,7],
"PaddingMode": "kEXPLICIT_ROUND_DOWN",
"PrePadding": [0,0],
"PostPadding": [0,0],
"Stride": [1,1],
"BlendFactor": 0,
"AverageCountExcludesPadding": 1,
"TacticName": "sm72_xmma_pooling_IMMA_NCxHW32_gap",
"TacticValue": "0xa3a1a62d21de759d",
"StreamId": 0,
"Metadata": "[ONNX Layer: resnetv15_pool1_fwd]"
},{
"Name": "Reformatting CopyNode for Input Tensor 0 to resnetv15_dense0_fwd + (Unnamed Layer* 78) [ElementWise]",
"LayerType": "NoOp",
"Inputs": [
{
"Name": "resnetv15_pool1_fwd",
"Location": "Device",
"Dimensions": [1,512,1,1],
"Format/Datatype": "Thirty-two wide channel vectorized row major Int8 format"
}],
"Outputs": [
{
"Name": "Reformatted Input Tensor 0 to resnetv15_dense0_fwd + (Unnamed Layer* 78) [ElementWise]",
"Location": "Device",
"Dimensions": [1,512,1,1],
"Format/Datatype": "Four wide channel vectorized row major Int8 format"
}],
"TacticValue": "0x0000000000000000",
"StreamId": 0,
"Metadata": ""
},{
"Name": "resnetv15_dense0_fwd + (Unnamed Layer* 78) [ElementWise]",
"LayerType": "CaskConvolution",
"Inputs": [
{
"Name": "Reformatted Input Tensor 0 to resnetv15_dense0_fwd + (Unnamed Layer* 78) [ElementWise]",
"Location": "Device",
"Dimensions": [1,512,1,1],
"Format/Datatype": "Four wide channel vectorized row major Int8 format"
}],
"Outputs": [
{
"Name": "resnetv15_dense0_fwd_conv_out",
"Location": "Device",
"Dimensions": [1,1000,1,1],
"Format/Datatype": "Row major linear FP32"
}],
"ParameterType": "Convolution",
"Kernel": [1,1],
"PaddingMode": "kEXPLICIT_ROUND_DOWN",
"PrePadding": [0,0],
"PostPadding": [0,0],
"Stride": [1,1],
"Dilation": [1,1],
"OutMaps": 1000,
"Groups": 1,
"Weights": {"Type": "Int8", "Count": 512000},
"Bias": {"Type": "Float", "Count": 1000},
"HasBias": 1,
"HasReLU": 0,
"HasSparseWeights": 1,
"HasDynamicFilter": 0,
"HasDynamicBias": 0,
"HasResidual": 0,
"ConvXAsActInputIdx": -1,
"BiasAsActInputIdx": -1,
"ResAsActInputIdx": -1,
"Activation": "NONE",
"TacticName": "sm70_xmma_fprop_conv1x1_i8f32_f32_f32_nchw_vect_c_4kcrs_vect_c_4_nchw_simt_small_batch_bias_relu",
"TacticValue": "0xc073b0053ce90eac",
"StreamId": 0,
"Metadata": "[ONNX Layer: resnetv15_dense0_fwd]"
},{
"Name": "reshape_after_resnetv15_dense0_fwd",
"LayerType": "NoOp",
"Inputs": [
{
"Name": "resnetv15_dense0_fwd_conv_out",
"Location": "Device",
"Dimensions": [1,1000,1,1],
"Format/Datatype": "Row major linear FP32"
}],
"Outputs": [
{
"Name": "resnetv15_dense0_fwd",
"Location": "Device",
"Dimensions": [1,1000],
"Format/Datatype": "Row major linear FP32"
}],
"TacticValue": "0x0000000000000000",
"StreamId": 0,
"Metadata": "[ONNX Layer: resnetv15_dense0_fwd]"
}],
"Bindings": ["data"
,"resnetv15_dense0_fwd"
]}
BTW, I suggest making `--profilingVerbosity=detailed` the default when `--exportLayerInfo` is turned on. Users examine the layer-wise info precisely because they are interested in this detailed information.
Description
Previously, I remember being able to use `--exportLayerInfo` to dump comprehensive layer-wise info for the engine, including the precision of each layer and the I/O tensor data types and layouts. However, with the `trtexec` from the most recent releases, it seems that this useful information is gone. If this behavior is intended, how do I save the detailed layer-wise info, including sparsity, using `trtexec`?
Environment
NGC Docker container: `nvcr.io/nvidia/tensorrt:24.06-py3`.
Steps To Reproduce