Closed: wadhwasahil closed this issue 2 years ago.
Refer to: https://github.com/jkjung-avt/tensorrt_demos/issues/149#issuecomment-654169120
Make sure you add 1 (for 'background') to num_classes for your custom model. For example, for the Egohands model, num_classes = 1 ('hand') + 1 ('background') = 2.
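For instance (a minimal sketch; the model name and file paths below are hypothetical placeholders), a custom detector with three object classes would get a MODEL_SPECS entry in build_engine.py along these lines:

# Hypothetical MODEL_SPECS entry for a custom 3-class SSD model;
# all names and paths below are placeholders.
'ssd_mobilenet_v2_custom': {
    'input_pb': 'ssd_mobilenet_v2_custom.pb',
    'tmp_uff': 'ssd_mobilenet_v2_custom.uff',
    'output_bin': 'TRT_ssd_mobilenet_v2_custom.bin',
    'num_classes': 3 + 1,      # 3 object classes + 1 for 'background'
    'min_size': 0.2,
    'max_size': 0.95,
    'input_order': [0, 2, 1],  # verify against the generated .pbtxt
},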
@jkjung-avt I have already added 1 to my num_classes. Is there something else?
"""build_engine.py
This script converts a SSD model (pb) to UFF and subsequently builds
the TensorRT engine.
Input : ssd_mobilenet_v[1|2]_[coco|egohands].pb
Output: TRT_ssd_mobilenet_v[1|2]_[coco|egohands].bin
"""
import os
import ctypes
import argparse
import numpy as np
import uff
import tensorrt as trt
import graphsurgeon as gs
DIR_NAME = os.path.dirname(__file__)
LIB_FILE = os.path.abspath(os.path.join(DIR_NAME, 'libflattenconcat.so'))
MODEL_SPECS = {
'ssd_mobilenet_v1_coco': {
'input_pb': os.path.abspath(os.path.join(
DIR_NAME, 'ssd_mobilenet_v1_coco.pb')),
'tmp_uff': os.path.abspath(os.path.join(
DIR_NAME, 'ssd_mobilenet_v1_coco.uff')),
'output_bin': os.path.abspath(os.path.join(
DIR_NAME, 'TRT_ssd_mobilenet_v1_coco.bin')),
'num_classes': 91,
'min_size': 0.2,
'max_size': 0.95,
'input_order': [0, 2, 1], # order of loc_data, conf_data, priorbox_data
},
'ssd_mobilenet_v1_egohands': {
'input_pb': os.path.abspath(os.path.join(
DIR_NAME, 'ssd_mobilenet_v1_egohands.pb')),
'tmp_uff': os.path.abspath(os.path.join(
DIR_NAME, 'ssd_mobilenet_v1_egohands.uff')),
'output_bin': os.path.abspath(os.path.join(
DIR_NAME, 'TRT_ssd_mobilenet_v1_egohands.bin')),
'num_classes': 2,
'min_size': 0.05,
'max_size': 0.95,
'input_order': [0, 2, 1], # order of loc_data, conf_data, priorbox_data
},
'ssd_mobilenet_v2_coco': {
'input_pb': os.path.abspath(os.path.join(
DIR_NAME, 'frozen_inference_graph.pb')),
'tmp_uff': os.path.abspath(os.path.join(
DIR_NAME, 'ssd_mobilenet_v2_coco.uff')),
'output_bin': os.path.abspath(os.path.join(
DIR_NAME, 'TRT_ssd_mobilenet_v2_coco.bin')),
'num_classes': 4,
'min_size': 0.2,
'max_size': 0.95,
'input_order': [0, 2, 1], # order of loc_data, conf_data, priorbox_data
},
'ssd_mobilenet_v2_egohands': {
'input_pb': os.path.abspath(os.path.join(
DIR_NAME, 'ssd_mobilenet_v2_egohands.pb')),
'tmp_uff': os.path.abspath(os.path.join(
DIR_NAME, 'ssd_mobilenet_v2_egohands.uff')),
'output_bin': os.path.abspath(os.path.join(
DIR_NAME, 'TRT_ssd_mobilenet_v2_egohands.bin')),
'num_classes': 91,
'min_size': 0.05,
'max_size': 0.95,
'input_order': [1, 0, 2], # order of loc_data, conf_data, priorbox_data
},
'ssd_inception_v2_coco': {
'input_pb': os.path.abspath(os.path.join(
DIR_NAME, 'ssd_inception_v2_coco.pb')),
'tmp_uff': os.path.abspath(os.path.join(
DIR_NAME, 'ssd_inception_v2_coco.uff')),
'output_bin': os.path.abspath(os.path.join(
DIR_NAME, 'TRT_ssd_inception_v2_coco.bin')),
'num_classes': 91,
'min_size': 0.2,
'max_size': 0.95,
'input_order': [0, 2, 1], # order of loc_data, conf_data, priorbox_data
},
'ssdlite_mobilenet_v2_coco': {
'input_pb': os.path.abspath(os.path.join(
DIR_NAME, 'ssdlite_mobilenet_v2_coco.pb')),
'tmp_uff': os.path.abspath(os.path.join(
DIR_NAME, 'ssdlite_mobilenet_v2_coco.uff')),
'output_bin': os.path.abspath(os.path.join(
DIR_NAME, 'TRT_ssdlite_mobilenet_v2_coco.bin')),
'num_classes': 91,
'min_size': 0.2,
'max_size': 0.95,
'input_order': [0, 2, 1], # order of loc_data, conf_data, priorbox_data
},
}
INPUT_DIMS = (3, 320, 320)
DEBUG_UFF = False
def replace_addv2(graph):
"""Replace all 'AddV2' in the graph with 'Add'.
'AddV2' is not supported by UFF parser.
Reference:
1. https://github.com/jkjung-avt/tensorrt_demos/issues/113#issuecomment-629900809
"""
for node in graph.find_nodes_by_op('AddV2'):
gs.update_node(node, op='Add')
return graph
def replace_fusedbnv3(graph):
"""Replace all 'FusedBatchNormV3' in the graph with 'FusedBatchNorm'.
'FusedBatchNormV3' is not supported by UFF parser.
Reference:
1. https://devtalk.nvidia.com/default/topic/1066445/tensorrt/tensorrt-6-0-1-tensorflow-1-14-no-conversion-function-registered-for-layer-fusedbatchnormv3-yet/post/5403567/#5403567
2. https://github.com/jkjung-avt/tensorrt_demos/issues/76#issuecomment-607879831
"""
for node in graph.find_nodes_by_op('FusedBatchNormV3'):
gs.update_node(node, op='FusedBatchNorm')
return graph
def add_anchor_input(graph):
"""Add the missing const input for the GridAnchor node.
Reference:
1. https://www.minds.ai/post/deploying-ssd-mobilenet-v2-on-the-nvidia-jetson-and-nano-platforms
"""
data = np.array([1, 1], dtype=np.float32)
anchor_input = gs.create_node('AnchorInput', 'Const', value=data)
graph.append(anchor_input)
graph.find_nodes_by_op('GridAnchor_TRT')[0].input.insert(0, 'AnchorInput')
return graph
def add_plugin(graph, model, spec):
"""add_plugin
Reference:
1. https://github.com/AastaNV/TRT_object_detection/blob/master/config/model_ssd_mobilenet_v1_coco_2018_01_28.py
2. https://github.com/AastaNV/TRT_object_detection/blob/master/config/model_ssd_mobilenet_v2_coco_2018_03_29.py
3. https://devtalk.nvidia.com/default/topic/1050465/jetson-nano/how-to-write-config-py-for-converting-ssd-mobilenetv2-to-uff-format/post/5333033/#5333033
"""
numClasses = spec['num_classes']
minSize = spec['min_size']
maxSize = spec['max_size']
inputOrder = spec['input_order']
all_assert_nodes = graph.find_nodes_by_op('Assert')
graph.remove(all_assert_nodes, remove_exclusive_dependencies=True)
all_identity_nodes = graph.find_nodes_by_op('Identity')
graph.forward_inputs(all_identity_nodes)
Input = gs.create_plugin_node(
name='Input',
op='Placeholder',
shape=(1,) + INPUT_DIMS
)
PriorBox = gs.create_plugin_node(
name='MultipleGridAnchorGenerator',
op='GridAnchor_TRT',
minSize=minSize, # was 0.2
maxSize=maxSize, # was 0.95
aspectRatios=[1.0, 2.0, 0.5, 3.0, 0.33],
variance=[0.1, 0.1, 0.2, 0.2],
featureMapShapes=[19, 10, 5, 3, 2, 1],
numLayers=6
)
NMS = gs.create_plugin_node(
name='NMS',
op='NMS_TRT',
shareLocation=1,
varianceEncodedInTarget=0,
backgroundLabelId=0,
confidenceThreshold=0.3, # was 1e-8
nmsThreshold=0.6,
topK=100,
keepTopK=100,
numClasses=numClasses, # was 91
inputOrder=inputOrder,
confSigmoid=1,
isNormalized=1
)
concat_priorbox = gs.create_node(
'concat_priorbox',
op='ConcatV2',
axis=2
)
if trt.__version__[0] >= '7':
concat_box_loc = gs.create_plugin_node(
'concat_box_loc',
op='FlattenConcat_TRT',
axis=1,
ignoreBatch=0
)
concat_box_conf = gs.create_plugin_node(
'concat_box_conf',
op='FlattenConcat_TRT',
axis=1,
ignoreBatch=0
)
else:
concat_box_loc = gs.create_plugin_node(
'concat_box_loc',
op='FlattenConcat_TRT'
)
concat_box_conf = gs.create_plugin_node(
'concat_box_conf',
op='FlattenConcat_TRT'
)
namespace_for_removal = [
'ToFloat',
'image_tensor',
'Preprocessor/map/TensorArrayStack_1/TensorArrayGatherV3',
]
namespace_plugin_map = {
'MultipleGridAnchorGenerator': PriorBox,
'Postprocessor': NMS,
'Preprocessor': Input,
'ToFloat': Input,
'Cast': Input, # added for models trained with tf 1.15+
'image_tensor': Input,
'MultipleGridAnchorGenerator/Concatenate': concat_priorbox, # for 'ssd_mobilenet_v1_coco'
'Concatenate': concat_priorbox, # for other models
'concat': concat_box_loc,
'concat_1': concat_box_conf
}
graph.remove(graph.find_nodes_by_path(['Preprocessor/map/TensorArrayStack_1/TensorArrayGatherV3']), remove_exclusive_dependencies=False) # for 'ssd_inception_v2_coco'
graph.collapse_namespaces(namespace_plugin_map)
graph = replace_addv2(graph)
graph = replace_fusedbnv3(graph)
if 'image_tensor:0' in graph.find_nodes_by_name('Input')[0].input:
graph.find_nodes_by_name('Input')[0].input.remove('image_tensor:0')
if 'Input' in graph.find_nodes_by_name('NMS')[0].input:
graph.find_nodes_by_name('NMS')[0].input.remove('Input')
# Remove the Squeeze to avoid "Assertion 'isPlugin(layerName)' failed"
graph.forward_inputs(graph.find_node_inputs_by_name(graph.graph_outputs[0], 'Squeeze'))
if 'anchors' in [node.name for node in graph.graph_outputs]:
graph.remove('anchors', remove_exclusive_dependencies=False)
if len(graph.find_nodes_by_op('GridAnchor_TRT')[0].input) < 1:
graph = add_anchor_input(graph)
if 'NMS' not in [node.name for node in graph.graph_outputs]:
graph.remove(graph.graph_outputs, remove_exclusive_dependencies=False)
if 'NMS' not in [node.name for node in graph.graph_outputs]:
# We expect 'NMS' to be one of the outputs
raise RuntimeError('bad graph_outputs')
return graph
def main():
parser = argparse.ArgumentParser()
parser.add_argument('model', type=str, choices=list(MODEL_SPECS.keys()))
args = parser.parse_args()
# initialize
if trt.__version__[0] < '7':
ctypes.CDLL(LIB_FILE)
TRT_LOGGER = trt.Logger(trt.Logger.INFO)
trt.init_libnvinfer_plugins(TRT_LOGGER, '')
# compile the model into TensorRT engine
model = args.model
spec = MODEL_SPECS[model]
dynamic_graph = add_plugin(
gs.DynamicGraph(spec['input_pb']),
model,
spec)
_ = uff.from_tensorflow(
dynamic_graph.as_graph_def(),
output_nodes=['NMS'],
output_filename=spec['tmp_uff'],
text=True,
debug_mode=DEBUG_UFF)
with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.UffParser() as parser:
builder.max_workspace_size = 1 << 28
builder.max_batch_size = 1
builder.fp16_mode = True
parser.register_input('Input', INPUT_DIMS)
parser.register_output('MarkOutput_0')
parser.parse(spec['tmp_uff'], network)
engine = builder.build_cuda_engine(network)
buf = engine.serialize()
with open(spec['output_bin'], 'wb') as f:
f.write(buf)
if __name__ == '__main__':
main()
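For reference, the script above takes the model key as a positional argument, so a typical invocation (assuming the .pb file sits next to the script) would be:

python3 build_engine.py ssd_mobilenet_v2_coco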
Do you also want the frozen graph for this?
Model Link - https://drive.google.com/file/d/11iztQno2hoiWRR44sMomdDhR7iAYjA2j/view?usp=sharing
I think the failed assertion statement is this line:
ASSERT(numPriors * numLocClasses * nbBoxCoordinates == inputDims[param.inputOrder[0]].d[0]);
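This check compares the expected loc_data size against the first dimension of whichever input inputOrder[0] points at; if inputOrder is wrong, the comparison runs against the conf or priorbox tensor and fails. As a rough sanity check of the left-hand side (a minimal sketch; the feature-map sizes and per-cell anchor counts assume the common 300x300 SSD-Mobilenet layout):

# Rough sanity check of the expected loc_data size, assuming
# shareLocation=1 (numLocClasses=1) and 4 box coordinates.
feature_maps = [19, 10, 5, 3, 2, 1]
anchors_per_cell = [3, 6, 6, 6, 6, 6]  # typical SSD-Mobilenet layout
num_priors = sum(f * f * a for f, a in zip(feature_maps, anchors_per_cell))
loc_data_size = num_priors * 1 * 4     # numPriors * numLocClasses * 4
print(num_priors, loc_data_size)       # -> 1917 7668 for this layout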
If you have made sure you have the right num_classes, I think you should double check that you've set inputOrder correctly. Please read https://github.com/jkjung-avt/tensorrt_demos/issues/111#issuecomment-630265855 for how to find the correct order.
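One quick way to inspect that order (a minimal sketch; the .pbtxt file name is an assumption, use whatever build_engine.py generated):

# Print the inputs of the NMS node from the generated .pbtxt (a text
# file), in the order they appear in memory: loc/conf/priorbox.
with open('ssd_mobilenet_v2_coco.pbtxt') as f:
    lines = f.read().splitlines()
for i, line in enumerate(lines):
    if 'id: "NMS"' in line:
        for nxt in lines[i + 1:i + 5]:
            print(nxt.strip())  # expect three 'inputs: ...' lines
        break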
Has the issue been resolved?
I am also facing the same issue.
'input_order': [0, 2, 1], # order of loc_data, conf_data, priorbox_data
@wadhwasahil @kanakedge Do you see a "ssd_mobilenet_v2_coco.pbtxt" file generated by "build_engine.py"? Could you post the content of that file?
`"""build_engine.py
This script converts a SSD model (pb) to UFF and subsequently builds the TensorRT engine.
Input : ssd_mobilenetv[1|2][coco|egohands].pb Output: TRT_ssd_mobilenetv[1|2][coco|egohands].bin """
import os import ctypes import argparse import yaml
import numpy as np import uff import tensorrt as trt import graphsurgeon as gs import logging
from SimpleCalibrator import SimpleCalibrator
logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", datefmt="%Y-%m-%d %H:%M:%S") logger = logging.getLogger(name)
DIR_NAME = "ssd_mobilenet_v1_coco_2018_01_28" NUM_CLASSES = 90 BATCHES = [1, 2, 4, 8]
LIB_FILE = os.path.abspath(os.path.join(DIR_NAME, 'libflattenconcat.so'))
models = ['ssd_mobilenet_v1_coco', 'ssd_mobilenet_v2_coco', 'ssd_inception_v2_coco', 'ssdlite_mobilenet_v2_coco']
def model_specs(DIR_NAME, model, modelfile):
MODEL_SPECS = {
'ssd_mobilenet_v1_coco': {
'input_pb': os.path.abspath(os.path.join(
DIR_NAME, modelfile)),
'tmp_uff': os.path.abspath(os.path.join(
DIR_NAME, model+'.uff')),
'output_bin': os.path.abspath(os.path.join(
DIR_NAME, model+'.enp')),
'num_classes': 91,
'min_size': 0.2,
'max_size': 0.95,
'input_order': [0, 2, 1], # order of loc_data, conf_data, priorbox_data
},
'ssd_mobilenet_v2_coco': {
'input_pb': os.path.abspath(os.path.join(
DIR_NAME, modelfile)),
'tmp_uff': os.path.abspath(os.path.join(
DIR_NAME, model+'.uff')),
'output_bin': os.path.abspath(os.path.join(
DIR_NAME, model+'.bin')),
'num_classes': 91,
'min_size': 0.2,
'max_size': 0.95,
'input_order': [1,0,2], # order of loc_data, conf_data, priorbox_data
},
'ssd_inception_v2_coco': {
'input_pb': os.path.abspath(os.path.join(
DIR_NAME, modelfile)),
'tmp_uff': os.path.abspath(os.path.join(
DIR_NAME, model+'.uff')),
'output_bin': os.path.abspath(os.path.join(
DIR_NAME, model+'.bin')),
'num_classes': 91,
'min_size': 0.2,
'max_size': 0.95,
'input_order': [0, 2, 1], # order of loc_data, conf_data, priorbox_data
},
'ssdlite_mobilenet_v2_coco': {
'input_pb': os.path.abspath(os.path.join(
DIR_NAME, modelfile)),
'tmp_uff': os.path.abspath(os.path.join(
DIR_NAME, model+'.uff')),
'output_bin': os.path.abspath(os.path.join(
DIR_NAME, model+'.bin')),
'num_classes': 91,
'min_size': 0.2,
'max_size': 0.95,
'input_order': [0, 2, 1], # order of loc_data, conf_data, priorbox_data
},
}
return MODEL_SPECS[model]
INPUT_DIMS = (3, 300, 300)
DEBUG_UFF = False
def replace_addv2(graph):
    """Replace all 'AddV2' in the graph with 'Add'.
'AddV2' is not supported by UFF parser.
Reference:
1. https://github.com/jkjung-avt/tensorrt_demos/issues/113#issuecomment-629900809
"""
for node in graph.find_nodes_by_op('AddV2'):
gs.update_node(node, op='Add')
return graph
def replace_fusedbnv3(graph):
    """Replace all 'FusedBatchNormV3' in the graph with 'FusedBatchNorm'.
'FusedBatchNormV3' is not supported by UFF parser.
Reference:
1. https://devtalk.nvidia.com/default/topic/1066445/tensorrt/tensorrt-6-0-1-tensorflow-1-14-no-conversion-function-registered-for-layer-fusedbatchnormv3-yet/post/5403567/#5403567
2. https://github.com/jkjung-avt/tensorrt_demos/issues/76#issuecomment-607879831
"""
for node in graph.find_nodes_by_op('FusedBatchNormV3'):
gs.update_node(node, op='FusedBatchNorm')
return graph
def add_anchor_input(graph):
    """Add the missing const input for the GridAnchor node.
Reference:
1. https://www.minds.ai/post/deploying-ssd-mobilenet-v2-on-the-nvidia-jetson-and-nano-platforms
"""
data = np.array([1, 1], dtype=np.float32)
anchor_input = gs.create_node('AnchorInput', 'Const', value=data)
graph.append(anchor_input)
graph.find_nodes_by_op('GridAnchor_TRT')[0].input.insert(0, 'AnchorInput')
return graph
def add_plugin(graph, model, spec):
    """add_plugin
Reference:
1. https://github.com/AastaNV/TRT_object_detection/blob/master/config/model_ssd_mobilenet_v1_coco_2018_01_28.py
2. https://github.com/AastaNV/TRT_object_detection/blob/master/config/model_ssd_mobilenet_v2_coco_2018_03_29.py
3. https://devtalk.nvidia.com/default/topic/1050465/jetson-nano/how-to-write-config-py-for-converting-ssd-mobilenetv2-to-uff-format/post/5333033/#5333033
"""
numClasses = spec['num_classes']
minSize = spec['min_size']
maxSize = spec['max_size']
inputOrder = spec['input_order']
all_assert_nodes = graph.find_nodes_by_op('Assert')
graph.remove(all_assert_nodes, remove_exclusive_dependencies=True)
all_identity_nodes = graph.find_nodes_by_op('Identity')
graph.forward_inputs(all_identity_nodes)
Input = gs.create_plugin_node(
name='Input',
op='Placeholder',
shape=(1,) + INPUT_DIMS
)
PriorBox = gs.create_plugin_node(
name='MultipleGridAnchorGenerator',
op='GridAnchor_TRT',
minSize=minSize, # was 0.2
maxSize=maxSize, # was 0.95
aspectRatios=[0.8, 1.0, 2.0, 3.0, 4.0],
variance=[0.1, 0.1, 0.2, 0.2],
featureMapShapes=[19,10,5,3,2,1],
numLayers=6
)
NMS = gs.create_plugin_node(
name='NMS',
op='NMS_TRT',
shareLocation=1,
varianceEncodedInTarget=0,
backgroundLabelId=0,
confidenceThreshold=0.3, # was 1e-8
nmsThreshold=0.6,
topK=100,
keepTopK=100,
numClasses=numClasses, # was 91
inputOrder=inputOrder,
confSigmoid=1,
isNormalized=1
)
concat_priorbox = gs.create_node(
'concat_priorbox',
op='ConcatV2',
axis=2
)
if trt.__version__[0] >= '7':
concat_box_loc = gs.create_plugin_node(
'concat_box_loc',
op='FlattenConcat_TRT',
axis=1,
ignoreBatch=0
)
concat_box_conf = gs.create_plugin_node(
'concat_box_conf',
op='FlattenConcat_TRT',
axis=1,
ignoreBatch=0
)
else:
concat_box_loc = gs.create_plugin_node(
'concat_box_loc',
op='FlattenConcat_TRT'
)
concat_box_conf = gs.create_plugin_node(
'concat_box_conf',
op='FlattenConcat_TRT'
)
namespace_for_removal = [
'ToFloat',
'image_tensor',
'Preprocessor/map/TensorArrayStack_1/TensorArrayGatherV3',
]
namespace_plugin_map = {
'MultipleGridAnchorGenerator': PriorBox,
'Postprocessor': NMS,
'Preprocessor': Input,
'ToFloat': Input,
'Cast': Input, # added for models trained with tf 1.15+
'image_tensor': Input,
'MultipleGridAnchorGenerator/Concatenate': concat_priorbox, # for 'ssd_mobilenet_v1_coco'
'Concatenate': concat_priorbox, # for other models
'concat': concat_box_loc,
'concat_1': concat_box_conf
}
graph.remove(graph.find_nodes_by_path(['Preprocessor/map/TensorArrayStack_1/TensorArrayGatherV3']), remove_exclusive_dependencies=False) # for 'ssd_inception_v2_coco'
graph.collapse_namespaces(namespace_plugin_map)
graph = replace_addv2(graph)
graph = replace_fusedbnv3(graph)
if 'image_tensor:0' in graph.find_nodes_by_name('Input')[0].input:
graph.find_nodes_by_name('Input')[0].input.remove('image_tensor:0')
if 'Input' in graph.find_nodes_by_name('NMS')[0].input:
graph.find_nodes_by_name('NMS')[0].input.remove('Input')
graph.forward_inputs(graph.find_node_inputs_by_name(graph.graph_outputs[0], 'Squeeze'))
if 'anchors' in [node.name for node in graph.graph_outputs]:
graph.remove('anchors', remove_exclusive_dependencies=False)
if len(graph.find_nodes_by_op('GridAnchor_TRT')[0].input) < 1:
graph = add_anchor_input(graph)
if 'NMS' not in [node.name for node in graph.graph_outputs]:
graph.remove(graph.graph_outputs, remove_exclusive_dependencies=False)
if 'NMS' not in [node.name for node in graph.graph_outputs]:
raise RuntimeError('bad graph_outputs')
return graph
def add_profiles(config, inputs, opt_profiles):
    logger.debug("=== Optimization Profiles ===")
    for i, profile in enumerate(opt_profiles):
        for inp in inputs:
            _min, _opt, _max = profile.get_shape(inp.name)
            logger.debug("{} - OptProfile {} - Min {} Opt {} Max {}".format(inp.name, i, _min, _opt, _max))
        config.add_optimization_profile(profile)
def mark_outputs(network):
    last_layer = network.get_layer(network.num_layers - 1)
    if not last_layer.num_outputs:
        logger.error("Last layer contains no outputs.")
        return
for i in range(last_layer.num_outputs):
network.mark_output(last_layer.get_output(i))
def check_network(network):
    if not network.num_outputs:
        logger.warning("No output nodes found, marking last layer's outputs as network outputs. Correct this if wrong.")
        mark_outputs(network)  # mark the last layer's outputs, as the warning states
inputs = [network.get_input(i) for i in range(network.num_inputs)]
outputs = [network.get_output(i) for i in range(network.num_outputs)]
max_len = max([len(inp.name) for inp in inputs] + [len(out.name) for out in outputs])
logger.debug("=== Network Description ===")
for i, inp in enumerate(inputs):
logger.debug("Input {0} | Name: {1:{2}} | Shape: {3}".format(i, inp.name, max_len, inp.shape))
for i, out in enumerate(outputs):
logger.debug("Output {0} | Name: {1:{2}} | Shape: {3}".format(i, out.name, max_len, out.shape))
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', type=str, choices=models)
    parser.add_argument('--quantization', type=str)
    parser.add_argument('--dir', required=True, type=str)
    parser.add_argument('--modelfile', required=True, type=str)
    args = parser.parse_args()
model = args.model
dir = args.dir
modelfile = args.modelfile
spec = model_specs(dir, model, modelfile)
quantization = args.quantization
print(args)
print(spec)
if trt.__version__[0] < '7':
ctypes.CDLL(LIB_FILE)
TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE)
trt.init_libnvinfer_plugins(TRT_LOGGER, '')
if args.quantization=="int8":
builder_flag_map = {
'gpu_fallback': trt.BuilderFlag.GPU_FALLBACK,
'refittable': trt.BuilderFlag.REFIT,
'debug': trt.BuilderFlag.DEBUG,
'strict_types': trt.BuilderFlag.STRICT_TYPES,
'int8': trt.BuilderFlag.INT8,
}
else:
builder_flag_map = {
'gpu_fallback': trt.BuilderFlag.GPU_FALLBACK,
'refittable': trt.BuilderFlag.REFIT,
'debug': trt.BuilderFlag.DEBUG,
'strict_types': trt.BuilderFlag.STRICT_TYPES,
'fp16': trt.BuilderFlag.FP16,
}
dynamic_graph = add_plugin(
gs.DynamicGraph(spec['input_pb']),
model,
spec)
_ = uff.from_tensorflow(
dynamic_graph.as_graph_def(),
output_nodes=['NMS'],
output_filename=spec['tmp_uff'],
text=True,
debug_mode=DEBUG_UFF)
for bs in BATCHES:
with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, builder.create_builder_config() as config, trt.UffParser() as parser:
if args.quantization=="int8" and not builder.platform_has_fast_int8:
print('INT8 not supported on this platform. Falling back to FP16')
builder_flag_map = {
'gpu_fallback': trt.BuilderFlag.GPU_FALLBACK,
'refittable': trt.BuilderFlag.REFIT,
'debug': trt.BuilderFlag.DEBUG,
'strict_types': trt.BuilderFlag.STRICT_TYPES,
'fp16': trt.BuilderFlag.FP16,
}
elif args.quantization=="int8" and builder.platform_has_fast_int8:
print("\n\nInside elif INT8 optimization\n\n")
config.int8_calibrator = SimpleCalibrator("coco_calib",(300,300),"calib.bin")
size = 32
config.max_workspace_size = 1<<size
builder.max_batch_size = bs
builder.max_workspace_size = 1<<size
for flag in builder_flag_map:
logger.info("Setting {}".format(builder_flag_map[flag]))
config.set_flag(builder_flag_map[flag])
logger.info("Building Engine...")
parser.register_input('Input', INPUT_DIMS)
parser.register_output('MarkOutput_0')
parser.parse(spec['tmp_uff'], network)
print("its builder_here!",builder)
engine = builder.build_engine(network,config)
buf = engine.serialize()
with open(os.path.join(DIR_NAME, model+f"_bs{bs}_"+f"_{quantization}_"+".enp"), 'wb') as f:
f.write(buf)
if __name__ == '__main__':
    main()
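For reference, a typical invocation of this modified script (the directory, model file, and calibration assets are placeholders specific to that setup) would look like:

python3 build_engine.py --model ssd_mobilenet_v2_coco --dir ssd_mobilenet_v2_coco_2018_03_29 --modelfile frozen_inference_graph.pb --quantization int8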
Yes "ssd_mobilenet_v2_coco.pbtxt" is generated
@kanakedge Please share the content of "ssd_mobilenet_v2_coco.pbtxt"? It would help to debug the issue.
I am also facing the same issue.
'input_order': [0, 2, 1], # order of loc_data, conf_data, priorbox_data
This actually worked for me.
@kanakedge Please share the content of "ssd_mobilenet_v2_coco.pbtxt"? It would help to debug the issue.
Please check this drive link: https://drive.google.com/drive/folders/1GxRNlUU_KwIFk-pLxEBd_iSK-5H2h2QM?usp=sharing
@wadhwasahil Can you share your build_engine.py?
@kanakedge Sorry for my late reply.
Please check this drive link: https://drive.google.com/drive/folders/1GxRNlUU_KwIFk-pLxEBd_iSK-5H2h2QM?usp=sharing
The key lines to note in this "ssd_mobilenet_v2_coco.pbtxt" are the inputs to the NMS node:
nodes {
id: "NMS"
inputs: "concat_box_conf"
inputs: "Squeeze"
inputs: "concat_priorbox"
operation: "_NMS_TRT"
Also, referring to the description of "inputOrder" in the documentation of the NMS plugin:
Specifies the order of inputs {loc_data, conf_data, priorbox_data}, in other words, inputOrder[0] is for loc_data, inputOrder[1] is for conf_data and inputOrder[2] is for priorbox_data. For example, if your inputs in the memory are in the order of loc_data, priorbox_data, conf_data, then inputOrder should be [0, 2, 1].
So the correct setting of "inputOrder" for your "ssd_mobilenet_v2_coco" model should be: [1, 0, 2].
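To spell out that mapping (a minimal sketch; the role of each node follows the pbtxt and the plugin documentation quoted above):

# The pbtxt lists the NMS inputs, in memory order, as:
#   position 0: concat_box_conf  -> conf_data
#   position 1: Squeeze          -> loc_data
#   position 2: concat_priorbox  -> priorbox_data
# inputOrder[k] is the position of loc (k=0), conf (k=1), priorbox (k=2).
positions = {'conf': 0, 'loc': 1, 'priorbox': 2}
input_order = [positions['loc'], positions['conf'], positions['priorbox']]
print(input_order)  # -> [1, 0, 2]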
Closing due to no response.
I am trying to follow https://github.com/jkjung-avt/tensorrt_demos (TensorRT MODNet, YOLOv4, YOLOv3, SSD, MTCNN, and GoogLeNet) to convert a custom trained SSD Mobilenet V2 model to TensorRT format. I get this error while running the conversion:
Assertion failed at /opt/tensorrt/TensorRT/plugin/nmsPlugin/nmsPlugin.cpp, line 246
I have tried changing the inputOrder on this line https://github.com/jkjung-avt/tensorrt_demos/blob/a061e44a82e1ca097f57e5a32f20daf5bebe7ade/ssd/build_engine.py#L58 to [0, 2, 1], as the pbtxt file generated contains the following:
id: "NMS"
inputs: "Squeeze"
inputs: "concat_priorbox"
inputs: "concat_box_conf"
Environment:
TensorRT Version: 7.0.0
TensorFlow Version: 1.15
NVIDIA LINK - https://forums.developer.nvidia.com/t/assertion-error-while-converting-mobilenet-v2-to-trt/214654