thfylsty closed this issue 1 year ago.
The implementation of onnx_export will be released in the near future.
@ymlab Hi, how is the progress on the onnx_export implementation? I want to validate the FPS of the model after using TensorRT acceleration. Looking forward to your reply, thanks.
I have completed exporttoonnx.py and accelerated it with TensorRT; the result is almost consistent with the author's announcement. There are TensorRT plugins in the author's code, just register them in TensorRT.
Just refer to the following code:
script/view_tranform_cuda/cpp/src/cuda_accelerated_functions_cu_project_v3.cu
The author exports two ONNX models, a 2D one and a 3D one, and uses project.cu for the projection calculation between them. You can also export a single ONNX model and register the projection as a plugin; I tried this successfully.
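For reference, building a TensorRT engine from an ONNX graph that contains such a custom plugin node looks roughly like the sketch below. This is only a minimal sketch assuming the TensorRT 8.x Python API; the plugin library name libproject_plugin.so is a placeholder and depends on how you compile the author's project.cu into a plugin.

import ctypes
import tensorrt as trt

# load the compiled plugin library so its creator is registered with TensorRT
# (the .so name is a placeholder for however you build project.cu)
ctypes.CDLL("./libproject_plugin.so")

logger = trt.Logger(trt.Logger.INFO)
trt.init_libnvinfer_plugins(logger, "")

builder = trt.Builder(logger)
network = builder.create_network(
    1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
parser = trt.OnnxParser(network, logger)

with open("weights/onnxs/fastbev_plugin_n8.onnx", "rb") as f:
    if not parser.parse(f.read()):
        for i in range(parser.num_errors):
            print(parser.get_error(i))
        raise RuntimeError("failed to parse the ONNX file")

config = builder.create_builder_config()
engine_bytes = builder.build_serialized_network(network, config)
with open("fastbev_plugin_n8.engine", "wb") as f:
    f.write(engine_bytes)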
@thfylsty thanks for your reply. Can you share your exporttoonnx.py and the code you used to accelerate it with TensorRT?
I deleted the temporal sequence and only used 3 cameras. The code is for reference only.
The author has also completed this part of the code, but it may not be completely polished; I modified it based on his export code.
The most important thing is to feed the image in as "input" and correctly complete the 2D part; the "project" step is done with a TensorRT plugin, then the 3D part is completed, and finally the results are decoded in TensorRT.
import math
import numpy as np
import torch
# DETECTORS, FastBEV, get_points and backproject_inplace come from the FastBEV repo.

class TestPlugin(torch.autograd.Function):
    @staticmethod
    def symbolic(g, input):
        return g.op("Plugin", input, name_s="Project2Dto3D", info_s="")

    # forward only runs during ONNX export; it just needs to return the plugin's output shape
    @staticmethod
    def forward(ctx, input):
        return torch.zeros((1, 64, 200, 200, 4))
@DETECTORS.register_module()
class FastBEVTRT(FastBEV):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        n_images = self.camera_num
        stride = 4
        # img_extrinsic = np.ones([n_images, 4, 4])  # camnum x 4 x 4 np
        # img_intrinsic = np.ones([4, 4])  # camnum x 4 x 4 np
        img_extrinsic = np.array([[[0.0293, -0.99, 0.0061, -0.57],
                                   [0.005, -0.00598, -0.999, 0.78],
                                   [0.99, 0.029, 0.0055, 0.675],
                                   [0, 0, 0, 1]]])  # camnum x 4 x 4 np
        img_intrinsic = np.array([[[454.477, 0, 662.663, 0],
                                   [0, 453.804, 352.785, 0],
                                   [0, 0, 1, 0],
                                   [0, 0, 0, 1]]])  # 4 x 4 np
        img_extrinsic = img_extrinsic.repeat(n_images, 0)
        img_intrinsic = img_intrinsic.repeat(n_images, 0)
        for i in range(n_images):
            img_extrinsic[i] *= (1 + i / 10)
        point_cloud_range = np.array([-50, -50, -5, 50, 50, 3])
        origin = (point_cloud_range[:3] + point_cloud_range[3:]) / 2.
        img_meta = {"lidar2img": {"extrinsic": img_extrinsic,
                                  "intrinsic": img_intrinsic,
                                  "origin": origin}}
        img_shape = [512, 512]
        self.img_shape = img_shape
        self.img_meta = img_meta
        self.fea_shape = [img_shape[0] / 4, img_shape[0] / 4]
        self.height, self.width = img_shape[0] // stride, img_shape[1] // stride
        extrinsic_noise = 0
        device = "cpu"
        self.device = device
        self.upsample_list = []
        scale = [2, 4, 8]
        for i in range(3):
            self.upsample_list.append(
                torch.nn.Upsample(size=None, scale_factor=scale[i],
                                  mode='bilinear', align_corners=False))
    def onnx_export_2d(self, img):
        x = self.backbone(img)
        feas = list(self.neck(x))
        # upsample the lower-resolution FPN levels before concatenating and fusing
        for i in range(3):
            feas[i + 1] = self.upsample_list[i](feas[i + 1])
        x = torch.cat(feas, dim=1)
        x = self.neck_fuse(x)
        return x
    def onnx_export_3d(self, x, _=None):
        x = self.neck_3d(x)  # [1, 64, 200, 200, 4]
        cls_score, bbox_pred, dir_cls_preds = self.bbox_head(x)
        cls_score = cls_score[0].sigmoid()
        # for onnx export: pack the head outputs into one tensor
        x = torch.cat((cls_score, bbox_pred[0], dir_cls_preds[0]), dim=1)
        return x
    def project_torch(self, features):
        stride_i = math.ceil(self.img_shape[-1] / features.shape[-1])  # P4 880 / 32 = 27.5
        projection = self._compute_projection(
            self.img_meta, stride_i, noise=self.extrinsic_noise).to(features.device)
        n_voxels, voxel_size = self.n_voxels[0], self.voxel_size[0]
        points = get_points(  # [3, vx, vy, vz]
            n_voxels=torch.tensor(n_voxels),
            voxel_size=torch.tensor(voxel_size),
            origin=torch.tensor(self.img_meta["lidar2img"]["origin"]),
        ).to(features.device)
        volume = backproject_inplace(
            features[:, :, :self.height, :self.width], points, projection)  # [c, vx, vy, vz]
        volume = volume.unsqueeze(0)
        return volume
    def forward(self, img):
        # squeeze away the batch dim; in FastBEV the batch dim holds the cameras
        img = img.squeeze(0)  # torch.Size([1, cn, 3, 512, 512]) --> torch.Size([cn, 3, 512, 512])
        # print("x fea", img.sum(), img.mean(), img.max(), img.min())
        x = self.onnx_export_2d(img)  # out: n x 64 x 1/4 size --> torch.Size([3, 64, 128, 128])
        # print("x fea", x.sum(), x.mean(), x.max(), x.min())

        ## for TensorRT: enable this branch when exporting for TRT
        x = x.permute(0, 2, 3, 1)  # --> torch.Size([3, 128, 128, 64])
        x = TestPlugin.apply(x)  # out: 64 x voxel[x y z] --> torch.Size([1, 64, 200, 200, 4])
        # x = self.forward_trt(x)  # out: 64 x voxel[x y z] --> torch.Size([1, 64, 200, 200, 4])

        ## for torch: enable this branch in PyTorch to check that the TRT result matches
        # x = self.project_torch(x)  # out: 64 x voxel[x y z] --> torch.Size([1, 64, 200, 200, 4])
        # print("x fea", x.sum(), x.abs().sum(), x.mean(), x.max(), x.min())

        x = self.onnx_export_3d(x)  # out: torch.Size([1, 192, 100, 100])
        # print("x fea", x.sum(), x.mean(), x.max(), x.min())
        return x
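A quick way to sanity-check the module with a dummy input before exporting (switch the project lines in forward to project_torch if you want to compare against the TRT result) is sketched below; the config path is a placeholder, build_model is the same mmdet3d builder used in export.py further down, and the shapes follow the comments above.

import torch
from mmcv import Config
from mmdet3d.models import build_model

cfg = Config.fromfile('path/to/fastbev_config.py')  # placeholder: your FastBEV config
cfg.model.train_cfg = None
cfg.model.type = cfg.model.type + 'TRT'  # pick FastBEVTRT from the registry
model = build_model(cfg.model, test_cfg=cfg.get('test_cfg'))
model.eval()

dummy = torch.ones([1, 3, 3, 512, 512])  # [bs, cam_num, 3, H, W], 3 cameras
with torch.no_grad():
    out = model(dummy)
print(out.shape)  # expected torch.Size([1, 192, 100, 100])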
Cost time for different camera numbers, without the temporal sequence, on a Jetson AGX Xavier:

cam-num    time(ms)
1          25.889
3          34.464
6          46.967
8          55.560
@cyn-liu here is the export.py. Note that it sets cfg.model.type = cfg.model.type + 'TRT' and n_image = 8.
# Imports and parse_args() follow the standard mmdet3d tools/test.py.
def main():
    args = parse_args()
    assert args.out or args.eval or args.format_only or args.show \
        or args.show_dir, \
        ('Please specify at least one operation (save/eval/format/show the '
         'results / save the results) with the argument "--out", "--eval"'
         ', "--format-only", "--show" or "--show-dir"')

    if args.eval and args.format_only:
        raise ValueError('--eval and --format_only cannot be both specified')
    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
        raise ValueError('The output file must be a pkl file.')

    print(args.config)
    cfg = Config.fromfile(args.config)
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)
    # import modules from string list.
    if cfg.get('custom_imports', None):
        from mmcv.utils import import_modules_from_strings
        import_modules_from_strings(**cfg['custom_imports'])
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    cfg.model.pretrained = None
    # in case the test dataset is concatenated
    samples_per_gpu = 1
    if isinstance(cfg.data.test, dict):
        cfg.data.test.test_mode = True
        samples_per_gpu = cfg.data.test.pop('samples_per_gpu', 1)
        if samples_per_gpu > 1:
            # Replace 'ImageToTensor' to 'DefaultFormatBundle'
            cfg.data.test.pipeline = replace_ImageToTensor(
                cfg.data.test.pipeline)
    elif isinstance(cfg.data.test, list):
        for ds_cfg in cfg.data.test:
            ds_cfg.test_mode = True
        samples_per_gpu = max(
            [ds_cfg.pop('samples_per_gpu', 1) for ds_cfg in cfg.data.test])
        if samples_per_gpu > 1:
            for ds_cfg in cfg.data.test:
                ds_cfg.pipeline = replace_ImageToTensor(ds_cfg.pipeline)

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # set random seeds
    if args.seed is not None:
        set_random_seed(args.seed, deterministic=args.deterministic)

    # build the model and load checkpoint
    if args.vis:
        nms_thr = 0.0001
        try:
            cfg.model.test_cfg.nms_thr = nms_thr
        except:
            print('### imvoxelnet except in train.py ###')
            cfg.test_cfg.nms_thr = nms_thr
    if args.extrinsic_noise > 0:
        for i in range(3):
            print('### test camera extrinsic robustness ###')
        cfg.model.extrinsic_noise = args.extrinsic_noise

    cfg.model.train_cfg = None
    cfg.model.type = cfg.model.type + 'TRT'
    model = build_model(cfg.model, test_cfg=cfg.get('test_cfg'))

    model_path = "weights/pth/export.pth"
    from mmcv.runner import save_checkpoint, load_checkpoint
    if os.path.exists(model_path):
        load_checkpoint(model, model_path)
        # model = torch.load(model_path)
    else:
        save_checkpoint(model, model_path)
        # torch.save(model, model_path)

    # fp16_cfg = cfg.get('fp16', None)
    # if fp16_cfg is not None:
    #     wrap_fp16_model(model)
    # load_checkpoint(model, args.checkpoint, map_location='cpu')
    if args.fuse_conv_bn:
        model = fuse_conv_bn(model)

    n_image = 8
    img = torch.ones([1, n_image, 3, 512, 512])
    # x = model(img)
    model.eval()

    onnx_path = "weights/onnxs/fastbev_plugin_n" + str(n_image) + ".onnx"
    with torch.no_grad():
        torch.onnx.export(
            model,
            (img),
            onnx_path,
            verbose=True,
            opset_version=11,
            input_names=['input'],
            output_names=["output"],
            enable_onnx_checker=False
        )

    from onnxsim import simplify
    import onnx
    model = onnx.load(onnx_path)
    # simplify the model for easier visualization
    # model_simp, check = simplify(model, skip_unknown=False)
    # add shape information and save
    onnx.save(onnx.shape_inference.infer_shapes(model), onnx_path)
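After export, the graph can be checked quickly with the onnx package. onnxruntime cannot execute the model because of the custom Plugin node, so this small sketch only inspects the graph structure.

import onnx

m = onnx.load(onnx_path)
# confirm the custom projection node survived the export
print([n.op_type for n in m.graph.node if n.op_type == "Plugin"])
# list the graph inputs and their (inferred) shapes
for inp in m.graph.input:
    dims = [d.dim_value for d in inp.type.tensor_type.shape.dim]
    print(inp.name, dims)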
@thfylsty Thank you very much for sharing. Looking at the cost times you shared for different camera numbers, I have a question: can you change the number of cameras for inference with the model shared by the author (6 cameras by default, nuScenes dataset), or did you retrain the model?
Thank you very much for your answer.
Could you publish your code? Thank you.
@xll426 I will update my export and deployment version soon (once I have cleaned it up), but it already removes the temporal-sequence inference from the author's code. Also, the TensorRT plugin and the ONNX export are really just modifications and reworking of the author's code. If you want the original experience, I recommend using the author's code directly.
PyTorch and ONNX export: https://github.com/thfylsty/FastBEV
TensorRT acceleration: https://github.com/thfylsty/FastBEV-TensorRT
1. Upgraded the MM framework to the latest major version; 2. Can export ONNX; 3. Removed the temporal sequence (because I don't use it); 4. Can change the number of cameras; 5. Deploys with TensorRT.
(Too lazy to type this in English ~v~)
@thfylsty I see your repository has been deleted. Do you still plan to open-source it later?
There are still some bugs, it does not work well with the public datasets, and the evaluation function is not finished. The deployment code cannot be applied to the public datasets either. Releasing it would be too misleading; sorry for having left that half-finished source code up.
@thfylsty OK, thanks.
The CUDA source code the author has open-sourced can basically all be used directly; my version is also modified from his.
Thank you very much for your answer.