I'm doing real-time inference with a pre-trained pillar-based CenterPoint model; the point cloud is received from a ROS bag frame by frame. I modified the code directly from demos/pcd_demo.py. The inference time for each frame is about 150 ms. The timing code is here:
torch.cuda.synchronize()
start_time = time.time()
print("START TIME")
with torch.no_grad():
    result = self.model(return_loss=False, rescale=False, **self.data)
torch.cuda.synchronize()
print('inference time:', time.time() - start_time)
My device is an RTX 3070. I would expect the inference time per frame to be about 30~50 ms. Could you please give me some clue about the problem? Thanks in advance!
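For completeness, here is a warm-up-aware variant of the same measurement I could use (just a sketch, not in my script; it reuses the model/data names from the snippet above, and skips the first frames because torch.backends.cudnn.benchmark = True makes them pay one-off autotuning costs):

# Sketch only: warm-up-aware timing with torch.cuda.Event, reusing the
# model/data names from the snippet above.
def time_inference(model, data, warmup=10, iters=50):
    with torch.no_grad():
        for _ in range(warmup):  # warm-up passes are not timed
            model(return_loss=False, rescale=False, **data)
        torch.cuda.synchronize()
        start = torch.cuda.Event(enable_timing=True)
        end = torch.cuda.Event(enable_timing=True)
        start.record()
        for _ in range(iters):
            model(return_loss=False, rescale=False, **data)
        end.record()
        torch.cuda.synchronize()
    return start.elapsed_time(end) / iters  # average milliseconds per frame

# e.g.: print('avg ms/frame:', time_inference(self.model, self.data))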
The whole code is attached below for reference:
# Imports reconstructed for completeness (module paths assume ROS 1 and
# mmdetection3d 0.x; adjust to your environment):
import time
import numpy as np
import ros_numpy
import rospy
import torch
from argparse import ArgumentParser
from copy import deepcopy
from jsk_recognition_msgs.msg import BoundingBox, BoundingBoxArray
from mmcv.parallel import collate, scatter
from mmdet3d.apis import init_model
from mmdet3d.core.bbox import get_box_type
from mmdet3d.datasets.pipelines import Compose
from pyquaternion import Quaternion
from sensor_msgs.msg import PointCloud2, PointField

def get_xyz_points(cloud_array, remove_nans=True, dtype=np.float32):
    '''
    Convert a structured cloud array into an (N, 4) float array; note the
    axis remap to (z, -y, x) and the intensity scaled to [0, 1].
    '''
    if remove_nans:
        mask = np.isfinite(cloud_array['x']) & np.isfinite(cloud_array['y']) & np.isfinite(cloud_array['z'])
        cloud_array = cloud_array[mask]
    points = np.zeros(cloud_array.shape + (4,), dtype=dtype)
    points[..., 0] = cloud_array['z']
    points[..., 1] = -cloud_array['y']
    points[..., 2] = cloud_array['x']
    points[..., 3] = cloud_array['intensity'] / 255
    return points
def xyz_array_to_pointcloud2(points_sum, stamp=None, frame_id=None):
    '''
    Create a sensor_msgs.PointCloud2 from an (N, 3) array of points.
    '''
    msg = PointCloud2()
    if stamp:
        msg.header.stamp = stamp
    if frame_id:
        msg.header.frame_id = frame_id
    msg.height = 1
    msg.width = points_sum.shape[0]
    msg.fields = [
        PointField('x', 0, PointField.FLOAT32, 1),
        PointField('y', 4, PointField.FLOAT32, 1),
        PointField('z', 8, PointField.FLOAT32, 1)
        # PointField('i', 12, PointField.FLOAT32, 1)
    ]
    msg.is_bigendian = False
    msg.point_step = 12
    msg.row_step = msg.point_step * points_sum.shape[0]  # bytes per row, not point count
    msg.is_dense = int(np.isfinite(points_sum).all())
    msg.data = np.asarray(points_sum, np.float32).tobytes()  # tostring() is deprecated
    return msg
def yaw2quaternion(yaw: float) -> Quaternion:
    return Quaternion(axis=[0, 0, 1], radians=yaw)
class Processor_ROS:
    def __init__(self, args):
        self.args = args

    def initialize(self):
        # build the model from a config file and a checkpoint file
        torch.backends.cudnn.benchmark = True
        self.model = init_model(self.args.config, self.args.checkpoint, device=self.args.device)
        cfg = self.model.cfg
        self.device = next(self.model.parameters()).device  # model device
        # build the data pipeline
        test_pipeline = deepcopy(cfg.data.test.pipeline)
        test_pipeline = Compose(test_pipeline)
        box_type_3d, box_mode_3d = get_box_type(cfg.data.test.box_type_3d)
        data = dict(
            pts_filename="/Innovusion/Data/Waymo/testing/bin/0000000.bin",
            box_type_3d=box_type_3d,
            box_mode_3d=box_mode_3d,
            # for ScanNet demo we need axis_align_matrix
            ann_info=dict(axis_align_matrix=np.eye(4)),
            sweeps=[],
            # set timestamp = 0
            timestamp=[0],
            img_fields=[],
            bbox3d_fields=[],
            pts_mask_fields=[],
            pts_seg_fields=[],
            bbox_fields=[],
            mask_fields=[],
            seg_fields=[])
        data = test_pipeline(data)
        self.data = collate([data], samples_per_gpu=1)
    def run(self, points):
        # data['img_metas'] = None
        self.data['points'] = [[points]]
        if next(self.model.parameters()).is_cuda:
            # scatter to specified GPU
            self.data = scatter(self.data, [self.device.index])[0]
        else:
            # this is a workaround to avoid the bug of MMDataParallel
            self.data['img_metas'] = self.data['img_metas'][0].data
            self.data['points'] = self.data['points'][0].data
        # forward the model
        torch.cuda.synchronize()
        start_time = time.time()
        with torch.no_grad():
            result = self.model(return_loss=False, rescale=False, **self.data)
        torch.cuda.synchronize()
        print('inference time:', time.time() - start_time)
        boxes_lidar = result[0]['pts_bbox']['boxes_3d'].tensor.numpy()
        scores = result[0]['pts_bbox']['scores_3d'].numpy()
        types = result[0]['pts_bbox']['labels_3d'].numpy()
        if self.args.score_thr > 0:
            inds = scores > self.args.score_thr
            boxes_lidar = boxes_lidar[inds]  # keep boxes aligned with filtered scores/labels
            scores = scores[inds]
            types = types[inds]
        return boxes_lidar, scores, types
def rslidar_callback(msg):
    arr_bbox = BoundingBoxArray()
    msg_cloud = ros_numpy.point_cloud2.pointcloud2_to_array(msg)
    np_points = get_xyz_points(msg_cloud, True)
    # pad a zero fifth feature column so the points match the model's 5-dim input
    dim_5 = np.zeros((np_points.shape[0], 1)).astype(np.float32)
    np_points = np.column_stack((np_points, dim_5))
    points = torch.from_numpy(np_points)
    boxes_lidar, scores, types = proc_1.run(points)
    print("len scores:", len(scores))
    if scores.size != 0:
        for i in range(scores.size):
            bbox = BoundingBox()
            bbox.header.frame_id = msg.header.frame_id
            bbox.header.stamp = rospy.Time.now()
            q = yaw2quaternion(float(-boxes_lidar[i][6]))
            # map box pose back from the model's remapped axes to the sensor frame
            bbox.pose.orientation.x = q[3]
            bbox.pose.orientation.y = -q[2]
            bbox.pose.orientation.z = q[1]
            bbox.pose.orientation.w = q[0]
            bbox.pose.position.x = float(boxes_lidar[i][2])
            bbox.pose.position.y = -float(boxes_lidar[i][1])
            bbox.pose.position.z = float(boxes_lidar[i][0])
            bbox.dimensions.x = float(boxes_lidar[i][5])
            bbox.dimensions.y = float(boxes_lidar[i][4])
            bbox.dimensions.z = float(boxes_lidar[i][3])
            bbox.value = scores[i]
            bbox.label = int(types[i])
            arr_bbox.boxes.append(bbox)
    arr_bbox.header.frame_id = msg.header.frame_id
    arr_bbox.header.stamp = msg.header.stamp
    if len(arr_bbox.boxes) != 0:
        pub_arr_bbox.publish(arr_bbox)
        arr_bbox.boxes = []
    else:
        arr_bbox.boxes = []
        pub_arr_bbox.publish(arr_bbox)
def main():
    global proc_1
    global pub_arr_bbox
    parser = ArgumentParser()
    # parser.add_argument('pcd', help='Point cloud file')
    parser.add_argument('--config', default="/Innovusion/Projects/mmdetection3d/checkpoints/centerpoint_pillar_waymo_one_baseline/centerpoint_032pillar_second_secfpn_circlenms_4x8_cyclic_20e_waymo.py", help='Config file')
    parser.add_argument('--checkpoint', default="/Innovusion/Projects/mmdetection3d/checkpoints/centerpoint_pillar_waymo_one_baseline/epoch_30.pth", help='Checkpoint file')
    parser.add_argument(
        '--device', default='cuda:0', help='Device used for inference')
    parser.add_argument(
        '--score-thr', type=float, default=0.0, help='bbox score threshold')
    args = parser.parse_args()
    proc_1 = Processor_ROS(args)
    proc_1.initialize()
    rospy.init_node('mmdetection3d_ros_node')
    sub_lidar_topic = ["/velodyne_points",
                       "/top/rslidar_points",
                       "/points_raw",
                       "/lidar_protector/merged_cloud",
                       "/merged_cloud",
                       "/lidar_top",
                       "/roi_pclouds",
                       "/livox/lidar",
                       "/iv_points",
                       "/SimOneSM_PointCloud_0"]
    sub_ = rospy.Subscriber(sub_lidar_topic[8], PointCloud2, rslidar_callback, queue_size=1, buff_size=2**24)
    pub_arr_bbox = rospy.Publisher("pp_boxes", BoundingBoxArray, queue_size=1)
    print("[+] mmdetection3d ros_node has started!")
    rospy.spin()

if __name__ == '__main__':
    main()
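In case it helps to narrow things down, here is a sketch of how I could profile a single forward pass to see where the 150 ms goes (voxelization vs. the network vs. post-processing such as NMS). It assumes PyTorch >= 1.8.1 for torch.profiler and reuses the model/data names from run() above:

# Sketch only: operator-level breakdown of one forward pass (CPU vs. CUDA time).
from torch.profiler import ProfilerActivity, profile

def profile_once(model, data):
    with torch.no_grad(), profile(
            activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA]) as prof:
        model(return_loss=False, rescale=False, **data)
    print(prof.key_averages().table(sort_by='cuda_time_total', row_limit=15))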