nutonomy / nuscenes-devkit

The devkit of the nuScenes dataset.
https://www.nuScenes.org
Other
2.2k stars 617 forks source link

Mapping lane and road divider markup to camera images #948

Closed glebss closed 1 year ago

glebss commented 1 year ago

Hello.

I am trying to extract the road_divider and lane_divider layers from the map and convert them to the markup in the camera image. I use the source code from the render_map_in_image function as a basis.

Below is the code I use:

import numpy as np
from PIL import Image
from pyquaternion import Quaternion
import matplotlib.pyplot as plt
from nuscenes.map_expansion.map_api import NuScenesMap
from nuscenes.nuscenes import NuScenes
from nuscenes.utils.geometry_utils import view_points

dataroot = '/mnt/data/datasets/nuscenes/v1.0-trainval'
nusc = NuScenes(version='v1.0-trainval', dataroot=dataroot, verbose=True)
nusc_map = NuScenesMap(dataroot=dataroot, map_name='singapore-onenorth')
patch_radius = 1000
near_plane = 1e-8

sample_token = nusc.sample[0]['token']
layer_names = ['road_divider', 'lane_divider']
camera_channel = 'CAM_FRONT'

sample_record = nusc.get('sample', sample_token)
scene_record = nusc.get('scene', sample_record['scene_token'])
log_record = nusc.get('log', scene_record['log_token'])
log_location = log_record['location']
assert log_location == nusc_map.map_name

cam_token = sample_record['data'][camera_channel]
cam_record = nusc.get('sample_data', cam_token)
cam_path = nusc.get_sample_data_path(cam_token)
im = Image.open(cam_path)
im_size = im.size
cs_record = nusc.get('calibrated_sensor', cam_record['calibrated_sensor_token'])
cam_intrinsic = np.array(cs_record['camera_intrinsic'])

poserecord = nusc.get('ego_pose', cam_record['ego_pose_token'])
ego_pose = poserecord['translation']
box_coords = (
    ego_pose[0] - patch_radius,
    ego_pose[1] - patch_radius,
    ego_pose[0] + patch_radius,
    ego_pose[1] + patch_radius,
)

records_in_patch = nusc_map.explorer.get_records_in_patch(box_coords, layer_names)
fig = plt.figure(figsize=(9, 16))
ax = fig.add_axes([0, 0, 1, 1])
ax.set_xlim(0, im_size[0])
ax.set_ylim(0, im_size[1])
ax.imshow(im)
for layer_name in layer_names:
    for token in records_in_patch[layer_name]:
        record = nusc_map.explorer.map_api.get(layer_name, token)
        line_token = record['line_token']
        line = nusc_map.explorer.map_api.extract_line(line_token)
        points = np.array(line.xy)
        points = np.vstack((points, np.zeros((1, points.shape[1]))))

         # Transform into the ego vehicle frame for the timestamp of the image.
        points = points - np.array(poserecord['translation']).reshape((-1, 1))
        points = np.dot(Quaternion(poserecord['rotation']).rotation_matrix.T, points)

        # Transform into camera.
        points = points - np.array(cs_record['translation']).reshape((-1, 1))
        points = np.dot(Quaternion(cs_record['rotation']).rotation_matrix.T, points)

        depths = points[2, :]
        behind = depths < near_plane
        if np.all(behind):
            continue
        points = view_points(points, cam_intrinsic, normalize=True)
        inside = np.ones(points.shape[1], dtype=bool)
        inside = np.logical_and(inside, points[0, :] > 1)
        inside = np.logical_and(inside, points[0, :] < im.size[0] - 1)
        inside = np.logical_and(inside, points[1, :] > 1)
        inside = np.logical_and(inside, points[1, :] < im.size[1] - 1)
        if np.all(np.logical_not(inside)):
            continue
        points = points[:2, :]
        points = [(p0, p1) for (p0, p1) in zip(points[0], points[1])]
        xs = [p[0] for p in points]
        ys = [p[1] for p in points]
        plt.scatter(xs, ys, s=1, c='r')

plt.axis('off')
ax.invert_yaxis()

However, this code doesn't work right. The resulting rendered lines look like this: image

In what part of the presented code am I doing something wrong? Any help is appreciated.

Thanks.

whyekit-motional commented 1 year ago

@glebss I think you are close :smile:

The dots in your rendering probably belong to lane dividers / road dividers that are behind the hill

I made some edits to your code (and left you some comments to explain why I made those changes), and chosen another sample to better show the rendered lane dividers / road dividers:

import numpy as np
from PIL import Image
from pyquaternion import Quaternion
import matplotlib.pyplot as plt
from nuscenes.map_expansion.map_api import NuScenesMap
from nuscenes.nuscenes import NuScenes
from nuscenes.utils.geometry_utils import view_points

dataroot = '/data/sets/nuscenes/'
nusc = NuScenes(version='v1.0-mini', dataroot=dataroot, verbose=True)
nusc_map = NuScenesMap(dataroot=dataroot, map_name='singapore-onenorth')
patch_radius = 50  # 1000 is too much, and will lead to map objects which are too far away from the camera being rendered.
near_plane = 1e-8

sample_token = nusc.sample[0]['token']
layer_names = ['road_divider', 'lane_divider']
camera_channel = 'CAM_FRONT'

sample_record = nusc.get('sample', sample_token)
scene_record = nusc.get('scene', sample_record['scene_token'])
log_record = nusc.get('log', scene_record['log_token'])
log_location = log_record['location']
assert log_location == nusc_map.map_name

cam_token = sample_record['data'][camera_channel]
cam_record = nusc.get('sample_data', cam_token)
cam_path = nusc.get_sample_data_path(cam_token)
im = Image.open(cam_path)
im_size = im.size
cs_record = nusc.get('calibrated_sensor', cam_record['calibrated_sensor_token'])
cam_intrinsic = np.array(cs_record['camera_intrinsic'])

poserecord = nusc.get('ego_pose', cam_record['ego_pose_token'])
ego_pose = poserecord['translation']
box_coords = (
    ego_pose[0] - patch_radius,
    ego_pose[1] - patch_radius,
    ego_pose[0] + patch_radius,
    ego_pose[1] + patch_radius,
)

records_in_patch = nusc_map.explorer.get_records_in_patch(box_coords, layer_names)
fig = plt.figure(figsize=(9, 16))
ax = fig.add_axes([0, 0, 1, 1])
ax.set_xlim(0, im_size[0])
ax.set_ylim(0, im_size[1])
ax.imshow(im)
for layer_name in layer_names:
    for token in records_in_patch[layer_name]:
        record = nusc_map.explorer.map_api.get(layer_name, token)
        line_token = record['line_token']
        line = nusc_map.explorer.map_api.extract_line(line_token)
        points = np.array(line.xy)
        points = np.vstack((points, np.zeros((1, points.shape[1]))))

        # Transform into the ego vehicle frame for the timestamp of the image.
        points = points - np.array(poserecord['translation']).reshape((-1, 1))
        points = np.dot(Quaternion(poserecord['rotation']).rotation_matrix.T, points)

        # Transform into camera.
        points = points - np.array(cs_record['translation']).reshape((-1, 1))
        points = np.dot(Quaternion(cs_record['rotation']).rotation_matrix.T, points)

#        depths = points[2, :]
#         behind = depths < near_plane
#         if np.all(behind):
#             continue

        # Get points belonging to the map objects which are not behind the camera, 
        # and not too far away in front of the camera. 
        depths = points[2, :]
        points = points[:, (depths > 0) & (depths < patch_radius)] 

        points = view_points(points, cam_intrinsic, normalize=True)
        inside = np.ones(points.shape[1], dtype=bool)
        inside = np.logical_and(inside, points[0, :] > 1)
        inside = np.logical_and(inside, points[0, :] < im.size[0] - 1)
        inside = np.logical_and(inside, points[1, :] > 1)
        inside = np.logical_and(inside, points[1, :] < im.size[1] - 1)
        if np.all(np.logical_not(inside)):
            continue
        points = points[:2, :]
        points = [(p0, p1) for (p0, p1) in zip(points[0], points[1])]
        xs = [p[0] for p in points]
        ys = [p[1] for p in points]
#         plt.scatter(xs, ys, s=1, c='r') 
        plt.plot(xs, ys, 'ro', linestyle="--")  # Render using lines instead, since the markings are lines. 

plt.axis('off')
ax.invert_yaxis()

The above edits produce the following rendering: image

glebss commented 1 year ago

@whyekit-motional Thank you for your support!

Indeed, with your modifications now it looks much better.