NVlabs / FoundationPose

[CVPR 2024 Highlight] FoundationPose: Unified 6D Pose Estimation and Tracking of Novel Objects
https://nvlabs.github.io/FoundationPose/

Can I visualize run_linemod.py RGB images just like run_demo.py? #33

Closed wsq1010 closed 6 months ago

wsq1010 commented 7 months ago

Thanks for your contribution. Can I see the RGB image with the pose drawn on it by changing the debug parameter when running run_linemod.py?

wsq1010 commented 7 months ago

My vis_refiner.png is blank.

wenbowen123 commented 7 months ago

We haven't implemented this, but you can refer to run_demo.py. It shouldn't be hard to adapt it here.

huiwenzhang commented 6 months ago

> Thanks for your contribution. Can I see the RGB image with the pose drawn on it by changing the debug parameter when running run_linemod.py?

This is how I visualize LINEMOD: modify the function run_pose_estimation_worker() in run_linemod.py as follows:

def run_pose_estimation_worker(reader, i_frames, est: FoundationPose = None, debug=0, ob_id=None, device='cuda:0'):
  # Relies on the imports already at the top of run_linemod.py
  # (e.g. `from estimater import *` / `from datareader import *`), which
  # provide torch, np, cv2, trimesh, logging, dr (nvdiffrast.torch),
  # NestDict, draw_posed_3d_box and draw_xyz_axis. `get_mask` and the
  # global `detect_type` also come from the surrounding script.
  torch.cuda.set_device(device)
  est.to_device(device)
  est.glctx = dr.RasterizeCudaContext(device=device)

  result = NestDict()

  # Added code: load the ground-truth mesh so we can compute a canonical
  # oriented bounding box for the visualization.
  mesh_file = reader.get_gt_mesh_file(ob_id)
  mesh = trimesh.load(mesh_file)
  mesh.vertices = mesh.vertices / 1000  # LINEMOD meshes are in mm; convert to meters
  to_origin, extents = trimesh.bounds.oriented_bounds(mesh)
  bbox = np.stack([-extents/2, extents/2], axis=0).reshape(2, 3)

  for i, i_frame in enumerate(i_frames):
    logging.info(f"{i}/{len(i_frames)}, i_frame:{i_frame}, ob_id:{ob_id}")
    video_id = reader.get_video_id()
    color = reader.get_color(i_frame)
    depth = reader.get_depth(i_frame)
    id_str = reader.id_strs[i_frame]
    H, W = color.shape[:2]

    debug_dir = est.debug_dir

    ob_mask = get_mask(reader, i_frame, ob_id, detect_type=detect_type)
    if ob_mask is None:
      logging.info("ob_mask not found, skip")
      result[video_id][id_str][ob_id] = np.eye(4)
      return result

    est.gt_pose = reader.get_gt_pose(i_frame, ob_id)

    pose = est.register(K=reader.K, rgb=color, depth=depth, ob_mask=ob_mask, ob_id=ob_id)
    logging.info(f"pose:\n{pose}")

    if debug >= 3:
      m = est.mesh_ori.copy()
      tmp = m.copy()
      tmp.apply_transform(pose)
      tmp.export(f'{debug_dir}/model_tf.obj')

    # Added code: visualization, analogous to run_demo.py. Re-center the
    # pose to the oriented-bounding-box frame, then draw the box and axes.
    center_pose = pose @ np.linalg.inv(to_origin)
    vis = draw_posed_3d_box(reader.K, img=color, ob_in_cam=center_pose, bbox=bbox)
    vis = draw_xyz_axis(vis, ob_in_cam=center_pose, scale=0.1, K=reader.K, thickness=2, transparency=0, is_input_rgb=True)
    cv2.imshow('1', vis[...,::-1])  # draw_xyz_axis returns RGB; flip to BGR for OpenCV
    cv2.waitKey(1)

    result[video_id][id_str][ob_id] = pose

  return result
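
On a headless server cv2.imshow will fail, so it may be easier to write the frames to disk instead (run_demo.py does something similar for its track_vis output). A minimal sketch of that variation, placed right after the draw_xyz_axis call; the vis_linemod folder name is just an illustration, not something the repo defines:

import os
import imageio

# Save the annotated frame instead of opening a window.
# draw_xyz_axis returned RGB, so imageio can write `vis` directly;
# with cv2.imwrite you would need the BGR flip vis[...,::-1].
save_dir = f'{debug_dir}/vis_linemod'  # hypothetical output folder
os.makedirs(save_dir, exist_ok=True)
imageio.imwrite(f'{save_dir}/{id_str}.png', vis)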