shanice-l / gdrnpp_bop2022

PyTorch Implementation of GDRNPP, winner (most of the awards) of the BOP Challenge 2022 at ECCV'22
Apache License 2.0

script for visualizing the results on tless dataset #100

Closed: monajalal closed this issue 5 months ago

monajalal commented 5 months ago

I notice there is a script for visualizing the results on the ycbv dataset, but I can't find one for the tless dataset. Could you please share the script for visualizing tless inference results after obtaining the CSV file?

(gdrnpp) mona@ada:~/gdrnpp_bop2022/core/gdrn_modeling/tools/tless$ ls
total 84K
drwxrwxr-x 13 mona mona 4.0K Nov 21 10:40 ..
drwxrwxr-x  2 mona mona 4.0K Nov 21 10:40 .
-rw-rw-r--  1 mona mona 8.5K Nov 21 10:40 tless_primesense_1_gen_xyz.py
-rw-rw-r--  1 mona mona 8.4K Nov 21 10:40 tless_pbr_1_gen_xyz.py
-rw-rw-r--  1 mona mona 6.2K Nov 21 10:40 tless_3_test_alignK.py
-rw-rw-r--  1 mona mona 7.1K Nov 21 10:40 tless_3b_gen_test_alignK.py
-rw-rw-r--  1 mona mona 7.3K Nov 21 10:40 tless_2_test_resize.py
-rw-rw-r--  1 mona mona 7.0K Nov 21 10:40 tless_2b_gen_train_primesense_resize.py
-rw-rw-r--  1 mona mona 2.1K Nov 21 10:40 tless_1_compute_keypoints_3d.py
-rw-rw-r--  1 mona mona 1.9K Nov 21 10:40 tless_1_compute_fps.py
-rw-rw-r--  1 mona mona 2.2K Nov 21 10:40 test_eular_rotation.py
-rw-rw-r--  1 mona mona 1.9K Nov 21 10:40 convert_det_to_our_format.py
-rw-rw-r--  1 mona mona 2.8K Nov 21 10:40 combine_det_pose.py    
(gdrnpp) mona@ada:~/gdrnpp_bop2022/core/gdrn_modeling/tools/tless$ ls ../ycbv/
total 36K
drwxrwxr-x 13 mona mona 4.0K Nov 21 10:40 ..
drwxrwxr-x  2 mona mona 4.0K Jan  2 13:32 .
-rw-rw-r--  1 mona mona 7.7K Nov 21 10:40 ycbv_3_vis_poses_full.py
-rw-rw-r--  1 mona mona 8.3K Nov 21 10:40 ycbv_2_vis_poses.py
-rw-rw-r--  1 mona mona 1.9K Nov 21 10:40 ycbv_1_compute_fps.py
-rw-rw-r--  1 mona mona    0 Nov 21 10:40 __init__.py
-rw-rw-r--  1 mona mona 1.9K Nov 21 10:40 convert_det_to_our_format.py

I have:

(gdrnpp) mona@ada:~/gdrnpp_bop2022/output$ ls gdrn/tless/convnext_a6_AugCosyAAEGray_BG05_mlL1_DMask_amodalClipBox_classAware_tless/inference_model_final_wo_optim/tless_bop_test_primesense/
total 13M
drwxrwxr-x 3 mona mona 4.0K Jan  3 09:51 ..
drwxrwxr-x 3 mona mona 4.0K Jan  3 09:53 .
drwxrwxr-x 7 mona mona 4.0K Jan  3 10:07 convnext-a6-AugCosyAAEGray-BG05-mlL1-DMask-amodalClipBox-classAware-tless-test-iter0_tless-test
-rw-rw-r-- 1 mona mona  13M Jan  3 09:51 convnext-a6-AugCosyAAEGray-BG05-mlL1-DMask-amodalClipBox-classAware-tless-test-iter0_tless-test.csv

https://github.com/shanice-l/gdrnpp_bop2022/issues/22#issuecomment-1780906781
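
For reference, that CSV is in the standard BOP results format: one detection per row, with columns scene_id, im_id, obj_id, score, R (9 space-separated floats, the row-major 3x3 rotation matrix), t (3 space-separated floats, in millimetres) and time. A quick way to sanity-check the file is a minimal sketch like this (only pandas is assumed):

import pandas as pd

df = pd.read_csv("output/gdrn/tless/convnext_a6_AugCosyAAEGray_BG05_mlL1_DMask_amodalClipBox_classAware_tless/inference_model_final_wo_optim/tless_bop_test_primesense/convnext-a6-AugCosyAAEGray-BG05-mlL1-DMask-amodalClipBox-classAware-tless-test-iter0_tless-test.csv")
print(df.columns.tolist())  # expected: ['scene_id', 'im_id', 'obj_id', 'score', 'R', 't', 'time']
print(df.head())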

monajalal commented 5 months ago

This worked for me:

import mmcv
import os
import os.path as osp
import numpy as np
import sys
from tqdm import tqdm
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode
import torch

import pandas as pd

cur_dir = osp.dirname(osp.abspath(__file__))
sys.path.insert(0, osp.join(cur_dir, "../../../../"))

from lib.vis_utils.colormap import colormap
from lib.utils.mask_utils import cocosegm2mask, get_edge
from core.utils.data_utils import crop_resize_by_warp_affine, read_image_mmcv
from core.gdrn_modeling.datasets.dataset_factory import register_datasets
from lib.pysixd import misc
from transforms3d.quaternions import quat2mat
from lib.egl_renderer.egl_renderer_v3 import EGLRenderer

from core.utils.my_visualizer import MyVisualizer, _GREY, _GREEN, _BLUE

def load_predicted_csv(fname):
    df = pd.read_csv(fname)
    info_list = df.to_dict("records")
    return info_list

def parse_Rt_in_csv(_item):
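    # "R" entries are 9 space-separated floats (row-major 3x3 rotation);
    # "t" entries are 3 floats in millimetres (BOP results format)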
    return np.array([float(i) for i in _item.strip(" ").split(" ")])

out_size = 512  # side length of the square zoomed-in crop that is rendered and saved
score_thr = 0.3  # skip predictions whose detection score is below this threshold
colors = colormap(rgb=False, maximum=255)

id2obj = {i: str(i) for i in range(1, 31)}  # T-LESS has 30 objects with no semantic names; use the id as the name

objects = list(id2obj.values())

tensor_kwargs = {"device": torch.device("cuda"), "dtype": torch.float32}
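# preallocated RGBA buffers that the EGL renderer writes into (reused for every frame)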
image_tensor = torch.empty((out_size, out_size, 4), **tensor_kwargs).detach()
seg_tensor = torch.empty((out_size, out_size, 4), **tensor_kwargs).detach()
# image_tensor = torch.empty((480, 640, 4), **tensor_kwargs).detach()

model_dir = "datasets/BOP_DATASETS/tless/models_cad"

model_paths = [osp.join(model_dir, f"obj_{obj_id:06d}.ply") for obj_id in id2obj]

ren = EGLRenderer(
    model_paths,
    vertex_scale=0.001,  # BOP CAD models are in millimetres; scale to metres
    use_cache=True,
    width=out_size,
    height=out_size,
)

# NOTE:
# pred_path = "output/gdrn/lmo/a6_cPnP_AugAAETrunc_BG0.5_lmo_real_pbr0.1_40e/inference_model_final/lmo_test/a6-cPnP-AugAAETrunc-BG0.5-lmo-real-pbr0.1-40e-test_lmo_test_preds.pkl"
pred_path = "output/gdrn/tless/convnext_a6_AugCosyAAEGray_BG05_mlL1_DMask_amodalClipBox_classAware_tless/inference_model_final_wo_optim/tless_bop_test_primesense/convnext-a6-AugCosyAAEGray-BG05-mlL1-DMask-amodalClipBox-classAware-tless-test-iter0_tless-test.csv"
# vis_dir = "output/gdrn/lmo/a6_cPnP_AugAAETrunc_BG0.5_lmo_real_pbr0.1_40e/inference_model_final/lmo_test/vis_gt_pred"
vis_dir = "output/gdrn/tless/convnext_a6_AugCosyAAEGray_BG05_mlL1_DMask_amodalClipBox_classAware_tless/inference_model_final_wo_optim/tless_bop_test_primesense/tless_vis_gt_pred_full"

mmcv.mkdir_or_exist(vis_dir)

preds_csv = load_predicted_csv(pred_path)

preds = {}
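# group rows as preds[obj_name]["{scene_id}/{im_id}"]; if the CSV holds several
# detections of the same object in one image, later rows overwrite earlier ones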
for item in preds_csv:
    scene_id = item['scene_id']
    im_id = item['im_id']
    obj_id = item['obj_id']
    obj_name = id2obj[obj_id] 
    file_name = f"{scene_id}/{im_id}"  
    # print('file_name: ', file_name)
    if obj_name not in preds:
        preds[obj_name] = {}

    if file_name not in preds[obj_name]:
        preds[obj_name][file_name] = {}

    preds[obj_name][file_name]["score"] = item["score"]
    preds[obj_name][file_name]["R"] = parse_Rt_in_csv(item["R"]).reshape(3, 3)
    preds[obj_name][file_name]["t"] = parse_Rt_in_csv(item["t"])

# dataset_name = "tless_bop_test"
# dataset_name  = "tless_train_primesense"
dataset_name = "tless_bop_test_primesense"

print(dataset_name)
register_datasets([dataset_name])

meta = MetadataCatalog.get(dataset_name)
print("MetadataCatalog: ", meta)
objs = meta.objs

dset_dicts = DatasetCatalog.get(dataset_name)
for d in tqdm(dset_dicts):
    K = d["cam"]
    file_name = d["file_name"]
    # build the "{scene_id}/{im_id}" key (no zero padding) used in preds above
    path_parts = file_name.split('/')
    scene_id = str(int(path_parts[-3]))
    im_id = str(int(osp.splitext(path_parts[-1])[0]))
    converted_format = f"{scene_id}/{im_id}"

    img = read_image_mmcv(file_name, format="BGR")

    scene_im_id_split = d["scene_im_id"].split("/")
    scene_id = scene_im_id_split[0]
    im_id = int(scene_im_id_split[1])

    imH, imW = img.shape[:2]
    annos = d["annotations"]
    masks = [cocosegm2mask(anno["segmentation"], imH, imW) for anno in annos]
    bboxes = [anno["bbox"] for anno in annos]
    bbox_modes = [anno["bbox_mode"] for anno in annos]
    bboxes_xyxy = np.array(
        [BoxMode.convert(box, box_mode, BoxMode.XYXY_ABS) for box, box_mode in zip(bboxes, bbox_modes)]
    )
    kpts_3d_list = [anno["bbox3d_and_center"] for anno in annos]

    quats = [anno["quat"] for anno in annos]
    transes = [anno["trans"] for anno in annos]
    Rs = [quat2mat(quat) for quat in quats]
    # 0-based label
    cat_ids = [anno["category_id"] for anno in annos]

    kpts_2d = [misc.project_pts(kpt3d, K, R, t) for kpt3d, R, t in zip(kpts_3d_list, Rs, transes)]

    obj_names = [objs[cat_id] for cat_id in cat_ids]

    kpts_2d_est = []
    est_Rs = []
    est_ts = []

    kpts_2d_gt = []
    gt_Rs = []
    gt_ts = []

    kpts_3d_list_sel = []
    labels = []

    maxx, maxy, minx, miny = 0, 0, 1000, 1000
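    # accumulate the union bounding box of the GT and estimated 2D keypoints;
    # it defines the zoom-in crop computed after this loop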
    for anno_i, anno in enumerate(annos):
        kpt_2d_gt = kpts_2d[anno_i]
        obj_name = obj_names[anno_i]
        try:
            R_est = preds[obj_name][converted_format]["R"]
            t_est = preds[obj_name][converted_format]["t"]
            score = preds[obj_name][converted_format]["score"]
        except KeyError:  # no prediction for this object in this image
            continue
        if score < score_thr:
            continue

        labels.append(objects.index(obj_name))  # 0-based label

        est_Rs.append(R_est)
        est_ts.append(t_est)

        kpts_3d_list_sel.append(kpts_3d_list[anno_i])
        kpt_2d_est = misc.project_pts(kpts_3d_list[anno_i], K, R_est, t_est)
        kpts_2d_est.append(kpt_2d_est)

        gt_Rs.append(Rs[anno_i])
        gt_ts.append(transes[anno_i])
        kpts_2d_gt.append(kpts_2d[anno_i])

        for i in range(len(kpt_2d_est)):
            maxx, maxy, minx, miny = (
                max(maxx, kpt_2d_est[i][0]),
                max(maxy, kpt_2d_est[i][1]),
                min(minx, kpt_2d_est[i][0]),
                min(miny, kpt_2d_est[i][1]),
            )
            maxx, maxy, minx, miny = (
                max(maxx, kpt_2d_gt[i][0]),
                max(maxy, kpt_2d_gt[i][1]),
                min(minx, kpt_2d_gt[i][0]),
                min(miny, kpt_2d_gt[i][1]),
            )
    center = np.array([(minx + maxx) / 2, (miny + maxy) / 2])
    scale = max(maxx - minx, maxy - miny) * 1.5  # + 10
    crop_minx = max(0, center[0] - scale / 2)
    crop_miny = max(0, center[1] - scale / 2)
    crop_maxx = min(imW - 1, center[0] + scale / 2)
    crop_maxy = min(imH - 1, center[1] + scale / 2)
    scale = min(scale, min(crop_maxx - crop_minx, crop_maxy - crop_miny))

    zoomed_im = crop_resize_by_warp_affine(img, center, scale, out_size)
    im_zoom_gray = mmcv.bgr2gray(zoomed_im, keepdim=True)
    im_zoom_gray_3 = np.concatenate([im_zoom_gray, im_zoom_gray, im_zoom_gray], axis=2)
    # print(im_zoom_gray.shape)
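    # adjust the intrinsics for the crop: shift the principal point by the crop
    # origin, then scale by the zoom factor out_size / scale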
    K_zoom = K.copy()
    K_zoom[0, 2] -= center[0] - scale / 2
    K_zoom[1, 2] -= center[1] - scale / 2
    K_zoom[0, :] *= out_size / scale
    K_zoom[1, :] *= out_size / scale

    gt_poses = [np.hstack([_R, _t.reshape(3, 1)]) for _R, _t in zip(gt_Rs, gt_ts)]  # 3x4 [R|t]
    poses = [np.hstack([_R, _t.reshape(3, 1)]) for _R, _t in zip(est_Rs, est_ts)]  # 3x4 [R|t]

    ren.render(
        labels,
        poses,
        K=K_zoom,
        image_tensor=image_tensor,
        background=im_zoom_gray_3,
    )
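    # image_tensor holds float values; adding 0.5 rounds to nearest when casting to uint8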
    ren_bgr = (image_tensor[:, :, :3].detach().cpu().numpy() + 0.5).astype("uint8")

    # gt_masks = []
    # est_masks = []
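    # render each object separately to get its mask under the GT and estimated poses,
    # then draw the mask contours: blue = ground truth, green = estimate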
    for label, gt_pose, est_pose in zip(labels, gt_poses, poses):
        ren.render([label], [gt_pose], K=K_zoom, seg_tensor=seg_tensor)
        gt_mask = (seg_tensor[:, :, 0].detach().cpu().numpy() > 0).astype("uint8")

        ren.render([label], [est_pose], K=K_zoom, seg_tensor=seg_tensor)
        est_mask = (seg_tensor[:, :, 0].detach().cpu().numpy() > 0).astype("uint8")

        gt_edge = get_edge(gt_mask, bw=3, out_channel=1)
        est_edge = get_edge(est_mask, bw=3, out_channel=1)

        # zoomed_im[gt_edge != 0] = np.array(mmcv.color_val("blue"))
        # zoomed_im[est_edge != 0] = np.array(mmcv.color_val("green"))

        ren_bgr[gt_edge != 0] = np.array(mmcv.color_val("blue"))
        ren_bgr[est_edge != 0] = np.array(mmcv.color_val("green"))

    vis_im = ren_bgr

    # vis_im_add = (im_zoom_gray_3 * 0.3 + ren_bgr * 0.7).astype("uint8")

    kpts_2d_gt_zoom = [misc.project_pts(kpt3d, K_zoom, R, t) for kpt3d, R, t in zip(kpts_3d_list_sel, gt_Rs, gt_ts)]
    # print('kpts_2d_gt_zoom: ', kpts_2d_gt_zoom)
    kpts_2d_est_zoom = [misc.project_pts(kpt3d, K_zoom, R, t) for kpt3d, R, t in zip(kpts_3d_list_sel, est_Rs, est_ts)]
    # print('kpts_2d_est_zoom: ', kpts_2d_est_zoom)
    linewidth = 3
    visualizer = MyVisualizer(zoomed_im[:, :, ::-1], meta)
    for kpt_2d_gt_zoom, kpt_2d_est_zoom in zip(kpts_2d_gt_zoom, kpts_2d_est_zoom):
        visualizer.draw_bbox3d_and_center(
            kpt_2d_gt_zoom, top_color=_BLUE, bottom_color=_GREY, linewidth=linewidth, draw_center=True
        )
        visualizer.draw_bbox3d_and_center(
            kpt_2d_est_zoom, top_color=_GREEN, bottom_color=_GREY, linewidth=linewidth, draw_center=True
        )
    vis_im = visualizer.get_output()
    save_path_0 = osp.join(vis_dir, "{}_{:06d}_im.png".format(scene_id, im_id))
    mmcv.imwrite(zoomed_im, save_path_0)

    save_path = osp.join(vis_dir, "{}_{:06d}_gt_est.png".format(scene_id, im_id))
    vis_im_img = vis_im.get_image()
    # get_image() returns RGB (the visualizer was fed zoomed_im[:, :, ::-1]);
    # flip back to BGR so mmcv.imwrite saves correct colors
    mmcv.imwrite(vis_im_img[:, :, ::-1], save_path)

    # if True:
    #     # grid_show([zoomed_im[:, :, ::-1], vis_im[:, :, ::-1]], ["im", "est"], row=1, col=2)
    #     # im_show = cv2.hconcat([zoomed_im, vis_im, vis_im_add])
    #     im_show = cv2.hconcat([zoomed_im, vis_im])
    #     cv2.imshow("im_est", im_show)
    #     if cv2.waitKey(0) == 27:
    #         break  # esc to quit

Name it tless_2_vis_poses_full.py and run it as:

(gdrnpp) mona@ada:~/gdrnpp_bop2022$ python core/gdrn_modeling/tools/tless/tless_2_vis_poses_full.py
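
Note that pred_path, vis_dir, and model_dir in the script are relative paths, so it must be run from the repository root as shown, with the T-LESS CAD models available under datasets/BOP_DATASETS/tless/models_cad.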