shanice-l / gdrnpp_bop2022

PyTorch implementation of GDRNPP, winner of most awards in the BOP Challenge 2022 at ECCV'22
Apache License 2.0

lmo_2_vis_poses.py", preds = mmcv.load(pred_path) TypeError: Unsupported format: csv #110

Closed monajalal closed 8 months ago

monajalal commented 8 months ago

Can you please add support for CSV files? When I run the evaluation on your pretrained weights, a CSV file is saved, not a PKL file.

  File "/home/mona/gdrnpp_bop2022/core/gdrn_modeling/tools/lmo/lmo_2_vis_poses.py", line 73, in <module>
    preds = mmcv.load(pred_path)
  File "/home/mona/.local/lib/python3.10/site-packages/mmcv/fileio/io.py", line 57, in load
    raise TypeError(f'Unsupported format: {file_format}')
TypeError: Unsupported format: csv
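
For context, mmcv.load dispatches on the file extension and only has handlers registered for formats like json, yaml, and pkl, so a BOP results CSV has to be parsed separately. A minimal sketch of a format-aware loader, assuming pandas is available (load_preds is a hypothetical helper, not part of the repo):

import os.path as osp

import mmcv
import pandas as pd

def load_preds(pred_path):
    # mmcv.load covers pkl/json/yaml; fall back to pandas for BOP-style CSV results
    if osp.splitext(pred_path)[1] == ".csv":
        return pd.read_csv(pred_path).to_dict("records")
    return mmcv.load(pred_path)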

Full code of lmo_2_vis_poses.py:

import mmcv
import os.path as osp
import numpy as np
import sys
from tqdm import tqdm
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode
import torch

cur_dir = osp.dirname(osp.abspath(__file__))
sys.path.insert(0, osp.join(cur_dir, "../../../../"))

from lib.vis_utils.colormap import colormap
from lib.utils.mask_utils import cocosegm2mask, get_edge
from core.utils.data_utils import crop_resize_by_warp_affine, read_image_mmcv
from core.gdrn_modeling.datasets.dataset_factory import register_datasets
from lib.pysixd import misc
from transforms3d.quaternions import quat2mat
from lib.egl_renderer.egl_renderer_v3 import EGLRenderer

from core.utils.my_visualizer import MyVisualizer, _GREY, _GREEN, _BLUE

out_size = 512
score_thr = 0.3
colors = colormap(rgb=False, maximum=255)

id2obj = {
    1: "ape",
    #  2: 'benchvise',
    #  3: 'bowl',
    #  4: 'camera',
    5: "can",
    6: "cat",
    #  7: 'cup',
    8: "driller",
    9: "duck",
    10: "eggbox",
    11: "glue",
    12: "holepuncher",
    #  13: 'iron',
    #  14: 'lamp',
    #  15: 'phone'
}
objects = list(id2obj.values())

tensor_kwargs = {"device": torch.device("cuda"), "dtype": torch.float32}
image_tensor = torch.empty((out_size, out_size, 4), **tensor_kwargs).detach()
seg_tensor = torch.empty((out_size, out_size, 4), **tensor_kwargs).detach()
# image_tensor = torch.empty((480, 640, 4), **tensor_kwargs).detach()

model_dir = "datasets/BOP_DATASETS/lmo/models/"

model_paths = [osp.join(model_dir, f"obj_{obj_id:06d}.ply") for obj_id in id2obj]

ren = EGLRenderer(
    model_paths,
    vertex_scale=0.001,
    use_cache=True,
    width=out_size,
    height=out_size,
)

# NOTE:
# pred_path = "output/gdrn/lmo/a6_cPnP_AugAAETrunc_BG0.5_lmo_real_pbr0.1_40e/inference_model_final/lmo_test/a6-cPnP-AugAAETrunc-BG0.5-lmo-real-pbr0.1-40e-test_lmo_test_preds.pkl"
pred_path = "./output/gdrn/lmo_pbr/convnext_a6_AugCosyAAEGray_BG05_mlL1_DMask_amodalClipBox_classAware_lmo/inference_model_final_wo_optim/lmo_bop_test/convnext-a6-AugCosyAAEGray-BG05-mlL1-DMask-amodalClipBox-classAware-lmo-test-iter0_lmo-test.csv"
# vis_dir = "output/gdrn/lmo/a6_cPnP_AugAAETrunc_BG0.5_lmo_real_pbr0.1_40e/inference_model_final/lmo_test/vis_gt_pred"
vis_dir = "output/gdrn/lmo/a6_cPnP_AugAAETrunc_BG0.5_lmo_real_pbr0.1_40e/inference_model_final/lmo_test/lmo_vis_gt_pred_full"

mmcv.mkdir_or_exist(vis_dir)

print(pred_path)
preds = mmcv.load(pred_path)

dataset_name = "lmo_test"
print(dataset_name)
register_datasets([dataset_name])

meta = MetadataCatalog.get(dataset_name)
print("MetadataCatalog: ", meta)
objs = meta.objs

dset_dicts = DatasetCatalog.get(dataset_name)
for d in tqdm(dset_dicts):
    K = d["cam"]
    file_name = d["file_name"]
    img = read_image_mmcv(file_name, format="BGR")

    scene_im_id_split = d["scene_im_id"].split("/")
    scene_id = scene_im_id_split[0]
    im_id = int(scene_im_id_split[1])

    imH, imW = img.shape[:2]
    annos = d["annotations"]
    masks = [cocosegm2mask(anno["segmentation"], imH, imW) for anno in annos]
    bboxes = [anno["bbox"] for anno in annos]
    bbox_modes = [anno["bbox_mode"] for anno in annos]
    bboxes_xyxy = np.array(
        [BoxMode.convert(box, box_mode, BoxMode.XYXY_ABS) for box, box_mode in zip(bboxes, bbox_modes)]
    )
    kpts_3d_list = [anno["bbox3d_and_center"] for anno in annos]
    quats = [anno["quat"] for anno in annos]
    transes = [anno["trans"] for anno in annos]
    Rs = [quat2mat(quat) for quat in quats]
    # 0-based label
    cat_ids = [anno["category_id"] for anno in annos]

    kpts_2d = [misc.project_pts(kpt3d, K, R, t) for kpt3d, R, t in zip(kpts_3d_list, Rs, transes)]

    obj_names = [objs[cat_id] for cat_id in cat_ids]

    kpts_2d_est = []
    est_Rs = []
    est_ts = []

    kpts_2d_gt = []
    gt_Rs = []
    gt_ts = []

    kpts_3d_list_sel = []
    labels = []

    maxx, maxy, minx, miny = 0, 0, 1000, 1000
    for anno_i, anno in enumerate(annos):
        kpt_2d_gt = kpts_2d[anno_i]
        obj_name = obj_names[anno_i]

        try:
            R_est = preds[obj_name][file_name]["R"]
            t_est = preds[obj_name][file_name]["t"]
            score = preds[obj_name][file_name]["score"]
        except:
            continue
        if score < score_thr:
            continue

        labels.append(objects.index(obj_name))  # 0-based label

        est_Rs.append(R_est)
        est_ts.append(t_est)

        kpts_3d_list_sel.append(kpts_3d_list[anno_i])
        kpt_2d_est = misc.project_pts(kpts_3d_list[anno_i], K, R_est, t_est)
        kpts_2d_est.append(kpt_2d_est)

        gt_Rs.append(Rs[anno_i])
        gt_ts.append(transes[anno_i])
        kpts_2d_gt.append(kpts_2d[anno_i])

        for i in range(len(kpt_2d_est)):
            maxx, maxy, minx, miny = (
                max(maxx, kpt_2d_est[i][0]),
                max(maxy, kpt_2d_est[i][1]),
                min(minx, kpt_2d_est[i][0]),
                min(miny, kpt_2d_est[i][1]),
            )
            maxx, maxy, minx, miny = (
                max(maxx, kpt_2d_gt[i][0]),
                max(maxy, kpt_2d_gt[i][1]),
                min(minx, kpt_2d_gt[i][0]),
                min(miny, kpt_2d_gt[i][1]),
            )
    center = np.array([(minx + maxx) / 2, (miny + maxy) / 2])
    scale = max(maxx - minx, maxy - miny) * 1.5  # + 10
    crop_minx = max(0, center[0] - scale / 2)
    crop_miny = max(0, center[1] - scale / 2)
    crop_maxx = min(imW - 1, center[0] + scale / 2)
    crop_maxy = min(imH - 1, center[1] + scale / 2)
    scale = min(scale, min(crop_maxx - crop_minx, crop_maxy - crop_miny))

    zoomed_im = crop_resize_by_warp_affine(img, center, scale, out_size)
    im_zoom_gray = mmcv.bgr2gray(zoomed_im, keepdim=True)
    im_zoom_gray_3 = np.concatenate([im_zoom_gray, im_zoom_gray, im_zoom_gray], axis=2)
    # print(im_zoom_gray.shape)
    K_zoom = K.copy()
    K_zoom[0, 2] -= center[0] - scale / 2
    K_zoom[1, 2] -= center[1] - scale / 2
    K_zoom[0, :] *= out_size / scale
    K_zoom[1, :] *= out_size / scale

    gt_poses = [np.hstack([_R, _t.reshape(3, 1)]) for _R, _t in zip(gt_Rs, gt_ts)]
    poses = [np.hstack([_R, _t.reshape(3, 1)]) for _R, _t in zip(est_Rs, est_ts)]

    ren.render(
        labels,
        poses,
        K=K_zoom,
        image_tensor=image_tensor,
        background=im_zoom_gray_3,
    )
    ren_bgr = (image_tensor[:, :, :3].detach().cpu().numpy() + 0.5).astype("uint8")

    # gt_masks = []
    # est_masks = []
    for label, gt_pose, est_pose in zip(labels, gt_poses, poses):
        ren.render([label], [gt_pose], K=K_zoom, seg_tensor=seg_tensor)
        gt_mask = (seg_tensor[:, :, 0].detach().cpu().numpy() > 0).astype("uint8")

        ren.render([label], [est_pose], K=K_zoom, seg_tensor=seg_tensor)
        est_mask = (seg_tensor[:, :, 0].detach().cpu().numpy() > 0).astype("uint8")

        gt_edge = get_edge(gt_mask, bw=3, out_channel=1)
        est_edge = get_edge(est_mask, bw=3, out_channel=1)

        # zoomed_im[gt_edge != 0] = np.array(mmcv.color_val("blue"))
        # zoomed_im[est_edge != 0] = np.array(mmcv.color_val("green"))

        ren_bgr[gt_edge != 0] = np.array(mmcv.color_val("blue"))
        ren_bgr[est_edge != 0] = np.array(mmcv.color_val("green"))

    vis_im = ren_bgr

    # vis_im_add = (im_zoom_gray_3 * 0.3 + ren_bgr * 0.7).astype("uint8")

    kpts_2d_gt_zoom = [misc.project_pts(kpt3d, K_zoom, R, t) for kpt3d, R, t in zip(kpts_3d_list_sel, gt_Rs, gt_ts)]
    kpts_2d_est_zoom = [misc.project_pts(kpt3d, K_zoom, R, t) for kpt3d, R, t in zip(kpts_3d_list_sel, est_Rs, est_ts)]
    linewidth = 3
    visualizer = MyVisualizer(zoomed_im[:, :, ::-1], meta)
    for kpt_2d_gt_zoom, kpt_2d_est_zoom in zip(kpts_2d_gt_zoom, kpts_2d_est_zoom):
        visualizer.draw_bbox3d_and_center(
            kpt_2d_gt_zoom, top_color=_BLUE, bottom_color=_GREY, linewidth=linewidth, draw_center=True
        )
        visualizer.draw_bbox3d_and_center(
            kpt_2d_est_zoom, top_color=_GREEN, bottom_color=_GREY, linewidth=linewidth, draw_center=True
        )
    vis_im = visualizer.get_output()
    save_path = osp.join(vis_dir, "{}_{:06d}_gt_est.png".format(scene_id, im_id))
    vis_im.save(save_path)

    save_path_0 = osp.join(vis_dir, "{}_{:06d}_im.png".format(scene_id, im_id))
    mmcv.imwrite(zoomed_im, save_path_0)

    save_path = osp.join(vis_dir, "{}_{:06d}_gt_est.png".format(scene_id, im_id))
    mmcv.imwrite(vis_im, save_path)

    # if True:
    #     # grid_show([zoomed_im[:, :, ::-1], vis_im[:, :, ::-1]], ["im", "est"], row=1, col=2)
    #     # im_show = cv2.hconcat([zoomed_im, vis_im, vis_im_add])
    #     im_show = cv2.hconcat([zoomed_im, vis_im])
    #     cv2.imshow("im_est", im_show)
    #     if cv2.waitKey(0) == 27:
    #         break  # esc to quit

Full console output:

(gdrnpp) mona@ada:~/gdrnpp_bop2022$ python core/gdrn_modeling/tools/lmo/lmo_2_vis_poses.py 
/home/mona/.local/lib/python3.10/site-packages/mmcv/__init__.py:20: UserWarning: On January 1, 2023, MMCV will release v2.0.0, in which it will remove components related to the training process and add a data transformation module. In addition, it will rename the package names mmcv to mmcv-lite and mmcv-full to mmcv. See https://github.com/open-mmlab/mmcv/blob/master/docs/en/compatibility.md for more details.
  warnings.warn(
/home/mona/gdrnpp_bop2022/core/gdrn_modeling/tools/lmo/../../../../lib/pysixd/misc.py:586: NumbaDeprecationWarning: The 'nopython' keyword argument was not supplied to the 'numba.jit' decorator. The implicit default value for this argument is currently False, but it will be changed to True in Numba 0.59.0. See https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit for details.
  def get_obj_im_c(K, t):
/home/mona/gdrnpp_bop2022/core/gdrn_modeling/tools/lmo/../../../../lib/pysixd/misc.py:765: NumbaDeprecationWarning: The 'nopython' keyword argument was not supplied to the 'numba.jit' decorator. The implicit default value for this argument is currently False, but it will be changed to True in Numba 0.59.0. See https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit for details.
  def compute_2d_bbox_xyxy_from_pose(points, pose, K, width=640, height=480, clip=False):
/home/mona/gdrnpp_bop2022/core/gdrn_modeling/tools/lmo/../../../../lib/pysixd/misc.py:793: NumbaDeprecationWarning: The 'nopython' keyword argument was not supplied to the 'numba.jit' decorator. The implicit default value for this argument is currently False, but it will be changed to True in Numba 0.59.0. See https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit for details.
  def compute_2d_bbox_xyxy_from_pose_v2(points, pose, K, width=640, height=480, clip=False):
/home/mona/gdrnpp_bop2022/core/gdrn_modeling/tools/lmo/../../../../lib/pysixd/misc.py:822: NumbaDeprecationWarning: The 'nopython' keyword argument was not supplied to the 'numba.jit' decorator. The implicit default value for this argument is currently False, but it will be changed to True in Numba 0.59.0. See https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit for details.
  def compute_2d_bbox_xywh_from_pose(points, pose, K, width=640, height=480, clip=False):
Creating GL context for Cuda device 0
;(egl_renderer) Loaded EGL version: 1.5.
  0%|                                                                                                                                                                                                                                                                                                                                           | 0/8 [00:00<?, ?it/s][0122_085758@egl_renderer_v3.py:558] datasets/BOP_DATASETS/lmo/models/obj_000001.ply
[0122_085758@meshutil.py:403] datasets/BOP_DATASETS/lmo/models/obj_000001.ply loaded cache file: .cache/5a69126f0e708423031ae00695ad654a_load_mesh_pysixd.pkl
[0122_085758@egl_renderer_v3.py:567] is_textured: False | is_cad: False | is_materialed: False
[0122_085758@egl_renderer_v3.py:570] ['vertices', 'normals', 'colors', 'faces', 'texturecoords', 'is_cad', 'uMatDiffuse', 'uMatSpecular', 'uMatAmbient', 'uMatShininess']
[0122_085758@egl_renderer_v3.py:585] colors: 0.6901960968971252
[0122_085758@egl_renderer_v3.py:558] datasets/BOP_DATASETS/lmo/models/obj_000005.ply
[0122_085758@meshutil.py:403] datasets/BOP_DATASETS/lmo/models/obj_000005.ply loaded cache file: .cache/2a63b9c7db7d4cb071cdae2803cc9785_load_mesh_pysixd.pkl
[0122_085758@egl_renderer_v3.py:567] is_textured: False | is_cad: False | is_materialed: False
[0122_085758@egl_renderer_v3.py:570] ['vertices', 'normals', 'colors', 'faces', 'texturecoords', 'is_cad', 'uMatDiffuse', 'uMatSpecular', 'uMatAmbient', 'uMatShininess']
[0122_085758@egl_renderer_v3.py:585] colors: 0.9137254953384399
[0122_085758@egl_renderer_v3.py:558] datasets/BOP_DATASETS/lmo/models/obj_000006.ply
[0122_085758@meshutil.py:403] datasets/BOP_DATASETS/lmo/models/obj_000006.ply loaded cache file: .cache/c22328d3bc0a315a6c30d71ec5721697_load_mesh_pysixd.pkl
[0122_085758@egl_renderer_v3.py:567] is_textured: False | is_cad: False | is_materialed: False
[0122_085758@egl_renderer_v3.py:570] ['vertices', 'normals', 'colors', 'faces', 'texturecoords', 'is_cad', 'uMatDiffuse', 'uMatSpecular', 'uMatAmbient', 'uMatShininess']
[0122_085758@egl_renderer_v3.py:585] colors: 0.9098039269447327
[0122_085758@egl_renderer_v3.py:558] datasets/BOP_DATASETS/lmo/models/obj_000008.ply
[0122_085758@meshutil.py:403] datasets/BOP_DATASETS/lmo/models/obj_000008.ply loaded cache file: .cache/b87ccbce6c3f76097ebc6e36e725b843_load_mesh_pysixd.pkl
[0122_085758@egl_renderer_v3.py:567] is_textured: False | is_cad: False | is_materialed: False
[0122_085758@egl_renderer_v3.py:570] ['vertices', 'normals', 'colors', 'faces', 'texturecoords', 'is_cad', 'uMatDiffuse', 'uMatSpecular', 'uMatAmbient', 'uMatShininess']
[0122_085758@egl_renderer_v3.py:585] colors: 0.7843137383460999
[0122_085758@egl_renderer_v3.py:558] datasets/BOP_DATASETS/lmo/models/obj_000009.ply
[0122_085758@meshutil.py:403] datasets/BOP_DATASETS/lmo/models/obj_000009.ply loaded cache file: .cache/60dbff1e143e852cd74727cfdd8ee3c5_load_mesh_pysixd.pkl
[0122_085758@egl_renderer_v3.py:567] is_textured: False | is_cad: False | is_materialed: False
[0122_085758@egl_renderer_v3.py:570] ['vertices', 'normals', 'colors', 'faces', 'texturecoords', 'is_cad', 'uMatDiffuse', 'uMatSpecular', 'uMatAmbient', 'uMatShininess']
[0122_085758@egl_renderer_v3.py:585] colors: 0.9529411792755127
[0122_085758@egl_renderer_v3.py:558] datasets/BOP_DATASETS/lmo/models/obj_000010.ply
[0122_085758@meshutil.py:403] datasets/BOP_DATASETS/lmo/models/obj_000010.ply loaded cache file: .cache/64a98ada262b40dba88884049926d85d_load_mesh_pysixd.pkl
[0122_085758@egl_renderer_v3.py:567] is_textured: False | is_cad: False | is_materialed: False
[0122_085758@egl_renderer_v3.py:570] ['vertices', 'normals', 'colors', 'faces', 'texturecoords', 'is_cad', 'uMatDiffuse', 'uMatSpecular', 'uMatAmbient', 'uMatShininess']
[0122_085758@egl_renderer_v3.py:585] colors: 0.9372549057006836
[0122_085758@egl_renderer_v3.py:558] datasets/BOP_DATASETS/lmo/models/obj_000011.ply
[0122_085758@meshutil.py:403] datasets/BOP_DATASETS/lmo/models/obj_000011.ply loaded cache file: .cache/0c31b6adc192f9379f49e053fcce6868_load_mesh_pysixd.pkl
[0122_085758@egl_renderer_v3.py:567] is_textured: False | is_cad: False | is_materialed: False
[0122_085758@egl_renderer_v3.py:570] ['vertices', 'normals', 'colors', 'faces', 'texturecoords', 'is_cad', 'uMatDiffuse', 'uMatSpecular', 'uMatAmbient', 'uMatShininess']
[0122_085758@egl_renderer_v3.py:585] colors: 0.800000011920929
[0122_085758@egl_renderer_v3.py:558] datasets/BOP_DATASETS/lmo/models/obj_000012.ply
[0122_085758@meshutil.py:403] datasets/BOP_DATASETS/lmo/models/obj_000012.ply loaded cache file: .cache/55aff959da94ec771030b987311bfce2_load_mesh_pysixd.pkl
[0122_085758@egl_renderer_v3.py:567] is_textured: False | is_cad: False | is_materialed: False
[0122_085758@egl_renderer_v3.py:570] ['vertices', 'normals', 'colors', 'faces', 'texturecoords', 'is_cad', 'uMatDiffuse', 'uMatSpecular', 'uMatAmbient', 'uMatShininess']
[0122_085758@egl_renderer_v3.py:585] colors: 0.4431372582912445
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00, 175.96it/s]
./output/gdrn/lmo_pbr/convnext_a6_AugCosyAAEGray_BG05_mlL1_DMask_amodalClipBox_classAware_lmo/inference_model_final_wo_optim/lmo_bop_test/convnext-a6-AugCosyAAEGray-BG05-mlL1-DMask-amodalClipBox-classAware-lmo-test-iter0_lmo-test.csv
Traceback (most recent call last):
  File "/home/mona/gdrnpp_bop2022/core/gdrn_modeling/tools/lmo/lmo_2_vis_poses.py", line 73, in <module>
    preds = mmcv.load(pred_path)
  File "/home/mona/.local/lib/python3.10/site-packages/mmcv/fileio/io.py", line 57, in load
    raise TypeError(f'Unsupported format: {file_format}')
TypeError: Unsupported format: csv

Here are the first 5 lines of the CSV file for lmo:

(gdrnpp) mona@ada:~/gdrnpp_bop2022$ head -5 ./output/gdrn/lmo_pbr/convnext_a6_AugCosyAAEGray_BG05_mlL1_DMask_amodalClipBox_classAware_lmo/inference_model_final_wo_optim/lmo_bop_test/convnext-a6-AugCosyAAEGray-BG05-mlL1-DMask-amodalClipBox-classAware-lmo-test-iter0_lmo-test.csv
scene_id,im_id,obj_id,score,R,t,time
2,3,1,0.05217500030994415,-0.2535116374492645 0.9566980004310608 0.14304131269454956 0.9103896021842957 0.285951167345047 -0.29903629422187805 -0.3269902169704437 0.05441415309906006 -0.9434598684310913,-425.2376708984375 172.81687927246094 1129.8612060546875,0.4210243449633708
2,3,5,0.9502661228179932,0.9517205953598022 0.3029145300388336 -0.04970643296837807 0.24394260346889496 -0.8446343541145325 -0.47653424739837646 -0.18633291125297546 0.44140195846557617 -0.8777496218681335,135.6344757080078 45.09806442260742 974.8419799804688,0.4210243449633708
2,3,6,0.9458780288696289,0.24011392891407013 -0.9618154168128967 -0.13136383891105652 -0.9335251450538635 -0.19167155027389526 -0.30297327041625977 0.2662257254123688 0.19537955522537231 -0.9439018368721008,116.73883819580078 -320.5184020996094 1244.8701171875,0.4210243449633708
2,3,8,0.9572007656097412,0.43722131848335266 0.8989073038101196 -0.028338680043816566 0.7779494524002075 -0.3938230276107788 -0.4895896911621094 -0.4512562155723572 0.1920129954814911 -0.8714922666549683,103.55290985107422 -88.23550415039062 1039.24658203125,0.4210243449633708
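
For reference, these columns follow the standard BOP results format: R is nine space-separated floats in row-major order and t is a translation in millimeters. A short sketch of recovering a 3x4 pose from the first data row, assuming pandas and the pred_path shown above:

import numpy as np
import pandas as pd

df = pd.read_csv(pred_path)  # the csv path shown above
row = df.iloc[0]
R = np.fromstring(row["R"], sep=" ").reshape(3, 3)  # row-major 3x3 rotation
t = np.fromstring(row["t"], sep=" ")                # translation in mm
pose = np.hstack([R, t.reshape(3, 1)])              # 3x4 [R|t]
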
monajalal commented 8 months ago

I added a CSV parser and made some further modifications; a quick sanity check of the CSV-to-dataset key matching follows the script. I'm not sure this is fully correct, so feel free to comment.

import mmcv
import os
import os.path as osp
import numpy as np
import sys
from tqdm import tqdm
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode
import torch

import pandas as pd

cur_dir = osp.dirname(osp.abspath(__file__))
sys.path.insert(0, osp.join(cur_dir, "../../../../"))

from lib.vis_utils.colormap import colormap
from lib.utils.mask_utils import cocosegm2mask, get_edge
from core.utils.data_utils import crop_resize_by_warp_affine, read_image_mmcv
from core.gdrn_modeling.datasets.dataset_factory import register_datasets
from lib.pysixd import misc
from transforms3d.quaternions import quat2mat
from lib.egl_renderer.egl_renderer_v3 import EGLRenderer

from core.utils.my_visualizer import MyVisualizer, _GREY, _GREEN, _BLUE

def load_predicted_csv(fname):
    """Load a BOP-format results CSV into a list of per-row dicts."""
    df = pd.read_csv(fname)
    info_list = df.to_dict("records")
    return info_list

def parse_Rt_in_csv(_item):
    """Parse a space-separated string of floats (an R or t cell) into a 1D array."""
    return np.array([float(i) for i in _item.strip(" ").split(" ")])

out_size = 512
score_thr = 0.3
colors = colormap(rgb=False, maximum=255)

id2obj = {
    1: "ape",
    #  2: 'benchvise',
    #  3: 'bowl',
    #  4: 'camera',
    5: "can",
    6: "cat",
    #  7: 'cup',
    8: "driller",
    9: "duck",
    10: "eggbox",
    11: "glue",
    12: "holepuncher",
    #  13: 'iron',
    #  14: 'lamp',
    #  15: 'phone'
}
objects = list(id2obj.values())

tensor_kwargs = {"device": torch.device("cuda"), "dtype": torch.float32}
image_tensor = torch.empty((out_size, out_size, 4), **tensor_kwargs).detach()
seg_tensor = torch.empty((out_size, out_size, 4), **tensor_kwargs).detach()
# image_tensor = torch.empty((480, 640, 4), **tensor_kwargs).detach()

model_dir = "datasets/BOP_DATASETS/lmo/models/"

model_paths = [osp.join(model_dir, f"obj_{obj_id:06d}.ply") for obj_id in id2obj]

ren = EGLRenderer(
    model_paths,
    vertex_scale=0.001,
    use_cache=True,
    width=out_size,
    height=out_size,
)

# NOTE:
# pred_path = "output/gdrn/lmo/a6_cPnP_AugAAETrunc_BG0.5_lmo_real_pbr0.1_40e/inference_model_final/lmo_test/a6-cPnP-AugAAETrunc-BG0.5-lmo-real-pbr0.1-40e-test_lmo_test_preds.pkl"
pred_path = "./output/gdrn/lmo_pbr/convnext_a6_AugCosyAAEGray_BG05_mlL1_DMask_amodalClipBox_classAware_lmo/inference_model_final_wo_optim/lmo_bop_test/convnext-a6-AugCosyAAEGray-BG05-mlL1-DMask-amodalClipBox-classAware-lmo-test-iter0_lmo-test.csv"
# vis_dir = "output/gdrn/lmo/a6_cPnP_AugAAETrunc_BG0.5_lmo_real_pbr0.1_40e/inference_model_final/lmo_test/vis_gt_pred"
vis_dir = "output/gdrn/lmo/a6_cPnP_AugAAETrunc_BG0.5_lmo_real_pbr0.1_40e/inference_model_final/lmo_test/lmo_vis_gt_pred_full"

mmcv.mkdir_or_exist(vis_dir)

preds_csv = load_predicted_csv(pred_path)

preds = {}
for item in preds_csv:
    scene_id = item["scene_id"]
    im_id = item["im_id"]
    obj_id = item["obj_id"]
    obj_name = id2obj[obj_id]
    # this key must match the "scene_id/im_id" string derived from each image path below
    file_name = f"{scene_id}/{im_id}"
    if obj_name not in preds:
        preds[obj_name] = {}

    if file_name not in preds[obj_name]:
        preds[obj_name][file_name] = {}

    preds[obj_name][file_name]["score"] = item["score"]
    preds[obj_name][file_name]["R"] = parse_Rt_in_csv(item["R"]).reshape(3, 3)
    preds[obj_name][file_name]["t"] = parse_Rt_in_csv(item["t"])

# dataset_name = "lmo_test"
dataset_name = "lmo_bop_test"

print(dataset_name)
register_datasets([dataset_name])

meta = MetadataCatalog.get(dataset_name)
print("MetadataCatalog: ", meta)
objs = meta.objs

dset_dicts = DatasetCatalog.get(dataset_name)
for d in tqdm(dset_dicts):
    K = d["cam"]
    file_name = d["file_name"]
    # derive the "scene_id/im_id" key (e.g. "2/3") from the image path so it
    # matches the keys built from the CSV above
    path_parts = file_name.split("/")
    scene_id = str(int(path_parts[-3]))
    im_id = str(int(os.path.splitext(path_parts[-1])[0]))
    converted_format = f"{scene_id}/{im_id}"

    img = read_image_mmcv(file_name, format="BGR")

    scene_im_id_split = d["scene_im_id"].split("/")
    scene_id = scene_im_id_split[0]
    im_id = int(scene_im_id_split[1])

    imH, imW = img.shape[:2]
    annos = d["annotations"]
    masks = [cocosegm2mask(anno["segmentation"], imH, imW) for anno in annos]
    bboxes = [anno["bbox"] for anno in annos]
    bbox_modes = [anno["bbox_mode"] for anno in annos]
    bboxes_xyxy = np.array(
        [BoxMode.convert(box, box_mode, BoxMode.XYXY_ABS) for box, box_mode in zip(bboxes, bbox_modes)]
    )
    kpts_3d_list = [anno["bbox3d_and_center"] for anno in annos]

    quats = [anno["quat"] for anno in annos]
    transes = [anno["trans"] for anno in annos]
    Rs = [quat2mat(quat) for quat in quats]
    # 0-based label
    cat_ids = [anno["category_id"] for anno in annos]

    kpts_2d = [misc.project_pts(kpt3d, K, R, t) for kpt3d, R, t in zip(kpts_3d_list, Rs, transes)]

    obj_names = [objs[cat_id] for cat_id in cat_ids]

    kpts_2d_est = []
    est_Rs = []
    est_ts = []

    kpts_2d_gt = []
    gt_Rs = []
    gt_ts = []

    kpts_3d_list_sel = []
    labels = []

    maxx, maxy, minx, miny = 0, 0, 1000, 1000
    for anno_i, anno in enumerate(annos):
        kpt_2d_gt = kpts_2d[anno_i]
        obj_name = obj_names[anno_i]
        try:
            R_est = preds[obj_name][converted_format]["R"]
            t_est = preds[obj_name][converted_format]["t"]
            score = preds[obj_name][converted_format]["score"]
        except KeyError:
            # no CSV prediction for this object in this image
            continue
        if score < score_thr:
            continue

        labels.append(objects.index(obj_name))  # 0-based label

        est_Rs.append(R_est)
        est_ts.append(t_est)

        kpts_3d_list_sel.append(kpts_3d_list[anno_i])
        kpt_2d_est = misc.project_pts(kpts_3d_list[anno_i], K, R_est, t_est)
        kpts_2d_est.append(kpt_2d_est)

        gt_Rs.append(Rs[anno_i])
        gt_ts.append(transes[anno_i])
        kpts_2d_gt.append(kpts_2d[anno_i])

        for i in range(len(kpt_2d_est)):
            maxx, maxy, minx, miny = (
                max(maxx, kpt_2d_est[i][0]),
                max(maxy, kpt_2d_est[i][1]),
                min(minx, kpt_2d_est[i][0]),
                min(miny, kpt_2d_est[i][1]),
            )
            maxx, maxy, minx, miny = (
                max(maxx, kpt_2d_gt[i][0]),
                max(maxy, kpt_2d_gt[i][1]),
                min(minx, kpt_2d_gt[i][0]),
                min(miny, kpt_2d_gt[i][1]),
            )
    center = np.array([(minx + maxx) / 2, (miny + maxy) / 2])
    scale = max(maxx - minx, maxy - miny) * 1.5  # + 10
    crop_minx = max(0, center[0] - scale / 2)
    crop_miny = max(0, center[1] - scale / 2)
    crop_maxx = min(imW - 1, center[0] + scale / 2)
    crop_maxy = min(imH - 1, center[1] + scale / 2)
    scale = min(scale, min(crop_maxx - crop_minx, crop_maxy - crop_miny))

    zoomed_im = crop_resize_by_warp_affine(img, center, scale, out_size)
    im_zoom_gray = mmcv.bgr2gray(zoomed_im, keepdim=True)
    im_zoom_gray_3 = np.concatenate([im_zoom_gray, im_zoom_gray, im_zoom_gray], axis=2)
    # print(im_zoom_gray.shape)
    K_zoom = K.copy()
    K_zoom[0, 2] -= center[0] - scale / 2
    K_zoom[1, 2] -= center[1] - scale / 2
    K_zoom[0, :] *= out_size / scale
    K_zoom[1, :] *= out_size / scale

    gt_poses = [np.hstack([_R, _t.reshape(3, 1)]) for _R, _t in zip(gt_Rs, gt_ts)]
    poses = [np.hstack([_R, _t.reshape(3, 1)]) for _R, _t in zip(est_Rs, est_ts)]

    ren.render(
        labels,
        poses,
        K=K_zoom,
        image_tensor=image_tensor,
        background=im_zoom_gray_3,
    )
    ren_bgr = (image_tensor[:, :, :3].detach().cpu().numpy() + 0.5).astype("uint8")

    # gt_masks = []
    # est_masks = []
    for label, gt_pose, est_pose in zip(labels, gt_poses, poses):
        ren.render([label], [gt_pose], K=K_zoom, seg_tensor=seg_tensor)
        gt_mask = (seg_tensor[:, :, 0].detach().cpu().numpy() > 0).astype("uint8")

        ren.render([label], [est_pose], K=K_zoom, seg_tensor=seg_tensor)
        est_mask = (seg_tensor[:, :, 0].detach().cpu().numpy() > 0).astype("uint8")

        gt_edge = get_edge(gt_mask, bw=3, out_channel=1)
        est_edge = get_edge(est_mask, bw=3, out_channel=1)

        # zoomed_im[gt_edge != 0] = np.array(mmcv.color_val("blue"))
        # zoomed_im[est_edge != 0] = np.array(mmcv.color_val("green"))

        ren_bgr[gt_edge != 0] = np.array(mmcv.color_val("blue"))
        ren_bgr[est_edge != 0] = np.array(mmcv.color_val("green"))

    vis_im = ren_bgr

    # vis_im_add = (im_zoom_gray_3 * 0.3 + ren_bgr * 0.7).astype("uint8")

    kpts_2d_gt_zoom = [misc.project_pts(kpt3d, K_zoom, R, t) for kpt3d, R, t in zip(kpts_3d_list_sel, gt_Rs, gt_ts)]
    # print('kpts_2d_gt_zoom: ', kpts_2d_gt_zoom)
    kpts_2d_est_zoom = [misc.project_pts(kpt3d, K_zoom, R, t) for kpt3d, R, t in zip(kpts_3d_list_sel, est_Rs, est_ts)]
    # print('kpts_2d_est_zoom: ', kpts_2d_est_zoom)
    linewidth = 3
    visualizer = MyVisualizer(zoomed_im[:, :, ::-1], meta)
    for kpt_2d_gt_zoom, kpt_2d_est_zoom in zip(kpts_2d_gt_zoom, kpts_2d_est_zoom):
        visualizer.draw_bbox3d_and_center(
            kpt_2d_gt_zoom, top_color=_BLUE, bottom_color=_GREY, linewidth=linewidth, draw_center=True
        )
        visualizer.draw_bbox3d_and_center(
            kpt_2d_est_zoom, top_color=_GREEN, bottom_color=_GREY, linewidth=linewidth, draw_center=True
        )
    vis_im = visualizer.get_output()
    save_path = osp.join(vis_dir, "{}_{:06d}_gt_est.png".format(scene_id, im_id))
    vis_im.save(save_path)

    save_path_0 = osp.join(vis_dir, "{}_{:06d}_im.png".format(scene_id, im_id))
    mmcv.imwrite(zoomed_im, save_path_0)

    save_path = osp.join(vis_dir, "{}_{:06d}_gt_est.png".format(scene_id, im_id))
    # from detectron2.utils.visualizer import VisImage
    # import cv2
    # image = vis_im.get_image()
    # image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    # cv2.imshow('Image', image)  # Show the image in a window
    # cv2.waitKey(0)  # Wait for a key press to close the window
    # cv2.destroyAllWindows()  # Close all windows

    # VisImage.get_image() returns RGB while mmcv.imwrite expects BGR,
    # so flip the channels before saving (the visualizer was fed RGB above)
    vis_im_img = vis_im.get_image()[:, :, ::-1]
    mmcv.imwrite(vis_im_img, save_path)

    # if True:
    #     # grid_show([zoomed_im[:, :, ::-1], vis_im[:, :, ::-1]], ["im", "est"], row=1, col=2)
    #     # im_show = cv2.hconcat([zoomed_im, vis_im, vis_im_add])
    #     im_show = cv2.hconcat([zoomed_im, vis_im])
    #     cv2.imshow("im_est", im_show)
    #     if cv2.waitKey(0) == 27:
    #         break  # esc to quit
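
One way to verify that the CSV keys and the dataset's scene_im_id values actually line up is to dump and compare them; a standalone sketch (pred_path as defined in the script, names hypothetical):

import pandas as pd

df = pd.read_csv(pred_path)  # pred_path as defined in the script above
csv_keys = {f"{s}/{i}" for s, i in zip(df["scene_id"], df["im_id"])}
print(len(csv_keys), sorted(csv_keys)[:5])
# these should match the "scene_id/im_id" strings derived from d["scene_im_id"];
# any mismatch is silently skipped by the except KeyError above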

I'm getting output like this:

[screenshot of the rendered visualization]