Periodic Vibration Gaussian: Dynamic Urban Scene Reconstruction and Real-time Rendering
改变相机视角和推进时间后观察到不稳定的重建结果 #33

您好,感谢您严谨且优异的工作成果和分享! 我按照readme的说明做了waymo场景的训练和重建,使用的是预处理好的waymo数据集的0145050场景,在本地复现得到了略好于论文中提到的效果。 000 001

但当我进一步尝试略微移动相机视角和推进时间后,得到的重建结果比较模糊,下图是相机沿y方向移动0.03m后得到的重建结果 000 001

沿另一方向(例如z方向)移动0.3m后,可能会出现更多的重影 000 001

推进时间0.5s后也得到了类似的图像 000 001


  def update(self):
        """Recompute the cached camera transforms from the current pose and intrinsics.

        Call this after changing ``self.R``, ``self.T``, ``self.trans``,
        ``self.scale`` or the intrinsics so that the derived matrices used for
        rendering match the new camera state.

        Reads:
            R, T, trans, scale, znear, zfar, image_width, image_height and
            either (cx, cy, fx, fy) or (FoVx, FoVy).

        Writes:
            world_view_transform, projection_matrix, full_proj_transform,
            camera_center, c2w, grid, plus whichever of FoVx/FoVy or
            cx/cy/fx/fy is derived from the other set.
        """
        # The helper's matrix is stored transposed and moved to the GPU.
        self.world_view_transform = torch.tensor(
            getWorld2View2(self.R, self.T, self.trans, self.scale)
        ).transpose(0, 1).cuda()

        if self.cx is not None:
            # Explicit intrinsics are available: derive the fields of view from
            # the focal lengths and build a projection that honours the
            # (possibly off-center) principal point.
            self.FoVx = 2 * math.atan(0.5 * self.image_width / self.fx)
            self.FoVy = 2 * math.atan(0.5 * self.image_height / self.fy)
            self.projection_matrix = getProjectionMatrixCenterShift(
                self.znear, self.zfar, self.cx, self.cy, self.fx, self.fy,
                self.image_width, self.image_height,
            ).transpose(0, 1).cuda()
        else:
            # No principal point given: assume it sits at the image center and
            # derive the focal lengths from the stored fields of view.
            # Without this `else`, the branch below would always run and
            # silently discard the center-shifted projection computed above.
            self.cx = self.image_width / 2
            self.cy = self.image_height / 2
            self.fx = self.image_width / (2 * np.tan(self.FoVx * 0.5))
            self.fy = self.image_height / (2 * np.tan(self.FoVy * 0.5))
            self.projection_matrix = getProjectionMatrix(
                znear=self.znear, zfar=self.zfar, fovX=self.FoVx, fovY=self.FoVy,
            ).transpose(0, 1).cuda()

        # Compose world->view with the projection. Both matrices are stored
        # transposed, hence view first, then projection.
        # NOTE(review): this expression was truncated in the excerpt; it is
        # restored here following the reference 3D Gaussian Splatting camera
        # code - confirm against the repository.
        self.full_proj_transform = (
            self.world_view_transform.unsqueeze(0).bmm(self.projection_matrix.unsqueeze(0))
        ).squeeze(0)

        # Row 3 of the inverse of the transposed world->view matrix.
        self.camera_center = self.world_view_transform.inverse()[3, :3]
        # Undo the storage transpose before inverting to obtain camera->world.
        self.c2w = self.world_view_transform.transpose(0, 1).inverse()
        # Unnormalized pixel-coordinate grid on the GPU; [0] drops the leading
        # dimension returned by kornia.
        self.grid = kornia.utils.create_meshgrid(
            self.image_height, self.image_width, normalized_coordinates=False, device='cuda'
        )[0]


    # Evaluate every validation split: render each camera, save a 2x2
    # comparison grid per view, and report averaged image metrics.
    for config in validation_configs:
        if config['cameras'] and len(config['cameras']) > 0:
            # Running sums of the per-image metrics; divided by the camera
            # count after the loop.
            l1_test = 0.0
            psnr_test = 0.0
            ssim_test = 0.0
            lpips_test = 0.0
            # Output directory: <model_path>/eval/<split>_<iteration>_render
            # NOTE(review): nothing in this excerpt creates `outdir` before
            # files are written into it - confirm it exists beforehand.
            outdir = os.path.join(args.model_path, "eval", config['name'] + f"_{iteration}" + "_render")

            for idx, viewpoint in enumerate(tqdm(config['cameras'])):
                # NOTE(review): this `if` has no body in the excerpt (a line
                # such as `break`/`continue` appears to be missing); as
                # written the code does not parse.
                if idx > 10:
                # change viewpoint
                # Shifts the render time by 0.05. The attribute is mutated in
                # place, so the shift stays on this camera object after the
                # loop iteration ends.
                viewpoint.timestamp += 0.05
                # update the camera
                # NOTE(review): no statement follows the comment above in the
                # excerpt; presumably a call that refreshes the camera's
                # cached transforms was intended - confirm.
                render_pkg = renderFunc(viewpoint, scene.gaussians, *renderArgs, env_map=env_map)
                image = torch.clamp(render_pkg["render"], 0.0, 1.0)
                # Ground truth is the camera's stored image; presumably it
                # corresponds to the unshifted timestamp, so the metrics below
                # compare a time-shifted render against it - verify intent.
                gt_image = torch.clamp(viewpoint.original_image.to("cuda"), 0.0, 1.0)

                depth = render_pkg['depth']
                alpha = render_pkg['alpha']
                # Constant depth assigned to the non-opaque (background) part
                # when blending below.
                sky_depth = 900
                # Normalize the depth by alpha; EPS guards against division
                # by zero.
                depth = depth / alpha.clamp_min(EPS)
                if env_map is not None:
                    if args.depth_blend_mode == 0:  # harmonic mean
                        depth = 1 / (alpha / depth.clamp_min(EPS) + (1 - alpha) / sky_depth).clamp_min(EPS)
                    elif args.depth_blend_mode == 1:
                        # Linear (alpha-weighted) blend with the sky depth.
                        depth = alpha * depth + (1 - alpha) * sky_depth

                depth = visualize_depth(depth)
                # Repeat alpha 3x along the first dimension; presumably this
                # turns a single-channel map into 3 channels so it can be
                # tiled next to the RGB images - confirm shape.
                alpha = alpha.repeat(3, 1, 1)

                # 2x2 grid: ground truth, render / alpha, depth.
                grid = [gt_image, image, alpha, depth]
                grid = make_grid(grid, nrow=2)

                save_image(grid, os.path.join(outdir, f"{viewpoint.colmap_id:03d}.png"))

                l1_test += F.l1_loss(image, gt_image).double()
                psnr_test += psnr(image, gt_image).double()
                ssim_test += ssim(image, gt_image).double()
                lpips_test += lpips(image, gt_image, net_type='vgg').double()  # very slow

            # Averages use the full camera count of the split.
            # NOTE(review): if the loop is meant to stop early (see the
            # `idx > 10` check), these averages would be too low - confirm.
            psnr_test /= len(config['cameras'])
            l1_test /= len(config['cameras'])
            ssim_test /= len(config['cameras'])
            lpips_test /= len(config['cameras'])

            print("\n[ITER {}] Evaluating {}: L1 {} PSNR {} SSIM {} LPIPS {}".format(iteration, config['name'], l1_test, psnr_test, ssim_test, lpips_test))
            # Persist the averaged metrics next to the rendered grids (L1 is
            # printed above but not written to the JSON).
            with open(os.path.join(outdir, "metrics.json"), "w") as f:
                json.dump({"split": config['name'], "iteration": iteration, "psnr": psnr_test.item(), "ssim": ssim_test.item(), "lpips": lpips_test.item()}, f)

我使用的系统是ubuntu20.04,显卡RTX 3090,具体的环境配置如下 requirements.txt


Fumore commented 1 month ago

你好,相机偏移量较大时渲染novel view质量较低是基于gaussian的街景重建方法普遍存在的问题。我们的方法比较依赖点云初始化的质量,对gaussian_model.py中的create_from_pcd函数进行如下改动,去掉近处的随机初始化点云,只保留远景的随机初始化点云会得到更加稳健的重建效果。

def create_from_pcd(self, pcd: BasicPointCloud, spatial_lr_scale: float):
    self.spatial_lr_scale = spatial_lr_scale
    fused_point_cloud = torch.tensor(np.asarray(pcd.points)).float().cuda()
    fused_color = RGB2SH(torch.tensor(np.asarray(pcd.colors)).float().cuda())
    features = torch.zeros((fused_color.shape[0], 3, self.get_max_sh_channels)).float().cuda()
    features[:, :3, 0] = fused_color
    features[:, 3:, 1:] = 0.0
    ## random up and far
    r_max = 100000
    r_min = 15  ## around 150m for far point init
    num_sph = self.random_init_point
    theta = 2*torch.pi*torch.rand(num_sph)
    phi = (torch.pi/2*0.99*torch.rand(num_sph))**1.5 # x**a decay
    s = torch.rand(num_sph)
    r_1 = s*1/r_min+(1-s)*1/r_max
    r = 1/r_1
    pts_sph = torch.stack([r*torch.cos(theta)*torch.cos(phi), r*torch.sin(theta)*torch.cos(phi), r*torch.sin(phi)],dim=-1).cuda()
    # r_rec = r_min
    # num_rec = self.random_init_point
    # pts_rec = torch.stack([r_rec*(torch.rand(num_rec)-0.5),r_rec*(torch.rand(num_rec)-0.5),
    #                        r_rec*(torch.rand(num_rec))],dim=-1).cuda()
    # pts_sph = torch.cat([pts_rec, pts_sph], dim=0)
    pts_sph[:,2] = -pts_sph[:,2]+1
    fused_point_cloud = torch.cat([fused_point_cloud, pts_sph], dim=0)
    features = torch.cat([features,
                          torch.zeros([pts_sph.size(0), features.size(1), features.size(2)]).float().cuda()],