starsong98 commented 2 months ago

Hello. Thank you for releasing the code for your amazing paper!

I have a question regarding your evaluation of real-world generalization on the DAVIS 2017 dataset by backward warping.

The paper says that you used the DAVIS annotation masks to compute occlusion masks, and that you excluded occluded regions when computing the photometric/SSIM losses. How exactly did you compute these occlusion masks? Did you use a baseline occlusion estimation model? I cannot find the code for that in the released repository.

Best regards,

HanLingsgjk commented 1 month ago

def Davis_vis(model, iters=6):
    """Evaluate backward-warping quality on DAVIS sequences and save visualisations.

    For every sequence folder under the hard-coded image root, each pair of
    consecutive frames is fed to ``model``. The predicted flow is used to
    sample frame 2 (and its annotation mask) back onto frame 1, and
    ``get_ssim`` is called on the warped image and frame 1. Its per-pixel
    outputs are averaged separately over a foreground and a background mask.

    Masks used:
      * ``flowocc`` keeps pixels whose warped coordinate lies strictly inside
        ``(0, W) x (0, H)``; it gates both the foreground and background
        statistics.
      * ``occ`` keeps pixels where the frame-1 mask and the warped frame-2 mask
        agree; it is applied only to the warped image that is written to disk,
        not to the statistics.

    Outputs (all below the hard-coded ``dataroot``):
      * ``flow/<seq>/``  - flow visualisation, SSIM fg/bg in the file name
      * ``imout/<seq>/`` - masked warped image, photometric fg/bg in the name
      * ``dc/<seq>/``    - heat map of the model's 4th output, if not ``None``
      * ``records.txt``  - one line per sequence plus one overall line

    Args:
        model: callable invoked as
            ``model(image1, image2, iters=iters, test_mode=True)`` and expected
            to return four values, of which the 2nd (flow) and 4th (dchange)
            are used.
        iters: forwarded to ``model`` as its ``iters`` keyword.

    Returns:
        None. Results are written to disk and printed.
    """
    imroot = '/home/lh/CSCV_occ/Davis/JPEGImages/480p'
    dataroot = '/home/lh/CSCV_occ/ScaleFlowplusplus'
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(os.path.join(dataroot), exist_ok=True)
    txtpath = dataroot + '/records.txt'

    ssim_list_fgall = []
    ssim_list_bgall = []
    ploss_list_fgall = []
    ploss_list_bgall = []

    # Pre-initialised so the final 'ALL' line can still be written when the
    # image root contains no entries at all.
    seq_dir = ''

    # Context manager guarantees the records file is closed even if a frame
    # fails part-way through the evaluation.
    with open(txtpath, 'w') as record:
        for seq_dir in os.listdir(imroot):
            frames = sorted(glob(osp.join(imroot, seq_dir, '*.jpg')))
            print(seq_dir)
            if len(frames) < 2:
                # No frame pair available (e.g. a stray file or a single-frame
                # folder): skip it instead of crashing or recording NaN means.
                continue
            images1 = frames[:-1]
            images2 = frames[1:]
            nameid = seq_dir.split('/')[-1]

            # Create the optical-flow output folder
            output_filenameflow = os.path.join(dataroot, 'flow/', nameid)
            os.makedirs(output_filenameflow, exist_ok=True)
            # Re-projected (warped) images
            output_filenameim = os.path.join(dataroot, 'imout/', nameid)
            os.makedirs(output_filenameim, exist_ok=True)
            # Heat maps of the model's 4th output (dchange)
            output_filenamedc = os.path.join(dataroot, 'dc/', nameid)
            os.makedirs(output_filenamedc, exist_ok=True)

            # First compute the average losses of each sequence
            ssim_list_fg = []
            ssim_list_bg = []
            ploss_list_fg = []
            ploss_list_bg = []
            for frame_idx, (path1, path2) in enumerate(zip(images1, images2)):
                print(frame_idx)
                img1 = frame_utils.read_gen(path1)
                img2 = frame_utils.read_gen(path2)
                maskpath1 = path1.replace('JPEGImages', 'Annotations').replace('.jpg', '.png')
                maskpath2 = path2.replace('JPEGImages', 'Annotations').replace('.jpg', '.png')

                # Frame identifier: mask file name without its extension.
                idout = maskpath1.split('/')[-1].split('.')[0]

                foremask1 = frame_utils.read_gen(maskpath1)
                foremask2 = frame_utils.read_gen(maskpath2)

                img1 = np.array(img1).astype(np.uint8)[..., :3]
                img2 = np.array(img2).astype(np.uint8)[..., :3]
                foremask2 = np.array(foremask2).astype(np.uint8)
                foremask1 = np.array(foremask1).astype(np.uint8)

                # Multi-channel masks are reduced to their first channel.
                if foremask2.ndim > 2:
                    foremask2 = foremask2[:, :, 0]
                if foremask1.ndim > 2:
                    foremask1 = foremask1[:, :, 0]

                img1 = torch.from_numpy(img1).permute(2, 0, 1).float()
                img2 = torch.from_numpy(img2).permute(2, 0, 1).float()
                image1 = img1[None].cuda()
                image2 = img2[None].cuda()
                padder = InputPadder(image1.shape, mode='kitti', sp=16)
                image1, image2 = padder.pad(image1, image2)

                # Compute optical flow
                _, flow_pr, _, dchange = model(image1, image2, iters=iters, test_mode=True)

                flow = padder.unpad(flow_pr[0]).detach().cpu()
                image2 = padder.unpad(image2).detach().cpu()

                N, C, H, W = image2.shape
                foremask2 = torch.from_numpy(foremask2).float().view(1, 1, H, W)
                coordw = coords_grid(N, H, W) + flow

                # Sample frame 2 and its mask at the flow-displaced coordinates.
                maskout = bilinear_sampler(foremask2, coordw)
                imgout = bilinear_sampler(image2, coordw)
                imgout = imgout.permute(2, 3, 1, 0).squeeze(3).numpy()

                _, sall, ploss = get_ssim(imgout, img1.permute(1, 2, 0).numpy())

                if dchange is not None:
                    frame_id = idout + '.png'
                    dc_map = padder.unpad(dchange[0, 0]).detach().cpu().numpy()
                    datamin = np.min(dc_map)
                    datamax = np.max(dc_map)
                    mid_data = (datamin + datamax) * 0.5
                    half_range = mid_data - datamin
                    if half_range > 0:
                        normalised = ((dc_map - mid_data) * (1 / half_range)).clip(-1, 1)
                    else:
                        # Constant map: avoid the division by zero and place
                        # every pixel at the centre of the colour scale.
                        normalised = np.zeros_like(dc_map)
                    # The scaled range is [0, 256]; clamp to 255 so the maximum
                    # does not overflow when converted to uint8.
                    dc_index = np.clip(normalised * 128 + 128, 0, 255).astype(np.uint8)
                    colormap = plt.get_cmap('plasma')  # plasma viridis
                    heatmap = (colormap(dc_index) * 2 ** 8).astype(np.uint16)[:, :, :3]
                    heatmap = cv2.cvtColor(heatmap, cv2.COLOR_RGB2BGR)

                    cv2.imwrite('%s/%s' % (output_filenamedc, frame_id), heatmap * 255)

                coordw = coordw.numpy()
                # 1.0 where the warped coordinate stays strictly inside the image.
                flowocc = (coordw[0, 0] > 0) * (coordw[0, 0] < W) * (coordw[0, 1] > 0) * (coordw[0, 1] < H)
                flowocc = flowocc.astype(np.float64)

                maskshow = maskout[0, 0].numpy()
                maskshow2 = foremask1.astype(np.float64)
                # Pixels whose frame-1 label matches the warped frame-2 label;
                # only used to blank out the saved warped image.
                occ = np.abs(maskshow2 - maskshow) < 0.0001
                imgout = imgout * occ[:, :, np.newaxis]

                # Split into foreground and background and compute the SSIM of
                # each separately.
                maskfg = flowocc * (foremask1.astype(np.float64) / 255.)  # foreground mask
                # NOTE(review): this is positive wherever the warped mask value
                # is below 255 - confirm against the annotation value range.
                maskbg = flowocc * ((255 - maskshow) / 255.)
                ssimfg = sall[maskfg > 0].mean()
                ssimbg = sall[maskbg > 0].mean()

                print("ssimfg:", ssimfg, "ssimbg:", ssimbg)
                ploss = ploss.astype(np.uint8)
                plossfg = ploss[maskfg > 0].mean()
                plossbg = ploss[maskbg > 0].mean()

                filenname = idout + '_sfg_' + str(ssimfg)[0:6] + '_sbg_' + str(ssimbg)[0:6] + '.png'
                filennamep = idout + '_pfg_' + str(plossfg)[0:6] + '_pbg_' + str(plossbg)[0:6] + '.png'

                # Foreground statistics are only recorded when the foreground
                # mask exceeds 0.5 somewhere; background ones always are.
                if maskfg.max() > 0.5:
                    ssim_list_fg.append(ssimfg)
                    ploss_list_fg.append(plossfg)
                ssim_list_bg.append(ssimbg)
                ploss_list_bg.append(plossbg)

                flowviz = (flow2rgb(flow.permute(1, 2, 0).numpy()) * 255).astype(np.uint8)
                flowviz = cv2.cvtColor(flowviz, cv2.COLOR_RGB2BGR)
                cv2.imwrite(os.path.join(output_filenameflow, filenname), flowviz)
                imgout = imgout.astype(np.uint8)
                imgout = cv2.cvtColor(imgout, cv2.COLOR_RGB2BGR)
                cv2.imwrite(os.path.join(output_filenameim, filennamep), imgout)

                # What gets saved:
                #   - the warped image with mismatching pixels removed; the
                #     foreground/background SSIM and photometric loss
                #   - the flow visualisation, with SSIM / photometric loss in
                #     the file name
                #   - the depth-change-rate visualisation (optional)

            ssim_fg_mean = np.array(ssim_list_fg).mean()
            ssim_bg_mean = np.array(ssim_list_bg).mean()
            ploss_fg_mean = np.array(ploss_list_fg).mean()
            ploss_bg_mean = np.array(ploss_list_bg).mean()

            filercord = seq_dir + '_' + str(ssim_fg_mean)[0:6] + '_' + str(ssim_bg_mean)[0:6] + '_' + str(ploss_fg_mean)[0:6] + '_' + str(ploss_bg_mean)[0:6]
            record.write(filercord + "\n")

            ssim_list_fgall.append(ssim_fg_mean)
            ssim_list_bgall.append(ssim_bg_mean)
            ploss_list_fgall.append(ploss_fg_mean)
            ploss_list_bgall.append(ploss_bg_mean)

        ssim_fg_meanall = np.array(ssim_list_fgall).mean()
        ssim_bg_meanall = np.array(ssim_list_bgall).mean()
        ploss_fg_meanall = np.array(ploss_list_fgall).mean()
        ploss_bg_meanall = np.array(ploss_list_bgall).mean()
        # The overall line keeps the original format: 'ALL' followed by the
        # name of the last entry that was iterated.
        filercord = 'ALL' + seq_dir + '_' + str(ssim_fg_meanall)[0:6] + '_' + str(ssim_bg_meanall)[0:6] + '_' + str(ploss_fg_meanall)[0:6] + '_' + str(ploss_bg_meanall)[0:6]
        record.write(filercord + "\n")

HanLingsgjk commented 1 month ago

You can refer to the code above; it is the code I used for the evaluation. My occlusion computation is fairly rough: it is based mainly on the optical-flow results, removing pixels that move outside the image as well as pixels that move from the background to the foreground.

The foreground is then the foreground mask provided by DAVIS, and the background is the remaining part.

HanLingsgjk / UnifiedGeneralization

Computing Occlusion masks for DAVIS real-world performance evaluation #1

创建光流文件夹