facebookresearch / pytorch3d

PyTorch3D is FAIR's library of reusable components for deep learning with 3D data
https://pytorch3d.org/
Other
8.84k stars 1.32k forks source link

Differentiable rendering #1855

Open yejr0229 opened 3 months ago

yejr0229 commented 3 months ago

Hi, I have a uv map named 'init_texture' which is a nn.parameters, I send it to TexturesUV and render a image:

TexturesUV( maps=init_texture.unsqueeze(0).to(device) / 255, faces_uvs=faces.textures_idx[None, ...], verts_uvs=verts_uvs[None, ...]) images, fragments = renderer(mesh)

Then I compute a loss between this image and a generated image. The image and the nn.Parameter do receive gradients, but they are very small. The rendered image and the generated image: image. The gradients for the rendered image and the nn.Parameter: image

Seems like the grad didn't pass back to my 'init_texture', I wonder is TexturesUV() or renderer() can't pass the grad?

bottler commented 3 months ago

It depends on the exact settings of the renderer. Different configurations are differentiable in different ways. Perhaps you can share more of your code.

yejr0229 commented 3 months ago

Thanks a lot, following is the code.

# This is the code to define the renderer
renderer = init_renderer(cameras,
    shader=init_soft_phong_shader(
        camera=cameras,
        blend_params=BlendParams(),
        device=device),
    image_size=image_size, 
    faces_per_pixel=faces_per_pixel
)
# This is the definition of the init_renderer() function
def init_renderer(camera, shader, image_size, faces_per_pixel):
    raster_settings = RasterizationSettings(image_size=image_size, faces_per_pixel=faces_per_pixel)
    renderer = MeshRendererWithFragments(
        rasterizer=MeshRasterizer(
            cameras=camera,
            raster_settings=raster_settings
        ),
        shader=shader
    )
    return renderer
# This is the definition of the init_soft_phong_shader() function
def init_soft_phong_shader(camera, blend_params, device):
    lights = AmbientLights(device=device)
    shader = SoftPhongShader(
        cameras=camera,
        lights=lights,
        device=device,
        blend_params=blend_params
    )

    return shader
bottler commented 3 months ago

Perhaps share the complete code? And also, what is <image>.abs().max()? perhaps the gradient is negative.

yejr0229 commented 3 months ago

the grad is small whether is positive or negative: 企业微信截图_17244162316352

and here is my complete code:


pbar = tqdm(range(total_iters), desc='fitting mesh colors')  
for iter in pbar:

    optimizer.zero_grad()
    ### rendering screen PBR masks  [H, W, 1]  
    albedo_msk_within_uv = render_mask_within_uv(exist_albedo, mesh, faces, verts_uvs, camera_1_view, image_size, faces_per_pixel, device)
    roughness_msk_within_uv = render_mask_within_uv(exist_roughness, mesh, faces, verts_uvs, camera_1_view, image_size, faces_per_pixel, device)
    metallic_msk_within_uv = render_mask_within_uv(exist_metallic, mesh, faces, verts_uvs, camera_1_view, image_size, faces_per_pixel, device)

    ### rendering screen PBR    imageio.imwrite('deubg_diffback/render_white.png', (masked_pred_albedo * 255).clamp(0, 255).byte().cpu().numpy())
    pred_albedo, masked_pred_albedo, object_mask_albedo = render_within_texture(init_albedo, albedo_msk_within_uv, mesh, faces, 
                                                verts_uvs, camera_1_view, image_size, faces_per_pixel, device)
    masked_pred_albedo.retain_grad()
    pred_roughness, masked_pred_roughness, object_mask_roughness = render_within_texture(init_roughness, roughness_msk_within_uv, mesh, faces,
                                                verts_uvs, camera_1_view, image_size, faces_per_pixel, device)
    pred_metallic, masked_pred_metallic, object_mask_metallic = render_within_texture(init_metallic, metallic_msk_within_uv, mesh, faces,
                                                verts_uvs, camera_1_view, image_size, faces_per_pixel, device)

    gen_albedo_1_view = torch.tensor(albedo).to(device) * albedo_msk_within_uv
    # a=(gen_albedo_1_view*255).detach().cpu().numpy().astype(np.uint8)  imageio.imwrite('deubg_diffback/gen.png', a)
    gen_roughness_1_view = torch.tensor(roughness).to(device) * roughness_msk_within_uv
    gen_metallic_1_view = torch.tensor(metallic).to(device) * metallic_msk_within_uv

    loss = ((masked_pred_albedo.flatten() - gen_albedo_1_view.flatten().detach()).pow(2)).sum()   #.mean()
    loss += ((masked_pred_roughness.flatten() - gen_roughness_1_view.flatten().detach()).pow(2)).sum()   #.mean()
    loss += ((masked_pred_metallic.flatten() - gen_metallic_1_view.flatten().detach()).pow(2)).sum()   #.mean()

    loss.backward()   
    optimizer.step()   

def render_within_texture(init_texture, mask_within_uv, mesh, faces, verts_uvs, camera_1_view, image_size, faces_per_pixel, device):

    mesh.textures = TexturesUV(
            maps=init_texture.unsqueeze(0).to(device) / 255,
            faces_uvs=faces.textures_idx[None, ...],
            verts_uvs=verts_uvs[None, ...])
    _, _, render_image_tensor_1_view, _, _, _, _ = render_one_view_with_camera(mesh, camera_1_view,
            image_size, faces_per_pixel, device)

    object_mask = render_image_tensor_1_view[..., -1]
    object_mask = torch.where(object_mask != 0, torch.ones_like(object_mask), object_mask)
    object_mask = object_mask.squeeze(0).unsqueeze(-1)   # [H, W, 1]
    flat_mask = object_mask.flatten()

    render_image_tensor_1_view = render_image_tensor_1_view[... ,:3].squeeze(0)
    masked_pred_image = render_image_tensor_1_view * mask_within_uv

    return render_image_tensor_1_view, masked_pred_image, object_mask

def render_one_view_with_camera(mesh, cameras, image_size, faces_per_pixel, device):

    renderer = init_renderer(cameras,
        shader=init_soft_phong_shader(
            camera=cameras,
            blend_params=BlendParams(),
            device=device),
        image_size=image_size, 
        faces_per_pixel=faces_per_pixel
    )
    init_images_tensor, normal_maps_tensor, similarity_tensor, depth_maps_tensor, fragments = render(mesh, renderer, cameras)

    return (
        cameras, renderer,
        init_images_tensor, normal_maps_tensor, similarity_tensor, depth_maps_tensor, fragments
    )

def render(mesh, renderer, cameras, pad_value=10):
    def phong_normal_shading(meshes, fragments, cameras) -> torch.Tensor:

    def similarity_shading(meshes, fragments):

    def get_relative_depth_map(fragments, pad_value=pad_value):

    images, fragments = renderer(mesh)  
    normal_maps = phong_normal_shading(mesh, fragments, cameras).squeeze(-2)
    similarity_maps = similarity_shading(mesh, fragments).squeeze(-2) # -1 - 1
    depth_maps = get_relative_depth_map(fragments)
'''
bottler commented 3 months ago

This is quite complicated and I can't get to the bottom of it for you. Can you gradually simplify? Are you trying to simulate actual PBR? I wonder whether there are other differentiable mesh rendering libraries which support PBR now.