BachiLi / diffvg

Differentiable Vector Graphics Rasterization
https://people.csail.mit.edu/tzumao/diffvg/
Apache License 2.0
892 stars 150 forks source link

Bug to gradient wrt circle radius? #39

Open yyuting opened 2 years ago

yyuting commented 2 years ago

Hi, I was trying to modify single_circle_outline.py to optimize a ring shape by setting the fill color of a circle to None and using a nonzero stroke width. However, the optimization never converges, and the radius of the circle always shrinks to a value below 0. So I rendered a gradient map of the pixel color with respect to the radius parameter and realized that its values disagree with finite differences (FD). Specifically, for the inner circle, the gradient has the opposite sign to the FD result. I'm guessing this is a bug in the implementation, but I also want to check whether this behavior could instead be a consequence of the math derivation. Code that generates the disagreeing gradient maps for diffvg and finite differences:

import pydiffvg
import diffvg
import torch
import skimage
import numpy as np
import os
import time
import platform
import sys

# Use GPU if available: pydiffvg's GPU mode is enabled only when torch reports CUDA.
pydiffvg.set_use_gpu(torch.cuda.is_available())
# Module-level alias for the render entry point; get_img() below calls it.
render = pydiffvg.RenderFunction.apply

def ring():
    """Build the one-circle scene that is drawn as a ring (stroke, no fill).

    Returns:
        A ``(shapes, shape_groups)`` tuple. ``shapes`` is a one-element list
        holding a ``pydiffvg.Circle`` with radius 80, center (128, 128) and
        stroke width 16. ``shape_groups`` is a one-element list holding the
        ``pydiffvg.ShapeGroup`` that references shape id 0 with
        ``fill_color=None`` and a stroke color of (1.0, 0.3, 0.6, 1.0).
    """
    outline = pydiffvg.Circle(
        radius=torch.tensor(80.0),
        center=torch.tensor([128.0, 128.0]),
        stroke_width=torch.tensor(16.0),
    )
    outline_group = pydiffvg.ShapeGroup(
        shape_ids=torch.tensor([0]),  # position of `outline` in the shapes list
        fill_color=None,
        stroke_color=torch.tensor([1.0, 0.3, 0.6, 1.0]),
    )
    return [outline], [outline_group]

def get_img(shapes, shape_groups, width, height, radius_val, nsamples=2, seed=None):
    """Render the scene after overriding the first shape's radius.

    Args:
        shapes: List of shapes; ``shapes[0].radius`` is overwritten in place.
        shape_groups: Shape groups handed to scene serialization.
        width: Width passed to serialization and to the renderer.
        height: Height passed to serialization and to the renderer.
        radius_val: Value assigned to ``shapes[0].radius``.
        nsamples: Sample count used for both the x and the y direction.
        seed: Renderer seed; ``None`` is replaced by 0.

    Returns:
        Whatever the module-level ``render`` returns for the serialized scene.
    """
    shapes[0].radius = radius_val

    serialized = pydiffvg.RenderFunction.serialize_scene(
        width, height, shapes, shape_groups)
    render_seed = 0 if seed is None else seed

    return render(
        width,
        height,
        nsamples,      # num_samples_x
        nsamples,      # num_samples_y
        render_seed,
        None,
        *serialized)

def main():
    """Dump diffvg and finite-difference gradient maps of channel 0 w.r.t. radius.

    Optional command-line flags, each followed by an integer value:
    ``--res_x`` / ``--res_y`` (width / height passed to ``get_img``, default
    256) and ``--nsamples`` (samples per axis, default 2).

    Saves ``img.png``, ``fd_gradient_wrt_radius.png`` and
    ``diffvg_gradient_wrt_radius.png``. ``outdir`` is the empty string, so the
    paths are relative to the current working directory.
    """

    def int_flag(flag, default):
        # Integer following `flag` in sys.argv, or `default` when the flag is absent.
        if flag in sys.argv:
            return int(sys.argv[sys.argv.index(flag) + 1])
        return default

    res_x = int_flag('--res_x', 256)
    res_y = int_flag('--res_y', 256)
    nsamples = int_flag('--nsamples', 2)

    outdir = ''
    base_radius = 80.
    fd_step = 1.

    shapes, shape_groups = ring()
    radius_val = torch.tensor(base_radius, requires_grad=True)

    def set_radius(value):
        # Overwrite the leaf tensor in place; no_grad keeps the write out of autograd.
        with torch.no_grad():
            radius_val.fill_(value)

    def first_channel():
        # Channel 0 of the render at the current radius, as a detached numpy copy.
        rendered = get_img(shapes, shape_groups, res_x, res_y, radius_val, nsamples=nsamples)
        return rendered.cpu().detach().numpy().copy()[..., 0]

    img = first_channel()
    skimage.io.imsave(os.path.join(outdir, 'img.png'), img)

    # Central finite difference around base_radius.
    set_radius(base_radius + fd_step)
    img_pos = first_channel()
    set_radius(base_radius - fd_step)
    img_neg = first_channel()

    fd_wrt_radius = (img_pos - img_neg) / (2. * fd_step)

    skimage.io.imsave(os.path.join(outdir, 'fd_gradient_wrt_radius.png'), fd_wrt_radius)

    # Only pixels where the finite difference is nonzero are evaluated below.
    rows, cols = np.where(fd_wrt_radius != 0)

    # only compute gradient of R channel wrt radius
    # The index arrays above follow fd_wrt_radius's own axis order, so the
    # gradient map takes its shape from that array. A hard-coded (res_x, res_y)
    # buffer only matches when res_x == res_y or when the render's leading
    # axis happens to have length res_x.
    diffvg_wrt_radius = np.zeros(fd_wrt_radius.shape)

    set_radius(base_radius)
    for i, (row, col) in enumerate(zip(rows, cols)):
        # backward() accumulates into .grad, so clear it before each pixel.
        if radius_val.grad is not None:
            radius_val.grad.zero_()
        rendered = get_img(shapes, shape_groups, res_x, res_y, radius_val, nsamples=nsamples)
        rendered[row, col, 0].backward()
        diffvg_wrt_radius[row, col] = radius_val.grad.item()

        if i % 100 == 0:
            print(i)

    # use skimage to avoid negative value being clipped

    skimage.io.imsave(os.path.join(outdir, 'diffvg_gradient_wrt_radius.png'), diffvg_wrt_radius)

# Script entry point: run the gradient comparison only when executed directly.
if __name__ == "__main__":
    main()

Code to reproduce the non-convergent optimization (slight modification from single_circle_outline.py):

import pydiffvg
import torch
import skimage
import numpy as np

# Enable pydiffvg's GPU mode only when torch reports CUDA.
pydiffvg.set_use_gpu(torch.cuda.is_available())

# Target scene: one circle with a stroke and no fill on a 256x256 canvas.
canvas_width = 256
canvas_height = 256
circle = pydiffvg.Circle(
    radius=torch.tensor(40.0),
    center=torch.tensor([128.0, 128.0]),
    stroke_width=torch.tensor(5.0),
)
circle_group = pydiffvg.ShapeGroup(
    shape_ids=torch.tensor([0]),  # position of `circle` in `shapes`
    fill_color=None,
    stroke_color=torch.tensor([0.6, 0.3, 0.6, 0.8]),
)
shapes = [circle]
shape_groups = [circle_group]
scene_args = pydiffvg.RenderFunction.serialize_scene(
    canvas_width, canvas_height, shapes, shape_groups)

render = pydiffvg.RenderFunction.apply
img = render(
    canvas_width,   # width
    canvas_height,  # height
    2,              # num_samples_x
    2,              # num_samples_y
    0,              # seed
    None,
    *scene_args)
# The rendered image is in linear RGB, so it is written with gamma 2.2.
pydiffvg.imwrite(img.cpu(), 'target.png', gamma=2.2)
# Keep a copy as the optimization target; `img` is reassigned further down.
target = img.clone()

# Perturb the circle to get the initial guess. Radius and center are stored
# divided by 256 and stroke width divided by 100, which makes a single
# learning rate easier to pick.
radius_n = torch.tensor(20.0 / 256.0, requires_grad=True)
center_n = torch.tensor([108.0 / 256.0, 138.0 / 256.0], requires_grad=True)
stroke_width_n = torch.tensor(10.0 / 100.0, requires_grad=True)
stroke_color = torch.tensor([0.4, 0.7, 0.5, 0.5], requires_grad=True)

# Feed the de-normalized parameters into the scene objects.
circle.radius = radius_n * 256
circle.center = center_n * 256
circle.stroke_width = stroke_width_n * 100
circle_group.stroke_color = stroke_color

scene_args = pydiffvg.RenderFunction.serialize_scene(
    canvas_width, canvas_height, shapes, shape_groups)
img = render(
    256,  # width
    256,  # height
    2,    # num_samples_x
    2,    # num_samples_y
    1,    # seed
    None,
    *scene_args)
pydiffvg.imwrite(img.cpu(), 'init.png', gamma=2.2)

# Optimize radius, center, stroke color and stroke width.
optimizer = torch.optim.Adam([radius_n, center_n, stroke_color, stroke_width_n], lr=1e-2)
# Run 200 Adam iterations.
for t in range(200):
    print('iteration:', t)
    optimizer.zero_grad()
    # Forward pass: render the image.
    if radius_n < 0:
        # avoids rendering error: reset a negative normalized radius to 0.1.
        # The in-place write under no_grad keeps the reset out of autograd.
        with torch.no_grad():
            radius_n.fill_(0.1)
    circle.radius = radius_n * 256
    circle.center = center_n * 256
    circle.stroke_width = stroke_width_n * 100
    circle_group.stroke_color = stroke_color
    scene_args = pydiffvg.RenderFunction.serialize_scene(
        canvas_width, canvas_height, shapes, shape_groups)
    img = render(256,   # width
                 256,   # height
                 2,     # num_samples_x
                 2,     # num_samples_y
                 t+1,   # seed
                 None,
                 *scene_args)
    # Compute the loss function. Here it is L2.
    loss = (img - target).pow(2).sum()
    print('loss:', loss.item())

    # Backpropagate the gradients.
    loss.backward()
    # Print the gradients
    print('radius.grad:', radius_n.grad)
    print('center.grad:', center_n.grad)
    print('stroke_color.grad:', stroke_color.grad)
    print('stroke_width.grad:', stroke_width_n.grad)

    # Take a gradient descent step.
    optimizer.step()
    # Print the current params. circle.radius / center / stroke_width were
    # computed before the step, so they are recomputed here from the updated
    # tensors; otherwise the printout would lag one iteration behind.
    with torch.no_grad():
        print('radius:', radius_n * 256)
        print('center:', center_n * 256)
        print('stroke_width:', stroke_width_n * 100)
    # circle_group.stroke_color is the optimized tensor itself, so it is current.
    print('stroke_color:', circle_group.stroke_color)

# Render the final result.
# The shape attributes still hold values derived before the last
# optimizer.step(), so refresh them from the optimized tensors first.
# A negative radius falls back to 0.1, the same guard the optimization loop
# applies before rendering.
with torch.no_grad():
    final_radius_n = torch.tensor(0.1) if radius_n < 0 else radius_n
    circle.radius = final_radius_n * 256
    circle.center = center_n * 256
    circle.stroke_width = stroke_width_n * 100
scene_args = pydiffvg.RenderFunction.serialize_scene(
    canvas_width, canvas_height, shapes, shape_groups)
img = render(256,   # width
             256,   # height
             2,     # num_samples_x
             2,     # num_samples_y
             202,    # seed
             None,
             *scene_args)
# Save the final image.
pydiffvg.imwrite(img.cpu(), 'final.png', gamma=2.2)
askaradeniz commented 1 year ago

Hi, I am facing the same problem with circles. When I change the fill color to a transparent color ([0, 0, 0, 0]) and use a circle with a nonzero stroke width, the circle gets smaller with every iteration until it disappears. Is there any update on this issue?

@yyuting Were you able to solve your problem?

nityanandmathur commented 1 year ago

I am facing the same error when using squares instead of circles. Were you able to solve this issue, @yyuting, @askaradeniz?