siegel8 commented 1 year ago

I am attempting to run an optimization based on example 6 and I am running into an issue where the backwards propagation of the gradients will fail after a finite number of iterations. By looking into the traceback, the issue seems to arise from the eigen decomposition function, specifically the P and Q matrices. In the paper, it seems the P/Q can become singular matrices and lead to instabilities, but the problem persists if I switch to double-precision-point-floating numbers, though it occurs at a later step. For completeness, code is included. The main changes are changing the material to Tungsten (defined via index 2.5+22.5j), wavelength to 5 microns, design region is 3 um x 3 um with 5 nm mesh, the structure is illuminated from the backwards direction with a Tungsten back-reflector, and I have a two part FoM.

`import os os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

Import

import numpy as np import torch import scipy.io from matplotlib import pyplot as plt import time

import torcwa import Materials

Hardware

If GPU support TF32 tensor core, the matmul operation is faster than FP32 but with less precision.

If you need accurate operation, you have to disable the flag below.

torch.backends.cuda.matmul.allow_tf32 = False

sim_dtype = torch.complex128

geo_dtype = torch.float64

sim_dtype = torch.complex64 geo_dtype = torch.float32

device = torch.device('cuda')

Simulation environment

light

lamb0 = torch.tensor(5000.,dtype=geo_dtype,device=device) # nm inc_ang = 0.(np.pi/180) # radian azi_ang = 0.(np.pi/180) # radian theta = torch.linspace(0., 60.,2,dtype=geo_dtype,device=device) # nm

material

substrate_eps = 1.462 silicon_eps = Materials.aSiH.apply(lamb0)2

Changing material to be Tungsten (W) for testing purposes. Let's just see if it runs at all.

W = 1.33+5.24j #At 532 nm

W = 2.9 +22.5j # At 5 microns silicon_eps = W**2 print(silicon_eps) print(W)

geometry

L = [3000., 3000.] # nm / nm torcwa.rcwa_geo.dtype = geo_dtype torcwa.rcwa_geo.device = device torcwa.rcwa_geo.Lx = L[0] torcwa.rcwa_geo.Ly = L[1] torcwa.rcwa_geo.nx = 600 torcwa.rcwa_geo.ny = 600 torcwa.rcwa_geo.grid() torcwa.rcwa_geo.edge_sharpness = 1000.

x_axis = torcwa.rcwa_geo.x.cpu() y_axis = torcwa.rcwa_geo.y.cpu()

layers

layer0_thickness = 300.`

`# Objective function def objective_function(rho): order = [12,12] A_sum = [] for theta_inc in range(len(theta)): inc_ang = theta[theta_inc]*(np.pi/180) sim = torcwa.rcwa(freq=1/lamb0,order=order,L=L,dtype=sim_dtype,device=device)

Linear in Index, non-linear in eps.

    #plt.imshow(rho.detach().cpu().numpy())
    #plt.colorbar()
    substrate_eps_W = W**2
    sim.add_input_layer(eps=substrate_eps_W)
    sim.set_incident_angle(inc_ang=inc_ang,azi_ang=azi_ang,angle_layer='output')
    # Linear in Index, non-linear in eps.
    layer0_eps = substrate_eps
    layer1_index = rho*W + (1.-rho)
    layer1_eps = layer1_index**2
    layer1_thickness = layer0_thickness
    #layer0_eps = rho*silicon_eps + (1.-rho)
    sim.add_layer(thickness=layer0_thickness,eps=layer0_eps)
    sim.add_layer(thickness=layer1_thickness,eps=layer1_eps)
    sim.solve_global_smatrix()
    #t1xx = sim.S_parameters(orders=[1,0],direction='forward',port='transmission',polarization='xx',ref_order=[0,0])
    #t1yy = sim.S_parameters(orders=[1,0],direction='forward',port='transmission',polarization='yy',ref_order=[0,0])
    #t1xy = sim.S_parameters(orders=[1,0],direction='forward',port='transmission',polarization='xy',ref_order=[0,0])
    #t1yx = sim.S_parameters(orders=[1,0],direction='forward',port='transmission',polarization='yx',ref_order=[0,0])

    if theta_inc == 1: # extra orders for 60 deg.
        r0xx2 = sim.S_parameters(orders=[0,0],direction='backward',port='reflection',polarization='xx',ref_order=[0,0])
        #t0xx2 = sim.S_parameters(orders=[0,0],direction='forward',port='transmission',polarization='xx',ref_order=[0,0])
        rn1xx = sim.S_parameters(orders=[-1,0],direction='backward',port='reflection',polarization='xx',ref_order=[0,0])
       # tn1xx = sim.S_parameters(orders=[-1,0],direction='forward',port='transmission',polarization='xx',ref_order=[0,0])
        Abs = 1-torch.abs(r0xx2)**2-torch.abs(rn1xx)**2
     #   print(Abs)
        A_sum.append(Abs)
    else:
        r0xx = sim.S_parameters(orders=[0,0],direction='backward',port='reflection',polarization='xx',ref_order=[0,0])
        #t0xx = sim.S_parameters(orders=[0,0],direction='forward',port='transmission',polarization='xx',ref_order=[0,0])

        Abs =torch.abs(r0xx)**2
      #  print(Abs)
        A_sum.append(Abs)       
return sum(A_sum)/len(theta) #Normalizing by number of angles    `

`torch.autograd.set_detect_anomaly(True)

Perform optimization

optimizer parameters for ADAM optimizer

gar_initial = 0.02 beta1 = 0.9 beta2 = 0.999 epsilon = 1.e-8 iter_max = 200 beta = np.exp(np.arange(start=0,stop=iter_max)np.log(1000)/iter_max) gar = gar_initial 0.5(1+np.cos(np.arange(start=0,stop=iter_max)np.pi/iter_max))

blur kernel

blur_radius = 300. dx, dy = L[0]/torcwa.rcwa_geo.nx, L[1]/torcwa.rcwa_geo.ny x_kernel_axis = (torch.arange(torcwa.rcwa_geo.nx,dtype=geo_dtype,device=device)-(torcwa.rcwa_geo.nx-1)/2)dx y_kernel_axis = (torch.arange(torcwa.rcwa_geo.ny,dtype=geo_dtype,device=device)-(torcwa.rcwa_geo.ny-1)/2)dy x_kernel_grid, y_kernel_grid = torch.meshgrid(x_kernel_axis,y_kernel_axis,indexing='ij') g = torch.exp(-(x_kernel_grid2+y_kernel_grid2)/blur_radius**2) g = g/torch.sum(g) g_fft = torch.fft.fftshift(torch.fft.fft2(torch.fft.ifftshift(g)))

torch.manual_seed(333) rho = torch.rand((torcwa.rcwa_geo.nx,torcwa.rcwa_geo.ny),dtype=geo_dtype,device=device) rho = (rho + torch.fliplr(rho))/2 rho_fft = torch.fft.fftshift(torch.fft.fft2(torch.fft.ifftshift(rho))) rho = torch.real(torch.fft.fftshift(torch.fft.ifft2(torch.fft.ifftshift(rho_fft*g_fft)))) momentum = torch.zeros_like(rho) velocity = torch.zeros_like(rho)

rho_history = [] FoM_history = []

start_time = time.time() for it in range(0,iter_max): rho.requiresgrad(True) rho_fft = torch.fft.fftshift(torch.fft.fft2(torch.fft.ifftshift(rho))) rho_bar = torch.real(torch.fft.fftshift(torch.fft.ifft2(torch.fft.ifftshift(rho_fftg_fft)))) rho_tilda = 1/2 + torch.tanh(2beta[it]rho_bar-beta[it])/(2np.math.tanh(beta[it])) if it == 0: test = rho_tilda

Plot

#    plt.imshow(test.detach().cpu().numpy())
 #   plt.colorbar()
FoM = objective_function(rho_tilda)
FoM.backward()

with torch.no_grad():
    rho_gradient = rho.grad
    #print(rho_gradient)
    rho.grad = None

    rho_history.append(rho_tilda.detach().cpu().numpy())
    FoM = float(FoM.detach().cpu().numpy())
    FoM_history.append(FoM)

    momentum = (beta1*momentum + (1-beta1)*rho_gradient)
    velocity = (beta2*velocity + (1-beta2)*(rho_gradient**2))
    rho += gar[it]*(momentum / (1-beta1**(it+1))) / torch.sqrt((velocity / (1-beta2**(it+1))) + epsilon)
    rho[rho>1] = 1
    rho[rho<0] = 0
    rho = (rho + torch.fliplr(rho))/2
    #plt.imshow(rho_gradient.detach().cpu().numpy())
    #plt.colorbar()
    end_time = time.time()
    elapsed_time = end_time - start_time
    print('Iteration:',it,'/ FoM:',int(FoM*10000)/10000,'/ Elapsed time:',str(int(elapsed_time))+' s')`

kch3782 commented 1 year ago

I think this problem is caused by the divergence of the eigendecomposition gradient by the degenerated eigenvalue. Instead of the broadening parameter value set by default in the program, try 1e-10 as tested in the paper. For this, after importing torcwa, try inserting the code below.

torcwa.Eig.broadening_parameter = 1e-10

The current default setting is too small and sometimes diverges, so I plan to set the default value to 1e-10 in a future version.

Please let me know if you have any problems. Thank you.