Open oskooi opened 1 month ago
Was it working better in an earlier version? Might be worth trying it before the subpixel averaging stuff?
It seemed fine (optimization gave good results) in Mo's testbed problem for the LDOS and RGB metalens. Is this specific to eigenmodes?
It seemed fine (optimization gave good results) in Mo's testbed problem for the LDOS and RGB metalens. Is this specific to eigenmodes?
The problem seems does seem to be related to only the EigenmodeCoefficient
objective function. The FourierFields
objective function produces the correct adjoint gradients for both polarization states for the 2D test problem of a 1D binary grating. As shown in the results below, the error in the adjoint gradient is small and decreasing with resolution for the FourierFields
objective function but not when using EigenmodeCoefficient
.
The reason why this has gone unnoticed is that the unit tests in test_adjoint_solver.py
are at a fixed resolution:
Currently, there is no test which verifies that the error decreases with resolution. This seems to have been an oversight on our part.
1. FourierFields: error in adjoint gradient decreases with resolution
S pol.
RESOLUTION_UM = 100
deriv:, 4.131907559212777e-06 (finite difference), 4.130901276598576e-06 (adjoint), 0.0002435394789890465 (error)
RESOLUTION_UM = 200
deriv:, -4.538633987749563e-05 (finite difference), -4.538670944040398e-05 (adjoint), 8.142602143050678e-06 (error)
P pol.
RESOLUTION_UM = 100
deriv:, -0.0001480278552321579 (finite difference), -0.00015076095261196923 (adjoint), 0.018463399172574103 (error)
RESOLUTION_UM = 200
deriv:, -0.00032299238796440477 (finite difference), -0.0003256201526867752 (adjoint), 0.008135686227565322 (error)
2. EigenmodeCoefficient: error in adjoint gradient does not decrease with resolution
S pol.
RESOLUTION_UM = 100
deriv:, -1.2706980867527307e-07 (finite difference), -1.688313720150539e-07 (adjoint), 0.3286505565338697 (error)
RESOLUTION_UM = 200
deriv:, -1.297130803878943e-07 (finite difference), -3.294403672094597e-08 (adjoint), 0.7460237886385086 (error)
P pol.
RESOLUTION_UM = 100
deriv:, 9.529563753662984e-07 (finite difference), 9.312545610252271e-07 (adjoint), 0.022773145657092177 (error)
RESOLUTION_UM = 200
deriv:, 1.1634343205502162e-06 (finite difference), 1.283012147320518e-06 (adjoint), 0.1027800406590641 (error)
from enum import Enum
from autograd import numpy as npa
import matplotlib.pyplot as plt
import meep as mp
import meep.adjoint as mpa
import numpy as np
RESOLUTION_UM = 100
WAVELENGTH_UM = 0.5
GRATING_PERIOD_UM = 1.427
GRATING_HEIGHT_UM = 0.518
GRATING_DUTY_CYCLE = 0.665
SUBSTRATE_UM = 1.1
PML_UM = 1.0
AIR_UM = 1.2
N_GLASS = 1.5
DESIGN_REGION_SIZE = mp.Vector3(
GRATING_HEIGHT_UM,
GRATING_DUTY_CYCLE * GRATING_PERIOD_UM,
0
)
DESIGN_REGION_RESOLUTION = int(5 * RESOLUTION_UM)
NX = int(DESIGN_REGION_SIZE.x * DESIGN_REGION_RESOLUTION) + 1
NY = int(DESIGN_REGION_SIZE.y * DESIGN_REGION_RESOLUTION) + 1
DEBUG_OUTPUT = True
Polarization = Enum("Polarization", "S P")
ObjectiveFunction = Enum(
"ObjectiveFunction", "EigenmodeCoefficient FourierFields"
)
def binary_grating_1d(
pol: Polarization,
objective_function: ObjectiveFunction
) -> mpa.OptimizationProblem:
"""Sets up the adjoint problem for a 1D binary grating.
Args:
pol: polarization of the source and monitors.
objective_function: type of objective function.
Returns:
A `meep.adjoint.OptimizationProblem` class object.
"""
frequency = 1 / WAVELENGTH_UM
pml_layers = [mp.PML(direction=mp.X, thickness=PML_UM)]
glass = mp.Medium(index=N_GLASS)
size_x_um = PML_UM + SUBSTRATE_UM + GRATING_HEIGHT_UM + AIR_UM + PML_UM
size_y_um = GRATING_PERIOD_UM
cell_size = mp.Vector3(size_x_um, size_y_um, 0)
k_point = mp.Vector3()
if pol.name == "S":
eig_parity = mp.ODD_Z
src_cmpt = mp.Ez
else:
eig_parity = mp.EVEN_Z
src_cmpt = mp.Hz
src_pt = mp.Vector3(-0.5 * size_x_um + PML_UM, 0, 0)
sources = [
mp.Source(
mp.GaussianSource(frequency, fwidth=0.1 * frequency),
component=src_cmpt,
center=src_pt,
size=mp.Vector3(0, size_y_um, 0),
)
]
matgrid = mp.MaterialGrid(
mp.Vector3(NX, NY),
mp.air,
glass,
weights=np.ones((NX, NY)),
do_averaging=False
)
matgrid_region = mpa.DesignRegion(
matgrid,
volume=mp.Volume(
center=mp.Vector3(
(-0.5 * size_x_um + PML_UM + SUBSTRATE_UM +
0.5 * DESIGN_REGION_SIZE.x),
0,
0
),
size=DESIGN_REGION_SIZE,
),
)
geometry = [
mp.Block(
material=glass,
size=mp.Vector3(PML_UM + SUBSTRATE_UM, mp.inf, mp.inf),
center=mp.Vector3(
-0.5 * size_x_um + 0.5 * (PML_UM + SUBSTRATE_UM), 0, 0
),
),
mp.Block(
material=matgrid,
size=matgrid_region.size,
center=matgrid_region.center,
),
]
sim = mp.Simulation(
resolution=RESOLUTION_UM,
cell_size=cell_size,
boundary_layers=pml_layers,
k_point=k_point,
sources=sources,
geometry=geometry,
)
tran_pt = mp.Vector3(0.5 * size_x_um - PML_UM, 0, 0)
if objective_function == "FourierFields":
obj_list = [
mpa.FourierFields(
sim,
mp.Volume(
center=tran_pt,
size=mp.Vector3(0, size_y_um, 0),
),
src_cmpt,
yee_grid=True
)
]
def obj_func(dft_field: np.ndarray) -> float:
return npa.sum(npa.absolute(dft_field)**2)
else:
order_y = 1
kdiff = mp.Vector3(
(frequency**2 - (order_y / GRATING_PERIOD_UM)**2)**0.5,
order_y / GRATING_PERIOD_UM,
0
)
obj_list = [
mpa.EigenmodeCoefficient(
sim,
mp.Volume(
center=tran_pt,
size=mp.Vector3(0, size_y_um, 0),
),
mode=1,
kpoint_func=lambda *not_used: kdiff,
eig_parity=eig_parity,
eig_vol=mp.Volume(
center=tran_pt,
size=mp.Vector3(0, 1 / RESOLUTION_UM, 0)
),
),
]
def obj_func(mode_coeff):
return npa.abs(mode_coeff)**2
opt = mpa.OptimizationProblem(
simulation=sim,
objective_functions=obj_func,
objective_arguments=obj_list,
design_regions=[matgrid_region],
frequencies=[frequency],
)
return opt
if __name__ == "__main__":
polarization = Polarization.P
objective_function = ObjectiveFunction.EigenmodeCoefficient
opt = binary_grating_1d(polarization, objective_function)
weights = 0.9 * np.ones((NX * NY))
weights_perturb_mag = 1e-5
rng = np.random.RandomState(59387385)
weights_perturb = weights_perturb_mag * rng.rand(NX*NY)
unperturbed_val, unperturbed_grad = opt([weights], need_gradient=True)
if DEBUG_OUTPUT:
fig, ax = plt.subplots()
opt.plot2D(init_opt=False, ax=ax)
fig.savefig(
'binary_grating_1D_layout.png',
dpi=150,
bbox_inches='tight'
)
perturbed_val, _ = opt([weights + weights_perturb], need_gradient=False)
if unperturbed_grad.ndim < 2:
unperturbed_grad = np.expand_dims(unperturbed_grad, axis=1)
adj_deriv = (weights_perturb[None, :] @ unperturbed_grad)[0][0]
if objective_function.name == "EigenmodeCoefficient":
fnd_deriv = perturbed_val[0] - unperturbed_val[0]
else:
fnd_deriv = perturbed_val - unperturbed_val
err = (abs(fnd_deriv - adj_deriv) / abs(fnd_deriv))
print(f"deriv:, {fnd_deriv} (finite difference), "
f"{adj_deriv} (adjoint), {err} (error)")
I also confirmed that this problem is not specific to the use of the kpoint_func
and eig_vol
features of EigenmodeCoefficient
(used to define a planewave) in the example above involving a binary grating. The same problem is observed for waveguide modes. I verfied this using the mode-converter testbed problem.
To summarize our findings thus far:
The error in the adjoint gradient from the EigenmodeCoefficient
object of the adjoint solver does not converge to zero with resolution in 2D for both polarizations (S and P).
S pol.
RESOLUTION_UM = 50
val:, [0.08980739] (unperturbed), [0.08981534] (perturbed)
deriv:, 7.944895897227244e-06 (finite difference), 7.944841515847232e-06 (adjoint), 6.844819707661953e-06 (error)
RESOLUTION_UM = 100
val:, [0.06203604] (unperturbed), [0.06204059] (perturbed)
deriv:, 4.551447275048803e-06 (finite difference), 4.550967330465875e-06 (adjoint), 0.00010544878451289303 (error)
P pol.
RESOLUTION_UM = 50
val:, [0.31007378] (unperturbed), [0.31007955] (perturbed)
deriv:, 5.764384595485783e-06 (finite difference), 5.618737194858058e-06 (adjoint), 0.02526677361912748 (error)
RESOLUTION_UM = 100
val:, [0.26868186] (unperturbed), [0.26868841] (perturbed)
deriv:, 6.545767417431847e-06 (finite difference), 6.525344209134978e-06 (adjoint), 0.0031200632400229587 (error)
from typing import List, NamedTuple, Tuple
from autograd import numpy as npa
import matplotlib.pyplot as plt
import meep as mp
import meep.adjoint as mpa
import numpy as np
RESOLUTION_UM = 50
WAVELENGTH_UM = 1.0
WAVEGUIDE_UM = mp.Vector3(3.0, 0.4)
PADDING_UM = 0.6
PML_UM = 1.0
DESIGN_REGION_UM = mp.Vector3(1.6, 1.6)
DESIGN_REGION_RESOLUTION_UM = int(2 * RESOLUTION_UM)
NX_DESIGN_GRID = int(DESIGN_REGION_UM.x * DESIGN_REGION_RESOLUTION_UM) + 1
NY_DESIGN_GRID = int(DESIGN_REGION_UM.y * DESIGN_REGION_RESOLUTION_UM) + 1
MODE_SYMMETRY = mp.ODD_Z + mp.EVEN_Y
SILICON = mp.Medium(index=3.5)
SILICON_DIOXIDE = mp.Medium(index=1.5)
DEBUG_OUTPUT = False
cell_um = mp.Vector3(
PML_UM + WAVEGUIDE_UM.x + DESIGN_REGION_UM.x + WAVEGUIDE_UM.x + PML_UM,
PML_UM + PADDING_UM + DESIGN_REGION_UM.y + PADDING_UM + PML_UM,
0,
)
frequency = 1 / WAVELENGTH_UM
pml_layers = [mp.PML(thickness=PML_UM)]
src_pt = mp.Vector3(-0.5 * cell_um.x + PML_UM, 0, 0)
mon_pt = mp.Vector3(0.5 * cell_um.x - PML_UM)
stop_cond = mp.stop_when_fields_decayed(50, mp.Ez, mp.Vector3(mon_pt.x, 0.23), 1e-6)
def straight_waveguide() -> (np.ndarray, NamedTuple):
"""Computes the DFT fields from the mode source in a straight waveguide.
The DFT fields are used as normalization of the transmittance measurement
during the optimization.
Returns:
A 2-tuple consisting of (1) a 1D array of DFT fields and (2) the DFT
fields object returned by `meep.get_flux_data`.
"""
sources = [
mp.EigenModeSource(
src=mp.GaussianSource(frequency, fwidth=0.1 * frequency),
size=mp.Vector3(0, cell_um.y, 0),
center=src_pt,
eig_band=1,
eig_parity=MODE_SYMMETRY,
)
]
geometry = [
mp.Block(
size=mp.Vector3(mp.inf, WAVEGUIDE_UM.y, mp.inf),
center=mp.Vector3(),
material=SILICON,
)
]
symmetries = [mp.Mirror(direction=mp.Y)]
sim = mp.Simulation(
resolution=RESOLUTION_UM,
default_material=SILICON_DIOXIDE,
cell_size=cell_um,
sources=sources,
geometry=geometry,
boundary_layers=pml_layers,
k_point=mp.Vector3(),
symmetries=symmetries
)
mode_monitor = sim.add_mode_monitor(
frequency,
0,
1,
mp.ModeRegion(center=mon_pt, size=mp.Vector3(0, cell_um.y, 0)),
yee_grid=True,
)
sim.run(until_after_sources=stop_cond)
res = sim.get_eigenmode_coefficients(
mode_monitor,
[1],
eig_parity=MODE_SYMMETRY,
)
coeffs = res.alpha
input_flux = np.abs(coeffs[0, :, 0]) ** 2
input_flux_data = sim.get_flux_data(mode_monitor)
return input_flux, input_flux_data
def mode_converter_optimization(
input_flux: np.ndarray,
input_flux_data: NamedTuple,
) -> mpa.OptimizationProblem:
"""Sets up the adjoint optimization of the waveguide mode converter.
Args:
input_flux: 1D array of DFT fields from the normalization run.
input_flux_data: DFT fields object returned by `meep.get_flux_data`.
Returns:
A `meep.adjoint.OptimizationProblem` class object.
"""
matgrid = mp.MaterialGrid(
mp.Vector3(NX_DESIGN_GRID, NY_DESIGN_GRID, 0),
SILICON_DIOXIDE,
SILICON,
weights=np.ones((NX_DESIGN_GRID, NY_DESIGN_GRID)),
do_averaging=False,
)
matgrid_region = mpa.DesignRegion(
matgrid,
volume=mp.Volume(center=mp.Vector3(), size=DESIGN_REGION_UM),
)
matgrid_geometry = [
mp.Block(
center=matgrid_region.center,
size=matgrid_region.size,
material=matgrid,
)
]
geometry = [
mp.Block(
center=mp.Vector3(),
size=mp.Vector3(mp.inf, WAVEGUIDE_UM.y, mp.inf),
material=SILICON,
)
]
geometry += matgrid_geometry
sources = [
mp.EigenModeSource(
src=mp.GaussianSource(frequency, fwidth=0.1 * frequency),
size=mp.Vector3(0, cell_um.y, 0),
center=src_pt,
eig_band=1,
eig_parity=MODE_SYMMETRY,
),
]
symmetries = [mp.Mirror(direction=mp.Y)]
sim = mp.Simulation(
resolution=RESOLUTION_UM,
default_material=SILICON_DIOXIDE,
cell_size=cell_um,
sources=sources,
geometry=geometry,
boundary_layers=pml_layers,
k_point=mp.Vector3(),
symmetries=symmetries
)
obj_list = [
mpa.EigenmodeCoefficient(
sim,
mp.Volume(
center=mon_pt,
size=mp.Vector3(0, cell_um.y, 0),
),
2,
eig_parity=MODE_SYMMETRY,
),
]
def obj_func(tran_mon):
return npa.power(npa.abs(tran_mon), 2) / input_flux
opt = mpa.OptimizationProblem(
simulation=sim,
objective_functions=obj_func,
objective_arguments=obj_list,
design_regions=[matgrid_region],
frequencies=[frequency],
)
return opt
if __name__ == "__main__":
input_flux, input_flux_data = straight_waveguide()
opt = mode_converter_optimization(input_flux, input_flux_data)
weights = 0.9 * np.ones((NX_DESIGN_GRID * NY_DESIGN_GRID))
weights_perturb_mag = 1e-5
rng = np.random.RandomState(59387385)
weights_perturb = weights_perturb_mag * rng.rand(NX_DESIGN_GRID*NY_DESIGN_GRID)
unperturbed_val, unperturbed_grad = opt([weights], need_gradient=True)
if DEBUG_OUTPUT:
fig, ax = plt.subplots()
opt.plot2D(init_opt=False, ax=ax)
fig.savefig(
'mode_converter_layout.png',
dpi=150,
bbox_inches='tight'
)
perturbed_val, _ = opt([weights + weights_perturb], need_gradient=False)
if unperturbed_grad.ndim < 2:
unperturbed_grad = np.expand_dims(unperturbed_grad, axis=1)
adj_deriv = (weights_perturb[None, :] @ unperturbed_grad)[0][0]
print(f"val:, {unperturbed_val} (unperturbed), {perturbed_val} (perturbed)")
fnd_deriv = perturbed_val[0] - unperturbed_val[0]
err = abs((fnd_deriv - adj_deriv) / fnd_deriv)
print(f"deriv:, {fnd_deriv} (finite difference), "
f"{adj_deriv} (adjoint), {err} (error)")
Would be good to check if it worked in https://github.com/NanoComp/meep/pull/1167 (commit fc61d28eb2f8cd545b24ffcae30014f1b0ce193e), which was @smartalecH's earliest version. If it did, do a git bisect
.
You could also try commit 75ced5296a91d8e49ad61b1bb07093e7abbb95e9, which is the commit before #1855.
(also look right before #1855... there's some funny business with how we store the dft chunks and how the 2D simulation fields are allocated if only a particular polarization is needed)
The error (compared to finite differences) should still converge with resolution (until the limit of the finite-difference accuracy is reached).
#2792 (comment) identified a bug in the adjoint gradients in 2D for the $\mathcal{P}$ polarization (electric field in the plane). The relative error in the adjoint gradient and the finite-difference result was large and not decreasing with the grid resolution.
As it turns out, the unit tests for the adjoint solver currently only involve the $\mathcal{S}$ polarization:
https://github.com/NanoComp/meep/blob/2278c66f4a5330ef83a882094ae8340bc2e5a96c/python/tests/test_adjoint_solver.py#L37-L38
The adjoint gradients for the $\mathcal{P}$ have thus far seem to have not been tested. This may be why this bug may have gone unnoticed.