Fields blowing up when running MPI but runs fine without MPI.

Hi everyone, I am trying to run a simulation for a nanobeam cavity with custom fitted material data. The simulation fields blow up whenever I run it on an HPC using Open MPI, but runs fine if I run it on my computer without Open MPI. I appreciate your help! Here's my code:
from typing import Tuple

import meep as mp
import matplotlib.pyplot as plt
import numpy as np
import nlopt
import pickle

resolution = 20

ind = 3.4
a = 220
a_min = 0.83636363636363636364
h = 170/a
r = 50/a
wvg_w = 280/a
cav_w = 180/a

fcen = a/945
df = 0.0222222222222

N = 15
N_tap = 5
pad = 4
dpml = 2

sx = pad+dpml+N*2
sy = 10
sz = 10

x_src = 36.667/a
y_src = 46.667/a

cell = mp.Vector3(sx, sy, sz)

posh = np.zeros([N])
rh = np.zeros([N])

def lorentzfunc(p: np.ndarray, x: np.ndarray) -> np.ndarray:
    """
    Returns the complex profile given a set of Lorentzian parameters p
    for a set of frequencies x.
    """
    N = len(p) // 3
    y = np.zeros(len(x))
    for n in range(N):
        A_n = p[3 * n + 0]
        x_n = p[3 * n + 1]
        g_n = p[3 * n + 2]
        y = y + A_n / (np.square(x_n) - np.square(x) - 1j * x * g_n)
    return y

def lorentzerr(p: np.ndarray, x: np.ndarray, y: np.ndarray, grad: np.ndarray) -> float:
    """
    Returns the error (or residual or loss) as the L2 norm
    of the difference of  and y over a set of frequencies x as
    well as the gradient of this error with respect to each Lorentzian
    polarizability parameter in p and saving the result in grad.
    """
    N = len(p) // 3
    yp = lorentzfunc(p, x)
    val = np.sum(np.square(abs(y - yp)))
    for n in range(N):
        A_n = p[3 * n + 0]
        x_n = p[3 * n + 1]
        g_n = p[3 * n + 2]
        d = 1 / (np.square(x_n) - np.square(x) - 1j * x * g_n)
        if grad.size > 0:
            grad[3 * n + 0] = 2 * np.real(np.dot(np.conj(yp - y), d))
            grad[3 * n + 1] = (
                -4 * x_n * A_n * np.real(np.dot(np.conj(yp - y), np.square(d)))
            )
            grad[3 * n + 2] = (
                -2 * A_n * np.imag(np.dot(np.conj(yp - y), x * np.square(d)))
            )
    return val

def lorentzfit(
    p0: np.ndarray,
    x: np.ndarray,
    y: np.ndarray,
    alg=nlopt.GN_AGS,
    tol: float = 1e-25,
    maxeval: float = 10000,
) -> Tuple[np.ndarray, float]:
    """
    Returns the optimal Lorentzian polarizability parameters and error
    which minimize the error in  relative to y for an initial
    set of Lorentzian polarizability parameters p0 over a set of
    frequencies x using the NLopt algorithm alg for a relative
    tolerance tol and a maximum number of iterations maxeval.
    """
    opt = nlopt.opt(alg, len(p0))
    opt.set_ftol_rel(tol)
    opt.set_maxeval(maxeval)
    opt.set_lower_bounds(np.zeros(len(p0)))
    opt.set_upper_bounds(float("inf") * np.ones(len(p0)))
    opt.set_min_objective(lambda p, grad: lorentzerr(p, x, y, grad))
    local_opt = nlopt.opt(nlopt.LD_LBFGS, len(p0))
    local_opt.set_ftol_rel(1e-10)
    local_opt.set_xtol_rel(1e-8)
    opt.set_local_optimizer(local_opt)
    popt = opt.optimize(p0)
    minf = opt.last_optimum_value()
    return popt, minf

if __name__ == "__main__":
    # Import the complex refractive index profile from a CSV file.
    # The file format is three comma-separated columns:
    #     wavelength (nm), real(n), imag(n).
    mydata = np.genfromtxt("Papatryfonos.csv", delimiter=",")
    n = mydata[:, 1] + 1j * mydata[:, 2]

    # Fitting parameter: the instantaneous (infinite frequency) dielectric.
    # Should be > 1.0 for stability and chosen such that
    # np.amin(np.real(eps)) is ~1.0. eps is defined below.
    eps_inf = 1.1

    # Fit only the data in the wavelength range of [wl_min, wl_max].
    wl = mydata[:, 0]
    wl_min = 850  # minimum wavelength (units of nm)
    wl_max = 1000  # maximum wavelength (units of nm)
    wlp = np.linspace(wl_min, wl_max, 50)
    nip = np.interp(wlp, wl, n)
    eps = np.square(nip) - eps_inf
    start_idx = np.where(wlp > wl_min)
    idx_start = start_idx[0][0]
    end_idx = np.where(wlp < wl_max)
    idx_end = end_idx[0][-1] + 1

    # The fitting function is ε(f) where f is the frequency, rather than ε(λ).
    freqs = 220 / wlp  # units of 1/μm
    freqs_reduced = freqs[idx_start:idx_end]
    wl_reduced = wlp[idx_start:idx_end]
    eps_reduced = eps[idx_start:idx_end]

    # Fitting parameter: number of Lorentzian terms to use in the fit
    num_lorentzians = 10

    # Number of times to repeat local optimization with random initial values.
    num_repeat = 200

    ps = np.zeros((num_repeat, 3 * num_lorentzians))
    mins = np.zeros(num_repeat)
    for m in range(num_repeat):
        # Initial values for the Lorentzian polarizability terms. Each term
        # consists of three parameters (σ, ω, γ) and is chosen randomly.
        # Note: for the case of no absorption, γ should be set to zero.
        p_rand = [10 ** (np.random.random()) for _ in range(3 * num_lorentzians)]
        ps[m, :], mins[m] = lorentzfit(
            p_rand, freqs_reduced, eps_reduced, nlopt.AUGLAG, 1e-25, 50000
        )
        ps_str = "( " + ", ".join(f"{prm:.4f}" for prm in ps[m, :]) + " )"
        print(f"iteration:, {m:3d}, ps_str, {mins[m]:.6f}")

    # Find the best performing set of parameters.
    idx_opt = np.where(np.min(mins) == mins)[0][0]
    popt_str = "( " + ", ".join(f"{prm:.4f}" for prm in ps[idx_opt]) + " )"
    print(f"optimal:, {popt_str}, {mins[idx_opt]:.6f}")

    # Define a `Medium` class object using the optimal fitting parameters.
    E_susceptibilities = []

    for n in range(num_lorentzians):
        mymaterial_freq = ps[idx_opt][3 * n + 1]
        mymaterial_gamma = ps[idx_opt][3 * n + 2]

        if mymaterial_freq == 0:
            mymaterial_sigma = ps[idx_opt][3 * n + 0]
            E_susceptibilities.append(
                mp.DrudeSusceptibility(
                    frequency=1.0, gamma=mymaterial_gamma, sigma=mymaterial_sigma
                )
            )
        else:
            mymaterial_sigma = ps[idx_opt][3 * n + 0] / mymaterial_freq**2
            E_susceptibilities.append(
                mp.LorentzianSusceptibility(
                    frequency=mymaterial_freq,
                    gamma=mymaterial_gamma,
                    sigma=mymaterial_sigma,
                )
            )

    mymaterial = mp.Medium(epsilon=eps_inf, E_susceptibilities=E_susceptibilities)
    with open("data.pickle", "wb") as f:
        pickle.dump(ps, f)
    with open("data2.pickle", "wb") as f:
        pickle.dump(min, f)

    # Plot the fit and the actual data for comparison.
    mymaterial_eps = [mymaterial.epsilon(f)[0][0] for f in freqs_reduced]

    fig, ax = plt.subplots(ncols=2)

    ax[0].plot(wl_reduced, np.real(eps_reduced) + eps_inf, "bo-", label="actual")
    ax[0].plot(wl_reduced, np.real(mymaterial_eps), "ro-", label="fit")
    ax[0].set_xlabel("wavelength (nm)")
    ax[0].set_ylabel(r"real($\epsilon$)")
    ax[0].legend()

    ax[1].plot(wl_reduced, np.imag(eps_reduced), "bo-", label="actual")
    ax[1].plot(wl_reduced, np.imag(mymaterial_eps), "ro-", label="fit")
    ax[1].set_xlabel("wavelength (nm)")
    ax[1].set_ylabel(r"imag($\epsilon$)")
    ax[1].legend()

    fig.suptitle(
        f"Comparison of Actual Material Data and Fit\n"
        f"using Drude-Lorentzian Susceptibility"
    )

    fig.subplots_adjust(wspace=0.3)
    # plt.show()
    # plt.savefig("fit.png")

    blk = mp.Block(size=mp.Vector3(mp.inf, wvg_w, h), material=mymaterial)
    geometry = [blk]

    geometry.append(mp.Cylinder(r*a_min, center=mp.Vector3(cav_w/2, 0, 0), height = mp.inf))
    geometry.append(mp.Cylinder(r*a_min, center=mp.Vector3(-cav_w/2, 0, 0), height = mp.inf))
    x = cav_w/2
    print(r*a_min*a)

    for i in range(0, N):
        if i < 5:
            x += 1-((N_tap-i)*((1-a_min)/(N_tap)))
            r_tap = r * ( 1-((N_tap-i-1)*((1-a_min)/(N_tap))) )
            geometry.append(mp.Cylinder(r_tap, center=mp.Vector3(x, 0, 0), height = mp.inf))
            geometry.append(mp.Cylinder(r_tap, center=mp.Vector3(-x, 0, 0), height = mp.inf))
            #print(1-((N_tap-i)*((1-a_min)/(N_tap))))
            print(r_tap*a)
        else:
            x += 1
            geometry.append(mp.Cylinder(r, center=mp.Vector3(x, 0, 0), height = mp.inf))
            geometry.append(mp.Cylinder(r, center=mp.Vector3(-x, 0, 0), height = mp.inf))
            print(r*a) 

    pml_layers = [mp.PML(dpml, mp.X), mp.PML(dpml, mp.Y), mp.PML(1, mp.Z)]

    # src = [mp.Source(mp.GaussianSource(fcen, fwidth=df), mp.Ey, mp.Vector3(0, 0))]
    src = [mp.Source(mp.GaussianSource(fcen, fwidth=df), mp.Ex, mp.Vector3(x_src, y_src), amplitude=np.sin(np.pi/4))]
    src.append(mp.Source(mp.GaussianSource(fcen, fwidth=df), mp.Ey, mp.Vector3(x_src, y_src), amplitude=np.sin(np.pi/4)))

    sim = mp.Simulation(cell_size=cell,
                        boundary_layers=pml_layers,
                        geometry=geometry,
                        sources=src,
                        resolution=resolution)

    nfreq = 1000 # number of frequencies at which to compute flux

    #sim.plot2D(output_plane=mp.Volume(size=mp.Vector3(sx,sy)))
    #plt.savefig("/users/php21mc/Simulations/ctc_grant/L3_normtest/sim_dom" + str(sim_n) + ".png")
    #plt.show()

    pt = mp.Vector3(0, 0)

    # sim.run(until_after_sources=mp.stop_when_fields_decayed(50,mp.Ey,pt,1e-3))
    hinv = mp.Harminv(mp.Ey, mp.Vector3(), fcen, df)

    sim.run(mp.after_sources(hinv),
            until_after_sources=5000)
NanoComp / meep

Fields blowing up when running MPI but runs fine without MPI. #2648