pandas MultiIndex error when calling Fitter.custom_plot()

micahfolsom commented 2 years ago

While adding an example to the fitting notebook for this PR, I ran into a weird error. As far as I can tell I'm doing the same exact thing as the double Gauss example earlier in the notebook. The code:

model = (
    bq.fitting.GaussDblExpModel(prefix="gbe_") +
    bq.fitting.LineModel(prefix="line_")
)

params={
    "gbe_amp": 1e4,
    "gbe_mu": 90.0,
    "gbe_sigma": 5.0,
    "gbe_ltail_ratio": 0.3,
    "gbe_ltail_slope": 0.1,
    "gbe_ltail_cutoff": 1.0,
    "gbe_rtail_ratio": 0.1,
    "gbe_rtail_slope": -0.05,
    "gbe_rtail_cutoff": 1.0,
    "line_m": -10.0,
    "line_b": 1e4,
}

x_data = np.arange(0, 200)
y_data = np.random.poisson(model.eval(x=x_data, **params))

fitter = bq.Fitter(model, x=x_data, y=y_data, y_unc=np.sqrt(y_data))
fitter.params["gbe_amp"].set(value=1e4)
fitter.params["gbe_mu"].set(value=90.0)
fitter.params["gbe_sigma"].set(value=5.0)
fitter.params["gbe_ltail_ratio"].set(value=0.3)
fitter.params["gbe_ltail_slope"].set(value=0.1)
fitter.params["gbe_ltail_cutoff"].set(value=1.0)
fitter.params["gbe_rtail_ratio"].set(value=0.1)
fitter.params["gbe_rtail_slope"].set(value=-0.05)
fitter.params["gbe_rtail_cutoff"].set(value=1.0)
fitter.params["line_m"].set(value=-10.0)
fitter.params["line_b"].set(value=1e4)
fitter.fit()
print(fitter.result)
print(lmfit.fit_report(fitter.result))
fitter.custom_plot()

For some reason, the fit works fine and yields the expected result. The failure occurs on fitter.custom_plot():

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
/var/folders/rb/64d5f1916m3cxxc0xqgfbbj40000gn/T/ipykernel_4253/4083626423.py in <module>
     14 print(fitter.result)
     15 print(lmfit.fit_report(fitter.result))
---> 16 fitter.custom_plot()
     17 #plt.show()

~/repos/becquerel/becquerel/core/fitting.py in custom_plot(self, title, savefname, title_fontsize, title_fontweight, residual_type, **kwargs)
   1341             # Add some more parameter details
   1342             s += "\n"
-> 1343             param_df = self.param_dataframe(sort_by_model=True)
   1344             for model_name, sdf in param_df.groupby(level="model"):
   1345                 s += model_name + "\n"

~/repos/becquerel/becquerel/core/fitting.py in param_dataframe(self, sort_by_model)
   1092         if sort_by_model:
   1093             df.set_index(
-> 1094                 pd.MultiIndex.from_tuples(
   1095                     [tuple(p.split("_")) for p in df.index], names=["model", "param"]
   1096                 ),

~/Applications/miniconda3/envs/radkit/lib/python3.8/site-packages/pandas/core/indexes/multi.py in new_meth(self_or_cls, *args, **kwargs)
    202             kwargs["names"] = kwargs.pop("name")
    203 
--> 204         return meth(self_or_cls, *args, **kwargs)
    205 
    206     return cast(F, new_meth)

~/Applications/miniconda3/envs/radkit/lib/python3.8/site-packages/pandas/core/indexes/multi.py in from_tuples(cls, tuples, sortorder, names)
    564             arrays = cast(List[Sequence[Hashable]], arrs)
    565 
--> 566         return cls.from_arrays(arrays, sortorder=sortorder, names=names)
    567 
    568     @classmethod

~/Applications/miniconda3/envs/radkit/lib/python3.8/site-packages/pandas/core/indexes/multi.py in from_arrays(cls, arrays, sortorder, names)
    491             names = [getattr(arr, "name", None) for arr in arrays]
    492 
--> 493         return cls(
    494             levels=levels,
    495             codes=codes,

~/Applications/miniconda3/envs/radkit/lib/python3.8/site-packages/pandas/core/indexes/multi.py in __new__(cls, levels, codes, sortorder, names, dtype, copy, name, verify_integrity)
    333         if names is not None:
    334             # handles name validation
--> 335             result._set_names(names)
    336 
    337         if sortorder is not None:

~/Applications/miniconda3/envs/radkit/lib/python3.8/site-packages/pandas/core/indexes/multi.py in _set_names(self, names, level, validate)
   1436                 raise ValueError("Length of names must match length of level.")
   1437             if level is None and len(names) != self.nlevels:
-> 1438                 raise ValueError(
   1439                     "Length of names must match number of levels in MultiIndex."
   1440                 )

ValueError: Length of names must match number of levels in MultiIndex.

Any ideas what's going on here?

jvavrek commented 2 years ago

Can you print df.index in param_dataframe()?

markbandstra commented 2 years ago

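Looks like it's due to more than one underscore in some of the parameter names, so `tuple(p.split("_")) for p in df.index` produces tuples with different numbers of elements (e.g. "gbe_amp" splits into 2 elements, but "gbe_ltail_ratio" splits into 3).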

markbandstra commented 2 years ago

Maybe we need to enforce non-empty prefixes ending in an underscore when models are instantiated, since the sort_by_model statement here requires this convention. (DeprecationWarning for now?)

micahfolsom commented 2 years ago

This will be fixed by https://github.com/lbl-anp/becquerel/pull/312, which splits the "model_paramname" string on only the first `_` instead of all of them, so it doesn't trip up on param names that contain underscores.

jvavrek commented 2 years ago

@micahfolsom did #312 solve this? If so, please close this issue.

lbl-anp / becquerel

pandas MultiIndex error when calling Fitter.custom_plot() #317