scikit-hep / hepstats

Statistics tools and utilities.
https://scikit-hep.org/hepstats/
69 stars 13 forks source link

Frequentist calculator does not work when loss has constraints. #101

Closed secholak closed 1 year ago

secholak commented 1 year ago

The upper-limit (UL) calculation crashes when the FrequentistCalculator is used with a loss that has constraints. To reproduce the issue, I modified your example notebook (https://github.com/scikit-hep/hepstats/blob/master/notebooks/hypotests/upperlimit_freq_zfit.ipynb) by adding a dummy constraint.

import matplotlib.pyplot as plt
import numpy as np
import zfit

from zfit.loss import ExtendedUnbinnedNLL
from zfit.minimize import Minuit

from hepstats.hypotests import UpperLimit
from hepstats.hypotests.calculators import FrequentistCalculator
from hepstats.hypotests.parameters import POI, POIarray

import tensorflow as tf

bounds = (0.1, 3.0)

# Data: an exponential background plus a small Gaussian signal peak at 1.2,
# truncated to the observable bounds.

np.random.seed(0)
tau = -2.0
beta = -1/tau
data = np.random.exponential(beta, 300)
peak = np.random.normal(1.2, 0.1, 10)
data = np.concatenate((data,peak))
data = data[(data > bounds[0]) & (data < bounds[1])]

obs = zfit.Space('x', limits=bounds)

lambda_ = zfit.Parameter("lambda",-2.0, -4.0, -1.0)
Nsig = zfit.Parameter("Nsig", 1., -20., len(data))
Nbkg = zfit.Parameter("Nbkg", len(data), 0., len(data)*1.1)

signal = zfit.pdf.Gauss(obs=obs, mu=1.2, sigma=0.1).create_extended(Nsig)
background = zfit.pdf.Exponential(obs=obs, lambda_=lambda_).create_extended(Nbkg)
tot_model = zfit.pdf.SumPDF([signal, background])

# Create the extended unbinned negative log-likelihood
data_ = zfit.data.Data.from_numpy(obs=obs, array=data)

# Add a dummy Gaussian constraint on the background yield;
# this is the one change from the example notebook that triggers the crash
constraint = zfit.constraint.GaussianConstraint(Nbkg, observation=251.57, uncertainty=100)
nll = ExtendedUnbinnedNLL(model=tot_model, data=data_, constraints = [constraint]) 

# Instantiate a Minuit minimizer
minimizer = Minuit()

# Minimisation of the loss function
minimum = minimizer.minimize(loss=nll)
minimum.hesse()
print(minimum)

# Instantiation of the frequentist (toy-based) calculator
calculator = FrequentistCalculator(nll, minimizer, ntoysnull=5000, ntoysalt=5000)
# calculator = FrequentistCalculator.from_yaml("toys/upperlimit_freq_zfit_toys.yml", nll, minimizer, ntoysnull=5000, ntoysalt=5000)
calculator.bestfit = minimum # optional: reuse the fit result instead of refitting

# Parameter of interest of the null hypothesis
poinull = POIarray(Nsig, np.linspace(0.0, 25, 15))
# Parameter of interest of the alternative hypothesis
poialt = POI(Nsig, 0.)

# Instantiation of the upper-limit test
ul = UpperLimit(calculator, poinull, poialt)

ul.upperlimit(alpha=0.05, CLs=True);

And the traceback:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[31], line 1
----> 1 ul.upperlimit(alpha=0.05, CLs=True);

File /srv/conda/envs/notebook/lib/python3.10/site-packages/hepstats/hypotests/core/upperlimit.py:159, in UpperLimit.upperlimit(self, alpha, CLs, printlevel)
    153 to_interpolate = [observed_key] + [
    154     f"expected{i}" for i in ["", "_p1", "_m1", "_p2", "_m2"]
    155 ]
    157 limits: dict = {}
--> 159 all_pvalues = self.pvalues(CLs)
    160 for k in to_interpolate:
    161     pvalues = all_pvalues[k]

File /srv/conda/envs/notebook/lib/python3.10/site-packages/hepstats/hypotests/core/upperlimit.py:96, in UpperLimit.pvalues(self, CLs)
     83 """
     84 Returns p-values scanned for the values of the parameters of interest
     85 in the null hypothesis.
   (...)
     92     Dictionary of p-values for CLsb, CLs, expected (+/- sigma bands).
     93 """
     94 pvalue_func = self.calculator.pvalue
---> 96 pnull, palt = pvalue_func(
     97     poinull=self.poinull, poialt=self.poialt, qtilde=self.qtilde, onesided=True
     98 )
    100 pvalues = {"clsb": pnull, "clb": palt}
    102 sigmas = [0.0, 1.0, 2.0, -1.0, -2.0]

File /srv/conda/envs/notebook/lib/python3.10/site-packages/hepstats/hypotests/calculators/basecalculator.py:153, in BaseCalculator.pvalue(self, poinull, poialt, qtilde, onesided, onesideddiscovery)
    150     self.check_pois(poialt)
    151     self.check_pois_compatibility(poinull, poialt)
--> 153 return self._pvalue_(
    154     poinull=poinull,
    155     poialt=poialt,
    156     qtilde=qtilde,
    157     onesided=onesided,
    158     onesideddiscovery=onesideddiscovery,
    159 )

File /srv/conda/envs/notebook/lib/python3.10/site-packages/hepstats/hypotests/calculators/frequentist_calculator.py:185, in FrequentistCalculator._pvalue_(self, poinull, poialt, qtilde, onesided, onesideddiscovery)
    182     p = len(qdist[qdist >= qobs]) / len(qdist)
    183     return p
--> 185 qnulldist = self.qnull(
    186     poinull=poinull,
    187     poialt=poialt,
    188     onesided=onesided,
    189     onesideddiscovery=onesideddiscovery,
    190     qtilde=qtilde,
    191 )
    192 pnull = np.empty(len(poinull))
    193 for i, p in enumerate(poinull):

File /srv/conda/envs/notebook/lib/python3.10/site-packages/hepstats/hypotests/calculators/frequentist_calculator.py:86, in FrequentistCalculator.qnull(self, poinull, poialt, onesided, onesideddiscovery, qtilde)
     59 def qnull(
     60     self,
     61     poinull: POI | POIarray,
   (...)
     65     qtilde: bool = False,
     66 ):
     67     """Computes null hypothesis values of the :math:`\\Delta` log-likelihood test statistic.
     68 
     69     Args:
   (...)
     84         >>> q = calc.qnull(poinull, poialt)
     85     """
---> 86     toysresults = self.get_toys_null(poinull, poialt, qtilde)
     87     ret = {}
     89     for p in poinull:

File /srv/conda/envs/notebook/lib/python3.10/site-packages/hepstats/hypotests/calculators/basecalculator.py:513, in ToysCalculator.get_toys_null(self, poigen, poieval, qtilde)
    492 def get_toys_null(
    493     self,
    494     poigen: POI | POIarray,
    495     poieval: POI | POIarray | None = None,
    496     qtilde: bool = False,
    497 ) -> dict[POI, ToyResult]:
    498     """
    499     Return the generated toys for the null hypothesis.
    500 
   (...)
    511         ...     calc.get_toys_alt(p, poieval=poialt)
    512     """
--> 513     return self._get_toys(
    514         poigen=poigen, poieval=poieval, qtilde=qtilde, hypothesis="null"
    515     )

File /srv/conda/envs/notebook/lib/python3.10/site-packages/hepstats/hypotests/calculators/basecalculator.py:486, in ToysCalculator._get_toys(self, poigen, poieval, qtilde, hypothesis)
    483     if ntogen > 0:
    484         print(f"Generating {hypothesis} hypothesis toys for {p}.")
--> 486         self.generate_and_fit_toys(ntoys=ntogen, poigen=p, poieval=poieval_p)
    488     ret[p] = self.get_toyresult(p, poieval_p)
    490 return ret

File /srv/conda/envs/notebook/lib/python3.10/site-packages/hepstats/hypotests/toyutils.py:286, in ToysManager.generate_and_fit_toys(self, ntoys, poigen, poieval)
    283     param_dict = next(samples)
    285 with ExitStack() as stack:
--> 286     for param, value in param_dict:
    287         stack.enter_context(param.set_value(value))
    289     for minimize_trial in range(2):

File /srv/conda/envs/notebook/lib/python3.10/site-packages/tensorflow/python/ops/variables.py:1123, in Variable.__iter__(self)
   1121 def __iter__(self):
   1122   """When executing eagerly, iterates over the value of the variable."""
-> 1123   return iter(self.read_value())

File /srv/conda/envs/notebook/lib/python3.10/site-packages/tensorflow/python/framework/ops.py:583, in Tensor.__iter__(self)
    581   raise TypeError("Cannot iterate over a tensor with unknown shape.")
    582 if not shape:
--> 583   raise TypeError("Cannot iterate over a scalar tensor.")
    584 if shape[0] is None:
    585   raise TypeError(
    586       "Cannot iterate over a tensor with unknown first dimension.")

TypeError: Cannot iterate over a scalar tensor.
jonas-eschle commented 1 year ago

Thanks a lot for reporting this! We'll look into it in more detail; at first glance it seems to me that param_dict is something unexpected, i.e. a tensor. But actually, we could probably replace this function anyway with the zfit builtin zfit.param.set_values(...)

secholak commented 1 year ago

This fix works for me: https://github.com/scikit-hep/hepstats/pull/102/commits/4c97092c98c4488dfadda353faa834749ce4bdf1

jonas-eschle commented 1 year ago

Fixed it in #103; I've added a couple of tests and more. Many thanks again for finding and solving it! As this is the last version to support Python 3.7 (see also #99), I'll release a fix as 0.6.1 as soon as possible.

secholak commented 1 year ago

Thanks a lot for a quick fix!