Closed vaithak closed 3 weeks ago
@PetroZarytskyi This also is related to #904, please take a look.
I am adding another reproducer that fails with the same errors. Both of these can be added as tests, or at least verified, before merging the corresponding PR.
#include "clad/Differentiator/Differentiator.h"

#include <algorithm> // std::min
#include <iostream>
#include <vector>

/// Calculates the binomial coefficient n over k.
/// Equivalent to TMath::Binomial, but inlined.
inline double binomial(int n, int k)
{
   if (k == 0 || n == k)
      return 1;

   int k1 = std::min(k, n - k);
   int k2 = n - k1;
   double fact = k2 + 1;
   for (double i = k1; i > 1.; --i) {
      fact *= (k2 + i) / i;
   }
   return fact;
}
/// The caller needs to make sure that there is at least one coefficient.
inline double bernstein(double x, double xmin, double xmax, double *coefs, int nCoefs)
{
   double xScaled = (x - xmin) / (xmax - xmin); // rescale to [0,1]
   int degree = nCoefs - 1;                     // n+1 polys of degree n

   if (degree == 0) {
      // degree 0: a single, constant coefficient
      return coefs[0];
   } else if (degree == 1) {
      double a0 = coefs[0];      // c0
      double a1 = coefs[1] - a0; // c1 - c0
      return a1 * xScaled + a0;
   } else if (degree == 2) {
      double a0 = coefs[0];            // c0
      double a1 = 2 * (coefs[1] - a0); // 2 * (c1 - c0)
      double a2 = coefs[2] - a1 - a0;  // c0 - 2 * c1 + c2
      return (a2 * xScaled + a1) * xScaled + a0;
   }

   double t = xScaled;
   double s = 1. - xScaled;
   double result = coefs[0] * s;
   for (int i = 1; i < degree; i++) {
      result = (result + t * binomial(degree, i) * coefs[i]) * s;
      t *= xScaled;
   }
   result += t * coefs[degree];
   return result;
}
double wrapper(double *params)
{
   double RooNLLVarNewWeightSum = 0.0;
   double RooNLLVarNewResult = 0.0;
   double t5[] = {params[0], params[1], params[2], params[3]};
   for (int loopIdx0 = 0; loopIdx0 < 69; loopIdx0++) {
      const double t7 = bernstein(2.0, 0.000000, 100.000000, t5, 4);
      const double t8 = t7 / 4;
      RooNLLVarNewResult += t8;
   }
   return RooNLLVarNewResult;
}
int main()
{
   std::vector<double> parametersVec = {0.25, 0.25, 0.5, 1, 1.5, 2};
   double d_params[] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0};

   auto f_dx = clad::gradient(wrapper);
   f_dx.execute(parametersVec.data(), d_params);

   std::cout << "autodiff: " << std::endl;
   std::cout << d_params[0] << std::endl;
   std::cout << d_params[1] << std::endl;
   std::cout << d_params[2] << std::endl;
   std::cout << d_params[3] << std::endl;
   std::cout << d_params[4] << std::endl;
   std::cout << d_params[5] << std::endl;

   auto numDiff = [&](int i) {
      const double eps = 1e-6;
      std::vector<double> p{parametersVec};
      p[i] = parametersVec[i] - eps;
      double nllValDown = wrapper(p.data());
      p[i] = parametersVec[i] + eps;
      double nllValUp = wrapper(p.data());
      return (nllValUp - nllValDown) / (2 * eps);
   };

   std::cout << "num diff: " << std::endl;
   std::cout << numDiff(0) << std::endl;
   std::cout << numDiff(1) << std::endl;
   std::cout << numDiff(2) << std::endl;
   std::cout << numDiff(3) << std::endl;
   std::cout << numDiff(4) << std::endl;
   std::cout << numDiff(5) << std::endl;
   return 0;
}
Or even smaller:
#include <Math/CladDerivator.h>

#include <cstddef> // std::size_t
#include <iostream>
#include <vector>

double wrapper(double *params)
{
   double out = 0.0;
   for (int i = 0; i < 1; i++) {
      double arr[] = {1.};
      out += arr[0] * params[0];
   }
   return out;
}

#pragma clad ON
void wrapper_req()
{
   clad::gradient(wrapper, "params");
}
#pragma clad OFF
void Repro()
{
   std::vector<double> parametersVec = {0.5};
   std::vector<double> gradientVec(parametersVec.size());

   wrapper(parametersVec.data());
   wrapper_grad(parametersVec.data(), gradientVec.data());

   std::cout << "Clad diff:" << std::endl;
   for (std::size_t i = 0; i < parametersVec.size(); ++i) {
      std::cout << gradientVec[i] << std::endl;
   }

   auto numDiff = [&](int i) {
      const double eps = 1e-6;
      std::vector<double> p{parametersVec};
      p[i] = parametersVec[i] - eps;
      double nllValDown = wrapper(p.data());
      p[i] = parametersVec[i] + eps;
      double nllValUp = wrapper(p.data());
      return (nllValUp - nllValDown) / (2 * eps);
   };

   std::cout << "Num diff:" << std::endl;
   for (std::size_t i = 0; i < parametersVec.size(); ++i) {
      std::cout << numDiff(i) << std::endl;
   }
}
Output:
Clad diff:
1.3488e-321
Num diff:
1
Note that, unfortunately, the regression is exactly in the RooFit classes that were used in the CMS Run 1 Higgs discovery!
Hi everyone. I believe the issue in the generated code is that the value of arr[0] is popped before it's used in the reverse pass:
// forward sweep (inside a loop)
...
clad::push(_t3, arr[0]);
out += clad::back(_t3) * params[0];
...
// reverse sweep (inside a loop)
...
clad::pop(_t3); // <-- the value is popped before it's used
_d_params[0] += clad::back(_t3) * _r_d0;
...
The reverse sweep should instead be:
...
_d_params[0] += clad::back(_t3) * _r_d0;
clad::pop(_t3);
...
I will work on this. I apologize that #904 caused some problems; it needed a more in-depth review.
No problem Pietro! I love it when people have the courage to simplify code and then show responsibility to fix the resulting setbacks. That's way better than someone who is afraid to break anything ;)
I tried to create a minimal reproducer from RooFit's failing test cases.
The error message on the Clang setup:
With the Cling and RooFit setup, we don't see the error for some reason, but the output is still wrong compared to numerical differentiation. After doing a git bisect, I found that this regression was introduced in #904.