EnzymeAD / Enzyme

High-performance automatic differentiation of LLVM and MLIR.
https://enzyme.mit.edu

Incorrect gradient for if statement #2002

Closed · minansys closed this 2 months ago

minansys commented 2 months ago

I have a C++ program that uses EnzymeAD to compute a gradient, which I then compare against a finite-difference approximation. The results do not match.

The problem is due to the following code in void in_work(int i, int j, std::vector<std::vector<double>> &phi, DataType& min_max):

    if (nghbr <= min_max[k])
        min_max[k] = nghbr;

If I change it to

    if (nghbr - min_max[k] <= 0)
        min_max[k] = nghbr;

then the gradient matches the finite difference. Below is a simplified version of the code from the original project. This version does not reproduce the issue, but it gives a sense of the context.

#include <iostream>
#include <vector>
#include <random>
#include <cmath>
#include <algorithm>
#include <array>
#include <assert.h>

int enzyme_dup;
int enzyme_dupnoneed;
int enzyme_out;
int enzyme_const;
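// The globals above are Enzyme activity markers: passing one of them before
// an argument tells __enzyme_autodiff how to treat the argument that follows
// (enzyme_dup pairs a primal argument with its shadow/derivative).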

template <typename return_type, typename... T>
return_type __enzyme_autodiff(void *, T...);

// Define the size of the array
const int rows = 10;
const int cols = 10;
const double epsilon = 1e-6;

#define D 1
typedef std::array<double, D> DataType;
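// With D = 1, DataType is effectively a single double per cell.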

DataType pre_work(int i, int j, std::vector<std::vector<double>> &phi)
{
    DataType min_max{};
    for (size_t k = 0; k < D; ++k)
    {
        min_max[k] = phi[i][j];
    }

    return min_max;
}

void in_work(int i, int j, std::vector<std::vector<double>> &phi, DataType& min_max)
{
    if (i < 0 || i >= rows || j < 0 || j >= cols)
        return;
    for (size_t k = 0; k < D; ++k)
    {
        auto nghbr = phi[i][j];
        if (nghbr <= min_max[k])
            min_max[k] = nghbr;
    }
}

void post_work(int i, int j, DataType& min_max, std::vector<std::vector<double>> &phi_max)
{
    for (size_t k = 0; k < D; ++k)
        phi_max[i][j] = min_max[k];
}

// Function to initialize the 2D array with random values
void initializeArray(std::vector<std::vector<double>> &phi) {
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<> dis(0.0, 1.0);

    for (int i = 0; i < rows; ++i) {
        for (int j = 0; j < cols; ++j) {
            phi[i][j] = dis(gen);
        }
    }
}

// Function to compute phi_max for each element in the array
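// (Despite the name, in_work keeps the minimum over the 3x3 neighborhood.)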
std::vector<std::vector<double>> computePhiMax(std::vector<std::vector<double>> &phi) {
    std::vector<std::vector<double>> phi_max(rows, std::vector<double>(cols, 0.0));

    for (int i = 0; i < rows; ++i) {
        for (int j = 0; j < cols; ++j) {
            auto min_max = pre_work(i, j, phi);
            for (int ii = -1; ii <= 1; ++ii) {
                for (int jj = -1; jj <= 1; ++jj) {
                    in_work(i + ii, j + jj, phi, min_max);
                }
            }
            post_work(i, j, min_max, phi_max);
        }
    }
    return phi_max;
}

// Objective function: squared Frobenius norm of phi_max
void objectiveFunction(std::vector<std::vector<double>> &phi, double& sum) {
    auto phi_max = computePhiMax(phi);
    sum = 0.0;
    for (int i = 0; i < rows; ++i) {
        for (int j = 0; j < cols; ++j) {
            sum += phi_max[i][j] * phi_max[i][j];
        }
    }
}

// Function to compute the finite difference gradient
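// Perturbing every entry by +/-epsilon gives a central difference of the
// objective along the all-ones direction, i.e. the sum of all gradient
// entries, which is what enzymeGradient also returns.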
double finiteDifferenceGradient(std::vector<std::vector<double>> &phi) {
    for (int i = 0; i < rows; ++i)
        for (int j = 0; j < cols; ++j)
            phi[i][j] += epsilon;
    double obj_plus, obj_minus;
    objectiveFunction(phi, obj_plus);
    for (int i = 0; i < rows; ++i)
        for (int j = 0; j < cols; ++j)
            phi[i][j] -= 2.0 * epsilon;
    objectiveFunction(phi, obj_minus);
    for (int i = 0; i < rows; ++i)
        for (int j = 0; j < cols; ++j)
            phi[i][j] += epsilon;
    return (obj_plus - obj_minus) / (2 * epsilon);
}

// Function to compute the gradient using EnzymeAD
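// Each enzyme_dup pairs a primal argument with its shadow; seeding d_obj = 1
// makes the reverse pass accumulate d(sum)/d(phi[i][j]) into d_phi.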
double enzymeGradient(std::vector<std::vector<double>> &phi) {
    std::vector<std::vector<double>> d_phi(phi.size(), std::vector<double>(phi[0].size(), 0.0));
    double obj = 0, d_obj = 1;
    __enzyme_autodiff<void>((void*)objectiveFunction,
                        enzyme_dup, &phi, &d_phi,
                        enzyme_dup, &obj, &d_obj);

    double grad = 0;
    for (const auto &row : d_phi)
        for (const auto &val : row)
            grad += val;
    return grad;
}

int main() {
    std::vector<std::vector<double>> phi(rows, std::vector<double>(cols));
    initializeArray(phi);
    auto ad_grad = enzymeGradient(phi);
    auto fd_grad = finiteDifferenceGradient(phi);

    // Compare gradients
    std::cout << "FD " << fd_grad << "\nadjoint "<< ad_grad << "\nratio " << ad_grad / fd_grad << std::endl;
    return 0;
}
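For reference, here is an even smaller sketch that isolates just the conditional-update pattern, with the branch written both ways. The names takeMin and takeMinRewritten and the hard-coded values are illustrative only; like the listing above, this sketch is not known to trigger the mismatch, it just documents the pattern. It would be compiled the same way, with the Enzyme plugin loaded.

    #include <cstdio>

    int enzyme_dup;

    template <typename return_type, typename... T>
    return_type __enzyme_autodiff(void *, T...);

    // Branch written as in in_work: compare the neighbor value directly.
    void takeMin(double *x, double *acc)
    {
        if (x[0] <= acc[0])
            acc[0] = x[0];
        acc[0] *= acc[0]; // nontrivial dependence on the selected value
    }

    // Branch written as in the workaround: compare the difference to zero.
    void takeMinRewritten(double *x, double *acc)
    {
        if (x[0] - acc[0] <= 0)
            acc[0] = x[0];
        acc[0] *= acc[0];
    }

    int main()
    {
        // x < acc, so the update is taken and d(acc^2)/dx should be 2*x = 0.6.
        double x = 0.3, dx = 0.0, acc = 0.7, dacc = 1.0;
        __enzyme_autodiff<void>((void *)takeMin,
                                enzyme_dup, &x, &dx,
                                enzyme_dup, &acc, &dacc);
        std::printf("branch form:    d/dx = %f\n", dx);

        x = 0.3; dx = 0.0; acc = 0.7; dacc = 1.0;
        __enzyme_autodiff<void>((void *)takeMinRewritten,
                                enzyme_dup, &x, &dx,
                                enzyme_dup, &acc, &dacc);
        std::printf("rewritten form: d/dx = %f\n", dx);
        return 0;
    }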
wsmoses commented 2 months ago

I can't reproduce this here; it works for me on LLVM 12 at -O1 and on latest main. What did you use to compile?

wmoses@beast:~/git/Enzyme/enzyme/build12 (utload) $ /home/wmoses/llvms/llvm12/buildD/bin/clang++ at.cpp -fplugin=Enzyme/ClangEnzyme-12.so $CXXFLAGS -O1 && ./a.out
warning: didn't implement memmove, using memcpy as fallback which can result in errors
freeing without malloc %"class.std::vector.0"* %__p
freeing without malloc %"class.std::vector.0"* %__p
FD 33.6251
adjoint 33.6251
ratio 1
minansys commented 2 months ago

Hi @wsmoses,

Thanks for taking a look at this. The issue was only reproducible in my original project at -O0 and -O3. I was using Clang 15 with the latest build. I cannot reproduce it in this stripped-down case, even though the key routine is very similar. In the original code it is not a 2D matrix but a 1D array over an unstructured mesh, which is hard to mimic here. I thought you might quickly identify the issue once you saw the pattern. I will close this issue since I cannot produce a reproducible case. Thanks for the help!
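For completeness, a rough sketch of what the unstructured version of in_work looks like. The CSR-style neighbor-list layout (nghbr_offsets / nghbr_ids) is an approximation for illustration, not the actual data structure from the project:

    #include <array>
    #include <vector>

    #define D 1
    typedef std::array<double, D> DataType;

    // Hypothetical CSR-style connectivity: the neighbors of node n are
    // nghbr_ids[nghbr_offsets[n]] .. nghbr_ids[nghbr_offsets[n + 1] - 1].
    void in_work_unstructured(int n,
                              const std::vector<int> &nghbr_offsets,
                              const std::vector<int> &nghbr_ids,
                              const std::vector<double> &phi,
                              DataType &min_max)
    {
        for (int e = nghbr_offsets[n]; e < nghbr_offsets[n + 1]; ++e)
        {
            for (size_t k = 0; k < D; ++k)
            {
                auto nghbr = phi[nghbr_ids[e]];
                if (nghbr <= min_max[k]) // the comparison whose gradient was wrong
                    min_max[k] = nghbr;
            }
        }
    }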