JuliaHubOSS / llvm-cbe

resurrected LLVM "C Backend", with improvements
Other
846 stars 146 forks source link

Different results from C++ code and LLVM-CBE generated C code #81

Closed robiwano closed 3 years ago

robiwano commented 3 years ago

A simple IIR filter (biquad) produces different results when compiled directly with clang++-10 than when compiled via clang++-10 -> llvm-cbe -> clang-10.

Code (main.cpp):

#include <stdio.h>

#include <array>

// Coefficient table for the filter bank: one row of four floats per biquad
// section. main() passes each row to biquad_ref::init() reordered as
// {row[2], row[3], row[0], row[1]}.
static const std::array<float, 4> sections[] = {
    {-1.86673462f, 0.975236177f, -0.00022248477f, 6.41448132e-05f},
    {-1.88822913f, 0.973825812f, 0.00151690713f, -0.00169034698f},
    {-1.93050146f, 0.982603073f, -0.000351806346f, 0.000179776893f},
    {-1.94676137f, 0.984092355f, 0.000357935729f, 0.000223968978f},
    {-1.93602753f, 0.979216754f, 0.00245991652f, -0.00294184568f},
    {-1.98760641f, 0.992139757f, -0.00058901991f, 0.000605413283f},
    {-1.99211085f, 0.9958269f, 0.000262855436f, -0.000232513732f},
    {-1.99258792f, 0.995707214f, -0.000792235543f, 0.000826240634f},
    {-1.99429774f, 0.996919811f, -0.00105583353f, 0.00102336903f},
    {-1.99408424f, 0.996321917f, -0.000847439223f, 0.000741446565f},
    {-1.9954437f, 0.997491956f, -0.000817941211f, 0.000816155691f},
    {-1.99567151f, 0.997142434f, -0.000302375411f, 0.000291527656f},
    {-1.99629068f, 0.99747014f, 0.000228200079f, -0.000246359385f},
    {-1.99680257f, 0.997838438f, 0.00041177214f, -0.000401756348f},
    {-1.99673605f, 0.997656465f, 0.000495632645f, -0.000484255812f},
    {-1.99756479f, 0.998328328f, -0.000959537574f, 0.000969767047f},
    {-1.99775565f, 0.998304605f, -0.00038473474f, 0.000370821945f},
    {-1.99710584f, 0.997515678f, 0.000314344041f, -0.000341897772f},
    {-1.99818242f, 0.998476923f, -0.000256100058f, 0.000256437314f},
    {-1.99865246f, 0.998812675f, -9.16128101e-06f, 1.1989966e-05f},
    {-1.99581778f, 0.995909631f, -0.00209832611f, 0.00207207398f},
    {-1.999017f, 0.999110579f, -0.00029892262f, 0.000296934333f},
    {-1.99951303f, 0.999579072f, 5.49166616e-05f, -5.480459e-05f},
    {-1.99791086f, 0.997960806f, 0.00234198733f, -0.00234321039f},
    {-1.99911726f, 0.999157608f, 0.000525008421f, -0.000528873468f},
    {-1.99845409f, 0.998466671f, -0.00187406479f, 0.00187400985f},
    {-1.9995929f, 0.999602377f, 4.38543648e-05f, -4.33909227e-05f},
    {-1.99961364f, 0.999625087f, 3.97789627e-05f, -3.91256253e-05f},
    {-1.99978626f, 0.999800026f, 2.06127352e-05f, -2.05237902e-05f},
    {-1.99673605f, 0.997656465f, 0.000495632645f, -0.000484255812f},
};

// Number of rows in `sections` (total array size divided by the size of one row).
static const unsigned sections_count = sizeof(sections) / sizeof(sections[0]);

namespace
{
    /// One second-order filter section: four coefficients plus two state
    /// values that carry over from one filter() call to the next.
    struct biquad_ref {
        std::array<float, 4> m_coeff;  ///< Coefficients; see filter() for the role of each index.
        float m_state[2];              ///< State written by filter(), zeroed by clear().

        /// Creates a section with zeroed coefficients and zeroed state.
        biquad_ref() : m_coeff(), m_state() { clear(); }

        /// Creates a section with the given coefficients and zeroed state.
        biquad_ref(const std::array<float, 4>& coeff)
            : m_coeff(coeff), m_state()
        {
            clear();
        }

        /// Replaces the coefficients. The state is left untouched.
        void init(const std::array<float, 4>& coeff) { m_coeff = coeff; }

        /// Resets both state values to zero.
        void clear()
        {
            m_state[0] = m_state[1] = 0.f;
        }

        /// Processes one input sample and returns the output sample.
        ///
        /// m_coeff[0] and m_coeff[1] scale the input; m_coeff[2] and
        /// m_coeff[3] scale the output that is fed back through the state.
        /// The expressions are kept exactly as written because the float
        /// rounding of the result depends on the evaluation order.
        inline float filter(float in)
        {
            float out  = m_coeff[0] * in + m_state[0];
            m_state[0] = m_coeff[1] * in - m_coeff[2] * out - m_state[1];
            m_state[1] = m_coeff[3] * out;
            return out;
        }
    };

    /// Bank of biquad sections that all receive the same input sample; the
    /// bank output is the sum of the section outputs.
    struct piir_ref {
        using bq_type = biquad_ref;
        bq_type* m_bqs;  ///< Section array; released with free() by the destructor.
        int m_num_bqs;   ///< Number of sections visited by filter().

        /// Creates an empty bank (no sections).
        piir_ref() : m_bqs(nullptr), m_num_bqs(0) {}

        /// Releases the section array. free(nullptr) is a no-op, so an empty
        /// bank is fine.
        ~piir_ref() { free(m_bqs); }

        // The destructor free()s m_bqs, so a copied object would release the
        // same array a second time. Forbid copying instead of risking that.
        piir_ref(const piir_ref&)            = delete;
        piir_ref& operator=(const piir_ref&) = delete;

        /// Feeds one sample to every section, in index order, and returns the
        /// sum of their outputs.
        inline float filter(float in)
        {
            float acc = 0.f;
            for (int i = 0; i < m_num_bqs; ++i) {
                acc += m_bqs[i].filter(in);
            }
            return acc;
        }

        /// Filters `length` samples from `in` into `out`. Each input sample is
        /// read before the matching output is written, so `in` and `out` may
        /// be the same buffer.
        void filter(const float* in, float* out, int length)
        {
            for (int i = 0; i < length; ++i) {
                out[i] = filter(in[i]);
            }
        }
    };
}  // namespace

int main(void)
{
    piir_ref filter_ref;
    filter_ref.m_bqs = (biquad_ref*)malloc(sections_count * sizeof(biquad_ref));
    filter_ref.m_num_bqs  = sections_count;
    const unsigned length = 4096;

    for (unsigned i = 0; i < sections_count; ++i) {
        const std::array<float, 4>& section = sections[i];
        filter_ref.m_bqs[i].init(
            {section[2], section[3], section[0], section[1]});
        filter_ref.m_bqs[i].clear();
    }

    float* src_dest_ref = (float*)malloc(length * sizeof(float));
    for (unsigned i = 0; i < length; ++i) {
        src_dest_ref[i] = 1.0f;
    }

    // Run floating point filter
    filter_ref.filter(src_dest_ref, src_dest_ref, length);

    // Show result
    for (unsigned i = 0; i < length; ++i) {
        printf("[%4u]: %f\n", i, src_dest_ref[i]);
    }

    free(src_dest_ref);
    return 0;
}

Excerpt from C++ result:

> clang++-10 -o test_cpp main.cpp
> ./test_cpp
...
[4089]: -0.432737
[4090]: -0.432717
[4091]: -0.432696
[4092]: -0.432674
[4093]: -0.432651
[4094]: -0.432627
[4095]: -0.432601

Excerpt from C result:

> clang++-10 -S -emit-llvm -o main.ll main.cpp && llvm-cbe -o main.c main.ll && clang-10 -o test_c main.c
> ./test_c
...
[4089]: -0.437412
[4090]: -0.437410
[4091]: -0.437406
[4092]: -0.437401
[4093]: -0.437395
[4094]: -0.437387
[4095]: -0.437379

Expectation is that these runs should yield identical results. But as seen, the difference is quite large. What can be the cause of this?

This is on Ubuntu 18.04, x86_64.

robiwano commented 3 years ago

To add "insult to injury", I did further tests:

  1. Compile C code to native with clang
  2. Compile C code to LLVM IR with clang -> LLVM IR to ASM through llc -> ASM to binary with clang
  3. Compile C code to LLVM IR with clang -> LLVM IR to C through llvm-cbe -> C to binary with clang

C code (main.c):

#include <stdio.h>

#include <malloc.h>

#define FloatType float

// Coefficients of one filter section. The field order must not change: the
// rows of `sections[]` are positional initializers for this struct.
typedef struct iir_section {
    FloatType a1;  // scales the section output when updating m_state[0]
    FloatType a2;  // scales the section output when updating m_state[1]
    FloatType b0;  // scales the input sample when computing the output
    FloatType b1;  // scales the input sample when updating m_state[0]
} iir_section;

// Coefficient table for the filter bank: one row per section, given in
// iir_section field order {a1, a2, b0, b1}.
static const iir_section sections[] = {
    {-1.86673462f, 0.975236177f, -0.00022248477f, 6.41448132e-05f},
    {-1.88822913f, 0.973825812f, 0.00151690713f, -0.00169034698f},
    {-1.93050146f, 0.982603073f, -0.000351806346f, 0.000179776893f},
    {-1.94676137f, 0.984092355f, 0.000357935729f, 0.000223968978f},
    {-1.93602753f, 0.979216754f, 0.00245991652f, -0.00294184568f},
    {-1.98760641f, 0.992139757f, -0.00058901991f, 0.000605413283f},
    {-1.99211085f, 0.9958269f, 0.000262855436f, -0.000232513732f},
    {-1.99258792f, 0.995707214f, -0.000792235543f, 0.000826240634f},
    {-1.99429774f, 0.996919811f, -0.00105583353f, 0.00102336903f},
    {-1.99408424f, 0.996321917f, -0.000847439223f, 0.000741446565f},
    {-1.9954437f, 0.997491956f, -0.000817941211f, 0.000816155691f},
    {-1.99567151f, 0.997142434f, -0.000302375411f, 0.000291527656f},
    {-1.99629068f, 0.99747014f, 0.000228200079f, -0.000246359385f},
    {-1.99680257f, 0.997838438f, 0.00041177214f, -0.000401756348f},
    {-1.99673605f, 0.997656465f, 0.000495632645f, -0.000484255812f},
    {-1.99756479f, 0.998328328f, -0.000959537574f, 0.000969767047f},
    {-1.99775565f, 0.998304605f, -0.00038473474f, 0.000370821945f},
    {-1.99710584f, 0.997515678f, 0.000314344041f, -0.000341897772f},
    {-1.99818242f, 0.998476923f, -0.000256100058f, 0.000256437314f},
    {-1.99865246f, 0.998812675f, -9.16128101e-06f, 1.1989966e-05f},
    {-1.99581778f, 0.995909631f, -0.00209832611f, 0.00207207398f},
    {-1.999017f, 0.999110579f, -0.00029892262f, 0.000296934333f},
    {-1.99951303f, 0.999579072f, 5.49166616e-05f, -5.480459e-05f},
    {-1.99791086f, 0.997960806f, 0.00234198733f, -0.00234321039f},
    {-1.99911726f, 0.999157608f, 0.000525008421f, -0.000528873468f},
    {-1.99845409f, 0.998466671f, -0.00187406479f, 0.00187400985f},
    {-1.9995929f, 0.999602377f, 4.38543648e-05f, -4.33909227e-05f},
    {-1.99961364f, 0.999625087f, 3.97789627e-05f, -3.91256253e-05f},
    {-1.99978626f, 0.999800026f, 2.06127352e-05f, -2.05237902e-05f},
    {-1.99673605f, 0.997656465f, 0.000495632645f, -0.000484255812f},
};

// Number of rows in `sections` (total array size divided by the size of one row).
static const unsigned sections_count = sizeof(sections) / sizeof(sections[0]);

// One filter section: its coefficients plus the two state values that
// biquad_filter() carries from one call to the next.
typedef struct biquad {
    iir_section m_coeff;   // set by biquad_init()
    FloatType m_state[2];  // zeroed by biquad_clear(), updated by biquad_filter()
} biquad;

// Copies the four coefficients from *section into *me. The state in
// me->m_state is not modified.
void biquad_init(biquad* me, const iir_section* section)
{
    iir_section* dst = &me->m_coeff;

    dst->a1 = section->a1;
    dst->a2 = section->a2;
    dst->b0 = section->b0;
    dst->b1 = section->b1;
}

// Resets both state values of *me to zero. The coefficients are not touched.
void biquad_clear(biquad* me)
{
    const FloatType zero = (FloatType)0;

    me->m_state[1] = zero;
    me->m_state[0] = zero;
}

// Runs one input sample through the section and returns the output sample.
// Both entries of me->m_state are updated for the next call.
FloatType biquad_filter(biquad* me, FloatType in)
{
    const iir_section* coeff = &me->m_coeff;
    FloatType* state         = me->m_state;

    // Each expression keeps its original shape: the rounding of the result
    // depends on the order of the floating-point operations.
    const FloatType result = coeff->b0 * in + state[0];
    state[0] = coeff->b1 * in - coeff->a1 * result - state[1];
    state[1] = coeff->a2 * result;
    return result;
}

// Bank of biquad sections that are all fed the same input sample.
typedef struct piir_filter {
    biquad* m_bqs;  // section array; released with free() by piir_filter_destroy()
    int m_num_bqs;  // number of sections visited by piir_filter_filter_single()
} piir_filter;

// Releases the section array owned by *me and resets the handle to the empty
// state. Resetting makes a repeated destroy harmless and stops later filter
// calls from walking freed memory. A NULL `me` is ignored.
void piir_filter_destroy(piir_filter* me)
{
    if (me == NULL) {
        return;
    }
    free(me->m_bqs);  // free(NULL) is a no-op
    me->m_bqs     = NULL;
    me->m_num_bqs = 0;
}

// Feeds one sample to every section of the bank, in index order, and returns
// the sum of the section outputs.
FloatType piir_filter_filter_single(piir_filter* me, FloatType in)
{
    FloatType sum = (FloatType)0;
    int idx       = 0;

    while (idx < me->m_num_bqs) {
        sum = sum + biquad_filter(&me->m_bqs[idx], in);
        ++idx;
    }
    return sum;
}

// Filters `length` samples from `in` into `out`, one sample at a time. Each
// input is read before the matching output is written, so `in` and `out` may
// point at the same buffer.
void piir_filter_filter_array(piir_filter* me,
                              const FloatType* in,
                              FloatType* out,
                              int length)
{
    int pos = 0;

    while (pos < length) {
        const FloatType sample = in[pos];
        out[pos] = piir_filter_filter_single(me, sample);
        ++pos;
    }
}

int main(void)
{
    piir_filter filter_ref;
    filter_ref.m_bqs      = (biquad*)malloc(sections_count * sizeof(biquad));
    filter_ref.m_num_bqs  = sections_count;
    const unsigned length = 4096;

    for (unsigned i = 0; i < sections_count; ++i) {
        biquad_init(filter_ref.m_bqs + i, sections + i);
        biquad_clear(filter_ref.m_bqs + i);
    }

    float* src_dest_ref = (float*)malloc(length * sizeof(float));
    for (unsigned i = 0; i < length; ++i) {
        src_dest_ref[i] = 1.0f;
    }

    // Run floating point filter
    piir_filter_filter_array(&filter_ref, src_dest_ref, src_dest_ref, length);

    // Show result
    for (unsigned i = 0; i < length; ++i) {
        printf("[%4u]: %f\n", i, src_dest_ref[i]);
    }

    piir_filter_destroy(&filter_ref);
    free(src_dest_ref);
    return 0;
}

Result 1:

> clang-10 -o test_c main.c
> ./test_c
...
[4089]: -0.432737
[4090]: -0.432717
[4091]: -0.432696
[4092]: -0.432674
[4093]: -0.432651
[4094]: -0.432627
[4095]: -0.432601

Result 2:

> clang-10 -S -emit-llvm -o main.ll main.c && llc -o main.asm main.ll && clang-10 -o test_llc main.asm
> ./test_llc
...
[4089]: -0.432737
[4090]: -0.432717
[4091]: -0.432696
[4092]: -0.432674
[4093]: -0.432651
[4094]: -0.432627
[4095]: -0.432601

Result 3:

> clang-10 -S -emit-llvm -o main.ll main.c && llvm-cbe -o main_cbe.c main.ll && clang-10 -o test_cbe main_cbe.c
> ./test_cbe
...
[4089]: -0.437412
[4090]: -0.437410
[4091]: -0.437406
[4092]: -0.437401
[4093]: -0.437395
[4094]: -0.437387
[4095]: -0.437379

Something goes awry with the llvm-cbe test.

robiwano commented 3 years ago

Ok, found the culprit. The coefficients in sections get truncated when using llvm-cbe:

Result without llvm-cbe:

Section [ 0]: -1.8667346239, 0.9752361774, -0.0002224848, 0.0000641448
Section [ 1]: -1.8882291317, 0.9738258123, 0.0015169071, -0.0016903470
Section [ 2]: -1.9305014610, 0.9826030731, -0.0003518063, 0.0001797769
Section [ 3]: -1.9467613697, 0.9840923548, 0.0003579357, 0.0002239690
Section [ 4]: -1.9360275269, 0.9792167544, 0.0024599165, -0.0029418457
Section [ 5]: -1.9876064062, 0.9921397567, -0.0005890199, 0.0006054133
Section [ 6]: -1.9921108484, 0.9958269000, 0.0002628554, -0.0002325137
Section [ 7]: -1.9925879240, 0.9957072139, -0.0007922355, 0.0008262406
Section [ 8]: -1.9942977428, 0.9969198108, -0.0010558335, 0.0010233690
Section [ 9]: -1.9940842390, 0.9963219166, -0.0008474392, 0.0007414466
Section [10]: -1.9954437017, 0.9974919558, -0.0008179412, 0.0008161557
Section [11]: -1.9956715107, 0.9971424341, -0.0003023754, 0.0002915277
Section [12]: -1.9962906837, 0.9974701405, 0.0002282001, -0.0002463594
Section [13]: -1.9968025684, 0.9978384376, 0.0004117721, -0.0004017563
Section [14]: -1.9967360497, 0.9976564646, 0.0004956326, -0.0004842558
Section [15]: -1.9975647926, 0.9983283281, -0.0009595376, 0.0009697670
Section [16]: -1.9977556467, 0.9983046055, -0.0003847347, 0.0003708219
Section [17]: -1.9971058369, 0.9975156784, 0.0003143440, -0.0003418978
Section [18]: -1.9981824160, 0.9984769225, -0.0002561001, 0.0002564373
Section [19]: -1.9986524582, 0.9988126755, -0.0000091613, 0.0000119900
Section [20]: -1.9958177805, 0.9959096313, -0.0020983261, 0.0020720740
Section [21]: -1.9990170002, 0.9991105795, -0.0002989226, 0.0002969343
Section [22]: -1.9995130301, 0.9995790720, 0.0000549167, -0.0000548046
Section [23]: -1.9979108572, 0.9979608059, 0.0023419873, -0.0023432104
Section [24]: -1.9991172552, 0.9991576076, 0.0005250084, -0.0005288735
Section [25]: -1.9984540939, 0.9984666705, -0.0018740648, 0.0018740098
Section [26]: -1.9995929003, 0.9996023774, 0.0000438544, -0.0000433909
Section [27]: -1.9996136427, 0.9996250868, 0.0000397790, -0.0000391256
Section [28]: -1.9997862577, 0.9998000264, 0.0000206127, -0.0000205238
Section [29]: -1.9967360497, 0.9976564646, 0.0004956326, -0.0004842558

Result with llvm-cbe:

Section [ 0]: -1.8667350000, 0.9752362000, -0.0002224848, 0.0000641448
Section [ 1]: -1.8882290000, 0.9738258000, 0.0015169070, -0.0016903470
Section [ 2]: -1.9305010000, 0.9826031000, -0.0003518063, 0.0001797769
Section [ 3]: -1.9467610000, 0.9840924000, 0.0003579357, 0.0002239690
Section [ 4]: -1.9360280000, 0.9792168000, 0.0024599170, -0.0029418460
Section [ 5]: -1.9876060000, 0.9921398000, -0.0005890199, 0.0006054133
Section [ 6]: -1.9921110000, 0.9958269000, 0.0002628554, -0.0002325137
Section [ 7]: -1.9925880000, 0.9957072000, -0.0007922355, 0.0008262406
Section [ 8]: -1.9942980000, 0.9969198000, -0.0010558340, 0.0010233690
Section [ 9]: -1.9940840000, 0.9963219000, -0.0008474392, 0.0007414466
Section [10]: -1.9954440000, 0.9974920000, -0.0008179412, 0.0008161557
Section [11]: -1.9956720000, 0.9971424000, -0.0003023754, 0.0002915277
Section [12]: -1.9962910000, 0.9974701000, 0.0002282001, -0.0002463594
Section [13]: -1.9968030000, 0.9978384000, 0.0004117721, -0.0004017563
Section [14]: -1.9967360000, 0.9976565000, 0.0004956326, -0.0004842558
Section [15]: -1.9975650000, 0.9983283000, -0.0009595376, 0.0009697670
Section [16]: -1.9977560000, 0.9983046000, -0.0003847347, 0.0003708219
Section [17]: -1.9971060000, 0.9975157000, 0.0003143440, -0.0003418978
Section [18]: -1.9981820000, 0.9984769000, -0.0002561001, 0.0002564373
Section [19]: -1.9986520000, 0.9988127000, -0.0000091613, 0.0000119900
Section [20]: -1.9958180000, 0.9959096000, -0.0020983260, 0.0020720740
Section [21]: -1.9990170000, 0.9991106000, -0.0002989226, 0.0002969343
Section [22]: -1.9995130000, 0.9995791000, 0.0000549167, -0.0000548046
Section [23]: -1.9979110000, 0.9979608000, 0.0023419870, -0.0023432100
Section [24]: -1.9991170000, 0.9991576000, 0.0005250084, -0.0005288735
Section [25]: -1.9984540000, 0.9984667000, -0.0018740650, 0.0018740100
Section [26]: -1.9995930000, 0.9996024000, 0.0000438544, -0.0000433909
Section [27]: -1.9996140000, 0.9996251000, 0.0000397790, -0.0000391256
Section [28]: -1.9997860000, 0.9998000000, 0.0000206127, -0.0000205238
Section [29]: -1.9967360000, 0.9976565000, 0.0004956326, -0.0004842558

Note: In this case the type was double and not float. Still there is a truncation.

robiwano commented 3 years ago

Ok, so the problem is how llvm-cbe chooses to print out float constants. I'm closing this issue and setting up a clean one.