boostorg / histogram

Fast multi-dimensional generalized histogram with convenient interface for C++14
Boost Software License 1.0
313 stars 74 forks source link

Improve numerical accuracy of circular axis #380

Open HDembinski opened 1 year ago

HDembinski commented 1 year ago

The if (options_type::test(option::circular)) inside index_type index(value_type x) and value_type value(real_index_type i) reminds me of when I was dealing with periodic argument reduction in a simulator+monitor program (every ms PC sends sin(A*t+B) and receives actual value to&from a chip through UDP, t can be as large as several days). There should be some high precision functions in boost.core/utility/utilities/tool/tools/toolbox/... and we just need to find them. https://en.cppreference.com/w/cpp/numeric/lerp https://stackoverflow.com/questions/64058564/single-precision-argument-reduction-for-trigonometric-functions-in-c

{
    // using A = axis::variable<double, axis::null_type, op::circular_t>; // using double solves the errors
    using A = axis::variable<float, axis::null_type, op::circular_t>;
    auto a = A({
      1.,
      1e+8,
      2e+8,
    });
    BOOST_TEST_EQ(a.index(1e8), 1);
    BOOST_TEST_EQ(a.index(2e8), 0); // test 'a.index(2e8) == 0' ('-1' == '0') failed
    BOOST_TEST_EQ(a.index(4e8), 0); // test 'a.index(4e8) == 0' ('-1' == '0') failed
}
{
    // using A = axis::variable<double, axis::null_type, op::circular_t>; // using double solves the errors
    using A = axis::variable<float, axis::null_type, op::circular_t>;
    auto a = A({
      -2e+8,
      -1e+8,
      -1.,
    });
    BOOST_TEST_EQ(a.index(-1e8), 1);
    BOOST_TEST_EQ(a.index(-2e8), 0);
    BOOST_TEST_EQ(a.index(-4e8), 1); // test 'a.index(-4e8) == 1' ('0' == '1') failed
}
return boost::report_errors();

Originally posted by @jhcarl0814 in https://github.com/boostorg/histogram/issues/372#issuecomment-1364386000

jhcarl0814 commented 1 year ago

The implementation below only ensures correctness (not considering inf and nan), can be used to verify more efficient solutions (if there are any) in the future.

turn a floating point into an integer of /* the leading 1 is implicit */ 1 + matissa + (std::numeric_limits<T>::max_exponent - std::numeric_limits<T>::min_exponent) = 1 + (std::numeric_limits<T>::digits - 1) + (std::numeric_limits<T>::max_exponent - std::numeric_limits<T>::min_exponent) bits (--std=gnu++14 -O2 -Wall -pedantic -pthread -lquadmath) (-std=gnu++14 and -lquadmath are only for boost::multiprecision::float128 demonstration).

the integer = the floating point 2^-(std::numeric_limits<T>::min_exponent-1) 2^(std::numeric_limits<T>::digits - 1) (so the decimal point is moved to the right so it can be an integer)

actual operations used:

  1. (similar to frexp) temp = the floating point * 2^(an integer necessary to make temp falls in [1, 2)) (exponent is changed);
  2. temp = temp * 2^(std::numeric_limits<T>::digits - 1), temp is guaranteed to be an integer now, convert it to an integer (mantissa's decimal point moved to the right);
  3. temp = temp * 2^-(std::numeric_limits<T>::min_exponent - 1) (exponent is changed);
  4. temp = temp / 2^(the integer in step1) (exponent is changed) (now exponent (= original exponent - (std::numeric_limits<T>::min_exponent-1) + (std::numeric_limits<T>::digits - 1)) is guaranteed to be non-negative);
#include<limits>
#include<iostream>
#include<iomanip>
#include<vector>
#include<boost/core/bit.hpp>
#include<boost/multiprecision/integer.hpp>
#include<boost/multiprecision/float128.hpp>
#include<cmath>

template<typename T>
struct signprint_t // https://stackoverflow.com/a/7373921/8343353
{
    T n;
    signprint_t(T m) : n(m) { }
    friend std::ostream & operator<<(std::ostream & o, const signprint_t & s)
    {
        if (s.n < 0) return o << "-" << -s.n;
        return o << s.n;
    }
};
template<typename T>
auto signprint(T v)
{
    return signprint_t<T>(v);
}

template<typename T>
T arithmetic_left_shift(T v,int amount)
{
    if(v<0)
    {
        if(amount<0)
            return -((-v)>>-amount);
        else
            return -((-v)<<amount);
    }
    else
    {
        if(amount<0)
            return v>>-amount;
        else
            return v<<amount;
    }
}

template<typename T>
T arithmetic_right_shift(T v,int amount)
{
    if(v<0)
    {
        if(amount<0)
            return -((-v)<<-amount);
        else
            return -((-v)>>amount);
    }
    else
    {
        if(amount<0)
            return v<<-amount;
        else
            return v>>amount;
    }
}

template<typename T>
using integer_working_storage_t = boost::multiprecision::number<boost::multiprecision::cpp_int_backend<
    (1 + (std::numeric_limits<T>::digits - 1) + (std::numeric_limits<T>::max_exponent - std::numeric_limits<T>::min_exponent)),
    (1 + (std::numeric_limits<T>::digits - 1) + (std::numeric_limits<T>::max_exponent - std::numeric_limits<T>::min_exponent)),
    boost::multiprecision::signed_magnitude, boost::multiprecision::checked, void> >;

template<typename T>
void test()
{
    static_assert(std::numeric_limits<T>::radix==2,"");
    std::cout << "sizeof: " << std::hexfloat << sizeof(T) << '\n';
    std::cout << "digits: "<< std::hexfloat << std::numeric_limits<T>::digits << '\n';
    std::cout << "denorm_min: " << std::hexfloat << std::numeric_limits<T>::denorm_min() << '\n';
    std::cout << "min: " << std::hexfloat << std::numeric_limits<T>::min() << '\n';
    std::cout << "min_exponent: " << std::hexfloat << std::numeric_limits<T>::min_exponent - 1 << '\n';
    std::cout << "max: " << std::hexfloat << std::numeric_limits<T>::max() << '\n';
    std::cout << "max_exponent: " << std::hexfloat << std::numeric_limits<T>::max_exponent - 1 << '\n';
    std::cout << "sign: 1 bit, mantissa: " << (std::numeric_limits<T>::digits - 1) << " bits, exponent: " << boost::core::bit_width(static_cast<unsigned int>(1/* all zero */ + (std::numeric_limits<T>::max_exponent - std::numeric_limits<T>::min_exponent + 1) + 1/* all one */) - 1) << '\n';
    std::cout << "integer working storage: " << (1 + (std::numeric_limits<T>::digits - 1) + (std::numeric_limits<T>::max_exponent - std::numeric_limits<T>::min_exponent)) << " bits"<<'\n';
    std::cout<<'\n';

    std::vector<T> list={-(T)0, -std::numeric_limits<T>::denorm_min(), -std::numeric_limits<T>::min(), -(T)1, -(T)1-std::numeric_limits<T>::epsilon(), -(T)10, -std::numeric_limits<T>::max(),
        (T)0, std::numeric_limits<T>::denorm_min(), std::numeric_limits<T>::min(), (T)1, (T)1+std::numeric_limits<T>::epsilon(), (T)10, std::numeric_limits<T>::max()};
    for(T v:list)
    {
        std::cout <<std::hexfloat<<v <<'\n';

        using std::logb;
        using std::scalbn;
        int e = (v == 0) ? 0 : (int)(logb(v)), e_temp = e;
        T m = scalbn(v, -e); // [1, 2)
        std::cout<<"move to [1, 2): " <<std::hexfloat<<m<<" ( * 2^"<<std::dec<<std::noshowbase<< e<<" = origin)" <<'\n';

        e -= (std::numeric_limits<T>::digits - 1);
        m = scalbn(m, (std::numeric_limits<T>::digits - 1)); // integer
        integer_working_storage_t<T> im(m);
        std::cout<<"change mantissa to integer: " <<std::hexfloat<<m<<" ( = "<<std::hex<<std::showbase<<signprint(im)<<")"<<" ( * 2^"<<std::dec<<std::noshowbase<< e <<" = origin)"<<'\n';

        e += (std::numeric_limits<T>::min_exponent - 1);
        im=arithmetic_right_shift(im,(std::numeric_limits<T>::min_exponent - 1));
        std::cout<<"change exponent to non-negative: " <<std::hex<<std::showbase<<signprint(im)<<" ( * 2^"<<std::dec<<std::noshowbase<< e<<" = origin)"<<'\n';

        e += -e_temp;
        im=arithmetic_right_shift(im,-e_temp);
        std::cout<<"change exponent to (-min_exponent+mantissa): " <<std::hex<<std::showbase<<signprint(im)<<" ( * 2^"<<std::dec<<std::noshowbase<< e<<" = origin)"<<'\n';

        std::cout<<'\n';
    }
}

int main()
{
    test<float>();
    test<double>();
    test<boost::multiprecision::float128>();
}

turn a floating point value into an integer, and then use integer arithmetic to get correct results.

#include<limits>
#include<iostream>
#include<iomanip>
#include<vector>
// #include<boost/core/bit.hpp>
#include<boost/multiprecision/integer.hpp>
#include<boost/multiprecision/float128.hpp>
#include<cmath>
#include<algorithm>

template<typename T>
struct signprint_t // https://stackoverflow.com/a/7373921/8343353
{
    T n;
    signprint_t(T m) : n(m) { }
    friend std::ostream & operator<<(std::ostream & o, const signprint_t & s)
    {
        if (s.n < 0) return o << "-" << -s.n;
        return o << s.n;
    }
};
template<typename T>
auto signprint(T v)
{
    return signprint_t<T>(v);
}

template<typename T>
T arithmetic_left_shift(T v,int amount)
{
    if(v<0)
    {
        if(amount<0)
            return -((-v)>>-amount);
        else
            return -((-v)<<amount);
    }
    else
    {
        if(amount<0)
            return v>>-amount;
        else
            return v<<amount;
    }
}

template<typename T>
T arithmetic_right_shift(T v,int amount)
{
    if(v<0)
    {
        if(amount<0)
            return -((-v)<<-amount);
        else
            return -((-v)>>amount);
    }
    else
    {
        if(amount<0)
            return v<<-amount;
        else
            return v>>amount;
    }
}

template<typename T>
using integer_working_storage_t = boost::multiprecision::number<boost::multiprecision::cpp_int_backend<
    (1 + (std::numeric_limits<T>::digits - 1) + (std::numeri c_limits<T>::max_exponent - std::numeric_limits<T>::min_exponent)),
    (1 + (std::numeric_limits<T>::digits - 1) + (std::numeric_limits<T>::max_exponent - std::numeric_limits<T>::min_exponent)),
    boost::multiprecision::signed_magnitude, boost::multiprecision::checked, void> >;

template<typename T>
integer_working_storage_t<T> to_integer_working_storage_t(T v){
    // std::cout <<std::hexfloat<<v <<'\n';

    using std::logb;
    using std::scalbn;
    int e = (v == 0) ? 0 : (int)(logb(v)), e_temp = e;
    T m = scalbn(v, -e); // [1, 2)
    // std::cout<<"move to [1, 2): " <<std::hexfloat<<m<<" ( * 2^"<<std::dec<<std::noshowbase<< e<<" = origin)" <<'\n';

    e -= (std::numeric_limits<T>::digits - 1);
    m = scalbn(m, (std::numeric_limits<T>::digits - 1)); // integer
    integer_working_storage_t<T> im(m);
    // std::cout<<"change mantissa to integer: " <<std::hexfloat<<m<<" ( = "<<std::hex<<std::showbase<<signprint(im)<<")"<<" ( * 2^"<<std::dec<<std::noshowbase<< e <<" = origin)"<<'\n';

    e += (std::numeric_limits<T>::min_exponent - 1);
    im=arithmetic_right_shift(im,(std::numeric_limits<T>::min_exponent - 1));
    // std::cout<<"change exponent to non-negative: " <<std::hex<<std::showbase<<signprint(im)<<" ( * 2^"<<std::dec<<std::noshowbase<< e<<" = origin)"<<'\n';

    e += -e_temp;
    im=arithmetic_right_shift(im,-e_temp);
    // std::cout<<"change exponent to (-min_exponent+mantissa): " <<std::hex<<std::showbase<<signprint(im)<<" ( * 2^"<<std::dec<<std::noshowbase<< e<<" = origin)"<<'\n';

    // std::cout<<'\n';
    return im;
}
template<typename T>
void test(std::initializer_list<T>list, std::initializer_list<T>targets)
{
    std::vector<integer_working_storage_t<T>>vec_;
    for(T v:list)
    {
        vec_.push_back(to_integer_working_storage_t(v));
        // std::cout<<vec_.back()<<'\n';
    }
    // std::cout<<'\n';
    for(T target:targets)
    {
        integer_working_storage_t<T>itarget=to_integer_working_storage_t(target);
        // std::cout<<itarget<<'\n';
        itarget=(itarget-vec_.front())%(vec_.back()-vec_.front());
        if(itarget<0)
            itarget+=(vec_.back()-vec_.front());
        itarget+=vec_.front();
        // std::cout<<itarget<<'\n';
        std::cout<< (std::upper_bound(vec_.begin(),vec_.end(),itarget)-vec_.begin()-1) <<'\n';
    }
    // std::cout<<'\n';
    std::cout<<'\n';
}

int main()
{
    test<float>({1,1e8,2e8,},{1e8,2e8,4e8,});
    test<float>({-2e8,-1e8,-1,},{-1e8,-2e8,-4e8,});
    test<double>({1,1e8,2e8,},{1e8,2e8,4e8,});
    test<double>({-2e8,-1e8,-1,},{-1e8,-2e8,-4e8,});
    test<boost::multiprecision::float128>({1,1e8,2e8,},{1e8,2e8,4e8,});
    test<boost::multiprecision::float128>({-2e8,-1e8,-1,},{-1e8,-2e8,-4e8,});
}
HDembinski commented 1 year ago

I do not think that you need to convert floats to ints in order to have exact arithmetic. Also in the floating point format, all arithmetic operations are exact if do not exhaust the bits in the mantissa (equivalent to integer overflow) and if you only divide by numbers that do not leave a residual.

jhcarl0814 commented 1 year ago

@HDembinski The problem occurs when if do not exhaust the bits in the mantissa is not satisfied. In the example above, 2e+8 - 1 and 4e+8 - 1 can not be stored in float exactly (the least significant bits are lost).

BOOST_TEST_EQ(a.index(4e8), 0); // test 'a.index(4e8) == 0' ('-1' == '0') failed

because x -= std::floor((4e+8 - 1) / (2e+8 - 1)) * (2e+8 - 1); is actually x -= std::floor(4e+8 / 2e+8) * 2e+8; make x become 0, 0's index in { 1., 1e+8, 2e+8, } is -1 (should be impossible to be returned by index).