wrong max & min exponents

joseluis commented 1 year ago

There seems to be a displacement between the accepted min max exponents in the spec and the ones implemented.

I've made an example to showcase it and compare it with dec:

use {core::str::FromStr, decstr::*};

fn main() {
    /* 32-bit */

    // this is the epsilon indicated in:
    // https://en.wikipedia.org/wiki/Machine_epsilon#Values_for_standard_hardware_arithmetics
    let str_10en6 = "0.00001"; // 10e-6
    // this is the maximum accepted by the type as a string of digits:
    let str_10en7 = "0.000001"; // 10e-7
    print_decimal![Bitstring32::from_str(str_10en6).unwrap(), as_le_bytes];
    print_decimal![Bitstring32::from_str(str_10en7).unwrap(), as_le_bytes];

    // one more zero overflows:
    // let str_10en8 = "0.0000001"; // 10e-8
    // print_decimal![Bitstring32::from_str("0.0000001").unwrap(), as_le_bytes];

    // converting back to string returns the same number OK
    // const PI32: &str = "3.14159"; // 6 digits
    const PI32: &str = "3.141592"; // 7 digits
    assert_eq![
        Bitstring32::from_str(PI32).unwrap().to_string(),
        String::from(PI32)
    ];

    // Using scientific notation accepts much more.
    // The limit should be between -95 +96 exp range according to the spec,
    // FIXME: but it seems displaced -6, +6 integral units.
    println!("Bitstring32 boundary exponents:");
    print_decimal![Bitstring32::from_str("10e-101").unwrap(), as_le_bytes]; // min exp accepted
    print_decimal![Bitstring32::from_str("10e90").unwrap(), as_le_bytes]; // max exp accepted
    println!("dec:Decimal32 same exponents:");
    print_decimal![dec::Decimal32::from_str("10e-101").unwrap(), to_le_bytes]; //
    print_decimal![dec::Decimal32::from_str("10e90").unwrap(), to_le_bytes]; //
    println!("dec:Decimal32 correct exponents:");
    print_decimal![dec::Decimal32::from_str("10e-95").unwrap(), to_le_bytes]; //
    print_decimal![dec::Decimal32::from_str("10e96").unwrap(), to_le_bytes]; //
    println!();

    /* 64-bit */

    // epsilon from wikipedia:
    let str_10en15 = "0.00000000000001"; // 10e-15
    // this is the maximum accepted by the type as a string of digits:
    let str_10en16 = "0.000000000000001"; // 10e-16
    print_decimal![Bitstring64::from_str(str_10en15).unwrap(), as_le_bytes];
    print_decimal![Bitstring64::from_str(str_10en16).unwrap(), as_le_bytes];

    // one more zero overflows:
    // let str_10en17 = "0.0000000000000a01"; // 10e-17
    // print_decimal![Bitstring64::from_str(str_10en17).unwrap(), as_le_bytes];

    // converting back to string returns the same number OK
    // const PI64: &str = "3.14159265358979"; // 15 digits
    const PI64: &str = "3.141592653589793"; // 16 digits
    assert_eq![
        Bitstring64::from_str(PI64).unwrap().to_string(),
        String::from(PI64)
    ];

    // Using scientific notation accepts much more.
    // The limit should be between -383 +384 exp range according to the spec,
    // FIXME: but it seems displaced -15, +15 integral units.
    println!("Bitstring64 boundary exponents:");
    print_decimal![Bitstring64::from_str("10e-398").unwrap(), as_le_bytes]; // min exp accepted
    print_decimal![Bitstring64::from_str("10e369").unwrap(), as_le_bytes]; // max exp accepted
    println!("dec:Decimal64 same exponents:");
    print_decimal![dec::Decimal64::from_str("10e-398").unwrap(), to_le_bytes]; //
    print_decimal![dec::Decimal64::from_str("10e369").unwrap(), to_le_bytes]; //
    println!("dec:Decimal64 correct exponents:");
    print_decimal![dec::Decimal64::from_str("10e-383").unwrap(), to_le_bytes]; //
    print_decimal![dec::Decimal64::from_str("10e384").unwrap(), to_le_bytes]; //
    println!();

    /* 128-bit */

    // epsilon from wikipedia:
    let str_10en33 = "0.00000000000000000000000000000001"; // 10e-33
    // this is the maximum accepted by the type as a string of digits:
    let str_10en34 = "0.000000000000000000000000000000001"; // 10e-34
    print_decimal![Bitstring128::from_str(str_10en33).unwrap(), as_le_bytes];
    print_decimal![Bitstring128::from_str(str_10en34).unwrap(), as_le_bytes];

    // one more zero overflows:
    // let str_10en35 = "0.0000000000000000000000000000000001"; // 10e-34
    // print_decimal![Bitstring128::from_str(str_10en17).unwrap(), as_le_bytes];

    // converting back to string returns the same number OK
    // const PI128: &str = "3.14159265358979323846264338327950"; // 33 digits
    const PI128: &str = "3.141592653589793238462643383279502"; // 34 digits, 35 char
    assert_eq![
        Bitstring128::from_str(PI128).unwrap().to_string(),
        String::from(PI128)
    ];

    // Using scientific notation accepts much more.
    // The limit should be between -6144 +6145 exp range according to the spec,
    // FIXME: but it seems displaced -32, +34 integral units 
    println!("Bitstring128 boundary exponents:");
    print_decimal![Bitstring128::from_str("10e-6176").unwrap(), as_le_bytes]; // min exp accepted
    print_decimal![Bitstring128::from_str("10e6111").unwrap(), as_le_bytes]; // max exp accepted
    println!("dec:Decimal128 same exponents:");
    print_decimal![dec::Decimal128::from_str("10e-6176").unwrap(), to_le_bytes]; //
    print_decimal![dec::Decimal128::from_str("10e6111").unwrap(), to_le_bytes]; //
    println!("dec:Decimal128 correct exponents:");
    print_decimal![dec::Decimal128::from_str("10e-6144").unwrap(), to_le_bytes]; //
    print_decimal![dec::Decimal128::from_str("10e6145").unwrap(), to_le_bytes]; //
    println!();
}

macro_rules! print_decimal {
    // $dec: the decimal number,
    // $tobytes: the fn to convert to bytes
    ($dec:expr, $tobytes:ident) => {
        let d = $dec;
        print!("{:?} ", d.$tobytes());
        println!(": {}", stringify![$dec]);

        // bits in original le order:
        // for b in d.$tobytes().iter() { print!["{b:08b} "]; }
        // bits in be order:
        // for b in d.$tobytes().iter().rev() { print!["{b:08b} "]; }
    };
}
pub(crate) use print_decimal;

its output:

[1, 0, 0, 34] : Bitstring32::from_str(str_10en6).unwrap()
[1, 0, 240, 33] : Bitstring32::from_str(str_10en7).unwrap()
Bitstring32 boundary exponents:
[16, 0, 0, 0] : Bitstring32::from_str("10e-101").unwrap()
[16, 0, 240, 67] : Bitstring32::from_str("10e90").unwrap()
dec:Decimal32 same exponents:
[16, 0, 0, 0] : dec::Decimal32::from_str("10e-101").unwrap()
[16, 0, 240, 67] : dec::Decimal32::from_str("10e90").unwrap()
dec:Decimal32 correct exponents:
[16, 0, 96, 0] : dec::Decimal32::from_str("10e-95").unwrap()
[0, 0, 0, 120] : dec::Decimal32::from_str("10e96").unwrap()

[1, 0, 0, 0, 0, 0, 0, 34] : Bitstring64::from_str(str_10en15).unwrap()
[1, 0, 0, 0, 0, 0, 252, 33] : Bitstring64::from_str(str_10en16).unwrap()
Bitstring64 boundary exponents:
[16, 0, 0, 0, 0, 0, 0, 0] : Bitstring64::from_str("10e-398").unwrap()
[16, 0, 0, 0, 0, 0, 252, 67] : Bitstring64::from_str("10e369").unwrap()
dec:Decimal64 same exponents:
[16, 0, 0, 0, 0, 0, 0, 0] : dec::Decimal64::from_str("10e-398").unwrap()
[16, 0, 0, 0, 0, 0, 252, 67] : dec::Decimal64::from_str("10e369").unwrap()
dec:Decimal64 correct exponents:
[16, 0, 0, 0, 0, 0, 60, 0] : dec::Decimal64::from_str("10e-383").unwrap()
[0, 0, 0, 0, 0, 0, 0, 120] : dec::Decimal64::from_str("10e384").unwrap()

[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 34] : Bitstring128::from_str(str_10en33).unwrap()
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 192, 255, 33] : Bitstring128::from_str(str_10en34).unwrap()
Bitstring128 boundary exponents:
[16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] : Bitstring128::from_str("10e-6176").unwrap()
[16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 192, 255, 67] : Bitstring128::from_str("10e6111").unwrap()
dec:Decimal128 same exponents:
[16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] : dec::Decimal128::from_str("10e-6176").unwrap()
[16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 192, 255, 67] : dec::Decimal128::from_str("10e6111").unwrap()
dec:Decimal128 correct exponents:
[16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0] : dec::Decimal128::from_str("10e-6144").unwrap()
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120] : dec::Decimal128::from_str("10e6145").unwrap()

KodrAus commented 1 year ago

// FIXME: but it seems displaced -15, +15 integral units.

I think this is the difference between 0.000000000000000×10^−383 and 0000000000000000×10^−398, and 9.999999999999999×10^384 and 9999999999999999×10^369. Since the decimal encodes an integer, the actual range of the exponent encoded is reduced by the number of fractional digits. You'll find most functions in this library refer to the _integerexponent, which is this shifted value.

dec::Decimal64::from_str("10e-383").unwrap()

This example encodes fine as 1.0e-382, but this one:

dec::Decimal64::from_str("10e384").unwrap()

becomes infinity.

joseluis commented 1 year ago

I'm mostly puzzled about the differences with the dec library... our binary representation varies wildly from theirs. Doesn't that mean there's something that needs to be fixed or at least better understood / documented?

joseluis commented 1 year ago

Also, do you know what the constants MIN_10_EXP, MAX_10_EXP, MIN_EXP, MAX_EXP should be set to in our case then?

KodrAus commented 1 year ago

Hmm, yeh on little-endian architectures the binary representation of this library and dec should agree, unless dec is rounding, which this library doesn't do. I think dec encodes in arrays of 32bit integers instead of bytes, but as far as I know it doesn't actually specify any particular binary encoding.

Have you got any examples handy of where they don't agree? From the ones above the only differences I can see are where dec is rounding to infinity (which is the all-zeros except the last byte is 120 cases).

KodrAus commented 1 year ago

Also, do you know what the constants MIN_10_EXP, MAX_10_EXP, MIN_EXP, MAX_EXP should be set to in our case then?

The max exponent you can write in a string depends on the number of fractional digits, so I'm not sure we can actually define these as useful constants. The minimum and maximum values that can actually be encoded in a decimal of a given precision are:

MIN_EXP: 1 - emax - precision_digits + 1
MAX_EXP: emax - precision_digits + 1

But since the exponent is adjusted based on fractional digits, if you have a decimal point then the exponent you see can be larger, by up to precision_digits - 1.

Say we're encoding into a decimal64. The following values are equivalent:

10e369
1.0e370

but this will overflow:

10e370

I'm not much of an expert on this stuff either, so may be entirely mistaken here, but that's my understanding of it all 🙂

joseluis commented 1 year ago

Yes I think that's why MIN_10_EXP and MAX_10_EXP are defined as the minimum and maximum normalized exponents base 10, so currently for Bitstring64 they should be set to 10e-398, and 10e369, respectively.

I don't think the binary exponents make sense in the case of decimal numbers we can leave them out.

What I was mostly wondering is whether the library ideally should support the documented exponent range of 10e-383 (or 10e-382) & 10e384 (or close enough)... But if that's too complicated or too much work I can just create this constants set to the current limits. What do you think?

KodrAus commented 1 year ago

I think we should and do support the range -383 to 384 when encoding using scientific notation in the same way as dec, so I don't think there's anything we actually need to change in the source. Perhaps we set MIN_10_EXP and MAX_10_EXP to the normalized range, so -398 to +369, and in our library docs note that the exponent range changes when the number is fractional. Basically the same way Wikipedia describes it. I'd be happy to take a crack at writing that.

What do you think?

KodrAus commented 1 year ago

Ah, I just caught up on what MIN_10_EXP and MAX_10_EXP are 🙂 They're the base10 exponents, not the exponent range for N in 10eN.

I think we could simply ignore them?

joseluis commented 1 year ago

We could ignore them but aren't they equivalent to the ones for f32/f64? They can be useful to make sure while working with exponents they are within the supported range, or to clamp them...

I'm still learning about decimals and floating points so not I'm not feeling super confident about these topics, but for me and by default it makes most sense to mimic what the rust library is doing.

joseluis commented 1 year ago

It took me a while but I'm finally grokking how both complementary exponent ranges represent the same range of numbers, but using a different significand.

I think we should set them to -398 to +369 because these values are consistent with the behavior of the actual implementation of the "from_str" function, which is where they'd be used.

joseluis commented 1 year ago

Have you got any examples handy of where they don't agree? From the ones above the only differences I can see are where dec is rounding to infinity (which is the all-zeros except the last byte is 120 cases).

You are right, I've made more checks and they agree except when rounding to infinity, so there's no issue.

KodrAus / decstr

wrong max & min exponents #15