velvia / compressed-vec

SIMD Floating point and integer compressed vector library
Apache License 2.0
78 stars 8 forks source link

NibblePackMedFixedSect not completing decoding to sink #4

Open fsolleza opened 3 years ago

fsolleza commented 3 years ago

The code below fails on the last assertion, `assert_eq!(sink.values, input);`. It seems that the last 8 u64 values are not written to the sink.

Other inputs (e.g. the commented-out input) do not seem to exhibit this behavior. This might be triggered by some edge case.

Alternatively, maybe I'm not understanding how NibblePackMedFixedSect and Section256Sink should interact - but I'm pretty sure I do.

Thanks!

use compressed_vec::{
    section::{FixedSectReader, FixedSectionWriter, NibblePackMedFixedSect},
    sink::Section256Sink,
};

/// Reproduction for the reported decode failure.
///
/// Encodes a 256-element `u64` slice with `NibblePackMedFixedSect`, decodes the
/// resulting bytes into a `Section256Sink`, and asserts that the decoded values
/// equal the original input. Every fallible step is `unwrap`ped, so an error
/// while encoding, parsing, or decoding panics before the final comparison.
fn main() {
    // Zero-initialised output buffer that receives the encoded section.
    // NOTE(review): 8196 may have been meant to be 8192 — confirm.
    let mut buf: [u8; 8196] = [0; 8196];

    // Alternate 256-element input, kept for comparison: it is reported NOT to
    // trigger the failing assertion at the end of this function.
    //let input: &[u64] = &[
    //    10682, 262, 361, 338, 1620, 5231, 3668, 850, 1861, 5012, 3201, 1615, 178, 5102, 2697, 981,
    //    704, 1971, 4632, 2481, 1122, 7395, 4954, 480, 4919, 5734, 2776, 2890, 9841, 486, 2300,
    //    1213, 1811, 2988, 702, 272, 3402, 9653, 9262, 4785, 27, 3867, 11568, 5761, 1769, 4456,
    //    4903, 2640, 1310, 7593, 2628, 612, 1500, 11383, 13558, 5535, 1512, 3948, 267, 1004, 1081,
    //    7863, 5522, 9683, 2108, 8214, 6795, 718, 8660, 8385, 8558, 6113, 1869, 6180, 2740, 3675,
    //    624, 4414, 7107, 2213, 7064, 622, 4225, 1944, 2379, 3539, 4056, 5099, 1384, 3562, 2596,
    //    3529, 146, 4257, 1454, 4166, 7574, 6437, 8944, 871, 11499, 6378, 5295, 1148, 2687, 637,
    //    1098, 3182, 1632, 1143, 769, 636, 3661, 4218, 2053, 877, 3705, 10386, 4183, 5245, 2258,
    //    5056, 3455, 3477, 1601, 8634, 4061, 6162, 14001, 10630, 6878, 12633, 827, 9940, 1813, 4999,
    //    86, 3114, 1633, 8135, 7658, 6395, 7762, 1581, 537, 158, 1717, 985, 6889, 4274, 786, 6762,
    //    4948, 5343, 2726, 3503, 3593, 2913, 7946, 2151, 2832, 1775, 7711, 12284, 5265, 4605, 1277,
    //    1096, 3455, 8104, 4203, 8046, 6651, 5326, 9219, 5580, 3364, 3165, 358, 3138, 1309, 2485,
    //    15528, 7979, 5191, 447, 751, 2834, 3067, 2688, 2115, 5958, 7547, 3042, 5847, 9394, 7685,
    //    2810, 5621, 1296, 89, 1704, 4016, 587, 2675, 2291, 7394, 2753, 2147, 2203, 3212, 1844,
    //    2727, 3196, 3813, 3824, 2035, 4109, 9236, 6747, 6141, 6150, 3849, 6766, 5159, 3166, 3553,
    //    3330, 6426, 4739, 8799, 11030, 4191, 2352, 3695, 2581, 4502, 176, 86, 1685, 4734, 3428,
    //    8417, 102, 1506, 1397, 175, 2855, 5420, 7193, 13334, 4939, 3163, 3858, 4083, 5592,
    //];
    // Input that triggers the failure. Its last 8 values are the ones reported
    // as missing from the sink after decoding.
    let input: &[u64] = &[
        13531, 7392, 4283, 6675, 7938, 941, 585, 2133, 2076, 5459, 392, 2292, 3250, 1825, 5508,
        6017, 5757, 672, 4624, 132, 402, 2981, 323, 4899, 8416, 1134, 4771, 9372, 3423, 6327, 1200,
        2324, 4299, 471, 1140, 5719, 3880, 10134, 4807, 1968, 4211, 4176, 6339, 1587, 9182, 144,
        12399, 5322, 9022, 5813, 6, 18032, 11663, 4331, 3, 3821, 4310, 4603, 5898, 1861, 2447, 452,
        2277, 5166, 470, 7767, 5067, 9082, 7012, 6223, 2557, 3980, 3638, 3006, 8407, 7006, 2607,
        6941, 4070, 1987, 9729, 10650, 6208, 11815, 4306, 3415, 6438, 1687, 3253, 2682, 645, 7900,
        10613, 27, 1131, 3078, 10048, 12075, 1502, 4404, 6598, 9191, 1057, 1125, 2442, 6261, 1697,
        9730, 6559, 5416, 2481, 2937, 6102, 1334, 5035, 4466, 3435, 3878, 797, 4110, 10547, 8950,
        1613, 241, 3913, 1563, 2719, 11268, 6433, 1495, 4726, 2072, 6455, 1651, 11, 6286, 2349,
        769, 6002, 8415, 6550, 877, 1845, 3706, 7287, 3437, 1924, 5488, 3732, 7175, 1100, 4858,
        3565, 1538, 5769, 1292, 3899, 3898, 5354, 33, 4835, 8852, 7507, 94, 3992, 3822, 15755,
        7538, 4285, 7928, 1154, 6991, 3513, 6162, 6323, 3522, 2783, 4858, 449, 2140, 1891, 2375,
        5864, 1120, 7935, 4548, 4851, 2100, 2345, 6842, 3508, 5753, 5440, 7403, 7540, 7407, 2290,
        3079, 6388, 3555, 1908, 3850, 477, 8699, 2314, 8643, 8138, 1117, 6544, 8067, 8328, 201,
        2523, 3797, 1754, 839, 6116, 1177, 2035, 103, 1950, 2941, 4466, 4799, 6622, 9989, 411,
        5080, 15, 3622, 6915, 1418, 2923, 5922, 190, 1603, 29, 2828, 4951, 875, 2031, 7626, 5507,
        3030, 2261, 4734, 2647, 2799, 529, 3186, 6761, 68244, 65351, 2124, 5974, 2569,
    ];

    // NibblePackMedFixedSect accepts slices of len 256
    assert_eq!(input.len(), 256);

    // Encode the 256 slice into `buf`, starting at offset 0.
    // `size` is used below as the end of the encoded bytes in `buf`
    // (presumably the offset just past the written section — confirm against the crate docs).
    let size = NibblePackMedFixedSect::gen_stats_and_write(&mut buf[..], 0, &input).unwrap();

    // Then decode it to a 256 sink, expecting the decoded = input
    let mut sink = Section256Sink::<u64>::new();
    // Only the first `size` bytes of `buf` are handed to the section reader.
    let section = NibblePackMedFixedSect::<u64>::try_from(&buf[..size]).unwrap();
    section.decode_to_sink(&mut sink).unwrap();
    // This is the assertion reported to fail for the input above.
    assert_eq!(sink.values, input);
}
fsolleza commented 3 years ago

In case it might be helpful, per #2, the code below seems to be failing at a very specific location:

    // Round-trip the sequence 0..100000 through the vector appender and reader,
    // then compare a small window of the decoded values against the originals.
    let data: Vec<u64> = (0..100000).collect();
    let mut appender = VectorU64Appender::try_new(data.len()).unwrap();
    // `result` first holds the encoded output of `encode_all`...
    let result = appender.encode_all(data.clone()).unwrap();
    let reader = VectorReader::try_new(&result[..]).unwrap();
    // ...and is then shadowed by the values decoded back out of the reader.
    let result: Vec<u64> = reader.iterate().collect();
    // The window 48_380..48_390 straddles index 48_384 = 189 * 256. In the error
    // output below, every decoded value from that index onward is exactly 256
    // lower than expected (48128 = 188 * 256 instead of 48384).
    // NOTE(review): presumably this lines up with a 256-element section boundary — confirm.
    let start = 48_380;
    let end = 48_390;
    assert_eq!(&data[start..end], &result[start..end]);

Error is:

  left: `[48380, 48381, 48382, 48383, 48384, 48385, 48386, 48387, 48388, 48389]`,
 right: `[48380, 48381, 48382, 48383, 48128, 48129, 48130, 48131, 48132, 48133]`', src/main.rs:67:5