Closed danburkert closed 5 years ago
Thanks for the great bug report! i was able to reproduce rapidly.
There is a similar failure for non-sorted blocks when the block contains a small value:
#[test]
fn test_bit_width_32() {
use bitpacking::BitPacker4x;
let mut values = vec![i32::max_value() as u32 + 1; BitPacker4x::BLOCK_LEN];
values[0] = 0;
let bit_packer = BitPacker4x::new();
let bit_width = bit_packer.num_bits(&values);
assert_eq!(bit_width, 32);
let mut block = vec![0u8; BitPacker4x::compressed_block_size(bit_width)];
bit_packer.compress(&values, &mut block, bit_width);
let mut decoded_values = vec![0x10101010; BitPacker4x::BLOCK_LEN];
bit_packer.decompress(&block, &mut decoded_values, bit_width);
assert_eq!(values, decoded_values);
}
fails with:
thread 'page::bitpacked::tests::test_bit_width_32' panicked at 'assertion failed: `(left == right)`
left: `[0, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648]`,
right: `[2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648, 2147483648]`', data/src/page/bitpacked.rs:308:9
Pure curiosity, how did you discover that?
Proptest
Sweet! If you have time could you contribute your code?
The tests that found these are over a bigger data structure which uses bitpacking as a building block, but it's not too difficult to create block specific tests:
proptest! {
#[test]
fn check_block(
values in prop::collection::vec(u32::arbitrary(), BitPacker4x::BLOCK_LEN),
) {
let bit_packer = BitPacker4x::new();
let bit_width = bit_packer.num_bits(&*values);
let mut block = vec![0u8; BitPacker4x::compressed_block_size(bit_width)];
bit_packer.compress(&values, &mut block, bit_width);
let mut decoded_values = vec![0x10101010; BitPacker4x::BLOCK_LEN];
bit_packer.decompress(&block, &mut decoded_values, bit_width);
prop_assert_eq!(values, decoded_values);
}
#[test]
fn check_sorted_block(
(mut values, init_value) in prop::collection::vec(u32::arbitrary(), BitPacker4x::BLOCK_LEN).prop_flat_map(|mut values| {
values.sort();
let min_value = values[0];
(Just(values), (0..=min_value))
}),
) {
let bit_packer = BitPacker4x::new();
let bit_width = bit_packer.num_bits_sorted(init_value, &*values);
let mut block = vec![0u8; BitPacker4x::compressed_block_size(bit_width)];
bit_packer.compress_sorted(init_value, &values, &mut block, bit_width);
let mut decoded_values = vec![0x10101010; BitPacker4x::BLOCK_LEN];
bit_packer.decompress_sorted(init_value, &block, &mut decoded_values, bit_width);
prop_assert_eq!(values, decoded_values);
}
}
The non-sorted check repros the second issue; I haven't gotten the sorted check to repro the original issue, I'm guessing because the sort space is too large.
(feel free to use/adapt that for bitpacking
)
Thanks a lot!
Fixed in 0.8.1. Yanked 0.8.
This bug does not affect tantivy as DocId
are limited to 31 bits.
Thanks for the quick turnaround!
Easiest way to explain this is through a repro:
fails with: