Open progval opened 4 weeks ago
repro code (add it to tests/basic/main.rs):
tests/basic/main.rs
fn integration_path(path: &str) -> String { let dir = env!("CARGO_MANIFEST_DIR"); format!("{}/tests/integration/data/{}", dir, path) } #[test] pub fn decimal128_timestamps_1900_test() { let path = integration_path("TestOrcFile.testDate1900.orc"); let f = File::open(path).expect("no file found"); let mut reader = ArrowReaderBuilder::try_new(f) .unwrap() .with_batch_size(10) // it's a big file, we don't want to test more than that .with_schema(Arc::new(Schema::new(vec![ Field::new( "time", DataType::Decimal128(38, 9), true, ), Field::new( "date", DataType::Date32, true, ), ]))) .build(); let batch = reader.next().unwrap().unwrap(); println!("{:?}", batch.column(0)); let expected = [ "+-----------------------+------------+", "| time | date |", "+-----------------------+------------+", "| -2198229903.900000000 | 1900-12-25 |", "| -2198229903.899900000 | 1900-12-25 |", "| -2198229903.899800000 | 1900-12-25 |", "| -2198229903.899700000 | 1900-12-25 |", "| -2198229903.899600000 | 1900-12-25 |", "| -2198229903.899500000 | 1900-12-25 |", "| -2198229903.899400000 | 1900-12-25 |", "| -2198229903.899300000 | 1900-12-25 |", "| -2198229903.899200000 | 1900-12-25 |", "| -2198229903.899100000 | 1900-12-25 |", "+-----------------------+------------+", ]; assert_batches_eq(&[batch], &expected); }
errors with:
expected: [ "+-----------------------+------------+", "| time | date |", "+-----------------------+------------+", "| -2198229903.900000000 | 1900-12-25 |", "| -2198229903.899900000 | 1900-12-25 |", "| -2198229903.899800000 | 1900-12-25 |", "| -2198229903.899700000 | 1900-12-25 |", "| -2198229903.899600000 | 1900-12-25 |", "| -2198229903.899500000 | 1900-12-25 |", "| -2198229903.899400000 | 1900-12-25 |", "| -2198229903.899300000 | 1900-12-25 |", "| -2198229903.899200000 | 1900-12-25 |", "| -2198229903.899100000 | 1900-12-25 |", "+-----------------------+------------+", ] actual: [ "+-----------------------+------------+", "| time | date |", "+-----------------------+------------+", "| 107616705.213693951 | 1900-12-25 |", "| -2198229899.000000000 | 1900-12-25 |", "| -2198229902.999499000 | 1900-12-25 |", "| -2198229898.990000000 | 1900-12-25 |", "| -2198229902.999498000 | 1900-12-25 |", "| -2198229898.980000000 | 1900-12-25 |", "| -2198229902.999497000 | 1900-12-25 |", "| -2198229898.970000000 | 1900-12-25 |", "| -2198229902.999496000 | 1900-12-25 |", "| -2198229898.960000000 | 1900-12-25 |", "+-----------------------+------------+", ]
by replacing .with_batch_size(10) with .with_batch_size(20), we even get an overflow crash:
.with_batch_size(10)
.with_batch_size(20)
[src/reader/decode/timestamp.rs:67:5] &base = 1420099200 [src/reader/decode/timestamp.rs:67:5] &seconds_since_orc_base = Ok( -3618300303, ) [src/reader/decode/timestamp.rs:67:5] &nanoseconds = Ok( -407, ) thread 'decimal128_timestamps_1900_test' panicked at src/reader/decode/timestamp.rs:76:9: attempt to multiply with overflow
Both are caused by https://github.com/datafusion-contrib/datafusion-orc/commit/9dd640c8e93644e73fa39c78ea88e92b4166f2e5
cc @Jefffrey
Should this issue be moved to orc-rust?
Yes
repro code (add it to
tests/basic/main.rs
):errors with:
by replacing
.with_batch_size(10)
with.with_batch_size(20)
, we even get an overflow crash:Both are caused by https://github.com/datafusion-contrib/datafusion-orc/commit/9dd640c8e93644e73fa39c78ea88e92b4166f2e5
cc @Jefffrey