spiraldb / vortex

A toolkit for working with compressed Arrow in-memory, on-disk, and over-the-wire
Apache License 2.0
92 stars 5 forks source link

Fix DictArray / ConstantArray canonicalization issues #415

Closed a10y closed 1 week ago

a10y commented 1 week ago

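Panic from the `datafusion/vortex_pushdown` benchmark (`bench-vortex/benches/datafusion_benchmark.rs`): `slice` is not implemented for `vortex.constant`, and it is reached while canonicalizing a `DictArray`: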

Benchmarking datafusion/vortex_pushdown: Warming up for 3.0000 s
thread 'tokio-runtime-worker' panicked at vortex-array/src/canonical.rs:182:48:
called `Result::unwrap()` on an `Err` value: function slice not implemented for vortex.constant
Backtrace:
   0: std::backtrace::Backtrace::create
   1: vortex::Array::with_dyn::{{closure}}
   2: <vortex::array::constant::ConstantEncoding as vortex::encoding::ArrayEncoding>::with_dyn
   3: vortex::compute::slice::slice
   4: vortex::array::sparse::compute::slice::<impl vortex::compute::slice::SliceFn for vortex::array::sparse::SparseArray>::slice
   5: vortex::Array::with_dyn::{{closure}}
   6: <vortex::array::sparse::SparseEncoding as vortex::encoding::ArrayEncoding>::with_dyn
   7: vortex::compute::slice::slice
   8: vortex_fastlanes::bitpacking::compute::take::take_primitive
   9: vortex_fastlanes::bitpacking::compute::take::<impl vortex::compute::take::TakeFn for vortex_fastlanes::bitpacking::BitPackedArray>::take
  10: vortex::Array::with_dyn::{{closure}}
  11: <vortex_fastlanes::bitpacking::BitPackedEncoding as vortex::encoding::ArrayEncoding>::with_dyn
  12: vortex::compute::take::take
  13: <vortex_dict::dict::DictArray as vortex::canonical::IntoCanonical>::into_canonical
  14: <vortex_dict::dict::DictEncoding as vortex::encoding::ArrayEncoding>::canonicalize
  15: vortex_fastlanes::delta::compress::decompress
  16: <vortex_fastlanes::delta::DeltaEncoding as vortex::encoding::ArrayEncoding>::canonicalize
  17: vortex_fastlanes::for::compress::decompress
  18: <vortex_fastlanes::for::FoREncoding as vortex::encoding::ArrayEncoding>::canonicalize
  19: vortex_fastlanes::delta::compress::decompress
  20: <vortex_fastlanes::delta::DeltaEncoding as vortex::encoding::ArrayEncoding>::canonicalize
  21: <core::iter::adapters::map::Map<I,F> as core::iter::traits::iterator::Iterator>::fold
  22: <alloc::vec::Vec<T> as alloc::vec::spec_from_iter::SpecFromIter<T,I>>::from_iter
  23: vortex::canonical::struct_to_arrow
  24: vortex::canonical::Canonical::into_arrow
  25: <core::pin::Pin<P> as core::future::future::Future>::poll
  26: <vortex_datafusion::plans::RowIndicesStream<F> as futures_core::stream::Stream>::poll_next
  27: <vortex_datafusion::plans::TakeRowsStream<F> as futures_core::stream::Stream>::poll_next
  28: datafusion_physical_plan::repartition::RepartitionExec::pull_from_input::{{closure}}
  29: tokio::runtime::task::core::Core<T,S>::poll
  30: tokio::runtime::task::harness::Harness<T,S>::poll
  31: tokio::runtime::scheduler::multi_thread::worker::Context::run_task
  32: tokio::runtime::scheduler::multi_thread::worker::Context::run
  33: tokio::runtime::context::runtime::enter_runtime
  34: tokio::runtime::scheduler::multi_thread::worker::run
  35: <tokio::runtime::blocking::task::BlockingTask<T> as core::future::future::Future>::poll
  36: tokio::runtime::task::core::Core<T,S>::poll
  37: tokio::runtime::task::harness::Harness<T,S>::poll
  38: tokio::runtime::blocking::pool::Inner::run
  39: std::sys::backtrace::__rust_begin_short_backtrace
  40: core::ops::function::FnOnce::call_once{{vtable.shim}}
  41: std::sys::pal::unix::thread::Thread::new::thread_start
  42: __pthread_joiner_wake

stack backtrace:
   0:        0x1064f20d8 - <std::sys::backtrace::_print::DisplayBacktrace as core::fmt::Display>::fmt::h26c0b4f719d222a3
   1:        0x10651405c - core::fmt::write::hc08b7fce83e4f7c4
   2:        0x1064ee938 - std::io::Write::write_fmt::hce61385361854552
   3:        0x1064f1f30 - std::sys::backtrace::print::h7d56298fc89b5007
   4:        0x1064f3104 - std::panicking::default_hook::{{closure}}::h76bd6f91e05c6798
   5:        0x1064f2dd0 - std::panicking::default_hook::h05b04dc464194274
   6:        0x1064f3aec - std::panicking::rust_panic_with_hook::h01b84a25b9a58015
   7:        0x1064f348c - std::panicking::begin_panic_handler::{{closure}}::h7f0944b94677e626
   8:        0x1064f255c - std::sys::backtrace::__rust_end_short_backtrace::h1cb71c303170678d
   9:        0x1064f3154 - _rust_begin_unwind
  10:        0x1065d454c - core::panicking::panic_fmt::h1d2c3af46b7587f9
  11:        0x1065d48dc - core::result::unwrap_failed::he65c476240e7d707
  12:        0x104aeb58c - <core::iter::adapters::map::Map<I,F> as core::iter::traits::iterator::Iterator>::fold::h6089065f795c9c4b
  13:        0x104b073c8 - <alloc::vec::Vec<T> as alloc::vec::spec_from_iter::SpecFromIter<T,I>>::from_iter::hd6c65e4ca284bbec
  14:        0x104b90308 - vortex::canonical::struct_to_arrow::h212c78ec45e45e94
  15:        0x104b8b808 - vortex::canonical::Canonical::into_arrow::h0d52dc73ef8ee7d2
  16:        0x104ab1558 - <core::pin::Pin<P> as core::future::future::Future>::poll::h04195c9a4dc7a567
  17:        0x104a9d5f0 - <vortex_datafusion::plans::RowIndicesStream<F> as futures_core::stream::Stream>::poll_next::hd1a151343f51c3fb
  18:        0x104a9e4fc - <vortex_datafusion::plans::TakeRowsStream<F> as futures_core::stream::Stream>::poll_next::h6c3479825233b4ec
  19:        0x1053a5a60 - datafusion_physical_plan::repartition::RepartitionExec::pull_from_input::{{closure}}::h6ca9dca67ce9fbcd
  20:        0x10539c028 - tokio::runtime::task::core::Core<T,S>::poll::ha19c976c9dd25e01
  21:        0x10539ec5c - tokio::runtime::task::harness::Harness<T,S>::poll::hb8529332c87b1612
  22:        0x106323720 - tokio::runtime::scheduler::multi_thread::worker::Context::run_task::h8db25098f588bb8c
  23:        0x106322750 - tokio::runtime::scheduler::multi_thread::worker::Context::run::h1b899e48bcbff692
  24:        0x10631cdbc - tokio::runtime::context::runtime::enter_runtime::heca5c00e0773d957
  25:        0x106322300 - tokio::runtime::scheduler::multi_thread::worker::run::h0e2e5501c8168490
  26:        0x106329f80 - <tokio::runtime::blocking::task::BlockingTask<T> as core::future::future::Future>::poll::h08a2b6dddb5b92d3
  27:        0x106328acc - tokio::runtime::task::core::Core<T,S>::poll::h8d81e79f68e8bc71
  28:        0x106315ac0 - tokio::runtime::task::harness::Harness<T,S>::poll::h8f210379e8d8a3bb
  29:        0x10632ef38 - tokio::runtime::blocking::pool::Inner::run::h6d24b9a4ef5b58a9
  30:        0x106315214 - std::sys::backtrace::__rust_begin_short_backtrace::hb48ebc00c86ca762
  31:        0x106327d74 - core::ops::function::FnOnce::call_once{{vtable.shim}}::h139cba40f3b0e7d6
  32:        0x1064f9008 - std::sys::pal::unix::thread::Thread::new::thread_start::h93a44ad39abec327
  33:        0x18e11af94 - __pthread_joiner_wake
thread 'main' panicked at bench-vortex/benches/datafusion_benchmark.rs:111:22:
called `Result::unwrap()` on an `Err` value: Context("Join Error", External(JoinError::Panic(Id(513555), ...)))
stack backtrace:
   0:        0x1064f20d8 - <std::sys::backtrace::_print::DisplayBacktrace as core::fmt::Display>::fmt::h26c0b4f719d222a3
   1:        0x10651405c - core::fmt::write::hc08b7fce83e4f7c4
   2:        0x1064ee938 - std::io::Write::write_fmt::hce61385361854552
   3:        0x1064f1f30 - std::sys::backtrace::print::h7d56298fc89b5007
   4:        0x1064f3104 - std::panicking::default_hook::{{closure}}::h76bd6f91e05c6798
   5:        0x1064f2dd0 - std::panicking::default_hook::h05b04dc464194274
   6:        0x1064f3aec - std::panicking::rust_panic_with_hook::h01b84a25b9a58015
   7:        0x1064f348c - std::panicking::begin_panic_handler::{{closure}}::h7f0944b94677e626
   8:        0x1064f255c - std::sys::backtrace::__rust_end_short_backtrace::h1cb71c303170678d
   9:        0x1064f3154 - _rust_begin_unwind
  10:        0x1065d454c - core::panicking::panic_fmt::h1d2c3af46b7587f9
  11:        0x1065d48dc - core::result::unwrap_failed::he65c476240e7d707
  12:        0x104939abc - tokio::runtime::park::CachedParkThread::block_on::h7b0697e6c2f77795
  13:        0x104930ab0 - criterion::bencher::AsyncBencher<A,M>::iter::hd7fa583df2d15b6a
  14:        0x10494c418 - <criterion::routine::Function<M,F,T> as criterion::routine::Routine<M,T>>::warm_up::hf6d2827f58510126
  15:        0x10494f598 - criterion::routine::Routine::sample::h9e9bd857abb06216
  16:        0x104951124 - criterion::analysis::common::h2677114a327c73c0
  17:        0x1049190bc - criterion::benchmark_group::BenchmarkGroup<M>::bench_function::ha4b624a9276b01c0
  18:        0x10492688c - datafusion_benchmark::main::hb8333595c85f4351
  19:        0x10493c404 - std::sys::backtrace::__rust_begin_short_backtrace::h2c2ec9926186fbb4
  20:        0x10496e108 - std::rt::lang_start::{{closure}}::hba03469602c7fec5
  21:        0x1064e8c88 - std::rt::lang_start_internal::hcfebe28fa7b3d2c2
  22:        0x104926d40 - _main