apache / horaedb

Apache HoraeDB (incubating) is a high-performance, distributed, cloud native time-series database.
https://horaedb.apache.org
Apache License 2.0
2.62k stars 203 forks source link

Panic when reading null values of a not-null column defined schema #302

Closed ShiKaiWi closed 1 year ago

ShiKaiWi commented 2 years ago

Describe this problem

It seems there is currently no check for inserting a null value into a not-null column, and datafusion may panic when processing such a case. Here is the stacktrace:

2022-10-14 15:12:20.398 ERRO [common_util/src/panic.rs:42] thread 'ceres-read' panicked 'called `Result::unwrap()` on an `Err` value: InvalidArgumentError("Column 'COUNT(DISTINCT SPM_2055016796_INFLUENCE_DEF
AULT.action)[count distinct]' is declared as non-nullable but contains null values")' at "/home/chunshao.rcs/.cargo/git/checkouts/arrow-datafusion-b9eb4f789f8bda1f/d84ea9c/datafusion/core/src/physical_plan/r
epartition.rs:178"
   0: common_util::panic::set_panic_hook::{{closure}}
             at /home/chunshao.rcs/github/CeresDB/ceresdb/common_util/src/panic.rs:41:18
   1: std::panicking::rust_panic_with_hook
             at /rustc/d394408fb38c4de61f765a3ed5189d2731a1da91/library/std/src/panicking.rs:702:17
   2: std::panicking::begin_panic_handler::{{closure}}
             at /rustc/d394408fb38c4de61f765a3ed5189d2731a1da91/library/std/src/panicking.rs:588:13
   3: std::sys_common::backtrace::__rust_end_short_backtrace
             at /rustc/d394408fb38c4de61f765a3ed5189d2731a1da91/library/std/src/sys_common/backtrace.rs:138:18
   4: rust_begin_unwind
             at /rustc/d394408fb38c4de61f765a3ed5189d2731a1da91/library/std/src/panicking.rs:584:5
   5: core::panicking::panic_fmt
             at /rustc/d394408fb38c4de61f765a3ed5189d2731a1da91/library/core/src/panicking.rs:142:14
   6: core::result::unwrap_failed
             at /rustc/d394408fb38c4de61f765a3ed5189d2731a1da91/library/core/src/result.rs:1814:5
   7: core::result::Result<T,E>::unwrap
             at /rustc/d394408fb38c4de61f765a3ed5189d2731a1da91/library/core/src/result.rs:1107:23
      datafusion::physical_plan::repartition::BatchPartitioner::partition
             at /home/chunshao.rcs/.cargo/git/checkouts/arrow-datafusion-b9eb4f789f8bda1f/d84ea9c/datafusion/core/src/physical_plan/repartition.rs:178:33
   8: datafusion::physical_plan::repartition::RepartitionExec::pull_from_input::{{closure}}
             at /home/chunshao.rcs/.cargo/git/checkouts/arrow-datafusion-b9eb4f789f8bda1f/d84ea9c/datafusion/core/src/physical_plan/repartition.rs:452:13
      <core::future::from_generator::GenFuture<T> as core::future::future::Future>::poll
             at /rustc/d394408fb38c4de61f765a3ed5189d2731a1da91/library/core/src/future/mod.rs:91:19
   9: tokio::runtime::task::core::CoreStage<T>::poll::{{closure}}
             at /home/chunshao.rcs/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.20.1/src/runtime/task/core.rs:165:17
      tokio::loom::std::unsafe_cell::UnsafeCell<T>::with_mut
             at /home/chunshao.rcs/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.20.1/src/loom/std/unsafe_cell.rs:14:9
      tokio::runtime::task::core::CoreStage<T>::poll
             at /home/chunshao.rcs/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.20.1/src/runtime/task/core.rs:155:13
      tokio::runtime::task::harness::poll_future::{{closure}}
             at /home/chunshao.rcs/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.20.1/src/runtime/task/harness.rs:480:19
      <core::panic::unwind_safe::AssertUnwindSafe<F> as core::ops::function::FnOnce<()>>::call_once
             at /rustc/d394408fb38c4de61f765a3ed5189d2731a1da91/library/core/src/panic/unwind_safe.rs:271:9
  10: std::panicking::try::do_call
             at /rustc/d394408fb38c4de61f765a3ed5189d2731a1da91/library/std/src/panicking.rs:492:40
      std::panicking::try
             at /rustc/d394408fb38c4de61f765a3ed5189d2731a1da91/library/std/src/panicking.rs:456:19
      std::panic::catch_unwind
             at /rustc/d394408fb38c4de61f765a3ed5189d2731a1da91/library/std/src/panic.rs:137:14
      tokio::runtime::task::harness::poll_future
             at /home/chunshao.rcs/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.20.1/src/runtime/task/harness.rs:468:18
      tokio::runtime::task::harness::Harness<T,S>::poll_inner
             at /home/chunshao.rcs/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.20.1/src/runtime/task/harness.rs:104:27
      tokio::runtime::task::harness::Harness<T,S>::poll
             at /home/chunshao.rcs/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.20.1/src/runtime/task/harness.rs:57:15
        11: tokio::runtime::task::raw::RawTask::poll
             at /home/chunshao.rcs/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.20.1/src/runtime/task/raw.rs:84:18
      tokio::runtime::task::LocalNotified<S>::run
             at /home/chunshao.rcs/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.20.1/src/runtime/task/mod.rs:381:9
      tokio::runtime::thread_pool::worker::Context::run_task::{{closure}}
             at /home/chunshao.rcs/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.20.1/src/runtime/thread_pool/worker.rs:458:21
      tokio::coop::with_budget::{{closure}}
             at /home/chunshao.rcs/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.20.1/src/coop.rs:102:9
      std::thread::local::LocalKey<T>::try_with
             at /rustc/d394408fb38c4de61f765a3ed5189d2731a1da91/library/std/src/thread/local.rs:445:16
      std::thread::local::LocalKey<T>::with
             at /rustc/d394408fb38c4de61f765a3ed5189d2731a1da91/library/std/src/thread/local.rs:421:9
  12: tokio::coop::with_budget
             at /home/chunshao.rcs/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.20.1/src/runtime/thread_pool/worker.rs:434:9
      tokio::coop::budget
             at /home/chunshao.rcs/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.20.1/src/coop.rs:72:5
      tokio::runtime::thread_pool::worker::Context::run_task
             at /home/chunshao.rcs/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.20.1/src/runtime/thread_pool/worker.rs:434:9
  13: tokio::runtime::thread_pool::worker::Context::run
  14: tokio::runtime::thread_pool::worker::run::{{closure}}
             at /home/chunshao.rcs/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.20.1/src/runtime/thread_pool/worker.rs:386:17
      tokio::macros::scoped_tls::ScopedKey<T>::set
             at /home/chunshao.rcs/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.20.1/src/macros/scoped_tls.rs:61:9
  15: tokio::runtime::thread_pool::worker::run
             at /home/chunshao.rcs/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.20.1/src/runtime/thread_pool/worker.rs:383:5
  16: tokio::runtime::thread_pool::worker::Launch::launch::{{closure}}
             at /home/chunshao.rcs/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.20.1/src/runtime/thread_pool/worker.rs:362:45
      <tokio::runtime::blocking::task::BlockingTask<T> as core::future::future::Future>::poll
             at /home/chunshao.rcs/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.20.1/src/runtime/blocking/task.rs:42:21

The table schema is:

{"rows":[{"Table":"SPM_2055016796_INFLUENCE_DEFAULT","Create Table":"CREATE TABLE `SPM_2055016796_INFLUENCE_DEFAULT` (`period` timestamp NOT NULL, `tsid` uint64 NOT NULL, `TraceId` string TAG, `_result` string TAG, `groupbyIndex0` string TAG, `idc` string TAG, `ldc` string TAG, `server` string TAG, `action` string TAG, `pid` string TAG, PRIMARY KEY(period,tsid), TIMESTAMP KEY(period)) ENGINE=Analytic WITH(arena_block_size='2097152', compaction_strategy='default', compression='ZSTD', enable_ttl='true', num_rows_per_row_group='8192', segment_duration='2h', storage_format='COLUMNAR', ttl='110d', update_mode='APPEND', write_buffer_size='33554432')"}]}

Steps to reproduce

  1. Create a table with tag;
  2. Insert a row with a null value as the tag;
  3. Query the row;

Expected behavior

It should not panic.

Additional Information

At least two things we need to fix:

ShiKaiWi commented 2 years ago

Actually, I can't reproduce this error. After digging into the codebase, I found the following points:

Maybe this problem is caused by altering schema?

MichaelLeeHZ commented 2 years ago

@ShiKaiWi I reproduced this error.

  1. Create table named 'demo' with tag 'name'
    curl --location --request POST 'http://127.0.0.1:5000/sql' \
    --header 'Content-Type: application/json' \
    -H 'x-ceresdb-access-tenant: test' \
    --data-raw '{
    "query": "CREATE TABLE `demo` (`name` string TAG NULL, `value` double NOT NULL, `t` timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE=Analytic with (enable_ttl='\''false'\'')"
    }'

    The table schema is :

CREATE TABLE `demo` (`t` timestamp NOT NULL, `tsid` uint64 NOT NULL, `name` string TAG, `value` double NOT NULL, PRIMARY KEY(t,tsid), TIMESTAMP KEY(t)) ENGINE=Analytic WITH(arena_block_size='2097152', compaction_strategy='default', compression='ZSTD', enable_ttl='false', num_rows_per_row_group='8192', segment_duration='', storage_format='COLUMNAR', ttl='7d', update_mode='OVERWRITE', write_buffer_size='33554432')"
  1. insert a row, tag name is null
curl --location --request POST 'http://127.0.0.1:5000/sql' \
--header 'Content-Type: application/json' \
-H 'x-ceresdb-access-tenant: test' \
--data-raw '{
    "query": "INSERT INTO demo(t,  value) VALUES(1651737067000, 100)"
}'
  1. Query like following statement, with Group-By and Count(DISTINCT) operator.
curl --location --request POST 'http://127.0.0.1:5000/sql' \
--header 'Content-Type: application/json' \
--header 'x-ceresdb-access-tenant: test' \
--data-raw '{
    "query": "select `t`, count(distinct name) from demo group by `t`"
}'

Here is the stacktrace:

ERRO [common_util/src/panic.rs:42] thread 'ceres-bg' panicked 'called `Result::unwrap()` on an `Err` value: InvalidArgumentError("Column 'COUNT(DISTINCT demo.name)[count distinct]' is declared as non-nullable but contains null values")' at "/Users/michael/.cargo/git/checkouts/arrow-datafusion-b9eb4f789f8bda1f/d84ea9c/datafusion/core/src/physical_plan/repartition.rs:178"
   0: backtrace::backtrace::libunwind::trace
             at /Users/michael/.cargo/registry/src/github.com-1ecc6299db9ec823/backtrace-0.3.66/src/backtrace/mod.rs:66:5
      backtrace::backtrace::trace_unsynchronized
             at /Users/michael/.cargo/registry/src/github.com-1ecc6299db9ec823/backtrace-0.3.66/src/backtrace/mod.rs:66:5
      backtrace::backtrace::trace
             at /Users/michael/.cargo/registry/src/github.com-1ecc6299db9ec823/backtrace-0.3.66/src/backtrace/mod.rs:53:14
      backtrace::capture::Backtrace::create
             at /Users/michael/.cargo/registry/src/github.com-1ecc6299db9ec823/backtrace-0.3.66/src/capture.rs:176:9
      backtrace::capture::Backtrace::new
MichaelLeeHZ commented 1 year ago

I submitted a bug report to datafusion: https://github.com/apache/arrow-datafusion/issues/4040

jiacai2050 commented 1 year ago

I tried to reproduce this problem with this file: https://github.com/apache/arrow-datafusion/blob/97b3a4b37f54aaa52f8705db3e57b15ee98c24a7/datafusion-examples/examples/memtable.rs#L39

Changes:

1 file changed, 6 insertions(+), 8 deletions(-)
datafusion-examples/examples/memtable.rs | 14 ++++++--------

modified   datafusion-examples/examples/memtable.rs
@@ -36,14 +36,12 @@ async fn main() -> Result<()> {
     // Register the in-memory table containing the data
     ctx.register_table("users", Arc::new(mem_table))?;

-    let dataframe = ctx.sql("SELECT * FROM users;").await?;
+    let dataframe = ctx
+        .sql("SELECT id,count(distinct bank_account) From users group by id;")
+        .await?;

     timeout(Duration::from_secs(10), async move {
-        let result = dataframe.collect().await.unwrap();
-        let record_batch = result.get(0).unwrap();
-
-        assert_eq!(1, record_batch.column(0).len());
-        dbg!(record_batch.columns());
+        dataframe.show().await.unwrap();
     })
     .await
     .unwrap();
@@ -56,8 +54,8 @@ fn create_memtable() -> Result<MemTable> {
 }

 fn create_record_batch() -> Result<RecordBatch> {
-    let id_array = UInt8Array::from(vec![1]);
-    let account_array = UInt64Array::from(vec![9000]);
+    let id_array = UInt8Array::from(vec![1, 2]);
+    let account_array = UInt64Array::from(vec![None, Some(1)]);

     Ok(RecordBatch::try_new(
         get_schema(),

Then execute this demo with cargo run --example memtable, will output

+----+------------------------------------+
| id | COUNT(DISTINCT users.bank_account) |
+----+------------------------------------+
| 2  | 1                                  |
| 1  | 0                                  |
+----+------------------------------------+

It works without panicking. Maybe we need to narrow this problem down further, to determine whether it's a usage issue on our side or an upstream issue.