apache / doris

Apache Doris is an easy-to-use, high performance and unified analytics database.
https://doris.apache.org
Apache License 2.0
11.86k stars 3.13k forks source link

[Bug] Stream Load导入json文件失败 #31938

Open yongjinhou opened 4 months ago

yongjinhou commented 4 months ago

Search before asking

Version

2.0.4&2.0.5

What's Wrong?

  1. 建库建表:
     CREATE TABLE test_stream_load_json_test_json_all_param_db.test_stream_load_json_test_json_all_param_tb (
         start_version INT NOT NULL,
         end_version INT NOT NULL,
         version_hash LARGEINT NOT NULL,
         create_time DATETIME NOT NULL,
         segment_group VARCHAR(100) NOT NULL,
         num_size INT NOT NULL
     )
     PARTITION BY RANGE(start_version) (
         PARTITION p1 VALUES LESS THAN ("1"),
         PARTITION p2 VALUES LESS THAN ("3"),
         PARTITION p3 VALUES LESS THAN ("5")
     )
     DISTRIBUTED BY Hash(start_version) BUCKETS 5;
  2. Stream Load导入数据:
     curl --location-trusted -u root:xxx \
         -H "Content-length:11449" \
         -H "format:json" \
         -H "Transfer-Encoding:" \
         -H "strip_outer_array:true" \
         -H 'jsonpaths:["$.start_version", "$.end_version", "$.version_hash", "$.creation_time", "$.segment_group[0].column_pruning[0]", "$.segment_group[0].num_rows"]' \
         -H "json_root:$.delta" \
         -H "Expect:100-continue" \
         -H "timeout:300" \
         -H "timezone:+00:00" \
         -H "partitions:p1,p2" \
         -H "where:version_hash=6029593056193292005" \
         -H "columns:start_version, end_version, version_hash, create_timestamp, segment_group, num_size, create_time=from_unixtime(create_timestamp)" \
         -H "max_filter_ratio:0.5" \
         -T /home/work/xxx/object.json \
         http://ip:port/api/test_stream_load_json_test_json_all_param_db/test_stream_load_json_test_json_all_param_tb/_stream_load
  3. 数据导入失败,报错信息如下:
     "TxnId": 23580,
     "Label": "f2370553-9451-4abc-96a8-b3fb8337cb6e",
     "Comment": "",
     "TwoPhaseCommit": "false",
     "Status": "Fail",
     "Message": "[INTERNAL_ERROR]too many filtered rows\n\n\t0# std::_Function_handler<void (doris::RuntimeState, doris::Status), doris::StreamLoadExecutor::execute_plan_fragment(std::shared_ptr)::$_0>::_M_invoke(std::_Any_data const&, doris::RuntimeState&&, doris::Status&&) at /home/disk6/palo/release/baidu/bdg/doris/PALO-2023-08/be/src/common/status.h:354\n\t1# doris::FragmentMgr::_exec_actual(std::shared_ptr, std::function<void (doris::RuntimeState, doris::Status)> const&) at /home/disk6/palo/release/baidu/bdg/doris/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/unique_ptr.h:360\n\t2# std::_Function_handler<void (), doris::FragmentMgr::exec_plan_fragment(doris::TExecPlanFragmentParams const&, std::function<void (doris::RuntimeState, doris::Status)> const&)::$_0>::_M_invoke(std::_Any_data const&) at /home/disk6/palo/release/baidu/bdg/doris/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/shared_ptr_base.h:701\n\t3# doris::ThreadPool::dispatch_thread() at /home/disk6/palo/release/baidu/bdg/doris/PALO-2023-08/be/src/util/threadpool.cpp:0\n\t4# doris::Thread::supervise_thread(void*) at /home/disk6/palo/release/baidu/bdg/doris/ldb_toolchain/bin/../usr/include/pthread.h:562\n\t5# start_thread\n\t6# __clone\n",
     "NumberTotalRows": 4,
     "NumberLoadedRows": 0,
     "NumberFilteredRows": 4,
     "NumberUnselectedRows": 0,
     "LoadBytes": 11449,
     "LoadTimeMs": 29,
     "BeginTxnTimeMs": 0,
     "StreamLoadPutTimeMs": 2,
     "ReadDataTimeMs": 0,
     "WriteDataTimeMs": 24,
     "CommitAndPublishTimeMs": 0,
     "ErrorURL": "http://ip:port/api/_load_error_log?file=__shard_132/error_log_insert_stmt_82430ab4b1b0b81e-faf2ff8b3eaa4ea5_82430ab4b1b0b81e_faf2ff8b3eaa4ea5"
  4. json文件(object.json)内容如下:
{
    "num_rows_per_data_block": 1024,
    "cumulative_layer_point": 2,
    "num_short_key_fields": 5,
    "column": [
        {
            "name": "k1",
            "type": "INT",
            "aggregation": "NONE",
            "length": 4,
            "is_key": true,
            "index_length": 4,
            "is_allow_null": true,
            "unique_id": 0,
            "is_root_column": true
        },
        {
            "name": "k2",
            "type": "SMALLINT",
            "aggregation": "NONE",
            "length": 2,
            "is_key": true,
            "index_length": 2,
            "is_allow_null": true,
            "unique_id": 1,
            "is_root_column": true
        },
        {
            "name": "k9",
            "type": "FLOAT",
            "aggregation": "SUM",
            "length": 4,
            "is_key": false,
            "index_length": 4,
            "is_allow_null": true,
            "unique_id": 10,
            "is_root_column": true
        }
    ],
    "creation_time": 1553152125,
    "selectivity": [
        1,
        1,
        1,
        1,
        1,
        1,
        1,
        1,
        1
    ],
    "data_file_type": "COLUMN_ORIENTED_FILE",
    "next_column_unique_id": 11,
    "compress_kind": "COMPRESS_LZ4",
    "segment_size": 268435456,
    "keys_type": "AGG_KEYS",
    "delta": [
        {
            "start_version": 0,
            "end_version": 1,
            "version_hash": 0,
            "creation_time": 1553152125,
            "segment_group": [
                {
                    "segment_group_id": 0,
                    "num_segments": 0,
                    "index_size": 0,
                    "data_size": 0,
                    "num_rows": 0,
                    "empty": true
                }
            ]
        },
        {
            "start_version": 2,
            "end_version": 2,
            "version_hash": 6029593056193292005,
            "creation_time": 1553152255,
            "segment_group": [
                {
                    "segment_group_id": 0,
                    "num_segments": 1,
                    "index_size": 229,
                    "data_size": 4186,
                    "num_rows": 3315,
                    "column_pruning": [
                        {
                            "min": "LTEyOA==",
                            "max": "MTI2",
                            "null_flag": false
                        },
                        {
                            "min": "MTk4OQ==",
                            "max": "MjAxMg==",
                            "null_flag": false
                        },
                        {
                            "min": "d2FuZ2ppbmcwNA==",
                            "max": "d3VsaW4wNA==",
                            "null_flag": false
                        }
                    ],
                    "empty": false
                }
            ]
        },
        {
            "start_version": 3,
            "end_version": 3,
            "version_hash": 7368336314652758588,
            "creation_time": 1553152260,
            "segment_group": [
                {
                    "segment_group_id": 0,
                    "num_segments": 1,
                    "index_size": 229,
                    "data_size": 4186,
                    "num_rows": 3315,
                    "column_pruning": [
                        {
                            "min": "LTEyOA==",
                            "max": "MTI2",
                            "null_flag": false
                        },
                        {
                            "min": "MTk4OQ==",
                            "max": "MjAxMg==",
                            "null_flag": false
                        },
                        {
                            "min": "d2FuZ2ppbmcwNA==",
                            "max": "d3VsaW4wNA==",
                            "null_flag": false
                        }
                    ],
                    "empty": false
                }
            ]
        },
        {
            "start_version": 4,
            "end_version": 4,
            "version_hash": 9172793704282665912,
            "creation_time": 1553152268,
            "segment_group": [
                {
                    "segment_group_id": 0,
                    "num_segments": 1,
                    "index_size": 229,
                    "data_size": 4186,
                    "num_rows": 3315,
                    "column_pruning": [
                        {
                            "min": "LTEyOA==",
                            "max": "MTI2",
                            "null_flag": false
                        },
                        {
                            "min": "MTk4OQ==",
                            "max": "MjAxMg==",
                            "null_flag": false
                        },
                        {
                            "min": "MA==",
                            "max": "MA==",
                            "null_flag": true
                        },
                        {
                            "min": "d2FuZ2ppbmcwNA==",
                            "max": "d3VsaW4wNA==",
                            "null_flag": false
                        }
                    ],
                    "empty": false
                }
            ]
        }
    ],
    "pending_delta": [
        {
            "partition_id": 15005,
            "transaction_id": 10007,
            "creation_time": 1553152325,
            "pending_segment_group": [
                {
                    "pending_segment_group_id": 0,
                    "num_segments": 1,
                    "load_id": {
                        "hi": -6248051641982818523,
                        "lo": -4026637950854708082
                    },
                    "column_pruning": [
                        {
                            "min": "LTEyOA==",
                            "max": "MTI2",
                            "null_flag": false
                        },
                        {
                            "min": "MTk4OQ==",
                            "max": "MjAxMg==",
                            "null_flag": false
                        },
                        {
                            "min": "d2FuZ2ppbmcwNA==",
                            "max": "d3VsaW4wNA==",
                            "null_flag": false
                        }
                    ],
                    "empty": false
                }
            ]
        },
        {
            "partition_id": 15005,
            "transaction_id": 10008,
            "creation_time": 1553152332,
            "pending_segment_group": [
                {
                    "pending_segment_group_id": 0,
                    "num_segments": 1,
                    "load_id": {
                        "hi": 8955644356935812351,
                        "lo": 5235253922991912895
                    },
                    "column_pruning": [
                        {
                            "min": "LTEyOA==",
                            "max": "MTI2",
                            "null_flag": false
                        },
                        {
                            "min": "MTk4OQ==",
                            "max": "MjAxMg==",
                            "null_flag": false
                        },
                        {
                            "min": "d2FuZ2ppbmcwNA==",
                            "max": "d3VsaW4wNA==",
                            "null_flag": false
                        }
                    ],
                    "empty": false
                }
            ]
        }
    ],
    "incremental_delta": [
        {
            "start_version": 2,
            "end_version": 2,
            "version_hash": 6029593056193292005,
            "creation_time": 1553152255,
            "segment_group": [
                {
                    "segment_group_id": 0,
                    "num_segments": 1,
                    "index_size": 229,
                    "data_size": 4186,
                    "num_rows": 3315,
                    "column_pruning": [
                        {
                            "min": "LTEyOA==",
                            "max": "MTI2",
                            "null_flag": false
                        },
                        {
                            "min": "MTk4OQ==",
                            "max": "MjAxMg==",
                            "null_flag": false
                        },
                        {
                            "min": "d2FuZ2ppbmcwNA==",
                            "max": "d3VsaW4wNA==",
                            "null_flag": false
                        }
                    ],
                    "empty": false
                }
            ]
        },
        {
            "start_version": 3,
            "end_version": 3,
            "version_hash": 7368336314652758588,
            "creation_time": 1553152260,
            "segment_group": [
                {
                    "segment_group_id": 0,
                    "num_segments": 1,
                    "index_size": 229,
                    "data_size": 4186,
                    "num_rows": 3315,
                    "column_pruning": [
                        {
                            "min": "LTEyOA==",
                            "max": "MTI2",
                            "null_flag": false
                        },
                        {
                            "min": "MTk4OQ==",
                            "max": "MjAxMg==",
                            "null_flag": false
                        },
                        {
                            "min": "d2FuZ2ppbmcwNA==",
                            "max": "d3VsaW4wNA==",
                            "null_flag": false
                        }
                    ],
                    "empty": false
                }
            ]
        },
        {
            "start_version": 4,
            "end_version": 4,
            "version_hash": 9172793704282665912,
            "creation_time": 1553152268,
            "segment_group": [
                {
                    "segment_group_id": 0,
                    "num_segments": 1,
                    "index_size": 229,
                    "data_size": 4186,
                    "num_rows": 3315,
                    "column_pruning": [
                        {
                            "min": "LTEyOA==",
                            "max": "MTI2",
                            "null_flag": false
                        },
                        {
                            "min": "MTk4OQ==",
                            "max": "MjAxMg==",
                            "null_flag": false
                        },
                        {
                            "max": "MTIzLjEyMzAwMDAwMA==",
                            "null_flag": false
                        },
                        {
                            "min": "d2FuZ2ppbmcwNA==",
                            "max": "d3VsaW4wNA==",
                            "null_flag": false
                        }
                    ],
                    "empty": false
                }
            ]
        }
    ],
    "tablet_id": 15007,
    "schema_hash": 368169781,
    "shard": 0
}

What You Expected?

期望成功导入

How to Reproduce?

No response

Anything Else?

No response

Are you willing to submit PR?

Code of Conduct

liruixl commented 3 months ago

代码格式有点乱😂

xy720 commented 3 months ago

I will take it.