StarRocks / starrocks

StarRocks, a Linux Foundation project, is a next-generation sub-second MPP OLAP database for full analytics scenarios, including multi-dimensional analytics, real-time analytics, and ad-hoc queries.
https://starrocks.io
Apache License 2.0
8.3k stars 1.68k forks source link

从视图查询数据参入空表超时 #47890

Open passiolin opened 3 days ago

passiolin commented 3 days ago

Steps to reproduce the behavior (Required)

  1. create table

    CREATE TABLE `bi_app_af109_device_d` (
    `device_sn` varchar(65533) NULL COMMENT "",
    `max_temp` float NULL COMMENT "",
    `min_temp` float NULL COMMENT "",
    `temp_gap` float NULL COMMENT "",
    `latest_ts` datetime NULL COMMENT "",
    `eariliest_ts` datetime NULL COMMENT "",
    `ts_gap` bigint(20) NULL COMMENT "",
    `device_status` varchar(65533) NULL COMMENT "",
    `good_count` bigint(20) NULL COMMENT "",
    `not_good_count` bigint(20) NULL COMMENT "",
    `rundate` date NULL COMMENT "",
    `historical_max_temp` float NULL COMMENT "",
    `historical_min_temp` float NULL COMMENT "",
    `device_active_date` datetime NULL COMMENT ""
    ) ENGINE=OLAP 
    DUPLICATE KEY(`device_sn`)
    DISTRIBUTED BY RANDOM
    PROPERTIES (
    "replication_num" = "3",
    "in_memory" = "false",
    "enable_persistent_index" = "false",
    "replicated_storage" = "true",
    "compression" = "LZ4"
    );
  2. create view

    CREATE OR REPLACE
    VIEW `final_status` (`device_sn`,
    `max_temp`,
    `min_temp`,
    `temp_gap`,
    `latest_ts`,
    `eariliest_ts`,
    `ts_gap`,
    `not_good_count`,
    `good_count`,
    `rundate`,
    `historical_max_temp`,
    `historical_min_temp`,
    `device_active_date`,
    `device_status`) AS
    SELECT
    `tb_app_log`.`a`.`device_sn`,
    `tb_app_log`.`a`.`max_temp`,
    `tb_app_log`.`a`.`min_temp`,
    `tb_app_log`.`a`.`max_temp` - `tb_app_log`.`a`.`min_temp` AS `temp_gap`,
    `tb_app_log`.`a`.`latest_ts`,
    `tb_app_log`.`a`.`eariliest_ts`,
    `tb_app_log`.`a`.`ts_gap`,
    count(CASE WHEN ((`tb_app_log`.`a`.`max_temp` >= 60) OR (`tb_app_log`.`a`.`min_temp` <= 8)) THEN 1 ELSE NULL END) OVER (PARTITION BY `tb_app_log`.`a`.`device_sn` ) AS `not_good_count`,
    count(CASE WHEN ((`tb_app_log`.`a`.`max_temp` < 60) AND (`tb_app_log`.`a`.`min_temp` > 8)) THEN 1 ELSE NULL END) OVER (PARTITION BY `tb_app_log`.`a`.`device_sn` ) AS `good_count`,
    current_date() AS `rundate`,
    `tb_app_log`.`b`.`historical_max_temp`,
    `tb_app_log`.`b`.`historical_min_temp`,
    `tb_app_log`.`b`.`device_active_date`,
    CASE
        WHEN ((`tb_app_log`.`a`.`max_temp` >= 60)
        OR (`tb_app_log`.`a`.`min_temp` <= 8)) THEN 'Not Good'
        WHEN (((`tb_app_log`.`a`.`min_temp` > 16.6)
        AND ((`tb_app_log`.`a`.`max_temp` - `tb_app_log`.`a`.`min_temp`) < 6))
        AND (`tb_app_log`.`a`.`ts_gap` >= 4)) THEN 'Not Good'
        ELSE 'Normal'
    END AS `device_status`
    FROM
    `tb_app_log`.`aggregated_data` AS `a`
    LEFT OUTER JOIN `tb_app_log`.`historical_data` AS `b` ON
    `tb_app_log`.`a`.`device_sn` = `tb_app_log`.`b`.`device_sn`;
  3. view data

    INSERT INTO tb_app_log.final_status
    (device_sn, max_temp, min_temp, temp_gap, latest_ts, eariliest_ts, ts_gap, not_good_count, good_count, rundate, historical_max_temp, historical_min_temp, device_active_date, device_status)
    VALUES('AF050130223148EYP', 13.06, 11.54, 1.5200000000000014, '2024-07-05 03:04:46', '2024-06-29 03:06:09', 143, 0, 1, '2024-07-05', 19.670000076293945, 11.54, '2024-06-02 22:08:40', 'Normal');
    INSERT INTO tb_app_log.final_status
    (device_sn, max_temp, min_temp, temp_gap, latest_ts, eariliest_ts, ts_gap, not_good_count, good_count, rundate, historical_max_temp, historical_min_temp, device_active_date, device_status)
    VALUES('AF0501304131480F0', 17.38, 11.7, 5.68, '2024-07-05 03:06:01', '2024-06-29 03:07:28', 143, 0, 1, '2024-07-05', 17.829999923706055, 11.569999694824219, '2024-05-28 00:49:27', 'Normal');
    INSERT INTO tb_app_log.final_status
    (device_sn, max_temp, min_temp, temp_gap, latest_ts, eariliest_ts, ts_gap, not_good_count, good_count, rundate, historical_max_temp, historical_min_temp, device_active_date, device_status)
    VALUES('AF05013044414919E', 26.14, 11.98, 14.16, '2024-07-05 03:06:13', '2024-07-02 18:56:17', 56, 0, 1, '2024-07-05', 26.14, 11.98, '2024-07-02 18:56:17', 'Normal');
    INSERT INTO tb_app_log.final_status
    (device_sn, max_temp, min_temp, temp_gap, latest_ts, eariliest_ts, ts_gap, not_good_count, good_count, rundate, historical_max_temp, historical_min_temp, device_active_date, device_status)
    VALUES('AF0501304575148ZY', 14.86, 11.76, 3.0999999999999996, '2024-07-05 03:05:52', '2024-06-29 03:07:09', 143, 0, 1, '2024-07-05', 19.65, 11.57, '2024-06-09 20:12:50', 'Normal');
    INSERT INTO tb_app_log.final_status
    (device_sn, max_temp, min_temp, temp_gap, latest_ts, eariliest_ts, ts_gap, not_good_count, good_count, rundate, historical_max_temp, historical_min_temp, device_active_date, device_status)
    VALUES('AF0501304B40142HB', 19.670000076293945, 11.680000305175781, 7.989999771118164, '2024-05-13 04:29:28', '2024-05-10 03:14:42', 73, 0, 1, '2024-07-05', 19.670000076293945, -10.369999885559082, '2024-03-15 11:32:15', 'Normal');
    INSERT INTO tb_app_log.final_status
    (device_sn, max_temp, min_temp, temp_gap, latest_ts, eariliest_ts, ts_gap, not_good_count, good_count, rundate, historical_max_temp, historical_min_temp, device_active_date, device_status)
    VALUES('AF050130501426A3N', 18.440000534057617, 11.869999885559082, 6.570000648498535, '2024-05-26 23:50:25', '2024-05-20 23:51:02', 143, 0, 1, '2024-07-05', 23.270000457763672, 11.869999885559082, '2024-05-17 21:21:56', 'Normal');    
  4. insert select

    INSERT INTO bi_app_prod.bi_app_af109_device_d
    (device_sn, max_temp, min_temp, temp_gap, latest_ts, eariliest_ts, ts_gap, good_count, not_good_count, rundate,
    historical_max_temp, historical_min_temp, device_active_date, device_status)
    SELECT device_sn,
       max_temp,
       min_temp,
       temp_gap,
       latest_ts,
       eariliest_ts,
       ts_gap,
       good_count,
       not_good_count,
       rundate,
       historical_max_temp,
       historical_min_temp,
       device_active_date,
       device_status
    FROM tb_app_log.final_status;

Expected behavior (Required)

bi_app_prod.bi_app_af109_device_d是一张空表,之前已经删除了所有的数据。 tb_app_log.final_status,数据只有几千行。

步骤4可以正常插入。

Real behavior (Required)

7000多行的视图表,执行上述步骤4的 sql时,CPU和网络IO出现峰值,并且执行超时

图片

StarRocks version (Required)

StarRocks Version:3.2.1-79ee91d OS:Ubuntu20.04 Cluster:3BE(16C+64G+2T)+1FE(4C+16G)

passiolin commented 3 days ago

测试了别的insert into select,也会出现这种问题。

passiolin commented 3 days ago

使用pstack无法导出be节点的栈,应是服务器环境可能有点问题

root@us-prod-sr-be-2:~# pstack 3656408
3656408: /data/starrocks/be/StarRocks-3.2.8/be/lib/starrocks_be
'linux-vdso.so.1': opening object file: No such file or directory
Could not open object file.
passiolin commented 3 days ago

重启be节点后,故障恢复了,可能是死锁。下次出现的时候,我再用pstack导出堆栈。

或者开发人员可以根据版本判断是否有死锁问题,我升级版本也是可以的。