StarRocks / starrocks

The world's fastest open query engine for sub-second analytics both on and off the data lakehouse. With the flexibility to support nearly any scenario, StarRocks provides best-in-class performance for multi-dimensional analytics, real-time analytics, and ad-hoc queries. A Linux Foundation project.
https://starrocks.io
Apache License 2.0
9.01k stars 1.81k forks source link

result error from sqlancer query #7017

Closed colorfulu closed 2 years ago

colorfulu commented 2 years ago

Steps to reproduce the behavior (Required)

DROP DATABASE IF EXISTS w9;
CREATE DATABASE w9;
USE w9;
admin set frontend config ("enable_decimal_v3" = "true");
CREATE TABLE t0 (c_0_0 DATE NOT NULL ,c_0_1 DATE NOT NULL ,c_0_2 BOOLEAN REPLACE NOT NULL ,c_0_3 BOOLEAN REPLACE NULL ,c_0_4 DECIMAL(30, 15)  SUM NULL ) AGGREGATE KEY (c_0_0,c_0_1) PARTITION BY RANGE(c_0_0) (START ("2010-01-01") END ("2021-12-31") EVERY (INTERVAL 89 day)) DISTRIBUTED BY HASH (c_0_1) BUCKETS 3 properties("replication_num"="3") ;
CREATE TABLE IF NOT EXISTS t1 (c_1_0 BIGINT NOT NULL ,c_1_1 VARCHAR(11) REPLACE_IF_NOT_NULL NULL ) AGGREGATE KEY (c_1_0) DISTRIBUTED BY HASH (c_1_0) BUCKETS 3 properties("replication_num"="3") ;
INSERT INTO t1 VALUES (-726115304, '');
INSERT INTO t1 VALUES (-726115304, ''), (706425684, 'G');
INSERT INTO t1 VALUES (615003013, 'bbU');
INSERT INTO t0 VALUES ('2010-01-25', '2010-01-25', true, false, 0.288179818550634);
INSERT INTO t0 VALUES ('2010-01-27', '2010-01-02', true, false, 0.433250604389951), ('2010-01-11', '2010-01-27', true, true, NULL), ('2010-01-17', '2010-01-19', false, true, NULL);
-- SELECT subt0.c_0_0 FROM  (SELECT t1_93.c_1_0, t1_93.c_1_1 FROM t1 AS t1_93 ORDER BY t1_93.c_1_0) subt1 RIGHT ANTI JOIN (SELECT t0_92.c_0_0, t0_92.c_0_1, t0_92.c_0_2, t0_92.c_0_3, t0_92.c_0_4 FROM t0 AS t0_92) subt0 ON subt1.c_1_1 = subt0.c_0_1 AND subt1.c_1_1 != subt0.c_0_1;
-- cardinality: 4
-- SELECT subt0.c_0_0 FROM  (SELECT t1_93.c_1_0, t1_93.c_1_1 FROM t1 AS t1_93 ORDER BY t1_93.c_1_0) subt1 RIGHT ANTI JOIN (SELECT t0_92.c_0_0, t0_92.c_0_1, t0_92.c_0_2, t0_92.c_0_3, t0_92.c_0_4 FROM t0 AS t0_92) subt0 ON subt1.c_1_1 = subt0.c_0_1 AND subt1.c_1_1 != subt0.c_0_1 WHERE ( ( subt0.c_0_4 ) != ( subt0.c_0_4 ) ) IN ((SELECT t1_93.c_1_0 FROM t1 AS t1_93 WHERE (subt0.c_0_1) IN ('1970-01-04') ) )  UNION ALL SELECT subt0.c_0_0 FROM  (SELECT t1_93.c_1_0, t1_93.c_1_1 FROM t1 AS t1_93 ORDER BY t1_93.c_1_0) subt1 RIGHT ANTI JOIN (SELECT t0_92.c_0_0, t0_92.c_0_1, t0_92.c_0_2, t0_92.c_0_3, t0_92.c_0_4 FROM t0 AS t0_92) subt0 ON subt1.c_1_1 = subt0.c_0_1 AND subt1.c_1_1 != subt0.c_0_1 WHERE (NOT (( ( subt0.c_0_4 ) != ( subt0.c_0_4 ) ) IN ((SELECT t1_93.c_1_0 FROM t1 AS t1_93 WHERE (subt0.c_0_1) IN ('1970-01-04') ) ) )) UNION ALL SELECT subt0.c_0_0 FROM  (SELECT t1_93.c_1_0, t1_93.c_1_1 FROM t1 AS t1_93 ORDER BY t1_93.c_1_0) subt1 RIGHT ANTI JOIN (SELECT t0_92.c_0_0, t0_92.c_0_1, t0_92.c_0_2, t0_92.c_0_3, t0_92.c_0_4 FROM t0 AS t0_92) subt0 ON subt1.c_1_1 = subt0.c_0_1 AND subt1.c_1_1 != subt0.c_0_1 WHERE ((( ( subt0.c_0_4 ) != ( subt0.c_0_4 ) ) IN ((SELECT t1_93.c_1_0 FROM t1 AS t1_93 WHERE (subt0.c_0_1) IN ('1970-01-04') ) ) ) IS NULL);
-- cardinality: 6

Expected behavior (Required)

Real behavior (Required)

StarRocks version (Required)

liuyehcf commented 2 years ago
-- Q1
SELECT c_0_0
FROM t0
WHERE 
(
  ((c_0_4 != c_0_4) IN 
    (
      SELECT c_1_0 FROM t1 WHERE c_0_1 = '1970-01-04'
    )
  ) IS NULL
)

-- Q2
SELECT c_0_0
FROM t0
WHERE 
( NOT 
  ((c_0_4 != c_0_4) IN 
    (
      SELECT c_1_0 FROM t1 WHERE c_0_1 = '1970-01-04'
    )
  )
)

Q1 is expected to output 0 rows and Q2 is expected to output 4 rows

But actually Q1 output 2 rows and Q2 output 4 rows

The corresponding sql equivalent to Q1 is

WITH xx AS
  (SELECT t1.c_1_0 AS v2,
          '1970-01-04' AS v3
   FROM t1)
SELECT t0.c_0_0,
       CASE
           WHEN t1Rows = 0 THEN FALSE
           WHEN (c_0_4 != c_0_4) IS NULL THEN NULL
           WHEN t1d.v2 IS NOT NULL THEN TRUE
           WHEN v2Nulls < t1Rows THEN NULL
           ELSE FALSE
       END
FROM t0
LEFT OUTER JOIN
  (SELECT xx.v2,
          xx.v3
   FROM xx
   GROUP BY xx.v2,
            xx.v3) AS t1d ON (c_0_4 != c_0_4)=t1d.v2
AND t0.c_0_1 = t1d.v3
LEFT OUTER JOIN
  (SELECT count(*) AS t1Rows,
          count(xx.v2) AS v2Nulls,
          xx.v3
   FROM xx
   GROUP BY xx.v3) AS t1c ON t1c.v3 = t0.c_0_1
liuyehcf commented 2 years ago

..