Closed cockroach-teamcity closed 1 year ago
Incorrect results test case:
SET unconstrained_non_covering_index_scan_enabled = true;
CREATE TABLE table11 (
col1_0 REGPROC NULL,
col1_1 FLOAT8 NOT NULL,
col1_2 TIMESTAMP NOT NULL,
col1_3 BYTES NOT NULL,
col1_4 NAME NOT NULL,
col1_5 OID NULL,
col1_6 JSONB NULL,
col1_7 REGPROCEDURE NULL,
col1_8 INT8 NOT NULL,
rowid INT8 NOT VISIBLE NOT NULL DEFAULT unique_rowid(),
CONSTRAINT table1_pkey PRIMARY KEY (rowid ASC),
UNIQUE INDEX table1_col1_5_key (col1_5 DESC) PARTITION BY LIST (col1_5) (
PARTITION table1_part_0 VALUES IN ((0), (4054401374), (634153767), (3907223168), (3056489612), (712165927)),
PARTITION table1_part_1 VALUES IN ((2513293438), (4104166462), (2170174562), (2263164083), (1409360697)),
PARTITION table1_part_2 VALUES IN ((3531390767), (680683507), (3905193285), (1075633376), (3091927336), (4123276399)),
PARTITION table1_part_3 VALUES IN ((2883991761), (1817452698), (275452381), (2049442003), (1763789332)),
PARTITION table1_part_4 VALUES IN ((1471948064), (538430908), (2715103026), (1587139408), (2947172171), (42062373)),
PARTITION table1_part_5 VALUES IN ((358781909), (2141121311), (NULL), (1160143884), (3661521982)),
PARTITION table1_part_6 VALUES IN ((502401592), (2773778536), (3061595338), (289215522), (574362468), (1656676478)),
PARTITION table1_part_7 VALUES IN ((1916209359), (3327812134), (1178314814), (783675437)),
PARTITION table1_part_8 VALUES IN ((2709206916), (2655504936), (1989112598), (2751444736), (1362770897)),
PARTITION table1_part_9 VALUES IN ((1142450034), (644683911), (3096651659), (3464195867), (2902709692), (1047527619)),
PARTITION "DEFAULT" VALUES IN ((DEFAULT))
) WHERE (((col1_3 < '\x58':::BYTES) OR (col1_2 = '-2000-01-01 00:00:00':::TIMESTAMP)) AND (col1_1 != 3.4028234663852886e+38:::FLOAT8)) AND (col1_8 = (-128):::INT8),
INDEX table1_col1_3_col1_1_expr_idx (col1_3 ASC, col1_1 DESC, (col1_8 + 6351614700169882062:::INT8) ASC),
INDEX table1_expr_col1_3_col1_0_col1_4_col1_8_idx (lower(CAST(col1_5 AS STRING)) DESC, col1_3 ASC, col1_0 DESC, col1_4 ASC, col1_8 DESC) STORING (col1_1, col1_5),
UNIQUE INDEX table1_col1_3_col1_0_key (col1_3 DESC, col1_0 ASC) STORING (col1_2, col1_8) PARTITION BY LIST (col1_3) (
PARTITION table1_part_0 VALUES IN (('\x00')),
PARTITION table1_part_1 VALUES IN (('\x')),
PARTITION table1_part_2 VALUES IN (('\xc2a26fa402')),
PARTITION table1_part_3 VALUES IN (('\xe29883'))
),
FAMILY fam_0_col1_4_rowid (col1_4, rowid),
FAMILY fam_1_col1_7 (col1_7),
FAMILY fam_2_col1_0 (col1_0),
FAMILY fam_3_col1_2_col1_5 (col1_2, col1_5),
FAMILY fam_4_col1_8_col1_6_col1_1_col1_3 (col1_8, col1_6, col1_1, col1_3)
);
insert into table11 values
(NULL, 1.234567890123456e+19, '1992-10-10 03:50:10.000523', '\x6109', '☃', 0, '{"train": [null], "a": ["car", [false]], "bar": null, "foo": null}', 1, 15),
(0, 1.234567890123456e+19, '1996-01-08 00:37:11.000713', '\x616161616161', '☃', NULL, NULL, 0, -1193954633286491958),
(NULL, 1.234567890123456e+19, '1972-09-26 21:13:20', '\x610a', '☃', NULL, NULL, 0, -6322506686618379891),
(NULL, 1.234567890123456e+19, '4714-11-24 00:00:00 BC', '\x6109', '☃', 0, '{"cars": [{"make": "Volkswagen", "model": "Rabbit", "trim": "S", "year": "2009"}, {"make": "Toyota", "model": "Camry", "trim": "LE", "year": "2002"}, {"make": "Ford", "model": "Focus", "trim": "SE", "year": "2011"}, {"make": "Buick", "model": "Grand National", "trim": "T-Type", "year": "1987"}, {"make": "Buick", "model": "Skylark", "trim": "Gran Sport", "year": "1966"}, {"make": "Porsche", "model": "911", "trim": "Turbo S", "year": "2022"}, {"make": "Chevrolet", "model": "Corvette", "trim": "C8", "year": "2022"}]}', 0, 72),
(NULL, 1.234567890123456e+19, '1991-02-11 13:29:14.000363', '\x', '☃', NULL, '{"train": [null], "a": ["car", [false]], "bar2": null, "foo2": null}', 0, 59),
(0, 1.234567890123456e+19, '1983-08-27 15:04:28.000107', '\x73b74a39', '☃', NULL, '{"P!(z3zIkDEc": {"foo": false}, "c": {}, "foo": [[[], {}], []], "k&<Rc&v": [], "puV8Bx}n": {"#P*": {}}}', 0, 14);
SELECT
tab_17534.col1_3 AS col_52140,
0:::OID AS col_52141,
tab_17533.col1_7 AS col_52142,
'\x58':::BYTES AS col_52143,
0:::OID AS col_52144
FROM
defaultdb.public.table11@[0] AS tab_17533
RIGHT JOIN defaultdb.public.table11@[0] AS tab_17534
JOIN defaultdb.public.table11@[0] AS tab_17535 ON
(tab_17534.crdb_internal_mvcc_timestamp) = (tab_17535.crdb_internal_mvcc_timestamp)
AND (tab_17534.col1_4) = (tab_17535.col1_4)
JOIN defaultdb.public.table11@[0] AS tab_17536 ON
(tab_17535.col1_1) = (tab_17536.col1_1)
AND (tab_17535.col1_3) = (tab_17536.col1_3)
AND (tab_17534.col1_6) = (tab_17536.col1_6)
JOIN defaultdb.public.table11@[0] AS tab_17537
JOIN defaultdb.public.table11@[0] AS tab_17538 ON
(tab_17537.crdb_internal_mvcc_timestamp) = (tab_17538.crdb_internal_mvcc_timestamp) ON
(tab_17536.col1_2) = (tab_17538.col1_2) AND (tab_17535.col1_4) = (tab_17537.col1_4) ON
(tab_17533.col1_3) = (tab_17537.col1_3) AND (tab_17533.col1_6) = (tab_17536.col1_6)
ORDER BY
tab_17535.col1_4, tab_17535.col1_3 DESC;
SET testing_optimizer_random_seed = 4057832385546395198;
SET testing_optimizer_cost_perturbation = 1.0;
SELECT
tab_17534.col1_3 AS col_52140,
0:::OID AS col_52141,
tab_17533.col1_7 AS col_52142,
'\x58':::BYTES AS col_52143,
0:::OID AS col_52144
FROM
defaultdb.public.table11@[0] AS tab_17533
RIGHT JOIN defaultdb.public.table11@[0] AS tab_17534
JOIN defaultdb.public.table11@[0] AS tab_17535 ON
(tab_17534.crdb_internal_mvcc_timestamp) = (tab_17535.crdb_internal_mvcc_timestamp)
AND (tab_17534.col1_4) = (tab_17535.col1_4)
JOIN defaultdb.public.table11@[0] AS tab_17536 ON
(tab_17535.col1_1) = (tab_17536.col1_1)
AND (tab_17535.col1_3) = (tab_17536.col1_3)
AND (tab_17534.col1_6) = (tab_17536.col1_6)
JOIN defaultdb.public.table11@[0] AS tab_17537
JOIN defaultdb.public.table11@[0] AS tab_17538 ON
(tab_17537.crdb_internal_mvcc_timestamp) = (tab_17538.crdb_internal_mvcc_timestamp) ON
(tab_17536.col1_2) = (tab_17538.col1_2) AND (tab_17535.col1_4) = (tab_17537.col1_4) ON
(tab_17533.col1_3) = (tab_17537.col1_3) AND (tab_17533.col1_6) = (tab_17536.col1_6)
ORDER BY
tab_17535.col1_4, tab_17535.col1_3 DESC;
Session flag unconstrained_non_covering_index_scan_enabled
is not present in V22.1.x, so it's difficult to verify if this is a regression or pre-existing bug.
Test case is using internal column crdb_internal_mvcc_timestamp
. It might be good to verify this always comes from the primary index (does it exist in indexes, and if so, could it hold a different values than in the base table row?).
@mgartner I further reduced this, and bisected it to:
298e573139192477967eee94329d1f0cbea70d3b is the first bad commit
commit 298e573139192477967eee94329d1f0cbea70d3b
Author: Marcus Gartner <marcus@cockroachlabs.com>
Date: Fri Aug 19 06:24:48 2022 -0400
opt: reduce allocations in OrderingSet.LongestCommonPrefix
Previously, the `*OrderingChoice` returned by `LongestCommonPrefix` was
allocated on the heap (except in the case where the ordering set
contained a ordering choice that implies the given ordering). Now,
`LongestCommonPrefix` returns a non-pointer type and avoids these
allocations.
Release justification: This is a minor change that improves performance.
Release note: None
pkg/sql/opt/props/ordering_choice.go | 18 +++++++++---------
pkg/sql/opt/xform/optimizer.go | 15 +++++++--------
2 files changed, 16 insertions(+), 17 deletions(-)
I can't say that I understand why this change should cause the problem, but there is a plan change. Reduced test case:
CREATE TABLE table11 (
col1_1 FLOAT8 NOT NULL,
col1_2 TIMESTAMP NOT NULL,
col1_3 BYTES NOT NULL,
col1_4 NAME NOT NULL,
col1_5 OID NULL,
col1_6 JSONB NULL,
col1_7 REGPROCEDURE NULL,
INDEX table1_col1_3_col1_1_expr_idx (col1_3 ASC, col1_1 DESC)
);
insert into table11 values
(1.234567890123456e+19, '1992-10-10 03:50:10.000523', '\x6109', '☃', 0, '{"train": [null], "a": ["car", [false]], "bar": null, "foo": null}', 1),
(1.234567890123456e+19, '1996-01-08 00:37:11.000713', '\x616161616161', '☃', NULL, NULL, 0),
(1.234567890123456e+19, '1972-09-26 21:13:20', '\x610a', '☃', NULL, NULL, 0),
(1.234567890123456e+19, '4714-11-24 00:00:00 BC', '\x6109', '☃', 0, '{"cars": [{"make": "Volkswagen", "model": "Rabbit", "trim": "S", "year": "2009"}, {"make": "Toyota", "model": "Camry", "trim": "LE", "year": "2002"}, {"make": "Ford", "model": "Focus", "trim": "SE", "year": "2011"}, {"make": "Buick", "model": "Grand National", "trim": "T-Type", "year": "1987"}, {"make": "Buick", "model": "Skylark", "trim": "Gran Sport", "year": "1966"}, {"make": "Porsche", "model": "911", "trim": "Turbo S", "year": "2022"}, {"make": "Chevrolet", "model": "Corvette", "trim": "C8", "year": "2022"}]}', 0),
(1.234567890123456e+19, '1991-02-11 13:29:14.000363', '\x', '☃', NULL, '{"train": [null], "a": ["car", [false]], "bar2": null, "foo2": null}', 0),
(1.234567890123456e+19, '1983-08-27 15:04:28.000107', '\x73b74a39', '☃', NULL, '{"P!(z3zIkDEc": {"foo": false}, "c": {}, "foo": [[[], {}], []], "k&<Rc&v": [], "puV8Bx}n": {"#P*": {}}}', 0);
SET testing_optimizer_random_seed = 4057832385546395198;
SET testing_optimizer_cost_perturbation = 1.0;
SELECT
tab_17534.col1_3 AS col_52140,
0:::OID AS col_52141,
tab_17533.col1_7 AS col_52142,
'\x58':::BYTES AS col_52143,
0:::OID AS col_52144
FROM
defaultdb.public.table11@[0] AS tab_17533
RIGHT JOIN defaultdb.public.table11@[0] AS tab_17534
JOIN defaultdb.public.table11@[0] AS tab_17535 ON
(tab_17534.crdb_internal_mvcc_timestamp) = (tab_17535.crdb_internal_mvcc_timestamp)
AND (tab_17534.col1_4) = (tab_17535.col1_4)
JOIN defaultdb.public.table11@[0] AS tab_17536 ON
(tab_17535.col1_1) = (tab_17536.col1_1)
AND (tab_17535.col1_3) = (tab_17536.col1_3)
AND (tab_17534.col1_6) = (tab_17536.col1_6)
JOIN defaultdb.public.table11@[0] AS tab_17537
JOIN defaultdb.public.table11@[0] AS tab_17538 ON
(tab_17537.crdb_internal_mvcc_timestamp) = (tab_17538.crdb_internal_mvcc_timestamp) ON
(tab_17536.col1_2) = (tab_17538.col1_2) AND (tab_17535.col1_4) = (tab_17537.col1_4) ON
(tab_17533.col1_3) = (tab_17537.col1_3) AND (tab_17533.col1_6) = (tab_17536.col1_6)
ORDER BY
tab_17535.col1_4, tab_17535.col1_3 DESC;
It returns 37 rows at this commit, but should return 36 rows. Previous EXPLAIN:
distribution: local
vectorized: true
• sort
│ order: +col1_4,-col1_3
│
└── • render
│
└── • lookup join (left outer)
│ table: table11@table11_pkey
│ equality: (rowid) = (rowid)
│ equality cols are key
│ pred: col1_6 = col1_6
│
└── • lookup join (left outer)
│ table: table11@table1_col1_3_col1_1_expr_idx
│ equality: (col1_3) = (col1_3)
│
└── • hash join
│ equality: (crdb_internal_mvcc_timestamp, col1_2) = (crdb_internal_mvcc_timestamp, col1_2)
│
├── • hash join
│ │ equality: (col1_4) = (col1_4)
│ │
│ ├── • scan
│ │ missing stats
│ │ table: table11@table11_pkey
│ │ spans: FULL SCAN
│ │
│ └── • lookup join
│ │ table: table11@table11_pkey
│ │ equality: (rowid) = (rowid)
│ │ equality cols are key
│ │ pred: col1_6 = col1_6
│ │
│ └── • lookup join
│ │ table: table11@table1_col1_3_col1_1_expr_idx
│ │ equality: (col1_3, col1_1) = (col1_3,col1_1)
│ │
│ └── • hash join
│ │ equality: (crdb_internal_mvcc_timestamp, col1_4) = (crdb_internal_mvcc_timestamp, col1_4)
│ │
│ ├── • scan
│ │ missing stats
│ │ table: table11@table11_pkey
│ │ spans: FULL SCAN
│ │
│ └── • scan
│ missing stats
│ table: table11@table11_pkey
│ spans: FULL SCAN
│
└── • scan
missing stats
table: table11@table11_pkey
spans: FULL SCAN
New EXPLAIN, (sort is no longer the last step):
distribution: local
vectorized: true
• render
│
└── • lookup join (left outer)
│ table: table11@table11_pkey
│ equality: (rowid) = (rowid)
│ equality cols are key
│ pred: col1_6 = col1_6
│
└── • sort
│ order: +col1_4,-col1_3
│
└── • lookup join (left outer)
│ table: table11@table1_col1_3_col1_1_expr_idx
│ equality: (col1_3) = (col1_3)
│
└── • hash join
│ equality: (crdb_internal_mvcc_timestamp, col1_2) = (crdb_internal_mvcc_timestamp, col1_2)
│
├── • hash join
│ │ equality: (col1_4) = (col1_4)
│ │
│ ├── • scan
│ │ missing stats
│ │ table: table11@table11_pkey
│ │ spans: FULL SCAN
│ │
│ └── • lookup join
│ │ table: table11@table11_pkey
│ │ equality: (rowid) = (rowid)
│ │ equality cols are key
│ │ pred: col1_6 = col1_6
│ │
│ └── • lookup join
│ │ table: table11@table1_col1_3_col1_1_expr_idx
│ │ equality: (col1_3, col1_1) = (col1_3,col1_1)
│ │
│ └── • hash join
│ │ equality: (crdb_internal_mvcc_timestamp, col1_4) = (crdb_internal_mvcc_timestamp, col1_4)
│ │
│ ├── • scan
│ │ missing stats
│ │ table: table11@table11_pkey
│ │ spans: FULL SCAN
│ │
│ └── • scan
│ missing stats
│ table: table11@table11_pkey
│ spans: FULL SCAN
│
└── • scan
missing stats
table: table11@table11_pkey
spans: FULL SCAN
There are two problems.
I think the plan change is probably due to the change made here, which shouldn't cause any correctness issues; rather, it will just prevent us from redundantly optimizing the expression with no required ordering. The plan change is likely just because the cost perturbation is different after the extra sort gets considered. So, (2) seems like the thing to worry about here.
The incorrect results seem to be because the 2nd join in a paired join is expected to be in the same order as the first join, for proper interpretation of the continuation column:
// ContinuationCol is the column ID of the continuation column when
// IsFirstJoinInPairedJoiner is true. The continuation column is a boolean
// column that indicates whether an output row is a continuation of a group
// corresponding to a single left input row.
ContinuationCol opt.ColumnID
I think the plan change is probably due to the change made here, which shouldn't cause any correctness issues; rather, it will just prevent us from redundantly optimizing the expression with no required ordering. The plan change is likely just because the cost perturbation is different after the extra sort gets considered. So, (2) seems like the thing to worry about here.
Prior to my change we were optimizing with an empty ordering enforcer, but now we're not because we return ok=false
instead of ok=true
. Is that correct? I'm confused about the implications of this. Should we be re-optimizing with no required ordering? I can't think of cases where that would be useful.
And it sounds like #86443 didn't introduce a bug, but it change the query plan in a way that reveals an existing bug? Is that correct?
I think your change was correct, it just removed duplicate work.
And it sounds like https://github.com/cockroachdb/cockroach/pull/86443 didn't introduce a bug, but it change the query plan in a way that reveals an existing bug? Is that correct?
Yeah, I think all it did was slightly change the ordering/number of expressions in the memo, which would significantly change costing because of the perturbation.
Had a chance to take another look at this with @rytaft - PR coming soon.
roachtest.costfuzz failed with artifacts on release-22.2 @ 64049e4b9210de3af4a1d814a9af0123b59a055f:
Parameters:
ROACHTEST_cloud=gce
,ROACHTEST_cpu=4
,ROACHTEST_encrypted=false
,ROACHTEST_ssd=0
Help
See: [roachtest README](https://github.com/cockroachdb/cockroach/blob/master/pkg/cmd/roachtest/README.md) See: [How To Investigate \(internal\)](https://cockroachlabs.atlassian.net/l/c/SSSBr8c7)
Same failure on other branches
- #86796 roachtest.costfuzz failed due to scientific notation v. decimal notation [C-test-failure T-sql-queries]
/cc @cockroachdb/sql-queries
This test on roachdash | Improve this report!
Jira issue: CRDB-20329