Closed cockroach-teamcity closed 10 months ago
Marking as a GA-blocker until we get to the bottom of this one.
Seems similar to #102676. #105694 is related.
I think this probably isn't a GA-blocker due to the presence of stddev_pop
and the expected vs. found output, so I'll remove the label and add a low-priority label.
Here is reduced repro (it doesn't result in different output every time, more like 50% of the time). It requires multiple nodes, so I usually do demo --nodes 3
:
SET sql_safe_updates = false;
CREATE TABLE IF NOT EXISTS seed_mr_table AS SELECT g::INT2 AS _int2,
g::INT4 AS _int4,
g::INT8 AS _int8,
g::FLOAT8 AS _float8,
'2001-01-01'::DATE + g AS _date,
'2001-01-01'::TIMESTAMP
+ g * '1 day'::INTERVAL AS _timestamp,
'2001-01-01'::TIMESTAMPTZ
+ g * '1 day'::INTERVAL AS _timestamptz,
g * '1 day'::INTERVAL AS _interval,
g % 2 = 1 AS _bool,
g::DECIMAL AS _decimal,
g::STRING AS _string,
g::STRING::BYTES AS _bytes,
substring(
'00000000-0000-0000-0000-'
|| g::STRING
|| '00000000000',
1,
36
)::UUID AS _uuid
FROM generate_series(1, 5) AS g;
SET statement_timeout = '1m0s';
INSERT INTO seed_mr_table DEFAULT VALUES;
ALTER TABLE defaultdb.public.seed_mr_table RENAME COLUMN _int4 TO col😪1;
INSERT
INTO defaultdb.public.seed_mr_table AS tab325 (
_float8,
_date,
_timestamp,
_timestamptz,
_bool,
_string,
_uuid
)
VALUES (
(-1.244111606419558):::FLOAT8,
NULL,
'1970-01-11 03:25:45':::TIMESTAMP,
'2029-11-25 21:41:55.000505+00':::TIMESTAMPTZ,
true,
'':::STRING,
'0a4d0a4d-0a4d-0a4d-0a4d-0a4d0a4d0a4d':::UUID
);
INSERT
INTO defaultdb.public.seed_mr_table AS tab406 (
col😪1,
_float8,
_date,
_timestamp,
_decimal,
_bytes,
_uuid
)
VALUES (
(-918673490):::INT8,
1.7976931348623157e+308:::FLOAT8,
'1997-05-29':::DATE,
'1980-12-07 12:48:24.000771':::TIMESTAMP,
(-6.517418879712536215E+32):::DECIMAL,
e'\\x610a':::BYTES,
'922ea106-35a9-4203-a3c6-7f8f3074c10b':::UUID
);
INSERT
INTO defaultdb.public.seed_mr_table AS 😌tab476 (
col😪1,
_float8,
_timestamptz,
_interval,
_bool,
_string
)
VALUES (
NULL,
1.7976931348623157e+308:::FLOAT8,
'1987-11-25 06:14:03.000625+00':::TIMESTAMPTZ,
'-35 years -9 mons -149 days -19:24:50.62023':::INTERVAL,
true,
e'a\t':::STRING
);
INSERT
INTO defaultdb.public.seed_mr_table AS "ta😦B😭495" (
_int2,
_date,
_timestamp,
_timestamptz,
_interval,
_bool,
_string,
_uuid
)
VALUES (
4027:::INT8,
'1982-04-12':::DATE,
'2010-06-03 22:40:23.000022':::TIMESTAMP,
'294276-12-31 23:59:59.999999+00':::TIMESTAMPTZ,
'35 years 5 mons 779 days 22:42:59.302528':::INTERVAL,
true,
e'\rbj':::STRING,
'01ef01ef-01ef-01ef-01ef-01ef01ef01ef':::UUID
);
INSERT
INTO defaultdb.public.seed_mr_table AS tab496 (
_int2,
_int8,
_float8,
_date,
_timestamp,
_decimal,
_string
)
VALUES (
NULL,
(-54):::INT8,
0.6594882641057866:::FLOAT8,
'1970-01-01':::DATE,
'1997-05-20 03:46:40':::TIMESTAMP,
'-Infinity':::DECIMAL,
e'\\40LH@\x1e!p':::STRING
);
INSERT
INTO defaultdb.public.seed_mr_table AS "t ab499" (
_int2,
col😪1,
_int8,
_interval,
_bool,
_bytes,
_uuid
)
VALUES (
(-81):::INT8,
NULL,
(-4439801854239849055):::INT8,
'-63 years -7 mons -677 days -04:01:41.060163':::INTERVAL,
true,
e'\\x237115a05098df':::BYTES,
'0a5e0a5e-0a5e-0a5e-0a5e-0a5e0a5e0a5e':::UUID
);
ALTER TABLE seed_mr_table SPLIT AT SELECT rowid FROM seed_mr_table WHERE random() < 0.5 LIMIT 10;
ALTER TABLE seed_mr_table SCATTER;
SET testing_optimizer_random_seed = 1631728616107996592;
SET testing_optimizer_disable_rule_probability = 1.000000;
SET vectorize = off;
SET distsql = off;
SELECT
stddev_pop(tab74._float8::FLOAT8)::FLOAT8 AS "col.280"
FROM
defaultdb.public.seed_mr_table@[0] AS tab74
GROUP BY
tab74._float8
HAVING
bool_or(tab74._bool::BOOL)::BOOL ORDER BY 1;
RESET testing_optimizer_random_seed;
RESET testing_optimizer_disable_rule_probability;
SET distsql = on;
SELECT
stddev_pop(tab74._float8::FLOAT8)::FLOAT8 AS "col.280"
FROM
defaultdb.public.seed_mr_table@[0] AS tab74
GROUP BY
tab74._float8
HAVING
bool_or(tab74._bool::BOOL)::BOOL ORDER BY 1;
Outputs are:
NULL
0
0
0
0
0
and
NULL
NaN
0
0
0
0
I'm pretty sure this is due to distributed plan in the latter case. stddev_pop
is executed in two stages if distributed, so we're definitely hitting different order of evaluation of float operations, and the operations themselves are a bit different too (when comparing to local execution). If I make both runs have the same distsql
parameter, then the output matches.
This difference has existed for a long time, and we haven't had any users complain about it. Thus, I'll close this as "we'll never fix / expected difference".
roachtest.unoptimized-query-oracle/disable-rules=all/seed-multi-region failed with artifacts on master @ e747f6e6857a19d6048cb184b0f55c52cb8a6390:
Parameters:
ROACHTEST_arch=amd64
ROACHTEST_cloud=gce
ROACHTEST_cpu=4
ROACHTEST_encrypted=false
ROACHTEST_metamorphicBuild=false
ROACHTEST_ssd=0
Help
See: roachtest README
See: How To Investigate (internal)
See: Grafana
Same failure on other branches
- #115942 roachtest: unoptimized-query-oracle/disable-rules=all/seed-multi-region failed [C-test-failure O-roachtest O-robot T-sql-queries branch-release-23.1 release-blocker]
/cc @cockroachdb/sql-queries
This test on roachdash | Improve this report!
Jira issue: CRDB-34368