pingcap / tidb

TiDB - the open-source, cloud-native, distributed SQL database designed for modern applications.
https://pingcap.com
Apache License 2.0
37.26k stars 5.84k forks source link

incorrect result caused by stream agg #49605

Closed wjhuang2016 closed 10 months ago

wjhuang2016 commented 10 months ago

Bug Report

Please answer these questions before submitting your issue. Thanks!

1. Minimal reproduce step (Required)

CREATE TABLE `tcfc8cb35` (
  `col_77` mediumint(9) NOT NULL,
  `col_78` year(4) NOT NULL,
  `col_79` varbinary(62) NOT NULL,
  `col_80` text COLLATE utf8mb4_unicode_ci NOT NULL,
  `col_81` tinyint(4) NOT NULL DEFAULT '115',
  `col_82` smallint(6) DEFAULT '2675',
  `col_83` date DEFAULT '1981-09-17',
  `col_84` mediumint(8) unsigned NOT NULL,
  `col_85` varchar(384) CHARACTER SET gbk COLLATE gbk_bin DEFAULT NULL,
  UNIQUE KEY `idx_23` (`col_84`,`col_82`),
  PRIMARY KEY (`col_84`,`col_77`) /*T![clustered_index] CLUSTERED */,
  UNIQUE KEY `idx_25` (`col_84`,`col_85`(5),`col_81`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin
PARTITION BY HASH (`col_84`) PARTITIONS 1;
INSERT INTO `tcfc8cb35` VALUES (2065948,1999,_binary '8jxNjbksVTKq&B+vBA','rf',-54,-5656,'1987-07-03',259254,'7me坨3q!*1'),(-8248164,2024,_binary 'zA5A','s)DAkX3',-93,-12983,'2027-12-18',299573,'LUf咲hdS滧'),(-6131509,2023,_binary 'xdex#Y2','1th%hDTCYPn3-E7kOZs',-51,19149,'2013-10-28',428279,'矷莒X'),(7545837,1998,_binary 'PCVO','&(lJw69',30,4093,'1987-07-03',736235,'腏@TOIJ*賀U#CO8l駏昙%'),(-7449472,2029,_binary 'B7&jrlcijyw%&2UnN','EjbFfX!',80,-7590,'2011-11-03',765580,'堮ZQF_9闇d髴J'),(-7176200,1988,_binary 'tiPglv7mX_#','CnCtNb(p*KN%PDYrn',-25,NULL,'1987-07-03',842956,'Gq羣嗳殓閶e6zaF燤3衩碵'),(-115168,2036,_binary 'BqmX$-4It-Oi0hL','!8#dvHUNz6B%ROi',82,18787,'1991-09-20',921706,'椉2庘v'),(6665100,1987,_binary '4IJgk0fr4','(D',-73,28628,'1987-07-03',1149668,'摔玝S渉LOKO^隤膊往语)1'),(-4065661,2021,_binary '8G%','xDO39xw#uT96#Bmuin',-107,17356,'1970-12-20',1316239,'+0c35掬-阗儁jGri%盾'),(7622462,1990,_binary '&o+)s)D0qGBm$b','kjoS9Dzld',84,688,'1987-07-03',1403663,'$H鍿_M~G葫uJ'),(5269354,2018,_binary 'wq9hC8bwFDI0T','s8XPrN+nptl~m',-2,-31272,'2008-05-26',1534517,'y椁n躁Q'),(2065948,1982,_binary '8jxNjbksVTKq&B+vBA','g$+i4dgH)NNh)Aw_',11,19800,'1987-07-03',1591457,'z^+H~薼A嶘bq酠JX杆jLb'),(4076971,2024,_binary '&!RrsH','7Mpvk',-63,-632,'2032-10-28',1611011,'鬰+EXmxFq慻6'),(3522062,1981,_binary ')nq#!UiHKk8','j~wFe77ai',50,6951,'1987-07-03',1716854,'J'),(7859777,2012,_binary 'PBA5xgJ&G&','UM7o!uv=&M6o-(j57)',18,-5978,'1987-07-03',1967012,'e)浢L獹関韠瀗N'),(2065948,2028,_binary '8jxNjbksVTKq&B+vBA','JmsEki9t4PU&mAUf',51,12002,'2017-12-23',1981288,'mp氏襚6F7q崴'),(-1500070,1975,_binary '=GJ8@ii=l8s5s','m!G8L+IjLvy',-8,27610,'2029-05-16',2196155,'經隸祜wU2'),(-2714763,1985,_binary 'KGJ1zL(zQv8_S=PDk62','CLWXa7+Up',-40,22617,'2035-03-20',2235800,'k鱓Gjg絟顉蠛鏕辭Z予5*5k餿'),(2065948,2025,_binary '8jxNjbksVTKq&B+vBA','w&yj$&a&',-117,-28412,'2025-06-09',2327801,''),(2065948,1989,_binary '8jxNjbksVTKq&B+vBA','',-36,-6885,'1987-07-03',2473107,'氆x砓4株3D穭瀒奟4)'),(3119785,2000,_binary 
'HLug0KlZ','6ROUT1FO',-70,-15533,'1987-07-03',2545647,'=o67b黢lA3#b'),(7603879,1999,_binary '+tWVu','#$1WzaJ7n#9',69,23055,'1987-07-03',2648260,''),(2065948,2031,_binary '8jxNjbksVTKq&B+vBA','r6ILBm=',91,12389,'1982-06-10',3466410,'~螉aj3aXn赱(=G钦b7'),(4238003,1978,'','o',-74,7353,'1987-07-03',3494873,'FM鯄vFJ朰bs#_4'),(-790167,1982,_binary 'Av6kc','jc',64,13980,'1987-07-03',3551730,'摲潤鼥Rv哦壎您榒'),(2065948,2008,_binary '8jxNjbksVTKq&B+vBA','O',-87,17988,'1987-07-03',3575420,'鈒Ci9莺EDky^荊x籧懳B7_NM'),(-6411333,2003,_binary ')jN1I)3k','qd',116,-18541,'1987-07-03',3587407,'1uW棪'),(2065948,1999,_binary '8jxNjbksVTKq&B+vBA','rYlHHkmOTzr(haME!',23,18873,'1987-07-03',3922067,'旋揳49竜煋Q忿H鏙$駚h'),(-5905639,2004,_binary '!^mA-=UpJH','z33d6+cNCtu!uWiLJc',126,12848,'1987-07-03',3948744,'U騄8n@倴j炑zuG~g8l缴y锦u'),(-4335557,2008,_binary 'B@!$b&TX%s6Yz','0aH_)E@5j-G0B',-101,28498,'1987-07-03',3956716,'d=I虼U66惠鬖u'),(-7768195,2010,'','wOScJPKp1',125,22243,'1987-07-03',4034343,'脲+R湂AFSla唠'),(-6248343,2010,_binary 'H%Gn2','A3qO*Q(ZgO3l&((',-29,18384,'1987-07-03',4423651,'a丞蕦桍i烆淧!P边z4F坄X螯'),(5808445,2024,_binary '@l','!ylC',-2,-10879,'1980-04-17',4504641,'到O掙+qn蝹'),(-5553634,2009,_binary 'O*ZKHmU2XNjm','f',72,-12752,'1987-07-03',4678872,'虉'),(-176937,2031,_binary 'Q2q&IOF','Fh6(^',-97,-10178,'2005-09-26',5158138,'~篬ap!'),(7807055,1982,_binary 'b0Yx1^Ee&PT5#qS4T','qDLFX@uEmD',40,23325,'2033-09-10',5230638,'姻iX瑿剬A汛嘈俕'),(7282485,1992,_binary 'i*~N437h','0(e6F(fifMp7jyD',118,-28562,'2034-04-24',5232700,'挧N繐2P'),(2065948,2020,_binary '8jxNjbksVTKq&B+vBA','#$1WzaJ7n#9',121,-28094,'2026-10-27',5270487,'啣H郺a擤颛猢W躅X浢俲抰-'),(-4694164,2001,_binary 'D9P*DDrm#@','VyE@0X8N#0',-109,-10697,'1987-07-03',5345079,'S~)譱~赮%$瘓9ZFJE譕秵eL谹'),(-2652987,2033,_binary 'v~Mfuz_cfc$p','dr4)E5NhJxulZx~',-126,-14754,'2029-02-27',5403054,'7獗C晼費畧cV瘹籈麕'),(3176897,1988,'','q0kbjDz@*',121,NULL,'1987-07-03',5742757,'t囋b虔罣B墀鸆C~蹼Kh'),(4077714,2005,_binary 
'00pOp6Vohm5S)h8C','J+OYV$',76,-7242,'1987-07-03',6233773,'榅3)鯤%J+^qu觺xe腼n稝鄗瘩'),(2065948,1990,_binary '8jxNjbksVTKq&B+vBA','-m*',-121,-26732,'1987-07-03',6279625,''),(-7283599,2031,_binary '2Q6bFeyT^7Abr6','#$1WzaJ7n#9',61,-27919,'2010-04-06',6286582,'J-靎KU'),(5355431,2033,_binary '3Ac7Z~GvWQb=jFD','tw*yQ~6',-45,1148,'2005-03-12',6334739,'h'),(-1908530,2012,_binary '-)(YwF4%w','DrcJ8$juOa%xxa1c',-56,22588,'1987-07-03',6547611,'60厇岙N'),(-3028495,2009,_binary 'K8ahYNywRwpN-','rIc8975-6zQED^)uO',-29,-26403,'1987-07-03',6652722,'ohUAlH托'),(373271,2025,_binary ')zok7','%OlS!#_',92,-9937,'2014-01-23',6739909,'獱'),(2065948,1991,_binary '8jxNjbksVTKq&B+vBA','%q+0Y!rL9',97,NULL,'1987-07-03',6958705,'4薌咻-WAa裌釁E怃('),(-2489124,1994,_binary '$LL6D8jyUq4~#hPsY','jvU9zDQrP~khw',27,12087,'1987-07-03',7012297,'kL涔彲5-值%鰽D9ul霢=傉HOl'),(-6621533,1990,_binary '2f$%xGt7=iulHAIB%','juZL$WT66R',-81,-28350,'1987-07-03',7098858,'蘴S#擝侧n阠1溱#Q硘I'),(3148235,1999,_binary '6','+Yfzq_-53Xx6Gk^W',89,-24483,'2012-03-03',7129324,'1矋k'),(8367424,2025,_binary 'NkB$v@x','KxZd0@',-110,-9436,'2036-02-21',7228725,'K1'),(-3013746,2013,'','#M',122,NULL,'1987-07-03',7318267,'o艍锅Ts颇p)~41'),(878056,2010,_binary '3+fLHpy','^sklXT',-26,-12530,'1987-07-03',7519722,'戁T8鐖=g风'),(2065948,2028,_binary '8jxNjbksVTKq&B+vBA','76ue=wXm%UY1fScw',108,17447,'1983-11-02',7710557,'2qe1涵戊c恅Gjbl)喆侹Y'),(-7569442,2035,_binary '0f&^j6OKIpBeF_','IRNsf~%',-16,-30426,'2016-10-07',7827141,'焮'),(2065948,2027,_binary '8jxNjbksVTKq&B+vBA','I@7##ggk)8t',64,31899,'1980-03-14',7827568,'8M蚰驸'),(5845212,2023,_binary 'I@4jL(Np5@I*','M1DfF&n*C',43,-20588,'1985-05-23',7943602,'aj喸鎼賓磨F隳_似!'),(1207597,1990,_binary ')4GbKzQd81Y@0Ob0','~IIe4vhi&r$l+jP8#',-37,-28560,'1987-07-03',8486164,'巳'),(-575709,1995,_binary 'P3uNp(lgB&6&e',')_RiCR##ul+w9kon)4Y',93,25220,'1987-07-03',8526611,'$)P滎qh2彑o'),(272211,1985,_binary 'vZ_IhK','EY8kW7G-*!',-111,5091,'1987-07-03',8718559,'f+0n%sgW榮U3'),(5082097,1989,_binary 
'!bwRr','9z',123,-9099,'1987-07-03',8731609,'靣!il_猜渨R喀cLBR'),(4344692,2013,_binary '~N@gTDznA','M-',-62,21341,'2033-09-05',8802841,'烋聛=1'),(-888018,1983,_binary '#3Q*)=','#$1WzaJ7n#9',7,23524,'1987-07-03',8810789,'Z鳘'),(-2857580,1990,_binary 'O_rPB(Uu2','KcR!1OFjWnTos7HmdAb',6,31287,'1987-07-03',9114777,'0#祛mfvr歩秳~誓5r)F2C1'),(2065948,2002,_binary '8jxNjbksVTKq&B+vBA','Jw__',43,9051,'1987-07-03',9228702,'C+Z洕*gBd(d偈忩煗#'),(-5575750,1977,_binary 'DxZ_6K5','I8',91,-11274,'1987-07-03',9602406,'ZXk櫨R碗(T'),(1369720,2008,_binary '&','lhEU__wIue1',90,16181,'1987-07-03',9882632,'B祣o8荞Y脕釓5Yco~L'),(-3204171,2024,_binary '$4d$n3hRK$rUT=y','#$1WzaJ7n#9',63,10018,'2010-01-08',9932759,'%谒VS)Y僁嶞涿$'),(7540912,2011,_binary '8J)%&ylQhbXynPBL','vn2',-127,-21978,'1987-07-03',10034270,'慒XoB袡EXU画P襝o裪e8蕏1雴x'),(3393060,2009,'','lmnxI',-124,9303,'1987-07-03',10099108,'騌愭nb*2蒿-8'),(4668609,1973,_binary 'tAV+r$pI7YH','2@acJ',-56,-25348,'1987-07-03',10292256,'Dogds'),(2885852,2013,_binary 'fR%GXR!$ZIvHFiwxkF','#$C9%*6FvREs)',-105,-6654,'1977-10-30',10443668,'e鲝5涔F$鷈YB蔕SN矡n鍖烬瓇椆d'),(-7703980,1988,_binary 'xp','!9^bm4-Ovv4tmF#m(U',8,334,'1987-07-03',10654893,'7%7l&vB衄@libz&'),(2450639,2033,_binary 'cc','Zpl',66,-29090,'1973-10-13',10778440,'QX)H睔鞔溃t勔骺U襎_O糮f'),(5688376,2033,_binary '8n^w2Ylk#-cJR-MvjV','#$1WzaJ7n#9',-75,-5129,'2036-11-16',10804994,'T鸙XAh=仍鏇05潰芫k餄w覇L熾%'),(7723708,1979,_binary '$ro3Mg','mCj$K^byck*^3HJ58ZL',-80,21190,'2000-06-21',10945071,'媒懙唀篞heD鷢粭h崶w#g(舐态w'),(7697923,2007,'','h&azj@rIHGD%J7ZGX',-124,8866,'1987-07-03',11180555,'觅Y闤4y樠y鞂噂'),(-3169338,2027,_binary 'zBwP9KZr0Kkd00BI','(!5V',120,5946,'1980-03-08',11348991,'_G暊a搩9j疭Hp耲殴9@莑'),(-6929234,2021,_binary 'vhrEUFs','VSD&6jA3WIgl6RNf',-49,28182,'2028-09-27',11427172,'=&(豵P銇尟饶臵3p鐃'),(271919,2033,_binary 'w80^AGvkwCYt=','JM+#D9GXsckZYCd=',86,-10201,'2013-12-14',11659914,''),(-3365934,2010,_binary 
'%D2hC)2#K2ALZNDR','_YOEDlDVjQ',34,19745,'1987-07-03',11786762,'y鱷C6v6=av-~醉瀌蔰S掴躊n'),(2096489,1992,'','qfsnrZJ',55,28280,'1987-07-03',11823163,'*IDr濊蝯萘麩#'),(-7059790,1983,_binary 'J0+','',29,16538,'1987-07-03',12066338,'UrO'),(4100333,2008,_binary '0fUh5N(-a','qzP_$wsBoUxn~Y*C',-5,-29636,'1987-07-03',12155547,'邫m(畇aY笇*1v^6R*Z觞i'),(-1286331,1981,_binary '*','&eGk9=D0@6',-42,-15361,'2033-08-26',12472906,'LX#Fy菤(伴侺!Rbyh'),(2065948,1988,_binary '8jxNjbksVTKq&B+vBA','2dTXKEy+&4_14Br_@H',66,-8649,'1987-07-03',12632626,'宿C'),(6865464,1976,_binary '$','kDU&eOmRMK',-72,-32037,'1987-07-03',12678540,'0~J椝辵櫝愅wcri麦鯻'),(1196386,1994,_binary 'VY3aTCd#%SElYA','',85,28197,'2020-06-04',13143747,'U嘙蘮2n凔禁'),(-4509762,1971,_binary 'x8*-seSL0kcszPzZB','GwQK6v5LVA%c#u@tbtk',3,6974,'1987-07-03',13691034,'^舼蚎#l7'),(835807,2036,_binary '&4PWc=^','qD7!x)MVz',-30,-31398,'1975-03-26',14099208,'EU'),(-2194744,1999,_binary 'MN*YvmiI2bIse%','AA81Um)-b^',109,19845,'1987-07-03',14217337,''),(-556803,2021,_binary 'K','ThvH_pv',111,-19652,'2002-12-15',14362112,'礞#U缟卝寒姛_Qv墍S(8I4l3'),(2480098,1980,_binary 'wBzJE(n5+D)Y(Cya^FH','tkNb9T%taZlzl5i',112,-32310,'1987-07-03',14695837,'缂M_h5'),(3738348,2002,'','#$1WzaJ7n#9',-71,-32694,'1987-07-03',14787591,'i銄悞a'),(3718337,1988,_binary 'yksncIN#xKk&@*Su','bACi56BLi',51,-2156,'1987-07-03',14875260,'矉揩考pB1!E撂椏o叵鄧W6'),(2959549,1974,_binary '4vaECeyn8a(%0Pd(L1','sQvo~9',-24,14978,'1987-07-03',14998305,'U@佖0ydr耣Icz-3w('),(2065948,2020,_binary '8jxNjbksVTKq&B+vBA','-6gT#W%ABjy&^',104,32450,'2025-03-09',15028966,'u灓s闗D齊晐A~)a=鈾騯&dW'),(427931,2027,_binary 'AzMZ7v2','gmGIHATDfN@v+65Kh',80,-13045,'1982-02-19',15051621,NULL),(2065948,1985,_binary '8jxNjbksVTKq&B+vBA','#$1WzaJ7n#9',-12,2895,'1987-07-03',15130286,'u稉m'),(-6050486,1976,_binary '0U)tQ1Ni','(I7MIKS~W',21,15937,'1987-07-03',15158522,''),(2065948,1987,_binary 
'8jxNjbksVTKq&B+vBA','e#pPO#Pz',-67,-4515,'1987-07-03',15257603,'0n蔪q膵蟅Jm5-eTk驉碶'),(-1291743,1997,'','QZ~I~R#~4YGGM',116,28888,'1987-07-03',15497898,'Wy嫀詬'),(-5653586,2000,_binary '%#epF!$#^T9rv','CvzY@qJ-_P',9,23282,'1987-07-03',15690886,'9a2Q-0(嚠k(蚫'),(7691125,1989,_binary 'pujS+','k',38,25390,'1973-12-11',15705489,'嘆VG'),(-6078620,2001,_binary 'QA!','4RgN=M',117,NULL,'1987-07-03',15835109,'J霄N艺Pb@Q~9-皜'),(-5064150,2007,_binary '@2elha','p+iJJ*adgsn6#-^UiyD',41,12467,'1987-07-03',15877702,'lvr-g椦C枯'),(3689799,1992,_binary 'O=HXD-BW!QPws#Q','9k',99,10584,'1987-07-03',15915843,'_31Qn槴0Jg夣4%N'),(-2038430,2019,_binary 'NW_L-o','(bLRAqg@0ME',96,-26507,'2031-02-20',15943560,'iwpk^=(9'),(7426642,2016,_binary 'FdP=i6Whj*','d^r9IL!XE3g',113,27418,'1987-07-03',16280285,'swn鞃)鉉g7弮U9輥%'),(-4267180,1990,_binary 'MFB*d9CI+GM6!O','ssaehFbT&8LLR',-20,27708,'1987-07-03',16363101,'S$CX蕜4jzQ亟ZX氲'),(-4967247,2035,_binary '5Rxo42N-Oww7mo&uA6','Q=',-110,-1068,'2035-04-30',16553997,'蹑'),(2065948,1998,_binary '8jxNjbksVTKq&B+vBA','YojBG$',-125,-3031,'1987-07-03',16607124,'橆a璭D蠳K-CUUG'),(2075524,1991,_binary 'PlpaiAr3e@^)','V6Pr~T',80,NULL,'1987-07-03',16727983,'_I$V');

SELECT /*+ AGG_TO_COP() STREAM_AGG()*/ (NOT (`tcfc8cb35`.`col_85`>=_UTF8MB4'j筧8') OR NOT (`tcfc8cb35`.`col_85`=_UTF8MB4'暈lH忧ll6')) IS TRUE,MAX(`tcfc8cb35`.`col_81`) AS `r0`,QUOTE(`tcfc8cb35`.`col_85`) AS `r1` FROM `tcfc8cb35` WHERE `tcfc8cb35`.`col_84`>240817 OR `tcfc8cb35`.`col_85` BETWEEN _UTF8MB4'WVz' AND _UTF8MB4'G#駧褉ZC領*lov' GROUP BY `tcfc8cb35`.`col_85`;

SELECT /*+ AGG_TO_COP() */ (NOT (`tcfc8cb35`.`col_85`>=_UTF8MB4'j筧8') OR NOT (`tcfc8cb35`.`col_85`=_UTF8MB4'暈lH忧ll6')) IS TRUE,MAX(`tcfc8cb35`.`col_81`) AS `r0`,QUOTE(`tcfc8cb35`.`col_85`) AS `r1` FROM `tcfc8cb35` WHERE `tcfc8cb35`.`col_84`>240817 OR `tcfc8cb35`.`col_85` BETWEEN _UTF8MB4'WVz' AND _UTF8MB4'G#駧褉ZC領*lov' GROUP BY `tcfc8cb35`.`col_85`;

2. What did you expect to see? (Required)

The two queries should return the same result set.

3. What did you see instead (Required)

mysql> SELECT /*+ AGG_TO_COP() */ (NOT (`tcfc8cb35`.`col_85`>=_UTF8MB4'j筧8') OR NOT (`tcfc8cb35`.`col_85`=_UTF8MB4'暈lH忧ll6')) IS TRUE,MAX(`tcfc8cb35`.`col_81`) AS `r0`,QUOTE(`tcfc8cb35`.`col_85`) AS `r1` FROM `tcfc8cb35` WHERE `tcfc8cb35`.`col_84`>240817 OR `tcfc8cb35`.`col_85` BETWEEN _UTF8MB4'WVz' AND _UTF8MB4'G#駧褉ZC領*lov' GROUP BY `tcfc8cb35`.`col_85`;
...
110 rows in set, 1 warning (0.01 sec)

mysql> SELECT /*+ AGG_TO_COP() STREAM_AGG()*/ (NOT (`tcfc8cb35`.`col_85`>=_UTF8MB4'j筧8') OR NOT (`tcfc8cb35`.`col_85`=_UTF8MB4'暈lH忧ll6')) IS TRUE,MAX(`tcfc8cb35`.`col_81`) AS `r0`,QUOTE(`tcfc8cb35`.`col_85`) AS `r1` FROM `tcfc8cb35` WHERE `tcfc8cb35`.`col_84`>240817 OR `tcfc8cb35`.`col_85` BETWEEN _UTF8MB4'WVz' AND _UTF8MB4'G#駧褉ZC領*lov' GROUP BY `tcfc8cb35`.`col_85`;
...
115 rows in set, 1 warning (0.01 sec)

4. What is your TiDB version? (Required)

v7.6.0-alpha-342-g5883c5ba34-dirty

wshwsh12 commented 10 months ago
  1. I have tested this in MySQL and got 110 rows. We can also run the following SQL:
    
    Run in mysql:

MySQL [test]> select count(1) from tcfc8cb35 where col_85 = '';
+----------+
| count(1) |
+----------+
|        6 |
+----------+
1 row in set (0.001 sec)

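All 115 rows pass the WHERE clause, and the six rows whose col_85 is the empty string must collapse into a single group (115 - 5 = 110), so 110 rows should be the correct result.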

2. I think the key problem is not collation, because the incorrect rows are the ones with an empty col_85. I have tested the case both with and without gbk: HashAgg always gets 110 rows and StreamAgg always gets 115 rows.

3. Why can't StreamAgg get the correct result?
We can see the plan for StreamAgg.

tidb> desc analyze SELECT /*+ AGG_TO_COP() STREAM_AGG()*/ (NOT (`tcfc8cb35`.`col_85`>=_UTF8MB4'j筧8') OR NOT (`tcfc8cb35`.`col_85`=_UTF8MB4'暈lH忧ll6')) IS TRUE,MAX(`tcfc8cb35`.`col_81`) AS `r0`,QUOTE(`tcfc8cb35`.`col_85`) AS `r1` FROM `tcfc8cb35` WHERE `tcfc8cb35`.`col_84`>240817 OR `tcfc8cb35`.`col_85` BETWEEN _UTF8MB4'WVz' AND _UTF8MB4'G#駧褉ZC領*lov' GROUP BY `tcfc8cb35`.`col_85`;

+------------------------------------+---------+---------+-----------+---------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+---------+------+ | id | estRows | actRows | task | access object | execution info | operator info | memory | disk | +------------------------------------+---------+---------+-----------+---------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+---------+------+ | Projection_7 | 30.67 | 115 | root | | time:1.7ms, loops:2, RU:1.341432, Concurrency:OFF | istrue(or(not(ge(test.tcfc8cb35.col_85, j筧8)), not(eq(test.tcfc8cb35.col_85, 暈lH忧ll6))))->Column#11, Column#10, quote(test.tcfc8cb35.col_85)->Column#12 | 4.76 KB | N/A | | └─StreamAgg_11 | 30.67 | 115 | root | | time:1.65ms, loops:2 | group by:test.tcfc8cb35.col_85, 
funcs:max(test.tcfc8cb35.col_81)->Column#10, funcs:firstrow(test.tcfc8cb35.col_85)->test.tcfc8cb35.col_85 | 5.87 KB | N/A | | └─IndexMerge_19 | 38.33 | 115 | root | | time:1.61ms, loops:2, index_task:{fetch_handle:681.274µs, merge:18.815µs}, table_task:{num:1, concurrency:5, fetch_row:820.815µs, wait_time:774.578µs} | type: union | 39.0 KB | N/A | | ├─TableRangeScan_16(Build) | 38.33 | 115 | cop[tikv] | table:tcfc8cb35, partition:p0 | time:666.1µs, loops:3, cop_task: {num: 1, max: 630.5µs, proc_keys: 115, tot_proc: 77.6µs, tot_wait: 201.5µs, rpc_num: 1, rpc_time: 615.2µs, copr_cache_hit_ratio: 0.00, build_task_duration: 5.3µs, max_distsql_concurrency: 1}, tikv_task:{time:1ms, loops:3}, scan_detail: {total_process_keys: 115, total_process_keys_size: 4140, total_keys: 116, get_snapshot_time: 149.2µs, rocksdb: {delete_skipped_count: 115, key_skipped_count: 230, block: {}}} | range:(240817,+inf], keep order:false, stats:partial[col_84:unInitialized, col_85:unInitialized] | 2.10 KB | N/A | | ├─IndexFullScan_17(Build) | 0.00 | 0 | cop[tikv] | table:tcfc8cb35, partition:p0, index:idx_25(col_84, col_85, col_81) | time:6.63µs, loops:1 | keep order:false, stats:partial[col_84:unInitialized, col_85:unInitialized] | N/A | N/A | | └─TableRowIDScan_18(Probe) | 38.33 | 115 | cop[tikv] | table:tcfc8cb35, partition:p0 | time:786.9µs, loops:2, cop_task: {num: 1, max: 752µs, proc_keys: 115, tot_proc: 317.1µs, tot_wait: 57.1µs, rpc_num: 1, rpc_time: 728.5µs, copr_cache_hit_ratio: 0.00, build_task_duration: 4.28µs, max_distsql_concurrency: 1}, tikv_task:{time:0s, loops:3}, scan_detail: {total_process_keys: 115, total_process_keys_size: 12891, total_keys: 230, get_snapshot_time: 9.2µs, rocksdb: {delete_skipped_count: 115, key_skipped_count: 230, block: {}}} | keep order:false, stats:partial[col_84:unInitialized, col_85:unInitialized] | N/A | N/A | 
+------------------------------------+---------+---------+-----------+---------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+---------+------+ 6 rows in set, 1 warning (0.002 sec)

I think IndexMerge does not preserve the order of col_85, which StreamAgg relies on, yet the planner does not add an extra Sort on col_85 between them.

4. Try to avoid index_merge in this case: the query then returns the correct result.

tidb> desc analyze SELECT /*+ no_index_merge() AGG_TO_COP() STREAM_AGG()*/ (NOT (`tcfc8cb35`.`col_85`>=_UTF8MB4'j筧8') OR NOT (`tcfc8cb35`.`col_85`=_UTF8MB4'暈lH忧ll6')) IS TRUE,MAX(`tcfc8cb35`.`col_81`) AS `r0`,QUOTE(`tcfc8cb35`.`col_85`) AS `r1` FROM `tcfc8cb35` WHERE `tcfc8cb35`.`col_84`>240817 OR `tcfc8cb35`.`col_85` BETWEEN _UTF8MB4'WVz' AND _UTF8MB4'G#駧褉ZC領*lov' GROUP BY `tcfc8cb35`.`col_85`;
+--------------------------------+---------+---------+-----------+-------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+---------+---------+
| id | estRows | actRows | task | access object | execution info | operator info | memory | disk |
+--------------------------------+---------+---------+-----------+-------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+---------+---------+
| Projection_7 | 30.67 | 110 | root | | time:815.6µs, loops:2, RU:0.723660, Concurrency:OFF | istrue(or(not(ge(test.tcfc8cb35.col_85, j筧8)), not(eq(test.tcfc8cb35.col_85, 暈lH忧ll6))))->Column#11, Column#10, quote(test.tcfc8cb35.col_85)->Column#12 | 11.5 KB | N/A |
| └─StreamAgg_10 | 30.67 | 110 | root | | time:762.9µs, loops:2 | group by:test.tcfc8cb35.col_85, funcs:max(test.tcfc8cb35.col_81)->Column#10, funcs:firstrow(test.tcfc8cb35.col_85)->test.tcfc8cb35.col_85 | 20.6 KB | N/A |
|   └─Sort_15 | 38.33 | 115 | root | | time:725.6µs, loops:2 | test.tcfc8cb35.col_85 | 8.41 KB | 0 Bytes |
|     └─TableReader_14 | 38.33 | 115 | root | | time:681µs, loops:2, cop_task: {num: 1, max: 671.4µs, proc_keys: 115, tot_proc: 155.9µs, tot_wait: 143.4µs, rpc_num: 1, rpc_time: 651.1µs, copr_cache_hit_ratio: 0.00, build_task_duration: 5.97µs, max_distsql_concurrency: 1} | data:Selection_13 | 4.84 KB | N/A |
|       └─Selection_13 | 38.33 | 115 | cop[tikv] | | tikv_task:{time:0s, loops:3}, scan_detail: {total_process_keys: 115, total_process_keys_size: 12891, total_keys: 116, get_snapshot_time: 125.4µs, rocksdb: {delete_skipped_count: 115, key_skipped_count: 230, block: {}}} | or(gt(test.tcfc8cb35.col_84, 240817), and(ge(test.tcfc8cb35.col_85, "WVz"), le(test.tcfc8cb35.col_85, "G#駧褉ZC領*lov"))) | N/A | N/A |
|         └─TableFullScan_12 | 115.00 | 115 | cop[tikv] | table:tcfc8cb35, partition:p0 | tikv_task:{time:0s, loops:3} | keep order:false, stats:partial[col_84:unInitialized, col_85:unInitialized] | N/A | N/A |
+--------------------------------+---------+---------+-----------+-------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+---------+---------+
6 rows in set, 1 warning (0.002 sec)

elsa0520 commented 10 months ago
  1. I have tested this in MySQL and got 110 rows. We can also run the following SQL:
Run in mysql:

MySQL [test]> select count(1) from tcfc8cb35 where col_85 = '';
+----------+
| count(1) |
+----------+
|        6 |
+----------+
1 row in set (0.001 sec)

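All 115 rows pass the WHERE clause, and the six rows whose col_85 is the empty string must collapse into a single group (115 - 5 = 110), so 110 rows should be the correct result.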

  2. I think the key problem is not collation, because the incorrect rows are the ones with an empty col_85. I have tested the case both with and without gbk: HashAgg always gets 110 rows and StreamAgg always gets 115 rows.
  3. Why can't StreamAgg get the correct result? We can see the plan for StreamAgg:
tidb> desc analyze  SELECT /*+ AGG_TO_COP() STREAM_AGG()*/ (NOT (`tcfc8cb35`.`col_85`>=_UTF8MB4'j筧8') OR NOT (`tcfc8cb35`.`col_85`=_UTF8MB4'暈lH忧ll6')) IS TRUE,MAX(`tcfc8cb35`.`col_81`) AS `r0`,QUOTE(`tcfc8cb35`.`col_85`) AS `r1` FROM `tcfc8cb35` WHERE `tcfc8cb35`.`col_84`>240817 OR `tcfc8cb35`.`col_85` BETWEEN _UTF8MB4'WVz' AND _UTF8MB4'G#駧褉ZC領*lov' GROUP BY `tcfc8cb35`.`col_85`;

+------------------------------------+---------+---------+-----------+---------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+---------+------+
| id                                 | estRows | actRows | task      | access object                                                       | execution info                                                                                                                                                                                                                                                                                                                                                                                                                                                     | operator info                                                                                                                                                 | memory  | disk |
+------------------------------------+---------+---------+-----------+---------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+---------+------+
| Projection_7                       | 30.67   | 115     | root      |                                                                     | time:1.7ms, loops:2, RU:1.341432, Concurrency:OFF                                                                                                                                                                                                                                                                                                                                                                                                                  | istrue(or(not(ge(test.tcfc8cb35.col_85, j筧8)), not(eq(test.tcfc8cb35.col_85, 暈lH忧ll6))))->Column#11, Column#10, quote(test.tcfc8cb35.col_85)->Column#12    | 4.76 KB | N/A  |
| └─StreamAgg_11                     | 30.67   | 115     | root      |                                                                     | time:1.65ms, loops:2                                                                                                                                                                                                                                                                                                                                                                                                                                               | group by:test.tcfc8cb35.col_85, funcs:max(test.tcfc8cb35.col_81)->Column#10, funcs:firstrow(test.tcfc8cb35.col_85)->test.tcfc8cb35.col_85                     | 5.87 KB | N/A  |
|   └─IndexMerge_19                  | 38.33   | 115     | root      |                                                                     | time:1.61ms, loops:2, index_task:{fetch_handle:681.274µs, merge:18.815µs}, table_task:{num:1, concurrency:5, fetch_row:820.815µs, wait_time:774.578µs}                                                                                                                                                                                                                                                                                                             | type: union                                                                                                                                                   | 39.0 KB | N/A  |
|     ├─TableRangeScan_16(Build)     | 38.33   | 115     | cop[tikv] | table:tcfc8cb35, partition:p0                                       | time:666.1µs, loops:3, cop_task: {num: 1, max: 630.5µs, proc_keys: 115, tot_proc: 77.6µs, tot_wait: 201.5µs, rpc_num: 1, rpc_time: 615.2µs, copr_cache_hit_ratio: 0.00, build_task_duration: 5.3µs, max_distsql_concurrency: 1}, tikv_task:{time:1ms, loops:3}, scan_detail: {total_process_keys: 115, total_process_keys_size: 4140, total_keys: 116, get_snapshot_time: 149.2µs, rocksdb: {delete_skipped_count: 115, key_skipped_count: 230, block: {}}}        | range:(240817,+inf], keep order:false, stats:partial[col_84:unInitialized, col_85:unInitialized]                                                              | 2.10 KB | N/A  |
|     ├─IndexFullScan_17(Build)      | 0.00    | 0       | cop[tikv] | table:tcfc8cb35, partition:p0, index:idx_25(col_84, col_85, col_81) | time:6.63µs, loops:1                                                                                                                                                                                                                                                                                                                                                                                                                                               | keep order:false, stats:partial[col_84:unInitialized, col_85:unInitialized]                                                                                   | N/A     | N/A  |
|     └─TableRowIDScan_18(Probe)     | 38.33   | 115     | cop[tikv] | table:tcfc8cb35, partition:p0                                       | time:786.9µs, loops:2, cop_task: {num: 1, max: 752µs, proc_keys: 115, tot_proc: 317.1µs, tot_wait: 57.1µs, rpc_num: 1, rpc_time: 728.5µs, copr_cache_hit_ratio: 0.00, build_task_duration: 4.28µs, max_distsql_concurrency: 1}, tikv_task:{time:0s, loops:3}, scan_detail: {total_process_keys: 115, total_process_keys_size: 12891, total_keys: 230, get_snapshot_time: 9.2µs, rocksdb: {delete_skipped_count: 115, key_skipped_count: 230, block: {}}}           | keep order:false, stats:partial[col_84:unInitialized, col_85:unInitialized]                                                                                   | N/A     | N/A  |
+------------------------------------+---------+---------+-----------+---------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+---------+------+
6 rows in set, 1 warning (0.002 sec)

I think IndexMerge does not preserve the order of col_85, which StreamAgg relies on, yet the planner does not add an extra Sort on col_85 between them.

  4. Try to avoid index_merge in this case: the query then returns the correct result.
tidb> desc analyze  SELECT /*+ no_index_merge() AGG_TO_COP() STREAM_AGG()*/ (NOT (`tcfc8cb35`.`col_85`>=_UTF8MB4'j筧8') OR NOT (`tcfc8cb35`.`col_85`=_UTF8MB4'暈lH忧ll6')) IS TRUE,MAX(`tcfc8cb35`.`col_81`) AS `r0`,QUOTE(`tcfc8cb35`.`col_85`) AS `r1` FROM `tcfc8cb35` WHERE `tcfc8cb35`.`col_84`>240817 OR `tcfc8cb35`.`col_85` BETWEEN _UTF8MB4'WVz' AND _UTF8MB4'G#駧褉ZC領*lov' GROUP BY `tcfc8cb35`.`col_85`;
+--------------------------------+---------+---------+-----------+-------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+---------+---------+
| id                             | estRows | actRows | task      | access object                 | execution info                                                                                                                                                                                                                        | operator info                                                                                                                                                 | memory  | disk    |
+--------------------------------+---------+---------+-----------+-------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+---------+---------+
| Projection_7                   | 30.67   | 110     | root      |                               | time:815.6µs, loops:2, RU:0.723660, Concurrency:OFF                                                                                                                                                                                   | istrue(or(not(ge(test.tcfc8cb35.col_85, j筧8)), not(eq(test.tcfc8cb35.col_85, 暈lH忧ll6))))->Column#11, Column#10, quote(test.tcfc8cb35.col_85)->Column#12    | 11.5 KB | N/A     |
| └─StreamAgg_10                 | 30.67   | 110     | root      |                               | time:762.9µs, loops:2                                                                                                                                                                                                                 | group by:test.tcfc8cb35.col_85, funcs:max(test.tcfc8cb35.col_81)->Column#10, funcs:firstrow(test.tcfc8cb35.col_85)->test.tcfc8cb35.col_85                     | 20.6 KB | N/A     |
|   └─Sort_15                    | 38.33   | 115     | root      |                               | time:725.6µs, loops:2                                                                                                                                                                                                                 | test.tcfc8cb35.col_85                                                                                                                                         | 8.41 KB | 0 Bytes |
|     └─TableReader_14           | 38.33   | 115     | root      |                               | time:681µs, loops:2, cop_task: {num: 1, max: 671.4µs, proc_keys: 115, tot_proc: 155.9µs, tot_wait: 143.4µs, rpc_num: 1, rpc_time: 651.1µs, copr_cache_hit_ratio: 0.00, build_task_duration: 5.97µs, max_distsql_concurrency: 1}       | data:Selection_13                                                                                                                                             | 4.84 KB | N/A     |
|       └─Selection_13           | 38.33   | 115     | cop[tikv] |                               | tikv_task:{time:0s, loops:3}, scan_detail: {total_process_keys: 115, total_process_keys_size: 12891, total_keys: 116, get_snapshot_time: 125.4µs, rocksdb: {delete_skipped_count: 115, key_skipped_count: 230, block: {}}}            | or(gt(test.tcfc8cb35.col_84, 240817), and(ge(test.tcfc8cb35.col_85, "WVz"), le(test.tcfc8cb35.col_85, "G#駧褉ZC領*lov")))                                     | N/A     | N/A     |
|         └─TableFullScan_12     | 115.00  | 115     | cop[tikv] | table:tcfc8cb35, partition:p0 | tikv_task:{time:0s, loops:3}                                                                                                                                                                                                          | keep order:false, stats:partial[col_84:unInitialized, col_85:unInitialized]                                                                                   | N/A     | N/A     |
+--------------------------------+---------+---------+-----------+-------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+---------+---------+
6 rows in set, 1 warning (0.002 sec)

You are right. Maybe I made some mistake with the database when I tested the no-collation table. I will go ahead and focus on this issue.

elsa0520 commented 10 months ago

I tried another plan with only an index reader, which adds the sort node under the stream agg when the sort order of the child plan does not satisfy the requirement.

mysql> create table t1 (k1 int, k2 int);
Query OK, 0 rows affected (0.08 sec)

mysql> alter table t1 add index idxk2_k1 (k2, k1);
Query OK, 0 rows affected (0.56 sec)

mysql> explain select /*+ AGG_TO_COP() STREAM_AGG()*/ k1, max(k2) from t1 where k2 between 1 and 10 group by k1;
+-------------------------------+---------+-----------+----------------------------------+--------------------------------------------------------------------------------------------------------------------------+
| id                            | estRows | task      | access object                    | operator info                                                                                                            |
+-------------------------------+---------+-----------+----------------------------------+--------------------------------------------------------------------------------------------------------------------------+
| Projection_5                  | 200.00  | root      |                                  | nocollation.t1.k1, Column#4                                                                                              |
| └─StreamAgg_8                 | 200.00  | root      |                                  | group by:nocollation.t1.k1, funcs:max(nocollation.t1.k2)->Column#4, funcs:firstrow(nocollation.t1.k1)->nocollation.t1.k1 |
|   └─Sort_12                   | 250.00  | root      |                                  | nocollation.t1.k1                                                                                                        |
|     └─IndexReader_11          | 250.00  | root      |                                  | index:IndexRangeScan_10                                                                                                  |
|       └─IndexRangeScan_10     | 250.00  | cop[tikv] | table:t1, index:idxk2_k1(k2, k1) | range:[1,10], keep order:false, stats:pseudo                                                                             |
+-------------------------------+---------+-----------+----------------------------------+--------------------------------------------------------------------------------------------------------------------------+
5 rows in set (0.00 sec)

So, the optimizer should add the sort node under the stream agg if the sort property of the child plan does not satisfy the requirement.

The optimizer loses the sort property somewhere when the underlying plan contains the index merge.

elsa0520 commented 10 months ago

@AilinKid Please take a look

AilinKid commented 10 months ago

streamAggregate enumerates prop{sortItem}, and when we enforce the sort operator for this kind of prop, the check `task.plan() == nil` doesn't mean this plan is invalid, especially for indexMergeReader. (We keep the index plan marked as not finished for the convenience of pushing more LIMIT down to its partial index paths.)

image