Open nayuta-yanagisawa opened 3 years ago
Thread 30 "mysqld" hit Breakpoint 1, 0x00007ffff42f8ae0 in spider_db_query(st_spider_conn*, char const*, unsigned int, int, int*)@plt ()
from /home/vagrant/repo/mariadb-server/build/mysql-test/var/plugins/ha_spider.so
(gdb) bt
#0 0x00007ffff42f8ae0 in spider_db_query(st_spider_conn*, char const*, unsigned int, int, int*)@plt ()
from /home/vagrant/repo/mariadb-server/build/mysql-test/var/plugins/ha_spider.so
#1 0x00007ffff4434404 in spider_mbase_handler::execute_sql (this=0x7fff98148f50, sql_type=1, conn=0x7fff9814a990, quick_mode=3, need_mon=0x7fff98148df0)
at /home/vagrant/repo/mariadb-server/storage/spider/spd_db_mysql.cc:13885
#2 0x00007ffff444dec2 in spider_group_by_handler::init_scan (this=0x7fff98191650) at /home/vagrant/repo/mariadb-server/storage/spider/spd_group_by_handler.cc:1466
#3 0x0000555555e530e1 in Pushdown_query::execute (this=0x7fff98017240, join=0x7fff980152f8) at /home/vagrant/repo/mariadb-server/sql/group_by_handler.cc:49
#4 0x0000555555e07d6b in do_select (join=0x7fff980152f8, procedure=0x0) at /home/vagrant/repo/mariadb-server/sql/sql_select.cc:19938
#5 0x0000555555ddcf0f in JOIN::exec_inner (this=0x7fff980152f8) at /home/vagrant/repo/mariadb-server/sql/sql_select.cc:4489
#6 0x0000555555ddc03e in JOIN::exec (this=0x7fff980152f8) at /home/vagrant/repo/mariadb-server/sql/sql_select.cc:4271
#7 0x0000555555ddd7aa in mysql_select (thd=0x7fff98000d90, tables=0x7fff980141e0, wild_num=0, fields=..., conds=0x0, og_num=0, order=0x0, group=0x0, having=0x0,
proc_param=0x0, select_options=2147486464, result=0x7fff980152d0, unit=0x7fff98004cc8, select_lex=0x7fff98013588)
at /home/vagrant/repo/mariadb-server/sql/sql_select.cc:4709
#8 0x0000555555dcd111 in handle_select (thd=0x7fff98000d90, lex=0x7fff98004c08, result=0x7fff980152d0, setup_tables_done_option=0)
at /home/vagrant/repo/mariadb-server/sql/sql_select.cc:412
#9 0x0000555555d92080 in execute_sqlcom_select (thd=0x7fff98000d90, all_tables=0x7fff980141e0) at /home/vagrant/repo/mariadb-server/sql/sql_parse.cc:6449
#10 0x0000555555d88892 in mysql_execute_command (thd=0x7fff98000d90) at /home/vagrant/repo/mariadb-server/sql/sql_parse.cc:3968
#11 0x0000555555d960b1 in mysql_parse (thd=0x7fff98000d90, rawbuf=0x7fff980134c8 "SELECT IF(COUNT(id > 0),'Y','N') FROM tbl_a", length=43,
parser_state=0x7ffff478d520, is_com_multi=false, is_next_command=false) at /home/vagrant/repo/mariadb-server/sql/sql_parse.cc:7995
#12 0x0000555555d823c3 in dispatch_command (command=COM_QUERY, thd=0x7fff98000d90, packet=0x7fff9800ace1 "", packet_length=43, is_com_multi=false,
is_next_command=false) at /home/vagrant/repo/mariadb-server/sql/sql_parse.cc:1857
#13 0x0000555555d80c29 in do_command (thd=0x7fff98000d90) at /home/vagrant/repo/mariadb-server/sql/sql_parse.cc:1373
#14 0x0000555555f10ce3 in do_handle_one_connection (connect=0x555558280690) at /home/vagrant/repo/mariadb-server/sql/sql_connect.cc:1412
#15 0x0000555555f10a2c in handle_one_connection (arg=0x555558280690) at /home/vagrant/repo/mariadb-server/sql/sql_connect.cc:1316
#16 0x00007ffff7e9c609 in start_thread (arg=<optimized out>) at pthread_create.c:477
#17 0x00007ffff7658293 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
debug trace より、data node で実行されるクエリ。
select count((t0.`id` > 0)) ``,(if(`` , _latin1'Y' , _latin1'N')) `IF(COUNT(id > 0),'Y','N')` from `auto_test_remote`.`tbl_a` t0
SELECT IF(id > 0,'Y','N') FROM tbl_a
に変えると次のようなクエリになる。
select (if((t0.`id` > 0) , _latin1'Y' , _latin1'N')) `IF(id > 0,'Y','N')` from `auto_test_remote`.`tbl_a` t0
SELECT IF(COUNT(0),'Y','N') FROM tbl_a;
# => select count(0) ``,(if(`` , _latin1'Y' , _latin1'N')) `IF(COUNT(0),'Y','N')` from `auto_test_remote`.`tbl_a` t0
SELECT IF(0,'Y','N') FROM tbl_a;
# => select 1 from `auto_test_remote`.`tbl_a` t0
SELECT COUNT(id) FROM tbl_a;
# => select count(t0.`id`) `COUNT(id)` from `auto_test_remote`.`tbl_a` t0
謎のバッククオートはたぶん別名なんだろうな。if(``, ... のバッククオートはそれを参照してる。
バッククオートに何か (x とする) 入っていても、if((select x
,... にしないと動かないはず。また、そのようにしても ERROR 1247 (42S22): Reference 'x' not supported (reference to group function)
みたいに言われるはず。
Q. query の書き換えはどこで行われてるの…
A. わからん
int spider_group_by_handler::init_scan()
の /* making a query */
って書いてあるところかな??Q. 何故クエリの書き換えを行うのか?
A. たぶん、direct join (複数 shard のデータを集約して透過的に join する仕組み) をやるため。https://github.com/Kentoku/spider/pull/19
spider_mbase_handler
の member の sql
を見ればどういうクエリが組み立てられてるかわかる。
とりあえず spider_mbase_handler::append_list_item_select_part
が問題のバッククオートの部分を組み立ててるのはわかった。
1316 if ((error_num = dbton_hdl->append_list_item_select_part(
(gdb) p
$8 = {mem_calc_inited = true, str = {<Charset> = {
m_charset = 0x5555575092a0 <my_charset_utf8_general_ci>}, <Binary_string> = {<Static_binary_string> = {<Sql_alloc> = {<No data fields>},
Ptr = 0x7fff9814fca0 "select ", str_length = 7}, Alloced_length = 8, extra_alloc = 0, alloced = true, thread_specific = false}, <No data fields>}, id = 59,
func_name = 0x7ffff447661b "<unknown>", file_name = 0x7ffff4476258 "/home/vagrant/repo/mariadb-server/storage/spider/spd_db_mysql.cc", line_no = 8616,
current_alloc_mem = 8, next = 0x0}
(gdb) n
1317 query.select, NULL, 0, TRUE, fields, SPIDER_SQL_TYPE_SELECT_SQL)))
(gdb) p ((spider_mbase_handler*)dbton_hdl)->sql
$9 = {mem_calc_inited = true, str = {<Charset> = {
m_charset = 0x5555575092a0 <my_charset_utf8_general_ci>}, <Binary_string> = {<Static_binary_string> = {<Sql_alloc> = {<No data fields>},
Ptr = 0x7fff98052330 "select count((t0.`id` > 0)) ``,(if(`` , _latin1'Y' , _latin1'N')) `IF(COUNT(id > 0),'Y','N')`,", str_length = 93},
Alloced_length = 120, extra_alloc = 0, alloced = true, thread_specific = false}, <No data fields>}, id = 59, func_name = 0x7ffff447661b "<unknown>",
file_name = 0x7ffff4476258 "/home/vagrant/repo/mariadb-server/storage/spider/spd_db_mysql.cc", line_no = 8616, current_alloc_mem = 120, next = 0x0}
どうやら Spider はすべての field (column) をトップレベルの select の SELECT 句の後ろに並べるという書き換えを行うようである。append_list_item_select
, spider_db_print_item_type
あたりを参照。
これは想像だが、複数シャードでの実行結果を後で集計するためにやってるのかな…?たぶん、複数シャードにまたがった集計をやらないならこんなことは必要ないんだよね。
↓ の case Item::SUM_FUNC_ITEM:
に落ちる。最終的には spider_db_mbase_util::open_item_sum_func()
が実質的な変更を行っていそう。
int spider_db_print_item_type(
Item *item,
Field *field,
ha_spider *spider,
spider_string *str,
const char *alias,
uint alias_length,
uint dbton_id,
bool use_fields,
spider_fields *fields
) {
DBUG_ENTER("spider_db_print_item_type");
DBUG_PRINT("info",("spider COND type=%d", item->type()));
switch (item->type())
{
case Item::FUNC_ITEM:
DBUG_RETURN(spider_db_open_item_func((Item_func *) item, spider, str,
alias, alias_length, dbton_id, use_fields, fields));
#ifdef HANDLER_HAS_DIRECT_AGGREGATE
case Item::SUM_FUNC_ITEM:
DBUG_RETURN(spider_db_open_item_sum_func((Item_sum *)item, spider, str,
alias, alias_length, dbton_id, use_fields, fields));
#endif
...
default:
DBUG_ASSERT(FALSE);
DBUG_RETURN(spider_db_print_item_type_default(item, spider, str));
}
}
...
}
...
}
解決法の1つは、push down していい場合以外は COUNT の push down をやめること。つまり data node からすべてのデータを取ってきて spider node で集約させる。PostgreSQL の FDW はどうしてんだろ?
いや、どういう仕組で Spider node での結果の統合が行われるのかちゃんと理解しないと、ベストな結論は出せないな。次の目標は、Spider node はどのようにして data node から送られてきた結果を統合するのか理解するかな。
edit: その前に Spider 1.0 あたりを読んで全体の流れを掴んだほうがいいかも?
https://jira.mariadb.org/browse/MDEV-25116