nayuta-yanagisawa / server

MariaDB server is a community developed fork of MySQL server. Started by core members of the original MySQL team, MariaDB actively works with outside developers to deliver the most featureful, stable, and sanely licensed open SQL server in the industry.
GNU General Public License v2.0
1 stars 0 forks source link

MDEV-25116 #11

Open nayuta-yanagisawa opened 3 years ago

nayuta-yanagisawa commented 3 years ago

https://jira.mariadb.org/browse/MDEV-25116

--disable_warnings
--disable_query_log
--disable_result_log
--source ../../t/test_init.inc
--enable_result_log
--enable_query_log
--enable_warnings

--echo
--echo this test is for MDEV-25116
--echo

--echo prepare tables

--connection child2_1
CREATE DATABASE auto_test_remote;
USE auto_test_remote;

--disable_query_log
CREATE TABLE tbl_a (id INT);
--enable_query_log

--connection master_1
CREATE DATABASE auto_test_local;
USE auto_test_local;

--disable_query_log
eval CREATE TABLE tbl_a (
    id INT
) $MASTER_1_ENGINE $MASTER_1_CHARSET COMMENT='table "tbl_a", srv "s_2_1"';
--enable_query_log

--echo execute select

--connection master_1
SELECT IF(COUNT(id > 0),'Y','N') FROM tbl_a;

--echo clean up
--connection master_1
DROP DATABASE IF EXISTS auto_test_local;
--connection child2_1
DROP DATABASE IF EXISTS auto_test_remote;
nayuta-yanagisawa commented 3 years ago
Thread 30 "mysqld" hit Breakpoint 1, 0x00007ffff42f8ae0 in spider_db_query(st_spider_conn*, char const*, unsigned int, int, int*)@plt ()
   from /home/vagrant/repo/mariadb-server/build/mysql-test/var/plugins/ha_spider.so
(gdb) bt
#0  0x00007ffff42f8ae0 in spider_db_query(st_spider_conn*, char const*, unsigned int, int, int*)@plt ()
   from /home/vagrant/repo/mariadb-server/build/mysql-test/var/plugins/ha_spider.so
#1  0x00007ffff4434404 in spider_mbase_handler::execute_sql (this=0x7fff98148f50, sql_type=1, conn=0x7fff9814a990, quick_mode=3, need_mon=0x7fff98148df0)
    at /home/vagrant/repo/mariadb-server/storage/spider/spd_db_mysql.cc:13885
#2  0x00007ffff444dec2 in spider_group_by_handler::init_scan (this=0x7fff98191650) at /home/vagrant/repo/mariadb-server/storage/spider/spd_group_by_handler.cc:1466
#3  0x0000555555e530e1 in Pushdown_query::execute (this=0x7fff98017240, join=0x7fff980152f8) at /home/vagrant/repo/mariadb-server/sql/group_by_handler.cc:49
#4  0x0000555555e07d6b in do_select (join=0x7fff980152f8, procedure=0x0) at /home/vagrant/repo/mariadb-server/sql/sql_select.cc:19938
#5  0x0000555555ddcf0f in JOIN::exec_inner (this=0x7fff980152f8) at /home/vagrant/repo/mariadb-server/sql/sql_select.cc:4489
#6  0x0000555555ddc03e in JOIN::exec (this=0x7fff980152f8) at /home/vagrant/repo/mariadb-server/sql/sql_select.cc:4271
#7  0x0000555555ddd7aa in mysql_select (thd=0x7fff98000d90, tables=0x7fff980141e0, wild_num=0, fields=..., conds=0x0, og_num=0, order=0x0, group=0x0, having=0x0,
    proc_param=0x0, select_options=2147486464, result=0x7fff980152d0, unit=0x7fff98004cc8, select_lex=0x7fff98013588)
    at /home/vagrant/repo/mariadb-server/sql/sql_select.cc:4709
#8  0x0000555555dcd111 in handle_select (thd=0x7fff98000d90, lex=0x7fff98004c08, result=0x7fff980152d0, setup_tables_done_option=0)
    at /home/vagrant/repo/mariadb-server/sql/sql_select.cc:412
#9  0x0000555555d92080 in execute_sqlcom_select (thd=0x7fff98000d90, all_tables=0x7fff980141e0) at /home/vagrant/repo/mariadb-server/sql/sql_parse.cc:6449
#10 0x0000555555d88892 in mysql_execute_command (thd=0x7fff98000d90) at /home/vagrant/repo/mariadb-server/sql/sql_parse.cc:3968
#11 0x0000555555d960b1 in mysql_parse (thd=0x7fff98000d90, rawbuf=0x7fff980134c8 "SELECT IF(COUNT(id > 0),'Y','N') FROM tbl_a", length=43,
    parser_state=0x7ffff478d520, is_com_multi=false, is_next_command=false) at /home/vagrant/repo/mariadb-server/sql/sql_parse.cc:7995
#12 0x0000555555d823c3 in dispatch_command (command=COM_QUERY, thd=0x7fff98000d90, packet=0x7fff9800ace1 "", packet_length=43, is_com_multi=false,
    is_next_command=false) at /home/vagrant/repo/mariadb-server/sql/sql_parse.cc:1857
#13 0x0000555555d80c29 in do_command (thd=0x7fff98000d90) at /home/vagrant/repo/mariadb-server/sql/sql_parse.cc:1373
#14 0x0000555555f10ce3 in do_handle_one_connection (connect=0x555558280690) at /home/vagrant/repo/mariadb-server/sql/sql_connect.cc:1412
#15 0x0000555555f10a2c in handle_one_connection (arg=0x555558280690) at /home/vagrant/repo/mariadb-server/sql/sql_connect.cc:1316
#16 0x00007ffff7e9c609 in start_thread (arg=<optimized out>) at pthread_create.c:477
#17 0x00007ffff7658293 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
nayuta-yanagisawa commented 3 years ago

debug trace より、data node で実行されるクエリ。

select count((t0.`id` > 0)) ``,(if(`` , _latin1'Y' , _latin1'N')) `IF(COUNT(id > 0),'Y','N')` from `auto_test_remote`.`tbl_a` t0

SELECT IF(id > 0,'Y','N') FROM tbl_a に変えると次のようなクエリになる。

select (if((t0.`id` > 0) , _latin1'Y' , _latin1'N')) `IF(id > 0,'Y','N')` from `auto_test_remote`.`tbl_a` t0
nayuta-yanagisawa commented 3 years ago
SELECT IF(COUNT(0),'Y','N') FROM tbl_a;
# => select count(0) ``,(if(`` , _latin1'Y' , _latin1'N')) `IF(COUNT(0),'Y','N')` from `auto_test_remote`.`tbl_a` t0

SELECT IF(0,'Y','N') FROM tbl_a;
# => select 1 from `auto_test_remote`.`tbl_a` t0

SELECT COUNT(id) FROM tbl_a;
# => select count(t0.`id`) `COUNT(id)` from `auto_test_remote`.`tbl_a` t0
nayuta-yanagisawa commented 3 years ago

謎のバッククオートはたぶん別名なんだろうな。if(``, ... のバッククオートはそれを参照してる。

nayuta-yanagisawa commented 3 years ago

バッククオートに何か (x とする) 入っていても、if((select x,... にしないと動かないはず。また、そのようにしても ERROR 1247 (42S22): Reference 'x' not supported (reference to group function) みたいに言われるはず。

nayuta-yanagisawa commented 3 years ago

Q. query の書き換えはどこで行われてるの…

A. わからん

Q. 何故クエリの書き換えを行うのか?

A. たぶん、direct join (複数 shard のデータを集約して透過的に join する仕組み) をやるため。https://github.com/Kentoku/spider/pull/19

nayuta-yanagisawa commented 3 years ago

spider_mbase_handler の member の sql を見ればどういうクエリが組み立てられてるかわかる。

nayuta-yanagisawa commented 3 years ago

とりあえず spider_mbase_handler::append_list_item_select_part が問題のバッククオートの部分を組み立ててるのはわかった。

1316        if ((error_num = dbton_hdl->append_list_item_select_part(
(gdb) p
$8 = {mem_calc_inited = true, str = {<Charset> = {
      m_charset = 0x5555575092a0 <my_charset_utf8_general_ci>}, <Binary_string> = {<Static_binary_string> = {<Sql_alloc> = {<No data fields>},
        Ptr = 0x7fff9814fca0 "select ", str_length = 7}, Alloced_length = 8, extra_alloc = 0, alloced = true, thread_specific = false}, <No data fields>}, id = 59,
  func_name = 0x7ffff447661b "<unknown>", file_name = 0x7ffff4476258 "/home/vagrant/repo/mariadb-server/storage/spider/spd_db_mysql.cc", line_no = 8616,
  current_alloc_mem = 8, next = 0x0}
(gdb) n
1317          query.select, NULL, 0, TRUE, fields, SPIDER_SQL_TYPE_SELECT_SQL)))
(gdb) p ((spider_mbase_handler*)dbton_hdl)->sql
$9 = {mem_calc_inited = true, str = {<Charset> = {
      m_charset = 0x5555575092a0 <my_charset_utf8_general_ci>}, <Binary_string> = {<Static_binary_string> = {<Sql_alloc> = {<No data fields>},
        Ptr = 0x7fff98052330 "select count((t0.`id` > 0)) ``,(if(`` , _latin1'Y' , _latin1'N')) `IF(COUNT(id > 0),'Y','N')`,", str_length = 93},
      Alloced_length = 120, extra_alloc = 0, alloced = true, thread_specific = false}, <No data fields>}, id = 59, func_name = 0x7ffff447661b "<unknown>",
  file_name = 0x7ffff4476258 "/home/vagrant/repo/mariadb-server/storage/spider/spd_db_mysql.cc", line_no = 8616, current_alloc_mem = 120, next = 0x0}
nayuta-yanagisawa commented 3 years ago

どうやら Spider はすべての field (column) をトップレベルの select の SELECT 句の後ろに並べるという書き換えを行うようである。append_list_item_select, spider_db_print_item_type あたりを参照。

これは想像だが、複数シャードでの実行結果を後で集計するためにやってるのかな…?たぶん、複数シャードにまたがった集計をやらないならこんなことは必要ないんだよね。

nayuta-yanagisawa commented 3 years ago

↓ の case Item::SUM_FUNC_ITEM: に落ちる。最終的には spider_db_mbase_util::open_item_sum_func() が実質的な変更を行っていそう。

int spider_db_print_item_type(
  Item *item,
  Field *field,
  ha_spider *spider,
  spider_string *str,
  const char *alias,
  uint alias_length,
  uint dbton_id,
  bool use_fields,
  spider_fields *fields
) {
  DBUG_ENTER("spider_db_print_item_type");
  DBUG_PRINT("info",("spider COND type=%d", item->type()));

  switch (item->type())
  {
    case Item::FUNC_ITEM:
      DBUG_RETURN(spider_db_open_item_func((Item_func *) item, spider, str,
        alias, alias_length, dbton_id, use_fields, fields));
#ifdef HANDLER_HAS_DIRECT_AGGREGATE
    case Item::SUM_FUNC_ITEM:
      DBUG_RETURN(spider_db_open_item_sum_func((Item_sum *)item, spider, str,
        alias, alias_length, dbton_id, use_fields, fields));
#endif
        ...
        default:
          DBUG_ASSERT(FALSE);
          DBUG_RETURN(spider_db_print_item_type_default(item, spider, str));
      }
    }
   ...
  }
  ...
}
nayuta-yanagisawa commented 3 years ago

解決法の1つは、push down していい場合以外は COUNT の push down をやめること。つまり data node からすべてのデータを取ってきて spider node で集約させる。PostgreSQL の FDW はどうしてんだろ?

nayuta-yanagisawa commented 3 years ago

いや、どういう仕組で Spider node での結果の統合が行われるのかちゃんと理解しないと、ベストな結論は出せないな。次の目標は、Spider node はどのようにして data node から送られてきた結果を統合するのか理解するかな。

edit: その前に Spider 1.0 あたりを読んで全体の流れを掴んだほうがいいかも?