sysown / proxysql

High-performance MySQL proxy with a GPL license.
http://www.proxysql.com
GNU General Public License v3.0
6.03k stars 981 forks source link

Queries can fail if you cannot explicitly close a prepared statement #4222

Open jaimesicam opened 1 year ago

jaimesicam commented 1 year ago

As per https://proxysql.com/documentation/prepared-statements, prepared statements are cached. This can become a problem if in between same SELECT * statements that the table structure becomes different:

Sample code:

#include <stdlib.h>
#include <iostream>
#include "mysql_connection.h"

#include <cppconn/driver.h>
#include <cppconn/exception.h>
#include <cppconn/resultset.h>
#include <cppconn/prepared_statement.h>
#include <string>

using namespace std;

void executeQuery(sql::Connection* con, const std::string& query)
{
  cout << "Query: " << query << endl;
  sql::PreparedStatement *pStmt;
  pStmt = con->prepareStatement(query);
  pStmt->execute();
  pStmt->close();
  delete pStmt;
}
int main(void)
{
  try {
    sql::Driver *driver;
    sql::Connection *con;

    driver = get_driver_instance();
    con = driver->connect("tcp://127.0.0.1:3306", "sbtest", "sbtest");
    con->setSchema("sbtest");

    executeQuery(con, "DROP TABLE IF EXISTS sample_table");
    executeQuery(con, "CREATE TABLE sample_table(id INT NOT NULL auto_increment PRIMARY KEY, value VARCHAR(42) DEFAULT NULL)");
    executeQuery(con, "INSERT INTO sample_table(value) VALUES('1'),('2'),('3')");
    executeQuery(con, "SELECT * FROM sample_table");
    executeQuery(con, "SELECT * FROM sample_table LIMIT 1");
    executeQuery(con, "ALTER TABLE sample_table ADD COLUMN new_col INT NOT NULL");
    executeQuery(con, "SELECT * FROM sample_table");
    executeQuery(con, "SELECT * FROM sample_table LIMIT 1");
    delete con;
  } catch (sql::SQLException &e) {
    cout << "# ERR: SQLException in " << __FILE__;
    cout << "# ERR: " << e.what();
    cout << " (MySQL error code: " << e.getErrorCode();
    cout << ", SQLState: " << e.getSQLState() << " )" << endl;
  }

  cout << endl;

  return EXIT_SUCCESS;
}

If connecting to a PXC node directly, the commands execute just fine:

[root@pxc80-c8-3 ~]# ./test2 
Query: DROP TABLE IF EXISTS sample_table
Query: CREATE TABLE sample_table(id INT NOT NULL auto_increment PRIMARY KEY, value VARCHAR(42) DEFAULT NULL)
Query: INSERT INTO sample_table(value) VALUES('1'),('2'),('3')
Query: SELECT * FROM sample_table
Query: SELECT * FROM sample_table LIMIT 1
Query: ALTER TABLE sample_table ADD COLUMN new_col INT NOT NULL
Query: SELECT * FROM sample_table
Query: SELECT * FROM sample_table LIMIT 1

The general log on Node 3 is:

2023-05-16T02:51:39.290479Z   262 Query set autocommit=1
2023-05-16T02:51:39.291721Z   262 Query USE `sbtest`
2023-05-16T02:51:39.292554Z   262 Prepare   DROP TABLE IF EXISTS sample_table
2023-05-16T02:51:39.292869Z   262 Execute   DROP TABLE IF EXISTS sample_table
2023-05-16T02:51:39.304104Z   262 Close stmt    
2023-05-16T02:51:39.304434Z   262 Prepare   CREATE TABLE sample_table(id INT NOT NULL auto_increment PRIMARY KEY, value VARCHAR(42) DEFAULT NULL)
2023-05-16T02:51:39.304679Z   262 Execute   CREATE TABLE sample_table(id INT NOT NULL auto_increment PRIMARY KEY, value VARCHAR(42) DEFAULT NULL)
2023-05-16T02:51:39.313248Z   262 Close stmt    
2023-05-16T02:51:39.313720Z   262 Prepare   INSERT INTO sample_table(value) VALUES('1'),('2'),('3')
2023-05-16T02:51:39.313949Z   262 Execute   INSERT INTO sample_table(value) VALUES('1'),('2'),('3')
2023-05-16T02:51:39.316058Z   262 Close stmt    
2023-05-16T02:51:39.316167Z   262 Prepare   SELECT * FROM sample_table
2023-05-16T02:51:39.316339Z   262 Execute   SELECT * FROM sample_table
2023-05-16T02:51:39.316945Z   262 Close stmt    
2023-05-16T02:51:39.316997Z   262 Prepare   SELECT * FROM sample_table LIMIT 1
2023-05-16T02:51:39.317120Z   262 Execute   SELECT * FROM sample_table LIMIT 1
2023-05-16T02:51:39.317348Z   262 Close stmt    
2023-05-16T02:51:39.317392Z   262 Prepare   ALTER TABLE sample_table ADD COLUMN new_col INT NOT NULL
2023-05-16T02:51:39.317504Z   262 Execute   ALTER TABLE sample_table ADD COLUMN new_col INT NOT NULL
2023-05-16T02:51:39.324875Z   262 Close stmt    
2023-05-16T02:51:39.325063Z   262 Prepare   SELECT * FROM sample_table
2023-05-16T02:51:39.325267Z   262 Execute   SELECT * FROM sample_table
2023-05-16T02:51:39.325578Z   262 Close stmt    
2023-05-16T02:51:39.325629Z   262 Prepare   SELECT * FROM sample_table LIMIT 1
2023-05-16T02:51:39.325729Z   262 Execute   SELECT * FROM sample_table LIMIT 1
2023-05-16T02:51:39.327213Z   262 Close stmt

But if you point the host to ProxySQL, this will be the result:

[root@pxc80-c8-3 ~]# ./test2 
Query: DROP TABLE IF EXISTS sample_table
Query: CREATE TABLE sample_table(id INT NOT NULL auto_increment PRIMARY KEY, value VARCHAR(42) DEFAULT NULL)
Query: INSERT INTO sample_table(value) VALUES('1'),('2'),('3')
Query: SELECT * FROM sample_table
Query: SELECT * FROM sample_table LIMIT 1
Query: ALTER TABLE sample_table ADD COLUMN new_col INT NOT NULL
Query: SELECT * FROM sample_table
# ERR: SQLException in test2.c# ERR: The number of columns in the result set differs from the number of bound buffers. You must reset the statement, rebind the result set columns, and execute the statement again (MySQL error code: 2057, SQLState: HY000 )

Here's the log on node 1:

2023-05-16T02:54:21.008920Z   235 Connect   sbtest@192.168.0.132 on sbtest using TCP/IP
2023-05-16T02:54:21.010253Z   235 Prepare   DROP TABLE IF EXISTS sample_table
2023-05-16T02:54:21.010887Z   235 Execute   DROP TABLE IF EXISTS sample_table
2023-05-16T02:54:21.040308Z   235 Prepare   INSERT INTO sample_table(value) VALUES('1'),('2'),('3')
2023-05-16T02:54:21.040507Z   235 Execute   INSERT INTO sample_table(value) VALUES('1'),('2'),('3')
2023-05-16T02:54:21.044329Z   235 Prepare   SELECT * FROM sample_table
2023-05-16T02:54:21.044629Z   235 Execute   SELECT * FROM sample_table
2023-05-16T02:54:21.045077Z   235 Prepare   SELECT * FROM sample_table LIMIT 1
2023-05-16T02:54:21.045222Z   235 Execute   SELECT * FROM sample_table LIMIT 1
2023-05-16T02:54:21.045553Z   235 Prepare   ALTER TABLE sample_table ADD COLUMN new_col INT NOT NULL
2023-05-16T02:54:21.045675Z   235 Execute   ALTER TABLE sample_table ADD COLUMN new_col INT NOT NULL

Here's the log on node 2:

2023-05-16T02:54:21.027280Z   248 Connect   sbtest@192.168.0.132 on sbtest using TCP/IP
2023-05-16T02:54:21.028118Z   248 Prepare   CREATE TABLE sample_table(id INT NOT NULL auto_increment PRIMARY KEY, value VARCHAR(42) DEFAULT NULL)
2023-05-16T02:54:21.028332Z   248 Execute   CREATE TABLE sample_table(id INT NOT NULL auto_increment PRIMARY KEY, value VARCHAR(42) DEFAULT NULL)

Here's the log on node3:

2023-05-16T02:54:21.064870Z   268 Prepare   SELECT * FROM sample_table
2023-05-16T02:54:21.065084Z   268 Execute   SELECT * FROM sample_table

The workaround so far is to explicitly specify the columns you would like to SELECT from the table but perhaps it would be more resilient to invalidate the prepared statement in the cache and reprepare it before it's executed on the server.

renecannao commented 1 year ago

Hi @jaimesicam .

Unfortunately the real issue is your sample application. If has at least 4 issues , but only one is relevant for SQLException . I might assume that the first 3 issues are due the fact that you wanted to simplify the test case, but issue 4 is the real problem.

  1. SELECT * is an anti-pattern. As you said it yourself , one should "explicitly specify the columns you would like to SELECT from the table";
  2. prepared statements are meant to separate SQL code from its data . You should never prepare this: INSERT INTO sample_table(value) VALUES('1'),('2'),('3');
  3. prepared statements are meant to provide efficiency, compiling the SQL code only once and executing it multiple time. Your function executeQuery() goes absolutely against this principle, recompiling the SQL code for each query. This is a terrible idea, and make prepared statement not just useless but also very inefficient. Your code will be more efficient if you run text queries instead of prepared statements. ProxySQL transparently fixes applications doing this, reducing by 66% the number of commands sent by bad application;
  4. (the real issue) your sample application is running the ALTER TABLE when there is no prepared statement prepared against the same time. This is partly the result of my previous point number 3 , but also due the fact that your sample application is the only application running queries on that table. If another connection run ALTER TABLE , your sample code will fail too. In other words, if an ALTER TABLE run (for example in another connection) in between COM_PREPARE and COM_EXECUTE , your sample code will fail also if connecting directly to MySQL server.

Here your test2.c slightly modified to trigger the same exception:

#include <stdlib.h>
#include <iostream>
#include "mysql_connection.h"

#include <cppconn/driver.h>
#include <cppconn/exception.h>
#include <cppconn/resultset.h>
#include <cppconn/prepared_statement.h>
#include <string>

using namespace std;

void executeQuery(sql::Connection* con, const std::string& query, bool alter=false)
{
  cout << "Query: " << query << endl;
  sql::PreparedStatement *pStmt;
  pStmt = con->prepareStatement(query);
  if (alter) // let's pretend that another connection is altering the table between our PREPARE and EXECUTE
    executeQuery(con, "ALTER TABLE sample_table ADD COLUMN new_col INT NOT NULL");
  pStmt->execute();
  pStmt->close();
  delete pStmt;
}
int main(void)
{
  try {
    sql::Driver *driver;
    sql::Connection *con;

    driver = get_driver_instance();
    con = driver->connect("tcp://127.0.0.1:3306", "sbtest", "sbtest");
    con->setSchema("sbtest");

    executeQuery(con, "DROP TABLE IF EXISTS sample_table");
    executeQuery(con, "CREATE TABLE sample_table(id INT NOT NULL auto_increment PRIMARY KEY, value VARCHAR(42) DEFAULT NULL)");
    executeQuery(con, "INSERT INTO sample_table(value) VALUES('1'),('2'),('3')");
    executeQuery(con, "SELECT * FROM sample_table");
    executeQuery(con, "SELECT * FROM sample_table LIMIT 1");
    //executeQuery(con, "ALTER TABLE sample_table ADD COLUMN new_col INT NOT NULL");
    executeQuery(con, "SELECT * FROM sample_table",true);
    executeQuery(con, "SELECT * FROM sample_table LIMIT 1");
    delete con;
  } catch (sql::SQLException &e) {
    cout << "# ERR: SQLException in " << __FILE__;
    cout << "# ERR: " << e.what();
    cout << " (MySQL error code: " << e.getErrorCode();
    cout << ", SQLState: " << e.getSQLState() << " )" << endl;
  }

  cout << endl;

  return EXIT_SUCCESS;
}

For reference, here the diff:

@@ -10,11 +10,13 @@

 using namespace std;

-void executeQuery(sql::Connection* con, const std::string& query)
+void executeQuery(sql::Connection* con, const std::string& query, bool alter=false)
 {
   cout << "Query: " << query << endl;
   sql::PreparedStatement *pStmt;
   pStmt = con->prepareStatement(query);
+  if (alter) // let's pretend that another connection is altering the table between our PREPARE and EXECUTE
+    executeQuery(con, "ALTER TABLE sample_table ADD COLUMN new_col INT NOT NULL");
   pStmt->execute();
   pStmt->close();
   delete pStmt;
@@ -34,8 +36,8 @@
     executeQuery(con, "INSERT INTO sample_table(value) VALUES('1'),('2'),('3')");
     executeQuery(con, "SELECT * FROM sample_table");
     executeQuery(con, "SELECT * FROM sample_table LIMIT 1");
-    executeQuery(con, "ALTER TABLE sample_table ADD COLUMN new_col INT NOT NULL");
-    executeQuery(con, "SELECT * FROM sample_table");
+    //executeQuery(con, "ALTER TABLE sample_table ADD COLUMN new_col INT NOT NULL");
+    executeQuery(con, "SELECT * FROM sample_table",true);
     executeQuery(con, "SELECT * FROM sample_table LIMIT 1");
     delete con;
   } catch (sql::SQLException &e) {