sysown / proxysql

High-performance MySQL proxy with a GPL license.
http://www.proxysql.com
GNU General Public License v3.0
6.03k stars 981 forks source link

Mem leak with large short connections #2817

Open jwongz opened 4 years ago

jwongz commented 4 years ago

Memory is not freed when ProxySQL has been idle for a long time. My test case is about 6000 point selects per second using short-lived connections; the memory keeps increasing until I stop my application, and the memory is still not freed after 12 hours. This happened in v2.0.6 and v2.0.12, both running in Docker.

MySQL [(none)]> select * from stats_memory_metrics;
+------------------------------+----------------+
| Variable_Name                | Variable_Value |
+------------------------------+----------------+
| SQLite3_memory_bytes         | 3561512        |
| jemalloc_resident            | 23936397312    |
| jemalloc_active              | 21980508160    |
| jemalloc_allocated           | 21966739488    |
| jemalloc_mapped              | 24270376960    |
| jemalloc_metadata            | 1618617464     |
| jemalloc_retained            | 175354376192   |
| Auth_memory                  | 974            |
| query_digest_memory          | 232            |
| mysql_query_rules_memory     | 224315         |
| stack_memory_mysql_threads   | 33554432       |
| stack_memory_admin_threads   | 8388608        |
| stack_memory_cluster_threads | 0              |
+------------------------------+----------------+
13 rows in set (0.00 sec)

If you are submitting a reproducible bug report, please provide: [ ] A clear description of your issue [x] The version of OS and ProxySQL centos7.6

proxysql v2.0.6 v2.0.12 [ ] Every step to reproduce the issue [ ] The FULL error log (not just the last few lines) [ ] If it is a crashing bug, please check here: https://github.com/sysown/proxysql/wiki/How-to-report-a-crash-bug

If the above is not provided, the issue is likely to be closed.

Please use markdown to format code or SQL: https://guides.github.com/features/mastering-markdown/

Thank you!

jwongz commented 4 years ago

jemalloc_active is close to jemalloc_resident, does it mean memory leak?

renecannao commented 4 years ago

Why should it free memory after hours, if it doesn't free it while using it?

Please submit a reproducible test case. Thanks

jwongz commented 4 years ago
package main

import (
    "database/sql"
    "fmt"
    "net"
    "flag"
    "time"

    _ "github.com/go-sql-driver/mysql"
)

// Command-line flags configuring the load generator: target database
// coordinates, credentials, concurrency, run duration, and the query
// to execute.
var (
	dbIp        = flag.String("dbIp", "", "db ip")
	dbPort      = flag.Int("port", 3306, "db port")
	user        = flag.String("user", "", "db user")
	password    = flag.String("password", "", "password")
	dbName      = flag.String("dbName", "", "db name")
	workerCount = flag.Int("workerCount", 1000, "worker count")
	workTime    = flag.Int("workTime", 5, "workTime: minutes")
	query       = flag.String("query", "select now()", "query")
	isStdout    = flag.Bool("isStdout", false, "whether output query's result")
)

// ConnectDB opens a database handle to the configured MySQL/ProxySQL
// endpoint and verifies connectivity with a ping.
//
// IPv6 literal addresses are wrapped in brackets as required by the
// go-sql-driver DSN syntax; IPv4 literals and hostnames are used as-is.
// On success the caller owns the returned *sql.DB and must Close it.
func ConnectDB() (*sql.DB, error) {
	connIp := *dbIp
	// Bracket only genuine IPv6 literals. The previous code also
	// bracketed hostnames (net.ParseIP returns nil for a hostname, and
	// nil.To4() is nil), producing an invalid DSN such as "[myhost]".
	if parsed := net.ParseIP(connIp); parsed != nil && parsed.To4() == nil {
		connIp = fmt.Sprintf("[%s]", connIp)
	}

	address := fmt.Sprintf("%s:%s@tcp(%s:%d)/%s", *user, *password, connIp, *dbPort, *dbName)
	db, err := sql.Open("mysql", address)
	if err != nil {
		return nil, err
	}

	if err = db.Ping(); err != nil {
		// Close the pool on ping failure so the handle is not leaked.
		db.Close()
		return nil, err
	}

	return db, nil
}

// Query opens a fresh short-lived connection, runs q, and returns the
// full result set: one []string per row, with every column rendered as
// a string and SQL NULL rendered as "".
//
// A new *sql.DB is deliberately created per call — this program is a
// load generator for short-lived connections.
func Query(q string) ([][]string, error) {
	db, err := ConnectDB()
	if err != nil {
		return nil, err
	}
	defer db.Close()

	rows, err := db.Query(q)
	if err != nil {
		return nil, err
	}
	// Release the row cursor promptly on every return path instead of
	// relying on db.Close to tear it down.
	defer rows.Close()

	// Get column names (also fixes the result-slice width below).
	columns, err := rows.Columns()
	if err != nil {
		return nil, err
	}

	// Scan every column as RawBytes so values of any SQL type can be
	// stringified uniformly.
	values := make([]sql.RawBytes, len(columns))
	scanArgs := make([]interface{}, len(values))
	for i := range values {
		scanArgs[i] = &values[i]
	}

	data := make([][]string, 0)

	// Fetch rows.
	for rows.Next() {
		if err = rows.Scan(scanArgs...); err != nil {
			return nil, err
		}

		row := make([]string, len(columns))
		for i, col := range values {
			if col != nil {
				row[i] = string(col)
			}
			// A nil column (SQL NULL) keeps the zero value "".
		}
		data = append(data, row)
	}
	if err = rows.Err(); err != nil {
		return nil, err
	}

	return data, nil
}

func testSelect(q string) {
    for i := 0; i < *workerCount; i++ {
        go func() {
            result, err := Query(q)
            if err != nil {
                fmt.Println(err)
            }

            print(result)
        }()
    }
}

// print forwards its arguments to fmt.Print, but only when the
// -isStdout flag is set; otherwise it is a no-op.
// NOTE(review): this name shadows the builtin print inside this file.
func print(a ...interface{}) {
	if !*isStdout {
		return
	}
	// fmt.Print receives the variadic slice as a single value, exactly
	// as the original did (so output includes the slice brackets).
	fmt.Print(a)
}

// main drives the load test: once per second it fires a fresh batch of
// *workerCount concurrent queries, and stops after *workTime minutes.
func main() {
	flag.Parse()

	// Overall run duration.
	timeout := time.Duration(*workTime) * time.Minute
	callTicker := time.NewTicker(1 * time.Second)
	defer callTicker.Stop()
	// The deadline is a one-shot event, so a Timer is the right tool
	// (the original used a second Ticker for it).
	deadline := time.NewTimer(timeout)
	defer deadline.Stop()
	start := time.Now()

loop:
	for {
		select {
		case <-callTicker.C:
			testSelect(*query)

			// Belt-and-braces wall-clock check, mirroring the original
			// manual comparison, in case the deadline channel is
			// starved by the ticker case.
			if time.Since(start) > timeout {
				break loop
			}

		case <-deadline.C:
			break loop
		}
	}
}

./test_shortcon -dbIp=10.9.121.207 -dbName=sbtest -password=hdausid12das -port=6033 -user=root -workTime=6000 -workerCount=6000 -query="select now()"

renecannao commented 4 years ago

Thank you! The test looks really simple to simulate. We will investigate.

jwongz commented 4 years ago

Thank you for your reply! I write it with go language.

In the beginning, I thought idle Mysql_session objects would be freed after mysql-wait_timeout (default is 8 hours), but I stopped the test and waited for more than 12 hours, and the memory usage did not go down. Is this caused by a memory leak? Or is it normal just because jemalloc holds on to it? As far as I know, if the application is idle, jemalloc_active should become very small, while jemalloc_resident does not change because of background_thread:false by default (https://github.com/jemalloc/jemalloc/blob/dev/TUNING.md); but in my case jemalloc_active does not change either. So I suspect there may be a memory leak caused by creating and closing huge numbers of short-lived connections.

jwongz commented 4 years ago

look at these issues :

In their cases, the jemalloc_active become very small when application idle for a while.

renecannao commented 4 years ago

ProxySQL should never become 100% idle. Anyway, we will try to reproduce this very soon.

renecannao commented 4 years ago

Please share your configuration. Specifically, we need, the output of SELECT * FROM from the following tables:

Or the config file, if you are using one. Thanks

renecannao commented 4 years ago

Also, please share what kernel tuning you applied. This workload creates 6000 new connections per seconds, and without proper tuning you will have hundreds of thousands of sockets in TIME_WAIT in very few seconds.

jwongz commented 4 years ago

Also, please share what kernel tuning you applied. This workload creates 6000 new connections per seconds, and without proper tuning you will have hundreds of thousands of sockets in TIME_WAIT in very few seconds.

Inspired by this issue: https://github.com/sysown/proxysql/issues/2787, I ran proxysql v2.0.6 with --idle-threads, and memory decreased after mysql-wait_timeout (which I set to 5 minutes). As we all know, some memory-leak issues were resolved after v2.0.9, but there still seems to be a memory leak in v2.0.6.

I will test in v2.0.12 later. Thanks a lot.

jwongz commented 4 years ago

Maybe the way of using idle-threads in wiki should be better.

renecannao commented 4 years ago

--idle-threads should not be related at all to this issue. Same for mysql-wait_timeout , because it has no effect. Your script is creating 6000 connections per second, it is not using persistent connections.

jwongz commented 4 years ago

Please share your configuration. Specifically, we need, the output of SELECT * FROM from the following tables:

  • mysql_servers
  • mysql_users
  • mysql_query_rules
  • global_variables

Or the config file, if you are using one. Thanks

proxysql.cnf:

#file proxysql.cfg

# This config file is parsed using libconfig , and its grammar is described in:
# http://www.hyperrealm.com/libconfig/libconfig_manual.html#Configuration-File-Grammar
# Grammar is also copied at the end of this file

datadir="/var/lib/proxysql"

admin_variables=
{
    admin_credentials="admin:admin"
#   mysql_ifaces="127.0.0.1:6032;/tmp/proxysql_admin.sock"
#

    mysql_ifaces="0.0.0.0:6032"

#   refresh_interval=2000
#   debug=true
}

mysql_variables=
{
    threads=4
    max_connections=10000
    default_query_delay=0
    default_query_timeout=36000000
    have_compress=true
    poll_timeout=2000

    interfaces="0.0.0.0:6033"

    default_schema="information_schema"
    stacksize=1048576
    server_version="5.5.30"
    connect_timeout_server=3000
# make sure to configure monitor username and password
# https://github.com/sysown/proxysql/wiki/Global-variables#mysql-monitor_username-mysql-monitor_password
    monitor_username="admin"
    monitor_password="c10f1d8e-d28f-404b-a581-39ac4c87105a"
    monitor_history=600000
    monitor_connect_interval=60000
    monitor_ping_interval=10000
    monitor_read_only_interval=1500
    monitor_read_only_timeout=500
    ping_interval_server_msec=120000
    ping_timeout_server=500
    commands_stats=true
    sessions_sort=true
    connect_retries_on_failure=10
    free_connections_pct=100
    multiplexing=false
}

# defines all the MySQL servers
mysql_servers =
(

        {
                address = "10.9.87.37"
                port = 4000
                hostgroup = 0
                max_connections = 10000
        },

        {
                address = "10.9.8.241"
                port = 4000
                hostgroup = 0
                max_connections = 10000
        },

        {
                address = "10.9.36.140"
                port = 4000
                hostgroup = 0
                max_connections = 10000
        }

#   {
#       address = "127.0.0.1" # no default, required . If port is 0 , address is interpred as a Unix Socket Domain
#       port = 3306        # no default, required . If port is 0 , address is interpred as a Unix Socket Domain
#       hostgroup = 0           # no default, required
#       status = "ONLINE"    # default: ONLINE
#       weight = 1          # default: 1
#       compression = 0    # default: 0
#   max_replication_lag = 10  # default 0 . If greater than 0 and replication lag passes such threshold, the server is shunned
#   },
#   {
#       address = "/var/lib/mysql/mysql.sock"
#       port = 0
#       hostgroup = 0
#   },
#   {
#       address="127.0.0.1"
#       port=21891
#       hostgroup=0
#       max_connections=200
#   },
#   { address="127.0.0.2" , port=3306 , hostgroup=0, max_connections=5 },
#   { address="127.0.0.1" , port=21892 , hostgroup=1 },
#   { address="127.0.0.1" , port=21893 , hostgroup=1 }
#   { address="127.0.0.2" , port=3306 , hostgroup=1 },
#   { address="127.0.0.3" , port=3306 , hostgroup=1 },
#   { address="127.0.0.4" , port=3306 , hostgroup=1 },
#   { address="/var/lib/mysql/mysql.sock" , port=0 , hostgroup=1 }
)

# defines all the MySQL users
mysql_users:
(
        {
                username = "ucloudadmin"
                password = "c10f1d8e-d28f-404b-a581-39ac4c87105a"
                default_hostgroup = 0
        },
        {
                username = "root"
                password = "pirloWang"
                default_hostgroup = 0
        }
#   {
#       username = "username" # no default , required
#       password = "password" # default: ''
#       default_hostgroup = 0 # default: 0
#       active = 1          # default: 1
#   },
#   {
#       username = "root"
#       password = ""
#       default_hostgroup = 0
#       max_connections=1000
#       default_schema="test"
#       active = 1
#   },
#   { username = "user1" , password = "password" , default_hostgroup = 0 , active = 0 }
)

#defines MySQL Query Rules
mysql_query_rules:
(
    # create user begin
    {
        rule_id=1
        active=1
        match_pattern="^\s*(\/\*(.*?)\*\/)?\s*CREATE\s+USER"
        error_msg="not allow to create user, please contact the db admin."
        apply=1
    },
    {
        rule_id=2
        active=1
        match_pattern="^\s*(\/\*(.*?)\*\/)?\s*INSERT\s+INTO\s+user"
        schemaname="mysql"
        error_msg="not allow to create user, please contact the db admin."
        apply=1
    },
    {
        rule_id=3
        active=1
        match_pattern="^\s*(\/\*(.*?)\*\/)?\s*INSERT\s+INTO\s+mysql\s*\.\s*user"
        error_msg="not allow to create user, please contact the db admin."
        apply=1
    },
    # create user end

    # drop user begin
    {
        rule_id=4
        active=1
        match_pattern="^\s*(\/\*(.*?)\*\/)?\s*DROP\s+USER"
        error_msg="not allow to drop user, please contact the db admin."
        apply=1
    },
    {
        rule_id=5
        active=1
        match_pattern="^\s*(\/\*(.*?)\*\/)?\s*DELETE\s+FROM\s+user"
        schemaname="mysql"
        error_msg="not allow to drop user, please contact the db admin."
        apply=1
    },
    {
        rule_id=6
        active=1
        match_pattern="^\s*(\/\*(.*?)\*\/)?\s*DELETE\s+FROM\s+mysql\s*\.\s*user"
        error_msg="not allow to drop user, please contact the db admin."
        apply=1
    },
    # drop user end

    # update user begin
    {
        rule_id=7
        active=1
        match_pattern="^\s*(\/\*(.*?)\*\/)?\s*SET\s+PASSWORD"
        error_msg="not allow to update user, please contact the db admin."
        apply=1
    },
    {
        rule_id=8
        active=1
        match_pattern="^\s*(\/\*(.*?)\*\/)?\s*UPDATE\s+user"
        schemaname="mysql"
        error_msg="not allow to update user, please contact the db admin."
        apply=1
    },
    {
        rule_id=9
        active=1
        match_pattern="^\s*(\/\*(.*?)\*\/)?\s*UPDATE\s+mysql\s*\.\s*user"
        error_msg="not allow to update user, please contact the db admin."
        apply=1
    },
    # update user end

    # common db begin
    {
        rule_id=10
        active=1
        match_pattern="^\s*(\/\*(.*?)\*\/)?\s*GRANT"
        error_msg="not allow to change db cfg, please contact the db admin."
        apply=1
    },
    {
        rule_id=11
        active=1
        match_pattern="^\s*(\/\*(.*?)\*\/)?\s*REVOKE"
        error_msg="not allow to change db cfg, please contact the db admin."
        apply=1
    },
    {
        rule_id=12
        active=1
        match_pattern="^\s*(\/\*(.*?)\*\/)?\s*FLUSH"
        error_msg="not allow to change db cfg, please contact the db admin."
        apply=1
    },
    # common db end

    #limit handling on INFORMATION_SCHEMA, PERFORMANCE_SCHEMA, mysql 
    #create
    {
        rule_id=13
        active=1
        match_pattern="^\s*(\/\*(.*?)\*\/)?\s*CREATE\s+TABLE"
        schemaname="INFORMATION_SCHEMA"
        error_msg="not allow to change system db, please contact the db admin"
        apply=1
    },
    {
        rule_id=14
        active=1
        match_pattern="^\s*(\/\*(.*?)\*\/)?\s*CREATE\s+TABLE"
        schemaname="PERFORMANCE_SCHEMA"
        error_msg="not allow to change system db, please contact the db admin"
        apply=1
    },
    {
        rule_id=15
        active=1
        match_pattern="^\s*(\/\*(.*?)\*\/)?\s*CREATE\s+TABLE"
        schemaname="mysql"
        error_msg="not allow to change system db, please contact the db admin"
        apply=1
    },

    #drop
    {
        rule_id=16
        active=1
        match_pattern="^\s*(\/\*(.*?)\*\/)?\s*DROP\s+DATABASE\s+(INFORMATION_SCHEMA|PERFORMANCE_SCHEMA|mysql)"
        error_msg="not allow to change system db, please contact the db admin"
        apply=1
    },
    {
        rule_id=17
        active=1
        match_pattern="^\s*(\/\*(.*?)\*\/)?\s*DROP\s+TABLE"
        schemaname="INFORMATION_SCHEMA"
        error_msg="not allow to change system db, please contact the db admin"
        apply=1
    },
    {
        rule_id=18
        active=1
        match_pattern="^\s*(\/\*(.*?)\*\/)?\s*DROP\s+TABLE"
        schemaname="PERFORMANCE_SCHEMA"
        error_msg="not allow to change system db, please contact the db admin"
        apply=1
    },
    {
        rule_id=19
        active=1
        match_pattern="^\s*(\/\*(.*?)\*\/)?\s*DROP\s+TABLE"
        schemaname="mysql"
        error_msg="not allow to change system db, please contact the db admin"
        apply=1
    },

    #insert
    {
        rule_id=20
        active=1
        match_pattern="^\s*(\/\*(.*?)\*\/)?\s*INSERT\s+INTO\s+(INFORMATION_SCHEMA|PERFORMANCE_SCHEMA|mysql)\s*\."
        error_msg="not allow to change system db, please contact the db admin"
        apply=1
    },
    {
        rule_id=21
        active=1
        match_pattern="^\s*(\/\*(.*?)\*\/)?\s*INSERT\s+INTO"
        schemaname="INFORMATION_SCHEMA"
        error_msg="not allow to change system db, please contact the db admin"
        apply=1
    },
    {
        rule_id=22
        active=1
        match_pattern="^\s*(\/\*(.*?)\*\/)?\s*INSERT\s+INTO"
        schemaname="PERFORMANCE_SCHEMA"
        error_msg="not allow to change system db, please contact the db admin"
        apply=1
    },
    {
        rule_id=23
        active=1
        match_pattern="^\s*(\/\*(.*?)\*\/)?\s*INSERT\s+INTO"
        schemaname="mysql"
        error_msg="not allow to change system db, please contact the db admin"
        apply=1
    },

    #update
    {
        rule_id=24
        active=1
        match_pattern="^\s*(\/\*(.*?)\*\/)?\s*UPDATE\s+(INFORMATION_SCHEMA|PERFORMANCE_SCHEMA|mysql)\s*\."
        error_msg="not allow to change system db, please contact the db admin"
        apply=1
    },
    {
        rule_id=25
        active=1
        match_pattern="^\s*(\/\*(.*?)\*\/)?\s*UPDATE"
        schemaname="INFORMATION_SCHEMA"
        error_msg="not allow to change system db, please contact the db admin"
        apply=1
    },
    {
        rule_id=26
        active=1
        match_pattern="^\s*(\/\*(.*?)\*\/)?\s*UPDATE"
        schemaname="PERFORMANCE_SCHEMA"
        error_msg="not allow to change system db, please contact the db admin"
        apply=1
    },
    {
        rule_id=27
        active=1
        match_pattern="^\s*(\/\*(.*?)\*\/)?\s*UPDATE"
        schemaname="mysql"
        error_msg="not allow to change system db, please contact the db admin"
        apply=1
    },

    #delete
    {
        rule_id=28
        active=1
        match_pattern="^\s*(\/\*(.*?)\*\/)?\s*DELETE\s+FROM\s+(INFORMATION_SCHEMA|PERFORMANCE_SCHEMA|mysql)\s*\."
        error_msg="not allow to change system db, please contact the db admin"
        apply=1
    },
    {
        rule_id=29
        active=1
        match_pattern="^\s*(\/\*(.*?)\*\/)?\s*DELETE\s+FROM"
        schemaname="INFORMATION_SCHEMA"
        error_msg="not allow to change system db, please contact the db admin"
        apply=1
    },
    {
        rule_id=30
        active=1
        match_pattern="^\s*(\/\*(.*?)\*\/)?\s*DELETE\s+FROM"
        schemaname="PERFORMANCE_SCHEMA"
        error_msg="not allow to change system db, please contact the db admin"
        apply=1
    },
    {
        rule_id=31
        active=1
        match_pattern="^\s*(\/\*(.*?)\*\/)?\s*DELETE\s+FROM"
        schemaname="mysql"
        error_msg="not allow to change system db, please contact the db admin"
        apply=1
    },

    # grafana_ucloud
    {
        rule_id=32
        active=1
        match_pattern="^\s*(\/\*(.*?)\*\/)?\s*DROP\s+DATABASE\s+grafana_ucloud"
        error_msg="not allow to change system db, please contact the db admin"
        apply=1
    },
    {
        rule_id=33
        active=1
        match_pattern="^\s*(\/\*(.*?)\*\/)?\s*DROP\s+TABLE"
        schemaname="grafana_ucloud"
        error_msg="not allow to change system db, please contact the db admin"
        apply=1
    },
    {
        rule_id=34
        active=1
        match_pattern="^\s*(\/\*(.*?)\*\/)?\s*INSERT\s+INTO\s+grafana_ucloud\s*\."
        error_msg="not allow to change system db, please contact the db admin"
        apply=1
    },
    {
        rule_id=35
        active=1
        match_pattern="^\s*(\/\*(.*?)\*\/)?\s*INSERT\s+INTO"
        schemaname="grafana_ucloud"
        error_msg="not allow to change system db, please contact the db admin"
        apply=1
    },
    {
        rule_id=36
        active=1
        match_pattern="^\s*(\/\*(.*?)\*\/)?\s*DELETE\s+FROM\s+grafana_ucloud\s*\."
        error_msg="not allow to change system db, please contact the db admin"
        apply=1
    },
    {
        rule_id=37
        active=1
        match_pattern="^\s*(\/\*(.*?)\*\/)?\s*DELETE\s+FROM"
        schemaname="grafana_ucloud"
        error_msg="not allow to change system db, please contact the db admin"
        apply=1
    },
    {
        rule_id=38
        active=1
        match_pattern="^\s*(\/\*(.*?)\*\/)?\s*UPDATE\s+grafana_ucloud\s*\."
        error_msg="not allow to change system db, please contact the db admin"
        apply=1
    },
    {
        rule_id=39
        active=1
        match_pattern="^\s*(\/\*(.*?)\*\/)?\s*UPDATE"
        schemaname="grafana_ucloud"
        error_msg="not allow to change system db, please contact the db admin"
        apply=1
    }
)

scheduler=
(
#  {
#   id=1
#   active=0
#   interval_ms=10000
#   filename="/var/lib/proxysql/proxysql_galera_checker.sh"
#   arg1="0"
#   arg2="0"
#   arg3="0"
#   arg4="1"
#   arg5="/var/lib/proxysql/proxysql_galera_checker.log"
#  }
)

mysql_replication_hostgroups=
(
#       {
#               writer_hostgroup=30
#               reader_hostgroup=40
#               comment="test repl 1"
#      },
#      {
#               writer_hostgroup=50
#               reader_hostgroup=60
#               comment="test repl 2"
#       }
)

# http://www.hyperrealm.com/libconfig/libconfig_manual.html#Configuration-File-Grammar
#
# Below is the BNF grammar for configuration files. Comments and include directives are not part of the grammar, so they are not included here. 
#
# configuration = setting-list | empty
#
# setting-list = setting | setting-list setting
#    
# setting = name (":" | "=") value (";" | "," | empty)
#    
# value = scalar-value | array | list | group
#    
# value-list = value | value-list "," value
#    
# scalar-value = boolean | integer | integer64 | hex | hex64 | float
#               | string
#    
# scalar-value-list = scalar-value | scalar-value-list "," scalar-value
#    
# array = "[" (scalar-value-list | empty) "]"
#    
# list = "(" (value-list | empty) ")"
#    
# group = "{" (setting-list | empty) "}"
#    
# empty =
jwongz commented 4 years ago

Also, please share what kernel tuning you applied. This workload creates 6000 new connections per seconds, and without proper tuning you will have hundreds of thousands of sockets in TIME_WAIT in very few seconds.

I use sysctl to change net config to escape TIME_WAIT error.

jwongz commented 4 years ago

--idle-threads should not be related at all to this issue. Same for mysql-wait_timeout , because it has no effect. Your script is creating 6000 connections per second, it is not using persistent connections.

you are right, I test v2.0.6 without --idle-threads and v2.0.12 with --idle-threads at the same time, both of the memory usage are similar

jwongz commented 4 years ago

I build docker use this script.

FROM debian:stretch
MAINTAINER me

ENV VERSION 2.0.12

RUN apt-get update && \
    apt-get install -y wget mysql-client net-tools inotify-tools procps && \
    wget https://github.com/sysown/proxysql/releases/download/v${VERSION}/proxysql_${VERSION}-debian9_amd64.deb -O /opt/proxysql_${VERSION}-debian9_amd64.deb && \
    dpkg -i /opt/proxysql_${VERSION}-debian9_amd64.deb && \
    rm -f /opt/proxysql_${VERSION}-debian9_amd64.deb && \
    rm -rf /var/lib/apt/lists/*

VOLUME /var/lib/proxysql
EXPOSE 6032 6033 6080
jwongz commented 4 years ago

I run docker with docker run --env MALLOC_CONF="xmalloc:true,lg_tcache_max:16,prof:true,prof_prefix:jeprof.out,prof_active:false,prof_leak:true,lg_prof_sample:18,lg_prof_interval:27", but I haven't got any prof file.

I got this message in log ": Invalid conf pair: purge:decay", maybe purge:decay is invalid in jemalloc-5.2

jwongz commented 4 years ago

@renecannao

jwongz commented 4 years ago

29447-1-i1.pdf

I set jemalloc conf prof_active:true,no mem leak output to the log or screen.

jwongz commented 4 years ago

jerf.zip

I run proxysql like this:

`MALLOC_CONF="xmalloc:true,background_thread:true,dirty_decay_ms:0,muzzy_decay_ms:0,metadata_thp:auto,prof_prefix:/root/pirlo/jerf/jeprof.out,prof_active:true,prof_leak:true,lg_prof_interval:30,lg_prof_sample:18,prof_final:true" ./proxysql --idle-threads -c /etc/proxysql.cnf`

MySQL-wait_timeout is 120000.

When proxysql start, mem is 12MB.Then, I send 1000 per/second point select with short connection, the mem is growing to 100.9MB right now.

After 10 mins, the mem is growing to 150MB. Then I stop test-program.

20 mins later, the mem is decrease to 95.9 MB, and never changed.

From the perf-file , only MySQL_Data_Stream was released.

jwongz commented 4 years ago

image

ma_pvio_init seems never release.

MySQL [(none)]> select * from stats.stats_mysql_connection_pool;
+-----------+-------------+----------+--------+----------+----------+--------+---------+-------------+---------+-------------------+-----------------+-----------------+------------+
| hostgroup | srv_host    | srv_port | status | ConnUsed | ConnFree | ConnOK | ConnERR | MaxConnUsed | Queries | Queries_GTID_sync | Bytes_data_sent | Bytes_data_recv | Latency_us |
+-----------+-------------+----------+--------+----------+----------+--------+---------+-------------+---------+-------------------+-----------------+-----------------+------------+
| 0         | 10.9.64.248 | 3306     | ONLINE | 0        | 1366     | 80052  | 251486  | 9281        | 9223773 | 0                 | 110685276       | 221173128       | 422        |
+-----------+-------------+----------+--------+----------+----------+--------+---------+-------------+---------+-------------------+-----------------+-----------------+------------+
1 row in set (0.00 sec)
jwongz commented 4 years ago

MySQL_HostGroups_Manager::get_multiple_idle_connections will drop idle connections, but it only be called at the beginning of MySQL_Thread::run()

renecannao commented 4 years ago

@jwongz : what is your goal? To drop memory usage below 95.9MB ?

jwongz commented 4 years ago

@jwongz : what is your goal? To drop memory usage below 95.9MB ?

Actually, my proxysql gets OOM-killed every 4 or 5 days under a large number of short connections. I just want to know how to fix it.

jwongz commented 4 years ago

@jwongz : what is your goal? To drop memory usage below 95.9MB ?

Actually, my proxysql has oom every 4 or 5 days with large shot connections. I just want to know how to fix it.

The limits of proxysql memory are 32 GB, but it seems far from enough.

renecannao commented 4 years ago

Did you try to get a memory profile of the instances using 32GB ?

jwongz commented 4 years ago

Did you try to get a memory profile of the instances using 32GB ?

I ran the test case yesterday, and the memory grew up to 700MB; then I stopped the test case. I got a lot of memory profiles, and they show ma_pvio_init taking 400MB, which seems to never shrink. May I upload these profiles?

jwongz commented 4 years ago

@jwongz : what is your goal? To drop memory usage below 95.9MB ?

Thanks for your reply, I have another question. May I configure jemalloc like this:

MALLOC_CONF="xmalloc:true,background_thread:true,dirty_decay_ms:0,muzzy_decay_ms:0,metadata_thp:auto

This config makes proxysql use only 65% of the memory it used before. I don't know whether it affects performance.

jwongz commented 4 years ago

jerf2.zip

MySQL [(none)]> select * from stats.stats_memory_metrics;
+------------------------------+----------------+
| Variable_Name                | Variable_Value |
+------------------------------+----------------+
| SQLite3_memory_bytes         | 3462248        |
| jemalloc_resident            | 611991552      |
| jemalloc_active              | 587493376      |
| jemalloc_allocated           | 464500312      |
| jemalloc_mapped              | 686059520      |
| jemalloc_metadata            | 25198888       |
| jemalloc_retained            | 2379452416     |
| Auth_memory                  | 706            |
| query_digest_memory          | 232            |
| mysql_query_rules_memory     | 1380           |
| mysql_firewall_users_table   | 0              |
| mysql_firewall_users_config  | 0              |
| mysql_firewall_rules_table   | 0              |
| mysql_firewall_rules_config  | 329            |
| stack_memory_mysql_threads   | 83886080       |
| stack_memory_admin_threads   | 10485760       |
| stack_memory_cluster_threads | 0              |
+------------------------------+----------------+
17 rows in set (0.00 sec)
./proxysql/deps/jemalloc/jemalloc/bin/jeprof /usr/bin/proxysql /data/jerf/jeprof.out.65428.0.i0.heap 
Using local file /usr/bin/proxysql.
Using local file /data/jerf/jeprof.out.65428.0.i0.heap.
Welcome to jeprof!  For help, type 'help'.
(jeprof) top
Total: 235.6 MB
   196.1  83.2%  83.2%    196.1  83.2% je_prof_backtrace
    33.0  14.0%  97.2%     33.0  14.0% ma_pvio_init
     1.5   0.6%  97.9%    140.1  59.4% mysql_optionsv
     1.3   0.5%  98.4%      1.3   0.5% mysql_init
     1.0   0.4%  98.8%      3.0   1.3% MySrvConnList::get_random_MyConn
     1.0   0.4%  99.3%      1.0   0.4% __gnu_cxx::new_allocator::allocate (inline)
     1.0   0.4%  99.7%      2.0   0.8% MySQL_Connection::MySQL_Connection
     0.5   0.2%  99.9%     41.4  17.6% MySQL_Thread::create_new_session_and_client_data_stream
     0.3   0.1% 100.0%    141.8  60.2% MySQL_Connection::handler
     0.0   0.0% 100.0%      0.3   0.1% ConsumerThread::run
(jeprof)
./proxysql/deps/jemalloc/jemalloc/bin/jeprof /usr/bin/proxysql /data/jerf/jeprof.out.65428.2007.i2007.heap 
Using local file /usr/bin/proxysql.
Using local file /data/jerf/jeprof.out.65428.2007.i2007.heap.
Welcome to jeprof!  For help, type 'help'.
(jeprof) top
Total: 567.2 MB
   418.8  73.8%  73.8%    418.8  73.8% ma_pvio_init
   114.2  20.1%  94.0%    114.2  20.1% je_prof_backtrace
    30.0   5.3%  99.3%     30.0   5.3% mysql_init
     1.3   0.2%  99.5%      2.8   0.5% MySrvConnList::get_random_MyConn
     0.8   0.1%  99.6%      0.8   0.1% __gnu_cxx::new_allocator::allocate (inline)
     0.8   0.1%  99.7%      1.5   0.3% MySQL_Connection::MySQL_Connection
     0.5   0.1%  99.8%     95.2  16.8% mysql_optionsv
     0.5   0.1%  99.9%    125.7  22.2% MySQL_Connection::handler
     0.5   0.1% 100.0%      3.8   0.7% pvio_socket_connect
     0.0   0.0% 100.0%     78.8  13.9% MySQL_Connection::async_connect
./proxysql/deps/jemalloc/jemalloc/bin/jeprof /usr/bin/proxysql --base=/data/jerf/jeprof.out.65428.0.i0.heap /data/jerf/jeprof.out.65428.2007.i2007.heap
Using local file /usr/bin/proxysql.
Using local file /data/jerf/jeprof.out.65428.2007.i2007.heap.
Welcome to jeprof!  For help, type 'help'.
(jeprof) top
Total: 331.6 MB
   385.8 116.3% 116.3%    385.8 116.3% ma_pvio_init
    28.8   8.7% 125.0%     28.8   8.7% mysql_init
     0.5   0.2% 125.2%      3.8   1.1% pvio_socket_connect
     0.3   0.1% 125.2%     -0.2  -0.1% MySrvConnList::get_random_MyConn
     0.3   0.1% 125.3%    -16.1  -4.8% MySQL_Connection::handler
     0.0   0.0% 125.3%     -0.3  -0.1% ConsumerThread::run
     0.0   0.0% 125.3%     78.3  23.6% MySQL_Connection::async_connect
     0.0   0.0% 125.3%      0.3   0.1% MySQL_Connection::async_query
     0.0   0.0% 125.3%     22.9   6.9% MySQL_Connection::connect_start
     0.0   0.0% 125.3%     -0.5  -0.2% MySQL_Connection_userinfo::set
jwongz commented 4 years ago

Did you try to get a memory profile of the instances using 32GB ?

I do not dare to restart proxysql with prof_active:true in the production env.

renecannao commented 4 years ago

Error log please? Please, it is specified in new issue template!

I suspect the most interesting piece of information in https://github.com/sysown/proxysql/issues/2817#issuecomment-638142520 is:

MySQL [(none)]> select * from stats.stats_mysql_connection_pool;
+-----------+-------------+----------+--------+----------+----------+--------+---------+-------------+---------+-------------------+-----------------+-----------------+------------+
| hostgroup | srv_host    | srv_port | status | ConnUsed | ConnFree | ConnOK | ConnERR | MaxConnUsed | Queries | Queries_GTID_sync | Bytes_data_sent | Bytes_data_recv | Latency_us |
+-----------+-------------+----------+--------+----------+----------+--------+---------+-------------+---------+-------------------+-----------------+-----------------+------------+
| 0         | 10.9.64.248 | 3306     | ONLINE | 0        | 1366     | 80052  | 251486  | 9281        | 9223773 | 0                 | 110685276       | 221173128       | 422        |
+-----------+-------------+----------+--------+----------+----------+--------+---------+-------------+---------+-------------------+-----------------+-----------------+------------+
1 row in set (0.00 sec)

Specifically, ConnERR=251486. 76% of your connections are failing!

The memory leak could be absolutely unrelated to short lived connections (your initial hypothesis) but to failed connections, that is clearly an issue.

jwongz commented 4 years ago

Error log please? Please, it is specified in new issue template!

I suspect the most interesting piece of information in #2817 (comment) is:

MySQL [(none)]> select * from stats.stats_mysql_connection_pool;
+-----------+-------------+----------+--------+----------+----------+--------+---------+-------------+---------+-------------------+-----------------+-----------------+------------+
| hostgroup | srv_host    | srv_port | status | ConnUsed | ConnFree | ConnOK | ConnERR | MaxConnUsed | Queries | Queries_GTID_sync | Bytes_data_sent | Bytes_data_recv | Latency_us |
+-----------+-------------+----------+--------+----------+----------+--------+---------+-------------+---------+-------------------+-----------------+-----------------+------------+
| 0         | 10.9.64.248 | 3306     | ONLINE | 0        | 1366     | 80052  | 251486  | 9281        | 9223773 | 0                 | 110685276       | 221173128       | 422        |
+-----------+-------------+----------+--------+----------+----------+--------+---------+-------------+---------+-------------------+-----------------+-----------------+------------+
1 row in set (0.00 sec)

Specifically, ConnERR=251486. 76% of your connections are failing!

The memory leak could be absolutely unrelated to short lived connections (your initial hypothesis) but to failed connections, that is clearly an issue.

This is error log: proxysql-log.zip

But I want to say that my production env has no ConnERR.

renecannao commented 4 years ago

Thank you for the log, I will check it.

But I want to say that my production env have no ConnERR .

So far I don't find any correlation between what you have been testing with what you are experiencing in production environment, because you don't know if they are the same memory leak, if any. Can you also copy the output of SELECT * FROM stats_mysql_global from prod? Thanks

jwongz commented 4 years ago

Thank you for the log, I will check it.

But I want to say that my production env have no ConnERR .

So far I don't find any correlation between what you have been testing with what you are experiencing in production environment, because you don't know if they are the same memory leak, if any. Can you also copy the output of SELECT * FROM stats_mysql_global from prod? Thanks

Thanks for your reply,

output is :


MySQL [(none)]> SELECT * FROM stats_mysql_global;
+---------------------------------------------+----------------+
| Variable_Name                               | Variable_Value |
+---------------------------------------------+----------------+
| ProxySQL_Uptime                             | 2672840        |
| Active_Transactions                         | 0              |
| Client_Connections_aborted                  | 291            |
| Client_Connections_connected                | 387            |
| Client_Connections_created                  | 2838080890     |
| Server_Connections_aborted                  | 128992         |
| Server_Connections_connected                | 660            |
| Server_Connections_created                  | 36940978       |
| Server_Connections_delayed                  | 0              |
| Client_Connections_non_idle                 | 387            |
| Queries_backends_bytes_recv                 | 4930066979802  |
| Queries_backends_bytes_sent                 | 1637180821565  |
| Queries_frontends_bytes_recv                | 1998832324232  |
| Queries_frontends_bytes_sent                | 8593345507119  |
| Query_Processor_time_nsec                   | 0              |
| Backend_query_time_nsec                     | 0              |
| mysql_backend_buffers_bytes                 | 26104960       |
| mysql_frontend_buffers_bytes                | 25362432       |
| mysql_session_internal_bytes                | 1224312        |
| Com_autocommit                              | 466377         |
| Com_autocommit_filtered                     | 466332         |
| Com_commit                                  | 7533871        |
| Com_commit_filtered                         | 4              |
| Com_rollback                                | 798            |
| Com_rollback_filtered                       | 798            |
| Com_backend_change_user                     | 15             |
| Com_backend_init_db                         | 42             |
| Com_backend_set_names                       | 2782407805     |
| Com_frontend_init_db                        | 2849402034     |
| Com_frontend_set_names                      | 2850650794     |
| Com_frontend_use_db                         | 34             |
| Com_backend_stmt_prepare                    | 12             |
| Com_backend_stmt_execute                    | 51             |
| Com_backend_stmt_close                      | 0              |
| Com_frontend_stmt_prepare                   | 51             |
| Com_frontend_stmt_execute                   | 51             |
| Com_frontend_stmt_close                     | 51             |
| Mirror_concurrency                          | 0              |
| Mirror_queue_length                         | 0              |
| Questions                                   | 6064058612     |
| Selects_for_update__autocommit0             | 0              |
| Slow_queries                                | 10075214       |
| GTID_consistent_queries                     | 0              |
| GTID_session_collected                      | 0              |
| Servers_table_version                       | 17             |
| MySQL_Thread_Workers                        | 4              |
| Access_Denied_Wrong_Password                | 291            |
| Access_Denied_Max_Connections               | 0              |
| Access_Denied_Max_User_Connections          | 0              |
| MySQL_Monitor_Workers                       | 8              |
| MySQL_Monitor_Workers_Aux                   | 0              |
| MySQL_Monitor_Workers_Started               | 8              |
| MySQL_Monitor_connect_check_OK              | 185546         |
| MySQL_Monitor_connect_check_ERR             | 27             |
| MySQL_Monitor_ping_check_OK                 | 1110450        |
| MySQL_Monitor_ping_check_ERR                | 4650           |
| MySQL_Monitor_read_only_check_OK            | 0              |
| MySQL_Monitor_read_only_check_ERR           | 0              |
| MySQL_Monitor_replication_lag_check_OK      | 0              |
| MySQL_Monitor_replication_lag_check_ERR     | 0              |
| ConnPool_get_conn_latency_awareness         | 0              |
| ConnPool_get_conn_immediate                 | 3              |
| ConnPool_get_conn_success                   | 2838689078     |
| ConnPool_get_conn_failure                   | 161700871      |
| generated_error_packets                     | 10677          |
| max_connect_timeouts                        | 10386          |
| backend_lagging_during_query                | 0              |
| backend_offline_during_query                | 3020046        |
| queries_with_max_lag_ms                     | 0              |
| queries_with_max_lag_ms__delayed            | 0              |
| queries_with_max_lag_ms__total_wait_time_us | 0              |
| mysql_unexpected_frontend_com_quit          | 0              |
| Client_Connections_hostgroup_locked         | 128078         |
| hostgroup_locked_set_cmds                   | 128078         |
| hostgroup_locked_queries                    | 0              |
| mysql_unexpected_frontend_packets           | 0              |
| aws_aurora_replicas_skipped_during_query    | 0              |
| mysql_killed_backend_connections            | 0              |
| mysql_killed_backend_queries                | 3482           |
| MyHGM_myconnpoll_get                        | 3000389947     |
| MyHGM_myconnpoll_get_ok                     | 2838689076     |
| MyHGM_myconnpoll_push                       | 2802978440     |
| MyHGM_myconnpoll_destroy                    | 36928438       |
| MyHGM_myconnpoll_reset                      | 2802431382     |
| SQLite3_memory_bytes                        | 5065840        |
| ConnPool_memory_bytes                       | 21097488       |
| Stmt_Client_Active_Total                    | 0              |
| Stmt_Client_Active_Unique                   | 0              |
| Stmt_Server_Active_Total                    | 0              |
| Stmt_Server_Active_Unique                   | 0              |
| Stmt_Max_Stmt_id                            | 2              |
| Stmt_Cached                                 | 1              |
| Query_Cache_Memory_bytes                    | 0              |
| Query_Cache_count_GET                       | 0              |
| Query_Cache_count_GET_OK                    | 0              |
| Query_Cache_count_SET                       | 0              |
| Query_Cache_bytes_IN                        | 0              |
| Query_Cache_bytes_OUT                       | 0              |
| Query_Cache_Purged                          | 0              |
| Query_Cache_Entries                         | 0              |
+---------------------------------------------+----------------+
100 rows in set (0.01 sec)
jwongz commented 4 years ago

Thank you for the log, I will check it.

But I want to say that my production env have no ConnERR .

So far I don't find any correlation between what you have been testing with what you are experiencing in production environment, because you don't know if they are the same memory leak, if any. Can you also copy the output of SELECT * FROM stats_mysql_global from prod? Thanks

I check the connection pool on my production env just now. It has some connErrs:

MySQL [(none)]> select * from stats.stats_mysql_connection_pool;
+-----------+---------------+----------+--------+----------+----------+----------+---------+-------------+-----------+-------------------+-----------------+-----------------+------------+
| hostgroup | srv_host      | srv_port | status | ConnUsed | ConnFree | ConnOK   | ConnERR | MaxConnUsed | Queries   | Queries_GTID_sync | Bytes_data_sent | Bytes_data_recv | Latency_us |
+-----------+---------------+----------+--------+----------+----------+----------+---------+-------------+-----------+-------------------+-----------------+-----------------+------------+
| 0         | server1 | 10020    | ONLINE | 74       | 37       | 3477328  | 36278   | 7805        | 585451224 | 0                 | 364770054281    | 1156018994656   | 1096       |
| 0         | server2 | 10020    | ONLINE | 62       | 46       | 3548183  | 28925   | 7493        | 580688794 | 0                 | 362683038952    | 1150135097130   | 751        |
| 0         | server3  | 10032    | ONLINE | 55       | 53       | 10642552 | 41512   | 6181        | 884729442 | 0                 | 441827405952    | 1285899977301   | 765        |
| 0         | server4  | 10096    | ONLINE | 73       | 39       | 2861487  | 7377    | 8023        | 188672628 | 0                 | 102588353418    | 328994174899    | 1505       |
| 0         | server5  | 10012    | ONLINE | 61       | 30       | 629982   | 20      | 7770        | 193617882 | 0                 | 106442385001    | 378560765156    | 2431       |
| 0         | server6  | 10014    | ONLINE | 52       | 55       | 547000   | 20      | 6553        | 193969357 | 0                 | 105076552000    | 342521437539    | 202        |
+-----------+---------------+----------+--------+----------+----------+----------+---------+-------------+-----------+-------------------+-----------------+-----------------+------------+
renecannao commented 4 years ago

I was looking for Stmt_Client_Active_ metrics: they are 0 , so we can exclude memory being used by prepared statement metadata cache (memory used there is not recorded in stats_memory_metrics).

Connection pool seems very efficient if we only look at this ratio: Questions/MyHGM_myconnpoll_get = 2.02 . Things becomes more interesting if looking at the high percentage of Com_frontend_init_db and Com_frontend_set_names compared to Questions.

Can you please attach the output of SELECT Command, Total_cnt FROM stats_mysql_commands_counters WHERE Total_cnt > 0; ? I will need more information after this. The output of the above won't show where the problem is, but I want to better understand your traffic to make some hypothesis, because your traffic is generating some weird behavior.

I also noticed a relatively high value for MyHGM_myconnpoll_destroy and a extreme high value for MyHGM_myconnpoll_reset:

This is absolutely not good, because the connection pool isn't efficiently reusing connections.

Do you have similar patterns in your testing environments ?

jwongz commented 4 years ago

SELECT Command, Total_cnt FROM stats_mysql_commands_counters WHERE Total_cnt > 0; ?

the production env :

MySQL [(none)]> SELECT Command, Total_cnt FROM stats_mysql_commands_counters WHERE Total_cnt > 0;
+-------------------+------------+
| Command           | Total_cnt  |
+-------------------+------------+
| ALTER_TABLE       | 26         |
| COMMIT            | 7533888    |
| CREATE_DATABASE   | 2          |
| CREATE_TABLE      | 2          |
| DELETE            | 5547790    |
| INSERT            | 3507367    |
| KILL              | 9087       |
| REPLACE           | 1932168    |
| ROLLBACK          | 815        |
| SELECT            | 3169436766 |
| SET               | 2853626540 |
| SHOW_TABLE_STATUS | 166        |
| START_TRANSACTION | 7533886    |
| UPDATE            | 18788134   |
| SHOW              | 246471     |
| UNKNOWN           | 120        |
+-------------------+------------+
16 rows in set (0.01 sec)
jwongz commented 4 years ago

I was looking for Stmt_Client_Active_ metrics: they are 0 , so we can exclude memory being used by prepared statement metadata cache (memory used there is not recorded in stats_memory_metrics).

Connection pool seems very efficient if we only look at this ratio: Questions/MyHGM_myconnpoll_get = 2.02 . Things becomes more interesting if looking at the high percentage of Com_frontend_init_db and Com_frontend_set_names compared to Questions.

Can you please attach the output of SELECT Command, Total_cnt FROM stats_mysql_commands_counters WHERE Total_cnt > 0; ? I will need more information after this. The output of the above won't show where the problem is, but I want to better understand your traffic to make some hypothesis, because your traffic is generating some weird behavior.

I also noticed a relatively high value for MyHGM_myconnpoll_destroy and a extreme high value for MyHGM_myconnpoll_reset:

  • 1.3% of the connections are being destroyed
  • 99.98% of the connections returning to the connection pool are being reset.

This is absolutely not good, because the connection pool isn't efficiently reusing connections.

Do you have similar patterns in your testing environments ?

Thanks for your reply,

It's hard to simulate queries like those in the production env. My testing environment's output is:

MySQL [(none)]> SELECT Command, Total_cnt FROM stats_mysql_commands_counters WHERE Total_cnt > 0;
+---------+-----------+
| Command | Total_cnt |
+---------+-----------+
| SELECT  | 9389490   |
+---------+-----------+
1 row in set (0.00 sec)
jwongz commented 4 years ago

So how can I make connections be destroyed as quickly as possible?

renecannao commented 4 years ago

Connections should not be destroyed. If connections are destroyed, it means connection pool is not efficient.

Can you please copy the output of this query?

SELECT username,schemaname,count_star,digest_text FROM stats_mysql_query_digest WHERE digest_text LIKE 'SET%' GROUP BY username,schemaname,digest_text;

I want to try to understand why connections are being reset. Also, please copy the error log. Thanks

jwongz commented 4 years ago

SELECT username,schemaname,count_star,digest_text FROM stats_mysql_query_digest WHERE digest_text LIKE 'SET%' GROUP BY username,schemaname,digest_text;

output is :

MySQL [(none)]> SELECT username,schemaname,count_star,digest_text FROM stats_mysql_query_digest WHERE digest_text LIKE 'SET%' GROUP BY username,schemaname,digest_text;
+----------+--------------------+------------+---------------------------------------------------------+
| username | schemaname         | count_star | digest_text                                             |
+----------+--------------------+------------+---------------------------------------------------------+
| xsky    | xsky              | 296984     | SET AUTOCOMMIT = ?                                      |
| xsky    | xsky              | 2480511    | SET NAMES ?                                             |
| xsky    | xsky              | 4071       | SET NAMES UTF8                                          |
| xsky    | xsky              | 81653      | SET NAMES gbk                                           |
| xsky    | xsky              | 30107061   | SET NAMES utf8                                          |
| xsky    | xsky              | 2821703798 | SET NAMES utf8mb4                                       |
| xsky    | xsky              | 66         | SET PROFILING = ?                                       |
| xsky    | xsky              | 166667     | SET autocommit=?                                        |
| xsky    | xsky              | 127827     | SET character_set_results = NULL                        |
| xsky    | xsky              | 1596       | SET net_write_timeout=?                                 |
| xsky    | xsky              | 127824     | SET sql_mode=?                                          |
| xsky    | xsky              | 151665     | SET time_zone = ?                                       |
| xsky    | xsky              | 3075       | set autocommit=?                                        |
| xsky    | information_schema | 13372      | SET NAMES ?                                             |
| xsky    | information_schema | 7594       | SET NAMES UTF8MB4                                       |
| xsky    | information_schema | 3246       | SET NAMES utf8                                          |
| xsky    | information_schema | 542        | SET NAMES utf8mb4                                       |
| root     | xsky              | 2          | SET NAMES binary                                        |
| root     | xsky              | 27         | SET NAMES utf8mb4                                       |
| root     | xsky              | 3          | SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ |
| root     | xsky              | 2          | SET SESSION net_write_timeout = ?                       |
| root     | xsky              | 2          | SET SESSION wait_timeout = ?                            |
| root     | xsky              | 75         | SET SQL_SELECT_LIMIT=?                                  |
| root     | xsky              | 72         | SET SQL_SELECT_LIMIT=DEFAULT                            |
| root     | xsky              | 68         | SET autocommit=?                                        |
| root     | xsky              | 116        | SET net_write_timeout=?                                 |
| root     | xsky              | 24         | SET sql_mode=?                                          |
| root     | xsky              | 14         | set character_set_results=utf8                          |
| root     | information_schema | 12         | SET NAMES binary                                        |
| root     | information_schema | 82         | SET NAMES utf8mb4                                       |
| root     | information_schema | 12         | SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ |
| root     | information_schema | 12         | SET SESSION wait_timeout = ?                            |
| root     | information_schema | 3          | SET autocommit=?                                        |
| root     | information_schema | 8          | SET net_write_timeout=?                                 |
| root     | information_schema | 2          | SET sql_mode=?                                          |
+----------+--------------------+------------+---------------------------------------------------------+
35 rows in set (0.11 sec)

I'll copy the error log later, thanks.

jwongz commented 4 years ago

Connections should not be destroyed. If connections are destroyed, it means connection pool is not efficient.

Can you please copy the output of this query?

SELECT username,schemaname,count_star,digest_text FROM stats_mysql_query_digest WHERE digest_text LIKE 'SET%' GROUP BY username,schemaname,digest_text;

I want to try to understand why connections are being reset. Also, please copy the error log. Thanks

Why does the conn-pool have so many connections? 2.8 billion? Maybe we should keep it at a suitable value. The default value of mysql-free_connections_pct is 10, so I think proxysql only keeps 10% of max_connections. So why are only 1.3% of connections destroyed?

jwongz commented 4 years ago

my prod env log :

{"log":"2020-05-23 18:15:12 MySQL_Session.cpp:3661:handler(): [WARNING] Retrying query.\n","stream":"stderr","time":"2020-05-23T10:15:12.514964478Z"}
{"log":"2020-05-23 18:15:12 MySQL_Session.cpp:3652:handler(): [ERROR] Detected an offline server during query: 10.10.108.60, 10032\n","stream":"stderr","time":"2020-05-23T10:15:12.515048626Z"}
{"log":"2020-05-23 18:15:12 MySQL_Session.cpp:3661:handler(): [WARNING] Retrying query.\n","stream":"stderr","time":"2020-05-23T10:15:12.515052949Z"}
{"log":"2020-05-23 18:15:12 MySQL_Session.cpp:3652:handler(): [ERROR] Detected an offline server during query: 10.10.108.60, 10032\n","stream":"stderr","time":"2020-05-23T10:15:12.515134186Z"}
{"log":"2020-05-23 18:15:12 MySQL_Session.cpp:3661:handler(): [WARNING] Retrying query.\n","stream":"stderr","time":"2020-05-23T10:15:12.515138621Z"}
{"log":"2020-05-23 18:15:12 MySQL_Session.cpp:3652:handler(): [ERROR] Detected an offline server during query: 10.10.108.60, 10032\n","stream":"stderr","time":"2020-05-23T10:15:12.515223693Z"}
{"log":"2020-05-23 18:15:12 MySQL_Session.cpp:3661:handler(): [WARNING] Retrying query.\n","stream":"stderr","time":"2020-05-23T10:15:12.515226958Z"}
{"log":"2020-05-23 18:15:12 MySQL_Session.cpp:3652:handler(): [ERROR] Detected an offline server during query: 10.10.108.60, 10032\n","stream":"stderr","time":"2020-05-23T10:15:12.515317194Z"}
{"log":"2020-05-23 18:15:12 MySQL_Session.cpp:3661:handler(): [WARNING] Retrying query.\n","stream":"stderr","time":"2020-05-23T10:15:12.515321956Z"}
{"log":"2020-05-23 18:15:12 MySQL_Session.cpp:3652:handler(): [ERROR] Detected an offline server during query: 10.10.108.60, 10032\n","stream":"stderr","time":"2020-05-23T10:15:12.515408788Z"}
{"log":"2020-05-23 18:15:12 MySQL_Session.cpp:3661:handler(): [WARNING] Retrying query.\n","stream":"stderr","time":"2020-05-23T10:15:12.515414238Z"}
{"log":"2020-05-23 18:15:12 MySQL_Session.cpp:3652:handler(): [ERROR] Detected an offline server during query: 10.10.108.60, 10032\n","stream":"stderr","time":"2020-05-23T10:15:12.515500271Z"}
{"log":"2020-05-23 18:15:12 MySQL_Session.cpp:3661:handler(): [WARNING] Retrying query.\n","stream":"stderr","time":"2020-05-23T10:15:12.515504222Z"}
{"log":"2020-05-23 18:15:13 MySQL_Session.cpp:3652:handler(): [ERROR] Detected an offline server during query: 10.10.108.60, 10032\n","stream":"stderr","time":"2020-05-23T10:15:13.535844486Z"}

 {"log":"2020-05-29 10:45:46 MySQL_Session.cpp:5094:handler___status_WAITING_CLIENT_DATA___STATE_SLEEP___MYSQL_COM_QUERY_qpo(): [WARNING] Unable to parse unknown SET query. Setting lock_hostgroup. Please report a bug for future enhancements:SET character_set_results = NULL\n","stream":"stderr","time":"2020-05-29T02:45:46.430381648Z"}
{"log":"2020-05-29 10:45:46 MySQL_Session.cpp:5094:handler___status_WAITING_CLIENT_DATA___STATE_SLEEP___MYSQL_COM_QUERY_qpo(): [WARNING] Unable to parse unknown SET query. Setting lock_hostgroup. Please report a bug for future enhancements:SET character_set_results = NULL\n","stream":"stderr","time":"2020-05-29T02:45:46.559819469Z"}
{"log":"2020-05-29 10:45:46 MySQL_Session.cpp:5094:handler___status_WAITING_CLIENT_DATA___STATE_SLEEP___MYSQL_COM_QUERY_qpo(): [WARNING] Unable to parse unknown SET query. Setting lock_hostgroup. Please report a bug for future enhancements:SET character_set_results = NULL\n","stream":"stderr","time":"2020-05-29T02:45:46.614087311Z"}
{"log":"2020-05-29 10:45:46 MySQL_Session.cpp:5094:handler___status_WAITING_CLIENT_DATA___STATE_SLEEP___MYSQL_COM_QUERY_qpo(): [WARNING] Unable to parse unknown SET query. Setting lock_hostgroup. Please report a bug for future enhancements:SET character_set_results = NULL\n","stream":"stderr","time":"2020-05-29T02:45:46.689455427Z"}
{"log":"2020-05-29 10:45:52 MySQL_Session.cpp:5094:handler___status_WAITING_CLIENT_DATA___STATE_SLEEP___MYSQL_COM_QUERY_qpo(): [WARNING] Unable to parse unknown SET query. Setting lock_hostgroup. Please report a bug for future enhancements:SET character_set_results = NULL\n","stream":"stderr","time":"2020-05-29T02:45:52.715102731Z"}
{"log":"2020-05-29 10:45:53 MySQL_Session.cpp:5094:handler___status_WAITING_CLIENT_DATA___STATE_SLEEP___MYSQL_COM_QUERY_qpo(): [WARNING] Unable to parse unknown SET query. Setting lock_hostgroup. Please report a bug for future enhancements:SET character_set_results = NULL\n","stream":"stderr","time":"2020-05-29T02:45:53.903389853Z"}
{"log":"2020-05-29 10:45:54 MySQL_Session.cpp:5094:handler___status_WAITING_CLIENT_DATA___STATE_SLEEP___MYSQL_COM_QUERY_qpo(): [WARNING] Unable to parse unknown SET query. Setting lock_hostgroup. Please report a bug for future enhancements:SET character_set_results = NULL\n","stream":"stderr","time":"2020-05-29T02:45:54.066059947Z"}

{"log":"2020-05-29 11:20:02 MySQL_Session.cpp:3739:handler(): [WARNING] Error during query on 

{"log":"2020-05-29 22:16:08 MySQL_Session.cpp:2543:handler_again___status_CHANGING_CHARSET(): [ERROR] Detected a broken connection during SET NAMES on 10.10.108.60 , 10032 : 2019, Can't initialize character set (null) (path: compiled_in)\n","stream":"stderr","time":"2020-05-29T14:16:08.564180913Z"}