postgresml / pgcat

PostgreSQL pooler with sharding, load balancing and failover support.
MIT License
2.7k stars 160 forks source link

PGCat not using idle connections, instead spawns new ones. #720

Open jardayn opened 3 months ago

jardayn commented 3 months ago

Running PGCat on a EC2 server on the same network as RDS. I have min_pool_size set to 100. PGCat is installed via Ubuntu's APT. Version: 1.1.2-dev4

WIth no other clients connected, when I check how many connections exist: PG shows 100 connections (that's correct) Query: SELECT client_addr, state, COUNT(1) FROM pg_stat_activity GROUP BY 1,2 ORDER BY 3 DESC;

But when I run pgbench with 20 connections with this command:

pgbench -c 20 -f one.sql -j 20 -n -t 10000 -h localhost -U username -p 6432 test

The server shows 120 connections. ~115-119 of which are idle during the benchmark

Why isn't it using the existing connections? They're idle. Nothing else is using the server.

///

Another bug I noticed, is that if I run pgbench with 200 connections - pgcat will open >500 connections that'll sit idle for the duration of the benchmark

pgbench -c 200 -f one.sql -j 20 -n -t 10000 -h localhost -U username -p 6432 test

one.sql is just SELECT 1;

Config:


#
# PgCat config example.
#

#
# General pooler settings
[general]
# What IP to run on, 0.0.0.0 means accessible from everywhere.
host = "0.0.0.0"

# Port to run on, same as PgBouncer used in this example.
port = 6432

# Whether to enable prometheus exporter or not.
enable_prometheus_exporter = false

# Port at which prometheus exporter listens on.
prometheus_exporter_port = 9930

# How long to wait before aborting a server connection (ms).
connect_timeout = 20000 # milliseconds

# How long an idle connection with a server is left open (ms).
idle_timeout = 30000 # milliseconds

# Max connection lifetime before it's closed, even if actively used.
server_lifetime = 86400000 # 24 hours

# How long a client is allowed to be idle while in a transaction (ms).
idle_client_in_transaction_timeout = 0 # milliseconds

# How much time to give the health check query to return with a result (ms).
healthcheck_timeout = 1000 # milliseconds

# How long to keep connection available for immediate re-use, without running a healthcheck query on it
healthcheck_delay = 30000 # milliseconds

# How much time to give clients during shutdown before forcibly killing client connections (ms).
shutdown_timeout = 60000 # milliseconds

# How long to ban a server if it fails a health check (seconds).
ban_time = 60 # seconds

# If we should log client connections
log_client_connections = false

# If we should log client disconnections
log_client_disconnections = false

# When set to true, PgCat reloads configs if it detects a change in the config file.
autoreload = 15000

# Number of worker threads the Runtime will use (4 by default).
worker_threads = 5

# Number of seconds of connection idleness to wait before sending a keepalive packet to the server.
tcp_keepalives_idle = 5
# Number of unacknowledged keepalive packets allowed before giving up and closing the connection.
tcp_keepalives_count = 5
# Number of seconds between keepalive packets.
tcp_keepalives_interval = 5

# Path to TLS Certificate file to use for TLS connections
# tls_certificate = ".circleci/server.cert"
# Path to TLS private key file to use for TLS connections
# tls_private_key = ".circleci/server.key"

# Enable/disable server TLS
server_tls = false

# Verify server certificate is completely authentic.
verify_server_certificate = false

# User name to access the virtual administrative database (pgbouncer or pgcat)
# Connecting to that database allows running commands like `SHOW POOLS`, `SHOW DATABASES`, etc..
admin_username = "admin"
# Password to access the virtual administrative database
admin_password = "iAmPasswordYo"

# Intercept user queries and give a fake reply.
[plugins.intercept]
enabled = true

[plugins.intercept.queries.0]

query = "select current_database() as a, current_schemas(false) as b"
schema = [
  ["a", "text"],
  ["b", "text"],
]
result = [
  ["${DATABASE}", "{public}"],
]

[plugins.intercept.queries.1]

query = "select current_database(), current_schema(), current_user"
schema = [
  ["current_database", "text"],
  ["current_schema", "text"],
  ["current_user", "text"],
]
result = [
  ["${DATABASE}", "public", "${USER}"],
]

[pools.testserv]
pool_mode = "session"

[pools.testserv.users.0]
pool_size = 500
min_pool_size = 100
username = "username"
password = "password"

[pools.testserv.shards.0]
database = "test"
servers = [
    ["rds_server.us-east-1.rds.amazonaws.com", 5432, "primary"],
]
jardayn commented 3 months ago

Same issue on V1.1.1

BUT, if I run PGBench with -C (each new tx is a new connection) - PGCat just does... nothing

balheru commented 3 months ago

Intercept user queries and give a fake reply.

[plugins.intercept] enabled = true

jardayn commented 2 months ago

Intercept user queries and give a fake reply.

[plugins.intercept] enabled = true

Nah, it's not it. It's something to do with TCP connections I assum,e.