Closed hellower closed 4 years ago
I can't reproduce this locally.
What does
lsof -i -P -n | grep LISTEN
and
ps aux | grep pgagroal
say ?
Can you connect with psql ?
What is the output from
pgagroal-cli -c pgagroal.conf details
?
########################################################\ before postgresql restart
postgersql 9081 port pgagroal 9091 port debian 10 ########################################################\ [root@ssdnodes00:/root] STAGE# lsof -i -P -n | grep LISTEN|grep 9081 postgres 31210 postgres 3u IPv4 1522063856 0t0 TCP :9081 (LISTEN) postgres 31210 postgres 4u IPv6 1522063857 0t0 TCP :9081 (LISTEN) [root@ssdnodes00:/root] STAGE# lsof -i -P -n | grep LISTEN|grep 9091 pgagroal 11429 postgres 5u IPv4 1522104712 0t0 TCP 127.0.0.1:9091 (LISTEN) pgagroal 11527 postgres 4u IPv6 1522104710 0t0 TCP [::1]:9091 (LISTEN) pgagroal 11611 postgres 4u IPv6 1522104710 0t0 TCP [::1]:9091 (LISTEN) pgagroal 11838 postgres 4u IPv6 1522104710 0t0 TCP [::1]:9091 (LISTEN) pgagroal 11891 postgres 4u IPv6 1522104710 0t0 TCP [::1]:9091 (LISTEN) [root@ssdnodes00:/root] STAGE#
########################################################\ # pg_ctl restart -w -c ########################################################\
########################################################\ # after postgresql restart ########################################################\
[root@ssdnodes00:/root] STAGE# ps aux | grep pgagroal root 1294 0.0 0.0 6076 884 pts/5 R+ 23:30 0:00 grep pgagroal postgres 11429 0.0 0.0 10672 7012 pts/4 S+ 23:27 0:00 pgagroal -c pgagroal.conf -a pgagroal_hba.conf -l pgagroal_databases.conf -u pgagroal_users.conf
[root@ssdnodes00:/root] STAGE# lsof -i -P -n | grep LISTEN|grep 9081 postgres 27890 postgres 3u IPv4 1522135264 0t0 TCP :9081 (LISTEN) postgres 27890 postgres 4u IPv6 1522135265 0t0 TCP :9081 (LISTEN) [root@ssdnodes00:/root] STAGE# lsof -i -P -n | grep LISTEN|grep 9091 pgagroal 11429 postgres 5u IPv4 1522104712 0t0 TCP 127.0.0.1:9091 (LISTEN) [root@ssdnodes00:/root]
......................... 5 minutes later ......................................................... [root@ssdnodes00:/root] STAGE# lsof -i -P -n | grep LISTEN|grep 9081 postgres 31788 postgres 3u IPv4 1522147407 0t0 TCP :9081 (LISTEN) postgres 31788 postgres 4u IPv6 1522147408 0t0 TCP :9081 (LISTEN) [root@ssdnodes00:/root] STAGE# lsof -i -P -n | grep LISTEN|grep 9091 [root@ssdnodes00:/root] STAGE# lsof -i -P -n | grep LISTEN|grep 9091 [root@ssdnodes00:/root] STAGE# ps aux | grep pgagroal postgres 11429 0.0 0.0 10672 7012 pts/4 S+ 23:27 0:00 pgagroal -c pgagroal.conf -a pgagroal_hba.conf -l pgagroal_databases.conf -u pgagroal_users.conf root 20853 0.0 0.0 6208 884 pts/5 S+ 23:31 0:00 grep pgagroal [root@ssdnodes00:/root] STAGE#
$ psql -U postgres -p 9091 psql: error: could not connect to server: could not connect to server: Connection refused Is the server running locally and accepting connections on Unix domain socket "/var/run/postgresql/.s.PGSQL.9091"?
###################################################### \ pgagroal-cli -c pgagroal.conf details ######################################################
Connection 0: Not initialized
Connection 1: Not initialized
Connection 2: Not initialized
Connection 3: Not initialized
Connection 4: Not initialized
Connection 5: Not initialized
Connection 6: Not initialized
Connection 7: Not initialized
Connection 8: Not initialized
Connection 9: Not initialized
...............
Connection 993: Not initialized
Connection 994: Not initialized
Connection 995: Not initialized
Connection 996: Not initialized
Connection 997: Not initialized
Connection 998: Not initialized
Connection 999: Not initialized
[postgres@ssdnodes00:/usr/local/etc/pgagroal]
######################################################
It may be an OS problem; I will run the same test case on CentOS 7.
So, the process is alive, but the port is gone... Unix Domain Socket still works.
Could you try with
idle_timeout = 0
?
Or maybe a newer version of libev ?
Something that looks related was fixed in libev 4.27...
##################### reproduction case ########################
# centos 8 + latest pgagroal source
##########################################################
[postgres@ssdnodes00 bin]$ cat /etc/os-release
NAME="CentOS Linux" <------------------ !!!!
VERSION="8 (Core)"
ID="centos"
ID_LIKE="rhel fedora"
VERSION_ID="8"
PLATFORM_ID="platform:el8"
PRETTY_NAME="CentOS Linux 8 (Core)"
ANSI_COLOR="0;31"
CPE_NAME="cpe:/o:centos:centos:8"
HOME_URL="https://www.centos.org/"
BUG_REPORT_URL="https://bugs.centos.org/"
CENTOS_MANTISBT_PROJECT="CentOS-8" CENTOS_MANTISBT_PROJECT_VERSION="8" REDHAT_SUPPORT_PRODUCT="centos" REDHAT_SUPPORT_PRODUCT_VERSION="8"
[postgres@ssdnodes00 bin]$ cat pgagroal.conf [pgagroal] host = localhost port = 9091
log_type = console log_level = info log_path =
max_connections = 1000 idle_timeout = 0 <---------------------------- validation = foreground unix_socket_dir = /tmp/.s.pgagroal libev=select log_connections = true log_disconnections = true
[primary] host = localhost port = 9081 [postgres@ssdnodes00 bin]$ ./pg_start.sh 04-02 15:28:05.892 616 616 I pgagroal.main pgagroal: started on localhost:9091 04-02 15:28:06.167 618 618 I pgagroal.worker connect: user=tarantula database=tarantula address=127.0.0.1 04-02 15:28:06.176 619 619 I pgagroal.worker connect: address=127.0.0.1 04-02 15:28:06.177 619 619 I pgagroal.worker disconnect: address=127.0.0.1 04-02 15:28:06.262 620 620 I pgagroal.worker connect: user=tarantula database=tarantula address=127.0.0.1 04-02 15:28:06.276 621 621 I pgagroal.worker connect: address=127.0.0.1 04-02 15:28:06.276 621 621 I pgagroal.worker disconnect: address=127.0.0.1 04-02 15:28:06.320 622 622 I pgagroal.worker connect: user=tarantula database=tarantula address=127.0.0.1 04-02 15:28:06.328 623 623 I pgagroal.worker connect: address=127.0.0.1 04-02 15:28:06.328 623 623 I pgagroal.worker disconnect: address=127.0.0.1
################################################ # FIRST pg_ctl restart -w -c ==> pgagroal works good ######################################################
################################################ # SECOND pg_ctl restart -w -c ==> pgagroal not working ######################################################
$ ps -ef|grep pgagroal -------- daemon alived postgres 24102 24101 0 00:28 pts/1 00:00:00 pgagroal -c pgagroal.conf -a pgagroal_hba.conf -l pgagroal_databases.conf -u pgagroal_users.conf postgres 28369 25666 0 00:32 pts/5 00:00:00 grep pgagroal
_$ nc -vz localhost 9091 localhost [127.0.0.1] 9091 (?) : Connection refused
########################################### \ libev version ############################################ [root@ssdnodes00 tmp]# cat /etc/os-release NAME="CentOS Linux" VERSION="8 (Core)" ID="centos" ...........................
[root@ssdnodes00 tmp]# rpm -qa|grep libev libev-4.24-6.el8.x86_64 libev-devel-4.24-6.el8.x86_64 [root@ssdnodes00 tmp]#
Could you try with libev = epoll
?
################################################# # libev version up # centos 8 ################################################### libev-devel-4.24-6.el8.x86_64 [root@ssdnodes00 /]# rpm -e libev-devel libev [root@ssdnodes00 /]#
wget http://dist.schmorp.de/libev/libev-4.33.tar.gz ./configure make make install
[root@ssdnodes00 libev-4.33]# ls -l /usr/local/lib total 892 -rw-r--r-- 1 root root 629846 Apr 2 16:16 libev.a -rwxr-xr-x 1 root root 921 Apr 2 16:16 libev.la lrwxrwxrwx 1 root root 14 Apr 2 16:16 libev.so -> libev.so.4.0.0 lrwxrwxrwx 1 root root 14 Apr 2 16:16 libev.so.4 -> libev.so.4.0.0 -rwxr-xr-x 1 root root 274848 Apr 2 16:16 libev.so.4.0.0 [root@ssdnodes00 libev-4.33]#
pgagroal compile
...... [root@ssdnodes00 build]# cmake -DCMAKE_INSTALL_PREFIX=/usr/local .. -- The C compiler identification is GNU 8.3.1 -- Check for working C compiler: /usr/bin/cc -- Check for working C compiler: /usr/bin/cc -- works -- Detecting C compiler ABI info -- Detecting C compiler ABI info - done -- Detecting C compile features -- Detecting C compile features - done -- pgagroal 0.6.0 -- Build type is Release -- Performing Test COMPILER_SUPPORTS_C17 -- Performing Test COMPILER_SUPPORTS_C17 - Success -- Found Libev: /usr/local/lib/libev.so (found suitable version "4.33", minimum required is "4.11") <=========== libev 4.33 -- libev found -- Found OpenSSL: /usr/lib64/libcrypto.so (found version "1.1.1c") -- OpenSSL found -- Found Rst2Man: /usr/bin/rst2man (found version "0.14,") -- rst2man found -- Configuring done -- Generating done -- Build files have been written to: /usr/local/src/pgagroal/build [root@ssdnodes00 build]# .make ..
################################################# # libev=select ################################################### first pg restart --> pgagroal works good second pg restart --> pgagroal not working
################################################# # libev=epoll ################################################### first pg restart --> pgagroal workgs good second pg restart --> pgagroal good !!! ..... fifth pg restart ---> pgagroal good !!!! ... 1 minutes later
pg_restart . ---> pgagroal not working <------------ error !!
..... STAGE$ psql -U tarantula -d tarantula -h localhost -p 9091 psql: error: could not connect to server: could not connect to server: Connection refused Is the server running on host "localhost" (::1) and accepting TCP/IP connections on port 9091? could not connect to server: Connection refused Is the server running on host "localhost" (::1) and accepting TCP/IP connections on port 9091? could not connect to server: Connection refused Is the server running on host "localhost" (127.0.0.1) and accepting TCP/IP connections on port 9091? could not connect to server: Connection refused Is the server running on host "localhost" (127.0.0.1) and accepting TCP/IP connections on port 9091?
pgagroal log...... ....................... 04-02 16:23:48.663 5688 5688 W pgagroal.main accept: 5 Bad file descriptor 04-02 16:23:48.663 5688 5688 W pgagroal.main accept: 5 Bad file descriptor 04-02 16:23:48.663 5688 5688 W pgagroal.main accept: 5 Bad file descriptor 04-02 16:23:48.663 5688 5688 W pgagroal.main accept: 5 Bad file descriptor 04-02 16:23:48.663 5688 5688 W pgagroal.main accept: 5 Bad file descriptor 04-02 16:23:48.663 5688 5688 W pgagroal.main accept: 5 Bad file descriptor 04-02 16:23:48.663 5688 5688 W pgagroal.main accept: 5 Bad file descriptor 04-02 16:23:48.663 5688 5688 W pgagroal.main accept: 5 Bad file descriptor 04-02 16:23:48.663 5688 5688 W pgagroal.main accept: 5 Bad file descriptor 04-02 16:23:48.663 5688 5688 W pgagroal.main accept: 5 Bad file descriptor 04-02 16:23:48.663 5688 5688 W pgagroal.main accept: 5 Bad file descriptor 04-02 16:23:48.663 5688 5688 W pgagroal.main accept: 5 Bad file descriptor 04-02 16:23:48.663 5688 5688 W pgagroal.main accept: 5 Bad file descriptor 04-02 16:23:48.663 5688 5688 W pgagroal.main accept: 5 Bad file descriptor 04-02 16:23:48.663 5688 5688 W pgagroal.main accept: 5 Bad file descriptor 04-02 16:23:48.663 5688 5688 W pgagroal.main accept: 5 Bad file descriptor 04-02 16:23:48.663 5688 5688 W pgagroal.main accept: 5 Bad file descriptor 04-02 16:23:48.663 5688 5688 W pgagroal.main accept: 5 Bad file descriptor 04-02 16:23:48.663 5688 5688 W pgagroal.main accept: 5 Bad file descriptor 04-02 16:23:48.663 5688 5688 W pgagroal.main accept: 5 Bad file descriptor 04-02 16:23:48.663 5688 5688 W pgagroal.main accept: 5 Bad file descriptor 04-02 16:23:48.663 5688 5688 W pgagroal.main accept: 5 Bad file descriptor 04-02 16:23:48.663 5688 5688 W pgagroal.main accept: 5 Bad file descriptor 04-02 16:23:48.663 5688 5688 W pgagroal.main accept: 5 Bad file descriptor 04-02 16:23:48.663 5688 5688 W pgagroal.main accept: 5 Bad file descriptor 04-02 16:23:48.663 5688 5688 W pgagroal.main accept: 5 Bad file descriptor 04-02 
16:23:48.663 5688 5688 W pgagroal.main accept: 5 Bad file descriptor 04-02 16:23:48.663 5688 5688 W pgagroal.main accept: 5 Bad file descriptor 04-02 16:23:48.663 5688 5688 W pgagroal.main accept: 5 Bad file descriptor 04-02 16:23:48.663 5688 5688 W pgagroal.main accept: 5 Bad file descriptor 04-02 16:23:48.663 5688 5688 W pgagroal.main accept: 5 Bad file descriptor 04-02 16:23:48.664 5688 5688 W pgagroal.main accept: 5 Bad file descriptor 04-02 16:23:48.664 5688 5688 W pgagroal.main accept: 5 Bad file descriptor 04-02 16:23:48.664 5688 5688 W pgagroal.main accept: 5 Bad file descriptor 04-02 16:23:48.664 5688 5688 W pgagroal.main accept: 5 Bad file descriptor 04-02 16:23:48.664 5688 5688 W pgagroal.main accept: 5 Bad file descriptor 04-02 16:23:48.664 5688 5688 W pgagroal.main accept: 5 Bad file descriptor 04-02 16:23:48.664 5688 5688 W pgagroal.main accept: 5 Bad file descriptor 04-02 16:23:48.664 5688 5688 W pgagroal.main accept: 5 Bad file descriptor 04-02 16:23:48.664 5688 5688 W pgagroal.main accept: 5 Bad file descriptor 04-02 16:23:48.664 5688 5688 W pgagroal.main accept: 5 Bad file descriptor 04-02 16:23:48.664 5688 5688 W pgagroal.main accept: 5 Bad file descriptor 04-02 16:23:48.664 5688 5688 W pgagroal.main accept: 5 Bad file descriptor 04-02 16:23:48.664 5688 5688 W pgagroal.main accept: 5 Bad file descriptor ................................................
I have changed the code to try a restart of the I/O channels when there is a fatal error from accept(). Of course the clients currently in the queue will fail, but future connections should be successful.
Could you give either master
or 0.5.x
a try ?
################################################### # centos 8 + libev 4.33 + libev=poll + latest pgagroal source ###################################################
After several PostgreSQL restarts, the same error occurs!
[postgres@ssdnodes00 bin]$ ./pg_start.sh 04-03 13:53:17.734 339 339 I pgagroal.main pgagroal: started on localhost:9091 04-03 13:53:17.736 341 341 I pgagroal.worker connect: address=127.0.0.1 04-03 13:53:17.736 341 341 I pgagroal.worker disconnect: address=127.0.0.1 04-03 13:53:18.241 342 342 I pgagroal.worker connect: address=:: 04-03 13:53:18.241 342 342 I pgagroal.worker disconnect: address=:: 04-03 13:53:18.474 343 343 I pgagroal.worker connect: user=tarantula database=tarantula address=127.0.0.1 04-03 13:53:18.491 344 344 I pgagroal.worker connect: user=tarantula database=tarantula address=127.0.0.1 04-03 13:53:18.546 345 345 I pgagroal.worker connect: user=tarantula database=tarantula address=127.0.0.1 04-03 13:53:21.586 346 346 I pgagroal.worker connect: user=tarantula database=tarantula address=127.0.0.1 04-03 13:53:33.266 355 355 I pgagroal.worker connect: address=127.0.0.1 04-03 13:53:33.266 355 355 I pgagroal.worker disconnect: address=127.0.0.1 04-03 13:53:47.956 356 356 I pgagroal.worker connect: user=tarantula database=tarantula address=127.0.0.1 04-03 13:53:48.254 357 357 I pgagroal.worker connect: address=127.0.0.1 04-03 13:53:48.254 357 357 I pgagroal.worker disconnect: address=127.0.0.1 04-03 13:53:48.978 356 356 I pgagroal.worker disconnect: user=tarantula database=tarantula address=127.0.0.1 04-03 13:53:49.823 358 358 I pgagroal.worker connect: user=tarantula database=tarantula address=127.0.0.1 04-03 13:53:51.336 358 358 I pgagroal.worker disconnect: user=tarantula database=tarantula address=127.0.0.1 04-03 13:53:52.130 359 359 I pgagroal.worker connect: user=tarantula database=tarantula address=127.0.0.1 04-03 13:53:52.916 359 359 I pgagroal.worker disconnect: user=tarantula database=tarantula address=127.0.0.1 04-03 13:53:53.054 347 347 I pgagroal.worker connect: address=127.0.0.1 04-03 13:53:53.054 347 347 I pgagroal.worker disconnect: address=127.0.0.1 04-03 13:53:57.081 348 348 I pgagroal.worker connect: address=127.0.0.1 04-03 
13:53:57.081 348 348 I pgagroal.worker disconnect: address=127.0.0.1 04-03 13:53:58.029 349 349 I pgagroal.worker connect: address=127.0.0.1 04-03 13:53:58.029 349 349 I pgagroal.worker disconnect: address=127.0.0.1 04-03 13:53:59.028 350 350 I pgagroal.worker connect: address=127.0.0.1 04-03 13:53:59.028 350 350 I pgagroal.worker disconnect: address=127.0.0.1 04-03 13:54:00.004 351 351 I pgagroal.worker connect: address=127.0.0.1 04-03 13:54:00.005 351 351 I pgagroal.worker disconnect: address=127.0.0.1 04-03 13:54:01.063 352 352 I pgagroal.worker connect: address=127.0.0.1 04-03 13:54:01.063 352 352 I pgagroal.worker disconnect: address=127.0.0.1 04-03 13:54:02.082 353 353 I pgagroal.worker connect: address=127.0.0.1 04-03 13:54:02.082 353 353 I pgagroal.worker disconnect: address=127.0.0.1 04-03 13:54:03.100 354 354 I pgagroal.worker connect: address=127.0.0.1 04-03 13:54:03.100 354 354 I pgagroal.worker disconnect: address=127.0.0.1 04-03 13:54:03.268 360 360 I pgagroal.worker connect: address=127.0.0.1 04-03 13:54:03.268 360 360 I pgagroal.worker disconnect: address=127.0.0.1 04-03 13:54:04.330 343 343 I pgagroal.worker disconnect: user=tarantula database=tarantula address=127.0.0.1 04-03 13:54:04.334 345 345 I pgagroal.worker disconnect: user=tarantula database=tarantula address=127.0.0.1 04-03 13:54:04.346 344 344 I pgagroal.worker disconnect: user=tarantula database=tarantula address=127.0.0.1 04-03 13:54:04.351 346 346 I pgagroal.worker disconnect: user=tarantula database=tarantula address=127.0.0.1 04-03 13:54:05.084 361 361 I pgagroal.worker connect: address=127.0.0.1 04-03 13:54:05.084 361 361 I pgagroal.worker disconnect: address=127.0.0.1 04-03 13:54:06.624 363 363 I pgagroal.worker connect: address=127.0.0.1 04-03 13:54:06.624 363 363 I pgagroal.worker disconnect: address=127.0.0.1 04-03 13:54:06.844 365 365 I pgagroal.worker connect: address=127.0.0.1 04-03 13:54:06.844 365 365 I pgagroal.worker disconnect: address=127.0.0.1 04-03 13:54:07.635 362 
362 I pgagroal.worker connect: address=127.0.0.1 04-03 13:54:07.635 362 362 I pgagroal.worker disconnect: address=127.0.0.1 04-03 13:54:07.639 339 339 W pgagroal.main Restarting listening port due to: Bad file descriptor (5) 04-03 13:54:07.693 364 364 I pgagroal.worker connect: address=127.0.0.1 04-03 13:54:07.693 364 364 I pgagroal.worker disconnect: address=127.0.0.1 04-03 13:54:07.700 366 366 I pgagroal.worker connect: address=127.0.0.1 04-03 13:54:07.700 366 366 I pgagroal.worker disconnect: address=127.0.0.1 04-03 13:54:08.118 367 367 I pgagroal.worker connect: address=127.0.0.1 04-03 13:54:08.118 367 367 I pgagroal.worker disconnect: address=127.0.0.1 04-03 13:54:08.130 368 368 I pgagroal.worker connect: address=127.0.0.1 04-03 13:54:08.130 368 368 I pgagroal.worker disconnect: address=127.0.0.1 04-03 13:54:09.634 369 369 I pgagroal.worker connect: address=127.0.0.1 04-03 13:54:09.634 369 369 I pgagroal.worker disconnect: address=127.0.0.1 04-03 13:54:09.645 370 370 I pgagroal.worker connect: address=127.0.0.1 04-03 13:54:09.645 370 370 I pgagroal.worker disconnect: address=127.0.0.1 04-03 13:54:09.845 371 371 I pgagroal.worker connect: address=127.0.0.1 04-03 13:54:09.846 371 371 I pgagroal.worker disconnect: address=127.0.0.1 04-03 13:54:10.703 372 372 W pgagroal.network pgagroal_connect: connect: 5 Connection refused 04-03 13:54:10.703 372 372 E pgagroal.pool pgagroal: No connection to localhost:9081 04-03 13:54:10.703 372 372 I pgagroal.worker connect: address=127.0.0.1 04-03 13:54:10.703 372 372 I pgagroal.worker disconnect: address=127.0.0.1 04-03 13:54:10.709 373 373 W pgagroal.network pgagroal_connect: connect: 5 Connection refused 04-03 13:54:10.709 373 373 E pgagroal.security pgagroal: No connection to localhost:9081 04-03 13:54:10.709 373 373 I pgagroal.worker connect: address=127.0.0.1 04-03 13:54:10.709 373 373 I pgagroal.worker disconnect: address=127.0.0.1 04-03 13:54:11.185 374 374 I pgagroal.worker connect: address=127.0.0.1 04-03 
13:54:11.185 374 374 I pgagroal.worker disconnect: address=127.0.0.1 04-03 13:54:11.195 375 375 I pgagroal.worker connect: address=127.0.0.1 04-03 13:54:11.195 375 375 I pgagroal.worker disconnect: address=127.0.0.1 04-03 13:54:12.718 376 376 I pgagroal.worker connect: user=tarantula database=tarantula address=127.0.0.1 04-03 13:54:12.858 377 377 I pgagroal.worker connect: user=tarantula database=tarantula address=127.0.0.1 04-03 13:54:13.739 378 378 I pgagroal.worker connect: user=tarantula database=tarantula address=127.0.0.1 04-03 13:54:18.260 379 379 I pgagroal.worker connect: address=:: 04-03 13:54:18.260 379 379 I pgagroal.worker disconnect: address=:: 04-03 13:54:22.378 380 380 I pgagroal.worker connect: user=tarantula database=tarantula address=:: 04-03 13:54:24.067 380 380 I pgagroal.worker disconnect: user=tarantula database=tarantula address=:: 04-03 13:54:24.702 381 381 I pgagroal.worker connect: user=tarantula database=tarantula address=:: 04-03 13:54:25.431 381 381 I pgagroal.worker disconnect: user=tarantula database=tarantula address=:: 04-03 13:54:27.147 378 378 I pgagroal.worker disconnect: user=tarantula database=tarantula address=127.0.0.1 04-03 13:54:27.148 376 376 I pgagroal.worker disconnect: user=tarantula database=tarantula address=127.0.0.1 04-03 13:54:27.148 377 377 I pgagroal.worker disconnect: user=tarantula database=tarantula address=127.0.0.1 04-03 13:54:33.237 382 382 I pgagroal.worker connect: address=::
########################################################## STAGE$ psql -U tarantula -d tarantula -h localhost -p 9091 psql: error: could not connect to server: could not connect to server: Connection refused Is the server running on host "localhost" (::1) and accepting TCP/IP connections on port 9091? could not connect to server: Connection refused Is the server running on host "localhost" (::1) and accepting TCP/IP connections on port 9091? could not connect to server: Connection refused Is the server running on host "localhost" (127.0.0.1) and accepting TCP/IP connections on port 9091? could not connect to server: Connection refused Is the server running on host "localhost" (127.0.0.1) and accepting TCP/IP connections on port 9091? [postgres@ssdnodes00:/home/postgres] STAGE$
##########################################################
Connection 0: Free 2020-04-03 13:54:40 tarantula tarantula
Connection 1: Not initialized
Connection 2: Not initialized
Connection 3: Not initialized
Connection 4: Not initialized
Connection 5: Not initialized
Connection 6: Not initialized
Connection 7: Not initialized
Connection 8: Not initialized
Connection 9: Not initialized
Connection 10: Not initialized
Connection 11: Not initialized
.................
Connection 994: Not initialized
Connection 995: Not initialized
Connection 996: Not initialized
Connection 997: Not initialized
Connection 998: Not initialized
Connection 999: Not initialized
This is a bit different since pgagroal can't connect to PostgreSQL on localhost:9081.
Is the PostgreSQL instance alive ?
I have added a FLUSH_GRACEFULLY
as well
[postgres@ssdnodes00:/home/postgres] STAGE$ psql -U tarantula -d tarantula -h 127.0.0.1 -p 9091 psql: error: could not connect to server: could not connect to server: Connection refused Is the server running on host "127.0.0.1" and accepting TCP/IP connections on port 9091?
-- pg alived
[postgres@ssdnodes00:/home/postgres]
STAGE$ psql -U tarantula -d tarantula -h 127.0.0.1 -p 9081
psql (12.2 (Debian 12.2-2.pgdg100+1))
Type "help" for help.
tarantula=#
Ok I will retry it !
################ new pgagroal compile #####################
[root@ssdnodes00 src]# pwd
/usr/local/src
[root@ssdnodes00 src]# grep -r FLUSH_GRACEFULLY
pgagroal/src/libpgagroal/pool.c: else if (mode == FLUSH_ALL || mode == FLUSH_GRACEFULLY)
pgagroal/src/libpgagroal/pool.c: else if (mode == FLUSH_GRACEFULLY)
pgagroal/src/main.c: pgagroal_flush(ai->shmem, FLUSH_GRACEFULLY);
pgagroal/src/cli.c: mode = FLUSH_GRACEFULLY;
pgagroal/src/include/pgagroal.h:#define FLUSH_GRACEFULLY 1
[root@ssdnodes00 src]# build....done ####################################################3
I got the same error.
############ pg alive ###################################### [postgres@ssdnodes00:/home/postgres] STAGE$ psql -U tarantula -d tarantula -h 127.0.0.1 -p 9081 psql (12.2 (Debian 12.2-2.pgdg100+1)) Type "help" for help.
tarantula=# \q [postgres@ssdnodes00:/home/postgres] STAGE$ psql -U tarantula -d tarantula -h 127.0.0.1 -p 9091 psql: error: could not connect to server: could not connect to server: Connection refused Is the server running on host "127.0.0.1" and accepting TCP/IP connections on port 9091? [postgres@ssdnodes00:/home/postgres] STAGE$
################ lsof ###########################################3 [postgres@ssdnodes00 bin]$ ps -ef|grep pgag postgres 860 859 0 14:16 pts/1 00:00:00 pgagroal -c pgagroal.conf -a pgagroal_hba.conf -l pgagroal_databases.conf -u pgagroal_users.conf postgres 897 400 0 14:19 pts/2 00:00:00 grep --color=auto pgag
[postgres@ssdnodes00 bin]$ lsof -p 860
COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME
pgagroal 860 postgres cwd DIR 0,270 4096 20714479 /usr/local/bin
pgagroal 860 postgres rtd DIR 0,270 4096 20709393 /
pgagroal 860 postgres txt REG 0,270 66832 20709634 /usr/local/bin/pgagroal
pgagroal 860 postgres mem REG 8,1 20709634 /usr/local/bin/pgagroal (path dev=0,270)
pgagroal 860 postgres DEL REG 0,5 1551035754 /dev/zero
pgagroal 860 postgres mem REG 8,1 18750965 /usr/lib64/libnss_files-2.28.so (path dev=0,270)
pgagroal 860 postgres mem REG 8,1 18751001 /usr/lib64/libpthread-2.28.so (path dev=0,270)
pgagroal 860 postgres mem REG 8,1 18750834 /usr/lib64/libdl-2.28.so (path dev=0,270)
pgagroal 860 postgres mem REG 8,1 18751078 /usr/lib64/libz.so.1.2.11 (path dev=0,270)
pgagroal 860 postgres mem REG 8,1 18750928 /usr/lib64/libm-2.28.so (path dev=0,270)
pgagroal 860 postgres mem REG 8,1 18750808 /usr/lib64/libc-2.28.so (path dev=0,270)
pgagroal 860 postgres mem REG 8,1 20709633 /usr/local/lib64/libpgagroal.so.0.6.0 (path dev=0,270)
pgagroal 860 postgres mem REG 8,1 18750822 /usr/lib64/libcrypto.so.1.1.1c (path dev=0,270)
pgagroal 860 postgres mem REG 8,1 18751042 /usr/lib64/libssl.so.1.1.1c (path dev=0,270)
pgagroal 860 postgres mem REG 8,1 20714497 /usr/local/lib/libev.so.4.0.0 (path dev=0,270)
pgagroal 860 postgres mem REG 8,1 18750782 /usr/lib64/ld-2.28.so (path dev=0,270)
pgagroal 860 postgres 0u CHR 136,1 0t0 4 /dev/pts/1
pgagroal 860 postgres 1u CHR 136,1 0t0 4 /dev/pts/1
pgagroal 860 postgres 2u CHR 136,1 0t0 4 /dev/pts/1
pgagroal 860 postgres 3u unix 0x00000000d195b458 0t0 1551035755 /tmp/.s.pgagroal type=STREAM
pgagroal 860 postgres 5u a_inode 0,13 0 8269 [eventpoll]
pgagroal 860 postgres 6u a_inode 0,13 0 8269 [eventfd]
pgagroal 860 postgres 10u sock 0,9 0t0 1551033863 protocol: TCP
pgagroal 860 postgres 11u sock 0,9 0t0 1551033869 protocol: TCP
pgagroal 860 postgres 13u sock 0,9 0t0 1551035802 protocol: TCP
pgagroal 860 postgres 14u sock 0,9 0t0 1551035812 protocol: TCP
pgagroal 860 postgres 16u sock 0,9 0t0 1551036590 protocol: TCP
pgagroal 860 postgres 17u sock 0,9 0t0 1551036596 protocol: TCP
pgagroal 860 postgres 18u IPv4 1551036603 0t0 TCP localhost:51532->localhost:9081 (CLOSE_WAIT)
pgagroal 860 postgres 20u IPv4 1551035853 0t0 TCP localhost:51538->localhost:9081 (CLOSE_WAIT)
[postgres@ssdnodes00 bin]$
my test environment:
local os ===> debian 10 + postgresql 12 docker ( in local debian 10) ==> centos 8 + pgagroal (container)
Ok, thanks for trying. I'll keep looking
I have changed the connect logic, and fixed the socket descriptor leak.
If you can still reproduce it could you include output from the log plus
ps -ef | grep pgagroal
lsof -i -P -n | grep LISTEN
lsof -p <pid>
Any chance you can send me your container setup per mail ?
I got the same error. I sent an email.
Thank you ! I'll check it out -- will use podman though
I have been unable to reproduce it with your image.
Multiple restarts of PostgreSQL while keeping pgagroal as is don't yield any error. Also, I haven't seen the EBADF
error code during accept()
.
Testing with https://github.com/jesperpedersen/libfaults/ doesn't trigger any new error scenario either using current master
or 0.5.x
.
Is there a way you can get the Debian Kernel Team to help debug this ?
BTW, are you on Debian 10.2 ? And is the kernel 4.19.98 ? Latest is 4.19.114.
[root@ssdnodes00:/root] STAGE# cat /etc/debian_version 10.3 [root@ssdnodes00:/root] STAGE# uname -a Linux ssdnodes00 4.19.0-6-amd64 #1 SMP Debian 4.19.67-2+deb10u2 (2019-11-11) x86_64 GNU/Linux [root@ssdnodes00:/root] STAGE#
I will retry it with another linux server
I will report again after test.
Thank you for your support.
Using latest pgagroal source.
test case#1 ######################################### host os: debian 10 docker image: centos8+ pgagroal ######################################### It works good
test case#2 ######################################### host os: centos 7 docker image: centos8+ pgagroal ######################################### It works good.
Maybe it was caused by a mistake in my testing, or it was fixed by changes in the latest source.
Anyway, there are no problems at all. Thanks!
Great !
Thanks for your feedback, testing and help with getting this issue closed.
##################################### pgagroal.conf ##################################### [pgagroal] host = localhost port = 9091
log_type = console log_level = info log_path =
max_connections = 1000 idle_timeout = 600 validation = foreground unix_socket_dir = /tmp/.s.pgagroal libev=select log_connections = true log_disconnections = true
[primary] host = localhost port = 9081
##################################### pgagroal version ##################################### $ pgagroal -V (git clone... using latest source!) pgagroal 0.6.0
$ cat pg_start.sh
#!/bin/bash
pgagroal -c pgagroal.conf -a pgagroal_hba.conf -l pgagroal_databases.conf -u pgagroal_users.conf
STAGE$ ./pg_start.sh 04-02 08:24:12.842 17741 17741 I pgagroal.main pgagroal: started on localhost:9091 04-02 08:24:13.356 18052 18052 I pgagroal.worker connect: user=tarantula database=tarantula address=127.0.0.1 04-02 08:24:13.554 18067 18067 I pgagroal.worker connect: user=tarantula database=tarantula address=127.0.0.1 04-02 08:24:13.555 18068 18068 I pgagroal.worker connect: user=tarantula database=tarantula address=127.0.0.1 04-02 08:24:13.559 18069 18069 I pgagroal.worker connect: address=127.0.0.1 04-02 08:24:13.559 18069 18069 I pgagroal.worker disconnect: address=127.0.0.1 04-02 08:24:13.560 18070 18070 I pgagroal.worker connect: address=127.0.0.1 04-02 08:24:13.560 18070 18070 I pgagroal.worker disconnect: address=127.0.0.1 04-02 08:24:18.259 20207 20207 I pgagroal.worker connect: address=:: 04-02 08:24:18.260 20207 20207 I pgagroal.worker disconnect: address=:: 04-02 08:24:33.257 25631 25631 I pgagroal.worker connect: address=:: 04-02 08:24:33.257 25631 25631 I pgagroal.worker disconnect: address=:: 04-02 08:24:43.069 18068 18068 I pgagroal.worker disconnect: user=tarantula database=tarantula address=127.0.0.1 04-02 08:24:43.075 18067 18067 I pgagroal.worker disconnect: user=tarantula database=tarantula address=127.0.0.1 04-02 08:24:43.095 18052 18052 I pgagroal.worker disconnect: user=tarantula database=tarantula address=127.0.0.1 04-02 08:24:43.460 31505 31505 I pgagroal.worker connect: address=127.0.0.1 04-02 08:24:43.460 31505 31505 I pgagroal.worker disconnect: address=127.0.0.1 04-02 08:24:43.636 31511 31511 I pgagroal.worker connect: address=127.0.0.1 04-02 08:24:43.636 31511 31511 I pgagroal.worker disconnect: address=127.0.0.1 04-02 08:24:43.650 31512 31512 I pgagroal.worker connect: address=127.0.0.1 04-02 08:24:43.650 31512 31512 I pgagroal.worker disconnect: address=127.0.0.1 04-02 08:24:46.051 32263 32263 I pgagroal.worker connect: address=127.0.0.1 04-02 08:24:46.051 32263 32263 I pgagroal.worker disconnect: address=127.0.0.1 04-02 
08:24:46.149 31799 31799 I pgagroal.worker connect: address=127.0.0.1 04-02 08:24:46.149 31799 31799 I pgagroal.worker disconnect: address=127.0.0.1 04-02 08:24:46.257 31904 31904 I pgagroal.worker connect: address=127.0.0.1 04-02 08:24:46.257 31904 31904 I pgagroal.worker disconnect: address=127.0.0.1 04-02 08:24:48.265 684 684 I pgagroal.worker connect: address=:: 04-02 08:24:48.265 684 684 I pgagroal.worker disconnect: address=::
################################## before postgresql restart ################################## STAGE$ nc -vz localhost 9091 localhost [127.0.0.1] 9091 (?) open [postgres@ssdnodes00:/home/postgres] STAGE$
################################## db restart ################################## postgresql restart -w -c
################################## after postgresql restart ################################## [postgres@ssdnodes00:/home/postgres] STAGE$ nc -vz localhost 9091 localhost [127.0.0.1] 9091 (?) : Connection refused <============ pgagroal die ??? [postgres@ssdnodes00:/home/postgres]
STAGE$ nc -vz localhost 9091 localhost [127.0.0.1] 9091 (?) : Connection refused [postgres@ssdnodes00:/home/postgres] STAGE$
after 5 minutes .......................
[postgres@ssdnodes00:/home/postgres] STAGE$ nc -vz localhost 9091 localhost [127.0.0.1] 9091 (?) : Connection refused <===== still refused [postgres@ssdnodes00:/home/postgres] STAGE$