Hi,
Yeah, I'm not surprised that there might be some bugs in here. I'll have a poke at it.
Cheers, Andrew
Hmm, and I'm slightly surprised to say this, but it seems to be pretty reliable for me, using links from https://fastest.fish/test-files
# 1G download with curl(1)
$ curl -L -o /dev/null https://speed.hetzner.de/1GB.bin
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 1000M  100 1000M    0     0  3349k      0  0:05:05  0:05:05 --:--:-- 8119k
# Same 1G download with wget(1)
$ wget -O /dev/null https://speed.hetzner.de/1GB.bin
--2023-10-04 22:11:26-- https://speed.hetzner.de/1GB.bin
Resolving speed.hetzner.de (speed.hetzner.de)... 2a01:4f8:0:59ed::2, 88.198.248.254
Connecting to speed.hetzner.de (speed.hetzner.de)|2a01:4f8:0:59ed::2|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1048576000 (1000M) [application/octet-stream]
Saving to: ‘/dev/null’
/dev/null 100%[===================>] 1000M 14.7MB/s in 77s
2023-10-04 22:12:44 (13.0 MB/s) - ‘/dev/null’ saved [1048576000/1048576000]
# Next two happened simultaneously
$ wget -O /dev/null https://speed.hetzner.de/1GB.bin
--2023-10-04 22:15:17-- https://speed.hetzner.de/1GB.bin
Resolving speed.hetzner.de (speed.hetzner.de)... 2a01:4f8:0:59ed::2, 88.198.248.254
Connecting to speed.hetzner.de (speed.hetzner.de)|2a01:4f8:0:59ed::2|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1048576000 (1000M) [application/octet-stream]
Saving to: ‘/dev/null’
/dev/null 100%[===================>] 1000M 6.71MB/s in 1m 51s
2023-10-04 22:17:09 (8.97 MB/s) - ‘/dev/null’ saved [1048576000/1048576000]
$ curl -L -o /dev/null https://speed.hetzner.de/1GB.bin
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 1000M  100 1000M    0     0  7168k      0  0:02:22  0:02:22 --:--:-- 12.6M
You could try the following, somewhat hacky patch, but it should get things unstuck...
diff --git a/src/proxy.c b/src/proxy.c
index 6acf7ae..70963e7 100644
--- a/src/proxy.c
+++ b/src/proxy.c
@@ -105,6 +105,8 @@ struct conn {
};
static int epollfd;
+static ac_slist_t *conns;
+
extern bool use_sni;
extern ac_slist_t *listen_fds;
@@ -242,6 +244,8 @@ static void set_conn_close(ac_slist_t **close_list, struct conn *conn)
}
ac_slist_preadd(close_list, conn);
epoll_ctl(epollfd, EPOLL_CTL_DEL, conn->fd, NULL);
+
+ ac_slist_remove(&conns, conn, NULL);
}
static void check_proxy_connect(struct conn *conn)
@@ -446,9 +450,30 @@ static int do_accept(int lfd)
ev.data.ptr = (void *)conn;
epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev);
+ ac_slist_preadd(&conns, conn);
+
return 0;
}
+static void try_io(void *item, void *data)
+{
+ bool from;
+ bool to;
+ struct conn *conn = item;
+ struct conn *other = conn->other;
+ ac_slist_t *close_list = data;
+
+ from = read_from_sock(conn);
+ to = write_to_sock(other, conn);
+ if (!from || !to)
+ set_conn_close(&close_list, conn);
+
+ from = read_from_sock(other);
+ to = write_to_sock(conn, other);
+ if (!from || !to)
+ set_conn_close(&close_list, conn);
+}
+
static void do_proxy(const struct addrinfo *proxy)
{
for (;;) {
@@ -457,7 +482,9 @@ static void do_proxy(const struct addrinfo *proxy)
ac_slist_t *close_list = NULL;
struct epoll_event events[MAX_EVENTS];
- nfds = epoll_wait(epollfd, events, MAX_EVENTS, -1);
+ nfds = epoll_wait(epollfd, events, MAX_EVENTS, 200);
+ if (nfds == 0)
+ ac_slist_foreach(conns, try_io, close_list);
for (n = 0; n < nfds; n++) {
struct epoll_event *ev = &events[n];
struct conn *conn = ev->data.ptr;
@@ -501,20 +528,8 @@ static void do_proxy(const struct addrinfo *proxy)
continue;
}
- if (ev->events & (EPOLLIN | EPOLLOUT)) {
- bool from;
- bool to;
-
- from = read_from_sock(conn);
- to = write_to_sock(other, conn);
- if (!from || !to)
- set_conn_close(&close_list, conn);
-
- from = read_from_sock(other);
- to = write_to_sock(conn, other);
- if (!from || !to)
- set_conn_close(&close_list, conn);
- }
+ if (ev->events & (EPOLLIN | EPOLLOUT))
+ try_io(conn, close_list);
}
if (!close_list)
continue;
Hm... I can't apply this patch :( What am I doing wrong?

grk@devuan:/home/grk/dl/sprotly-master$ patch -p1 < ./proxy-download.diff
patching file src/proxy.c
Hunk #2 FAILED at 244.
Hunk #3 FAILED at 448.
Hunk #4 FAILED at 459.
Hunk #5 FAILED at 503.
4 out of 5 hunks FAILED -- saving rejects to file src/proxy.c.rej
Looks like it got whitespace-damaged while copy/pasting. Try this one.
diff --git a/src/proxy.c b/src/proxy.c
index 6acf7ae..70963e7 100644
--- a/src/proxy.c
+++ b/src/proxy.c
@@ -105,6 +105,8 @@ struct conn {
};
static int epollfd;
+static ac_slist_t *conns;
+
extern bool use_sni;
extern ac_slist_t *listen_fds;
@@ -242,6 +244,8 @@ static void set_conn_close(ac_slist_t **close_list, struct conn *conn)
}
ac_slist_preadd(close_list, conn);
epoll_ctl(epollfd, EPOLL_CTL_DEL, conn->fd, NULL);
+
+ ac_slist_remove(&conns, conn, NULL);
}
static void check_proxy_connect(struct conn *conn)
@@ -446,9 +450,30 @@ static int do_accept(int lfd)
ev.data.ptr = (void *)conn;
epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev);
+ ac_slist_preadd(&conns, conn);
+
return 0;
}
+static void try_io(void *item, void *data)
+{
+ bool from;
+ bool to;
+ struct conn *conn = item;
+ struct conn *other = conn->other;
+ ac_slist_t *close_list = data;
+
+ from = read_from_sock(conn);
+ to = write_to_sock(other, conn);
+ if (!from || !to)
+ set_conn_close(&close_list, conn);
+
+ from = read_from_sock(other);
+ to = write_to_sock(conn, other);
+ if (!from || !to)
+ set_conn_close(&close_list, conn);
+}
+
static void do_proxy(const struct addrinfo *proxy)
{
for (;;) {
@@ -457,7 +482,9 @@ static void do_proxy(const struct addrinfo *proxy)
ac_slist_t *close_list = NULL;
struct epoll_event events[MAX_EVENTS];
- nfds = epoll_wait(epollfd, events, MAX_EVENTS, -1);
+ nfds = epoll_wait(epollfd, events, MAX_EVENTS, 200);
+ if (nfds == 0)
+ ac_slist_foreach(conns, try_io, close_list);
for (n = 0; n < nfds; n++) {
struct epoll_event *ev = &events[n];
struct conn *conn = ev->data.ptr;
@@ -501,20 +528,8 @@ static void do_proxy(const struct addrinfo *proxy)
continue;
}
- if (ev->events & (EPOLLIN | EPOLLOUT)) {
- bool from;
- bool to;
-
- from = read_from_sock(conn);
- to = write_to_sock(other, conn);
- if (!from || !to)
- set_conn_close(&close_list, conn);
-
- from = read_from_sock(other);
- to = write_to_sock(conn, other);
- if (!from || !to)
- set_conn_close(&close_list, conn);
- }
+ if (ev->events & (EPOLLIN | EPOLLOUT))
+ try_io(conn, close_list);
}
if (!close_list)
continue;
compilation error :(
sprotly.c: In function ‘main’:
sprotly.c:578:17: warning: passing argument 1 of ‘ac_fs_mkdir_p’ makes pointer from integer without a cast [-Wint-conversion]
  578 |   ac_fs_mkdir_p(-1, LOG_PATH, 0777);
      |                 ^~
      |                 |
      |                 int
In file included from sprotly.c:43:
/usr/local/include/libac.h:187:31: note: expected ‘const char *’ but argument is of type ‘int’
  187 | int ac_fs_mkdir_p(const char *path);
      |                   ~~~~~~~~~~~^~~~~
sprotly.c:578:3: error: too many arguments to function ‘ac_fs_mkdir_p’
  578 |   ac_fs_mkdir_p(-1, LOG_PATH, 0777);
      |   ^~~~~~~~~~~~~
In file included from sprotly.c:43:
/usr/local/include/libac.h:187:5: note: declared here
  187 | int ac_fs_mkdir_p(const char *path);
      |     ^~~~~~~~~~~~~
make: *** [Makefile:51: sprotly.o] Error 1
Sorry, it was "false alert". I had older "libac.h" on path. After update to newest version, I get no errors during compilation
Looks like with the patch applied, sprotly has no problems with large file downloads. The only strange thing I noticed is an error appearing in the system logs:

[Thu Oct 5 13:59:01 2023] traps: sprotly[5201] general protection fault ip:55e86ba1b3ab sp:7ffd13907d60 error:0 in sprotly[55e86ba1a000+5000]
[Thu Oct 5 14:09:47 2023] traps: sprotly[5235] general protection fault ip:55e86ba1b3ab sp:7ffd13907d60 error:0 in sprotly[55e86ba1a000+5000]
[Thu Oct 5 14:21:23 2023] traps: sprotly[6251] general protection fault ip:558ba06b03ab sp:7fff2fec9cb0 error:0 in sprotly[558ba06af000+5000]
[Thu Oct 5 14:33:40 2023] traps: sprotly[6392] general protection fault ip:559b08bcc3ab sp:7ffd69525d50 error:0 in sprotly[559b08bcb000+5000]

I didn't catch the moment the error appears. Is it during an interrupt while killing the process? Or during start? I don't know...
I think I found the moment the error shows up.

In sprotly access.log:

...
[2023-10-05 14:59:48 +0200] 1377: Starting 1 worker processes
[2023-10-05 14:59:48 +0200] 4124: Worker switched to user nobody:nobody (65534:65534)
...
[2023-10-05 15:01:22 +0200] 1377: Starting 1 worker processes
[2023-10-05 15:01:22 +0200] 4133: Worker switched to user nobody:nobody (65534:65534)
...

In syslog:

...
Oct 5 14:59:48 dev4-20-tunapi kernel: [ 1155.558368] traps: sprotly[1380] general protection fault ip:558fce27d3ab sp:7ffdfe29d5a0 error:0 in sprotly[558fce27c000+5000]
Oct 5 15:01:22 dev4-20-tunapi kernel: [ 1249.379746] traps: sprotly[4124] general protection fault ip:558fce27d3ab sp:7ffdfe29d5a0 error:0 in sprotly[558fce27c000+5000]

I hope this report helps.
Looks like it's crashing.

Try this patch on top of the previous one (this should at least bring back the 'closed' messages). It makes close_list file-scoped rather than threading it through as a callback argument; see the note after the patch for why that matters.
diff --git a/src/proxy.c b/src/proxy.c
index 70963e7..389829e 100644
--- a/src/proxy.c
+++ b/src/proxy.c
@@ -106,6 +106,7 @@ struct conn {
static int epollfd;
static ac_slist_t *conns;
+static ac_slist_t *close_list;
extern bool use_sni;
extern ac_slist_t *listen_fds;
@@ -227,9 +228,9 @@ static void close_conn(void *data, void *user_data __always_unused)
conn->bytes_tx, conn->bytes_rx, et*1000.0);
}
-static void set_conn_close(ac_slist_t **close_list, struct conn *conn)
+static void set_conn_close(struct conn *conn)
{
- ac_slist_t *p = *close_list;
+ ac_slist_t *p = close_list;
while (p) {
/* Don't add duplicate entries */
@@ -239,10 +240,10 @@ static void set_conn_close(ac_slist_t **close_list, struct conn *conn)
}
if (conn->other) {
- ac_slist_preadd(close_list, conn->other);
+ ac_slist_preadd(&close_list, conn->other);
epoll_ctl(epollfd, EPOLL_CTL_DEL, conn->other->fd, NULL);
}
- ac_slist_preadd(close_list, conn);
+ ac_slist_preadd(&close_list, conn);
epoll_ctl(epollfd, EPOLL_CTL_DEL, conn->fd, NULL);
ac_slist_remove(&conns, conn, NULL);
@@ -455,23 +456,22 @@ static int do_accept(int lfd)
return 0;
}
-static void try_io(void *item, void *data)
+static void try_io(void *item, void *data __unused)
{
bool from;
bool to;
struct conn *conn = item;
struct conn *other = conn->other;
- ac_slist_t *close_list = data;
from = read_from_sock(conn);
to = write_to_sock(other, conn);
if (!from || !to)
- set_conn_close(&close_list, conn);
+ set_conn_close(conn);
from = read_from_sock(other);
to = write_to_sock(conn, other);
if (!from || !to)
- set_conn_close(&close_list, conn);
+ set_conn_close(conn);
}
static void do_proxy(const struct addrinfo *proxy)
@@ -479,12 +479,13 @@ static void do_proxy(const struct addrinfo *proxy)
for (;;) {
int n;
int nfds;
- ac_slist_t *close_list = NULL;
struct epoll_event events[MAX_EVENTS];
+ close_list = NULL;
+
nfds = epoll_wait(epollfd, events, MAX_EVENTS, 200);
if (nfds == 0)
- ac_slist_foreach(conns, try_io, close_list);
+ ac_slist_foreach(conns, try_io, NULL);
for (n = 0; n < nfds; n++) {
struct epoll_event *ev = &events[n];
struct conn *conn = ev->data.ptr;
@@ -505,7 +506,7 @@ static void do_proxy(const struct addrinfo *proxy)
}
if (ev->events & (EPOLLERR | EPOLLHUP)) {
- set_conn_close(&close_list, conn);
+ set_conn_close(conn);
continue;
}
@@ -515,7 +516,7 @@ static void do_proxy(const struct addrinfo *proxy)
continue;
other = do_open_conn(proxy, conn);
if (!other) {
- set_conn_close(&close_list, conn);
+ set_conn_close(conn);
continue;
}
read_from_sock(conn);
@@ -529,7 +530,7 @@ static void do_proxy(const struct addrinfo *proxy)
}
if (ev->events & (EPOLLIN | EPOLLOUT))
- try_io(conn, close_list);
+ try_io(conn, NULL);
}
if (!close_list)
continue;
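To spell out the pass-by-value detail behind that change: ac_slist_foreach() hands its data argument to the callback by value, so when try_io() in the previous patch did set_conn_close(&close_list, conn), it was updating its own local copy of the head pointer and do_proxy() never saw the additions, meaning those connections never went through close_conn(). A minimal standalone C illustration (not sprotly code):

#include <stdio.h>

struct node { struct node *next; };

/* 'head' is a copy of the caller's pointer, just like try_io()'s
   close_list was a copy handed in via the foreach data argument. */
static void add_node(struct node *head, struct node *n)
{
	n->next = head;
	head = n;		/* updates the local copy only */
}

int main(void)
{
	struct node *list = NULL;
	struct node n;

	add_node(list, &n);
	printf("list is %s\n", list ? "non-NULL" : "still NULL");

	return 0;
}

Making close_list file-scoped means set_conn_close() always updates the one real list.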
If you still get those other errors and you have core dumps, then you can do

gdb /path/to/sprotly /path/to/core-dump
(gdb) bt full

and paste the output here. If you don't have core dumps you can try enabling them, or you could try the addr2line(1) utility.
Something like
$ addr2line -e /path/to/sprotly -fCi addr
For addr, try using the ip address from the dmesg errors, e.g.
$ addr2line -e /path/to/sprotly -fCi 0x558fce27d3ab
or you may need to use ip minus the sprotly load address, e.g. 0x558fce27d3ab - 0x558fce27c000 = 0x13AB
$ addr2line -e /path/to/sprotly -fCi 0x13AB
Those addresses will change with updates to the code, so make sure you're using the right ones.
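As an aside, if enabling core dumps from the invoking shell is awkward (e.g. for a daemon started at boot), a process can raise its own core-dump limit. A generic C sketch, not something sprotly currently does:

#include <sys/resource.h>

/* In-process equivalent of `ulimit -c unlimited'. */
static void enable_core_dumps(void)
{
	struct rlimit rl = { RLIM_INFINITY, RLIM_INFINITY };

	setrlimit(RLIMIT_CORE, &rl);
}

You'd call it early in main(); note the hard limit must permit the raise for unprivileged processes, and where the kernel actually writes the core is governed by /proc/sys/kernel/core_pattern.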
Sorry, I'm completely new to debugging and don't know how to properly use tools such as gdb or addr2line. I couldn't get sprotly running under gdb with its arguments :( And... I have a more serious-looking error here:

[Thu Oct 5 18:29:12 2023] sprotly[1382]: segfault at 2c ip 000055b62f255119 sp 00007fff5c2c0d50 error 4 in sprotly[55b62f254000+5000]
[Thu Oct 5 18:29:12 2023] Code: 6d f0 ff ff 66 66 2e 0f 1f 84 00 00 00 00 00 66 90 53 48 89 fb eb 13 66 2e 0f 1f 84 00 00 00 00 00 48 01 43 30 83 3b 01 74 3f <8b> 53 2c 8b 7b 18 31 c9 31 f6 41 b9 03 00 00 00 41 b8 00 40 00 00
Looks like this segfault is related to starting the worker process too:

[2023-10-05 18:29:12 +0200] 1378: Starting 1 worker processes
[2023-10-05 18:29:12 +0200] 4164: Worker switched to user nobody:nobody (65534:65534)
Could you send me a copy of the sprotly binary? Just send it to my email address as listed in the Contributing section of the readme.
If you haven't rebuilt it since that segfault error you posted then OK, otherwise run it again and send me the latest segfault error from dmesg.
Do you get the segfault if you run it in the foreground with the -D option?
Thanks for the email and information. It was a long shot that addr2line would work on that binary; it's hit and miss at the best of times.
I do, however, have a Debian 11 VM; I tried building sprotly there against a static libac with the patches applied, and have reproduced the crash.
Full patch to be applied against a clean tree...
diff --git a/src/proxy.c b/src/proxy.c
index 6acf7ae..e89a565 100644
--- a/src/proxy.c
+++ b/src/proxy.c
@@ -105,6 +105,9 @@ struct conn {
};
static int epollfd;
+static ac_slist_t *conns;
+static ac_slist_t *close_list;
+
extern bool use_sni;
extern ac_slist_t *listen_fds;
@@ -223,11 +226,13 @@ static void close_conn(void *data, void *user_data __always_unused)
(ipv6 || use_sni) ? "[" : "", conn->dst_addr,
(ipv6 || use_sni) ? "]" : "", conn->dst_port,
conn->bytes_tx, conn->bytes_rx, et*1000.0);
+
+ ac_slist_remove(&conns, conn, NULL);
}
-static void set_conn_close(ac_slist_t **close_list, struct conn *conn)
+static void set_conn_close(struct conn *conn)
{
- ac_slist_t *p = *close_list;
+ ac_slist_t *p = close_list;
while (p) {
/* Don't add duplicate entries */
@@ -237,10 +242,10 @@ static void set_conn_close(ac_slist_t **close_list, struct conn *conn)
}
if (conn->other) {
- ac_slist_preadd(close_list, conn->other);
+ ac_slist_preadd(&close_list, conn->other);
epoll_ctl(epollfd, EPOLL_CTL_DEL, conn->other->fd, NULL);
}
- ac_slist_preadd(close_list, conn);
+ ac_slist_preadd(&close_list, conn);
epoll_ctl(epollfd, EPOLL_CTL_DEL, conn->fd, NULL);
}
@@ -446,18 +451,41 @@ static int do_accept(int lfd)
ev.data.ptr = (void *)conn;
epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev);
+ ac_slist_preadd(&conns, conn);
+
return 0;
}
+static void try_io(void *item, void *data __unused)
+{
+ bool from;
+ bool to;
+ struct conn *conn = item;
+ struct conn *other = conn->other;
+
+ from = read_from_sock(conn);
+ to = write_to_sock(other, conn);
+ if (!from || !to)
+ set_conn_close(conn);
+
+ from = read_from_sock(other);
+ to = write_to_sock(conn, other);
+ if (!from || !to)
+ set_conn_close(conn);
+}
+
static void do_proxy(const struct addrinfo *proxy)
{
for (;;) {
int n;
int nfds;
- ac_slist_t *close_list = NULL;
struct epoll_event events[MAX_EVENTS];
- nfds = epoll_wait(epollfd, events, MAX_EVENTS, -1);
+ close_list = NULL;
+
+ nfds = epoll_wait(epollfd, events, MAX_EVENTS, 200);
+ if (nfds == 0)
+ ac_slist_foreach(conns, try_io, NULL);
for (n = 0; n < nfds; n++) {
struct epoll_event *ev = &events[n];
struct conn *conn = ev->data.ptr;
@@ -478,7 +506,7 @@ static void do_proxy(const struct addrinfo *proxy)
}
if (ev->events & (EPOLLERR | EPOLLHUP)) {
- set_conn_close(&close_list, conn);
+ set_conn_close(conn);
continue;
}
@@ -488,7 +516,7 @@ static void do_proxy(const struct addrinfo *proxy)
continue;
other = do_open_conn(proxy, conn);
if (!other) {
- set_conn_close(&close_list, conn);
+ set_conn_close(conn);
continue;
}
read_from_sock(conn);
@@ -501,20 +529,8 @@ static void do_proxy(const struct addrinfo *proxy)
continue;
}
- if (ev->events & (EPOLLIN | EPOLLOUT)) {
- bool from;
- bool to;
-
- from = read_from_sock(conn);
- to = write_to_sock(other, conn);
- if (!from || !to)
- set_conn_close(&close_list, conn);
-
- from = read_from_sock(other);
- to = write_to_sock(conn, other);
- if (!from || !to)
- set_conn_close(&close_list, conn);
- }
+ if (ev->events & (EPOLLIN | EPOLLOUT))
+ try_io(conn, NULL);
}
if (!close_list)
continue;
Thanks for the last patch. Now sprotly runs without any errors and large downloads work without any problems. One bad thing I noticed is that many simultaneous downloads may slow down significantly; a good example of this slowdown is a docker pull of a large image. Maybe increasing the number of workers could help? Should I open a separate issue for this problem?
In theory each worker should be able to handle thousands of connections.
But yeah, two things you can try
1) Increasing the number of workers. Edit
655 create_workers(-1, proxy);
in sprotly.c and change the -1 to some positive number (i.e. how many workers you want).
2) Reduce the epoll_wait(2) timeout. Edit
486 nfds = epoll_wait(epollfd, events, MAX_EVENTS, 200);
in proxy.c and lower the 200 value. This tells epoll_wait to time out after n milliseconds even if no events occurred. I changed this from -1 (no timeout) to 200 in the patch you have. Try 100 or 50, but you don't want it too low. I have an idea of setting this dynamically depending on the current situation...
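To sketch that idea (rough only: nr_conns is a hypothetical counter that would be incremented in do_accept() and decremented when a connection closes, and the thresholds are made-up numbers):

/* Pick the epoll_wait timeout based on how busy we are. */
static int poll_timeout(void)
{
	if (nr_conns == 0)
		return -1;	/* idle: just block until an event */

	/* The busier we are, the sooner we wake to retry stalled I/O. */
	return nr_conns > 64 ? 50 : 200;
}

and then in do_proxy()

	nfds = epoll_wait(epollfd, events, MAX_EVENTS, poll_timeout());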
Cheers, Andrew
Thank you for your advice! I'll play with the tweaks you mentioned. It's a great idea to get rid of the magic numbers and make it all more dynamic.
Cheers, Greg
The latest changes should address the various issues above.
Feel free to re-open if you still encounter issues...
Cheers, Andrew
Thank you for the new changes. Now sprotly works really great, without any performance issues.
Great!
Thanks for your assistance with this.
During tests I noticed that downloads get stuck when fetching large files. My tests show the problem appears with files larger than a few tens of megabytes; it shows up in almost every download of a file around 100 MB in size.

A random download-testing site: https://fastest.fish/test-files

I ran tests in the popular browsers FF and Chrome; on the terminal I used wget for testing.

From the client side it looks like, several seconds after the download starts, wget stops downloading the given file. There's nothing special in the sprotly access.log, nor in the squid access.log.
Here is a wget download report showing this problem:

devuan@dev4-20-tunapi:~/test$ wget -O /dev/null https://speed.hetzner.de/1GB.bin
--2023-10-03 14:05:43--  https://speed.hetzner.de/1GB.bin
Resolving speed.hetzner.de (speed.hetzner.de)... 88.198.248.254, 2a01:4f8:0:59ed::2
Connecting to speed.hetzner.de (speed.hetzner.de)|88.198.248.254|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1048576000 (1000M) [application/octet-stream]
Saving to: ‘/dev/null’
/dev/null 2%[ ] 25.61M --.-KB/s in 15m 2s
2023-10-03 14:20:45 (29.1 KB/s) - Read error at byte 26853054/1048576000 (Connection timed out). Retrying.
--2023-10-03 14:20:46--  (try: 2)  https://speed.hetzner.de/1GB.bin
Connecting to speed.hetzner.de (speed.hetzner.de)|88.198.248.254|:443... connected.
HTTP request sent, awaiting response... 206 Partial Content
Length: 1048576000 (1000M), 1021722946 (974M) remaining [application/octet-stream]
Saving to: ‘/dev/null’
/dev/null 38%[======> ] 385.87M --.-KB/s in 15m 22s
2023-10-03 14:36:09 (400 KB/s) - Read error at byte 404618576/1048576000 (Connection timed out). Retrying.
Looks like squid isn't the problem here - without sprotly, downloads complete properly.
Regards, Greg