This problem reproduces only on the loopback interface.
@krizhanovsky @const-t @RomanBelozerov So far I cannot reproduce this leak: I patched the test file, kept the client running long enough, and ran the test several times, but no leak was found. In my environment, memory usage stayed at 38% before and after the test.
diff --git a/t_stress/test_wordpress.py b/t_stress/test_wordpress.py
index 1c1a483..37b9ca9 100644
--- a/t_stress/test_wordpress.py
+++ b/t_stress/test_wordpress.py
@@ -53,8 +53,9 @@ class TestWordpressStress(CustomMtuMixin, tester.TempestaTest):
     def test_get_large_images(self):
         self.start_all_services(client=False)
-        client = self.get_client("get_images")
-        client.start()
-        self.wait_while_busy(client)
-        client.stop()
-        self.assertGreater(client.statuses[200], 0, "Client has not received 200 responses.")
+        for _ in range(1000):
+            client = self.get_client("get_images")
+            client.start()
+            self.wait_while_busy(client)
+            client.stop()
+            self.assertGreater(client.statuses[200], 0, "Client has not received 200 responses.")
Setup to reproduce: the defaults from tests_config.sample.ini (or the helpers.tf_cfg.TestFrameworkCfg.defaults method). Then just run the test:
./run_tests.py -n t_stress.test_wordpress.H2WordpressStress.test_get_large_images
The "docker build" failed:
---> 8b951aad8184
Step 19/28 : RUN wp core install --path="${WORDPRESS_DIR}" --url="${WORDPRESS_URL}" --title="Tempesta Wordpress Test" --admin_user=admin --admin_password=secret --admin_email=test@tempesta-tech.com --skip-email
---> Running in 2d6ff0bb3064
Error: Error: WordPress 6.6.1 requires MySQL 5.5.5 or higher
======================================================================
ERROR: test_get_large_images (t_stress.test_wordpress.H2WordpressStress)
----------------------------------------------------------------------
Traceback (most recent call last):
File "/home/kingluo/tempesta-test/helpers/remote.py", line 107, in run_cmd
assert p.returncode == 0, "Cmd: '%s' return code is not 0 (%d)." % (
AssertionError: Cmd: 'cat /tmp/nginx/tt-wordpress.tar.gz | docker build - --tag tt-wordpress --network host' return code is not 0 (1).
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/kingluo/tempesta-test/t_stress/test_wordpress.py", line 74, in test_get_large_images
self.start_all()
File "/home/kingluo/tempesta-test/t_stress/test_wordpress.py", line 69, in start_all
self.start_all_servers()
File "/home/kingluo/tempesta-test/framework/tester.py", line 320, in start_all_servers
srv.start()
File "/home/kingluo/tempesta-test/helpers/stateful.py", line 60, in start
self.run_start()
File "/home/kingluo/tempesta-test/framework/docker_server.py", line 123, in run_start
self._build_image()
File "/home/kingluo/tempesta-test/framework/docker_server.py", line 168, in _build_image
stdout, stderr = self.node.run_cmd(
File "/home/kingluo/tempesta-test/helpers/remote.py", line 121, in run_cmd
raise CmdError(
helpers.remote.CmdError: ('Сmd cat /tmp/nginx/tt-wordpress.tar.gz | docker build - --tag tt-wordpress --network host exited with return code 1', b'', b'#0 building with
"default" instance using docker driver\n\n#1 [internal] load remote build context\n#1 DONE 0.0s\n\n#2 copy /context /\n#2 DONE 0.0s\n\n#3 [internal] load metadata for doc
ker.io/library/wordpress:php8.1-apache\n#3 DONE 1.1s\n\n#4 [internal] load metadata for docker.io/composer/composer:2.4.3\n#4 DONE 2.1s\n\n#5 FROM docker.io/composer/comp
oser:2.4.3@sha256:2d5ee5c7dedd179206563d6631b6cf6ca0bca9561dd728de4de717e5ce0b63a2\n#5 DONE 0.0s\n\n#6 [stage-0 1/17] FROM docker.io/library/wordpress:php8.1-apache@sha2
56:aa78ee76c622598ea8e7807f1c391888e4ee60ac8f9296771a943515a25b2a04\n#6 DONE 0.0s\n\n#7 [stage-0 6/17] COPY --chown=www-data composer.json /var/www/.composer/composer.js
on\n#7 CACHED\n\n#8 [stage-0 7/17] COPY tt-docker-entrypoint.sh /usr/local/bin\n#8 CACHED\n\n#9 [stage-0 8/17] RUN chmod +x /usr/local/bin/tt-docker-entrypoint.sh\n#9 C
ACHED\n\n#10 [stage-0 2/17] COPY --from=composer/composer:2.4.3 /usr/bin/composer /usr/bin/composer\n#10 CACHED\n\n#11 [stage-0 4/17] RUN chown www-data /var/www\n#11 C
ACHED\n\n#12 [stage-0 3/17] RUN apt-get -y update && apt install -y libzip-dev imagemagick\n#12 CACHED\n\n#13 [stage-0 9/17] RUN composer global update --no-interaction
\n#13 CACHED\n\n#14 [stage-0 5/17] COPY --chown=www-data wp-config.php /usr/src/wordpress/wp-config.php\n#14 CACHED\n\n#15 [stage-0 10/17] RUN cp /var/www/.composer/wp-c
ontent/wp-sqlite-db/src/db.php "/usr/src/wordpress/wp-content/db.php"\n#15 CACHED\n\n#16 [stage-0 11/17] RUN wp core install --path="/usr/src/wordpress" --url="http://127
.0.0.1" --title="Tempesta Wordpress Test" --admin_user=admin --admin_password=secret --admin_email=test@tempesta-tech.com --skip-email\n#16 0.317 Error: Error: WordPress
6.6.1 requires MySQL 5.5.5 or higher\n#16 ERROR: process "/bin/sh -c wp core install --path=\\"${WORDPRESS_DIR}\\" --url=\\"${WORDPRESS_URL}\\" --title=\\"Tempesta Wordpr
ess Test\\" --admin_user=admin --admin_password=secret --admin_email=test@tempesta-tech.com --skip-email" did not complete successfully: exit code: 1\n------\n > [stage-0
11/17] RUN wp core install --path="/usr/src/wordpress" --url="http://127.0.0.1" --title="Tempesta Wordpress Test" --admin_user=admin --admin_password=secret --admin_emai
l=test@tempesta-tech.com --skip-email:\n0.317 Error: Error: WordPress 6.6.1 requires MySQL 5.5.5 or higher\n------\nDockerfile:27\n--------------------\n 25 | RUN cp
/var/www/.composer/wp-content/wp-sqlite-db/src/db.php "${WORDPRESS_DIR}/wp-content/db.php"\n 26 | \n 27 | >>> RUN wp core install --path="${WORDPRESS_DIR}" --url="
${WORDPRESS_URL}" --title="Tempesta Wordpress Test" --admin_user=admin --admin_password=secret --admin_email=test@tempesta-tech.com --skip-email\n 28 | \n 29 |
# Generate images\n--------------------\nERROR: failed to solve: process "/bin/sh -c wp core install --path=\\"${WORDPRESS_DIR}\\" --url=\\"${WORDPRESS_URL}\\" --title=\\
"Tempesta Wordpress Test\\" --admin_user=admin --admin_password=secret --admin_email=test@tempesta-tech.com --skip-email" did not complete successfully: exit code: 1\n',
1)
Roughly, this leak happens in the TLS encryption code path (on the response side) and has nothing to do with the HTTP version. From the pcap you can observe that the 10 MB image response is split into many small parts, e.g. 100 bytes each.
[ 372.867463] ksoftirqd/1: page allocation failure: order:2, mode:0xa20(GFP_ATOMIC), nodemask=(null),cpuset=/,mems_allowed=0
[ 372.869016] CPU: 1 PID: 20 Comm: ksoftirqd/1 Kdump: loaded Tainted: G OE 5.10.35+ #1
[ 372.870749] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014
[ 372.872680] Call Trace:
[ 372.874559] dump_stack+0x96/0xc4
[ 372.876373] warn_alloc.cold+0x87/0x12a
[ 372.878169] ? zone_watermark_ok_safe+0x140/0x140
[ 372.880074] ? wakeup_kswapd+0xfe/0x290
[ 372.881990] ? __zone_watermark_ok+0x270/0x270
[ 372.883767] ? wake_all_kswapds+0xf4/0x140
[ 372.885506] ? __alloc_pages_slowpath.constprop.0+0x11f4/0x1390
[ 372.887263] __alloc_pages_slowpath.constprop.0+0x1359/0x1390
[ 372.889004] ? warn_alloc+0x120/0x120
[ 372.890731] ? sk_free+0x24/0x70
[ 372.892408] ? __zone_watermark_ok+0x270/0x270
[ 372.894054] ? loopback_xmit+0x15e/0x1b0
[ 372.895703] ? __kasan_check_read+0x11/0x20
[ 372.897338] __alloc_pages_nodemask+0x509/0x540
[ 372.898986] ? __alloc_pages_slowpath.constprop.0+0x1390/0x1390
[ 372.900657] ? __extend_pgfrags+0x3c6/0x4a0 [tempesta_fw]
[ 372.902276] alloc_pages_current+0x96/0x140
[ 372.903918] ss_skb_to_sgvec_with_new_pages+0xeb/0x490 [tempesta_fw]
[ 372.905555] tfw_tls_encrypt+0x514/0xbc0 [tempesta_fw]
[ 372.907220] ? tfw_tls_connection_recv+0x710/0x710 [tempesta_fw]
In this scenario the skb frag pages of the response body from WordPress are shared, so for TLS encryption we replace them with new pages via alloc_pages(). However, the new (possibly compound) pages are not freed after the response is sent. The reason is unknown; it looks like the kernel caches the pages somewhere and does not make them free to reuse.
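One kernel detail that might matter here (an observation, not a confirmed root cause): skb frag pages are released in skb_release_data() via put_page(), which frees an order > 0 allocation in one piece only if the page is compound, i.e. allocated with __GFP_COMP; a plain alloc_pages(GFP_ATOMIC, order) returns a non-compound block. A minimal sketch of the allocation side, with a hypothetical helper name:

#include <linux/gfp.h>
#include <linux/skbuff.h>

/* Hypothetical helper, not Tempesta code: allocate a replacement frag
 * page the way skb_release_data() expects to free it later. */
static struct page *tls_alloc_frag_page(int order)
{
        /*
         * __GFP_COMP makes the high-order block a compound page, so the
         * final put_page() returns all 2^order pages to the buddy
         * allocator; without it only the head page would be freed.
         */
        return alloc_pages(GFP_ATOMIC | __GFP_COMP, order);
        /* refcount == 1; ownership passes to the skb frag */
}

The replacement itself happens in ss_skb_to_sgvec_with_new_pages():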
int
ss_skb_to_sgvec_with_new_pages(struct sk_buff *skb, struct scatterlist *sgl,
                               struct page ***old_pages)
{
        ...
        if (skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG) { /* <-- pages are shared */
                if (head_data_len) {
                        sg_set_buf(sgl + out_frags, skb->data, head_data_len);
                        out_frags++;
                }
                ...
                for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
                        skb_frag_t *f = &skb_shinfo(skb)->frags[i];
                        unsigned int size;
                        struct page *p;

                        size = skb_frag_size(f);
                        if (remain < size) {
                                int order = get_order(size);

                                /* <-- allocate new pages to replace the old ones */
                                p = alloc_pages(GFP_ATOMIC, order);
                                if (!p)
                                        return -ENOMEM;
                                remain = (1 << order) * PAGE_SIZE;
                                offset = 0;
                        } else {
                                /* the frag fits into the page allocated for
                                 * the previous frag, take another reference */
                                skb_frag_t *prev_f;

                                prev_f = &skb_shinfo(skb)->frags[i - 1];
                                p = skb_frag_page(prev_f);
                                get_page(p);
                        }
                        /* hand the old (shared) page back to the caller */
                        **old_pages = skb_frag_page(f);
                        (*old_pages)++;
                        sg_set_page(sgl + out_frags, p, size, offset);
                        __skb_fill_page_desc(skb, i, p, offset, size);
                        remain -= size;
                        offset += size;
                        out_frags++;
                }
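For context, the remain/offset bookkeeping above packs consecutive frags into one freshly allocated high-order page and only allocates again when the current page is exhausted. A standalone userspace sketch of just that bookkeeping (hypothetical frag sizes; get_order() reimplemented with the kernel's semantics):

#include <stdio.h>

#define PAGE_SIZE  4096
#define PAGE_SHIFT 12

/* Same semantics as the kernel's get_order(): the smallest order such
 * that (1 << order) pages cover size bytes. */
static int get_order(unsigned long size)
{
        int order = 0;

        size = (size - 1) >> PAGE_SHIFT;
        while (size) {
                order++;
                size >>= 1;
        }
        return order;
}

int main(void)
{
        /* Hypothetical frag sizes; the pcap showed even smaller pieces. */
        unsigned int frags[] = { 1000, 1000, 1000, 6000 };
        unsigned int remain = 0, offset = 0;
        size_t i;

        for (i = 0; i < sizeof(frags) / sizeof(frags[0]); i++) {
                unsigned int size = frags[i];

                if (remain < size) {    /* need a fresh high-order page */
                        int order = get_order(size);

                        remain = (1u << order) * PAGE_SIZE;
                        offset = 0;
                        printf("frag %zu: new order-%d page, %u bytes usable\n",
                               i, order, remain);
                } else {                /* packs into the previous new page */
                        printf("frag %zu: reuse previous page at offset %u\n",
                               i, offset);
                }
                remain -= size;
                offset += size;
        }
        return 0;
}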
How to prove it? If we cache and reuse the new pages, the leak is gone.
Patch Tempesta (the debug patch assumes the requests are serial and 2 seconds apart: the first 2-second window only fills the cache, later windows reuse the cached pages and print every page whose refcount is still elevated):
diff --git a/fw/ss_skb.c b/fw/ss_skb.c
index 3bf00b49..88002d0b 100644
--- a/fw/ss_skb.c
+++ b/fw/ss_skb.c
@@ -38,6 +38,69 @@
 #include "procfs.h"
 #include "ss_skb.h"
 
+#include <linux/timekeeping.h>
+#include <linux/hashtable.h>
+
+struct cached_page {
+	bool used;
+	struct page *p;
+	struct hlist_node node;
+};
+
+DECLARE_HASHTABLE(page_cache, 10);
+static u64 t1 = 0;
+static int phase = 0;
+
+static struct page *get_cache_page(int order)
+{
+	struct page *p = NULL;
+
+	if (!phase)
+		hash_init(page_cache);
+
+	if (ktime_get_seconds() - t1 > 2) {
+		struct cached_page *t;
+		unsigned bkt;
+
+		phase++;
+		t1 = ktime_get_seconds();
+		hash_for_each(page_cache, bkt, t, node) {
+			t->used = false;
+			if (page_count(t->p) != 1)
+				printk("---> page_count()=%d, p=%p\n", page_count(t->p), t->p);
+		}
+	}
+
+	if (phase == 1) {
+new_page:
+		struct cached_page *t = kmalloc(sizeof(struct cached_page), GFP_ATOMIC);
+
+		p = alloc_pages(GFP_ATOMIC, order);
+		get_page(p);
+		t->used = false;
+		t->p = p;
+		hash_add(page_cache, &t->node, order);
+	} else {
+		struct cached_page *t;
+
+		hash_for_each_possible(page_cache, t, node, order) {
+			if (!t->used && page_count(t->p) == 1) {
+				t->used = true;
+				p = t->p;
+				get_page(p);
+				break;
+			}
+		}
+	}
+
+	if (unlikely(!p)) {
+		WARN_ON_ONCE(phase <= 1);
+		goto new_page;
+	}
+
+	return p;
+}
+
 /**
  * Get @skb's source address and port as a string, e.g. "127.0.0.1", "::1".
  *
@@ -1590,7 +1653,7 @@ ss_skb_to_sgvec_with_new_pages(struct sk_buff *skb, struct scatterlist *sgl,
 		size = skb_frag_size(f);
 		if (remain < size) {
 			int order = get_order(size);
-			p = alloc_pages(GFP_ATOMIC, order);
+			p = get_cache_page(order);
 			if (!p)
 				return -ENOMEM;
 			remain = (1 << order) * PAGE_SIZE;
Patch tempesta-test:
diff --git a/framework/docker_server.py b/framework/docker_server.py
index fff77c0..06850e7 100644
--- a/framework/docker_server.py
+++ b/framework/docker_server.py
@@ -120,7 +120,7 @@ class DockerServer(DockerServerArguments, stateful.Stateful):
     def run_start(self):
         tf_cfg.dbg(3, f"\tDocker Server: Start {self.id} (image {self.image})")
         self.port_checker.check_ports_status()
-        self._build_image()
+        #self._build_image()
         stdout, stderr = self.node.run_cmd(
             self._form_run_command(), err_msg=self._form_error(action="start")
         )
diff --git a/t_stress/test_wordpress.py b/t_stress/test_wordpress.py
index 5bd06d7..645a640 100644
--- a/t_stress/test_wordpress.py
+++ b/t_stress/test_wordpress.py
@@ -72,11 +72,14 @@ class BaseWorpressStress(CustomMtuMixin, tester.TempestaTest, base=True):
     def test_get_large_images(self):
         self.start_all()
-        client = self.get_client("get_images")
-        client.start()
-        self.wait_while_busy(client)
-        client.stop()
-        self.assertGreater(client.statuses[200], 0, "Client has not received 200 responses.")
+        print("ok")
+        import time
+        time.sleep(60*5)
+        #client = self.get_client("get_images")
+        #client.start()
+        #self.wait_while_busy(client)
+        #client.stop()
+        #self.assertEqual(client.statuses[200], 0, "Client has not received 200 responses.")
 
 
 class TlsWordpressStress(BaseWorpressStress):
Test:
t -n t_stress.test_wordpress.TlsWordpressStress.test_get_large_images
for ((i=0;i<20;i++)); do
    sleep 2
    curl --no-progress-meter --show-error --output /dev/null \
        --header 'Connection: close' --http2-prior-knowledge \
        --interface 127.0.0.2 --insecure --parallel --parallel-immediate \
        --parallel-max 10 'https://127.0.0.1/images/2048.jpg?ver=[1-1]' \
        -w "%{http_code} %{size_download}\n"
done
Check Committed_AS in /proc/meminfo before and after the test, or watch the memory with btm.
I think if we can't find the root cause, the workaround should be to create a dedicated page cache for TLS encryption, just like the patch above.
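A cleaner shape for such a cache could be a small per-CPU, per-order freelist. The sketch below is only an illustration of the idea, not the actual fix; all names (tls_pg_cache, tls_pg_get, TLS_PG_MAX_ORDER) are hypothetical and eviction/teardown is left out:

#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/percpu.h>

#define TLS_PG_MAX_ORDER 4	/* assumption: TLS frags never exceed order 4 */

struct tls_pg_cache {
        struct page *pages[TLS_PG_MAX_ORDER + 1];
};
static DEFINE_PER_CPU(struct tls_pg_cache, tls_pg_cache);

static struct page *tls_pg_get(int order)
{
        /* tfw_tls_encrypt() runs in softirq context (see the ksoftirqd
         * trace above), so plain per-CPU access needs no extra locking. */
        struct tls_pg_cache *c = this_cpu_ptr(&tls_pg_cache);
        struct page *p;

        if (order > TLS_PG_MAX_ORDER)
                return alloc_pages(GFP_ATOMIC | __GFP_COMP, order);

        p = c->pages[order];
        if (p && page_count(p) == 1) {
                /* The cache holds the only reference, i.e. the previous
                 * skb has been sent and dropped its reference: reuse. */
                get_page(p);
                return p;
        }

        p = alloc_pages(GFP_ATOMIC | __GFP_COMP, order);
        if (p && !c->pages[order]) {
                get_page(p);            /* extra reference kept by the cache */
                c->pages[order] = p;
        }
        return p;
}

Whether this really avoids the failure mode would need the same Committed_AS measurement as above.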
Scope
Tempesta commit: 930ba7dc761f8236424894fddc40cd5cc1c7af7f
Tests commit: df8a0e7ec6f4a4cec96666db9f953490a710c564
Test:
./run_tests.py -n t_stress.test_wordpress.H2WordpressStress.test_get_large_images
We lose memory on every test run. I can't reproduce it without the test.