linux-test-project / ltp

Linux Test Project (mailing list: https://lists.linux.it/listinfo/ltp)
https://linux-test-project.readthedocs.io/
GNU General Public License v2.0
2.32k stars 1.02k forks source link

LTP syscall test "migrate_pages03" fails with an infinite loop and timeout when any QEMU guest is running on a v6.8 - v6.10 kernel #1166

Open xupengfe opened 3 months ago

xupengfe commented 3 months ago

The LTP syscall test "migrate_pages03" fails with an infinite loop and timeout when any QEMU guest is running on a v6.8 - v6.10 kernel.

ltp/testcases/kernel/syscalls/migrate_pages/migrate_pages03.c https://github.com/linux-test-project/ltp/blob/87c632c8bfbb772efde047e83e72c215bfeb1aba/testcases/kernel/syscalls/migrate_pages/migrate_pages03.c#L98
-> wait_ksmd_full_scan();

ltp/testcases/kernel/lib/ksm_helper.c wait_ksmd_full_scan() https://github.com/linux-test-project/ltp/blob/master/testcases/kernel/lib/ksm_helper.c

    while (full_scans < at_least_one_full_scan) {
        sleep(1);
        count++;
        SAFE_FILE_SCANF(PATH_KSM "full_scans", "%lu", &full_scans);
    }

-> SAFE_FILE_SCANF(PATH_KSM "full_scans", "%lu", &full_scans); // When no guest is running, /sys/kernel/mm/ksm/full_scans increases between reads, so the loop exits as expected. But when a QEMU guest is running, /sys/kernel/mm/ksm/full_scans for KSM (kernel samepage merging) does not increase, so the loop above keeps spinning until the test times out.

We need to know whether this is normal behavior. Either way, it would be better to improve the wait_ksmd_full_scan() function above so that it cannot loop forever.

Thank you!

xupengfe commented 3 months ago

I added some debug code and ran migrate_pages03 on the host; it enters an infinite loop whenever any QEMU guest is running:

# ./migrate_pages03 
tst_test.c:1806: TINFO: LTP version: 20240524-92-gd39edc7fb
tst_test.c:1652: TINFO: Timeout per run is 0h 05m 30s
migrate_pages03.c:97: TINFO: wait_ksmd_full_scan start: orig_ksm_run:-1
ksm_helper.c:18: TINFO: SAFE_FILE_SCANF start
ksm_helper.c:19: TINFO: /sys/kernel/mm/ksm/full_scans, lineno:19, fmt:%lu
ksm_helper.c:19: TINFO: exp_convs:1
ksm_helper.c:19: TINFO: Expected 1 conversions got 1 FILE '/sys/kernel/mm/ksm/full_scans'
ksm_helper.c:31: TINFO: at_least_one_full_scan:880,  full_scans:877
ksm_helper.c:35: TINFO: -> Before SAFE_FILE_SCANF count:1, at_least_one_full_scan:880,  full_scans:877
ksm_helper.c:36: TINFO: /sys/kernel/mm/ksm/full_scans, lineno:36, fmt:%lu
ksm_helper.c:36: TINFO: exp_convs:1
ksm_helper.c:36: TINFO: Expected 1 conversions got 1 FILE '/sys/kernel/mm/ksm/full_scans'
ksm_helper.c:37: TINFO: -> After SAFE_FILE_SCANF at_least_one_full_scan:880,  full_scans:877
ksm_helper.c:35: TINFO: -> Before SAFE_FILE_SCANF count:2, at_least_one_full_scan:880,  full_scans:877
ksm_helper.c:36: TINFO: /sys/kernel/mm/ksm/full_scans, lineno:36, fmt:%lu
ksm_helper.c:36: TINFO: exp_convs:1
ksm_helper.c:36: TINFO: Expected 1 conversions got 1 FILE '/sys/kernel/mm/ksm/full_scans'
ksm_helper.c:37: TINFO: -> After SAFE_FILE_SCANF at_least_one_full_scan:880,  full_scans:877
ksm_helper.c:35: TINFO: -> Before SAFE_FILE_SCANF count:3, at_least_one_full_scan:880,  full_scans:877
ksm_helper.c:36: TINFO: /sys/kernel/mm/ksm/full_scans, lineno:36, fmt:%lu
ksm_helper.c:36: TINFO: exp_convs:1
ksm_helper.c:36: TINFO: Expected 1 conversions got 1 FILE '/sys/kernel/mm/ksm/full_scans'
ksm_helper.c:37: TINFO: -> After SAFE_FILE_SCANF at_least_one_full_scan:880,  full_scans:877
ksm_helper.c:35: TINFO: -> Before SAFE_FILE_SCANF count:4, at_least_one_full_scan:880,  full_scans:877

Debug code:

diff --git a/lib/safe_file_ops.c b/lib/safe_file_ops.c
index 63ae2dbbe..db9d4b1be 100644
--- a/lib/safe_file_ops.c
+++ b/lib/safe_file_ops.c
@@ -126,7 +126,7 @@ void safe_file_scanf(const char *file, const int lineno,
        int exp_convs, ret;

        f = fopen(path, "r");
-
+       tst_resm_(file, lineno, TINFO, "%s, lineno:%d, fmt:%s", path, lineno, fmt);
        if (f == NULL) {
                tst_brkm_(file, lineno, TBROK | TERRNO, cleanup_fn,
                        "Failed to open FILE '%s' for reading", path);
@@ -134,7 +134,6 @@ void safe_file_scanf(const char *file, const int lineno,
        }

        exp_convs = tst_count_scanf_conversions(fmt);
-
        va_start(va, fmt);
        ret = vfscanf(f, fmt, va);
        va_end(va);
@@ -145,6 +144,8 @@ void safe_file_scanf(const char *file, const int lineno,
                return;
        }

+       tst_resm_(file, lineno, TINFO, "Expected %i conversions got %i FILE '%s'",
+                       exp_convs, ret, path);
        if (ret != exp_convs) {
                tst_brkm_(file, lineno, TBROK, cleanup_fn,
                        "Expected %i conversions got %i FILE '%s'",
diff --git a/testcases/kernel/lib/ksm_helper.c b/testcases/kernel/lib/ksm_helper.c
index 586a0d1ee..ab424cca0 100644
--- a/testcases/kernel/lib/ksm_helper.c
+++ b/testcases/kernel/lib/ksm_helper.c
@@ -30,7 +30,9 @@ void wait_ksmd_full_scan(void)
        while (full_scans < at_least_one_full_scan) {
                sleep(1);
                count++;
+               tst_res(TINFO, "-> Before SAFE_FILE_SCANF count:%d, at_least_one_full_scan:%ld,  full_scans:%ld", count, at_least_one_full_scan, full_scans);
                SAFE_FILE_SCANF(PATH_KSM "full_scans", "%lu", &full_scans);
+               tst_res(TINFO, "-> After SAFE_FILE_SCANF at_least_one_full_scan:%ld,  full_scans:%ld", at_least_one_full_scan, full_scans);
        }

        tst_res(TINFO, "ksm daemon takes %ds to run two full scans", count);
diff --git a/testcases/kernel/syscalls/migrate_pages/migrate_pages03.c b/testcases/kernel/syscalls/migrate_pages/migrate_pages03.c
index 4d3299b61..fdd7b398f 100644
--- a/testcases/kernel/syscalls/migrate_pages/migrate_pages03.c
+++ b/testcases/kernel/syscalls/migrate_pages/migrate_pages03.c
@@ -72,7 +72,6 @@ static void setup(void)
        set_bit(new_nodes[1], nodes[1], 1);

        page_size = getpagesize();
-
        for (n = 0; n < N_PAGES; n++) {
                test_pages[n] = SAFE_MMAP(NULL, page_size, PROT_READ | PROT_WRITE | PROT_EXEC,
                                          MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
@@ -95,6 +94,7 @@ static void setup(void)

        SAFE_FILE_SCANF(PATH_KSM "run", "%d", &orig_ksm_run);
        SAFE_FILE_PRINTF(PATH_KSM "run", "%d", 1);
+       tst_res(TINFO, "wait_ksmd_full_scan start: orig_ksm_run:%d", orig_ksm_run);
        wait_ksmd_full_scan();
 }

@@ -118,6 +118,7 @@ static void migrate_test(void)
 {
        int loop, i, ret;

+       tst_res(TINFO, "SAFE_SETEUID");
        SAFE_SETEUID(ltpuser->pw_uid);
        for (loop = 0; loop < N_LOOPS; loop++) {
                i = loop % 2;

Thanks!

xupengfe commented 3 months ago

One more thing: could we skip the wait_ksmd_full_scan() call in ltp/testcases/kernel/syscalls/migrate_pages/migrate_pages03.c? Even if we remove this check, the test case still passes. Thanks!

metan-ucw commented 4 weeks ago

No, we can't remove the wait_ksmd_full_scan() call, because we need these pages to be merged by KSM before the test.

I have no idea why ksmd fails to perform a single scan on your machine. I've tried running the test in QEMU on host kernel 6.9.3 and with two different guest kernels (5.14.21 on openSUSE and 6.1.0 on Debian), and everything works fine.