modpost warning from specialized __create_pgd_mapping_locked() after LLVM commit cc7bb7080fc8 #1838

nathanchance opened this issue 1 year ago

After https://github.com/llvm/llvm-project/commit/cc7bb7080fc8e6f4d217e7f9b971fbdbf091f9e7, I see a modpost warning when building ARCH=arm64 defconfig + CONFIG_LTO_CLANG_THIN=y:

# Flip on CONFIG_LTO_CLANG_THIN in menuconfig
$ make -skj"$(nproc)" ARCH=arm64 LLVM=1 O=build clean defconfig menuconfig vmlinux
WARNING: modpost: vmlinux.o: section mismatch in reference: __create_pgd_mapping_locked (section: .text.__create_pgd_mapping_locked.23) -> early_pgtable_alloc (section: .init.text)
WARNING: modpost: vmlinux.o: section mismatch in reference: __create_pgd_mapping_locked (section: .text.__create_pgd_mapping_locked.23) -> early_pgtable_alloc (section: .init.text)
WARNING: modpost: vmlinux.o: section mismatch in reference: __create_pgd_mapping_locked (section: .text.__create_pgd_mapping_locked.23) -> early_pgtable_alloc (section: .init.text)
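
To illustrate what seems to be going on, here is a minimal sketch of the code shape involved (all names are hypothetical, and __init is approximated with a plain section attribute):

/* early_alloc() lives in .init.text, like early_pgtable_alloc(). */
#define __init __attribute__((__section__(".init.text")))

static int __init early_alloc(void)
{
	return 0;
}

static int other_alloc(void)
{
	return 1;
}

/*
 * do_map() is called with different function pointers, so it cannot
 * simply be folded into one caller. The compiler may instead emit a
 * clone of do_map() with alloc hard-coded to early_alloc(); under
 * -ffunction-sections that clone gets a numbered section such as
 * .text.do_map.<N>, which then references .init.text directly, i.e.
 * the cross-section reference that modpost warns about.
 */
static void do_map(int (*alloc)(void))
{
	alloc();
}

void __init early_setup(void)
{
	do_map(early_alloc);
}

void late_setup(void)
{
	do_map(other_alloc);
}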

This sounds like GCC's constprop optimization, which is already allowlisted in scripts/mod/modpost.c (see "Pattern 5" in the comment above secref_whitelist()), but I am not sure it is the same thing. Even if it is, I am not sure how we would go about allowlisting it, since there is no nice marker like .constprop on these symbols. __create_pgd_mapping_locked() is only called from __create_pgd_mapping(), which is only passed early_pgtable_alloc() as the pgtable_alloc argument from __init code, so I do not think there is an actual bug here, but the warning should be addressed in some way.
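
For reference, the existing GCC allowlist keys off the symbol name, not the section name; its shape in scripts/mod/modpost.c is roughly the following (abbreviated, so treat as approximate):

/* GCC names constant-propagated clones <sym>.constprop.<n>, so the
 * clone can be allowlisted by its symbol name alone: */
static const char *const optim_symbols[] = { "*.constprop.*", NULL };

/* ... and in secref_whitelist(): */
	/* Check for pattern 5 */
	if (match(fromsec, text_sections) &&
	    match(tosec, init_sections) &&
	    match(fromsym, optim_symbols))
		return 0;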

nathanchance commented 1 year ago

Looking at this a little closer, I am not sure we can extend the current optim_symbols allowlist: at least in this case, it is the section name (due to LTO plus -ffunction-sections), not the symbol name, that carries the specialization suffix. Something like the following would work, but I am not sure it is always safe, since matching on the section name alone would also allowlist genuine mismatches coming from any compiler-numbered .text.* section:

diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 9466b6a2abae..016d29619a68 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -846,6 +846,8 @@ static const char *const init_sections[] = { ALL_INIT_SECTIONS, NULL };

 /* all text sections */
 static const char *const text_sections[] = { ALL_TEXT_SECTIONS, NULL };
+static const char *const llvm_specialized_text_sections[] =
+       { ".text.*.[0-9]", ".text.*.[0-9][0-9]", NULL };

 /* data section */
 static const char *const data_sections[] = { DATA_SECTIONS, NULL };
@@ -1109,6 +1111,10 @@ static int secref_whitelist(const struct sectioncheck *mismatch,
        if (strstarts(fromsym, ".L"))
                return 0;

+       if (match(fromsec, llvm_specialized_text_sections) &&
+           match(tosec, init_sections))
+               return 0;
+
        return 1;
 }

nathanchance commented 1 year ago

@nickdesaulniers suggested seeing if __always_inline would help eliminate this warning. The following diff resolves it for me: inlining avoids the specialization altogether, and since the reference to early_pgtable_alloc() then lands in each __init caller's own section, modpost can continue to audit the pgtable_alloc function pointer parameter for section mismatches.

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index af6bc8403ee4..1e0b3c9084db 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -366,11 +366,10 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
        pud_clear_fixmap();
 }

-static void __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys,
-                                       unsigned long virt, phys_addr_t size,
-                                       pgprot_t prot,
-                                       phys_addr_t (*pgtable_alloc)(int),
-                                       int flags)
+static __always_inline void
+__create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys, unsigned long virt,
+                           phys_addr_t size, pgprot_t prot,
+                           phys_addr_t (*pgtable_alloc)(int), int flags)
 {
        unsigned long addr, end, next;
        pgd_t *pgdp = pgd_offset_pgd(pgdir, virt);
@@ -394,11 +393,10 @@ static void __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys,
        } while (pgdp++, addr = next, addr != end);
 }

-static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
-                                unsigned long virt, phys_addr_t size,
-                                pgprot_t prot,
-                                phys_addr_t (*pgtable_alloc)(int),
-                                int flags)
+static __always_inline void
+__create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, unsigned long virt,
+                    phys_addr_t size, pgprot_t prot,
+                    phys_addr_t (*pgtable_alloc)(int), int flags)
 {
        mutex_lock(&fixmap_lock);
        __create_pgd_mapping_locked(pgdir, phys, virt, size, prot,

If this seems like a reasonable workaround or permanent fix, I will send it along for review upstream. I will also do some code size measurements to see whether the change is significant on that front.
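
One way to do that measurement is the kernel's scripts/bloat-o-meter, comparing vmlinux from a before and an after build (the output directory names and patch file name below are illustrative):

# both trees configured as above (defconfig + CONFIG_LTO_CLANG_THIN=y)
$ make -skj"$(nproc)" ARCH=arm64 LLVM=1 O=build-before vmlinux
$ git apply mmu-always-inline.diff   # hypothetical name for the diff above
$ make -skj"$(nproc)" ARCH=arm64 LLVM=1 O=build-after vmlinux
$ scripts/bloat-o-meter build-before/vmlinux build-after/vmlinux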

nickdesaulniers commented 1 year ago

> __create_pgd_mapping_locked() is only called from __create_pgd_mapping(), which is only called with early_pgtable_alloc() as the pgtable_alloc argument

The three specific calls are coming from __map_memblock, map_kernel_segment, and create_idmap.

If size is somehow a concern (as in, if we don't want to __always_inline __create_pgd_mapping into its 6 callers), we could instead:

  1. make those 3 fns call an __init variant, __create_early_pgd_mapping(), which hard-codes the early_pgtable_alloc callback (as in the diff below).
  2. mark __create_pgd_mapping_locked() __always_inline as you did above, since it would then only have 2 call sites.

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index af6bc8403ee4..fba41288a5eb 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -366,11 +366,11 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
    pud_clear_fixmap();
 }

-static void __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys,
-                   unsigned long virt, phys_addr_t size,
-                   pgprot_t prot,
-                   phys_addr_t (*pgtable_alloc)(int),
-                   int flags)
+static __always_inline
+void __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys,
+                unsigned long virt, phys_addr_t size,
+                pgprot_t prot,
+                phys_addr_t (*pgtable_alloc)(int), int flags)
 {
    unsigned long addr, end, next;
    pgd_t *pgdp = pgd_offset_pgd(pgdir, virt);
@@ -406,6 +406,16 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
    mutex_unlock(&fixmap_lock);
 }

+static void __init __create_early_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
+                         unsigned long virt, phys_addr_t size,
+                         pgprot_t prot, int flags)
+{
+   mutex_lock(&fixmap_lock);
+   __create_pgd_mapping_locked(pgdir, phys, virt, size, prot,
+                   early_pgtable_alloc, flags);
+   mutex_unlock(&fixmap_lock);
+}
+
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
 extern __alias(__create_pgd_mapping_locked)
 void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt,
@@ -494,8 +504,8 @@ static void update_mapping_prot(phys_addr_t phys, unsigned long virt,
 static void __init __map_memblock(pgd_t *pgdp, phys_addr_t start,
                  phys_addr_t end, pgprot_t prot, int flags)
 {
-   __create_pgd_mapping(pgdp, start, __phys_to_virt(start), end - start,
-                prot, early_pgtable_alloc, flags);
+   __create_early_pgd_mapping(pgdp, start, __phys_to_virt(start),
+                  end - start, prot, flags);
 }

 void __init mark_linear_text_alias_ro(void)
@@ -648,8 +658,8 @@ static void __init map_kernel_segment(pgd_t *pgdp, void *va_start, void *va_end,
    BUG_ON(!PAGE_ALIGNED(pa_start));
    BUG_ON(!PAGE_ALIGNED(size));

-   __create_pgd_mapping(pgdp, pa_start, (unsigned long)va_start, size, prot,
-                early_pgtable_alloc, flags);
+   __create_early_pgd_mapping(pgdp, pa_start, (unsigned long)va_start,
+                  size, prot, flags);

    if (!(vm_flags & VM_NO_GUARD))
        size += PAGE_SIZE;
@@ -765,8 +775,7 @@ static void __init create_idmap(void)
            __pgd(pgd_phys | P4D_TYPE_TABLE));
        pgd = __va(pgd_phys);
    }
-   __create_pgd_mapping(pgd, start, start, size, PAGE_KERNEL_ROX,
-                early_pgtable_alloc, 0);
+   __create_early_pgd_mapping(pgd, start, start, size, PAGE_KERNEL_ROX, 0);

    if (IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0)) {
        extern u32 __idmap_kpti_flag;

ardbiesheuvel commented 1 year ago

One might argue that a specialized copy created for a single caller that lives in .init.text should itself be emitted into .init.text. That would solve this issue and, as a bonus, the cloned code would be discarded along with the rest of .init after boot.

I understand that adding such logic to the toolchain is problematic, though, so I'd prefer it if we could simply prevent the compiler from doing this const propagation.

Refactoring the logic to paper over it is my least favorite approach, as LTO is now a niche use case after the move to kCFI, and churn like this is not good for maintainability.