ClangBuiltLinux / linux


i386 without CONFIG_X86_CMPXCHG64 "error: inline assembly requires more registers than available" in kernel/bpf/core.c after commit 95ece48165c1 in -next #2018

Closed: nathanchance closed this 4 months ago

nathanchance commented 6 months ago

After commit 95ece48165c1 ("locking/atomic/x86: Rewrite x86_32 arch_atomic64_{,fetch}_{and,or,xor}() functions") in -next, I see the following error with Debian's i386 configuration:

$ curl -LSso .config https://github.com/nathanchance/llvm-kernel-testing/raw/9e1ae215de95b32d83b8635aceac3d47d261a0b2/configs/debian/i386.config

$ make -skj"$(nproc)" ARCH=i386 LLVM=1 olddefconfig kernel/bpf/core.o
In file included from kernel/bpf/core.c:21:
In file included from include/linux/filter.h:8:
In file included from include/linux/atomic.h:7:
In file included from arch/x86/include/asm/atomic.h:8:
In file included from arch/x86/include/asm/cmpxchg.h:143:
arch/x86/include/asm/cmpxchg_32.h:156:9: error: inline assembly requires more registers than available
  156 |         return __arch_try_cmpxchg64_emu(ptr, oldp, new);
      |                ^
arch/x86/include/asm/cmpxchg_32.h:138:15: note: expanded from macro '__arch_try_cmpxchg64_emu'
  138 |         asm volatile(ALTERNATIVE(LOCK_PREFIX_HERE                       \
      |                      ^
arch/x86/include/asm/alternative.h:218:2: note: expanded from macro 'ALTERNATIVE'
  218 |         OLDINSTR(oldinstr, 1)                                           \
      |         ^
arch/x86/include/asm/alternative.h:168:2: note: expanded from macro 'OLDINSTR'
  168 |         "# ALT: oldnstr\n"                                              \
      |         ^
...
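
For context, that commit turns these atomic64 ops into plain C retry loops around arch_try_cmpxchg64(), which is how the register-hungry cmpxchg8b/emulation asm ends up inlined into kernel/bpf/core.c. Roughly (my paraphrase of the new arch/x86/include/asm/atomic64_32.h, not a verbatim quote):

static __always_inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v)
{
	s64 val = __READ_ONCE(v->counter);

	/* Retry until the 8-byte compare-and-exchange succeeds. */
	do { } while (!arch_try_cmpxchg64(&v->counter, &val, val ^ i));

	return val;
}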

This code is only built when CONFIG_X86_CMPXCHG64 is disabled, which can be achieved by selecting a lower processor baseline, as Debian does:

$ echo 'CONFIG_M686=n
CONFIG_MGEODE_LX=y' >arch/x86/configs/repro.config

$ make -skj"$(nproc)" ARCH=i386 LLVM=1 {def,repro.}config kernel/bpf/core.o
<error like above>
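
The failing asm is the cmpxchg8b emulation wrapper, which pins five of the eight i386 GPRs with fixed register constraints. A minimal standalone sketch of the constraint pattern (simplified: the real __arch_try_cmpxchg64_emu() additionally selects between "call cmpxchg8b_emu" and the inline lock cmpxchg8b via ALTERNATIVE()):

#include <stdbool.h>

static inline bool try_cmpxchg64_sketch(unsigned long long *ptr,
					unsigned long long *oldp,
					unsigned long long new)
{
	unsigned int lo = (unsigned int)*oldp;
	unsigned int hi = (unsigned int)(*oldp >> 32);
	bool ret;

	asm volatile("lock cmpxchg8b %[mem]"
		     : "=@ccz" (ret),                   /* success/failure via ZF */
		       [mem] "+m" (*ptr),
		       "+a" (lo), "+d" (hi)             /* expected value in edx:eax */
		     : "b" ((unsigned int)new),         /* new value, low half, in ebx */
		       "c" ((unsigned int)(new >> 32)), /* new value, high half, in ecx */
		       "S" (ptr)                        /* pointer in esi for cmpxchg8b_emu */
		     : "memory");

	*oldp = ((unsigned long long)hi << 32) | lo;
	return ret;
}

With esp always reserved and ebp claimed by -fno-omit-frame-pointer, that leaves edi as the only general-purpose register for everything else around the asm.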

cvise spits out a fun-looking reproducer:

int arch_try_cmpxchg64_o, arch_try_cmpxchg64_n, ___bpf_prog_run_insn_0;
short ___bpf_prog_run_insn_1;
long ___bpf_prog_run_regs;
void arch_try_cmpxchg64(long long *ptr) {
  asm(".pushsection .altinstructions,\"a\"\n"
      ".popsection\n"
      : "+m"(*ptr), "+a"(arch_try_cmpxchg64_o), "+d"(arch_try_cmpxchg64_o)
      : "b"(arch_try_cmpxchg64_n), "c"(0), "S"(ptr));
}
typedef struct {
  long long counter;
} atomic64_t;
long atomic64_fetch_xor(atomic64_t *v) {
  atomic64_t *__trans_tmp_7 = v, *__trans_tmp_5 = __trans_tmp_7,
             *__trans_tmp_9 = __trans_tmp_5;
  v = __trans_tmp_9;
  arch_try_cmpxchg64(&v->counter);
  return 0;
}
long ___bpf_prog_run() {
  void *jumptable[] = {&&default_label,
                       &&ALU_ADD_X,
                       &&ALU_SUB_X,
                       &&ALU_AND_X,
                       &&ALU_OR_X,
                       &&ALU_LSH_X,
                       &&ALU_RSH_X,
                       &&ALU_XOR_X,
                       &&ALU_MUL_X,
                       &&ALU_MOV_X,
                       &&ALU_ARSH_X,
                       &&ALU_DIV_X,
                       &&ALU_MOD_X,
                       &&ALU_NEG,
                       &&ALU_END_TO_BE,
                       &&ALU_END_TO_LE,
                       &&ALU_ADD_K,
                       &&ALU_SUB_K,
                       &&ALU_AND_K,
                       &&ALU_OR_K,
                       &&ALU_LSH_K,
                       &&ALU_RSH_K,
                       &&ALU_XOR_K,
                       &&ALU_MUL_K,
                       &&ALU_MOV_K,
                       &&ALU_ARSH_K,
                       &&ALU_DIV_K,
                       &&ALU_MOD_K,
                       &&ALU64_ADD_X,
                       &&ALU64_SUB_X,
                       &&ALU64_AND_X,
                       &&ALU64_OR_X,
                       &&ALU64_LSH_X,
                       &&ALU64_RSH_X,
                       &&ALU64_XOR_X,
                       &&ALU64_MUL_X,
                       &&ALU64_MOV_X,
                       &&ALU64_ARSH_X,
                       &&ALU64_DIV_X,
                       &&ALU64_MOD_X,
                       &&ALU64_NEG,
                       &&ALU64_END_TO_LE,
                       &&ALU64_ADD_K,
                       &&ALU64_SUB_K,
                       &&ALU64_AND_K,
                       &&ALU64_OR_K,
                       &&ALU64_LSH_K,
                       &&ALU64_RSH_K,
                       &&ALU64_XOR_K,
                       &&ALU64_MUL_K,
                       &&ALU64_MOV_K,
                       &&ALU64_ARSH_K,
                       &&ALU64_DIV_K,
                       &&ALU64_MOD_K,
                       &&JMP_CALL,
                       &&JMP_EXIT,
                       &&JMP32_JEQ_X,
                       &&JMP32_JNE_X,
                       &&JMP32_JGT_X,
                       &&JMP32_JLT_X,
                       &&JMP32_JGE_X,
                       &&JMP32_JLE_X,
                       &&JMP32_JSGT_X,
                       &&JMP32_JSLT_X,
                       &&JMP32_JSGE_X,
                       &&JMP32_JSLE_X,
                       &&JMP32_JSET_X,
                       &&JMP32_JEQ_K,
                       &&JMP32_JGT_K,
                       &&JMP32_JLT_K,
                       &&JMP32_JGE_K,
                       &&JMP32_JLE_K,
                       &&JMP32_JSGT_K,
                       &&JMP32_JSLT_K,
                       &&JMP32_JSGE_K,
                       &&JMP32_JSLE_K,
                       &&JMP32_JSET_K,
                       &&JMP_JEQ_X,
                       &&JMP_JNE_X,
                       &&JMP_JGT_X,
                       &&JMP_JLT_X,
                       &&JMP_JGE_X,
                       &&JMP_JLE_X,
                       &&JMP_JSGT_X,
                       &&JMP_JSLT_X,
                       &&JMP_JSGE_X,
                       &&JMP_JSLE_X,
                       &&JMP_JSET_X,
                       &&JMP_JEQ_K,
                       &&JMP_JNE_K,
                       &&JMP_JGT_K,
                       &&JMP_JLT_K,
                       &&JMP_JGE_K,
                       &&JMP_JLE_K,
                       &&JMP_JSGT_K,
                       &&JMP_JSLT_K,
                       &&JMP_JSGE_K,
                       &&JMP_JSLE_K,
                       &&JMP_JSET_K,
                       &&JMP_JA,
                       &&JMP32_JA,
                       &&STX_MEM_B,
                       &&STX_MEM_H,
                       &&STX_MEM_W,
                       &&STX_MEM_DW,
                       &&STX_ATOMIC_W,
                       &&STX_ATOMIC_DW,
                       &&ST_MEM_B,
                       &&ST_MEM_H,
                       &&ST_MEM_W,
                       &&ST_MEM_DW,
                       &&LDX_MEM_B,
                       &&LDX_MEM_H,
                       &&LDX_MEM_W,
                       &&LDX_MEM_DW,
                       &&LDX_MEMSX_B,
                       &&LDX_MEMSX_H,
                       &&LDX_MEMSX_W,
                       &&LD_IMM_DW,
                       &&JMP_CALL_ARGS,
                       &&JMP_TAIL_CALL,
                       &&ST_NOSPEC,
                       &&LDX_PROBE_MEM_B,
                       &&LDX_PROBE_MEM_H,
                       &&LDX_PROBE_MEM_W,
                       &&LDX_PROBE_MEM_DW,
                       &&LDX_PROBE_MEMSX_B,
                       &&LDX_PROBE_MEMSX_H,
                       &&LDX_PROBE_MEMSX_W};
select_insn:
  goto *jumptable[___bpf_prog_run_insn_0];
ALU64_ADD_X:
ALU_ADD_X:
ALU64_ADD_K:
ALU_ADD_K:
ALU64_SUB_X:
ALU_SUB_X:
ALU64_SUB_K:
ALU_SUB_K:
ALU64_AND_X:
ALU_AND_X:
ALU64_AND_K:
ALU_AND_K:
ALU64_OR_X:
ALU_OR_X:
ALU64_OR_K:
ALU_OR_K:
ALU64_XOR_X:
ALU_XOR_X:
ALU64_XOR_K:
ALU_XOR_K:
ALU64_MUL_X:
ALU_MUL_X:
ALU64_MUL_K:
ALU_MUL_K:
ALU64_LSH_X:
ALU_LSH_X:
ALU64_LSH_K:
ALU_LSH_K:
ALU64_RSH_X:
ALU_RSH_X:
ALU64_RSH_K:
ALU_RSH_K:
ALU_NEG:
ALU64_NEG:
ALU_MOV_X:
ALU_MOV_K:
ALU64_MOV_X:
ALU64_MOV_K:
LD_IMM_DW:
ALU_ARSH_X:
ALU_ARSH_K:
ALU64_ARSH_X:
ALU64_ARSH_K:
ALU64_MOD_X:
ALU_MOD_X:
ALU64_MOD_K:
ALU_MOD_K:
ALU64_DIV_X:
ALU_DIV_X:
ALU64_DIV_K:
ALU_DIV_K:
ALU_END_TO_BE:
ALU_END_TO_LE:
ALU64_END_TO_LE:
JMP_CALL:
JMP_CALL_ARGS:
JMP_TAIL_CALL: {
  int *map = (int *)___bpf_prog_run_regs, *array = ({
    void *__mptr = map;
    __mptr;
  });
  goto out;
  ({ array; });
out:
  goto select_insn;
}
JMP_JA:
JMP32_JA:
JMP_EXIT:
JMP_JEQ_X:
JMP32_JEQ_X:
JMP_JEQ_K:
JMP32_JEQ_K:
JMP_JNE_X:
JMP32_JNE_X:
JMP_JNE_K:
JMP32_JNE_K:
JMP_JGT_X:
JMP32_JGT_X:
JMP_JGT_K:
JMP32_JGT_K:
JMP_JLT_X:
JMP32_JLT_X:
JMP_JLT_K:
JMP32_JLT_K:
JMP_JGE_X:
JMP32_JGE_X:
JMP_JGE_K:
JMP32_JGE_K:
JMP_JLE_X:
JMP32_JLE_X:
JMP_JLE_K:
JMP32_JLE_K:
JMP_JSET_X:
JMP32_JSET_X:
JMP_JSET_K:
JMP32_JSET_K:
JMP_JSGT_X:
JMP32_JSGT_X:
JMP_JSGT_K:
JMP32_JSGT_K:
JMP_JSLT_X:
JMP32_JSLT_X:
JMP_JSLT_K:
JMP32_JSLT_K:
JMP_JSGE_X:
JMP32_JSGE_X:
JMP_JSGE_K:
JMP32_JSGE_K:
JMP_JSLE_X:
JMP32_JSLE_X:
JMP_JSLE_K:
JMP32_JSLE_K:
ST_NOSPEC:
STX_MEM_B:
ST_MEM_B:
LDX_MEM_B:
LDX_PROBE_MEM_B:
STX_MEM_H:
ST_MEM_H:
LDX_MEM_H:
LDX_PROBE_MEM_H:
STX_MEM_W:
ST_MEM_W:
LDX_MEM_W:
LDX_PROBE_MEM_W:
STX_MEM_DW:
ST_MEM_DW:
LDX_MEM_DW:
LDX_PROBE_MEM_DW:
LDX_MEMSX_B:
LDX_PROBE_MEMSX_B:
LDX_MEMSX_H:
LDX_PROBE_MEMSX_H:
LDX_MEMSX_W:
LDX_PROBE_MEMSX_W:
STX_ATOMIC_DW:
STX_ATOMIC_W:
  atomic64_fetch_xor((atomic64_t *)___bpf_prog_run_regs +
                     ___bpf_prog_run_insn_1);
default_label:
  return 0;
}

The error only appears with both -O2 and -fno-omit-frame-pointer though:

$ clang --target=x86_64-linux-gnu -m32 -Wall -Wextra -c -o /dev/null core.i -O0

$ clang --target=x86_64-linux-gnu -m32 -Wall -Wextra -c -o /dev/null core.i -O2

$ clang --target=x86_64-linux-gnu -m32 -Wall -Wextra -c -o /dev/null core.i -O0 -fno-omit-frame-pointer

$ clang --target=x86_64-linux-gnu -m32 -Wall -Wextra -c -o /dev/null core.i -O2 -fno-omit-frame-pointer
core.i:5:7: error: inline assembly requires more registers than available
    5 |   asm(".pushsection .altinstructions,\"a\"\n"
      |       ^
1 error generated.
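
A rough register accounting (my inference from the constraints above, not something the diagnostic spells out):

/*
 * i386 GPRs:  eax  ebx  ecx  edx  esi  edi  ebp  esp
 *
 * Pinned by the asm:         eax, edx (old), ebx, ecx (new), esi (ptr)
 * Always reserved:           esp (stack pointer)
 * -fno-omit-frame-pointer:   ebp (frame pointer)
 *
 * Only edi is left. At -O0 that is still enough to address the
 * operands, but at -O2 the asm is inlined into the computed-goto
 * interpreter loop with other values live across it, and register
 * allocation fails.
 */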
nathanchance commented 4 months ago

Linus fixed this in 6.10-rc6 with https://git.kernel.org/linus/769327258a141ba80ac8b96fce35c68631228370