[Question] Weird AArch64 assemblies

wormtql commented 7 months ago

LLVM: 17.0.2. When I compiled libgmp to AArch64 assembly (clang++ -O3), I got some weird instructions:

.LBB28_35:
sub x8, x29, #312
ldur    x8, [x8, #-256]                 // 8-byte Folded Reload
lsr x8, x8, #0                       // that's weird
add x8, x8, #15
and x9, x8, #0xfffffffffffffff0
mov x8, sp
subs    x0, x8, x9
mov sp, x0
sub x8, x29, #328
stur    x0, [x8, #-256]                 // 8-byte Folded Spill
b   .LBB28_37

The instruction `lsr x8, x8, #0` is confusing. To the best of my knowledge, it does nothing.

Another problem:

.LBB200_16:
ldr x8, [sp, #72]                   // 8-byte Folded Reload
ldr x12, [sp, #88]                  // 8-byte Folded Reload
ldur    x11, [x29, #-24]                // 8-byte Folded Reload
ldur    w13, [x29, #-68]                // 4-byte Folded Reload
ldur    w14, [x29, #-72]                // 4-byte Folded Reload
ldr x9, [sp, #96]                   // 8-byte Folded Reload
mov w10, w9                         // move to w10
and w10, w10, w14, lsl #1       // but immediately overwrite w10
eor w10, w10, w13
asr x13, x11, #63
add x11, x11, x13
eor x11, x11, x13
stur    x12, [x29, #-112]               // 8-byte Folded Spill
stur    x11, [x29, #-104]               // 8-byte Folded Spill
stur    w10, [x29, #-92]                // 4-byte Folded Spill
stur    x9, [x29, #-88]                 // 8-byte Folded Spill
stur    x8, [x29, #-80]                 // 8-byte Folded Spill
b   .LBB200_17

Why not use the single instruction `and w10, w9, w14, lsl #1` instead of the two instructions above?

Another problem:

add x11, x11, x13
subs    x8, x8, x11
cset    w14, ne
                                      // implicit-def: $x8
mov w8, w14
sbfx    x11, x8, #0, #1
mov w8, #2                          // =0x2
                                      // kill: def $x8 killed $w8
and w14, w14, #0x1            // I think this instruction is redundant?
ands    w14, w14, #0x1
csinc   x8, x8, xzr, ne
subs    x10, x10, #31
add x12, x12, x13
subs    x10, x10, x12

llvmbot commented 7 months ago

@llvm/issue-subscribers-backend-aarch64

Author: wormtql (wormtql)

LLVM: 17.0.2 When I compile libgmp into AArch64 assemblies (clang++ -O3), I got some weird instructions:

```
.LBB28_35:
sub x8, x29, #312
ldur x8, [x8, #-256] // 8-byte Folded Reload
lsr x8, x8, #0 // that's weird
add x8, x8, #15
and x9, x8, #0xfffffffffffffff0
mov x8, sp
subs x0, x8, x9
mov sp, x0
sub x8, x29, #328
stur x0, [x8, #-256] // 8-byte Folded Spill
b .LBB28_37
```

the instruction `lsr x8, x8, #0` is confusing. To my best knowledge, I think it just did nothing

another problem:

```
.LBB200_16:
ldr x8, [sp, #72] // 8-byte Folded Reload
ldr x12, [sp, #88] // 8-byte Folded Reload
ldur x11, [x29, #-24] // 8-byte Folded Reload
ldur w13, [x29, #-68] // 4-byte Folded Reload
ldur w14, [x29, #-72] // 4-byte Folded Reload
ldr x9, [sp, #96] // 8-byte Folded Reload
mov w10, w9 // move to w10
and w10, w10, w14, lsl #1 // but immediately overwrite w10
eor w10, w10, w13
asr x13, x11, #63
add x11, x11, x13
eor x11, x11, x13
stur x12, [x29, #-112] // 8-byte Folded Spill
stur x11, [x29, #-104] // 8-byte Folded Spill
stur w10, [x29, #-92] // 4-byte Folded Spill
stur x9, [x29, #-88] // 8-byte Folded Spill
stur x8, [x29, #-80] // 8-byte Folded Spill
b .LBB200_17
```

why not use one instruction `and w10, w9, w14, lsl #1` instead of the two instruction above?

another problem:

```
add x11, x11, x13
subs x8, x8, x11
cset w14, ne
// implicit-def: $x8
mov w8, w14
sbfx x11, x8, #0, #1
mov w8, #2 // =0x2
// kill: def $x8 killed $w8
and w14, w14, #0x1 // I think this instruction is redundant?
ands w14, w14, #0x1
csinc x8, x8, xzr, ne
subs x10, x10, #31
add x12, x12, x13
subs x10, x10, x12
```

davemgreen commented 6 months ago

Hi. Do you have reproducers for these problems? Thanks.

llvm / llvm-project

[Question] Weird AArch64 assemblies #86918