Open llvmbot opened 7 years ago
given test.c:
/* Reduced reproducer: copies a few header fields from a source buffer to a
 * destination buffer, using addresses that end up at small negative constant
 * offsets from the pointers loaded out of the context. */
typedef struct {
    unsigned char *Src; /* source pointer; the code reads at Src - 26 + k */
    unsigned char *Dst; /* destination base pointer */
    unsigned int Len;   /* destination index is k + (Len - 26) */
} ContextType;

/*
 * Copies the 32-bit fields at indices 4 and 12 and the 16-bit fields at
 * indices 8, 10 and 24 of (pContext->Src - 26) to the same indices, shifted
 * by (pContext->Len - 26), of pContext->Dst.
 *
 * The accesses deliberately go through cast pointers so that the compiler
 * emits the plain ldr/ldrh/str/strh sequence this report is about.
 * NOTE(review): this presumes the buffers are suitably aligned for those
 * widths -- confirm before reusing the snippet outside this reproducer.
 */
void test(ContextType *pContext)
{
    /* Bias the source pointer by -26 so the constant indices below become
     * negative offsets relative to the pointer loaded from the context. */
    unsigned char *Src = (pContext->Src) - 26;
    unsigned int Offset = (pContext->Len) - 26;
    unsigned char *pDst = pContext->Dst;

    unsigned int dest_p1 = *((unsigned int *)(&Src[4]));
    unsigned short dest_p2 = *((unsigned short *)(&Src[8]));
    unsigned short src_p1 = *((unsigned short *)(&Src[10]));
    unsigned int src_p2 = *((unsigned int *)(&Src[12]));
    unsigned short EType = *((unsigned short *)(&Src[24]));

    *((unsigned int *)(&pDst[4 + Offset])) = dest_p1;
    *((unsigned short *)(&pDst[8 + Offset])) = dest_p2;
    *((unsigned short *)(&pDst[10 + Offset])) = src_p1;
    *((unsigned int *)(&pDst[12 + Offset])) = src_p2;
    *((unsigned short *)(&pDst[24 + Offset])) = EType;
}
clang -Os -mcpu=cortex-m3 test.c -o llvm.s generates:
@ BB#0:                                 @ %entry
    .save   {r4, r6, r7, lr}
    push    {r4, r6, r7, lr}
    .setfp  r7, sp, #8
    add     r7, sp, #8
    ldrd    r1, lr, [r0]
    ldr     r0, [r0, #8]
    ldrh    r12, [r1, #-2]
    ldrh    r3, [r1, #-16]
    ldrh    r2, [r1, #-18]
    ldr     r4, [r1, #-22]
    ldr     r1, [r1, #-14]
    add     r0, lr
    str     r4, [r0, #-22]
    strh    r2, [r0, #-18]
    strh    r3, [r0, #-16]
    str     r1, [r0, #-14]
    strh    r12, [r0, #-2]
    pop     {r4, r6, r7, pc}
Since the offsets are all negative, the loads/stores are all wide (32-bit) instructions. The expected code is something like:
    subs    r1, #26
    subs    r0, #26
    ldrh    r12, [r1, #24]
    ldrh    r3, [r1, #10]
    ldrh    r2, [r1, #8]
    ldr     r4, [r1, #4]
    ldr     r1, [r1, #12]
    str     r4, [r0, #4]
    strh    r2, [r0, #8]
    strh    r3, [r0, #10]
    str     r1, [r0, #12]
When there are multiple loads/stores off the same base register, subtracting the common offset from the base once up front lets them use the narrow encodings and saves code size.
Extended Description
given test.c:
/* Reduced reproducer: copies a few header fields from a source buffer to a
 * destination buffer, using addresses that end up at small negative constant
 * offsets from the pointers loaded out of the context. */
typedef struct {
    unsigned char *Src; /* source pointer; the code reads at Src - 26 + k */
    unsigned char *Dst; /* destination base pointer */
    unsigned int Len;   /* destination index is k + (Len - 26) */
} ContextType;

/*
 * Copies the 32-bit fields at indices 4 and 12 and the 16-bit fields at
 * indices 8, 10 and 24 of (pContext->Src - 26) to the same indices, shifted
 * by (pContext->Len - 26), of pContext->Dst.
 *
 * The accesses deliberately go through cast pointers so that the compiler
 * emits the plain ldr/ldrh/str/strh sequence this report is about.
 * NOTE(review): this presumes the buffers are suitably aligned for those
 * widths -- confirm before reusing the snippet outside this reproducer.
 */
void test(ContextType *pContext)
{
    /* Bias the source pointer by -26 so the constant indices below become
     * negative offsets relative to the pointer loaded from the context. */
    unsigned char *Src = (pContext->Src) - 26;
    unsigned int Offset = (pContext->Len) - 26;
    unsigned char *pDst = pContext->Dst;

    unsigned int dest_p1 = *((unsigned int *)(&Src[4]));
    unsigned short dest_p2 = *((unsigned short *)(&Src[8]));
    unsigned short src_p1 = *((unsigned short *)(&Src[10]));
    unsigned int src_p2 = *((unsigned int *)(&Src[12]));
    unsigned short EType = *((unsigned short *)(&Src[24]));

    *((unsigned int *)(&pDst[4 + Offset])) = dest_p1;
    *((unsigned short *)(&pDst[8 + Offset])) = dest_p2;
    *((unsigned short *)(&pDst[10 + Offset])) = src_p1;
    *((unsigned int *)(&pDst[12 + Offset])) = src_p2;
    *((unsigned short *)(&pDst[24 + Offset])) = EType;
}
clang -Os -mcpu=cortex-m3 test.c -o llvm.s generates:
@ BB#0:                                 @ %entry
    .save   {r4, r6, r7, lr}
    push    {r4, r6, r7, lr}
    .setfp  r7, sp, #8
    add     r7, sp, #8
    ldrd    r1, lr, [r0]
    ldr     r0, [r0, #8]
    ldrh    r12, [r1, #-2]
    ldrh    r3, [r1, #-16]
    ldrh    r2, [r1, #-18]
    ldr     r4, [r1, #-22]
    ldr     r1, [r1, #-14]
    add     r0, lr
    str     r4, [r0, #-22]
    strh    r2, [r0, #-18]
    strh    r3, [r0, #-16]
    str     r1, [r0, #-14]
    strh    r12, [r0, #-2]
    pop     {r4, r6, r7, pc}
Since the offsets are all negative, the loads/stores are all wide (32-bit) instructions. The expected code is something like: subs r1, #26 subs r0, #26