earlephilhower / esp-quick-toolchain

GCC toolchain for esp8266/arduino on MacOS, Linux, ARM64, Raspberry Pi, and Windows
87 stars 24 forks source link

GCC: Try to avoid using L32R to load 32/64bit constant #20

Closed jjsuwa-sys3175 closed 3 years ago

jjsuwa-sys3175 commented 3 years ago

example:

/* MOVI.N + SLLI  */
unsigned int test_reverse_msb(unsigned int a) {
    return a ^ 0x80000000U;  /* -0x20 << 26 */
}

/* split 64-bit constant loading */
unsigned long long test_64bit_const_0(void) {
    return 0xFFFFFFFFFFFFFFFFULL;  /* lowpart: -1, highpart: -1 */
}
unsigned long long test_64bit_const_1(void) {
    return 0x5000000000AC0000ULL;  /* lowpart: 0x56 << 17,  highpart: 0x50 << 24 */
}
unsigned long long test_64bit_const_2(void) {
    return 0x7D9FD4312C64B4DAULL;  /* 2 pairs of `L32R` + dword const in litpool */
}

results:

[before]
    .file   "test.c"
    .text
    .literal_position
    .literal .LC0, -2147483648
    .align  4
    .global test_reverse_msb
    .type   test_reverse_msb, @function
test_reverse_msb:
    l32r    a3, .LC0
    xor a2, a2, a3
    ret.n
    .size   test_reverse_msb, .-test_reverse_msb
    .literal_position
    .literal .LC1, -1, -1
    .align  4
    .global test_64bit_const_0
    .type   test_64bit_const_0, @function
test_64bit_const_0:
    l32r    a2, .LC1
    l32r    a3, .LC1+4
    ret.n
    .size   test_64bit_const_0, .-test_64bit_const_0
    .literal_position
    .literal .LC2, 11272192, 1342177280
    .align  4
    .global test_64bit_const_1
    .type   test_64bit_const_1, @function
test_64bit_const_1:
    l32r    a2, .LC2
    l32r    a3, .LC2+4
    ret.n
    .size   test_64bit_const_1, .-test_64bit_const_1
    .literal_position
    .literal .LC3, 744797402, 2107626545
    .align  4
    .global test_64bit_const_2
    .type   test_64bit_const_2, @function
test_64bit_const_2:
    l32r    a2, .LC3
    l32r    a3, .LC3+4
    ret.n
    .size   test_64bit_const_2, .-test_64bit_const_2
    .ident  "GCC: (GNU) 10.2.0"
[after]
    .file   "test.c"
    .text
    .literal_position
    .align  4
    .global test_reverse_msb
    .type   test_reverse_msb, @function
test_reverse_msb:
    movi.n  a3, -0x20
    slli    a3, a3, 26
    xor a2, a2, a3
    ret.n
    .size   test_reverse_msb, .-test_reverse_msb
    .literal_position
    .align  4
    .global test_64bit_const_0
    .type   test_64bit_const_0, @function
test_64bit_const_0:
    movi.n  a3, -1
    mov.n   a2, a3
    ret.n
    .size   test_64bit_const_0, .-test_64bit_const_0
    .literal_position
    .align  4
    .global test_64bit_const_1
    .type   test_64bit_const_1, @function
test_64bit_const_1:
    movi.n  a2, 0x56
    movi.n  a3, 0x50
    slli    a2, a2, 17
    slli    a3, a3, 24
    ret.n
    .size   test_64bit_const_1, .-test_64bit_const_1
    .literal_position
    .literal .LC0, 744797402
    .literal .LC1, 2107626545
    .align  4
    .global test_64bit_const_2
    .type   test_64bit_const_2, @function
test_64bit_const_2:
    l32r    a2, .LC0
    l32r    a3, .LC1
    ret.n
    .size   test_64bit_const_2, .-test_64bit_const_2
    .ident  "GCC: (GNU) 10.2.0"
jcmvbkbc commented 3 years ago

@jjsuwa-sys3175 I agree with the idea, but I'd do it a bit differently:

Something like this:

diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c
index be1eb21a0b60..8d3ce39da73d 100644
--- a/gcc/config/xtensa/xtensa.c
+++ b/gcc/config/xtensa/xtensa.c
@@ -1082,6 +1082,21 @@ xtensa_emit_move_sequence (rtx *operands, machine_mode mode)

       if (! TARGET_AUTO_LITPOOLS && ! TARGET_CONST16)
        {
+         /* Try to emit MOVI + SLLI sequence, that is smaller
+            than L32R + literal.  */
+         if (optimize_size && mode == SImode && register_operand (dst, mode))
+           {
+             HOST_WIDE_INT srcval = INTVAL (src);
+             int shift = ctz_hwi (srcval);
+
+             if ( shift > 0 && xtensa_simm12b (srcval >> shift))
+               {
+                 emit_move_insn (dst, GEN_INT (srcval >> shift));
+                 emit_insn (gen_ashlsi3_internal (dst, dst, GEN_INT (shift)));
+                 return 1;
+               }
+           }
+
          src = force_const_mem (SImode, src);
          operands[1] = src;
        }
diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 671c4bea144f..5fbe4ad4af9f 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -727,8 +727,23 @@
        (match_operand:DI 1 "general_operand" ""))]
   ""
 {
-  if (CONSTANT_P (operands[1]) && !TARGET_CONST16)
-    operands[1] = force_const_mem (DImode, operands[1]);
+  if (CONSTANT_P (operands[1]))
+    {
+      /* Split in halves if 64-bit Const-to-Reg moves
+        because of offering further optimization opportunities.  */
+      if (register_operand (operands[0], DImode))
+       {
+         rtx first, second;
+
+         split_double (operands[1], &first, &second);
+         emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), first));
+         emit_insn (gen_movsi (gen_highpart (SImode, operands[0]), second));
+         DONE;
+       }
+
+      if (!TARGET_CONST16)
+       operands[1] = force_const_mem (DImode, operands[1]);
+    }

   if (!register_operand (operands[0], DImode)
       && !register_operand (operands[1], DImode))
jjsuwa-sys3175 commented 3 years ago

@jcmvbkbc LGTM, and i respect your decision. thx. (note: the patch above is detabbed and thus will be rejected)

built the toolchain indeed and checked the assembly emission... (GCC options: -Os -fno-inline-functions -mlongcalls -mtext-section-literals -Wextra -S)

unsigned int test_reverse_msb(unsigned int a) {
    return a ^ 0x80000000U;  /* -1 << 31 */
}

unsigned long long test_64bit_const_0(void) {
    return 0xFFFFFFFFFFFFFFFFULL;  /* lowpart: -1, highpart: -1 */
}

unsigned long long test_64bit_const_1(void) {
    return 0x5550000000AC0000ULL;  /* lowpart: 0x2b << 18,  highpart: 0x555 << 20 */
}
    .file   "test.c"
    .text
    .literal_position
    .align  4
    .global test_reverse_msb
    .type   test_reverse_msb, @function
test_reverse_msb:
    movi.n  a3, -1
    slli    a3, a3, 31
    xor a2, a2, a3
    ret.n
    .size   test_reverse_msb, .-test_reverse_msb
    .literal_position
    .align  4
    .global test_64bit_const_0
    .type   test_64bit_const_0, @function
test_64bit_const_0:
    movi.n  a3, -1
    mov.n   a2, a3
    ret.n
    .size   test_64bit_const_0, .-test_64bit_const_0
    .literal_position
    .align  4
    .global test_64bit_const_1
    .type   test_64bit_const_1, @function
test_64bit_const_1:
    movi.n  a2, 0x2b
    movi    a3, 0x555
    slli    a2, a2, 18
    slli    a3, a3, 20
    ret.n
    .size   test_64bit_const_1, .-test_64bit_const_1
    .ident  "GCC: (GNU) 10.2.0"

something went wrong... yes, seems to be good.

jcmvbkbc commented 3 years ago

Ok, I'll submit it then. @jjsuwa-sys3175 May I specify you as the author? If so what are the name and email that should I use?

jjsuwa-sys3175 commented 3 years ago

May I specify you as the author?

of course yes :)

If so what's your email that should I use?

jjsuwa_sys3175@yahoo.co.jp (primary), or jjsuwa.sys3175@gmail.com (alternative) i'd receive both addresses.

jcmvbkbc commented 3 years ago

Done