ewlu / gcc-precommit-ci

1 stars 0 forks source link

Patch Status 34061-RISCV_Use_widening_shift_for_scattergather_if_applicable-1 #1557

Open github-actions[bot] opened 2 weeks ago

github-actions[bot] commented 2 weeks ago

Precommit CI Run information

Logs can be found in the associated Github Actions run: https://github.com/ewlu/gcc-precommit-ci/actions/runs/9130778601

Patch information

Applied patches: 1 -> 1
Associated series: https://patchwork.sourceware.org/project/gcc/list/?series=34061
Last patch applied: https://patchwork.sourceware.org/project/gcc/patch/e871b68e-3ee4-4d94-bf44-ef98efc70911@gmail.com/
Patch id: 90387

Build Targets

Some targets are built as multilibs. If a build target ends with multilib, please refer to the table below to see all the targets within that multilib.

Target name -march string
newlib-rv64gc-lp64d-multilib rv32gc-ilp32d, rv64gc-lp64d
newlib-rv64gcv-lp64d-multilib rv64gcv-lp64d
linux-rv64gcv-lp64d-multilib rv32gcv-ilp32d, rv64gcv-lp64d

Target Information

Target Shorthand -march string
Bitmanip gc_zba_zbb_zbc_zbs

Notes

Testsuite results use a more lenient allowlist to reduce error reporting with flaky tests. Please take a look at the current allowlist. Results come from a sum file comparator. Each patch is applied to a well known, non-broken baseline taken from our gcc postcommit framework (here) which runs the full gcc testsuite every 6 hours. If you have any questions or encounter any issues which may seem like false-positives, please contact us at patchworks-ci@rivosinc.com

github-actions[bot] commented 2 weeks ago

Lint Status

The following issues have been found with 34061-RISCV_Use_widening_shift_for_scattergather_if_applicable-1 using gcc's ./contrib/check_GNU_style.py. Please use your best judgement when resolving these issues. These are only warnings and do not need to be resolved in order to merge your patch. If any of these warnings seem like false-positives that could be guarded against please contact me: patchworks-ci@rivosinc.com.

=== ERROR type #1: blocks of 8 spaces should be replaced with tabs (2 error(s)) ===
gcc/config/riscv/vector-crypto.md:298:0:████████ (match_operand:<V_DOUBLE_TRUNC> 4 "vector_shift_operand"  "vrvk"))
gcc/config/riscv/vector-crypto.md:319:0:████████ (match_operand 4 "pmode_reg_or_uimm5_operand"      "   rK,    rK"))

=== ERROR type #2: lines should not exceed 80 characters (3 error(s)) ===
gcc/config/riscv/riscv-v.cc:4073:80:              emit_insn (gen_extend_insn (tmp, vec_offset, new_idx_mode, idx_mode,
gcc/config/riscv/vector-crypto.md:319:80:         (match_operand 4 "pmode_reg_or_uimm5_operand"                "   rK,    rK"))
gcc/config/riscv/vector.md:756:80:                                vfwcvtftof,vmsfs,vired,viwred,vfredu,vfredo,vfwredu,vfwredo,vwsll")

Additional information

github-actions[bot] commented 2 weeks ago

Apply Status

Target Status
Baseline hash: https://github.com/gcc-mirror/gcc/commit/d477d683d5c6db90c80d348c795709ae6444ba7a Failed
Tip of tree hash: https://github.com/gcc-mirror/gcc/commit/b59de4113262f2bee14147eb17eb3592f03d9556 Failed

Command

> git am ../patches/*.patch --whitespace=fix -q --3way --empty=drop

Output

error: sha1 information is lacking or useless (gcc/config/riscv/vector-crypto.md).
error: could not build fake ancestor
hint: Use 'git am --show-current-patch=diff' to see the failed patch
hint: When you have resolved this problem, run "git am --continue".
hint: If you prefer to skip this patch, run "git am --skip" instead.
hint: To restore the original branch and stop patching, run "git am --abort".
hint: Disable this message with "git config advice.mergeConflict false"
Patch failed at 0001 RISC-V: Use widening shift for scatter/gather if applicable.
---
 gcc/config/riscv/riscv-v.cc                   |  42 +++++--
 gcc/config/riscv/vector-crypto.md             |   4 +-
 gcc/config/riscv/vector.md                    |   4 +-
 .../gather-scatter/gather_load_64-12-zvbb.c   | 113 ++++++++++++++++++
 gcc/testsuite/lib/target-supports.exp         |  48 +++++++-
 5 files changed, 193 insertions(+), 18 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/gather_load_64-12-zvbb.c

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 814c5febabe..8b41b9c7774 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -4016,7 +4016,7 @@ expand_gather_scatter (rtx *ops, bool is_load)
 {
   rtx ptr, vec_offset, vec_reg;
   bool zero_extend_p;
-  int scale_log2;
+  int shift;
   rtx mask = ops[5];
   rtx len = ops[6];
   if (is_load)
@@ -4025,7 +4025,7 @@ expand_gather_scatter (rtx *ops, bool is_load)
       ptr = ops[1];
       vec_offset = ops[2];
       zero_extend_p = INTVAL (ops[3]);
-      scale_log2 = exact_log2 (INTVAL (ops[4]));
+      shift = exact_log2 (INTVAL (ops[4]));
     }
   else
     {
@@ -4033,7 +4033,7 @@ expand_gather_scatter (rtx *ops, bool is_load)
       ptr = ops[0];
       vec_offset = ops[1];
       zero_extend_p = INTVAL (ops[2]);
-      scale_log2 = exact_log2 (INTVAL (ops[3]));
+      shift = exact_log2 (INTVAL (ops[3]));
     }

   machine_mode vec_mode = GET_MODE (vec_reg);
@@ -4043,9 +4043,12 @@ expand_gather_scatter (rtx *ops, bool is_load)
   poly_int64 nunits = GET_MODE_NUNITS (vec_mode);
   bool is_vlmax = is_vlmax_len_p (vec_mode, len);

+  bool use_widening_shift = false;
+
   /* Extend the offset element to address width.  */
   if (inner_offsize < BITS_PER_WORD)
     {
+      use_widening_shift = TARGET_ZVBB && zero_extend_p && shift == 1;
       /* 7.2. Vector Load/Store Addressing Modes.
     If the vector offset elements are narrower than XLEN, they are
     zero-extended to XLEN before adding to the ptr effective address. If
@@ -4054,8 +4057,8 @@ expand_gather_scatter (rtx *ops, bool is_load)
     raise an illegal instruction exception if the EEW is not supported for
     offset elements.

-    RVV spec only refers to the scale_log == 0 case.  */
-      if (!zero_extend_p || scale_log2 != 0)
+    RVV spec only refers to the shift == 0 case.  */
+      if (!zero_extend_p || shift)
    {
      if (zero_extend_p)
        inner_idx_mode
@@ -4064,19 +4067,32 @@ expand_gather_scatter (rtx *ops, bool is_load)
        inner_idx_mode = int_mode_for_size (BITS_PER_WORD, 0).require ();
      machine_mode new_idx_mode
        = get_vector_mode (inner_idx_mode, nunits).require ();
-     rtx tmp = gen_reg_rtx (new_idx_mode);
-     emit_insn (gen_extend_insn (tmp, vec_offset, new_idx_mode, idx_mode,
-                     zero_extend_p ? true : false));
-     vec_offset = tmp;
+     if (!use_widening_shift)
+       {
+         rtx tmp = gen_reg_rtx (new_idx_mode);
+         emit_insn (gen_extend_insn (tmp, vec_offset, new_idx_mode, idx_mode,
+                     zero_extend_p ? true : false));
+         vec_offset = tmp;
+       }
      idx_mode = new_idx_mode;
    }
     }

-  if (scale_log2 != 0)
+  if (shift)
     {
-      rtx tmp = expand_binop (idx_mode, ashl_optab, vec_offset,
-                 gen_int_mode (scale_log2, Pmode), NULL_RTX, 0,
-                 OPTAB_DIRECT);
+      rtx tmp;
+      if (!use_widening_shift)
+   tmp = expand_binop (idx_mode, ashl_optab, vec_offset,
+               gen_int_mode (shift, Pmode), NULL_RTX, 0,
+               OPTAB_DIRECT);
+      else
+   {
+     tmp = gen_reg_rtx (idx_mode);
+     insn_code icode = code_for_pred_vwsll_scalar (idx_mode);
+     rtx ops[] = {tmp, vec_offset, const1_rtx};
+     emit_vlmax_insn (icode, BINARY_OP, ops);
+   }
+
       vec_offset = tmp;
     }

diff --git a/gcc/config/riscv/vector-crypto.md b/gcc/config/riscv/vector-crypto.md
index 24822e2712c..0ddc2f3f3c6 100755
--- a/gcc/config/riscv/vector-crypto.md
+++ b/gcc/config/riscv/vector-crypto.md
@@ -295,7 +295,7 @@ (define_insn "@pred_vwsll<mode>"
        (ashift:VWEXTI
          (zero_extend:VWEXTI
            (match_operand:<V_DOUBLE_TRUNC> 3 "register_operand" "vr"))
-         (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand"  "vr"))
+         (match_operand:<V_DOUBLE_TRUNC> 4 "vector_shift_operand"  "vrvk"))
        (match_operand:VWEXTI 2 "vector_merge_operand" "0vu")))]
   "TARGET_ZVBB"
   "vwsll.v%o4\t%0,%3,%4%p1"
@@ -316,7 +316,7 @@ (define_insn "@pred_vwsll<mode>_scalar"
        (ashift:VWEXTI
          (zero_extend:VWEXTI
            (match_operand:<V_DOUBLE_TRUNC> 3 "register_operand" "   vr,    vr"))
-         (match_operand:<VSUBEL> 4 "pmode_reg_or_uimm5_operand" "   rK,    rK"))
+         (match_operand 4 "pmode_reg_or_uimm5_operand"     "   rK,    rK"))
        (match_operand:VWEXTI 2 "vector_merge_operand"           "   vu,    0")))]
   "TARGET_ZVBB"
   "vwsll.v%o4\t%0,%3,%4%p1"
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 248461302dd..c6a3845dc13 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -750,10 +750,10 @@ (define_attr "mode_idx" ""
           (const_int 1)

           (eq_attr "type" "vssegte,vmpop,vmffs")
-          (const_int 2)       
+          (const_int 2)

           (eq_attr "type" "vstux,vstox,vssegts,vssegtux,vssegtox,vfcvtftoi,vfwcvtitof,vfwcvtftoi,
-               vfwcvtftof,vmsfs,vired,viwred,vfredu,vfredo,vfwredu,vfwredo")
+               vfwcvtftof,vmsfs,vired,viwred,vfredu,vfredo,vfwredu,vfwredo,vwsll")
           (const_int 3)

           (eq_attr "type" "viwalu,viwmul,viwmuladd,vfwalu,vfwmul,vfwmuladd")
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/gather_load_64-12-zvbb.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/gather_load_64-12-zvbb.c
new file mode 100644
index 00000000000..11a4031f47b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/gather_load_64-12-zvbb.c
@@ -0,0 +1,113 @@
+/* { dg-do compile } */
+/* { dg-add-options "riscv_v" } */
+/* { dg-add-options "riscv_zvbb" } */
+/* { dg-additional-options "-fno-vect-cost-model -fdump-tree-vect-details -mrvv-max-lmul=m4" } */
+
+#include <stdint-gcc.h>
+
+#define TEST_LOOP(DATA_TYPE, INDEX_TYPE)                                       \
+  void __attribute__ ((noinline, noclone))                                     \
+  f_##DATA_TYPE##_##INDEX_TYPE (DATA_TYPE *restrict y, DATA_TYPE *restrict x,  \
+               INDEX_TYPE *restrict index)                    \
+  {                                                                            \
+    for (int i = 0; i < 100; ++i)                                              \
+      {                                                                        \
+   y[i * 2] = x[index[i * 2]] + 1;                                        \
+   y[i * 2 + 1] = x[index[i * 2 + 1]] + 2;                                \
+      }                                                                        \
+  }
+
+TEST_LOOP (int8_t, int8_t)
+TEST_LOOP (uint8_t, int8_t)
+TEST_LOOP (int16_t, int8_t)
+TEST_LOOP (uint16_t, int8_t)
+TEST_LOOP (int32_t, int8_t)
+TEST_LOOP (uint32_t, int8_t)
+TEST_LOOP (int64_t, int8_t)
+TEST_LOOP (uint64_t, int8_t)
+TEST_LOOP (_Float16, int8_t)
+TEST_LOOP (float, int8_t)
+TEST_LOOP (double, int8_t)
+TEST_LOOP (int8_t, int16_t)
+TEST_LOOP (uint8_t, int16_t)
+TEST_LOOP (int16_t, int16_t)
+TEST_LOOP (uint16_t, int16_t)
+TEST_LOOP (int32_t, int16_t)
+TEST_LOOP (uint32_t, int16_t)
+TEST_LOOP (int64_t, int16_t)
+TEST_LOOP (uint64_t, int16_t)
+TEST_LOOP (_Float16, int16_t)
+TEST_LOOP (float, int16_t)
+TEST_LOOP (double, int16_t)
+TEST_LOOP (int8_t, int32_t)
+TEST_LOOP (uint8_t, int32_t)
+TEST_LOOP (int16_t, int32_t)
+TEST_LOOP (uint16_t, int32_t)
+TEST_LOOP (int32_t, int32_t)
+TEST_LOOP (uint32_t, int32_t)
+TEST_LOOP (int64_t, int32_t)
+TEST_LOOP (uint64_t, int32_t)
+TEST_LOOP (_Float16, int32_t)
+TEST_LOOP (float, int32_t)
+TEST_LOOP (double, int32_t)
+TEST_LOOP (int8_t, int64_t)
+TEST_LOOP (uint8_t, int64_t)
+TEST_LOOP (int16_t, int64_t)
+TEST_LOOP (uint16_t, int64_t)
+TEST_LOOP (int32_t, int64_t)
+TEST_LOOP (uint32_t, int64_t)
+TEST_LOOP (int64_t, int64_t)
+TEST_LOOP (uint64_t, int64_t)
+TEST_LOOP (_Float16, int64_t)
+TEST_LOOP (float, int64_t)
+TEST_LOOP (double, int64_t)
+TEST_LOOP (int8_t, uint8_t)
+TEST_LOOP (uint8_t, uint8_t)
+TEST_LOOP (int16_t, uint8_t)
+TEST_LOOP (uint16_t, uint8_t)
+TEST_LOOP (int32_t, uint8_t)
+TEST_LOOP (uint32_t, uint8_t)
+TEST_LOOP (int64_t, uint8_t)
+TEST_LOOP (uint64_t, uint8_t)
+TEST_LOOP (_Float16, uint8_t)
+TEST_LOOP (float, uint8_t)
+TEST_LOOP (double, uint8_t)
+TEST_LOOP (int8_t, uint16_t)
+TEST_LOOP (uint8_t, uint16_t)
+TEST_LOOP (int16_t, uint16_t)
+TEST_LOOP (uint16_t, uint16_t)
+TEST_LOOP (int32_t, uint16_t)
+TEST_LOOP (uint32_t, uint16_t)
+TEST_LOOP (int64_t, uint16_t)
+TEST_LOOP (uint64_t, uint16_t)
+TEST_LOOP (_Float16, uint16_t)
+TEST_LOOP (float, uint16_t)
+TEST_LOOP (double, uint16_t)
+TEST_LOOP (int8_t, uint32_t)
+TEST_LOOP (uint8_t, uint32_t)
+TEST_LOOP (int16_t, uint32_t)
+TEST_LOOP (uint16_t, uint32_t)
+TEST_LOOP (int32_t, uint32_t)
+TEST_LOOP (uint32_t, uint32_t)
+TEST_LOOP (int64_t, uint32_t)
+TEST_LOOP (uint64_t, uint32_t)
+TEST_LOOP (_Float16, uint32_t)
+TEST_LOOP (float, uint32_t)
+TEST_LOOP (double, uint32_t)
+TEST_LOOP (int8_t, uint64_t)
+TEST_LOOP (uint8_t, uint64_t)
+TEST_LOOP (int16_t, uint64_t)
+TEST_LOOP (uint16_t, uint64_t)
+TEST_LOOP (int32_t, uint64_t)
+TEST_LOOP (uint32_t, uint64_t)
+TEST_LOOP (int64_t, uint64_t)
+TEST_LOOP (uint64_t, uint64_t)
+TEST_LOOP (_Float16, uint64_t)
+TEST_LOOP (float, uint64_t)
+TEST_LOOP (double, uint64_t)
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 88 "vect" } } */
+/* { dg-final { scan-tree-dump " \.MASK_LEN_GATHER_LOAD" "vect" } } */
+/* { dg-final { scan-tree-dump-not " \.GATHER_LOAD" "vect" } } */
+/* { dg-final { scan-tree-dump-not " \.MASK_GATHER_LOAD" "vect" } } */
+/* { dg-final { scan-assembler "vwsll.vi" } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 3a55b2a4159..999e2e974ef 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -1965,6 +1965,17 @@ proc check_effective_target_riscv_zbb { } {
     }]
 }

+# Return 1 if the target arch supports the Zvbb extension, 0 otherwise.
+# Cache the result.
+
+proc check_effective_target_riscv_zvbb { } {
+    return [check_no_compiler_messages riscv_ext_zvbb assembly {
+       #ifndef __riscv_zvbb
+       #error "Not __riscv_zvbb"
+       #endif
+    }]
+}
+
 # Return 1 if the target arch supports the XTheadVector extension, 0 otherwise.
 # Cache the result.

@@ -2053,10 +2064,33 @@ proc check_effective_target_riscv_zvfh_ok { } {
     return 0
 }

+proc check_effective_target_riscv_zvbb_ok { } {
+    # If the target already supports zvbb without any added options,
+    # we may assume we can execute just fine.
+    if { [check_effective_target_riscv_zvbb] } {
+   return 1
+    }
+
+    # check if we can execute vector insns with the given hardware or
+    # simulator
+    set gcc_march [regsub {[[:alnum:]]*} [riscv_get_arch] &zvbb]
+    if { [check_runtime ${gcc_march}_exec {
+   int main()
+   {
+       asm ("vsetivli zero,8,e16,m1,ta,ma");
+       asm ("vwsll.vi v8,v16,2" : : : "v8");
+       return 0;
+   } } "-march=${gcc_march}"] } {
+       return 1
+   }
+
+    return 0
+}
+
 proc riscv_get_arch { } {
     set gcc_march ""
     # ??? do we neeed to add more extensions to the list below?
-    foreach ext { i m a f d q c v zicsr zifencei zfh zba zbb zbc zbs zvfh ztso } {
+    foreach ext { i m a f d q c v zicsr zifencei zfh zba zbb zbc zbs zvbb zvfh ztso } {
    if { [check_no_compiler_messages  riscv_ext_$ext assembly [string map [list DEF __riscv_$ext] {
        #ifndef DEF
        #error "Not DEF"
@@ -2151,6 +2185,18 @@ proc add_options_for_riscv_zvfh { flags } {
     return "$flags -march=[riscv_get_arch]_zvfh"
 }

+proc add_options_for_riscv_zvbb { flags } {
+    if { [lsearch $flags -march=*] >= 0 } {
+   # If there are multiple -march flags, we have to adjust all of them.
+   set flags [regsub -all -- {(?:^|[[:space:]])-march=[[:alnum:]_.]*} $flags &_zvbb ]
+   return [regsub -all -- {((?:^|[[:space:]])-march=[[:alnum:]_.]*_zvbb[[:alnum:]_.]*)_zvbb} $flags \\1 ]
+    }
+    if { [check_effective_target_riscv_zvbb] } {
+   return "$flags"
+    }
+    return "$flags -march=[riscv_get_arch]_zvbb"
+}
+
 # Return 1 if the target OS supports running SSE executables, 0
 # otherwise.  Cache the result.

Additional information