Patch Status 25997-V2_RISCV_Fix_ICE_for_the_fusion_case_from_vsetvl_to_scalar_movePR111927-1

github-actions[bot] commented 12 months ago

Precommit CI Run information

Logs can be found in the associated Github Actions run: https://github.com/ewlu/gcc-precommit-ci/actions/runs/6619534962

Patch information

Applied patches: 1 -> 1 Associated series: https://patchwork.sourceware.org/project/gcc/list/?series=25997 Last patch applied: https://patchwork.sourceware.org/project/gcc/patch/20231023094034.1728130-1-juzhe.zhong@rivai.ai/

Notes

Build and Testsuite checks are in beta testing stages. Results are unstable and may be inaccurate.

github-actions[bot] commented 12 months ago

Lint Status

The following issues have been found with 25997-V2_RISCV_Fix_ICE_for_the_fusion_case_from_vsetvl_to_scalar_movePR111927-1 using gcc's ./contrib/check_GNU_style.py. Please use your best judgement when resolving these issues. These are only warnings and do not need to be resolved in order to merge your patch. If any of these warnings seem like false positives that could be guarded against, please contact me: patchworks-ci@rivosinc.com.

=== ERROR type #1: there should be exactly one space between function name and parenthesis (1 error(s)) ===
gcc/config/riscv/riscv-vsetvl.cc:1550:19:      NEXT: vsetvl a5(VL), a4(AVL) ...

Additional information

github-actions[bot] commented 12 months ago

Apply Status

Target	Status
Baseline hash: https://github.com/gcc-mirror/gcc/commit/c85f74813f6a6b73f7f303d0678b3d1c00f8adc2	Failed
Tip of tree hash: https://github.com/gcc-mirror/gcc/commit/02aa322c8cfd3f60fa5a3a0eee4340bb644261fe	Failed

Command

> git am ../patches/*.patch --whitespace=fix -q --3way

Output

error: sha1 information is lacking or useless (gcc/config/riscv/riscv-vsetvl.cc).
error: could not build fake ancestor
hint: Use 'git am --show-current-patch=diff' to see the failed patch
Patch failed at 0001 RISC-V: Fix ICE for the fusion case from vsetvl to scalar move[PR111927]
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".
---
 gcc/config/riscv/riscv-vsetvl.cc              |  23 +++
 .../gcc.target/riscv/rvv/vsetvl/pr111927.c    | 170 ++++++++++++++++++
 2 files changed, 193 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr111927.c

diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index 47b459fddd4..f3922a051c5 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -1541,6 +1541,29 @@ private:
   inline bool can_use_next_avl_p (const vsetvl_info &prev,
                  const vsetvl_info &next)
   {
+    /* Forbid the AVL/VL propagation if VL of NEXT is used
+       by non-RVV instructions.  This is because:
+
+    bb 2:
+      PREV: scalar move (no AVL)
+    bb 3:
+      NEXT: vsetvl a5(VL), a4(AVL) ...
+      branch a5,zero
+
+       Since a user vsetvl instruction has no side effects and
+       should already have been placed in the correct and optimal
+       location of the program by the previous passes, it is
+       unreasonable for the VSETVL pass to move it to another place
+       if its VL is used by non-RVV instructions.
+
+       Note: We only forbid the cases where VL is used by following
+       non-RVV instructions, which would cause issues.  We don't forbid
+       other cases since they won't cause correctness issues and we still
+       want more demand info to be fused backward.  The later LCM
+       algorithm should know the optimal location of the vsetvl.  */
+    if (next.has_vl () && next.vl_used_by_non_rvv_insn_p ())
+      return false;
+
     if (!next.has_nonvlmax_reg_avl () && !next.has_vl ())
       return true;

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr111927.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr111927.c
new file mode 100644
index 00000000000..ab599add57f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr111927.c
@@ -0,0 +1,170 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+#define RISCV_MATH_LOOPUNROLL
+#define RISCV_MATH_VECTOR
+typedef  float float32_t;
+
+  typedef struct  /* Instance structure passed to riscv_lms_f32 as its first argument (S). */
+  {
+          uint16_t numTaps;    /**< Number of coefficients in the filter; riscv_lms_f32 copies it into a local uint32_t. */
+          float32_t *pState;   /**< Points to the state variable array. The array is of length numTaps+blockSize-1. */
+          float32_t *pCoeffs;  /**< Points to the coefficient array. The array is of length numTaps. */
+          float32_t mu;        /**< Step size that controls filter coefficient updates; riscv_lms_f32 multiplies the error by it (w = e * mu). */
+  } riscv_lms_instance_f32;
+
+
+void riscv_lms_f32(  /* Compile-only reproducer for PR111927: per input sample, accumulates state*coefficients into acc, stores output and error, then updates the coefficients in place. */
+  const riscv_lms_instance_f32 * S,  /* Filter instance; numTaps, pState, pCoeffs and mu are read from it */
+  const float32_t * pSrc,            /* Input samples; one is consumed per outer-loop iteration */
+        float32_t * pRef,            /* Reference samples; one is consumed per outer-loop iteration */
+        float32_t * pOut,            /* Receives acc for each sample */
+        float32_t * pErr,            /* Receives the error (*pRef - acc) for each sample */
+        uint32_t blockSize)          /* Number of outer-loop iterations (samples processed) */
+{
+        float32_t *pState = S->pState;                 /* State pointer */
+        float32_t *pCoeffs = S->pCoeffs;               /* Coefficient pointer */
+        float32_t *pStateCurnt;                        /* Points to the current sample of the state */
+        float32_t *px, *pb;                            /* Temporary pointers for state and coefficient buffers */
+        float32_t mu = S->mu;                          /* Adaptive factor */
+        float32_t acc, e;                              /* Accumulator, error */
+        float32_t w;                                   /* Weight factor */
+        uint32_t numTaps = S->numTaps;                 /* Number of filter coefficients in the filter */
+        uint32_t tapCnt, blkCnt;                       /* Loop counters; both are declared without an initializer */
+
+  /* Initialize the error and the coefficient-update weight */
+  e = 0.0f;
+  w = 0.0f;
+
+  /* S->pState points to state array which contains previous frame (numTaps - 1) samples */
+  /* pStateCurnt points to the location where the new input data should be written */
+  pStateCurnt = &(S->pState[(numTaps - 1U)]);
+
+  /* Initialise the outer loop count: one iteration per input sample */
+  blkCnt = blockSize;
+
+  while (blkCnt > 0U)
+  {
+    /* Copy the new input sample into the state buffer */
+    *pStateCurnt++ = *pSrc++;
+
+    /* Initialize pState pointer */
+    px = pState;
+
+    /* Initialize coefficient pointer */
+    pb = pCoeffs;
+
+    /* Set the accumulator to zero */
+    acc = 0.0f;
+    uint32_t vblkCnt = numTaps;                               /* Elements still to be handled by the vector loop */
+    size_t l;
+    vfloat32m8_t vx, vy;
+    vfloat32m1_t temp00m1;
+    l = __riscv_vsetvl_e32m1(1);
+    temp00m1 = __riscv_vfmv_v_f_f32m1(0, l);
+    for (; (l = __riscv_vsetvl_e32m8(vblkCnt)) > 0; vblkCnt -= l) {
+      vx = __riscv_vle32_v_f32m8(px, l);
+      px += l;
+      vy = __riscv_vle32_v_f32m8(pb, l);
+      pb += l;
+      temp00m1 = __riscv_vfredusum_vs_f32m8_f32m1(__riscv_vfmul_vv_f32m8(vx, vy, l), temp00m1, l);
+    }
+    acc += __riscv_vfmv_f_s_f32m1_f32(temp00m1);
+
+    while (tapCnt > 0U)  /* NOTE(review): tapCnt has not been assigned on the first pass (its first assignment follows the outer loop); left untouched because this is a compile-only reproducer */
+    {
+      /* Perform the multiply-accumulate */
+      acc += (*px++) * (*pb++);
+
+      /* Decrement the loop counter */
+      tapCnt--;
+    }
+    /* Store the result from accumulator into the destination buffer. */
+    *pOut++ = acc;
+
+    /* Compute and store error */
+    e = (float32_t) *pRef++ - acc;
+    *pErr++ = e;
+
+    /* Calculation of Weighting factor for updating filter coefficients */
+    w = e * mu;
+
+    /* Initialize pState pointer */
+    /* Advance state pointer by 1 for the next sample */
+    px = pState++;
+
+    /* Initialize coefficient pointer */
+    pb = pCoeffs;
+
+    vblkCnt = numTaps;
+    for (; (l = __riscv_vsetvl_e32m8(vblkCnt)) > 0; vblkCnt -= l) {
+      vx = __riscv_vle32_v_f32m8(px, l);
+      px += l;
+      __riscv_vse32_v_f32m8(pb, __riscv_vfadd_vv_f32m8(__riscv_vfmul_vf_f32m8(vx, w, l), __riscv_vle32_v_f32m8(pb, l), l) , l);
+      pb += l;
+    }
+    while (tapCnt > 0U)  /* Same counter as the scalar loop above; nothing assigns tapCnt in between */
+    {
+      /* Update the coefficient: add the weighted state sample */
+      *pb += w * (*px++);
+      pb++;
+
+      /* Decrement loop counter */
+      tapCnt--;
+    }
+    /* Decrement the outer (per-sample) loop counter */
+    blkCnt--;
+  }
+
+  /* Processing is complete.
+     Now copy the last numTaps - 1 samples to the start of the state buffer.
+     This prepares the state buffer for the next function call. */
+
+  /* Points to the start of the pState buffer */
+  pStateCurnt = S->pState;
+
+  /* Copy data with the vector loop */
+
+    uint32_t vblkCnt = (numTaps - 1U);                               /* Elements still to be copied by the vector loop */
+    size_t l;
+    for (; (l = __riscv_vsetvl_e32m8(vblkCnt)) > 0; vblkCnt -= l) {
+      __riscv_vse32_v_f32m8(pStateCurnt, __riscv_vle32_v_f32m8(pState, l) , l);
+      pState += l;
+      pStateCurnt += l;
+    }
+
+
+  /* Loop unrolling: copy 4 samples per iteration. */
+  tapCnt = (numTaps - 1U) >> 2U;
+
+  while (tapCnt > 0U)
+  {
+    *pStateCurnt++ = *pState++;
+    *pStateCurnt++ = *pState++;
+    *pStateCurnt++ = *pState++;
+    *pStateCurnt++ = *pState++;
+
+    /* Decrement loop counter */
+    tapCnt--;
+  }
+
+  /* Loop unrolling: count of remaining samples */
+  tapCnt = (numTaps - 1U) & 0x3U;  /* NOTE(review): this value is overwritten by the assignment below before it is read */
+
+
+
+  /* Initialize tapCnt with number of samples */
+  tapCnt = (numTaps - 1U);
+
+
+
+  while (tapCnt > 0U)
+  {
+    *pStateCurnt++ = *pState++;
+
+    /* Decrement loop counter */
+    tapCnt--;
+  }
+}

Additional information

ewlu / gcc-precommit-ci