Closed linsinan1995 closed 2 years ago
The current implementation cannot use RVP instructions because vector-mode data is not allowed to be placed in GPRs; see https://github.com/pz9115/riscv-gcc/blob/riscv-gcc-experimental-v/gcc/config/riscv/riscv.c#L4986-L4997 for more info. (This PR also includes an RVP fix from commit 8a51f69b54fab099c5cd1f8e54ded052f0c79a55.)
Here is a test case to reproduce the error.
test_rvp.c
// Reproducer: RVP intrinsics called on a 32-bit scalar and on 8-byte
// vector-mode values (four 16-bit lanes each).
#include <rvp_intrinsic.h>
#include <stdlib.h>
#include <stdint.h>

// Four signed 16-bit lanes packed into 8 bytes.
typedef short int16x4_t __attribute__((vector_size (8)));
// Four unsigned 16-bit lanes packed into 8 bytes.
typedef unsigned short uint16x4_t __attribute__((vector_size (8)));

// Forwards two 32-bit scalars to __rv__add16 and returns its result.
// noinline keeps the call as a real function so the intrinsic is not folded
// into a caller.
static __attribute__ ((noinline)) uint32_t
dda (uint32_t ra, uint32_t rb)
{
  uint32_t result = __rv__add16 (ra, rb);
  return result;
}

// Forwards two unsigned 16x4 vectors to __rv__v_uadd16 and returns its result.
static __attribute__ ((noinline)) uint16x4_t
ddau_v (uint16x4_t ra, uint16x4_t rb)
{
  uint16x4_t result = __rv__v_uadd16 (ra, rb);
  return result;
}

// Forwards two signed 16x4 vectors to __rv__v_sadd16 and returns its result.
static __attribute__ ((noinline)) int16x4_t
ddas_v (int16x4_t ra, int16x4_t rb)
{
  int16x4_t result = __rv__v_sadd16 (ra, rb);
  return result;
}
This patch fixes the problem and allows the RVP and RVV intrinsics to coexist and be used together. The RVV test cases I used (build config: --with-arch=rv64gcv_zpn_zpsf --with-abi=lp64d) are from https://github.com/riscv-non-isa/rvv-intrinsic-doc/tree/master/examples:
--with-arch=rv64gcv_zpn_zpsf --with-abi=lp64d
common.h
// Shared helpers for the example programs: random data generation, printing,
// comparison, and 2-D array allocation.
#ifndef COMMON_H
#define COMMON_H

#include <math.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

// Fills a[0..n) with rand()-derived values: a fraction rand()/RAND_MAX plus an
// integer part rand() % 1000.
void gen_rand_1d(double *a, int n) {
  for (int i = 0; i < n; ++i)
    a[i] = (double)rand() / (double)RAND_MAX + (double)(rand() % 1000);
}

// Writes n - 1 rand()-derived non-zero characters into s followed by a
// terminating '\0'. s must have room for n bytes. Does nothing when n <= 0
// (the terminator would otherwise be written before the start of s).
void gen_string(char *s, int n) {
  if (n <= 0)
    return;
  // char value range: -128 ~ 127; the values generated here are 1..127, so
  // no NUL appears before the terminator.
  for (int i = 0; i < n - 1; ++i)
    s[i] = (char)(rand() % 127) + 1;
  s[n - 1] = '\0';
}

// Fills the n x m array ar (an array of n row pointers) with values generated
// the same way as gen_rand_1d.
void gen_rand_2d(double **ar, int n, int m) {
  for (int i = 0; i < n; ++i)
    for (int j = 0; j < m; ++j)
      ar[i][j] = (double)rand() / (double)RAND_MAX + (double)(rand() % 1000);
}

// Prints the NUL-terminated string a as a C declaration named `name`,
// followed by a blank line. Characters are emitted verbatim (no escaping).
void print_string(const char *a, const char *name) {
  printf("const char *%s = \"", name);
  int i = 0;
  while (a[i] != 0)
    putchar(a[i++]);
  printf("\"\n");
  puts("");
}

// Prints a[0..n) as a C array initializer `type name[n] = {...};`, breaking
// the line after every tenth element.
void print_array_1d(double *a, int n, const char *type, const char *name) {
  printf("%s %s[%d] = {\n", type, name, n);
  for (int i = 0; i < n; ++i) {
    // The last element closes the initializer instead of emitting a comma.
    printf("%06.2f%s", a[i], i != n - 1 ? "," : "};\n");
    if (i % 10 == 9)
      puts("");
  }
  puts("");
}

// Prints the n x m array a as `type name[n][m] = {...};`, one row per line.
void print_array_2d(double **a, int n, int m, const char *type,
                    const char *name) {
  printf("%s %s[%d][%d] = {\n", type, name, n, m);
  for (int i = 0; i < n; ++i) {
    for (int j = 0; j < m; ++j) {
      printf("%06.2f", a[i][j]);
      if (j == m - 1)
        // End of row: close the initializer on the last row, else continue.
        puts(i == n - 1 ? "};" : ",");
      else
        putchar(',');
    }
  }
  puts("");
}

// Returns true when |actual - golden| < relErr.
// NOTE: despite its name, relErr is applied as an absolute tolerance.
bool double_eq(double golden, double actual, double relErr) {
  return (fabs(actual - golden) < relErr);
}

// Returns true when every element of actual[0..n) is within 1e-6 of golden.
bool compare_1d(double *golden, double *actual, int n) {
  for (int i = 0; i < n; ++i)
    if (!double_eq(golden[i], actual[i], 1e-6))
      return false;
  return true;
}

// Returns true when the first n bytes of golden and actual are identical.
// Does not stop at a NUL; both buffers must hold at least n bytes.
bool compare_string(const char *golden, const char *actual, int n) {
  for (int i = 0; i < n; ++i)
    if (golden[i] != actual[i])
      return false;
  return true;
}

// Returns true when every element of the n x m array actual is within 1e-6
// of the corresponding element of golden.
bool compare_2d(double **golden, double **actual, int n, int m) {
  for (int i = 0; i < n; ++i)
    for (int j = 0; j < m; ++j)
      if (!double_eq(golden[i][j], actual[i][j], 1e-6))
        return false;
  return true;
}

// Allocates an n x m array as n separately malloc'ed rows of m doubles.
// Contents are uninitialized. Returns NULL if n or m is negative or if any
// allocation fails (already-allocated rows are released first). The caller
// owns the result: free each row, then the row-pointer array.
double **alloc_array_2d(int n, int m) {
  if (n < 0 || m < 0)
    return NULL;

  double **ret = malloc(sizeof *ret * (size_t)n);
  if (ret == NULL)
    return NULL;

  for (int i = 0; i < n; ++i) {
    ret[i] = malloc(sizeof *ret[i] * (size_t)m);
    // malloc(0) may legitimately return NULL, so only treat NULL as a
    // failure when a non-empty row was requested.
    if (ret[i] == NULL && m > 0) {
      while (i-- > 0)
        free(ret[i]);
      free(ret);
      return NULL;
    }
  }
  return ret;
}

// Sets every element of ar[0..n) to 1.
void init_array_one_1d(double *ar, int n) {
  for (int i = 0; i < n; ++i)
    ar[i] = 1;
}

// Sets every element of the n x m array ar to 1.
void init_array_one_2d(double **ar, int n, int m) {
  for (int i = 0; i < n; ++i)
    for (int j = 0; j < m; ++j)
      ar[i][j] = 1;
}

#endif // COMMON_H
test_rvv.c
#include "common.h"
#include <riscv_vector.h>

// index arithmetic

// Scalar reference: a[i] = b[i] + i * c[i] for every i in [0, n).
void index_golden(double *a, double *b, double *c, int n) {
  int i = 0;
  while (i < n) {
    a[i] = b[i] + (double)i * c[i];
    ++i;
  }
}

// RVV-intrinsic counterpart of index_golden. Processes the arrays in strips
// whose length vl is whatever vsetvl_e64m2 returns for the remaining count.
void index_(double *a, double *b, double *c, int n) {
  size_t vlmax = vsetvlmax_e32m1();
  // Running 32-bit element index, advanced by vl after each strip.
  vuint32m1_t idx_u32 = vid_v_u32m1(vlmax);
  size_t vl;
  while (n > 0) {
    vl = vsetvl_e64m2(n);
    // Convert the 32-bit indices to f64 so they can scale c.
    vfloat64m2_t idx_f64 = vfwcvt_f_xu_v_f64m2(idx_u32, vl);
    vfloat64m2_t strip_b = vle64_v_f64m2(b, vl);
    vfloat64m2_t strip_c = vle64_v_f64m2(c, vl);
    vfloat64m2_t scaled_c = vfmul_vv_f64m2(strip_c, idx_f64, vl);
    vfloat64m2_t strip_a = vfadd_vv_f64m2(strip_b, scaled_c, vl);
    vse64_v_f64m2(a, strip_a, vl);
    idx_u32 = vadd_vx_u32m1(idx_u32, vl, vl);

    // Advance to the next strip.
    n -= vl;
    a += vl;
    b += vl;
    c += vl;
  }
}

// Generates two random input arrays with a fixed seed, runs both
// implementations, and prints "pass" if the results agree, else "fail".
int main() {
  const int N = 31;
  const uint32_t seed = 0xdeadbeef;
  srand(seed);

  // data gen
  double B[N], C[N];
  gen_rand_1d(B, N);
  gen_rand_1d(C, N);

  // compute
  double golden[N], actual[N];
  index_golden(golden, B, C, N);
  index_(actual, B, C, N);

  // compare
  if (compare_1d(golden, actual, N))
    puts("pass");
  else
    puts("fail");
  return 0;
}
test_rvv2.c
#include "common.h"
#include <riscv_vector.h>

// matrix multiplication
// A[n][o], B[m][o] --> C[n][m];

// Scalar reference: c[i][j] = sum over k of a[i][k] * b[j][k].
void matmul_golden(double **a, double **b, double **c, int n, int m, int o) {
  for (int row = 0; row < n; ++row) {
    for (int col = 0; col < m; ++col) {
      // Accumulate directly in the output cell, as the result is defined.
      c[row][col] = 0;
      int k = 0;
      while (k < o) {
        c[row][col] += a[row][k] * b[col][k];
        ++k;
      }
    }
  }
}

// RVV-intrinsic counterpart of matmul_golden. For each output cell the
// products are accumulated per lane in strips, then reduced to one double.
void matmul(double **a, double **b, double **c, int n, int m, int o) {
  size_t vlmax = vsetvlmax_e64m1();
  for (int row = 0; row < n; ++row) {
    for (int col = 0; col < m; ++col) {
      double *cur_a = a[row];
      double *cur_b = b[col];
      int remaining = o;
      vfloat64m1_t acc = vfmv_v_f_f64m1(0, vlmax);
      vfloat64m1_t zeros = vfmv_v_f_f64m1(0, vlmax);

      size_t vl;
      while (remaining > 0) {
        vl = vsetvl_e64m1(remaining);
        vfloat64m1_t strip_a = vle64_v_f64m1(cur_a, vl);
        vfloat64m1_t strip_b = vle64_v_f64m1(cur_b, vl);
        acc = vfmacc_vv_f64m1(acc, strip_a, strip_b, vl);

        // Advance to the next strip.
        remaining -= vl;
        cur_a += vl;
        cur_b += vl;
      }

      // Reduce the per-lane partial sums and extract the scalar result.
      vfloat64m1_t reduced =
          vfredsum_vs_f64m1_f64m1(zeros, acc, zeros, vlmax);
      c[row][col] = vfmv_f_s_f64m1_f64(reduced);
    }
  }
}

// Builds two random matrices with a fixed seed, multiplies them with both
// implementations, and prints "pass" if the results agree, else "fail".
int main() {
  const int N = 8;
  const int M = 8;
  const int O = 7;
  uint32_t seed = 0xdeadbeef;
  srand(seed);

  // data gen
  double **A = alloc_array_2d(N, O);
  double **B = alloc_array_2d(M, O);
  gen_rand_2d(A, N, O);
  gen_rand_2d(B, M, O);

  // compute
  double **golden = alloc_array_2d(N, M);
  double **actual = alloc_array_2d(N, M);
  matmul_golden(A, B, golden, N, M, O);
  matmul(A, B, actual, N, M, O);

  // compare
  if (compare_2d(golden, actual, N, M))
    puts("pass");
  else
    puts("fail");
  return 0;
}
The current implementation cannot use RVP instructions because vector-mode data is not allowed to be placed in GPRs; see https://github.com/pz9115/riscv-gcc/blob/riscv-gcc-experimental-v/gcc/config/riscv/riscv.c#L4986-L4997 for more info. (This PR also includes an RVP fix from commit 8a51f69b54fab099c5cd1f8e54ded052f0c79a55.)
Here is a test case to reproduce the error.
test_rvp.c
This patch fixes the problem and allows the RVP and RVV intrinsics to coexist and be used together. The RVV test cases I used (build config:
--with-arch=rv64gcv_zpn_zpsf --with-abi=lp64d
) are from https://github.com/riscv-non-isa/rvv-intrinsic-doc/tree/master/examples:
common.h
test_rvv.c
test_rvv2.c