// Global arrays used by foo(). The extents eulers_per_block, block_sz and SIZE
// are not defined in this snippet.
// Destination: foo() writes s_ref_real[i][tid] (row = outer loop counter,
// column = inner loop counter).
float s_ref_real[eulers_per_block][block_sz];
// Source: foo() reads only the real part of the selected elements.
std::complex<float> mdlComplex[SIZE];
// Indirection table: indexarr[tid] is the position in mdlComplex that foo() reads.
int indexarr[block_sz];
// Fills the first `block` rows of s_ref_real: for every row i and every column
// tid in [0, sz), stores the real part of mdlComplex[indexarr[tid]].
// The selected source element does not depend on i, so every row receives the
// same values. No bounds checking is done on sz, block, or the contents of
// indexarr.
void foo(int sz, int block) {
  for (int i = 0; i < block; i ++) {
    // Loop hints for the inner loop: request vectorization from clang and
    // assert to GCC that there are no loop-carried dependences.
    #pragma clang loop vectorize(enable)
    #pragma GCC ivdep
    for (int tid=0; tid < sz; tid++){
      // Indirect (gather-style) read: the position comes from indexarr.
      int index = indexarr[tid];
      s_ref_real[i][tid] = mdlComplex[index].real();
    }
  }
}
* This is a subproblem of [PR107345](https://github.com/llvm/llvm-project/issues/107345); here we only address the **real** part of the complex type. Currently LLVM produces an unoptimized kernel loop because `DAGTypeLegalizer::SplitVectorOperand` splits `llvm.masked.gather.nxv4f32.nxv4p0`, since the type **`<vscale x 4 x i64>`** is illegal.
* The expected assembly is similar to https://godbolt.org/z/ahaznch33
* test: https://godbolt.org/z/TzTbv9xW1
```
// Global arrays used by foo(). The extents eulers_per_block, block_sz and SIZE
// are not defined in this snippet.
// Destination: foo() writes s_ref_real[i][tid] (row = outer loop counter,
// column = inner loop counter).
float s_ref_real[eulers_per_block][block_sz];
// Source: foo() reads only the real part of the selected elements.
std::complex<float> mdlComplex[SIZE];
// Indirection table: indexarr[tid] is the position in mdlComplex that foo() reads.
int indexarr[block_sz];
// Fills the first `block` rows of s_ref_real: for every row i and every column
// tid in [0, sz), stores the real part of mdlComplex[indexarr[tid]].
// The selected source element does not depend on i, so every row receives the
// same values. No bounds checking is done on sz, block, or the contents of
// indexarr.
void foo(int sz, int block) {
for (int i = 0; i < block; i ++) {
// Loop hints for the inner loop: request vectorization from clang and assert
// to GCC that there are no loop-carried dependences.
#pragma clang loop vectorize(enable)
#pragma GCC ivdep
for (int tid=0; tid < sz; tid++){
// Indirect (gather-style) read: the position comes from indexarr.
int index = indexarr[tid];
s_ref_real[i][tid] = mdlComplex[index].real();
}
}
}
```
* This is a subproblem of [PR107345](https://github.com/llvm/llvm-project/issues/107345); here we only address the **real** part of the complex type. Currently LLVM produces an unoptimized kernel loop because `DAGTypeLegalizer::SplitVectorOperand` splits `llvm.masked.gather.nxv4f32.nxv4p0`, since the type **`<vscale x 4 x i64>`** is illegal.
* We expect to get assembly similar to the following (https://godbolt.org/z/ahaznch33):
```
// Expected vectorized inner loop (AArch64 SVE, 32-bit lanes, governed by predicate p1).
// NOTE(review): the mapping of registers to the C++ arrays is inferred from the
// reproducer, not shown in this excerpt — confirm against the full listing.
.LBB0_4:
// Contiguous load of 32-bit elements from x12 + x15*4 (presumably indexarr[tid...]).
ld1w { z0.s }, p1/z, [x12, x15, lsl #2]
// Shift each lane left by 1, i.e. multiply the index by 2 (presumably because
// each std::complex<float> spans two floats).
lsl z0.s, z0.s, #1
// Gather load: each lane reads the 32-bit element at x13 + sign-extended lane
// value * 4 (presumably the real parts inside mdlComplex).
ld1w { z0.s }, p1/z, [x13, z0.s, sxtw #2]
// Contiguous store of the gathered lanes to x11 + x15*4 (presumably s_ref_real[i][tid...]).
st1w { z0.s }, p1, [x11, x15, lsl #2]
// Advance the element counter x15 by x14 (presumably the number of lanes per vector).
add x15, x15, x14
// Rebuild p1: a lane stays active while x15 + lane < x10 (unsigned compare).
whilelo p1.s, x15, x10
// Loop again while the first lane of p1 is still active.
b.mi .LBB0_4
```
void foo(int sz, int block) { for (int i = 0; i < block; i ++) {
pragma clang loop vectorize(enable)
} }
// Expected vectorized inner loop (AArch64 SVE, 32-bit lanes, governed by predicate p1).
// NOTE(review): which C++ array each base register points at is not shown in
// this excerpt — confirm against the full listing.
.LBB0_4:
// Contiguous load of 32-bit elements from x12 + x15*4.
ld1w { z0.s }, p1/z, [x12, x15, lsl #2]
// Shift each lane left by 1 (multiply by 2).
lsl z0.s, z0.s, #1
// Gather load from x13 + sign-extended lane value * 4.
ld1w { z0.s }, p1/z, [x13, z0.s, sxtw #2]
// Contiguous store to x11 + x15*4.
st1w { z0.s }, p1, [x11, x15, lsl #2]
// Advance the element counter.
add x15, x15, x14
// Rebuild p1: a lane stays active while x15 + lane < x10 (unsigned compare).
whilelo p1.s, x15, x10
// Loop again while the first lane of p1 is still active.
b.mi .LBB0_4