Open vfdff opened 2 weeks ago
// Vector loop body (AArch64 SVE, GNU syntax). Per 32-bit lane i it computes,
// in pseudo-C with illustrative names:
//     out[i] = (idx[i] < 0) ? -tab[-idx[i]] : tab[idx[i]];
// Register roles as used below:
//     x10 = base of the 32-bit index array (idx)
//     x15 = base of the gathered table (tab); fneg on .s lanes implies float32
//     x14 = base of the output array (out)
//     x16 = element counter, advanced by one vector of words per iteration
//     x9  = loop bound compared against x16
//     p0  = governing predicate for the contiguous load/store
// NOTE(review): p0 and x9 are set up outside this listing; p0 is presumably
// all-true and x9 presumably a multiple of the vector length, since the exit
// test is an exact-equality compare -- confirm against the loop preheader.
.LBB0_3:
// z0.s = idx[x16 ...]: contiguous load of one vector of 32-bit indices.
ld1w { z0.s }, p0/z, [x10, x16, lsl #2]
// z1.d = upper half of the original indices, zero-extended to 64 bits.
uunpkhi z1.d, z0.s
// p1 = lanes with idx >= 0, p2 = lanes with idx < 0.
cmpge p1.s, p0/z, z0.s, #0
cmplt p2.s, p0/z, z0.s, #0
// z2.d = lower half of the original indices, zero-extended to 64 bits.
uunpklo z2.d, z0.s
// z0.s = 0 - idx (reversed subtract), i.e. the negated indices.
subr z0.s, z0.s, #0
// z3.d = upper half of the negated indices, zero-extended.
uunpkhi z3.d, z0.s
// Scale the index by 4 to form a byte offset for 32-bit elements.
lsl z1.d, z1.d, #2
// z0.d = lower half of the negated indices, zero-extended.
uunpklo z0.d, z0.s
// Widen the upper half of p1 so it governs the 64-bit lanes of z1.
punpkhi p3.h, p1.b
lsl z2.d, z2.d, #2
// Widen the lower half of p1 in place; it governs the z2 gather below.
punpklo p1.h, p1.b
// Gather tab[idx] for the upper-half lanes where idx >= 0; inactive lanes are zeroed.
ld1w { z1.d }, p3/z, [x15, z1.d]
// p3 is reused: now the upper half of the "idx < 0" predicate.
punpkhi p3.h, p2.b
lsl z3.d, z3.d, #2
lsl z0.d, z0.d, #2
// Gather tab[-idx] for the upper-half lanes where idx < 0.
ld1w { z3.d }, p3/z, [x15, z3.d]
// p3 is reused again: lower half of the "idx < 0" predicate.
punpklo p3.h, p2.b
// Gather tab[-idx] for the lower-half lanes where idx < 0.
ld1w { z0.d }, p3/z, [x15, z0.d]
// Gather tab[idx] for the lower-half lanes where idx >= 0.
ld1w { z2.d }, p1/z, [x15, z2.d]
// Repack the two 64-bit-container halves of the tab[-idx] results into one
// vector of 32-bit lanes (uzp1 keeps the even-numbered .s elements).
uzp1 z0.s, z0.s, z3.s
// Negate every lane governed by p0; only the idx < 0 lanes are kept by sel below.
fneg z0.s, p0/m, z0.s
// Repack the tab[idx] results the same way.
uzp1 z1.s, z2.s, z1.s
// Per lane: take -tab[-idx] where idx < 0 (p2), otherwise tab[idx].
sel z0.s, p2, z0.s, z1.s
// out[x16 ...] = result.
st1w { z0.s }, p0, [x14, x16, lsl #2]
// Advance the counter by the number of 32-bit elements in one vector.
incw x16
// Loop until the counter equals the bound in x9.
cmp x9, x16
b.ne .LBB0_3
test: https://gcc.godbolt.org/z/zM5qfqnda
GCC vectorizes this loop with SVE, while LLVM currently does not; the LLVM assembly is recorded above.