Closed topperc closed 1 month ago
@llvm/issue-subscribers-backend-risc-v
The costs of the cast and FP arithmetic instructions were modified in https://github.com/llvm/llvm-project/commit/ecd7a0132a344af1451d262e226fad3eeb233b84 and https://github.com/llvm/llvm-project/commit/db07d79ab06f8f88a8bc161654336e235d906ad7. I think the costs are correct on the main branch.
These all look fine to me. This is from a recent TOT snapshot. The fptoXi versions look slightly debatable, as we're discounting two vsetvlis rather than our normal one, but otherwise I don't see any obvious issues here.
$ cat craig-fp-convert-cost.ll
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+experimental-zvfh < %s | FileCheck %s
; Signed integer to floating-point conversion, <2 x i8> -> <2 x float>.
; The assertion lines below pin an estimated cost of 2 for the sitofp and 1
; for the ret.
define <2 x float> @v2i8_sitofp_v2f32(<2 x i8> %a) {
; CHECK-LABEL: 'v2i8_sitofp_v2f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = sitofp <2 x i8> %a to <2 x float>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x float> %res
;
%res = sitofp <2 x i8> %a to <2 x float>
ret <2 x float> %res
}
; Unsigned integer to floating-point conversion, <2 x i8> -> <2 x float>.
; The body previously used sitofp (copied from the signed test above), so the
; unsigned conversion was never actually costed; it now uses uitofp to match
; the function name.
; NOTE(review): the cost of 2 is carried over from the sitofp run. Regenerate
; the assertions with utils/update_analyze_test_checks.py to confirm it. Any
; llc output captured before this fix reflects the signed conversion
; (vsext.vf2 / vfwcvt.f.x.v) and should be regenerated as well.
define <2 x float> @v2i8_uitofp_v2f32(<2 x i8> %a) {
; CHECK-LABEL: 'v2i8_uitofp_v2f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = uitofp <2 x i8> %a to <2 x float>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x float> %res
;
%res = uitofp <2 x i8> %a to <2 x float>
ret <2 x float> %res
}
; Floating-point to signed integer conversion, <2 x float> -> <2 x i8>.
; The assertion lines below pin an estimated cost of 2 for the fptosi and 1
; for the ret.
define <2 x i8> @v2i8_fptosi_v2f32(<2 x float> %a) {
; CHECK-LABEL: 'v2i8_fptosi_v2f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = fptosi <2 x float> %a to <2 x i8>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i8> %res
;
%res = fptosi <2 x float> %a to <2 x i8>
ret <2 x i8> %res
}
; Floating-point to unsigned integer conversion, <2 x float> -> <2 x i8>.
; The assertion lines below pin an estimated cost of 2 for the fptoui and 1
; for the ret.
define <2 x i8> @v2i8_fptoui_v2f32(<2 x float> %a) {
; CHECK-LABEL: 'v2i8_fptoui_v2f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = fptoui <2 x float> %a to <2 x i8>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i8> %res
;
%res = fptoui <2 x float> %a to <2 x i8>
ret <2 x i8> %res
}
$ ./llc -march=riscv64 -mattr=+v < craig-fp-convert-cost.ll
# llc output for the test file: file-level preamble followed by one function
# per IR definition.
.text
# Object attributes emitted by llc; attribute 5 carries the target
# architecture string (base ISA plus the extensions implied by +v).
.attribute 4, 16
.attribute 5, "rv64i2p1_f2p2_d2p2_v1p0_zicsr2p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
.file "<stdin>"
# v2i8_sitofp_v2f32: <2 x i8> in v8 -> <2 x float> in v8.
# One vsetivli plus two vector instructions: extend, then widening convert.
.globl v2i8_sitofp_v2f32 # -- Begin function v2i8_sitofp_v2f32
.p2align 2
.type v2i8_sitofp_v2f32,@function
.variant_cc v2i8_sitofp_v2f32
v2i8_sitofp_v2f32: # @v2i8_sitofp_v2f32
.cfi_startproc
# %bb.0:
# VL = 2, SEW = 16, LMUL = 1/4, tail/mask agnostic.
vsetivli zero, 2, e16, mf4, ta, ma
# Sign-extend each i8 element of v8 to i16 in v9.
vsext.vf2 v9, v8
# Widening convert: signed i16 elements of v9 -> f32 elements in v8.
vfwcvt.f.x.v v8, v9
ret
.Lfunc_end0:
.size v2i8_sitofp_v2f32, .Lfunc_end0-v2i8_sitofp_v2f32
.cfi_endproc
# -- End function
# v2i8_uitofp_v2f32: instruction sequence identical to v2i8_sitofp_v2f32.
# NOTE(review): vsext.vf2 and vfwcvt.f.x.v are the signed forms; an unsigned
# conversion would be expected to use vzext.vf2 / vfwcvt.f.xu.v. Confirm this
# output against the IR body it was generated from.
.globl v2i8_uitofp_v2f32 # -- Begin function v2i8_uitofp_v2f32
.p2align 2
.type v2i8_uitofp_v2f32,@function
.variant_cc v2i8_uitofp_v2f32
v2i8_uitofp_v2f32: # @v2i8_uitofp_v2f32
.cfi_startproc
# %bb.0:
# VL = 2, SEW = 16, LMUL = 1/4, tail/mask agnostic.
vsetivli zero, 2, e16, mf4, ta, ma
# Sign-extend each i8 element of v8 to i16 in v9.
vsext.vf2 v9, v8
# Widening convert: signed i16 elements of v9 -> f32 elements in v8.
vfwcvt.f.x.v v8, v9
ret
.Lfunc_end1:
.size v2i8_uitofp_v2f32, .Lfunc_end1-v2i8_uitofp_v2f32
.cfi_endproc
# -- End function
# v2i8_fptosi_v2f32: <2 x float> in v8 -> <2 x i8> in v8.
# Two narrowing steps (f32 -> i16 -> i8), each preceded by its own
# vsetvli/vsetivli because the element width changes between them.
.globl v2i8_fptosi_v2f32 # -- Begin function v2i8_fptosi_v2f32
.p2align 2
.type v2i8_fptosi_v2f32,@function
.variant_cc v2i8_fptosi_v2f32
v2i8_fptosi_v2f32: # @v2i8_fptosi_v2f32
.cfi_startproc
# %bb.0:
# VL = 2, SEW = 16, LMUL = 1/4, tail/mask agnostic.
vsetivli zero, 2, e16, mf4, ta, ma
# Narrowing convert with round-toward-zero: f32 in v8 -> signed i16 in v9.
vfncvt.rtz.x.f.w v9, v8
# Keep the current VL; switch to SEW = 8, LMUL = 1/8 for the final narrow.
vsetvli zero, zero, e8, mf8, ta, ma
# Narrowing shift right by 0: truncate i16 in v9 to i8 in v8.
vnsrl.wi v8, v9, 0
ret
.Lfunc_end2:
.size v2i8_fptosi_v2f32, .Lfunc_end2-v2i8_fptosi_v2f32
.cfi_endproc
# -- End function
# v2i8_fptoui_v2f32: <2 x float> in v8 -> <2 x i8> in v8.
# Same shape as v2i8_fptosi_v2f32, but using the unsigned convert (.xu).
.globl v2i8_fptoui_v2f32 # -- Begin function v2i8_fptoui_v2f32
.p2align 2
.type v2i8_fptoui_v2f32,@function
.variant_cc v2i8_fptoui_v2f32
v2i8_fptoui_v2f32: # @v2i8_fptoui_v2f32
.cfi_startproc
# %bb.0:
# VL = 2, SEW = 16, LMUL = 1/4, tail/mask agnostic.
vsetivli zero, 2, e16, mf4, ta, ma
# Narrowing convert with round-toward-zero: f32 in v8 -> unsigned i16 in v9.
vfncvt.rtz.xu.f.w v9, v8
# Keep the current VL; switch to SEW = 8, LMUL = 1/8 for the final narrow.
vsetvli zero, zero, e8, mf8, ta, ma
# Narrowing shift right by 0: truncate i16 in v9 to i8 in v8.
vnsrl.wi v8, v9, 0
ret
.Lfunc_end3:
.size v2i8_fptoui_v2f32, .Lfunc_end3-v2i8_fptoui_v2f32
.cfi_endproc
# -- End function
# Empty .note.GNU-stack section emitted at the end of the file.
.section ".note.GNU-stack","",@progbits
Looks like I made a mistake here. I was acting on someone else's analysis in an internal bug report. Our tree is relatively up to date and those commits are from 6 months and a year ago so I need to figure out where the disconnect was on our side.
These are issues we identified in our downstream. Not sure if any have been fixed recently.
- <2 x i8> → <2 x float> is not costed as being 2 instructions (a vzext + vfwcvt).
- <2 x float> → <2 x i8> is not costed as being 2 instructions (vfncvt + vnsrl).
- Scalar fmul/fsub float cost is 2, but the vector cost is 1.

cc: @preames @bubba