microsoft / DirectXShaderCompiler

This repo hosts the source for the DirectX Shader Compiler which is based on LLVM/Clang.
Other
2.98k stars 665 forks source link

[SPIR-V] asuint fails on bit cast #6735

Open farzonl opened 3 days ago

farzonl commented 3 days ago

The following code works when targeting DXIL but fails to compile when targeting SPIR-V.

SPIR-V:

DXC_Debug_BUILD/bin/dxc scratch/asuint_spirv_test.hlsl -T lib_6_8 -enable-16bit-types -spirv -fspv-target-env=universal1.5 -O0 
export uint4 fn(double4 p1, uint4 p2, uint4 p3) {
    asuint(p1, p2, p3);
    return p2;
}

error

fatal error: generated SPIR-V is invalid: Expected input to have the same total bit width as Result Type: Bitcast
  %16 = OpBitcast %v2uint %14

DXIL:

DXC_Debug_BUILD/bin/dxc scratch/asuint_spirv_test.hlsl -T lib_6_8 -enable-16bit-types -O0
target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
target triple = "dxil-ms-dx"

%dx.types.splitdouble = type { i32, i32 }

@dx.nothing.a = internal constant [1 x i32] zeroinitializer

; Function Attrs: nounwind
define <4 x i32> @"\01?fn@@YA?AV?$vector@I$03@@V?$vector@N$03@@V1@1@Z"(<4 x double> %p1, <4 x i32> %p2, <4 x i32> %p3) #0 {
  %1 = extractelement <4 x double> %p1, i64 0
  %2 = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double %1)  ; SplitDouble(value)
  %3 = extractvalue %dx.types.splitdouble %2, 0
  %4 = insertelement <4 x i32> undef, i32 %3, i64 0
  %5 = extractelement <4 x double> %p1, i64 1
  %6 = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double %5)  ; SplitDouble(value)
  %7 = extractvalue %dx.types.splitdouble %6, 0
  %8 = insertelement <4 x i32> %4, i32 %7, i64 1
  %9 = extractelement <4 x double> %p1, i64 2
  %10 = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double %9)  ; SplitDouble(value)
  %11 = extractvalue %dx.types.splitdouble %10, 0
  %12 = insertelement <4 x i32> %8, i32 %11, i64 2
  %13 = extractelement <4 x double> %p1, i64 3
  %14 = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double %13)  ; SplitDouble(value)
  %15 = extractvalue %dx.types.splitdouble %14, 0
  %16 = insertelement <4 x i32> %12, i32 %15, i64 3
  %17 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @dx.nothing.a, i32 0, i32 0)
  ret <4 x i32> %16
}

; Function Attrs: nounwind readnone
declare %dx.types.splitdouble @dx.op.splitDouble.f64(i32, double) #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }

!llvm.ident = !{!0}
!dx.version = !{!1}
!dx.valver = !{!1}
!dx.shaderModel = !{!2}
!dx.entryPoints = !{!3}

!0 = !{!"dxc(private) 1.8.0.14649 (main, 206133c9e)"}
!1 = !{i32 1, i32 8}
!2 = !{!"lib", i32 6, i32 8}
!3 = !{null, !"", null, null, !4}
!4 = !{i32 0, i64 8388612}

devshgraphicsprogramming commented 9 minutes ago

Would this work for you?

template<class T, class U>
[[vk::ext_instruction(spv::OpBitcast)]]
T bitcast(U);