James-Jones / HLSLCrossCompiler

467 stars 81 forks source link

Analysis of a cross compile #8

Closed Tisten closed 2 years ago

Tisten commented 10 years ago

This one requires a bit more description. The cross compile doesn't fail, and I have not yet tried running the shader for real, but the result worries me. So here goes:

The HLSL fragment program: struct v2p { float3 Tex0 : TEXCOORD0; float2 BlendWeights : TEXCOORD1; float2 Tex1 : TEXCOORD2; }; sampler2D Texture : register(s0) : register(t0); void main( in v2p In, out float4 Out : COLOR0 ) { float4 TopLeftTex = tex2D( Texture, In.Tex0.xz ); float4 TopRightTex = tex2D( Texture, In.Tex0.yz ); float4 BottomTex = tex2D( Texture, In.Tex1 ); float alphaBlendLeft = TopLeftTex.a * ( 1.h - In.BlendWeights.x ); float alphaBlendRight = TopRightTex.a * In.BlendWeights.x; float bottom_alpha = BottomTex.a * In.BlendWeights.y; const float blendThreshold = 0.250980392h; Out = bottom_alpha > blendThreshold ? BottomTex : ( alphaBlendLeft > blendThreshold ) ? TopLeftTex : ( alphaBlendRight > blendThreshold ) ? TopRightTex : 0.h; }

The D3D ASM: ps_5_0 dcl_globalFlags refactoringAllowed dcl_sampler s0, mode_default dcl_resource_texture2d (float,float,float,float) t0 dcl_input_ps linear v0.xyz dcl_input_ps linear v1.xy dcl_input_ps linear v1.zw dcl_output o0.xyzw dcl_temps 3 sample_indexable(texture2d)(float,float,float,float) r0.xyzw, v0.yzyy, t0.xyzw, s0 mul r1.x, r0.w, v1.x lt r1.x, l(0.250980), r1.x and r0.xyzw, r0.xyzw, r1.xxxx add r1.x, -v1.x, l(1.000000) sample_indexable(texture2d)(float,float,float,float) r2.xyzw, v0.xzxx, t0.xyzw, s0 mul r1.x, r1.x, r2.w lt r1.x, l(0.250980), r1.x movc r0.xyzw, r1.xxxx, r2.xyzw, r0.xyzw sample_indexable(texture2d)(float,float,float,float) r1.xyzw, v1.zwzz, t0.xyzw, s0 mul r2.x, r1.w, v1.y lt r2.x, l(0.250980), r2.x movc o0.xyzw, r2.xxxx, r1.xyzw, r0.xyzw ret

The converted GLSL program:

#version 410

struct vec1 { float x; }; struct uvec1 { uint x; }; struct ivec1 { int x; }; subroutine void SubroutineType(); uniform sampler2D Texture; layout(location = 0) in vec4 VtxGeoOutput0; vec4 Input0; layout(location = 1) in vec4 VtxGeoOutput1; vec4 Input1; layout(location = 0) out vec4 PixOutput0;

#define Output0 PixOutput0

vec4 Temp0; ivec4 Temp0_int; uvec4 Temp0_uint; vec4 Temp1; ivec4 Temp1_int; uvec4 Temp1_uint; vec4 Temp2; ivec4 Temp2_int; uvec4 Temp2_uint; void main() { Input0 = VtxGeoOutput0; Input1 = VtxGeoOutput1; Input1 = VtxGeoOutput1; Temp0 = (texture(Texture, Input0.yz)); Temp1.x = Temp0.w * Input1.x; Temp1_uint.x = ((0.250980)< (Temp1.x)) ? 1u : 0u; Temp0_int = ivec4(ivec4(Temp0) & ivec4(Temp1_uint).xxxx); Temp1.x = -Input1.x + 1.000000; Temp2 = (texture(Texture, Input0.xz)); Temp1.x = Temp1.x * Temp2.w; Temp1_uint.x = ((0.250980)< (Temp1.x)) ? 1u : 0u; if(vec4(Temp1_uint.xxxx).x != 0) { Temp0 = Temp2; } else { Temp0 = Temp0_int; } Temp1 = (texture(Texture, Input1.zw)); Temp2.x = Temp1.w * Input1.y; Temp2_uint.x = ((0.250980)< (Temp2.x)) ? 1u : 0u; if(vec4(Temp2_uint.xxxx).x != 0) { Output0 = Temp1; } else { Output0 = Temp0; } return; }

And here is the problem. The D3D ASM executes these two instructions in sequence: `lt r1.x, l(0.250980), r1.x` followed by `and r0.xyzw, r0.xyzw, r1.xxxx`

I imagine that the result of "lt" is either 0xFFFFFFFFU or 0x0U. When doing "and" with a float, it will either set the float to 0.0f or keep it as it was.

While the GLSL does: Temp1_uint.x = ((0.250980)< (Temp1.x)) ? 1u : 0u; Temp0_int = ivec4(ivec4(Temp0) & ivec4(Temp1_uint).xxxx);

Here are a few problems:

James-Jones commented 10 years ago

The first problem (conditional result should be max_uint, not 1) has now been fixed on the master branch.

James-Jones commented 10 years ago

The second problem (casting float to int for bitwise-and) has been fixed on master.