Add HLSLcc flag to let users disable bit-cast temp registers

lbavoil commented 10 years ago

This commit has introduced corruption in multiple GLSL 150 fragment shaders that I have been generating with HLSLcc: "Bit-cast temp registers - issues #8, #20 and #21" https://github.com/James-Jones/HLSLCrossCompiler/commit/fa593f4aa07023c1c185768a7f03dc30363b32ab

The previous HLSLcc versions worked fine for me, using SM5 pixel shaders as input and "HLSLcc.exe -lang=150 -flags=1 ...".

Would it be possible for you to add a new HLSLcc flag to let users disable the bit-cast temp register strategy for specific shaders (reverting to the old strategy that uses separate arrays of registers for each type)? This would have the advantage of removing the dependency on GL_ARB_shader_bit_encoding, and would hopefully fix the regression on my end.

James-Jones commented 10 years ago

I have a strong preference for fixing the current code rather than adding this flag. The old method of managing temp variables was not correct, even though there are shaders which will not hit the problematic cases. The current code is new, and it would not surprise me if some instructions don't have the correct casts applied. If you don't mind sharing the bad GLSL shader, then I can take a look. Otherwise you could try to spot the problem in the shader and I could eyeball the recent compiler changes for anything missing/suspicious.

lbavoil commented 10 years ago

Here are example GLSL shaders generated before and after bit-cast temp registers were implemented in HLSLcc:

BEFORE (correct output): "#version 150\n" "struct vec1 {\n" " float x;\n" "};\n" "struct uvec1 {\n" " uint x;\n" "};\n" "struct ivec1 {\n" " int x;\n" "};\n" "layout(std140) uniform;\n" "uniform GlobalConstantBuffer {\n" " vec2 g_c0_0;\n" " vec2 g_c0_1;\n" " vec2 g_c0_2;\n" " vec2 g_c0_3;\n" " float g_c0_4;\n" " float g_c0_5;\n" " float g_c0_6;\n" " float g_c0_7;\n" " float g_c0_8;\n" " float g_c0_9;\n" " float g_c0_10;\n" " int g_c0_11;\n" " float g_c0_12;\n" " float g_c0_13;\n" " float g_c0_14;\n" " float g_c0_15;\n" " float g_c0_16;\n" " float g_c0_17;\n" " float g_c0_18;\n" " float g_c0_19;\n" " vec2 g_c0_20;\n" " float g_c0_21;\n" " float g_c0_22;\n" "};\n" "uniform sampler2D g_t0;\n" "vec4 Input0;\n" "out vec4 PixOutput0;\n" "#define Output0 PixOutput0\n" "vec4 Temp[5];\n" "ivec4 Temp_int[5];\n" "uvec4 Temp_uint[5];\n" "void main()\n" "{\n" " Input0.xy = gl_FragCoord.xy;\n" " Temp[0].y = vec4(g_c0_1.xxxy.z).y;\n" " Temp[0].z = vec4(0.000000).z;\n" " Temp[0].xw = vec4(Input0.yyyx + -g_c0_20.xyxx.yyyx).xw;\n" " Temp[0].yz = vec4(Temp[0].wwxw * g_c0_1.xxxy.zzwz + Temp[0].yyzy).yz;\n" " Temp[1].xy = vec4(g_c0_2.xyxx.xyxx * Temp[0].yzyy + g_c0_3.xxxy.zwzz).xy;\n" " Temp[2].x = (textureLod(g_t0, Temp[0].yz, 0.000000)).x;\n" " Temp[2].yz = vec4(Temp[1].xxyx * Temp[2].xxxx).yz;\n" " Temp[0].yz = vec4(Temp[0].xxwx * g_c0_1.xxxy.wwzw).yz;\n" " Temp[1].xy = vec4(g_c0_2.xyxx.yxyy * Temp[0].yzyy + g_c0_3.xxxy.wzww).xy;\n" " Temp[3].y = (textureLod(g_t0, Temp[0].zy, 0.000000).yxzw).y;\n" " Temp[3].xz = vec4(Temp[1].xxyx * Temp[3].yyyy).xz;\n" " Temp[1].xyz = vec4(Temp[2].xyzx + -Temp[3].yzxy).xyz;\n" " Temp[0].y = vec4(dot((Temp[1].xyzx).xyz, (Temp[1].xyzx).xyz)).y;\n" " Temp[2].y = vec4(-g_c0_1.xxxy.z).y;\n" " Temp[2].z = vec4(0.000000).z;\n" " Temp[2].xy = vec4(Temp[0].wxww * g_c0_1.xxxy.zwzz + Temp[2].yzyy).xy;\n" " Temp[2].zw = vec4(g_c0_2.xyxx.xxxy * Temp[2].xxxy + g_c0_3.xxxy.zzzw).zw;\n" " Temp[4].x = (textureLod(g_t0, Temp[2].xy, 0.000000)).x;\n" " Temp[4].yz = 
vec4(Temp[2].zzwz * Temp[4].xxxx).yz;\n" " Temp[2].xyz = vec4(Temp[3].yzxy + -Temp[4].xyzx).xyz;\n" " Temp[0].z = vec4(dot((Temp[2].xyzx).xyz, (Temp[2].xyzx).xyz)).z;\n" " Temp_uint[0].y = ((Temp[0].y)< (Temp[0].z)) ? 0xFFFFFFFFu : 0u;\n" " if(vec4(Temp_uint[0].yyyy).x != 0.0) {\n" " Temp[1].xyz = vec4(Temp[1].xyzx).xyz;\n" " } else {\n" " Temp[1].xyz = vec4(Temp[2].xyzx).xyz;\n" " }\n" " Temp[2].z = vec4(0.000000).z;\n" " Temp[2].x = vec4(g_c0_1.xxxy.w).x;\n" " Temp[0].yz = vec4(Temp[0].xxwx * g_c0_1.xxxy.wwzw + Temp[2].xxzx).yz;\n" " Temp[2].xy = vec4(g_c0_2.xyxx.yxyy * Temp[0].yzyy + g_c0_3.xxxy.wzww).xy;\n" " Temp[4].y = (textureLod(g_t0, Temp[0].zy, 0.000000).yxzw).y;\n" " Temp[4].xz = vec4(Temp[2].xxyx * Temp[4].yyyy).xz;\n" " Temp[2].xyz = vec4(-Temp[3].xyzx + Temp[4].xyzx).xyz;\n" " Temp[0].y = vec4(dot((Temp[2].xyzx).xyz, (Temp[2].xyzx).xyz)).y;\n" " Temp[4].z = vec4(0.000000).z;\n" " Temp[4].x = vec4(-g_c0_1.xxxy.w).x;\n" " Temp[0].xz = vec4(Temp[0].xxwx * g_c0_1.xxxy.wwzw + Temp[4].xxzx).xz;\n" " Temp[4].xy = vec4(g_c0_2.xyxx.yxyy * Temp[0].xzxx + g_c0_3.xxxy.wzww).xy;\n" " Temp[0].z = (textureLod(g_t0, Temp[0].zx, 0.000000).ywxz).z;\n" " Temp[0].xw = vec4(Temp[0].zzzz * Temp[4].xxxy).xw;\n" " Temp[0].xzw = vec4(-Temp[0].xxzw + Temp[3].xxyz).xzw;\n" " Temp[1].w = vec4(dot((Temp[0].xzwx).xyz, (Temp[0].xzwx).xyz)).w;\n" " Temp_uint[0].y = ((Temp[0].y)< (Temp[1].w)) ? 0xFFFFFFFFu : 0u;\n" " if(vec4(Temp_uint[0].yyyy).x != 0.0) {\n" " Temp[0].xyz = vec4(Temp[2].xyzx).xyz;\n" " } else {\n" " Temp[0].xyz = vec4(Temp[0].xzwx).xyz;\n" " }\n" " Temp[2].xyz = vec4(Temp[0].xyzx * Temp[1].xyzx).xyz;\n" " Temp[0].xyz = vec4(Temp[1].zxyz * Temp[0].yzxy + -Temp[2].xyzx).xyz;\n" " Temp[0].w = vec4(dot((Temp[0].xyzx).xyz, (Temp[0].xyzx).xyz)).w;\n" " Temp[0].w = vec4(inversesqrt(Temp[0].w)).w;\n" " Temp[0].xyz = vec4(Temp[0].wwww * Temp[0].xyzx).xyz;\n" " Output0.xyz = vec4(-Temp[0].xyzx).xyz;\n" " Output0.w = vec4(0.000000).w;\n" " return;\n" "}\n"

AFTER (corrupted output): "#version 150\n" "#extension GL_ARB_shader_bit_encoding : require\n" "struct vec1 {\n" " float x;\n" "};\n" "struct uvec1 {\n" " uint x;\n" "};\n" "struct ivec1 {\n" " int x;\n" "};\n" "layout(std140) uniform;\n" "uniform GlobalConstantBuffer {\n" " vec2 g_c0_0;\n" " vec2 g_c0_1;\n" " vec2 g_c0_2;\n" " vec2 g_c0_3;\n" " float g_c0_4;\n" " float g_c0_5;\n" " float g_c0_6;\n" " float g_c0_7;\n" " float g_c0_8;\n" " float g_c0_9;\n" " float g_c0_10;\n" " int g_c0_11;\n" " float g_c0_12;\n" " float g_c0_13;\n" " float g_c0_14;\n" " float g_c0_15;\n" " float g_c0_16;\n" " float g_c0_17;\n" " float g_c0_18;\n" " float g_c0_19;\n" " vec2 g_c0_20;\n" " float g_c0_21;\n" " float g_c0_22;\n" "};\n" "uniform sampler2D g_t0;\n" "vec4 Input0;\n" "out vec4 PixOutput0;\n" "#define Output0 PixOutput0\n" "vec4 Temp[5];\n" "ivec4 Temp_int[5];\n" "uvec4 Temp_uint[5];\n" "void main()\n" "{\n" " Input0.xy = gl_FragCoord.xy;\n" " Temp_int[0].y = ivec4(floatBitsToInt(g_c0_1.xxxy.z)).y;\n" " Temp_int[0].z = ivec4(int(0x0)).z;\n" " Temp_int[0].xw = floatBitsToInt(Input0.yyyx + -g_c0_20.xyxx.yyyx).xw;\n" " Temp_int[0].yz = floatBitsToInt(vec4(intBitsToFloat(Temp_int[0]).wwxw * g_c0_1.xxxy.zzwz + intBitsToFloat(Temp_int[0]).yyzy)).yz;\n" " Temp_int[1].xy = floatBitsToInt(vec4(g_c0_2.xyxx.xyxx * intBitsToFloat(Temp_int[0]).yzyy + g_c0_3.xxxy.zwzz)).xy;\n" " Temp_int[2].x = floatBitsToInt(textureLod(g_t0, intBitsToFloat(Temp_int[0]).yz, 0.000000)).x;\n" " Temp_int[2].yz = floatBitsToInt(intBitsToFloat(Temp_int[1]).xxyx * intBitsToFloat(Temp_int[2]).xxxx).yz;\n" " Temp_int[0].yz = floatBitsToInt(intBitsToFloat(Temp_int[0]).xxwx * g_c0_1.xxxy.wwzw).yz;\n" " Temp_int[1].xy = floatBitsToInt(vec4(g_c0_2.xyxx.yxyy * intBitsToFloat(Temp_int[0]).yzyy + g_c0_3.xxxy.wzww)).xy;\n" " Temp_int[3].y = floatBitsToInt(textureLod(g_t0, intBitsToFloat(Temp_int[0]).zy, 0.000000).yxzw).y;\n" " Temp_int[3].xz = floatBitsToInt(intBitsToFloat(Temp_int[1]).xxyx * 
intBitsToFloat(Temp_int[3]).yyyy).xz;\n" " Temp_int[1].xyz = floatBitsToInt(intBitsToFloat(Temp_int[2]).xyzx + -intBitsToFloat(Temp_int[3]).yzxy).xyz;\n" " Temp_int[0].y = floatBitsToInt(vec4(dot((intBitsToFloat(Temp_int[1]).xyzx).xyz, (intBitsToFloat(Temp_int[1]).xyzx).xyz)).y);\n" " Temp_int[2].y = ivec4(floatBitsToInt(-g_c0_1.xxxy.z)).y;\n" " Temp_int[2].z = ivec4(int(0x0)).z;\n" " Temp_int[2].xy = floatBitsToInt(vec4(intBitsToFloat(Temp_int[0]).wxww * g_c0_1.xxxy.zwzz + intBitsToFloat(Temp_int[2]).yzyy)).xy;\n" " Temp_int[2].zw = floatBitsToInt(vec4(g_c0_2.xyxx.xxxy * intBitsToFloat(Temp_int[2]).xxxy + g_c0_3.xxxy.zzzw)).zw;\n" " Temp_int[4].x = floatBitsToInt(textureLod(g_t0, intBitsToFloat(Temp_int[2]).xy, 0.000000)).x;\n" " Temp_int[4].yz = floatBitsToInt(intBitsToFloat(Temp_int[2]).zzwz * intBitsToFloat(Temp_int[4]).xxxx).yz;\n" " Temp_int[2].xyz = floatBitsToInt(intBitsToFloat(Temp_int[3]).yzxy + -intBitsToFloat(Temp_int[4]).xyzx).xyz;\n" " Temp_int[0].z = floatBitsToInt(vec4(dot((intBitsToFloat(Temp_int[2]).xyzx).xyz, (intBitsToFloat(Temp_int[2]).xyzx).xyz)).z);\n" " Temp_int[0].y = floatBitsToInt(((intBitsToFloat(Temp_int[0]).y)< (intBitsToFloat(Temp_int[0]).z)) ? 
int(0xFFFFFFFF) : 0);\n" " if(ivec4(Temp_int[0].yyyy).x != 0) {\n" " Temp_int[1].xyz = ivec4(Temp_int[1].xyzx).xyz;\n" " } else {\n" " Temp_int[1].xyz = ivec4(Temp_int[2].xyzx).xyz;\n" " }\n" " Temp_int[2].z = ivec4(int(0x0)).z;\n" " Temp_int[2].x = ivec4(floatBitsToInt(g_c0_1.xxxy.w)).x;\n" " Temp_int[0].yz = floatBitsToInt(vec4(intBitsToFloat(Temp_int[0]).xxwx * g_c0_1.xxxy.wwzw + intBitsToFloat(Temp_int[2]).xxzx)).yz;\n" " Temp_int[2].xy = floatBitsToInt(vec4(g_c0_2.xyxx.yxyy * intBitsToFloat(Temp_int[0]).yzyy + g_c0_3.xxxy.wzww)).xy;\n" " Temp_int[4].y = floatBitsToInt(textureLod(g_t0, intBitsToFloat(Temp_int[0]).zy, 0.000000).yxzw).y;\n" " Temp_int[4].xz = floatBitsToInt(intBitsToFloat(Temp_int[2]).xxyx * intBitsToFloat(Temp_int[4]).yyyy).xz;\n" " Temp_int[2].xyz = floatBitsToInt(-intBitsToFloat(Temp_int[3]).xyzx + intBitsToFloat(Temp_int[4]).xyzx).xyz;\n" " Temp_int[0].y = floatBitsToInt(vec4(dot((intBitsToFloat(Temp_int[2]).xyzx).xyz, (intBitsToFloat(Temp_int[2]).xyzx).xyz)).y);\n" " Temp_int[4].z = ivec4(int(0x0)).z;\n" " Temp_int[4].x = ivec4(floatBitsToInt(-g_c0_1.xxxy.w)).x;\n" " Temp_int[0].xz = floatBitsToInt(vec4(intBitsToFloat(Temp_int[0]).xxwx * g_c0_1.xxxy.wwzw + intBitsToFloat(Temp_int[4]).xxzx)).xz;\n" " Temp_int[4].xy = floatBitsToInt(vec4(g_c0_2.xyxx.yxyy * intBitsToFloat(Temp_int[0]).xzxx + g_c0_3.xxxy.wzww)).xy;\n" " Temp_int[0].z = floatBitsToInt(textureLod(g_t0, intBitsToFloat(Temp_int[0]).zx, 0.000000).ywxz).z;\n" " Temp_int[0].xw = floatBitsToInt(intBitsToFloat(Temp_int[0]).zzzz * intBitsToFloat(Temp_int[4]).xxxy).xw;\n" " Temp_int[0].xzw = floatBitsToInt(-intBitsToFloat(Temp_int[0]).xxzw + intBitsToFloat(Temp_int[3]).xxyz).xzw;\n" " Temp_int[1].w = floatBitsToInt(vec4(dot((intBitsToFloat(Temp_int[0]).xzwx).xyz, (intBitsToFloat(Temp_int[0]).xzwx).xyz)).w);\n" " Temp_int[0].y = floatBitsToInt(((intBitsToFloat(Temp_int[0]).y)< (intBitsToFloat(Temp_int[1]).w)) ? 
int(0xFFFFFFFF) : 0);\n" " if(ivec4(Temp_int[0].yyyy).x != 0) {\n" " Temp_int[0].xyz = ivec4(Temp_int[2].xyzx).xyz;\n" " } else {\n" " Temp_int[0].xyz = ivec4(Temp_int[0].xzwx).xyz;\n" " }\n" " Temp_int[2].xyz = floatBitsToInt(intBitsToFloat(Temp_int[0]).xyzx * intBitsToFloat(Temp_int[1]).xyzx).xyz;\n" " Temp_int[0].xyz = floatBitsToInt(vec4(intBitsToFloat(Temp_int[1]).zxyz * intBitsToFloat(Temp_int[0]).yzxy + -intBitsToFloat(Temp_int[2]).xyzx)).xyz;\n" " Temp_int[0].w = floatBitsToInt(vec4(dot((intBitsToFloat(Temp_int[0]).xyzx).xyz, (intBitsToFloat(Temp_int[0]).xyzx).xyz)).w);\n" " Temp_int[0].w = floatBitsToInt(vec4(inversesqrt(intBitsToFloat(Temp_int[0]).w)).w);\n" " Temp_int[0].xyz = floatBitsToInt(intBitsToFloat(Temp_int[0]).wwww * intBitsToFloat(Temp_int[0]).xyzx).xyz;\n" " Output0.xyz = vec4(intBitsToFloat(-Temp_int[0].xyzx)).xyz;\n" " Output0.w = vec4(intBitsToFloat(int(0x0))).w;\n" " return;\n" "}\n"

I am thinking that the problem is that HLSLcc is casting the 0xFFFFFFFF uint to an int at this line, whereas it should really be casting to uint in this case (and was doing that correctly before):

" Temp_int[0].y = floatBitsToInt(((intBitsToFloat(Temp_int[0]).y)< (intBitsToFloat(Temp_int[0]).z)) ? int(0xFFFFFFFF) : 0);\n"

James-Jones commented 10 years ago

That is a bad instruction. When the comparison passes, it would evaluate to int = floatBitsToInt(0xFFFFFFFF), which is a NaN float when later converted back to float. It should be int = 0xFFFFFFFF; Fix pushed.

lbavoil commented 10 years ago

Using the latest version from main, the bit-cast register change is generating invalid GLSL on another shader.

Before bit-cast registers were added, the GLSL for this particular shader contained this instruction:

Temp[2].yz = vec4(-g_c0_12 + g_c0_13).yz;

And now, HLSLcc is generating this:

Temp_int[2].yz = floatBitsToInt(-g_c0_12 + g_c0_13).yz;

I am now getting these 2 GLSL compiler errors on this line:

Error: Failed to compile GLSL shader 0(56) : error C7505: OpenGL does not allow swizzles on scalar expressions 0(56) : error C1031: swizzle mask element not present in operand "yz"

I think it is invalid because "g_c0_12" and "g_c0_13" are scalars. (They are floats stored in a UBO.) The old HLSLcc was converting these scalars to a vec4() before taking the .yz components. This vec4() is now missing in the new HLSLcc.

vk2gpu commented 9 years ago

Has anyone been looking into this? I'm having issues using this to target WebGL at the moment since there is no GL_ARB_shader_bit_encoding to work with. For targeting GLSL ES 1.00 I'm using the 4_0_level_9_3 targets. Do the same SM4+ typeless register rules apply here, or perhaps should level_9_x be handled differently when casting int to float?

There also seem to be a few other oddities with it:

Interpreting a float to int cast as a bitcast when it should be casting 2.0f to 2, not as bits. Source HLSL just being "int(BoneIndex.x)" or similar.
Literal float values being replaced with "intBitsToFloat(0x0)" or "intBitsToFloat(0x3F800000)" unnecessarily.

In the meantime I may just drop back to the version prior to this change, and fix up anything else by carefully writing the source HLSL in such a way as to avoid other issues I've spotted that are logical given the input HLSL bytecode (bitshifting instead of multiplying, for example, which is unsupported in GLSL ES 1.00).

James-Jones / HLSLCrossCompiler

Add HLSLcc flag to let users disable bit-cast temp registers #23