Open mborgerson opened 3 years ago
I was able to reproduce this on my ThinkPad T440p, which has an i5-4300M with an HD 4600. I tested it with Halo 2 - it worked fine on Linux with the Mesa drivers, but the Windows 10 drivers resulted in the exact same error as above when the game is about to enter the menu. Here's the glCapsViewer report for my GPU on Windows (raw XML attached below): http://opengl.gpuinfo.org/displayreport.php?id=1594
I enabled the NV2A debug printing and dumped the shaders that seem to cause the error. I attached them below. The shaders themselves seem to be fine - not only do they apparently work on the same hardware with different drivers, but they also build fine with glslangValidator (`glslangValidator -G --aml fragment.frag geometry.geom vertex.vert`).
At least with Halo 2, when the error appears, it's the first time a geometry shader is used.
My guess is that this is a driver bug - it seems like Intel had issues with shaders before on the drivers for the 4000 series.
```glsl #version 330 uniform vec2 clipRange; uniform vec2 surfaceSize; uniform vec4 c[192]; uniform vec4 fogColor; uniform float fogParam[2]; #define fogPlane c[0x39] #define texMat0 mat4(c[0x44], c[0x44+1], c[0x44+2], c[0x44+3]) #define texMat1 mat4(c[0x4c], c[0x4c+1], c[0x4c+2], c[0x4c+3]) #define texMat2 mat4(c[0x54], c[0x54+1], c[0x54+2], c[0x54+3]) #define texMat3 mat4(c[0x5c], c[0x5c+1], c[0x5c+2], c[0x5c+3]) vec4 oPos = vec4(0.0,0.0,0.0,1.0); vec4 oD0 = vec4(0.0,0.0,0.0,1.0); vec4 oD1 = vec4(0.0,0.0,0.0,1.0); vec4 oB0 = vec4(0.0,0.0,0.0,1.0); vec4 oB1 = vec4(0.0,0.0,0.0,1.0); vec4 oPts = vec4(0.0,0.0,0.0,1.0); vec4 oFog = vec4(1.0,0.0,0.0,1.0); vec4 oT0 = vec4(0.0,0.0,0.0,1.0); vec4 oT1 = vec4(0.0,0.0,0.0,1.0); vec4 oT2 = vec4(0.0,0.0,0.0,1.0); vec4 oT3 = vec4(0.0,0.0,0.0,1.0); struct VertexData { float inv_w; vec4 D0; vec4 D1; vec4 B0; vec4 B1; float Fog; vec4 T0; vec4 T1; vec4 T2; vec4 T3; }; noperspective out VertexData v_vtx; #define vtx v_vtx in vec4 v0; in vec4 v1; in vec4 v2; in vec4 v3; in vec4 v4; in vec4 v5; in vec4 v6; in vec4 v7; in vec4 v8; in vec4 v9; in vec4 v10; in vec4 v11; in vec4 v12; in vec4 v13; in vec4 v14; in vec4 v15; int A0 = 0; vec4 R0 = vec4(0.0,0.0,0.0,0.0); vec4 R1 = vec4(0.0,0.0,0.0,0.0); vec4 R2 = vec4(0.0,0.0,0.0,0.0); vec4 R3 = vec4(0.0,0.0,0.0,0.0); vec4 R4 = vec4(0.0,0.0,0.0,0.0); vec4 R5 = vec4(0.0,0.0,0.0,0.0); vec4 R6 = vec4(0.0,0.0,0.0,0.0); vec4 R7 = vec4(0.0,0.0,0.0,0.0); vec4 R8 = vec4(0.0,0.0,0.0,0.0); vec4 R9 = vec4(0.0,0.0,0.0,0.0); vec4 R10 = vec4(0.0,0.0,0.0,0.0); vec4 R11 = vec4(0.0,0.0,0.0,0.0); #define R12 oPos /* Converts the input to vec4, pads with last component */ vec4 _in(float v) { return vec4(v); } vec4 _in(vec2 v) { return v.xyyy; } vec4 _in(vec3 v) { return v.xyzz; } vec4 _in(vec4 v) { return v.xyzw; } #define INFINITY (1.0 / 0.0) #define MOV(dest, mask, src) dest.mask = _MOV(_in(src)).mask vec4 _MOV(vec4 src) { return src; } #define MUL(dest, mask, src0, src1) dest.mask = _MUL(_in(src0), 
_in(src1)).mask vec4 _MUL(vec4 src0, vec4 src1) { return src0 * src1; } #define ADD(dest, mask, src0, src1) dest.mask = _ADD(_in(src0), _in(src1)).mask vec4 _ADD(vec4 src0, vec4 src1) { return src0 + src1; } #define MAD(dest, mask, src0, src1, src2) dest.mask = _MAD(_in(src0), _in(src1), _in(src2)).mask vec4 _MAD(vec4 src0, vec4 src1, vec4 src2) { return src0 * src1 + src2; } #define DP3(dest, mask, src0, src1) dest.mask = _DP3(_in(src0), _in(src1)).mask vec4 _DP3(vec4 src0, vec4 src1) { return vec4(dot(src0.xyz, src1.xyz)); } #define DPH(dest, mask, src0, src1) dest.mask = _DPH(_in(src0), _in(src1)).mask vec4 _DPH(vec4 src0, vec4 src1) { return vec4(dot(vec4(src0.xyz, 1.0), src1)); } #define DP4(dest, mask, src0, src1) dest.mask = _DP4(_in(src0), _in(src1)).mask vec4 _DP4(vec4 src0, vec4 src1) { return vec4(dot(src0, src1)); } #define DST(dest, mask, src0, src1) dest.mask = _DST(_in(src0), _in(src1)).mask vec4 _DST(vec4 src0, vec4 src1) { return vec4(1.0, src0.y * src1.y, src0.z, src1.w); } #define MIN(dest, mask, src0, src1) dest.mask = _MIN(_in(src0), _in(src1)).mask vec4 _MIN(vec4 src0, vec4 src1) { return min(src0, src1); } #define MAX(dest, mask, src0, src1) dest.mask = _MAX(_in(src0), _in(src1)).mask vec4 _MAX(vec4 src0, vec4 src1) { return max(src0, src1); } #define SLT(dest, mask, src0, src1) dest.mask = _SLT(_in(src0), _in(src1)).mask vec4 _SLT(vec4 src0, vec4 src1) { return vec4(lessThan(src0, src1)); } #define ARL(dest, src) dest = _ARL(_in(src).x) int _ARL(float src) { /* Xbox GPU does specify rounding, OpenGL doesn't; so we need a bias. * Example: We probably want to floor 16.99.. to 17, not 16. * Source of error (why we get 16.99.. instead of 17.0) is typically * vertex-attributes being normalized from a byte value to float: * 17 / 255 = 0.06666.. so is this 0.06667 (ceil) or 0.06666 (floor)? * Which value we get depends on the host GPU. * If we multiply these rounded values by 255 later, we get: * 17.00 (ARL result = 17) or 16.99 (ARL result = 16). 
* We assume the intend was to get 17, so we add our bias to fix it. */ return int(floor(src + 0.001)); } #define SGE(dest, mask, src0, src1) dest.mask = _SGE(_in(src0), _in(src1)).mask vec4 _SGE(vec4 src0, vec4 src1) { return vec4(greaterThanEqual(src0, src1)); } #define RCP(dest, mask, src) dest.mask = _RCP(_in(src).x).mask vec4 _RCP(float src) { return vec4(1.0 / src); } #define RCC(dest, mask, src) dest.mask = _RCC(_in(src).x).mask vec4 _RCC(float src) { float t = 1.0 / src; if (t > 0.0) { t = clamp(t, 5.42101e-020, 1.884467e+019); } else { t = clamp(t, -1.884467e+019, -5.42101e-020); } return vec4(t); } #define RSQ(dest, mask, src) dest.mask = _RSQ(_in(src).x).mask vec4 _RSQ(float src) { if (src == 0.0) { return vec4(INFINITY); } if (isinf(src)) { return vec4(0.0); } return vec4(inversesqrt(abs(src))); } #define EXP(dest, mask, src) dest.mask = _EXP(_in(src).x).mask vec4 _EXP(float src) { vec4 result; result.x = exp2(floor(src)); result.y = src - floor(src); result.z = exp2(src); result.w = 1.0; return result; } #define LOG(dest, mask, src) dest.mask = _LOG(_in(src).x).mask vec4 _LOG(float src) { float tmp = abs(src); if (tmp == 0.0) { return vec4(-INFINITY, 1.0f, -INFINITY, 1.0f); } vec4 result; result.x = floor(log2(tmp)); result.y = tmp / exp2(floor(log2(tmp))); result.z = log2(tmp); result.w = 1.0; return result; } #define LIT(dest, mask, src) dest.mask = _LIT(_in(src)).mask vec4 _LIT(vec4 src) { vec4 s = src; float epsilon = 1.0 / 256.0; s.w = clamp(s.w, -(128.0 - epsilon), 128.0 - epsilon); s.x = max(s.x, 0.0); s.y = max(s.y, 0.0); vec4 t = vec4(1.0, 0.0, 0.0, 1.0); t.y = s.x; t.z = (s.x > 0.0) ? 
exp2(s.w * log2(s.y)) : 0.0; return t; } void main() { /* Slot 0: 0x00000000 0x0056E000 0x7C2A1000 0x2CA00000 */ MUL(R10,xy, c[183].x, v0.xy); /* Slot 1: 0x00000000 0x0056E0AA 0x7C021000 0x23A00000 */ MUL(R10,zw, c[183].z, v0.xxxy); /* Slot 2: 0x00000000 0x00570000 0x8C2A1000 0x2CB00000 */ MUL(R11,xy, c[184].x, v0.xy); /* Slot 3: 0x00000000 0x0096E215 0x18AAF856 0x9CA00000 */ MAD(R10,xy, v1.xy, c[183].y, R10.xy); /* Slot 4: 0x00000000 0x0096E201 0x19FEFAAE 0x93A00000 */ MAD(R10,zw, v1.xxxy, c[183].w, R10.zzzw); /* Slot 5: 0x00000000 0x00970215 0x18AB1856 0xDCB00000 */ MAD(R11,xy, v1.xy, c[184].y, R11.xy); /* Slot 6: 0x00000000 0x00D6201B 0x08363800 0x20B08800 */ DPH(oPos,x, v0, c[177]); /* Slot 7: 0x00000000 0x00D6401B 0x08365800 0x20B04800 */ DPH(oPos,y, v0, c[178]); /* Slot 8: 0x00000000 0x00D6601B 0x08367800 0x20B02800 */ DPH(oPos,z, v0, c[179]); /* Slot 9: 0x00000000 0x00D6801B 0x08369800 0x20B01800 */ DPH(oPos,w, v0, c[180]); /* Slot 10: 0x00000000 0x02574415 0xA42B586C 0xACA0F81C */ MUL(R10,xy, R10.xy, c[186].xy); MOV(oD0,xyzw, v2); /* Slot 11: 0x00000000 0x065740AB 0xA5575BFF 0x13A10000 */ MUL(R10,zw, R10.zzzw, c[186].zzzw); RCC(R1,w, R12.w); /* Slot 12: 0x00000000 0x00576015 0xB42B7800 0x2CB00000 */ MUL(R11,xy, R11.xy, c[187].xy); /* Slot 13: 0x00000000 0x00770015 0xA40012FE 0x3CA00000 */ ADD(R10,xy, R10.xy, c[184].zw); /* Slot 14: 0x00000000 0x007720AB 0xA4001006 0x73A00000 */ ADD(R10,zw, R10.zzzw, c[185].xxxy); /* Slot 15: 0x00000000 0x00772015 0xB40012FE 0x7CB00000 */ ADD(R11,xy, R11.xy, c[185].zw); /* Slot 16: 0x00000000 0x0041401A 0xC4355800 0x20B0E800 */ MUL(oPos,xyz, R12.xyz, c[10].xyz); /* Slot 17: 0x00000000 0x0056A015 0xA42AB800 0x2090C848 */ MUL(oT0,xy, R10.xy, c[181].xy); /* Slot 18: 0x00000000 0x0056C0BF 0xA42AD800 0x20A0C850 */ MUL(oT1,xy, R10.zw, c[182].xy); /* Slot 19: 0x00000000 0x0056C015 0xB57ED800 0x20A0C858 */ MUL(oT2,xy, R11.xy, c[182].zw); /* Slot 20: 0x00000000 0x0081601A 0xC5FE286A 0xF0B0E801 */ MAD(oPos,xyz, R12.xyz, R1.w, 
c[11].xyz); if (oPos.w == 0.0 || isinf(oPos.w)) { vtx.inv_w = 1.0; } else { vtx.inv_w = 1.0 / oPos.w; } oPos.x = 2.0 * (oPos.x - surfaceSize.x * 0.5) / surfaceSize.x; oPos.y = -2.0 * (oPos.y - surfaceSize.y * 0.5) / surfaceSize.y; if (clipRange.y != clipRange.x) { oPos.z = (oPos.z - 0.5 * (clipRange.x + clipRange.y)) / (0.5 * (clipRange.y - clipRange.x)); } if (oPos.w < 0.0) { oPos.xyz *= oPos.w; } else { oPos.w = 1.0; } float fogDistance = oFog.x; float fogFactor = fogParam[0] + fogDistance * fogParam[1]; fogFactor -= 1.0; oFog.xyzw = vec4(fogFactor); vtx.D0 = clamp(oD0, 0.0, 1.0) * vtx.inv_w; vtx.D1 = clamp(oD1, 0.0, 1.0) * vtx.inv_w; vtx.B0 = clamp(oB0, 0.0, 1.0) * vtx.inv_w; vtx.B1 = clamp(oB1, 0.0, 1.0) * vtx.inv_w; vtx.Fog = oFog.x * vtx.inv_w; vtx.T0 = oT0 * vtx.inv_w; vtx.T1 = oT1 * vtx.inv_w; vtx.T2 = oT2 * vtx.inv_w; vtx.T3 = oT3 * vtx.inv_w; gl_Position = oPos; gl_PointSize = oPts.x; } ```
```glsl #version 330 layout(lines_adjacency) in; layout(triangle_strip, max_vertices = 4) out; struct VertexData { float inv_w; vec4 D0; vec4 D1; vec4 B0; vec4 B1; float Fog; vec4 T0; vec4 T1; vec4 T2; vec4 T3; }; noperspective in VertexData v_vtx[]; noperspective out VertexData g_vtx; void emit_vertex(int index) { gl_Position = gl_in[index].gl_Position; gl_PointSize = gl_in[index].gl_PointSize; g_vtx = v_vtx[index]; EmitVertex(); } void main() { emit_vertex(0); emit_vertex(1); emit_vertex(3); emit_vertex(2); EndPrimitive(); } ```
```glsl #version 330 struct VertexData { float inv_w; vec4 D0; vec4 D1; vec4 B0; vec4 B1; float Fog; vec4 T0; vec4 T1; vec4 T2; vec4 T3; }; noperspective in VertexData g_vtx; #define vtx g_vtx out vec4 fragColor; uniform vec4 fogColor; float sign1(float x) { x *= 255.0; return (x-128.0)/127.0; } float sign2(float x) { x *= 255.0; if (x >= 128.0) return (x-255.5)/127.5; else return (x+0.5)/127.5; } float sign3(float x) { x *= 255.0; if (x >= 128.0) return (x-256.0)/127.0; else return (x)/127.0; } float sign3_to_0_to_1(float x) { if (x >= 0) return x/2; else return 1+x/2; } vec3 dotmap_zero_to_one(vec3 col) { return col; } vec3 dotmap_minus1_to_1_d3d(vec3 col) { return vec3(sign1(col.r),sign1(col.g),sign1(col.b)); } vec3 dotmap_minus1_to_1_gl(vec3 col) { return vec3(sign2(col.r),sign2(col.g),sign2(col.b)); } vec3 dotmap_minus1_to_1(vec3 col) { return vec3(sign3(col.r),sign3(col.g),sign3(col.b)); } vec3 dotmap_hilo_1(vec3 col) { return col; } vec3 dotmap_hilo_hemisphere_d3d(vec3 col) { return col; } vec3 dotmap_hilo_hemisphere_gl(vec3 col) { return col; } vec3 dotmap_hilo_hemisphere(vec3 col) { return col; } const float[9] gaussian3x3 = float[9]( 1.0/16.0, 2.0/16.0, 1.0/16.0, 2.0/16.0, 4.0/16.0, 2.0/16.0, 1.0/16.0, 2.0/16.0, 1.0/16.0); const vec2[9] convolution3x3 = vec2[9]( vec2(-1.0,-1.0),vec2(0.0,-1.0),vec2(1.0,-1.0), vec2(-1.0, 0.0),vec2(0.0, 0.0),vec2(1.0, 0.0), vec2(-1.0, 1.0),vec2(0.0, 1.0),vec2(1.0, 1.0)); vec4 gaussianFilter2DRectProj(sampler2DRect sampler, vec3 texCoord) { vec4 sum = vec4(0.0); for (int i = 0; i < 9; i++) { sum += gaussian3x3[i]*textureProj(sampler, texCoord + vec3(convolution3x3[i], 0.0)); } return sum; } uniform ivec4 clipRegion[1]; uniform sampler2D texSamp0; uniform vec4 c0_0; uniform vec4 c1_0; uniform vec4 c0_1; void main() { /* Window-clip (Inclusive) */ bool clipContained = false; for (int i = 0; i < 1; i++) { bvec4 clipTest = bvec4(lessThan(gl_FragCoord.xy, clipRegion[i].xy), greaterThan(gl_FragCoord.xy, clipRegion[i].zw)); if 
(!any(clipTest)) { clipContained = true; break; } } if (!clipContained) { discard; } vec4 pD0 = vtx.D0 / vtx.inv_w; vec4 pD1 = vtx.D1 / vtx.inv_w; vec4 pB0 = vtx.B0 / vtx.inv_w; vec4 pB1 = vtx.B1 / vtx.inv_w; vec4 pFog = vec4(fogColor.rgb, clamp(vtx.Fog / vtx.inv_w, 0.0, 1.0)); vec4 pT0 = vtx.T0 / vtx.inv_w; vec4 pT1 = vtx.T1 / vtx.inv_w; vec4 pT2 = vtx.T2 / vtx.inv_w; vec4 pT3 = vtx.T3 / vtx.inv_w; vec4 v0 = pD0; vec4 v1 = pD1; vec4 t0 = textureProj(texSamp0, pT0.xyw); vec4 t1 = vec4(0.0); /* PS_TEXTUREMODES_NONE */ vec4 t2 = vec4(0.0); /* PS_TEXTUREMODES_NONE */ vec4 t3 = vec4(0.0); /* PS_TEXTUREMODES_NONE */ vec4 r0; r0.a = t0.a; // Stage 0 t0.rgb = clamp(vec3((max(t0.rgb, 0.0) * max(c0_0.rgb, 0.0))), -1.0, 1.0); t1.rgb = clamp(vec3((max(t1.rgb, 0.0) * max(c1_0.rgb, 0.0))), -1.0, 1.0); t0.a = clamp(((max(t0.a, 0.0) * max(c0_0.a, 0.0))), -1.0, 1.0); t1.a = clamp(((max(t1.a, 0.0) * max(c1_0.a, 0.0))), -1.0, 1.0); // Stage 1 t2.rgb = clamp(vec3((max(t2.rgb, 0.0) * max(c0_1.rgb, 0.0))), -1.0, 1.0); r0.rgb = clamp(vec3((max(t0.rgb, 0.0) * max(v0.rgb, 0.0))), -1.0, 1.0); t2.a = clamp(((max(t2.a, 0.0) * max(c0_1.a, 0.0))), -1.0, 1.0); r0.a = clamp(((max(t0.a, 0.0) * max(v0.a, 0.0))), -1.0, 1.0); // Final Combiner fragColor.rgb = max(r0.rgb, 0.0) + mix(vec3(max(vec4(0.0).rgb, 0.0)), vec3(max(vec4(0.0).rgb, 0.0)), vec3(max(vec4(0.0).rgb, 0.0))); fragColor.a = max(r0.a, 0.0); } ```
```xml
```
Cannot reproduce on macOS with Intel HD 4000. Appears isolated to Windows.
```
"gl_renderer": "Intel HD Graphics 4000 OpenGL Engine",
"gl_shading_language_version": "4.10",
"gl_vendor": "Intel Inc.",
"gl_version": "4.1 INTEL-14.7.17",
"os_platform": "macOS",
"os_version": "Version 10.15.7 (Build 19H1217)",
```
Unfortunately, the issue persists even after #353; xemu.log from Halo 2 is below.
The system is the same as above: a T440p with an HD 4600 on Windows 10 Pro 20H2 (build 19042.985) with the 20.19.15.4531 driver.
``` Created QEMU launch parameters: C:\Users\****\Downloads\xemu-win-debug\xemu.exe -cpu pentium3 -machine xbox,bootrom=C:\Users\****\Desktop\xemu-files\mcpx_1.0.bin,short-animation=on,kernel-irqchip=off -device smbus-storage,file=C:\Users\****\AppData\Roaming\xemu\xemu\eeprom.bin -bios C:\Users\****\Desktop\xemu-files\Complex_4627.bin -m 64 -drive index=0,media=disk,file=C:\Users\****\Desktop\xemu-files\xbox_original.qcow2,locked=on -drive index=1,media=cdrom,file= -display xemu -audiodev none,id=snd0 xemu_version: 0.5.4-7-ga7f3f8f212 xemu_branch: master xemu_commit: a7f3f8f21221174efb65fdc0b8f7e2dd234c7412 xemu_date: Thu Jun 17 11:21:55 UTC 2021 GL_VENDOR: Intel GL_RENDERER: Intel(R) HD Graphics 4600 GL_VERSION: 4.0.0 - Build 20.19.15.4531 GL_SHADING_LANGUAGE_VERSION: 4.00 - Build 20.19.15.4531 xemu_settings_get_path: config path: C:\Users\****\AppData\Roaming\xemu\xemu\xemu.ini config_parse_callback: [system] flash_path = C:\Users\****\Desktop\xemu-files\Complex_4627.bin config_parse_callback: [system] bootrom_path = C:\Users\****\Desktop\xemu-files\mcpx_1.0.bin config_parse_callback: [system] hdd_path = C:\Users\****\Desktop\xemu-files\xbox_original.qcow2 config_parse_callback: [system] eeprom_path = C:\Users\****\AppData\Roaming\xemu\xemu\eeprom.bin config_parse_callback: [system] dvd_path = C:\Users\****\Desktop\xemu-files\Halo 2.iso config_parse_callback: [system] memory = 64 config_parse_callback: [system] shortanim = true config_parse_callback: [system] hard_fpu = true config_parse_callback: [audio] use_dsp = false config_parse_callback: [display] scale = scale config_parse_callback: [display] ui_scale = 1 config_parse_callback: [input] controller_1_guid = 030000005e0400008e02000000007801 config_parse_callback: [input] controller_2_guid = config_parse_callback: [input] controller_3_guid = config_parse_callback: [input] controller_4_guid = config_parse_callback: [network] enabled = false config_parse_callback: [network] backend = user config_parse_callback: 
[network] local_addr = 0.0.0.0:9368 config_parse_callback: [network] remote_addr = 1.2.3.4:9368 config_parse_callback: [network] pcap_iface = config_parse_callback: [misc] user_token = config_parse_callback: [misc] check_for_update = false audio: Device ac97: audiodev default parameter is deprecated, please specify audiodev=snd0 nv2a: shader linking failed: Type mismatch: Type of _O;v_vtx;inv_w different between shaders. Type mismatch: Type of _O;v_vtx;D0 different between shaders. Type mismatch: Type of _O;v_vtx;D1 different between shaders. Type mismatch: Type of _O;v_vtx;B0 different between shaders. Type mismatch: Type of _O;v_vtx;B1 different between shaders. Type mismatch: Type of _O;v_vtx;Fog different between shaders. Type mismatch: Type of _O;v_vtx;T0 different between shaders. Type mismatch: Type of _O;v_vtx;T1 different between shaders. Type mismatch: Type of _O;v_vtx;T2 different between shaders. Type mismatch: Type of _O;v_vtx;T3 different between shaders. Out of resource error. Resetting rate control (87106 samples) ```
HD Graphics (Broadwell) (BDW GT1) also suffers from this issue.
System Specs
xemu Version:
Error:
Titles: 4d530004