KhronosGroup / WebGL

The Official Khronos WebGL Repository
Other
2.66k stars 669 forks source link

NVIDIA graphics calculates texture samples in branches that are not possible #3102

Open ffcr opened 4 years ago

ffcr commented 4 years ago

In GLSL for WebGL or WebGL2, if a branch contains a texture sampling statement, the sampling is performed (apparently speculatively) even when the branch can never be executed, which greatly increases GPU utilization. If there is for(;;){...} in the branch, NVIDIA will stop running. When the same shader runs in a browser on a mobile phone or on an integrated graphics card, this problem does not occur.

ffcr commented 4 years ago

Look here https://www.shadertoy.com/view/WlXcRs

aleino-nv commented 4 years ago

Interesting, thanks for reporting this!

I was able to reproduce on GeForce GTX 1060 with driver 451.48 on Windows. Chrome version "Version 84.0.4147.105 (Official Build) (64-bit)". That means the backend being used is D3D11.

I'll investigate this further, and also check what happens with OpenGL backend.

aleino-nv commented 4 years ago

No repro with OpenGL backend (launch chrome with --use-angle=gl option). Otherwise same setup as in previous comment. So to me it seems D3D11-specific. I'll investigate the issue on D3D11 first.

What OS and browser did you use when seeing this issue with NVIDIA GPUs?

ffcr commented 4 years ago

Windows 7 x64, GeForce GTX 950M. Both Chrome 85.0.4168.3 and Firefox 79 have this problem. When I changed the ANGLE graphics backend from default to OpenGL, there was no such problem in Chrome.

aleino-nv commented 4 years ago

Dump of HLSL generated by ANGLE from https://www.shadertoy.com/view/WlXcRs with TEST=0, at Chrome from revision 53956752fa92d8ec1f11fd336741064ef4830b44:

// Pixel shader input: one SV_Position value plus two TEXCOORD interpolants.
// main() reads dx_Position.xy and gl_FragCoord.zw; gl_Position is not
// referenced anywhere else in this dump.
struct PS_INPUT
{
    float4 dx_Position : SV_Position;
    float4 gl_Position : TEXCOORD0;
    float4 gl_FragCoord : TEXCOORD1;
};

// GLSL
//
// #version 300 es
// #ifdef GL_ES
// precision highp float;
// precision highp int;
// precision mediump sampler3D;
// #endif
// #define HW_PERFORMANCE 1
// uniform vec3      iResolution;
// uniform float     iTime;
// uniform float     iChannelTime[4];
// uniform vec4      iMouse;
// uniform vec4      iDate;
// uniform float     iSampleRate;
// uniform vec3      iChannelResolution[4];
// uniform int       iFrame;
// uniform float     iTimeDelta;
// uniform float     iFrameRate;
// uniform sampler2D iChannel0;
// uniform struct {
//   sampler2D sampler;
//   vec3  size;
//   float time;
//   int   loaded;
// }iCh0;
// uniform sampler2D iChannel1;
// uniform struct {
//   sampler2D sampler;
//   vec3  size;
//   float time;
//   int   loaded;
// }iCh1;
// uniform sampler2D iChannel2;
// uniform struct {
//   sampler2D sampler;
//   vec3  size;
//   float time;
//   int   loaded;
// }iCh2;
// uniform sampler2D iChannel3;
// uniform struct {
//   sampler2D sampler;
//   vec3  size;
//   float time;
//   int   loaded;
// }iCh3;
// void mainImage( out vec4 c,  in vec2 f );
// /*
//
//
//
//
//
//
//
//
// */
// #define TEST 0
// #define EXECUTE false
// #define IF(a) do{if(!(a))break;
// #define ENDIF }while(false);
// void mainImage( out vec4 fragColor, in vec2 fragCoord )
// {
//
//     vec2 uv = fragCoord/iResolution.xy;
//
//
//     vec3 col = 0.5 + 0.5*cos(iTime+uv.xyx+vec3(0,2,4));
//
//     bool b=iTime<0.0;
//     if(EXECUTE)b=!b;
//     #if TEST==0
//     if(b){
//         for(float a=0.;a<2000.;a++){
//             col+=texture(iChannel0,uv+a).xyz;
//         }
//     }
//     #elif TEST==1
//     do{
//         if(!b)break;
//         for(float a=0.;a<2000.;a++){
//             col+=texture(iChannel0,uv+a).xyz;
//         }
//     }while(false);
//     #elif TEST==2
//     IF(b)
//         for(float a=0.;a<2000.;a++){
//             col+=texture(iChannel0,uv+a).xyz;
//         }
//     ENDIF
//     #else
//     if(b){
//         for(float a=0.;a<2000.;a++){
//             col+=texelFetch(iChannel0,ivec2(fragCoord+a),0).xyz;
//         }
//     }
//     #endif
//
//
//
//     fragColor = vec4(col,1.0);
// }
// out vec4 outColor;
//
// void main( void ){vec4 color = vec4(0.0,0.0,0.0,1.0);mainImage( color, gl_FragCoord.xy );color.w = 1.0;outColor = color;}

#pragma warning( disable: 3556 3571 )
// Helper standing in for the GLSL vec4(vec3, float) constructor: packs a
// float3 and a scalar into one float4 (x0 -> xyz, x1 -> w).
float4 vec4_ctor(float3 x0, float x1)
{
    return float4(x0, x1);
}
// Uniforms

// Translated iResolution / iTime uniforms read by f_mainImage_float4.
uniform float3 _iResolution : register(c0);
uniform float _iTime : register(c1);
// Array index used for iChannel0 when calling the texture helper.
static const uint _iChannel0 = 0;
uniform Texture2D<float4> textures2D[1] : register(t0);
uniform SamplerState samplers2D[1] : register(s0);
// LOOP / FLATTEN expand to the HLSL [loop] / [flatten] attributes only when
// ANGLE_ENABLE_LOOP_FLATTEN is defined; otherwise they expand to nothing.
#ifdef ANGLE_ENABLE_LOOP_FLATTEN
#define LOOP [loop]
#define FLATTEN [flatten]
#else
#define LOOP
#define FLATTEN
#endif

#define ATOMIC_COUNTER_ARRAY_STRIDE 4

// Varyings

// File-scope staging variables: main() writes both of them, and
// generateOutput() copies out_outColor into the returned PS_OUTPUT.
static float4 out_outColor = {0, 0, 0, 0};
static float4 gl_FragCoord = float4(0, 0, 0, 0);

// Constant buffer bound at b1. Within this dump only dx_DepthFront is read
// (by main(), when computing gl_FragCoord.z); the other members are declared
// but unused here.
cbuffer DriverConstants : register(b1)
{
    float4 dx_ViewCoords : packoffset(c1);
    float3 dx_DepthFront : packoffset(c2);
    float2 dx_ViewScale : packoffset(c3);
    struct SamplerMetadata
    {
        int baseLevel;
        int internalFormatBits;
        int wrapModes;
        int padding;
        int4 intBorderColor;
    };
    SamplerMetadata samplerMetadata[1] : packoffset(c4);
};

// Stand-in for GLSL texture() on a 2D sampler: samples
// textures2D[samplerIndex] at t using samplers2D[samplerIndex].
// Texture2D::Sample takes no explicit LOD argument, unlike the
// SampleLevel / Load variants used by the other dumps in this thread.
float4 gl_texture2D(uint samplerIndex, float2 t)
{
    return textures2D[samplerIndex].Sample(samplers2D[samplerIndex], float2(t.x, t.y));
}

#define GL_USES_FRAG_COORD
// HLSL translation of the GLSL mainImage() quoted in the comment block of
// this dump (TEST == 0 path). The GLSL 'out' colour parameter appears here as
// 'inout'.
void f_mainImage_float4(inout float4 _fragColor, in float2 _fragCoord);
;
void f_mainImage_float4(inout float4 _fragColor, in float2 _fragCoord)
{
    float2 _uv2358 = (_fragCoord / _iResolution.xy);
    float3 _col2359 = (0.5 + (0.5 * cos(((_iTime + _uv2358.xyx) + float3(0.0, 2.0, 4.0)))));
    // The condition depends only on the _iTime uniform, not on per-pixel data.
    bool _b2360 = (_iTime < 0.0);
    // With ANGLE_ENABLE_LOOP_FLATTEN defined this reads "[flatten] if" around
    // a "[loop] for" of 2000 texture samples. This is the construct the thread
    // identifies as the cause of the GPU load.
    FLATTEN if (_b2360)
    {
        {LOOP for(float _a2361 = {0.0}; (_a2361 < 2000.0); (_a2361++))
            {
                (_col2359 += gl_texture2D(_iChannel0, (_uv2358 + _a2361)).xyz);
            }
        }
    }
    (_fragColor = vec4_ctor(_col2359, 1.0));
}
// Pixel shader output: a single colour written to render target 0.
struct PS_OUTPUT
{
    float4 out_outColor : SV_TARGET0;
};

// Copies the file-scope out_outColor (set by main()) into a PS_OUTPUT value.
PS_OUTPUT generateOutput()
{
    PS_OUTPUT output;
    output.out_outColor = out_outColor;
    return output;
}

// Pixel shader entry point: rebuilds the file-scope gl_FragCoord from the
// inputs, runs the translated mainImage(), forces alpha to 1.0 and returns
// the colour through generateOutput().
PS_OUTPUT main(PS_INPUT input){
    float rhw = 1.0 / input.gl_FragCoord.w;
    // x/y come straight from SV_Position; z is multiplied by 1/w and then
    // scaled and offset with dx_DepthFront.x / dx_DepthFront.y; w stores 1/w.
    gl_FragCoord.x = input.dx_Position.x;
    gl_FragCoord.y = input.dx_Position.y;
    gl_FragCoord.z = (input.gl_FragCoord.z * rhw) * dx_DepthFront.x + dx_DepthFront.y;
    gl_FragCoord.w = rhw;

    float4 _color2364 = {0.0, 0.0, 0.0, 1.0};
    f_mainImage_float4(_color2364, gl_FragCoord.xy);
    (_color2364.w = 1.0);
    (out_outColor = _color2364);
    return generateOutput();
}
aleino-nv commented 4 years ago

Dump of HLSL generated by ANGLE from https://www.shadertoy.com/view/WlXcRs with TEST=1, at Chrome from revision 53956752fa92d8ec1f11fd336741064ef4830b44:

// Pixel shader input: one SV_Position value plus two TEXCOORD interpolants.
// main() reads dx_Position.xy and gl_FragCoord.zw; gl_Position is not
// referenced anywhere else in this dump.
struct PS_INPUT
{
    float4 dx_Position : SV_Position;
    float4 gl_Position : TEXCOORD0;
    float4 gl_FragCoord : TEXCOORD1;
};

// GLSL
//
// #version 300 es
// #ifdef GL_ES
// precision highp float;
// precision highp int;
// precision mediump sampler3D;
// #endif
// #define HW_PERFORMANCE 1
// uniform vec3      iResolution;
// uniform float     iTime;
// uniform float     iChannelTime[4];
// uniform vec4      iMouse;
// uniform vec4      iDate;
// uniform float     iSampleRate;
// uniform vec3      iChannelResolution[4];
// uniform int       iFrame;
// uniform float     iTimeDelta;
// uniform float     iFrameRate;
// uniform sampler2D iChannel0;
// uniform struct {
//   sampler2D sampler;
//   vec3  size;
//   float time;
//   int   loaded;
// }iCh0;
// uniform sampler2D iChannel1;
// uniform struct {
//   sampler2D sampler;
//   vec3  size;
//   float time;
//   int   loaded;
// }iCh1;
// uniform sampler2D iChannel2;
// uniform struct {
//   sampler2D sampler;
//   vec3  size;
//   float time;
//   int   loaded;
// }iCh2;
// uniform sampler2D iChannel3;
// uniform struct {
//   sampler2D sampler;
//   vec3  size;
//   float time;
//   int   loaded;
// }iCh3;
// void mainImage( out vec4 c,  in vec2 f );
// /*
//
//
//
//
//
//
//
//
// */
// #define TEST 1
// #define EXECUTE false
// #define IF(a) do{if(!(a))break;
// #define ENDIF }while(false);
// void mainImage( out vec4 fragColor, in vec2 fragCoord )
// {
//
//     vec2 uv = fragCoord/iResolution.xy;
//
//
//     vec3 col = 0.5 + 0.5*cos(iTime+uv.xyx+vec3(0,2,4));
//
//     bool b=iTime<0.0;
//     if(EXECUTE)b=!b;
//     #if TEST==0
//     if(b){
//         for(float a=0.;a<2000.;a++){
//             col+=texture(iChannel0,uv+a).xyz;
//         }
//     }
//     #elif TEST==1
//     do{
//         if(!b)break;
//         for(float a=0.;a<2000.;a++){
//             col+=texture(iChannel0,uv+a).xyz;
//         }
//     }while(false);
//     #elif TEST==2
//     IF(b)
//         for(float a=0.;a<2000.;a++){
//             col+=texture(iChannel0,uv+a).xyz;
//         }
//     ENDIF
//     #else
//     if(b){
//         for(float a=0.;a<2000.;a++){
//             col+=texelFetch(iChannel0,ivec2(fragCoord+a),0).xyz;
//         }
//     }
//     #endif
//
//
//
//     fragColor = vec4(col,1.0);
// }
// out vec4 outColor;
//
// void main( void ){vec4 color = vec4(0.0,0.0,0.0,1.0);mainImage( color, gl_FragCoord.xy );color.w = 1.0;outColor = color;}

#pragma warning( disable: 3556 3571 )
// Helper standing in for the GLSL vec4(vec3, float) constructor: packs a
// float3 and a scalar into one float4 (x0 -> xyz, x1 -> w).
float4 vec4_ctor(float3 x0, float x1)
{
    return float4(x0, x1);
}
// Uniforms

// Translated iResolution / iTime uniforms read by f_mainImage_float4.
uniform float3 _iResolution : register(c0);
uniform float _iTime : register(c1);
// Array index used for iChannel0 when calling the texture helper.
static const uint _iChannel0 = 0;
uniform Texture2D<float4> textures2D[1] : register(t0);
uniform SamplerState samplers2D[1] : register(s0);
// LOOP / FLATTEN expand to the HLSL [loop] / [flatten] attributes only when
// ANGLE_ENABLE_LOOP_FLATTEN is defined; otherwise they expand to nothing.
#ifdef ANGLE_ENABLE_LOOP_FLATTEN
#define LOOP [loop]
#define FLATTEN [flatten]
#else
#define LOOP
#define FLATTEN
#endif

#define ATOMIC_COUNTER_ARRAY_STRIDE 4

// Varyings

// File-scope staging variables: main() writes both of them, and
// generateOutput() copies out_outColor into the returned PS_OUTPUT.
static float4 out_outColor = {0, 0, 0, 0};
static float4 gl_FragCoord = float4(0, 0, 0, 0);

// Constant buffer bound at b1. Within this dump only dx_DepthFront is read
// (by main(), when computing gl_FragCoord.z); the other members are declared
// but unused here.
cbuffer DriverConstants : register(b1)
{
    float4 dx_ViewCoords : packoffset(c1);
    float3 dx_DepthFront : packoffset(c2);
    float2 dx_ViewScale : packoffset(c3);
    struct SamplerMetadata
    {
        int baseLevel;
        int internalFormatBits;
        int wrapModes;
        int padding;
        int4 intBorderColor;
    };
    SamplerMetadata samplerMetadata[1] : packoffset(c4);
};

// Texture lookup with an explicit LOD: samples textures2D[samplerIndex] at t
// with SampleLevel and a fixed mip level of 0.
float4 gl_texture2DLod0(uint samplerIndex, float2 t)
{
    return textures2D[samplerIndex].SampleLevel(samplers2D[samplerIndex], float2(t.x, t.y), 0);
}

#define GL_USES_FRAG_COORD
// HLSL translation of the GLSL mainImage() quoted in the comment block of
// this dump (TEST == 1 path). The GLSL 'out' colour parameter appears here as
// 'inout'.
void f_mainImage_float4(inout float4 _fragColor, in float2 _fragCoord);
;
void f_mainImage_float4(inout float4 _fragColor, in float2 _fragCoord)
{
    float2 _uv2358 = (_fragCoord / _iResolution.xy);
    float3 _col2359 = (0.5 + (0.5 * cos(((_iTime + _uv2358.xyx) + float3(0.0, 2.0, 4.0)))));
    // The condition depends only on the _iTime uniform, not on per-pixel data.
    bool _b2360 = (_iTime < 0.0);
    // The GLSL do { if(!b) break; ... } while(false) is kept as a one-pass
    // loop with an early break; FLATTEN is not applied anywhere in this
    // variant, only LOOP.
    {LOOP do
        {
            if ((!_b2360))
            {
                break;
            }
            {LOOP for(float _a2361 = {0.0}; (_a2361 < 2000.0); (_a2361++))
                {
                    // texture() became the explicit-LOD helper here, and a zero
                    // vector is added to each sample.
                    // NOTE(review): presumably because the sample sits inside a
                    // loop exited via break - confirm against ANGLE's translator.
                    (_col2359 += (gl_texture2DLod0(_iChannel0, (_uv2358 + _a2361)).xyz+ float3(0.0, 0.0, 0.0)));
                }
            }
        }
        while (false);
    }
    (_fragColor = vec4_ctor(_col2359, 1.0));
}
// Pixel shader output: a single colour written to render target 0.
struct PS_OUTPUT
{
    float4 out_outColor : SV_TARGET0;
};

// Copies the file-scope out_outColor (set by main()) into a PS_OUTPUT value.
PS_OUTPUT generateOutput()
{
    PS_OUTPUT output;
    output.out_outColor = out_outColor;
    return output;
}

// Pixel shader entry point: rebuilds the file-scope gl_FragCoord from the
// inputs, runs the translated mainImage(), forces alpha to 1.0 and returns
// the colour through generateOutput().
PS_OUTPUT main(PS_INPUT input){
    float rhw = 1.0 / input.gl_FragCoord.w;
    // x/y come straight from SV_Position; z is multiplied by 1/w and then
    // scaled and offset with dx_DepthFront.x / dx_DepthFront.y; w stores 1/w.
    gl_FragCoord.x = input.dx_Position.x;
    gl_FragCoord.y = input.dx_Position.y;
    gl_FragCoord.z = (input.gl_FragCoord.z * rhw) * dx_DepthFront.x + dx_DepthFront.y;
    gl_FragCoord.w = rhw;

    float4 _color2364 = {0.0, 0.0, 0.0, 1.0};
    f_mainImage_float4(_color2364, gl_FragCoord.xy);
    (_color2364.w = 1.0);
    (out_outColor = _color2364);
    return generateOutput();
}
aleino-nv commented 4 years ago

Dump of HLSL generated by ANGLE from https://www.shadertoy.com/view/WlXcRs with TEST=2, at Chrome from revision 53956752fa92d8ec1f11fd336741064ef4830b44:

// Pixel shader input: one SV_Position value plus two TEXCOORD interpolants.
// main() reads dx_Position.xy and gl_FragCoord.zw; gl_Position is not
// referenced anywhere else in this dump.
struct PS_INPUT
{
    float4 dx_Position : SV_Position;
    float4 gl_Position : TEXCOORD0;
    float4 gl_FragCoord : TEXCOORD1;
};

// GLSL
//
// #version 300 es
// #ifdef GL_ES
// precision highp float;
// precision highp int;
// precision mediump sampler3D;
// #endif
// #define HW_PERFORMANCE 1
// uniform vec3      iResolution;
// uniform float     iTime;
// uniform float     iChannelTime[4];
// uniform vec4      iMouse;
// uniform vec4      iDate;
// uniform float     iSampleRate;
// uniform vec3      iChannelResolution[4];
// uniform int       iFrame;
// uniform float     iTimeDelta;
// uniform float     iFrameRate;
// uniform sampler2D iChannel0;
// uniform struct {
//   sampler2D sampler;
//   vec3  size;
//   float time;
//   int   loaded;
// }iCh0;
// uniform sampler2D iChannel1;
// uniform struct {
//   sampler2D sampler;
//   vec3  size;
//   float time;
//   int   loaded;
// }iCh1;
// uniform sampler2D iChannel2;
// uniform struct {
//   sampler2D sampler;
//   vec3  size;
//   float time;
//   int   loaded;
// }iCh2;
// uniform sampler2D iChannel3;
// uniform struct {
//   sampler2D sampler;
//   vec3  size;
//   float time;
//   int   loaded;
// }iCh3;
// void mainImage( out vec4 c,  in vec2 f );
// /*
//
//
//
//
//
//
//
//
// */
// #define TEST 2
// #define EXECUTE false
// #define IF(a) do{if(!(a))break;
// #define ENDIF }while(false);
// void mainImage( out vec4 fragColor, in vec2 fragCoord )
// {
//
//     vec2 uv = fragCoord/iResolution.xy;
//
//
//     vec3 col = 0.5 + 0.5*cos(iTime+uv.xyx+vec3(0,2,4));
//
//     bool b=iTime<0.0;
//     if(EXECUTE)b=!b;
//     #if TEST==0
//     if(b){
//         for(float a=0.;a<2000.;a++){
//             col+=texture(iChannel0,uv+a).xyz;
//         }
//     }
//     #elif TEST==1
//     do{
//         if(!b)break;
//         for(float a=0.;a<2000.;a++){
//             col+=texture(iChannel0,uv+a).xyz;
//         }
//     }while(false);
//     #elif TEST==2
//     IF(b)
//         for(float a=0.;a<2000.;a++){
//             col+=texture(iChannel0,uv+a).xyz;
//         }
//     ENDIF
//     #else
//     if(b){
//         for(float a=0.;a<2000.;a++){
//             col+=texelFetch(iChannel0,ivec2(fragCoord+a),0).xyz;
//         }
//     }
//     #endif
//
//
//
//     fragColor = vec4(col,1.0);
// }
// out vec4 outColor;
//
// void main( void ){vec4 color = vec4(0.0,0.0,0.0,1.0);mainImage( color, gl_FragCoord.xy );color.w = 1.0;outColor = color;}

#pragma warning( disable: 3556 3571 )
// Helper standing in for the GLSL vec4(vec3, float) constructor: packs a
// float3 and a scalar into one float4 (x0 -> xyz, x1 -> w).
float4 vec4_ctor(float3 x0, float x1)
{
    return float4(x0, x1);
}
// Uniforms

// Translated iResolution / iTime uniforms read by f_mainImage_float4.
uniform float3 _iResolution : register(c0);
uniform float _iTime : register(c1);
// Array index used for iChannel0 when calling the texture helper.
static const uint _iChannel0 = 0;
uniform Texture2D<float4> textures2D[1] : register(t0);
uniform SamplerState samplers2D[1] : register(s0);
// LOOP / FLATTEN expand to the HLSL [loop] / [flatten] attributes only when
// ANGLE_ENABLE_LOOP_FLATTEN is defined; otherwise they expand to nothing.
#ifdef ANGLE_ENABLE_LOOP_FLATTEN
#define LOOP [loop]
#define FLATTEN [flatten]
#else
#define LOOP
#define FLATTEN
#endif

#define ATOMIC_COUNTER_ARRAY_STRIDE 4

// Varyings

// File-scope staging variables: main() writes both of them, and
// generateOutput() copies out_outColor into the returned PS_OUTPUT.
static float4 out_outColor = {0, 0, 0, 0};
static float4 gl_FragCoord = float4(0, 0, 0, 0);

// Constant buffer bound at b1. Within this dump only dx_DepthFront is read
// (by main(), when computing gl_FragCoord.z); the other members are declared
// but unused here.
cbuffer DriverConstants : register(b1)
{
    float4 dx_ViewCoords : packoffset(c1);
    float3 dx_DepthFront : packoffset(c2);
    float2 dx_ViewScale : packoffset(c3);
    struct SamplerMetadata
    {
        int baseLevel;
        int internalFormatBits;
        int wrapModes;
        int padding;
        int4 intBorderColor;
    };
    SamplerMetadata samplerMetadata[1] : packoffset(c4);
};

// Texture lookup with an explicit LOD: samples textures2D[samplerIndex] at t
// with SampleLevel and a fixed mip level of 0.
float4 gl_texture2DLod0(uint samplerIndex, float2 t)
{
    return textures2D[samplerIndex].SampleLevel(samplers2D[samplerIndex], float2(t.x, t.y), 0);
}

#define GL_USES_FRAG_COORD
// HLSL translation of the GLSL mainImage() quoted in the comment block of
// this dump (TEST == 2 path, where the IF/ENDIF macros expand to
// do { if(!(a)) break; ... } while(false);). The GLSL 'out' colour parameter
// appears here as 'inout'.
void f_mainImage_float4(inout float4 _fragColor, in float2 _fragCoord);
;
void f_mainImage_float4(inout float4 _fragColor, in float2 _fragCoord)
{
    float2 _uv2358 = (_fragCoord / _iResolution.xy);
    float3 _col2359 = (0.5 + (0.5 * cos(((_iTime + _uv2358.xyx) + float3(0.0, 2.0, 4.0)))));
    // The condition depends only on the _iTime uniform, not on per-pixel data.
    bool _b2360 = (_iTime < 0.0);
    // One-pass do/while with an early break instead of an if; FLATTEN is not
    // applied anywhere in this variant, only LOOP.
    {LOOP do
        {
            if ((!_b2360))
            {
                break;
            }
            {LOOP for(float _a2361 = {0.0}; (_a2361 < 2000.0); (_a2361++))
                {
                    // texture() became the explicit-LOD helper here, and a zero
                    // vector is added to each sample.
                    // NOTE(review): presumably because the sample sits inside a
                    // loop exited via break - confirm against ANGLE's translator.
                    (_col2359 += (gl_texture2DLod0(_iChannel0, (_uv2358 + _a2361)).xyz + float3(0.0, 0.0, 0.0)));
                }
            }
        }
        while (false);
    }
    (_fragColor = vec4_ctor(_col2359, 1.0));
}
// Pixel shader output: a single colour written to render target 0.
struct PS_OUTPUT
{
    float4 out_outColor : SV_TARGET0;
};

// Copies the file-scope out_outColor (set by main()) into a PS_OUTPUT value.
PS_OUTPUT generateOutput()
{
    PS_OUTPUT output;
    output.out_outColor = out_outColor;
    return output;
}

// Pixel shader entry point: rebuilds the file-scope gl_FragCoord from the
// inputs, runs the translated mainImage(), forces alpha to 1.0 and returns
// the colour through generateOutput().
PS_OUTPUT main(PS_INPUT input){
    float rhw = 1.0 / input.gl_FragCoord.w;
    // x/y come straight from SV_Position; z is multiplied by 1/w and then
    // scaled and offset with dx_DepthFront.x / dx_DepthFront.y; w stores 1/w.
    gl_FragCoord.x = input.dx_Position.x;
    gl_FragCoord.y = input.dx_Position.y;
    gl_FragCoord.z = (input.gl_FragCoord.z * rhw) * dx_DepthFront.x + dx_DepthFront.y;
    gl_FragCoord.w = rhw;

    float4 _color2364 = {0.0, 0.0, 0.0, 1.0};
    f_mainImage_float4(_color2364, gl_FragCoord.xy);
    (_color2364.w = 1.0);
    (out_outColor = _color2364);
    return generateOutput();
}
aleino-nv commented 4 years ago

Dump of HLSL generated by ANGLE from https://www.shadertoy.com/view/WlXcRs with TEST=3, at Chrome from revision 53956752fa92d8ec1f11fd336741064ef4830b44:

// Pixel shader input: one SV_Position value plus two TEXCOORD interpolants.
// main() reads dx_Position.xy and gl_FragCoord.zw; gl_Position is not
// referenced anywhere else in this dump.
struct PS_INPUT
{
    float4 dx_Position : SV_Position;
    float4 gl_Position : TEXCOORD0;
    float4 gl_FragCoord : TEXCOORD1;
};

// GLSL
//
// #version 300 es
// #ifdef GL_ES
// precision highp float;
// precision highp int;
// precision mediump sampler3D;
// #endif
// #define HW_PERFORMANCE 1
// uniform vec3      iResolution;
// uniform float     iTime;
// uniform float     iChannelTime[4];
// uniform vec4      iMouse;
// uniform vec4      iDate;
// uniform float     iSampleRate;
// uniform vec3      iChannelResolution[4];
// uniform int       iFrame;
// uniform float     iTimeDelta;
// uniform float     iFrameRate;
// uniform sampler2D iChannel0;
// uniform struct {
//   sampler2D sampler;
//   vec3  size;
//   float time;
//   int   loaded;
// }iCh0;
// uniform sampler2D iChannel1;
// uniform struct {
//   sampler2D sampler;
//   vec3  size;
//   float time;
//   int   loaded;
// }iCh1;
// uniform sampler2D iChannel2;
// uniform struct {
//   sampler2D sampler;
//   vec3  size;
//   float time;
//   int   loaded;
// }iCh2;
// uniform sampler2D iChannel3;
// uniform struct {
//   sampler2D sampler;
//   vec3  size;
//   float time;
//   int   loaded;
// }iCh3;
// void mainImage( out vec4 c,  in vec2 f );
// /*
//
//
//
//
//
//
//
//
// */
// #define TEST 3
// #define EXECUTE false
// #define IF(a) do{if(!(a))break;
// #define ENDIF }while(false);
// void mainImage( out vec4 fragColor, in vec2 fragCoord )
// {
//
//     vec2 uv = fragCoord/iResolution.xy;
//
//
//     vec3 col = 0.5 + 0.5*cos(iTime+uv.xyx+vec3(0,2,4));
//
//     bool b=iTime<0.0;
//     if(EXECUTE)b=!b;
//     #if TEST==0
//     if(b){
//         for(float a=0.;a<2000.;a++){
//             col+=texture(iChannel0,uv+a).xyz;
//         }
//     }
//     #elif TEST==1
//     do{
//         if(!b)break;
//         for(float a=0.;a<2000.;a++){
//             col+=texture(iChannel0,uv+a).xyz;
//         }
//     }while(false);
//     #elif TEST==2
//     IF(b)
//         for(float a=0.;a<2000.;a++){
//             col+=texture(iChannel0,uv+a).xyz;
//         }
//     ENDIF
//     #else
//     if(b){
//         for(float a=0.;a<2000.;a++){
//             col+=texelFetch(iChannel0,ivec2(fragCoord+a),0).xyz;
//         }
//     }
//     #endif
//
//
//
//     fragColor = vec4(col,1.0);
// }
// out vec4 outColor;
//
// void main( void ){vec4 color = vec4(0.0,0.0,0.0,1.0);mainImage( color, gl_FragCoord.xy );color.w = 1.0;outColor = color;}

#pragma warning( disable: 3556 3571 )
// Helper standing in for the GLSL vec4(vec3, float) constructor: packs a
// float3 and a scalar into one float4 (x0 -> xyz, x1 -> w).
float4 vec4_ctor(float3 x0, float x1)
{
    return float4(x0, x1);
}
// Helper standing in for the GLSL ivec2(vec2) constructor: converts a float2
// to an int2. Used to build the integer texel coordinate for the fetch.
int2 ivec2_ctor(float2 x0)
{
    return int2(x0);
}
// Uniforms

// Translated iResolution / iTime uniforms read by f_mainImage_float4.
uniform float3 _iResolution : register(c0);
uniform float _iTime : register(c1);
// Array index used for iChannel0 when calling the texture helper.
static const uint _iChannel0 = 0;
uniform Texture2D<float4> textures2D[1] : register(t0);
uniform SamplerState samplers2D[1] : register(s0);
// LOOP / FLATTEN expand to the HLSL [loop] / [flatten] attributes only when
// ANGLE_ENABLE_LOOP_FLATTEN is defined; otherwise they expand to nothing.
// Neither macro is actually used by the shader body of this dump.
#ifdef ANGLE_ENABLE_LOOP_FLATTEN
#define LOOP [loop]
#define FLATTEN [flatten]
#else
#define LOOP
#define FLATTEN
#endif

#define ATOMIC_COUNTER_ARRAY_STRIDE 4

// Varyings

// File-scope staging variables: main() writes both of them, and
// generateOutput() copies out_outColor into the returned PS_OUTPUT.
static float4 out_outColor = {0, 0, 0, 0};
static float4 gl_FragCoord= float4(0, 0, 0, 0);

// Constant buffer bound at b1. Within this dump only dx_DepthFront is read
// (by main(), when computing gl_FragCoord.z); the other members are declared
// but unused here.
cbuffer DriverConstants: register(b1)
{
    float4 dx_ViewCoords : packoffset(c1);
    float3 dx_DepthFront : packoffset(c2);
    float2 dx_ViewScale : packoffset(c3);
    struct SamplerMetadata
    {
        int baseLevel;
        int internalFormatBits;
        int wrapModes;
        int padding;
        int4 intBorderColor;
    };
    SamplerMetadata samplerMetadata[1] : packoffset(c4);
};

// Stand-in for GLSL texelFetch() on a 2D sampler: reads
// textures2D[samplerIndex] with Texture2D::Load at integer texel coordinate t
// and mip level mip. No SamplerState is involved.
float4 gl_texture2DFetch(uint samplerIndex, int2 t, int mip)
{
    return textures2D[samplerIndex].Load(int3(t.x, t.y, mip));
}

#define GL_USES_FRAG_COORD
// HLSL translation of the GLSL mainImage() quoted in the comment block of
// this dump (TEST == 3, i.e. the #else path using texelFetch). The GLSL 'out'
// colour parameter appears here as 'inout'.
void f_mainImage_float4(inout float4 _fragColor, in float2 _fragCoord);
;
void f_mainImage_float4(inout float4 _fragColor, in float2 _fragCoord)
{
    float2 _uv2358 = (_fragCoord / _iResolution.xy);
    float3 _col2359 = (0.5 + (0.5 * cos(((_iTime + _uv2358.xyx) + float3(0.0, 2.0, 4.0)))));
    // The condition depends only on the _iTime uniform, not on per-pixel data.
    bool _b2360 = (_iTime < 0.0);
    // Same if/for shape as the TEST == 0 dump, but here neither FLATTEN nor
    // LOOP is attached and the lookup is a texel fetch rather than a
    // filtered sample.
    if (_b2360)
    {
        { for(float _a2361 = {0.0}; (_a2361 < 2000.0); (_a2361++))
            {
                (_col2359 += gl_texture2DFetch(_iChannel0, ivec2_ctor((_fragCoord + _a2361)), 0).xyz);
            }
        }
    }
    (_fragColor = vec4_ctor(_col2359, 1.0));
}
// Pixel shader output: a single colour written to render target 0.
struct PS_OUTPUT
{
    float4 out_outColor : SV_TARGET0;
};

// Copies the file-scope out_outColor (set by main()) into a PS_OUTPUT value.
PS_OUTPUT generateOutput()
{
    PS_OUTPUT output;
    output.out_outColor = out_outColor;
    return output;
}

// Pixel shader entry point: rebuilds the file-scope gl_FragCoord from the
// inputs, runs the translated mainImage(), forces alpha to 1.0 and returns
// the colour through generateOutput().
PS_OUTPUT main(PS_INPUT input){
    float rhw = 1.0 / input.gl_FragCoord.w;
    // x/y come straight from SV_Position; z is multiplied by 1/w and then
    // scaled and offset with dx_DepthFront.x / dx_DepthFront.y; w stores 1/w.
    gl_FragCoord.x = input.dx_Position.x;
    gl_FragCoord.y = input.dx_Position.y;
    gl_FragCoord.z= (input.gl_FragCoord.z * rhw) * dx_DepthFront.x + dx_DepthFront.y;
    gl_FragCoord.w = rhw;

    float4 _color2364 = {0.0, 0.0, 0.0, 1.0};
    f_mainImage_float4(_color2364, gl_FragCoord.xy);
    (_color2364.w = 1.0);
    (out_outColor = _color2364);
    return generateOutput();
}
aleino-nv commented 4 years ago

Apologies for spamming with the long HLSL code dumps. They take some work for me to grab and I didn't know where else to put them.

The plan now is to run the above HLSL code in a stand-alone D3D11 application. If I can reproduce the performance issue in that way, I'll simplify the problematic shader (TEST=0) and likely file a driver bug.

kdashg commented 4 years ago

I'll mention that we've expected some drivers to do unnecessary texture fetches in certain situations, though the case here is pretty extreme. Be aware that drivers might well treat this not as a bug, but as an optimization/enhancement request.

If there is for(;;){...} in the branch, NVIDIA will stop running

What does "stop running" mean? Browser freeze? Crash? This is better filed against browsers not WebGL itself, I think.

aleino-nv commented 4 years ago

I'll mention that we've expected some drivers to do unnecessary texture fetches in certain situations, though the case here is pretty extreme. Be aware that drivers might well treat this not as a bug, but as an optimization/enhancement request.

If there is for(;;){...} in the branch, NVIDIA will stop running

What does "stop running" mean? Browser freeze? Crash? This is better filed against browsers not WebGL itself, I think.

Yep, I'll probably file it as "something that the optimizer is missing".

I believe the expected result with an infinite loop varies across architectures. The older the architecture, the more severe the outcome. (ffcr has a GeForce GTX 950M, which is from the Maxwell architecture.) I haven't checked this yet, so the following is partially speculation: Probably the GPU first becomes unresponsive for several seconds until a watchdog resets it. I would guess it's not actually the GPU as a whole, rather just the context corresponding to the D3D11 device. Then, in D3D11 I would guess that IDXGISwapChain::Present returns DXGI_ERROR_DEVICE_RESET, and so the device needs to be recreated. Again I would guess this only happens for the D3D11 device that ordered the infinite amount of work, but not sure.

aleino-nv commented 4 years ago

Notice that the dumped TEST = 0 shader is the only one that uses the preprocessor define FLATTEN together with LOOP (the TEST = 1 and TEST = 2 dumps use LOOP only, and the TEST = 3 dump uses neither). Below is some simplified HLSL that corresponds to TEST = 0.

The '[loop]' attribute will force the loop to execute all iterations: https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-for

The '[flatten]' attribute will force both if(condition){...} and if(!condition){...} to execute, and then choose between them: https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-if

Therefore, when ENABLE_LOOP_FLATTEN is 1, the below code is going to take 2000 texture samples and throw them all away even if m is false. So when ENABLE_LOOP_FLATTEN is 1, this shader is expected to generate a lot of work, which it does.

// Texture and sampler read by main(), plus the boolean m that gates the
// sampling branch.
Texture2D tx : register(t0);
sampler sm : register(s0);
uniform bool m : register(c0);

// FLATTEN / LOOP expand to [flatten] / [loop] when ENABLE_LOOP_FLATTEN
// evaluates to non-zero, and to nothing otherwise. Note this is an #if on a
// value, unlike the #ifdef ANGLE_ENABLE_LOOP_FLATTEN in the ANGLE dumps.
#if ENABLE_LOOP_FLATTEN
#define FLATTEN [flatten]
#define LOOP [loop]
#else
#define FLATTEN
#define LOOP
#endif

// Pixel shader input for the simplified repro. main() reads only
// positionTexture; positionNDC is not referenced.
struct Input
{
    float4 positionNDC : SV_Position;
    float2 positionTexture : TEXCOORD;
};

// Simplified stand-alone repro of the TEST = 0 shader: when b is true it
// accumulates 2000 texture samples into color, otherwise it returns the
// initial color unchanged. Alpha is always 1.0.
float4 main(Input input) : SV_TARGET
{
    float3 color = float3(1.0, 0.0, 1.0);
    bool b = m;
    // With ENABLE_LOOP_FLATTEN set this is "[flatten] if" around a "[loop] for",
    // the same attribute combination as in the ANGLE-generated TEST = 0 dump.
    FLATTEN if(b)
    {
        int sampleCount = 2000;
        LOOP for(float sampleIndex = 0.0; sampleIndex < sampleCount; sampleIndex++)
        {
            // The x offset steps from 0 to 1 across the loop (sampleIndex / 1999).
            float2 p = input.positionTexture + float2(sampleIndex/float(sampleCount-1), 0.0);
            float3 sample = tx.Sample(sm, p).xyz;
            color += sample;
        }
        // Divides the accumulated value (initial color included) by the
        // sample count.
        color /= float(sampleCount);
    }
    return float4(color, 1.0);
}
aleino-nv commented 4 years ago

My current guess about what is happening is that either D3D11 or ANGLE_ENABLE_LOOP_FLATTEN is not enabled on the platforms where you did not see the GPU doing a lot of work. As far as I can tell from looking quickly at the ANGLE code, it attempts to compile with ANGLE_ENABLE_LOOP_FLATTEN first, and then it disables it if there are errors.

@jdashg Firefox is also using ANGLE for WebGL, right? If so, this would also explain why this can be seen in both Chrome and Firefox.

Questions for @ffcr:

  1. What mobile phone did you try on? Probably it does not support D3D11.
  2. What integrated graphics card did you try on? I would guess it might not handle [loop] and [flatten], or possibly ANGLE compiles with different settings for it.
aleino-nv commented 4 years ago

Just to summarize: as far as I can tell this is expected behavior at the driver level. If there is any issue, it seems to be in ANGLE's use of [loop] and [flatten].

aleino-nv commented 4 years ago

I filed http://anglebug.com/4957 for this.

aleino-nv commented 4 years ago

Yeah, this is definitely an ANGLE issue: http://anglebug.com/4957#c1. When you disable ANGLE_ENABLE_LOOP_FLATTEN, the GPU load goes away.