icculus / mojoshader

Use Direct3D shaders with other 3D rendering APIs.
https://icculus.org/mojoshader/
zlib License
139 stars 36 forks source link

My preshaders cause a debug assertion #11

Closed kg closed 5 years ago

kg commented 5 years ago

This can be worked around by passing /Op to fxc at compile time. I haven't narrowed down what particular thing in a preshader causes it to break.

kg commented 5 years ago

image

flibitijibibo commented 5 years ago

What is the operand type at this assertion? It’d be helpful to know what the preshader looks like.

kg commented 5 years ago

I'll try to dump the preshader assembly using fxc so we can figure it out. It wasn't obvious from the stack trace which shader was involved.

kg commented 5 years ago

HLSL:

#include "..\..\..\Fracture\Squared\RenderLib\Shaders\TargetInfo.fxh"
#include "..\..\..\Fracture\Squared\RenderLib\Shaders\ViewTransformCommon.fxh"
#include "..\..\..\Fracture\Squared\RenderLib\Shaders\GeometryCommon.fxh"

// Source texture for the clear pass, bound to texture register t0.
Texture2D ClearTexture : register(t0);
// Sampler over ClearTexture on s0: point filtering for mip/min/mag and clamped
// addressing on both U and V. ClearPixelShader reads it through tex2Dlod.
sampler   ClearSampler : register(s0) {
    Texture = (ClearTexture);
    MipFilter = POINT;
    MinFilter = POINT;
    MagFilter = POINT;
    AddressU = CLAMP;
    AddressV = CLAMP;
};

// Scale that ClearPixelShader multiplies into the half-pixel-offset pixel
// position to form the texture coordinate it samples at.
uniform float2 ClearInverseScale;
// Per-channel multiplier applied to the sampled texel. ClearPixelShader only
// samples when the alpha component is > 0; otherwise it outputs zero.
uniform float4 ClearMultiplier;

// Vertex shader for the clear pass.
//   position: input vertex position; only .xy is read, .z is ignored.
//   result:   output position, with z forced to 0.
// GetViewportPosition() and TransformPosition() are not defined in this listing
// (presumably they come from the included .fxh files - confirm).
void DistanceVertexShader (
    in    float3 position : POSITION0, // x, y, z
    out   float4 result   : POSITION0
) {
    // Make the position viewport-relative, then transform it with z = 0, w = 1.
    result = TransformPosition(float4(position.xy - GetViewportPosition(), 0, 1), 0);
    // Overwrite whatever depth the transform produced.
    result.z = 0;
}

// Pixel shader for the clear pass.
//   color: output color. It is the ClearSampler texel at this pixel scaled by
//          ClearMultiplier when ClearMultiplier.a > 0, and (0, 0, 0, 0) otherwise.
// ACCEPTS_VPOS and GET_VPOS are macros that are not defined in this listing;
// presumably ACCEPTS_VPOS declares the screen-position input that GET_VPOS
// reads - confirm against the included headers.
void ClearPixelShader (
    out float4 color : COLOR0,
    ACCEPTS_VPOS
) {
    // [branch] asks the compiler for a real dynamic branch instead of
    // evaluating both sides, so the texture fetch can be skipped entirely.
    [branch]
    if (ClearMultiplier.a > 0) {
        // Offset by 0.5 before scaling into texture coordinates.
        float2 vp = (GET_VPOS + 0.5) * ClearInverseScale;
        // tex2Dlod takes the mip level from .w, so this always reads level 0.
        float4 tex = tex2Dlod(ClearSampler, float4(vp.x, vp.y, 0, 0));
        color = tex * ClearMultiplier;
    } else {
        color = float4(0, 0, 0, 0);
    }
}

// Single-pass technique pairing DistanceVertexShader with ClearPixelShader,
// both compiled for shader model 3.0.
technique ClearDistanceField
{
    pass P0
    {
        vertexShader = compile vs_3_0 DistanceVertexShader();
        pixelShader  = compile ps_3_0 ClearPixelShader();
    }
}

/O3 assembly according to GPU ShaderAnalyzer (ps_3_0):

//
// Generated by Microsoft (R) HLSL Shader Compiler 9.29.952.3111
//
// Parameters:
//
//   float2 ClearInverseScale;
//   float4 ClearMultiplier;
//   sampler2D ClearSampler;
//   bool __IsRenderTargetUpsideDown__;
//   float2 __RenderTargetDimensions__;
//
//
// Registers:
//
//   Name                         Reg   Size
//   ---------------------------- ----- ----
//   __IsRenderTargetUpsideDown__ c0       1
//   __RenderTargetDimensions__   c1       1
//   ClearInverseScale            c2       1
//   ClearMultiplier              c3       1
//   ClearSampler                 s0       1
//

    ps_3_0
    def c4, 0, 0.5, 0, 0
    dcl vPos.xy
    dcl_2d s0
    mov r0.x, c4.x
    if_lt -c3.w, r0.x
      add r0.x, c1.y, -vPos.y
      cmp r0.y, -c0.x, vPos.y, r0.x
      frc r1.x, vPos.x
      frc r1.y, r0.y
      mov r0.x, vPos.x
      add r0.xy, r0, -r1
      add r0.xy, r0, c4.y
      mul r0.xy, r0, c2
      mov r0.zw, c4.x
      texldl r0, r0, s0
      mul oC0, r0, c3
    else
      mov oC0, c4.x
    endif

// approximately 19 instruction slots used (2 texture, 17 arithmetic)

It doesn't assert when compiled with /Od.

kg commented 5 years ago

Output from fxc:

E:\Documents\Projects\Illuminant\Illuminant\Shaders>E:\Documents\Projects\Fracture\ext\fxc\fxc.exe /O3 /T ps_3_0 ClearDistanceField.fx /E ClearPixelShader
Microsoft (R) Direct3D Shader Compiler 10.1 (using E:\Documents\Projects\Fracture\ext\fxc\D3DCOMPILER_47.dll)
Copyright (C) 2013 Microsoft. All rights reserved.

//
// Generated by Microsoft (R) HLSL Shader Compiler 10.1
//
// Parameters:
//
//   float2 ClearInverseScale;
//   float4 ClearMultiplier;
//   sampler2D ClearSampler;
//   bool __IsRenderTargetUpsideDown__;
//   float2 __RenderTargetDimensions__;
//
//
// Registers:
//
//   Name                         Reg   Size
//   ---------------------------- ----- ----
//   __IsRenderTargetUpsideDown__ c0       1
//   __RenderTargetDimensions__   c1       1
//   ClearInverseScale            c2       1
//   ClearMultiplier              c3       1
//   ClearSampler                 s0       1
//

    ps_3_0
    def c4, 0, 0.5, 0, 0
    dcl vPos.xy
    dcl_2d s0
    mov r0.x, c4.x
    if_lt -c3.w, r0.x
      add r0.x, c1.y, -vPos.y
      cmp r0.y, -c0.x, vPos.y, r0.x
      frc r1.x, vPos.x
      frc r1.y, r0.y
      mov r0.x, vPos.x
      add r0.xy, r0, -r1
      add r0.xy, r0, c4.y
      mul r0.xy, r0, c2
      mov r0.zw, c4.x
      texldl r0, r0, s0
      mul oC0, r0, c3
    else
      mov oC0, c4.x
    endif

// approximately 19 instruction slots used (2 texture, 17 arithmetic)
kg commented 5 years ago

Because FNA's debug information was disabled, the debugger output was somewhat misleading, so the assertion may actually come from this shader instead. I don't see a preshader in this one either, though:

#include "..\..\..\Fracture\Squared\RenderLib\Shaders\TargetInfo.fxh"
#include "LineLightCore.fxh"

// Vertex shader for a line light. Picks one corner of a box around the light's
// start/end points, offsets it, and transforms it with the viewport matrices.
//   vertexIndex:         .x selects the entry of LightCorners to use; .y is not read.
//   startPosition,
//   endPosition:         the two endpoints; passed through unchanged (inout).
//   lightProperties,
//   moreLightProperties: passed through unchanged; .x/.y and .z are also read here.
//   startColor, endColor: passed through unchanged.
//   worldPosition:       output, the selected corner before the viewport transform.
//   result:              output position; z is forced to 0.
// Helpers such as GetViewportPosition(), GetViewportScale(), getZToYMultiplier(),
// getInvZToYMultiplier(), getEnvironmentRenderScale() and the Viewport struct are
// not defined in this listing (presumably they come from the included headers).
void LineLightVertexShader(
    in int2 vertexIndex              : BLENDINDICES0,
    inout float3 startPosition       : TEXCOORD0,
    inout float3 endPosition         : TEXCOORD1,
    // radius, ramp length, ramp mode, enable shadows
    inout float4 lightProperties     : TEXCOORD2,
    // ao radius, distance falloff, y falloff factor, ao opacity
    inout float4 moreLightProperties : TEXCOORD3,
    inout float4 startColor          : TEXCOORD4,
    inout float4 endColor            : TEXCOORD5,
    out float3 worldPosition         : POSITION1,
    out float4 result                : POSITION0
) {
    // Macro not defined in this listing; presumably it declares the LightCorners
    // array indexed on the next statement - confirm in LineLightCore.fxh.
    DEFINE_LightCorners

    float3 vertex = LightCorners[vertexIndex.x];

    // Radius plus ramp length plus one.
    float  radius = lightProperties.x + lightProperties.y + 1;
    // Only consumed by the disabled `else if (0)` branch below.
    float  deltaY = (radius) - (radius / moreLightProperties.z);
    float3 radius3;

    // The first branch is hard-wired on, so radius3 is always (9999, 9999, 0)
    // and the two alternatives below are dead code.
    if (1)
        // HACK: How the hell do we compute bounds for this in the first place?
        radius3 = float3(9999, 9999, 0);
    else if (0)
        // HACK: Scale the y axis some to clip off dead pixels caused by the y falloff factor
        radius3 = float3(radius, radius - (deltaY / 2.0), 0);
    else
        radius3 = float3(radius, radius, 0);

    // Component-wise bounds of the two endpoints, grown by radius3 on each side.
    float3 p1 = min(startPosition, endPosition), p2 = max(startPosition, endPosition);
    float3 tl = p1 - radius3, br = p2 + radius3;

    // Unfortunately we need to adjust both by the light's radius (to account for pixels above/below the center point
    //  being lit in 2.5d projection), along with adjusting by the z of the light's centerpoint (to deal with pixels
    //  at high elevation)
    float radiusOffset = radius * getInvZToYMultiplier();
    // FIXME
    // Only the start point's z is used; the end point's z is ignored here.
    float effectiveZ = startPosition.z;
    float zOffset = effectiveZ * getZToYMultiplier();

    // Interpolate between the two bounds per component using the corner value.
    worldPosition = lerp(tl, br, vertex);

    // Corners with vertex.y below 0.5 are moved further in -y by both offsets.
    if (vertex.y < 0.5) {
        worldPosition.y -= radiusOffset;
        worldPosition.y -= zOffset;
    }

    // Make viewport-relative, scale x/y, then apply ModelView followed by Projection.
    float3 screenPosition = (worldPosition - float3(GetViewportPosition(), 0));
    screenPosition.xy *= GetViewportScale() * getEnvironmentRenderScale();
    float4 transformedPosition = mul(mul(float4(screenPosition.xyz, 1), Viewport.ModelView), Viewport.Projection);
    // Keep the transformed x, y and w but force z to 0.
    result = float4(transformedPosition.xy, 0, transformedPosition.w);
}

// Pixel shader for a line light.
//   worldPosition:       interpolated input; not read in this body.
//   startPosition,
//   endPosition:         endpoints, forwarded to LineLightPixelCore.
//   lightProperties,
//   moreLightProperties: forwarded to LineLightPixelCore.
//   startColor, endColor: blended by the factor u that LineLightPixelCore produces.
//   result:              output color, rgb * a * opacity, with alpha fixed at 1.
// ACCEPTS_VPOS / GET_VPOS, sampleGBuffer() and LineLightPixelCore() are not
// defined in this listing (presumably they come from the included headers).
void LineLightPixelShader(
    in  float3 worldPosition       : POSITION1,
    in  float3 startPosition       : TEXCOORD0,
    in  float3 endPosition         : TEXCOORD1,
    in  float4 lightProperties     : TEXCOORD2,
    in  float4 moreLightProperties : TEXCOORD3,
    in  float4 startColor          : TEXCOORD4,
    in  float4 endColor            : TEXCOORD5,
    ACCEPTS_VPOS,
    out float4 result              : COLOR0
) {
    // Both values are declared uninitialized and then handed to sampleGBuffer,
    // which presumably fills them as out parameters - confirm.
    float3 shadedPixelPosition;
    float3 shadedPixelNormal;
    sampleGBuffer(
        GET_VPOS,
        shadedPixelPosition, shadedPixelNormal
    );

    // u is likewise uninitialized before the call and read afterwards, so the
    // core function presumably writes it as an out parameter - confirm.
    float u;
    float opacity = LineLightPixelCore(
        shadedPixelPosition, shadedPixelNormal,
        startPosition, endPosition, u,
        lightProperties, moreLightProperties, false, false
    );

    // Blend the endpoint colors by u, then scale rgb by the color's own alpha
    // and by the computed opacity.
    float4 color = lerp(startColor, endColor, u);
    result = float4(color.rgb * color.a * opacity, 1);
}
//
// Generated by Microsoft (R) HLSL Shader Compiler 10.1
//
// Parameters:
//
//   struct
//   {
//       float4 _ConeAndMisc;
//       float4 _TextureSliceAndTexelSize;
//       float4 _StepAndMisc2;
//       float4 TextureSliceCount;
//       float4 Extent;
//
//   } DistanceField;
//
//   sampler2D DistanceFieldTextureSampler;
//
//   struct
//   {
//       float4 ZAndScale;
//
//   } Environment;
//
//   sampler2D GBufferSampler;
//   float2 GBufferTexelSize;
//   bool GBufferViewportRelative;
//
//   struct
//   {
//       float4x4 Projection;
//       float4x4 ModelView;
//       float4 ScaleAndPosition;
//
//   } Viewport;
//
//   bool __IsRenderTargetUpsideDown__;
//   float2 __RenderTargetDimensions__;
//
//
// Registers:
//
//   Name                         Reg   Size
//   ---------------------------- ----- ----
//   Viewport                     c0       9
//   DistanceField                c9       5
//   __IsRenderTargetUpsideDown__ c14      1
//   __RenderTargetDimensions__   c15      1
//   Environment                  c16      1
//   GBufferViewportRelative      c17      1
//   GBufferTexelSize             c18      1
//   DistanceFieldTextureSampler  s0       1
//   GBufferSampler               s2       1
//

    ps_3_0
    def c19, 0, 0.5, 128, 512
    def c20, -0.212114394, 1.57072878, -2, 3.14159274
    def c21, 1, 0, -0.0187292993, 0.0742610022
    def c22, -6.28318548, 0.200000003, -9999, 0.00100000005
    def c23, 0, -0.666666687, -0.333333343, 0.752941191
    def c24, 999.999939, -0.00294117653, 1.5, 0.0299999993
    def c25, 1, -16, 0.330000013, 0.0625
    def c26, -0.075000003, 100, 1.14285719, 0
    def c27, -1, 1, 0.333333343, 333.333313
    defi i0, 255, 0, 0, 0
    dcl_texcoord v0.xyz
    dcl_texcoord1 v1.xyz
    dcl_texcoord2 v2.xyw
    dcl_texcoord4 v3
    dcl_texcoord5 v4
    dcl vPos.xy
    dcl_2d s0
    dcl_2d s2
    add r0.x, c15.y, -vPos.y
    cmp r0.y, -c14.x, vPos.y, r0.x
    frc r1.x, vPos.x
    frc r1.y, r0.y
    mov r0.x, vPos.x
    add r0.xy, r0, -r1
    mov r1.x, c19.x
    dp2add r0.z, c18, c18, r1.x
    if_lt -r0.z, c19.x
      rcp r2.x, c8.x
      rcp r2.y, c8.y
      mad r0.zw, r0.xyxy, r2.xyxy, c8
      cmp r0.zw, -c17.x, r0.xyxy, r0
      add r0.zw, r0, c19.y
      mul r3.xy, r0.zwzw, c18
      mov r3.zw, c19.x
      texldl r3, r3, s2
      mul r4.xz, r3.zyww, c19.zyww
      rcp r0.z, c16.z
      mov r4.y, c19.x
      mad r0.zw, r0.xyxy, r0.z, r4.xyyx
      mad r4.xy, r0.zwzw, r2, c8.zwzw
      add r0.zw, r3.xyxy, -c19.y
      add r2.xz, r0.zyww, r0.zyww
      add r2.y, -r2_abs.z, c21.x
      nrm r3.xyz, r2
    else
      rcp r0.z, c16.z
      mul r0.xy, r0.z, r0
      rcp r2.x, c8.x
      rcp r2.y, c8.y
      mad r4.xy, r0, r2, c8.zwzw
      mov r4.z, c16.x
      mov r3.xyz, c21.yyxw
    endif
    mad r0.xyz, r3, c24.z, r4
    mov r2.xyz, v0
    add r1.yzw, -r2.xxyz, v1.xxyz
    dp3 r0.w, r1.yzww, r1.yzww
    rcp r2.w, r0.w
    rsq r0.w, r0.w
    add r5.xyz, r4, -v0
    dp3 r3.w, r5, r1.yzww
    mul_sat r2.w, r2.w, r3.w
    mad r5.xyz, r2.w, r1.yzww, v0
    add r6.xyz, -r4, r5
    dp3 r3.w, r6, r6
    rsq r4.w, r3.w
    mul r6.xyz, r4.w, r6
    dp3_sat r4.w, r6, r3
    mul r4.w, r4.w, c20.w
    rcp r3.w, r3.w
    mul r5.w, v2.x, v2.x
    mul r3.w, r3.w, r5.w
    mul r3.w, r3.w, r4.w
    mad r7.xyz, r1.yzww, c19.y, v0
    add r7.xyz, -r4, r7
    nrm r8.xyz, r7
    dp3_sat r4.w, r8, r3
    mul r7.xyz, r0.w, r1.wyzw
    mul r8.xyz, r6.yzxw, r7
    mad r6.xyz, r7.zxyw, r6.zxyw, -r8
    mov r7.x, v2.x
    mad r7.yzw, r7.x, r6.xxyz, v1.xxyz
    add r7.yzw, -r4.xxyz, r7
    nrm r8.xyz, r7.yzww
    dp3_sat r5.w, r8, r3
    mad r8.xyz, v2.x, r6, r2
    add r8.xyz, -r4, r8
    nrm r9.xyz, r8
    dp3_sat r6.w, r9, r3
    mad r2.xyz, v2.x, -r6, r2
    add r2.xyz, -r4, r2
    nrm r9.xyz, r2
    dp3_sat r8.w, r9, r3
    add r6.w, r6.w, r8.w
    mad r6.xyz, r7.x, -r6, v1
    add r6.xyz, -r4, r6
    nrm r9.xyz, r6
    dp3_sat r3.x, r9, r3
    add r3.x, r3.x, r6.w
    add r3.x, r5.w, r3.x
    add r3.x, r4.w, r3.x
    mul r4.yzw, r2.xyzx, r8.xzxy
    mad r4.yzw, r8.xyzx, r2.xzxy, -r4
    nrm r9.xyz, r4.yzww
    mul r4.yzw, r7.xwyz, r8.xyzx
    mad r4.yzw, r7.xzwy, r8.xzxy, -r4
    nrm r8.xyz, r4.yzww
    dp3 r3.y, -r8, r9
    add r3.z, -r3_abs.y, c21.x
    rsq r3.z, r3.z
    rcp r3.z, r3.z
    mad r4.y, r3_abs.y, c21.z, c21.w
    mad r4.y, r4.y, r3_abs.y, c20.x
    mad r4.y, r4.y, r3_abs.y, c20.y
    mul r3.z, r3.z, r4.y
    cmp r3.y, r3.y, c21.y, c21.x
    mad r4.y, r3.z, c20.z, c20.w
    mad r3.y, r4.y, r3.y, r3.z
    mul r4.yzw, r7.xzwy, r6.xzxy
    mad r4.yzw, r6.xyzx, r7.xwyz, -r4
    nrm r7.xyz, r4.yzww
    dp3 r3.z, -r7, r8
    add r4.y, -r3_abs.z, c21.x
    rsq r4.y, r4.y
    rcp r4.y, r4.y
    mad r4.z, r3_abs.z, c21.z, c21.w
    mad r4.z, r4.z, r3_abs.z, c20.x
    mad r4.z, r4.z, r3_abs.z, c20.y
    mul r4.y, r4.y, r4.z
    cmp r3.z, r3.z, c21.y, c21.x
    mad r4.z, r4.y, c20.z, c20.w
    mad r3.z, r4.z, r3.z, r4.y
    mul r4.yzw, r2.xzxy, r6.xyzx
    mad r2.xyz, r2.yzxw, r6.zxyw, -r4.yzww
    nrm r6.xyz, r2
    dp3 r2.x, -r6, r7
    add r2.y, -r2_abs.x, c21.x
    rsq r2.y, r2.y
    rcp r2.y, r2.y
    mad r2.z, r2_abs.x, c21.z, c21.w
    mad r2.z, r2.z, r2_abs.x, c20.x
    mad r2.z, r2.z, r2_abs.x, c20.y
    mul r2.y, r2.y, r2.z
    cmp r2.x, r2.x, c21.y, c21.x
    mad r2.z, r2.y, c20.z, c20.w
    mad r2.x, r2.z, r2.x, r2.y
    dp3 r2.y, -r9, r6
    add r2.z, -r2_abs.y, c21.x
    rsq r2.z, r2.z
    rcp r2.z, r2.z
    mad r4.y, r2_abs.y, c21.z, c21.w
    mad r4.y, r4.y, r2_abs.y, c20.x
    mad r4.y, r4.y, r2_abs.y, c20.y
    mul r2.z, r2.z, r4.y
    cmp r2.y, r2.y, c21.y, c21.x
    mad r4.y, r2.z, c20.z, c20.w
    mad r2.y, r4.y, r2.y, r2.z
    add r2.x, r2.x, r2.y
    add r2.x, r3.z, r2.x
    add r2.x, r3.y, r2.x
    add r2.x, r2.x, c22.x
    mul r2.x, r2.x, c22.y
    mad_sat r2.x, r2.x, r3.x, r3.w
    rcp r2.y, c12.x
    mul r2.y, r2.y, c27.z
    mov r3.x, c12.x
    add r2.z, -r3.x, c22.w
    cmp r2.y, r2.z, c27.w, r2.y
    mov r2.z, c13.z
    add r2.z, -r2.z, c22.w
    rcp r3.x, c13.z
    cmp r2.z, r2.z, c24.x, r3.x
    mul r2.z, r2.z, c12.w
    add r3.xyz, -r0, r5
    dp3 r3.w, r3, r3
    rsq r3.w, r3.w
    mul r3.xyz, r3.w, r3
    add r4.x, -r4.x, c22.z
    cmp r4.x, r4.x, c21.y, c21.x
    cmp r4.x, -r2.x, c19.x, r4.x
    add r4.y, r2.x, c24.y
    cmp r4.y, r4.y, r4.x, c19.x
    cmp r4.y, -v2_abs.w, c19.x, r4.y
    add r4.zw, c25.xyxy, v2.xyxy
    mul_sat r0.w, r0.w, r4.z
    max r4.z, r0.w, c24.w
    add_sat r0.w, r2.w, r4.z
    mad r5.xyz, r0.w, r1.yzww, v0
    add r5.xyz, -r0, r5
    dp3 r0.w, r5, r5
    rsq r0.w, r0.w
    mul r5.xyz, r0.w, r5
    rcp r5.w, v2.y
    cmp r4.w, r4.w, r5.w, c25.w
    max r5.w, v2.x, c25.z
    min r6.x, c9.x, r5.w
    mul r4.w, r4.w, r6.x
    mul r4.w, r4.w, c9.y
    rcp r3.w, r3.w
    add r3.w, r3.w, -v2.x
    max r5.w, r3.w, c21.x
    rcp r0.w, r0.w
    add r0.w, r0.w, -v2.x
    max r3.w, r0.w, c21.x
    add r0.w, r2.w, -r4.z
    mul r1.yzw, r1, r0.w
    cmp r1.yzw, r0.w, r1, c19.x
    add r1.yzw, r1, v0.xxyz
    add r1.yzw, -r0.xxyz, r1
    dp3 r0.w, r1.yzww, r1.yzww
    rsq r0.w, r0.w
    mul r1.yzw, r0.w, r1
    rcp r0.w, r0.w
    add r0.w, r0.w, -v2.x
    max r4.z, r0.w, c21.x
    mov r7.x, c21.x
    max r0.w, r7.x, c11.y
    cmp r1.x, -c13.x, r1.x, r4.y
    mov r7.zw, c19.x
    mov r8.zw, c19.x
    mov r9.zw, c19.x
    mov r10.xyz, r0
    mov r10.w, r1.y
    mov r11.xy, r1.zwzw
    mov r11.z, c19.y
    mov r11.w, r4.z
    mov r12.xyz, r0
    mov r12.w, r3.x
    mov r13.xy, r3.yzzw
    mov r13.z, c19.y
    mov r13.w, r5.w
    mov r14.xyz, r0
    mov r14.w, r5.x
    mov r15.xy, r5.yzzw
    mov r15.z, c19.y
    mov r15.w, r3.w
    mov r6.yzw, c21.x
    mov r16.x, c11.x
    mov r16.y, r1.x
    rep i0
      cmp r16.z, -r16.y, c21.x, c21.y
      break_ne r16.z, -r16.z
      mov r17.x, r14.w
      mov r17.yz, r15.xxyw
      mad r17.xyz, r17, r15.z, r14
      max r18.xyz, r17, c19.x
      min r19.xyz, c13, r18
      add r17.xyz, -r17, r19
      dp3 r16.z, r17, r17
      min r16.w, c12.z, r19.z
      mul r17.x, r2.z, r16.w
      frc r17.y, r17.x
      add r17.x, -r17.y, r17.x
      mad r16.w, r16.w, r2.z, -r17.x
      mul r17.yz, r19.xxyw, c10.xzww
      mul r17.w, r2.y, r17.x
      frc r18.x, r17.w
      add r18.y, r17.w, -r18.x
      mad r17.w, r17.x, r2.y, -r18.y
      mul r17.w, r17.w, c12.x
      frc r18.z, r17.w
      add r18.x, r17.w, -r18.z
      mad r9.xy, r18, c10, r17.yzzw
      texldl r18, r9, s0
      mul r9.x, r17.x, c27.z
      frc r9.x, r9_abs.x
      cmp r9.x, r17.x, r9.x, -r9.x
      add r9.xy, r9.x, c23.yzzw
      cmp r17.xy, r9.y, r18.yzzw, r18
      cmp r9.xy, r9.x, r18.zwzw, r17
      lrp r17.x, r16.w, r9.y, r9.x
      rsq r9.x, r16.z
      rcp r9.x, r9.x
      add r9.y, -r17.x, c23.w
      mad r9.x, r9.y, c13.w, r9.x
      mov r17.x, r10.w
      mov r17.yz, r11.xxyw
      mad r17.xyz, r17, r11.z, r10
      max r18.xyz, r17, c19.x
      min r19.xyz, c13, r18
      add r17.xyz, -r17, r19
      dp3 r9.y, r17, r17
      min r16.z, c12.z, r19.z
      mul r17.xy, r19, c10.zwzw
      mul r16.w, r2.z, r16.z
      frc r17.z, r16.w
      add r16.w, r16.w, -r17.z
      mul r17.z, r2.y, r16.w
      frc r17.w, r17.z
      add r18.y, -r17.w, r17.z
      mad r17.z, r16.w, r2.y, -r18.y
      mul r17.z, r17.z, c12.x
      frc r17.w, r17.z
      add r18.x, -r17.w, r17.z
      mad r7.xy, r18, c10, r17
      mad r16.z, r16.z, r2.z, -r16.w
      texldl r17, r7, s0
      mul r7.x, r16.w, c27.z
      frc r7.x, r7_abs.x
      cmp r7.x, r16.w, r7.x, -r7.x
      add r7.xy, r7.x, c23.yzzw
      cmp r17.xy, r7.y, r17.yzzw, r17
      cmp r7.xy, r7.x, r17.zwzw, r17
      lrp r17.x, r16.z, r7.y, r7.x
      rsq r7.x, r9.y
      rcp r7.x, r7.x
      add r7.y, -r17.x, c23.w
      mad r7.x, r7.y, c13.w, r7.x
      mul r7.y, r7_abs.x, c11.z
      max r9.y, r7.y, r0.w
      add r7.y, r9.y, r11.z
      mov r17.x, r12.w
      mov r17.yz, r13.xxyw
      mad r17.xyz, r17, r13.z, r12
      max r18.xyz, r17, c19.x
      min r19.xyz, c13, r18
      add r17.xyz, -r17, r19
      dp3 r9.y, r17, r17
      min r16.z, c12.z, r19.z
      mul r17.xy, r19, c10.zwzw
      mul r16.w, r2.z, r16.z
      frc r17.z, r16.w
      add r16.w, r16.w, -r17.z
      mul r17.z, r2.y, r16.w
      frc r17.w, r17.z
      add r18.y, -r17.w, r17.z
      mad r17.z, r16.w, r2.y, -r18.y
      mul r17.z, r17.z, c12.x
      frc r17.w, r17.z
      add r18.x, -r17.w, r17.z
      mad r8.xy, r18, c10, r17
      mad r16.z, r16.z, r2.z, -r16.w
      texldl r17, r8, s0
      mul r8.x, r16.w, c27.z
      frc r8.x, r8_abs.x
      cmp r8.x, r16.w, r8.x, -r8.x
      add r8.xy, r8.x, c23.yzzw
      cmp r17.xy, r8.y, r17.yzzw, r17
      cmp r8.xy, r8.x, r17.zwzw, r17
      lrp r17.x, r16.z, r8.y, r8.x
      mul r8.x, r9_abs.x, c11.z
      max r16.z, r8.x, r0.w
      add r8.x, r15.z, r16.z
      mad r8.y, r4.w, r15.z, c25.z
      min r16.z, r6.x, r8.y
      rsq r8.y, r9.y
      rcp r8.y, r8.y
      add r9.y, -r17.x, c23.w
      mad r8.y, r9.y, c13.w, r8.y
      mul r9.y, r8_abs.y, c11.z
      max r16.w, r9.y, r0.w
      add r9.y, r13.z, r16.w
      rcp r16.z, r16.z
      add r9.x, r9.x, c24.z
      mul r9.x, r16.z, r9.x
      min r16.z, r9.x, r6.w
      mad r9.x, r4.w, r11.z, c25.z
      min r16.w, r6.x, r9.x
      rcp r9.x, r16.w
      add r7.x, r7.x, c24.z
      mul r7.x, r9.x, r7.x
      min r9.x, r7.x, r6.y
      min r17.z, r11.w, r7.y
      mad r7.x, r4.w, r13.z, c25.z
      min r16.w, r6.x, r7.x
      rcp r7.x, r16.w
      add r7.y, r8.y, c24.z
      mul r7.x, r7.x, r7.y
      min r8.y, r7.x, r6.z
      min r18.z, r13.w, r9.y
      add r7.x, r13.w, -r18.z
      mul_sat r7.x, r7.x, c26.y
      add_sat r7.y, r8.y, c26.x
      mul r7.x, r7.x, r7.y
      add r7.y, r11.w, -r17.z
      mul_sat r7.y, r7.y, c26.y
      add_sat r9.y, r9.x, c26.x
      mad r7.x, r9.y, r7.y, r7.x
      min r19.z, r15.w, r8.x
      add r7.y, r15.w, -r19.z
      mul_sat r7.y, r7.y, c26.y
      add_sat r8.x, r16.z, c26.x
      mad r7.x, r8.x, r7.y, r7.x
      add r16.x, r16.x, -c21.x
      mul r16.y, r7.x, r16.x
      mov r17.xyw, r11
      mov r11, r17
      mov r18.xyw, r13
      mov r13, r18
      mov r19.xyw, r15
      mov r15, r19
      mov r6.y, r9.x
      mov r6.z, r8.y
      mov r6.w, r16.z
    endrep
    add r0.x, r6.z, r6.y
    add r0.x, r6.w, r0.x
    mul r0.y, r16.x, c19.y
    mul r0.x, r0.x, c27.z
    min r1.x, r0.y, r0.x
    add_sat r0.x, r1.x, c26.x
    mul_sat r0.x, r0.x, c26.z
    pow r1.x, r0.x, c9.z
    cmp r0.x, -r4.y, c21.x, r1.x
    mul r0.x, r0.x, r2.x
    cmp r0.x, -r4.x, c19.x, r0.x
    cmp r1, -r4.x, c27.x, c27.y
    texkill r1
    mov r1, r1.w
    texkill r1
    mov r1, v3
    add r1, -r1, v4
    mad r1, r2.w, r1, v3
    mul r0.yzw, r1.w, r1.xxyz
    mul oC0.xyz, r0.x, r0.yzww
    mov oC0.w, c21.x

// approximately 432 instruction slots used (8 texture, 424 arithmetic)
kg commented 5 years ago

Research results: fxc is producing unusual output in which the pixel shader lists itself as its own preshader, like so: (image). As a result, whenever I apply the effect or set parameters at runtime, mojoshader has to execute the entire pixel shader as a preshader. This appears to work and the shader functions, but it hits assertions, presumably because this case was never expected. It is likely happening because the shader is complex enough that fxc can't hoist out simple opcodes to do initialization, so it just reuses the whole shader.

kg commented 5 years ago

The shader disassemblies from AMD's tool and from fxc are both wrong: there are definitely preshaders. I have no idea what they do, but I have a fix.