Closed kg closed 5 years ago
What is the operand type at this assertion? It’d be helpful to know what the preshader looks like.
I'll try to dump the preshader assembly using fxc so we can figure it out. It wasn't obvious from the stack trace which shader is involved.
HLSL:
#include "..\..\..\Fracture\Squared\RenderLib\Shaders\TargetInfo.fxh"
#include "..\..\..\Fracture\Squared\RenderLib\Shaders\ViewTransformCommon.fxh"
#include "..\..\..\Fracture\Squared\RenderLib\Shaders\GeometryCommon.fxh"
// Texture sampled by ClearPixelShader through ClearSampler; bound to register t0.
Texture2D ClearTexture : register(t0);
// Sampler state for ClearTexture on register s0: point filtering for
// mip/min/mag and clamped addressing on both U and V.
sampler ClearSampler : register(s0) {
Texture = (ClearTexture);
MipFilter = POINT;
MinFilter = POINT;
MagFilter = POINT;
AddressU = CLAMP;
AddressV = CLAMP;
};
// ClearPixelShader multiplies (pixel position + 0.5) by this value to form
// the texture coordinate it samples at.
uniform float2 ClearInverseScale;
// ClearPixelShader multiplies the sampled texel by this value; its alpha
// component also gates the fetch (the shader outputs zero unless a > 0).
uniform float4 ClearMultiplier;
// Vertex shader for the ClearDistanceField technique.
//   position : input vertex position (only .xy is read here).
//   result   : output clip-space position.
// TransformPosition and GetViewportPosition are not defined in this snippet
// (presumably they come from the included .fxh files).
void DistanceVertexShader (
in float3 position : POSITION0, // x, y, z
out float4 result : POSITION0
) {
// Make the xy position relative to the viewport position, then transform it.
result = TransformPosition(float4(position.xy - GetViewportPosition(), 0, 1), 0);
// Force the output depth to 0 regardless of what the transform produced.
result.z = 0;
}
// Pixel shader for the ClearDistanceField technique.
//   color : output color. When ClearMultiplier.a > 0 it is the texel fetched
//           from ClearSampler multiplied by ClearMultiplier; otherwise it is
//           (0, 0, 0, 0).
// ACCEPTS_VPOS / GET_VPOS are macros defined outside this snippet
// (presumably in TargetInfo.fxh); GET_VPOS is used here as the pixel position.
void ClearPixelShader (
out float4 color : COLOR0,
ACCEPTS_VPOS
) {
// [branch] asks the compiler to evaluate only the taken side of the if.
[branch]
if (ClearMultiplier.a > 0) {
// Offset the pixel position by 0.5 and scale it into texture coordinates.
float2 vp = (GET_VPOS + 0.5) * ClearInverseScale;
// tex2Dlod with w = 0 samples mip level 0 explicitly.
float4 tex = tex2Dlod(ClearSampler, float4(vp.x, vp.y, 0, 0));
color = tex * ClearMultiplier;
} else {
color = float4(0, 0, 0, 0);
}
}
// Single-pass technique pairing DistanceVertexShader with ClearPixelShader,
// both compiled against the shader model 3.0 profiles.
technique ClearDistanceField
{
pass P0
{
vertexShader = compile vs_3_0 DistanceVertexShader();
pixelShader = compile ps_3_0 ClearPixelShader();
}
}
/O3 assembly according to GPU ShaderAnalyzer (ps_3_0):
//
// Generated by Microsoft (R) HLSL Shader Compiler 9.29.952.3111
//
// Parameters:
//
// float2 ClearInverseScale;
// float4 ClearMultiplier;
// sampler2D ClearSampler;
// bool __IsRenderTargetUpsideDown__;
// float2 __RenderTargetDimensions__;
//
//
// Registers:
//
// Name Reg Size
// ---------------------------- ----- ----
// __IsRenderTargetUpsideDown__ c0 1
// __RenderTargetDimensions__ c1 1
// ClearInverseScale c2 1
// ClearMultiplier c3 1
// ClearSampler s0 1
//
ps_3_0
def c4, 0, 0.5, 0, 0
dcl vPos.xy
dcl_2d s0
mov r0.x, c4.x
if_lt -c3.w, r0.x
add r0.x, c1.y, -vPos.y
cmp r0.y, -c0.x, vPos.y, r0.x
frc r1.x, vPos.x
frc r1.y, r0.y
mov r0.x, vPos.x
add r0.xy, r0, -r1
add r0.xy, r0, c4.y
mul r0.xy, r0, c2
mov r0.zw, c4.x
texldl r0, r0, s0
mul oC0, r0, c3
else
mov oC0, c4.x
endif
// approximately 19 instruction slots used (2 texture, 17 arithmetic)
Doesn't assert at /Od.
Output from fxc:
E:\Documents\Projects\Illuminant\Illuminant\Shaders>E:\Documents\Projects\Fracture\ext\fxc\fxc.exe /O3 /T ps_3_0 ClearDistanceField.fx /E ClearPixelShader
Microsoft (R) Direct3D Shader Compiler 10.1 (using E:\Documents\Projects\Fracture\ext\fxc\D3DCOMPILER_47.dll)
Copyright (C) 2013 Microsoft. All rights reserved.
//
// Generated by Microsoft (R) HLSL Shader Compiler 10.1
//
// Parameters:
//
// float2 ClearInverseScale;
// float4 ClearMultiplier;
// sampler2D ClearSampler;
// bool __IsRenderTargetUpsideDown__;
// float2 __RenderTargetDimensions__;
//
//
// Registers:
//
// Name Reg Size
// ---------------------------- ----- ----
// __IsRenderTargetUpsideDown__ c0 1
// __RenderTargetDimensions__ c1 1
// ClearInverseScale c2 1
// ClearMultiplier c3 1
// ClearSampler s0 1
//
ps_3_0
def c4, 0, 0.5, 0, 0
dcl vPos.xy
dcl_2d s0
mov r0.x, c4.x
if_lt -c3.w, r0.x
add r0.x, c1.y, -vPos.y
cmp r0.y, -c0.x, vPos.y, r0.x
frc r1.x, vPos.x
frc r1.y, r0.y
mov r0.x, vPos.x
add r0.xy, r0, -r1
add r0.xy, r0, c4.y
mul r0.xy, r0, c2
mov r0.zw, c4.x
texldl r0, r0, s0
mul oC0, r0, c3
else
mov oC0, c4.x
endif
// approximately 19 instruction slots used (2 texture, 17 arithmetic)
Because FNA's debug information was disabled, the debugger output was somewhat misleading, so the assertion may be coming from this shader instead. I don't see a preshader in it either, though:
#include "..\..\..\Fracture\Squared\RenderLib\Shaders\TargetInfo.fxh"
#include "LineLightCore.fxh"
// Vertex shader for a line light: builds a bounding quad corner in world
// space from the light's start/end points and transforms it for output.
//   vertexIndex        : .x indexes the LightCorners table to pick a corner.
//   startPosition,
//   endPosition        : endpoints of the light segment (passed through).
//   lightProperties,
//   moreLightProperties,
//   startColor,
//   endColor           : per-light data passed through unchanged (inout).
//   worldPosition      : output, the world-space position of this corner.
//   result             : output, the transformed position (z forced to 0).
// DEFINE_LightCorners, getInvZToYMultiplier, getZToYMultiplier,
// GetViewportPosition, GetViewportScale, getEnvironmentRenderScale and
// Viewport are defined outside this snippet (presumably in the includes).
void LineLightVertexShader(
in int2 vertexIndex : BLENDINDICES0,
inout float3 startPosition : TEXCOORD0,
inout float3 endPosition : TEXCOORD1,
// radius, ramp length, ramp mode, enable shadows
inout float4 lightProperties : TEXCOORD2,
// ao radius, distance falloff, y falloff factor, ao opacity
inout float4 moreLightProperties : TEXCOORD3,
inout float4 startColor : TEXCOORD4,
inout float4 endColor : TEXCOORD5,
out float3 worldPosition : POSITION1,
out float4 result : POSITION0
) {
// Presumably declares the LightCorners table indexed on the next line.
DEFINE_LightCorners
float3 vertex = LightCorners[vertexIndex.x];
// lightProperties.x + lightProperties.y + 1 (radius + ramp length + 1, per
// the parameter comment above).
float radius = lightProperties.x + lightProperties.y + 1;
// Only consumed by the disabled `else if (0)` branch below.
float deltaY = (radius) - (radius / moreLightProperties.z);
float3 radius3;
// The condition is the constant 1, so the first branch is always taken and
// the two remaining branches are dead code.
if (1)
// HACK: How the hell do we compute bounds for this in the first place?
radius3 = float3(9999, 9999, 0);
else if (0)
// HACK: Scale the y axis some to clip off dead pixels caused by the y falloff factor
radius3 = float3(radius, radius - (deltaY / 2.0), 0);
else
radius3 = float3(radius, radius, 0);
// Component-wise bounds of the segment, expanded outward by radius3.
float3 p1 = min(startPosition, endPosition), p2 = max(startPosition, endPosition);
float3 tl = p1 - radius3, br = p2 + radius3;
// Unfortunately we need to adjust both by the light's radius (to account for pixels above/below the center point
// being lit in 2.5d projection), along with adjusting by the z of the light's centerpoint (to deal with pixels
// at high elevation)
float radiusOffset = radius * getInvZToYMultiplier();
// FIXME
float effectiveZ = startPosition.z;
float zOffset = effectiveZ * getZToYMultiplier();
// Interpolate per component between tl and br using the corner vector.
worldPosition = lerp(tl, br, vertex);
// Corners with a y weight below 0.5 have both offsets subtracted from y.
if (vertex.y < 0.5) {
worldPosition.y -= radiusOffset;
worldPosition.y -= zOffset;
}
// Make the position viewport-relative, then scale xy by the viewport scale
// and the environment render scale.
float3 screenPosition = (worldPosition - float3(GetViewportPosition(), 0));
screenPosition.xy *= GetViewportScale() * getEnvironmentRenderScale();
// Apply ModelView first, then Projection.
float4 transformedPosition = mul(mul(float4(screenPosition.xyz, 1), Viewport.ModelView), Viewport.Projection);
// Keep the transformed xy and w, but force z to 0.
result = float4(transformedPosition.xy, 0, transformedPosition.w);
}
// Pixel shader for a line light: reads the shaded pixel's position and normal
// via sampleGBuffer, gets an opacity from LineLightPixelCore, and outputs the
// light color (interpolated between startColor and endColor) premultiplied by
// its alpha and that opacity.
//   worldPosition : declared as an input but not referenced in this body.
//   result        : output color; alpha is always 1.
// ACCEPTS_VPOS / GET_VPOS, sampleGBuffer and LineLightPixelCore are defined
// outside this snippet (presumably in the included .fxh files).
void LineLightPixelShader(
in float3 worldPosition : POSITION1,
in float3 startPosition : TEXCOORD0,
in float3 endPosition : TEXCOORD1,
in float4 lightProperties : TEXCOORD2,
in float4 moreLightProperties : TEXCOORD3,
in float4 startColor : TEXCOORD4,
in float4 endColor : TEXCOORD5,
ACCEPTS_VPOS,
out float4 result : COLOR0
) {
float3 shadedPixelPosition;
float3 shadedPixelNormal;
// Presumably fills both variables through out parameters; they are
// uninitialized before this call.
sampleGBuffer(
GET_VPOS,
shadedPixelPosition, shadedPixelNormal
);
// u is uninitialized here, so LineLightPixelCore presumably writes it as an
// out parameter; it is used below as the start/end color blend factor.
float u;
float opacity = LineLightPixelCore(
shadedPixelPosition, shadedPixelNormal,
startPosition, endPosition, u,
lightProperties, moreLightProperties, false, false
);
float4 color = lerp(startColor, endColor, u);
// Premultiply rgb by the color's alpha and the computed opacity.
result = float4(color.rgb * color.a * opacity, 1);
}
//
// Generated by Microsoft (R) HLSL Shader Compiler 10.1
//
// Parameters:
//
// struct
// {
// float4 _ConeAndMisc;
// float4 _TextureSliceAndTexelSize;
// float4 _StepAndMisc2;
// float4 TextureSliceCount;
// float4 Extent;
//
// } DistanceField;
//
// sampler2D DistanceFieldTextureSampler;
//
// struct
// {
// float4 ZAndScale;
//
// } Environment;
//
// sampler2D GBufferSampler;
// float2 GBufferTexelSize;
// bool GBufferViewportRelative;
//
// struct
// {
// float4x4 Projection;
// float4x4 ModelView;
// float4 ScaleAndPosition;
//
// } Viewport;
//
// bool __IsRenderTargetUpsideDown__;
// float2 __RenderTargetDimensions__;
//
//
// Registers:
//
// Name Reg Size
// ---------------------------- ----- ----
// Viewport c0 9
// DistanceField c9 5
// __IsRenderTargetUpsideDown__ c14 1
// __RenderTargetDimensions__ c15 1
// Environment c16 1
// GBufferViewportRelative c17 1
// GBufferTexelSize c18 1
// DistanceFieldTextureSampler s0 1
// GBufferSampler s2 1
//
ps_3_0
def c19, 0, 0.5, 128, 512
def c20, -0.212114394, 1.57072878, -2, 3.14159274
def c21, 1, 0, -0.0187292993, 0.0742610022
def c22, -6.28318548, 0.200000003, -9999, 0.00100000005
def c23, 0, -0.666666687, -0.333333343, 0.752941191
def c24, 999.999939, -0.00294117653, 1.5, 0.0299999993
def c25, 1, -16, 0.330000013, 0.0625
def c26, -0.075000003, 100, 1.14285719, 0
def c27, -1, 1, 0.333333343, 333.333313
defi i0, 255, 0, 0, 0
dcl_texcoord v0.xyz
dcl_texcoord1 v1.xyz
dcl_texcoord2 v2.xyw
dcl_texcoord4 v3
dcl_texcoord5 v4
dcl vPos.xy
dcl_2d s0
dcl_2d s2
add r0.x, c15.y, -vPos.y
cmp r0.y, -c14.x, vPos.y, r0.x
frc r1.x, vPos.x
frc r1.y, r0.y
mov r0.x, vPos.x
add r0.xy, r0, -r1
mov r1.x, c19.x
dp2add r0.z, c18, c18, r1.x
if_lt -r0.z, c19.x
rcp r2.x, c8.x
rcp r2.y, c8.y
mad r0.zw, r0.xyxy, r2.xyxy, c8
cmp r0.zw, -c17.x, r0.xyxy, r0
add r0.zw, r0, c19.y
mul r3.xy, r0.zwzw, c18
mov r3.zw, c19.x
texldl r3, r3, s2
mul r4.xz, r3.zyww, c19.zyww
rcp r0.z, c16.z
mov r4.y, c19.x
mad r0.zw, r0.xyxy, r0.z, r4.xyyx
mad r4.xy, r0.zwzw, r2, c8.zwzw
add r0.zw, r3.xyxy, -c19.y
add r2.xz, r0.zyww, r0.zyww
add r2.y, -r2_abs.z, c21.x
nrm r3.xyz, r2
else
rcp r0.z, c16.z
mul r0.xy, r0.z, r0
rcp r2.x, c8.x
rcp r2.y, c8.y
mad r4.xy, r0, r2, c8.zwzw
mov r4.z, c16.x
mov r3.xyz, c21.yyxw
endif
mad r0.xyz, r3, c24.z, r4
mov r2.xyz, v0
add r1.yzw, -r2.xxyz, v1.xxyz
dp3 r0.w, r1.yzww, r1.yzww
rcp r2.w, r0.w
rsq r0.w, r0.w
add r5.xyz, r4, -v0
dp3 r3.w, r5, r1.yzww
mul_sat r2.w, r2.w, r3.w
mad r5.xyz, r2.w, r1.yzww, v0
add r6.xyz, -r4, r5
dp3 r3.w, r6, r6
rsq r4.w, r3.w
mul r6.xyz, r4.w, r6
dp3_sat r4.w, r6, r3
mul r4.w, r4.w, c20.w
rcp r3.w, r3.w
mul r5.w, v2.x, v2.x
mul r3.w, r3.w, r5.w
mul r3.w, r3.w, r4.w
mad r7.xyz, r1.yzww, c19.y, v0
add r7.xyz, -r4, r7
nrm r8.xyz, r7
dp3_sat r4.w, r8, r3
mul r7.xyz, r0.w, r1.wyzw
mul r8.xyz, r6.yzxw, r7
mad r6.xyz, r7.zxyw, r6.zxyw, -r8
mov r7.x, v2.x
mad r7.yzw, r7.x, r6.xxyz, v1.xxyz
add r7.yzw, -r4.xxyz, r7
nrm r8.xyz, r7.yzww
dp3_sat r5.w, r8, r3
mad r8.xyz, v2.x, r6, r2
add r8.xyz, -r4, r8
nrm r9.xyz, r8
dp3_sat r6.w, r9, r3
mad r2.xyz, v2.x, -r6, r2
add r2.xyz, -r4, r2
nrm r9.xyz, r2
dp3_sat r8.w, r9, r3
add r6.w, r6.w, r8.w
mad r6.xyz, r7.x, -r6, v1
add r6.xyz, -r4, r6
nrm r9.xyz, r6
dp3_sat r3.x, r9, r3
add r3.x, r3.x, r6.w
add r3.x, r5.w, r3.x
add r3.x, r4.w, r3.x
mul r4.yzw, r2.xyzx, r8.xzxy
mad r4.yzw, r8.xyzx, r2.xzxy, -r4
nrm r9.xyz, r4.yzww
mul r4.yzw, r7.xwyz, r8.xyzx
mad r4.yzw, r7.xzwy, r8.xzxy, -r4
nrm r8.xyz, r4.yzww
dp3 r3.y, -r8, r9
add r3.z, -r3_abs.y, c21.x
rsq r3.z, r3.z
rcp r3.z, r3.z
mad r4.y, r3_abs.y, c21.z, c21.w
mad r4.y, r4.y, r3_abs.y, c20.x
mad r4.y, r4.y, r3_abs.y, c20.y
mul r3.z, r3.z, r4.y
cmp r3.y, r3.y, c21.y, c21.x
mad r4.y, r3.z, c20.z, c20.w
mad r3.y, r4.y, r3.y, r3.z
mul r4.yzw, r7.xzwy, r6.xzxy
mad r4.yzw, r6.xyzx, r7.xwyz, -r4
nrm r7.xyz, r4.yzww
dp3 r3.z, -r7, r8
add r4.y, -r3_abs.z, c21.x
rsq r4.y, r4.y
rcp r4.y, r4.y
mad r4.z, r3_abs.z, c21.z, c21.w
mad r4.z, r4.z, r3_abs.z, c20.x
mad r4.z, r4.z, r3_abs.z, c20.y
mul r4.y, r4.y, r4.z
cmp r3.z, r3.z, c21.y, c21.x
mad r4.z, r4.y, c20.z, c20.w
mad r3.z, r4.z, r3.z, r4.y
mul r4.yzw, r2.xzxy, r6.xyzx
mad r2.xyz, r2.yzxw, r6.zxyw, -r4.yzww
nrm r6.xyz, r2
dp3 r2.x, -r6, r7
add r2.y, -r2_abs.x, c21.x
rsq r2.y, r2.y
rcp r2.y, r2.y
mad r2.z, r2_abs.x, c21.z, c21.w
mad r2.z, r2.z, r2_abs.x, c20.x
mad r2.z, r2.z, r2_abs.x, c20.y
mul r2.y, r2.y, r2.z
cmp r2.x, r2.x, c21.y, c21.x
mad r2.z, r2.y, c20.z, c20.w
mad r2.x, r2.z, r2.x, r2.y
dp3 r2.y, -r9, r6
add r2.z, -r2_abs.y, c21.x
rsq r2.z, r2.z
rcp r2.z, r2.z
mad r4.y, r2_abs.y, c21.z, c21.w
mad r4.y, r4.y, r2_abs.y, c20.x
mad r4.y, r4.y, r2_abs.y, c20.y
mul r2.z, r2.z, r4.y
cmp r2.y, r2.y, c21.y, c21.x
mad r4.y, r2.z, c20.z, c20.w
mad r2.y, r4.y, r2.y, r2.z
add r2.x, r2.x, r2.y
add r2.x, r3.z, r2.x
add r2.x, r3.y, r2.x
add r2.x, r2.x, c22.x
mul r2.x, r2.x, c22.y
mad_sat r2.x, r2.x, r3.x, r3.w
rcp r2.y, c12.x
mul r2.y, r2.y, c27.z
mov r3.x, c12.x
add r2.z, -r3.x, c22.w
cmp r2.y, r2.z, c27.w, r2.y
mov r2.z, c13.z
add r2.z, -r2.z, c22.w
rcp r3.x, c13.z
cmp r2.z, r2.z, c24.x, r3.x
mul r2.z, r2.z, c12.w
add r3.xyz, -r0, r5
dp3 r3.w, r3, r3
rsq r3.w, r3.w
mul r3.xyz, r3.w, r3
add r4.x, -r4.x, c22.z
cmp r4.x, r4.x, c21.y, c21.x
cmp r4.x, -r2.x, c19.x, r4.x
add r4.y, r2.x, c24.y
cmp r4.y, r4.y, r4.x, c19.x
cmp r4.y, -v2_abs.w, c19.x, r4.y
add r4.zw, c25.xyxy, v2.xyxy
mul_sat r0.w, r0.w, r4.z
max r4.z, r0.w, c24.w
add_sat r0.w, r2.w, r4.z
mad r5.xyz, r0.w, r1.yzww, v0
add r5.xyz, -r0, r5
dp3 r0.w, r5, r5
rsq r0.w, r0.w
mul r5.xyz, r0.w, r5
rcp r5.w, v2.y
cmp r4.w, r4.w, r5.w, c25.w
max r5.w, v2.x, c25.z
min r6.x, c9.x, r5.w
mul r4.w, r4.w, r6.x
mul r4.w, r4.w, c9.y
rcp r3.w, r3.w
add r3.w, r3.w, -v2.x
max r5.w, r3.w, c21.x
rcp r0.w, r0.w
add r0.w, r0.w, -v2.x
max r3.w, r0.w, c21.x
add r0.w, r2.w, -r4.z
mul r1.yzw, r1, r0.w
cmp r1.yzw, r0.w, r1, c19.x
add r1.yzw, r1, v0.xxyz
add r1.yzw, -r0.xxyz, r1
dp3 r0.w, r1.yzww, r1.yzww
rsq r0.w, r0.w
mul r1.yzw, r0.w, r1
rcp r0.w, r0.w
add r0.w, r0.w, -v2.x
max r4.z, r0.w, c21.x
mov r7.x, c21.x
max r0.w, r7.x, c11.y
cmp r1.x, -c13.x, r1.x, r4.y
mov r7.zw, c19.x
mov r8.zw, c19.x
mov r9.zw, c19.x
mov r10.xyz, r0
mov r10.w, r1.y
mov r11.xy, r1.zwzw
mov r11.z, c19.y
mov r11.w, r4.z
mov r12.xyz, r0
mov r12.w, r3.x
mov r13.xy, r3.yzzw
mov r13.z, c19.y
mov r13.w, r5.w
mov r14.xyz, r0
mov r14.w, r5.x
mov r15.xy, r5.yzzw
mov r15.z, c19.y
mov r15.w, r3.w
mov r6.yzw, c21.x
mov r16.x, c11.x
mov r16.y, r1.x
rep i0
cmp r16.z, -r16.y, c21.x, c21.y
break_ne r16.z, -r16.z
mov r17.x, r14.w
mov r17.yz, r15.xxyw
mad r17.xyz, r17, r15.z, r14
max r18.xyz, r17, c19.x
min r19.xyz, c13, r18
add r17.xyz, -r17, r19
dp3 r16.z, r17, r17
min r16.w, c12.z, r19.z
mul r17.x, r2.z, r16.w
frc r17.y, r17.x
add r17.x, -r17.y, r17.x
mad r16.w, r16.w, r2.z, -r17.x
mul r17.yz, r19.xxyw, c10.xzww
mul r17.w, r2.y, r17.x
frc r18.x, r17.w
add r18.y, r17.w, -r18.x
mad r17.w, r17.x, r2.y, -r18.y
mul r17.w, r17.w, c12.x
frc r18.z, r17.w
add r18.x, r17.w, -r18.z
mad r9.xy, r18, c10, r17.yzzw
texldl r18, r9, s0
mul r9.x, r17.x, c27.z
frc r9.x, r9_abs.x
cmp r9.x, r17.x, r9.x, -r9.x
add r9.xy, r9.x, c23.yzzw
cmp r17.xy, r9.y, r18.yzzw, r18
cmp r9.xy, r9.x, r18.zwzw, r17
lrp r17.x, r16.w, r9.y, r9.x
rsq r9.x, r16.z
rcp r9.x, r9.x
add r9.y, -r17.x, c23.w
mad r9.x, r9.y, c13.w, r9.x
mov r17.x, r10.w
mov r17.yz, r11.xxyw
mad r17.xyz, r17, r11.z, r10
max r18.xyz, r17, c19.x
min r19.xyz, c13, r18
add r17.xyz, -r17, r19
dp3 r9.y, r17, r17
min r16.z, c12.z, r19.z
mul r17.xy, r19, c10.zwzw
mul r16.w, r2.z, r16.z
frc r17.z, r16.w
add r16.w, r16.w, -r17.z
mul r17.z, r2.y, r16.w
frc r17.w, r17.z
add r18.y, -r17.w, r17.z
mad r17.z, r16.w, r2.y, -r18.y
mul r17.z, r17.z, c12.x
frc r17.w, r17.z
add r18.x, -r17.w, r17.z
mad r7.xy, r18, c10, r17
mad r16.z, r16.z, r2.z, -r16.w
texldl r17, r7, s0
mul r7.x, r16.w, c27.z
frc r7.x, r7_abs.x
cmp r7.x, r16.w, r7.x, -r7.x
add r7.xy, r7.x, c23.yzzw
cmp r17.xy, r7.y, r17.yzzw, r17
cmp r7.xy, r7.x, r17.zwzw, r17
lrp r17.x, r16.z, r7.y, r7.x
rsq r7.x, r9.y
rcp r7.x, r7.x
add r7.y, -r17.x, c23.w
mad r7.x, r7.y, c13.w, r7.x
mul r7.y, r7_abs.x, c11.z
max r9.y, r7.y, r0.w
add r7.y, r9.y, r11.z
mov r17.x, r12.w
mov r17.yz, r13.xxyw
mad r17.xyz, r17, r13.z, r12
max r18.xyz, r17, c19.x
min r19.xyz, c13, r18
add r17.xyz, -r17, r19
dp3 r9.y, r17, r17
min r16.z, c12.z, r19.z
mul r17.xy, r19, c10.zwzw
mul r16.w, r2.z, r16.z
frc r17.z, r16.w
add r16.w, r16.w, -r17.z
mul r17.z, r2.y, r16.w
frc r17.w, r17.z
add r18.y, -r17.w, r17.z
mad r17.z, r16.w, r2.y, -r18.y
mul r17.z, r17.z, c12.x
frc r17.w, r17.z
add r18.x, -r17.w, r17.z
mad r8.xy, r18, c10, r17
mad r16.z, r16.z, r2.z, -r16.w
texldl r17, r8, s0
mul r8.x, r16.w, c27.z
frc r8.x, r8_abs.x
cmp r8.x, r16.w, r8.x, -r8.x
add r8.xy, r8.x, c23.yzzw
cmp r17.xy, r8.y, r17.yzzw, r17
cmp r8.xy, r8.x, r17.zwzw, r17
lrp r17.x, r16.z, r8.y, r8.x
mul r8.x, r9_abs.x, c11.z
max r16.z, r8.x, r0.w
add r8.x, r15.z, r16.z
mad r8.y, r4.w, r15.z, c25.z
min r16.z, r6.x, r8.y
rsq r8.y, r9.y
rcp r8.y, r8.y
add r9.y, -r17.x, c23.w
mad r8.y, r9.y, c13.w, r8.y
mul r9.y, r8_abs.y, c11.z
max r16.w, r9.y, r0.w
add r9.y, r13.z, r16.w
rcp r16.z, r16.z
add r9.x, r9.x, c24.z
mul r9.x, r16.z, r9.x
min r16.z, r9.x, r6.w
mad r9.x, r4.w, r11.z, c25.z
min r16.w, r6.x, r9.x
rcp r9.x, r16.w
add r7.x, r7.x, c24.z
mul r7.x, r9.x, r7.x
min r9.x, r7.x, r6.y
min r17.z, r11.w, r7.y
mad r7.x, r4.w, r13.z, c25.z
min r16.w, r6.x, r7.x
rcp r7.x, r16.w
add r7.y, r8.y, c24.z
mul r7.x, r7.x, r7.y
min r8.y, r7.x, r6.z
min r18.z, r13.w, r9.y
add r7.x, r13.w, -r18.z
mul_sat r7.x, r7.x, c26.y
add_sat r7.y, r8.y, c26.x
mul r7.x, r7.x, r7.y
add r7.y, r11.w, -r17.z
mul_sat r7.y, r7.y, c26.y
add_sat r9.y, r9.x, c26.x
mad r7.x, r9.y, r7.y, r7.x
min r19.z, r15.w, r8.x
add r7.y, r15.w, -r19.z
mul_sat r7.y, r7.y, c26.y
add_sat r8.x, r16.z, c26.x
mad r7.x, r8.x, r7.y, r7.x
add r16.x, r16.x, -c21.x
mul r16.y, r7.x, r16.x
mov r17.xyw, r11
mov r11, r17
mov r18.xyw, r13
mov r13, r18
mov r19.xyw, r15
mov r15, r19
mov r6.y, r9.x
mov r6.z, r8.y
mov r6.w, r16.z
endrep
add r0.x, r6.z, r6.y
add r0.x, r6.w, r0.x
mul r0.y, r16.x, c19.y
mul r0.x, r0.x, c27.z
min r1.x, r0.y, r0.x
add_sat r0.x, r1.x, c26.x
mul_sat r0.x, r0.x, c26.z
pow r1.x, r0.x, c9.z
cmp r0.x, -r4.y, c21.x, r1.x
mul r0.x, r0.x, r2.x
cmp r0.x, -r4.x, c19.x, r0.x
cmp r1, -r4.x, c27.x, c27.y
texkill r1
mov r1, r1.w
texkill r1
mov r1, v3
add r1, -r1, v4
mad r1, r2.w, r1, v3
mul r0.yzw, r1.w, r1.xxyz
mul oC0.xyz, r0.x, r0.yzww
mov oC0.w, c21.x
// approximately 432 instruction slots used (8 texture, 424 arithmetic)
Research results: fxc is producing unusual output in which the pixel shader lists itself as its own preshader (the example listing that illustrated this appears to be missing from this copy). As a result, whenever I apply the effect or set parameters at runtime, MojoShader has to execute the entire pixel shader as a preshader. This appears to work and the shader functions, but it hits assertions, presumably because this case was never expected. It is likely happening because the shader is complex enough that fxc cannot hoist simple opcodes out for initialization, so it just reuses the whole shader.
The shader disassembly from both AMD's tool and fxc is wrong: there are definitely preshaders. I have no idea what they do, but I have a fix.
This can be worked around by passing `/Op` (disable preshaders) to fxc at compile time. I haven't narrowed down which particular construct in a preshader causes it to break.