crosire / d3d8to9

A D3D8 pseudo-driver which converts API calls and bytecode shaders to equivalent D3D9 ones.
BSD 2-Clause "Simplified" License
881 stars 78 forks source link

Fix m*x* instructions in VertexShader #57

Closed elishacloud closed 6 years ago

elishacloud commented 6 years ago

This updates m*x* instructions (e.g. m3x3, m4x4) to use a temporary register when the first source register is the same as the destination register, since D3D9 does not allow this.

Fixes the transparent cars issue in Need for Speed: Hot Pursuit 2 (2002).

Code does the following:

  1. Checks if m*x* instructions exist.
  2. Checks for unused registers.
  3. Loops through each register to see if the same register is used for both the destination register and the first source register.
  4. Adds a new line using a temporary register and updates the m*x* line to use the temporary register.
  5. If there is no unused register, or there are no arithmetic instructions remaining, it disables the line so that the shader assembly can still be compiled.

Note: the source register can use a negate modifier and swizzles.

RegEx breakdown:

This fixes the following error:

Redirecting 'IDirect3DDevice8::CreateVertexShader(02A99F98, 0943BC80, 0941D290, 0943BF40, 0)' ...
> Translating vertex declaration ...
> Disassembling shader and translating assembly to Direct3D 9 compatible code ...
> Dumping translated shader assembly:

    vs_1_1
    dcl_position v0
    dcl_blendweight v1
    dcl_blendindices v2
    dcl_normal v3
    dcl_psize v4
    mov r11, c0 /* initialize register r11 */
    mov r10, c0 /* initialize register r10 */
    mov r9, c0 /* initialize register r9 */
    mov r8, c0 /* initialize register r8 */
    mov r7, c0 /* initialize register r7 */
    mov r5, c0 /* initialize register r5 */
    mov r4, c0 /* initialize register r4 */
    mov r3, c0 /* initialize register r3 */
    mov r1, c0 /* initialize register r1 */
    mov r0, c0 /* initialize register r0 */
    mov oD1, c0 /* initialize output register oD1 */
    mov oD0, c0 /* initialize output register oD0 */
    mov oT2, c0 /* initialize output register oT2 */
    mov oT1, c0 /* initialize output register oT1 */
    mov oT0, c0 /* initialize output register oT0 */
    m4x4 r0, v0, c1
    mov oPos, r0
    mul r1.w, c0.w, r0.w
    exp oFog /* removed swizzle */, -r1
    mov r10, v0
    mov r10.w, c13.x
    mov oT0, v4
    mov oT2, v4
    mov r3, v2
    mov r3.w, c13.x
    mov r11, r3
    add r0, r10, -c33
    mul r0, r0, c29.w
    add r3, r3, r0
    dp3 r3.w, r3, r3
    rsq r3.w, r3.w
    mul r3, r3, r3.w
    m3x3 r3, r3, c5  /* <--- This is the line with the error */
    mul r3, r3, c32
    mad r0.xy, r3.xyyy, c31.xyyy, c31.zwww
    add r0.xy, c29.z, -r0.xyyy
    mov oT1.xy, r0.xyyy
    m4x4 r9, r10, c9
    m3x3 r8, r11, c9
    add r3, c34, -r9
    add r4, c35, -r9
    dp3 r3.w, r3, r3
    rsq r3.w, r3.w
    mul r3, r3, r3.w
    dp3 r4.w, r4, r4
    rsq r4.w, r4.w
    mul r4, r4, r4.w
    add r5, r3, r4
    dp3 r5.w, r5, r5
    rsq r5.w, r5.w
    mul r5, r5, r5.w
    dp3 r8.w, r8, r8
    rsq r8.w, r8.w
    mul r8, r8, r8.w
    dp3 r7, r8, r5
    mov r9, r7
    mov r8.x, c29.z
    mov r8.y, r7.y
    mov r8.w, c30.y
    lit r8, r8
    max r7, c29.x, r8.z
    mul r10, r7, c38
    mov r8.x, c29.z
    mov r8.y, r9.y
    mov r8.w, c30.z
    lit r8, r8
    mad oD1.xyz, r8.z, c39, r10.xyzz
    mov oD1.w, c30.x
    m3x3 r1, r11, c9
    mov r0.xyz, c29.xzxx
    dp3 r3.z, r1.xyzz, r0.xyzz
    mad r3.z, r3.z, c29.y, c29.y
    mul r5.xyz, r3.z, c37.xyzz
    add r3.z, c29.z, -r3.z
    mad r5.xyz, r3.z, c36.xyzz, r5.xyzz
    min oD0.xyz, r5.xyzz, c29.z
    mov a0.x, c56.x
    mov oD0.w, c40[a0.x]

// approximately 84 instruction slots used

> Failed to reassemble shader:

D:\Games\Need For Speed Hot Pursuit 2\memory(39,5): error X5335: Dest register cannot be the same as first source register for m*x* instructions.

In this case the code will update this line:

    m3x3 r3, r3, c5

To be this:

    mov r2, r3 /* added line */
    m3x3 r3, r2 /* changed r3 to r2 */, c5
crosire commented 6 years ago

Awesome!