crosire / d3d8to9

A D3D8 pseudo-driver which converts API calls and bytecode shaders to equivalent D3D9 ones.
BSD 2-Clause "Simplified" License
881 stars 78 forks source link

Vertex shader translation does not work as expected (Star Wars Republic Commando) #28

Closed metallicafan212 closed 7 years ago

metallicafan212 commented 7 years ago

So I've been silently following this project for about a year and a half now, as it is relevant to the project I'm working on.

I have no C++ knowledge of any sort, since I am primarily a Java programmer, but I hope I can help with one issue with vertex shaders.

For the longest time, the hardware shaders (Unreal Engine 2 combines vertex and pixel shaders) for some of the effects have not been rebuilt properly. In d3d8, the shaders work fine, but d3d8to9 rebuilds the vertex shaders with wrong instructions.

Take for example: the hardware shader DynamicHologram, this is how it appears in editor (I'm using quote since the code tag does not want to work with it)

vs.1.1

; Get the Position mov r3, v0

; Add big rare flicker mul r3.z, r3, c21

; Add small frequent flicker add r3.xyz, r3, c22

////////// Distort /////////

// Position distort mul r1, v0, c16.x add r1.xy, r1.zx, r1.yz

// Time distort mov r2, c17 mad r1, r2.x, c16.y, r1 frc r1.xy, r1 // 0 to 1

// Turn linear into sawtooth add r1, r1, -c20.y // -.5 to .5 max r7, r1, -r1 // .5 to 0 to .5

// Distort mul r4, r7, c16.zzww // z = Amp w = 0 mul r4, r4, r3 mov r4.z, r3.z mov r4.w, c20.z

////////// Scan ///////// mov r5, r3 mov r5.z, c18.y

////////// Combine /////////

// If z > Scan then Distort sge r0.x, r3.z, c18.x

// if z < distort and > Scan then scan slt r0.y, r3.z, c18.x sge r0.z, r3.z, c18.y mul r0.y, r0.y, r0.z

// if z < Scan then scan slt r0.z, r3.z, c18.y

mul r1, r4, r0.x mad r1, r5, r0.y, r1 mad r3, r3, r0.z, r1

////////// Transform /////////

;transform position dp4 r4.x, r3, c0 dp4 r4.y, r3, c1 dp4 r4.z, r3, c2 dp4 r4.w, r3, c3

/* ;; CAMERA SCAN (in screen space) ; Interpolate with scan position ; Compare world space position sge r5.x, r3.z, c31.x ; Get 1- scan add r5.y, c20.z, -r5.x mul r6, r5.x, c30 mad r4, r5.y, r4, r6 ;; END CAMERA SCAN */

mov oPos, r4

;transform normal dp3 r0.x, v1, c5 dp3 r0.y, v1, c6 dp3 r0.z, v1, c7

;normalize normal dp3 r0.w, r0, r0 rsq r0.w, r0.w mul r0, r0, r0.w

;compute world space position dp4 r1.x, v0, c11 dp4 r1.y, v0, c12 dp4 r1.z, v0, c13 dp4 r1.w, v0, c14

;vector from point to eye add r2, c10, -r1

;normalize e dp3 r2.w, r2, r2 rsq r2.w, r2.w mul r2, r2, r2.w

; eye dot n dp3 r1, r2, r0 /* mov r8, c16.y mad r7, r8, c20.y, c20.y max r7, r7, c20.y mul r3, r1, r7 mul r3, r3, c15.x mad r3, r1, c15.y, r3 */ mul r3, r1, c15.y add oT0.x, r3, c[20] ; add const offset held in c[20]

; Base Texture mov oT1, v2

// Get Speed mov r5, c25 mul r5, r5, c26.z

; Scale Noise Texture 2 ;mul r6, r4, c26.y mul r6, v0.zx, c26.y

// Add direction mad oT2.xy, r5, c28, r6.yx

; Use the Dot as brightness and add flicker mul oD0, r1, c22.x

mov oFog.x, c20.x

But this is how d3d8to9 parses it

vs_1_1
dcl_position v0
dcl_blendweight v1
dcl_blendindices v2
mov r7, c0 /* initialize register r7 */
mov r6, c0 /* initialize register r6 */
mov r5, c0 /* initialize register r5 */
mov r4, c0 /* initialize register r4 */
mov r3, c0 /* initialize register r3 */
mov r2, c0 /* initialize register r2 */
mov r1, c0 /* initialize register r1 */
mov r0, c0 /* initialize register r0 */
mov oT2, c0 /* initialize output register oT2 */
mov oT1, c0 /* initialize output register oT1 */
mov oT0, c0 /* initialize output register oT0 */
mov oD0, c0 /* initialize output register oD0 */
mov r3, v0
mul r3.z, r3, c21
add r3.xyz, r3, c22
mul r1, v0, c16.x
add r1.xy, r1.zxxx, r1.yzzz
mov r2, c17
mad r1, r2.x, c16.y, r1
frc r1.xy, r1
add r1, r1, -c20.y
max r7, r1, -r1
mul r4, r7, c16.zzww
mul r4, r4, r3
mov r4.z, r3.z
mov r4.w, c20.z
mov r5, r3
mov r5.z, c18.y
sge r0.x, r3.z, c18.x
slt r0.y, r3.z, c18.x
sge r0.z, r3.z, c18.y
mul r0.y, r0.y, r0.z
slt r0.z, r3.z, c18.y
mul r1, r4, r0.x
mad r1, r5, r0.y, r1
mad r3, r3, r0.z, r1
dp4 r4.x, r3, c0
dp4 r4.y, r3, c1
dp4 r4.z, r3, c2
dp4 r4.w, r3, c3
mov oPos, r4
dp3 r0.x, v1, c5
dp3 r0.y, v1, c6
dp3 r0.z, v1, c7
dp3 r0.w, r0, r0
rsq r0.w, r0.w
mul r0, r0, r0.w
dp4 r1.x, v0, c11
dp4 r1.y, v0, c12
dp4 r1.z, v0, c13
dp4 r1.w, v0, c14
add r2, c10, -r1
dp3 r2.w, r2, r2
rsq r2.w, r2.w
mul r2, r2, r2.w
dp3 r1, r2, r0
mul r3, r1, c15.y
add oT0.x, r3, c20
mov oT1, v2
mov r5, c25
mul r5, r5, c26.z
mul r6, v0.zxxx, c26.y
mad oT2.xy, r5, c28, r6.yxxx
mul oD0, r1, c22.x
mov oFog /* removed swizzle */, c2.x /* select single component */0.x

Note the last line of each listing: the engine's version has `mov oFog.x, c20.x`, whereas d3d8to9 inserts comments and changes it to `mov oFog /* removed swizzle */, c2.x /* select single component */0.x`, leaving the stray `0.x` after the inserted comment.

It also errors out in the log for d3d8to9 with

Failed to reassemble shader:

...\Star Wars Republic Commando\GameData\System\memory(69,71): error X2000: syntax error : unexpected float '0'

In the game's log it writes Log: CreateVertexShader failed(88760B59).

I looked up the error and 88760B59 means invalid data (D3DXERR_INVALIDDATA).

Now, if I go in game and change the last line from mov oFog.x, c20.x to mov oFog.x, c2.x d3d8to9 builds the shader just fine and it works in game.

This is what it outputs with that change

vs_1_1
dcl_position v0
dcl_blendweight v1
dcl_blendindices v2
mov r7, c0 /* initialize register r7 */
mov r6, c0 /* initialize register r6 */
mov r5, c0 /* initialize register r5 */
mov r4, c0 /* initialize register r4 */
mov r3, c0 /* initialize register r3 */
mov r2, c0 /* initialize register r2 */
mov r1, c0 /* initialize register r1 */
mov r0, c0 /* initialize register r0 */
mov oT2, c0 /* initialize output register oT2 */
mov oT1, c0 /* initialize output register oT1 */
mov oT0, c0 /* initialize output register oT0 */
mov oD0, c0 /* initialize output register oD0 */
mov r3, v0
mul r3.z, r3, c21
add r3.xyz, r3, c22
mul r1, v0, c16.x
add r1.xy, r1.zxxx, r1.yzzz
mov r2, c17
mad r1, r2.x, c16.y, r1
frc r1.xy, r1
add r1, r1, -c20.y
max r7, r1, -r1
mul r4, r7, c16.zzww
mul r4, r4, r3
mov r4.z, r3.z
mov r4.w, c20.z
mov r5, r3
mov r5.z, c18.y
sge r0.x, r3.z, c18.x
slt r0.y, r3.z, c18.x
sge r0.z, r3.z, c18.y
mul r0.y, r0.y, r0.z
slt r0.z, r3.z, c18.y
mul r1, r4, r0.x
mad r1, r5, r0.y, r1
mad r3, r3, r0.z, r1
dp4 r4.x, r3, c0
dp4 r4.y, r3, c1
dp4 r4.z, r3, c2
dp4 r4.w, r3, c3
mov oPos, r4
dp3 r0.x, v1, c5
dp3 r0.y, v1, c6
dp3 r0.z, v1, c7
dp3 r0.w, r0, r0
rsq r0.w, r0.w
mul r0, r0, r0.w
dp4 r1.x, v0, c11
dp4 r1.y, v0, c12
dp4 r1.z, v0, c13
dp4 r1.w, v0, c14
add r2, c10, -r1
dp3 r2.w, r2, r2
rsq r2.w, r2.w
mul r2, r2, r2.w
dp3 r1, r2, r0
mul r3, r1, c15.y
add oT0.x, r3, c20
mov oT1, v2
mov r5, c25
mul r5, r5, c26.z
mul r6, v0.zxxx, c26.y
mad oT2.xy, r5, c28, r6.yxxx
mul oD0, r1, c22.x
mov oFog /* removed swizzle */, c2.x

// approximately 55 instruction slots used

Redirecting 'IDirect3DDevice8::CreatePixelShader(0F9C7C58, 11CC17B0, 11D866C0)' ...

Disassembling shader and translating assembly to Direct3D 9 compatible code ... Dumping translated shader assembly:

ps_1_3
tex t0
tex t1
tex t2
dp3_sat r1, t1, c0
mul r0, r1, t0
mul r0.xyz, r0, t2
+ mov r0.w, t0.w
mul r0.xyz, r0, v0
mul r0, r0, c1

// approximately 8 instruction slots used (3 texture, 5 arithmetic)

Now, I do not know c++ or directx at all, so I don't know how to fix it in d3d8to9, but this is the information I've collected on this issue, I only wanted to submit this once I had everything and had time to try everything.

A few other vertex shaders do not recompile correctly, but I do not know exactly which ones they are. The primary ones that do not recompile are ones used for bump mapping, specularity, and the one hologram shader that is used on a lot of different objects throughout the game.

If anyone needs more information, like the raw log files, pictures, or the code to other shaders feel free to ask.

PatrickvL commented 7 years ago

It looks like the regex can't handle cases where the argument already use just the one .x component

crosire commented 7 years ago

The relevant source code line is https://github.com/crosire/d3d8to9/blob/master/source/d3d8to9_device.cpp#L1392.

elishacloud commented 7 years ago

From what I can tell, the issue is that it matches on the first digit in the c20 register name and ignores the 0. It seems that we simply need to update this to always match on all of the digits, if more than one exists.

I think we should change this line from this:

SourceCode = std::regex_replace(SourceCode, std::regex("mov (oFog|oPts)(.*), (-?)([crv][0-9]+)(?!\\.)"), "mov $1$2, $3$4.x /* select single component */");

To this:

SourceCode = std::regex_replace(SourceCode, std::regex("mov (oFog|oPts)(.*), (-?)([crv][0-9]+)(?![0-9])(?!\\.)"), "mov $1$2, $3$4.x /* select single component */");

From my tests this seems to solve the issue. Basically I am adding (?![0-9]) which should ensure that all the number values get picked up. I believe the same issue exists on this line here, here and here.

This also fixes the shader translation issue in Silent Hill 2 mentioned in this comment.

The code changes to fix this issue are posted in commit 6483006.

PatrickvL commented 7 years ago

If it is added as another bracketed expression, it would need to be referenced as $5 in the output, otherwise the digit would get lost. It's simpler to put the additional expression inside the fourth expression, but the plus sign already there should have caught multiple digits already...

elishacloud commented 7 years ago

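It does match on both digits, but it can also match on just one digit: for c20.x the (?!\\.) lookahead rejects the full match c20 (because it is followed by a dot), so the regex engine backtracks and matches c2, which is followed by 0 rather than a dot. Since regex_replace is configured to replace all matches, that match on a single digit is what causes the problem. Adding (?![0-9]) forces it to match on all of the digits.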

As far as referencing $5 in the output, that is not needed. If you notice there are actually 6 bracketed expressions not 5. However the last two expressions are "Negative Lookahead" expressions which are not included in the groups. Check out this site and add mov (oFog|oPts)(.*), (-?)([crv][0-9]+)(?![0-9])(?!\\.) into the regular expression debugger for more details.

metallicafan212 commented 7 years ago

So elishacloud's fix works perfectly for the one hardware shader, but this exposed another issue that I thought this would solve: specularity. (I did not want to make another issue since this issue is just like the other, but differs in the execution)

The hardware shader is DOT3DiffSpec

With d3d8to9 the specularity in the game is way too bright, but this one is much more complicated. Parts of the original vertex shader code are replaced in real time by the game, so in the editor it appears as

vs.1.1

; xform and output vertex m4x4 oPos, v0, c20

; base map and bump map texture coordinates mul oT0.xy, v2, c0.x mul oT1.xy, v2, c0.y

{LIGHT1} {LIGHT2} {LIGHT3} {LIGHT4} {COMBINELIGHTS} {OUTPUTLIGHTS} {DIFF_TO_TANGENT_SPACE} {SPEC_TO_TANGENT_SPACE}

Notice the {LIGHT1} and so forth. The game actively replaces these with code depending on the current lights and environment, meaning I cannot access it directly.

So, to d3d8to9, the code for one instance appears as:

vs_1_1
dcl_position v0
dcl_blendweight v1
dcl_color v5
dcl_blendindices v2
dcl_normal v3
dcl_psize v4
dcl_color1 v6
mov r10, c0 /* initialize register r10 */
mov r9, c0 /* initialize register r9 */
mov r7, c0 /* initialize register r7 */
mov r6, c0 /* initialize register r6 */
mov r5, c0 /* initialize register r5 */
mov r4, c0 /* initialize register r4 */
mov r3, c0 /* initialize register r3 */
mov r2, c0 /* initialize register r2 */
mov r1, c0 /* initialize register r1 */
mov r0, c0 /* initialize register r0 */
mov oT3, c0 /* initialize output register oT3 */
mov oT2, c0 /* initialize output register oT2 */
mov oT1, c0 /* initialize output register oT1 */
mov oT0, c0 /* initialize output register oT0 */
mov oD1, c0 /* initialize output register oD1 */
mov oD0, c0 /* initialize output register oD0 */
m4x4 oPos, v0, c20
mul oT0.xy, v2, c0.x
mul oT1.xy, v2, c0.y
add r0, c40, -v0
dp3 r1, r0, r0
rsq r1, r1
mul r0, r0, r1.x
rcp r1, r1.x
add r1.x, r1.x, -c42.z
dp3 r2, r0, v1
max r2, r2, c10.x
mad r3, -r1.x, c42.x, c42.w
max r3, r3, c9.x
mov r4, c12
m3x3 r7, r4, c28
dp3 r4, -r0, r7
add r4, c13.x, -r4
max r4, r4, c10.x
mad r4, -r4, c13.z, c13.w
max r4, r4, c10.x
add r7.x, c15.x, -r1.y
max r7.x, r7.x, c10.x
mad r4.x, r7.x, c15.y, r4.x
min r4.x, r4.x, c10.z
mul r2, r2, r4.x
mul r6, r0, r3.x
mul r3, r3.x, c41
mul r5, r3, r2.x
add r0, c43, -v0
dp3 r1, r0, r0
rsq r1, r1
mul r0, r0, r1.x
rcp r1, r1.x
add r1, r1, -c45.z
dp3 r2, r0, v1
max r2, r2, c10.x
mad r3, -r1, c45.x, c45.w
max r3, r3, c9.x
mad r6, r0, r3.x, r6
mul r3, r3, c44
mad r5, r3, r2.x, r5
add r0, c46, -v0
dp3 r1, r0, r0
rsq r1, r1
mul r0, r0, r1.x
rcp r1, r1
add r1, r1, -c48.z
dp3 r2, r0, v1
max r2, r2, c10.x
mad r3, -r1, c48.x, c48.w
max r3, r3, c9.x
mad r6, r0, r3.x, r6
mul r3, r3, c47
mad r5, r3, r2.x, r5
add r0, c49, -v0
dp3 r1, r0, r0
rsq r1, r1
mul r0, r0, r1.x
rcp r1, r1
add r1, r1, -c51.z
dp3 r2, r0, v1
max r2, r2, c10.x
mad r3, -r1, c51.x, c51.w
max r3, r3, c9.x
mad r6, r0, r3.x, r6
mul r3, r3, c50
mad r5, r3, r2.x, r5
mad r9, v6, c10.wwwx, -c10.zzzx
mad r6, r9, v6.w, r6
mov r6.w, c10.x
dp3 r1, r6, r6
rsq r1, r1
mul r0, r6, r1.x
dp3 r2, r0, v1
max r2, r2, c10.x
mad r5, v5, c10.w, r5
mad r7, -r5, r2.x, r5
add oD0, r7, c14
mov oD1, r5
dp4 r9.x, v0, c32
dp4 r9.y, v0, c33
dp4 r9.z, v0, c34
dp3 r9.w, r9, r9
rsq r10.w, r9.w
mul oFog /* removed swizzle */, r9.w, r10.w
dp3 r1.x, r0, v3
dp3 r1.y, r0, v4
dp3 r1.zw, r0, v1
mov oT2, r1
mov r6, r1
add r0, c11, -v0
dp3 r1.x, r0, r0
rsq r1.x, r1.x
mul r0, r0.xyzz, r1.x
dp3 r1.x, r0, v3
dp3 r1.y, r0, v4
dp3 r1.z, r0, v1
add r2, r1, r6
dp3 r1.x, r2, r2
rsq r1.x, r1.x
mul r2, r2.xyzz, r1.x
mov oT3, r2

The pixel shader appears as:

ps_1_3
tex t0
tex t1
tex t2
tex t3
dp3_sat r0, t1_bx2, t2_bx2
dp3_sat r1, t1_bx2, t3_bx2
mad r0.xyz, r0, v1, v0
mad_x2_sat r1, r1.w, r1.w, c0 /* removed modifier - */
mul_sat r1.xyz, r1, v1
mad_sat r0, r0, t0, r1

Now I don't think this is a pixel shader issue, as the game uses vertex lighting for its models, hence the active changes to the vertex shader at any time.

This is what it's properly supposed to look like: proper spec

This is what d3d8to9 makes it look like: d3d8spec

Now if you look at the two barrels on the lower left side, you can see it's still getting bump mapped but the specularity is brightening anything in a direct light. In game, it's even worse as bloom goes wild and turns the whole screen bright, making it hard to see anything.

Now, as I've said before, I'm no expert, but this seems (correct me if I'm wrong) like it's missing a - sign somewhere in the code.

(Vertex shader output was collected using a test map with only that one model)

Feel free to ask if you need more information, pictures, hardware shader code, or anything else.

metallicafan212 commented 7 years ago

UPDATE

I feel like it's the pixel shader now, as I went back and looked at the code.

;; PC

ps.1.3

tex t0 ; base map tex t1 ; bump map tex t2 ; light vector from normalizer cube map tex t3 ; half angle vector

; N.L dp3_sat r0, t1_bx2, t2_bx2

; (N.H) dp3_sat r1, t1_bx2, t3_bx2

; Multiply by bump color ; Add baked + vertex lighting mad r0.rgb, r0, v1, v0

; approximate (N.H)^16 ; [(N.H)^2 - 0.75] * 4 == (N.H)^16 mad_x2_sat r1, r1.a, r1.a, -c0

; Set the color of the specular mul_sat r1.rgb, r1, v1

; Mulitply specular by specular mask ;;;mul r1, r1, t1.a

; Self Illumination ;;;lrp r0, t0.a, t0, r0

; [(N.L) * base] + (N.H)^16 ; Diffuse and Spec mad_sat r0, r0, t0, r1

Notice mad_x2_sat r1, r1.a, r1.a, -c0

In the d3d8to9 it takes away the - sign making it mad_x2_sat r1, r1.w, r1.w, c0 /* removed modifier - */

Correct me if I'm wrong, but I think that's the issue. It's supposed to take away from the saturation there but it's being changed so it adds to it instead. (I'm way over my head....)

Hope this helps track it down.

UPDATE 2 After changing the code to remove the - sign and running it without d3d8to9

well whaddya know

WELL WHADDYA KNOW

It also exposes that another shader has the same issue and it's a pixel shader issue.

If anyone wants me to, I'll open a new issue only dealing with the pixel shader.

elishacloud commented 7 years ago

Interesting. This is a separate issue. What game is this happening in and how do I reproduce it?

Can you try with this file and let me know what the results are?

metallicafan212 commented 7 years ago

This is still Star Wars Republic Commando with an unreleased, still-in-development mod, and I'm guessing you commented out the lines for the removed modifier, since I just tried that before trying yours.

This causes the editor to crash, producing

..\Star Wars Republic Commando\GameData\System\memory(9,5): error X5539: (third source param) Modifiers are not allowed on constants for ps_1_x.

I'll make a new issue for this.