GPUOpen-Drivers / AMDVLK

AMD Open Source Driver For Vulkan
MIT License
1.76k stars 163 forks source link

AMDVLK subgroup codegen is significantly different than Windows #68

Closed Themaister closed 4 years ago

Themaister commented 5 years ago

I've tried some subgroup code on AMDVLK (RX 470), and I'm seeing very different results compared to Windows. I think the Windows codegen is better, as I'm getting a 15% gain on Windows but just 5% on AMDVLK by using subgroup ops in my more complex test below. Overall, Windows runs significantly faster after adding subgroup ops; without them, the two drivers are within margin of error of each other.

First, a very reduced example: https://github.com/Themaister/Granite/blob/master/tests/assets/shaders/subgroup.comp

AMDVLK GCN output:

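AMDVLK splits each reduction step into a DPP move followed by a separate OR, but the Windows compiler emits just an OR (with the DPP modifier applied to it). In the scalar-load loop, AMDVLK ping-pongs through VGPRs: it computes the offsets in VGPRs, moves them back to SGPRs with v_readfirstlane, and only then does the scalar loads, while the Windows compiler stays in SGPRs. Windows also gets one x8 load plus one x4 load, whereas AMDVLK gets three x4 loads.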

; AMDVLK disassembly of the reduced subgroup compute shader, exactly as posted.
; Only the full-line ';' notes are annotations; the trailing ';' fields are the
; instruction encodings printed by the disassembler.
_amdgpu_cs_main:
BB0_0:
    ; Setup: two s_load_dwordx4 fetch s[12:15] and s[4:7], which are used as the
    ; resource operands of the buffer loads below. v1 = (s5 << 6) + v0 is the
    ; per-lane index and v0 = v1 << 4 is its byte offset.
    s_mov_b32 s9, s3                                              ; BE890003
    s_mov_b32 s8, s2                                              ; BE880002
    s_getpc_b64 s[2:3]                                            ; BE821C00
    s_mov_b32 s0, s5                                              ; BE800005
    s_mov_b32 s5, s3                                              ; BE850003
    s_load_dwordx4 s[12:15], s[4:5], 0x0                          ; C00A0302 00000000
    s_load_dwordx4 s[4:7], s[4:5], 0x10                           ; C00A0102 00000010
    s_lshl_b32 s0, s0, 6                                          ; 8E008600
    v_add_u32_e32 v1, vcc, s0, v0                                 ; 32020000
    v_lshlrev_b32_e32 v0, 4, v1                                   ; 24000284
    s_waitcnt lgkmcnt(0)                                          ; BF8C007F
    buffer_load_format_x v5, v1, s[12:15], 0 idxen                ; E0002000 80030501
    buffer_load_dwordx4 v[1:4], v0, s[4:7], 0 offen               ; E05C1000 80010100
    ; With exec inverted, write 0 into v5 for the lanes that were inactive,
    ; then invert exec back. 0 is the neutral value for the OR reduction below.
    s_not_b64 exec, exec                                          ; BEFE057E
    s_waitcnt vmcnt(1)                                            ; BF8C0F71
    v_mov_b32_e32 v5, 0                                           ; 7E0A0280
    s_not_b64 exec, exec                                          ; BEFE057E
    ; Save exec in s[0:1] and enable all lanes for the cross-lane OR reduction.
    s_or_saveexec_b64 s[0:1], -1                                  ; BE8021C1
    ; Six reduction steps (row_shr 1/2/4/8, then row_bcast 15 and 31). Each step
    ; copies v5 to v6, applies the DPP move to v6, and ORs v6 into v5 with a
    ; separate instruction. This is the "split DPP and OR" the issue text refers to.
    v_mov_b32_e32 v6, v5                                          ; 7E0C0305
    s_nop 1                                                       ; BF800001
    v_mov_b32_dpp v6, v6  row_shr:1 row_mask:0xf bank_mask:0xf    ; 7E0C02FA FF011106
    v_or_b32_e32 v5, v6, v5                                       ; 280A0B06
    v_mov_b32_e32 v6, v5                                          ; 7E0C0305
    s_nop 1                                                       ; BF800001
    v_mov_b32_dpp v6, v6  row_shr:2 row_mask:0xf bank_mask:0xf    ; 7E0C02FA FF011206
    v_or_b32_e32 v5, v5, v6                                       ; 280A0D05
    v_mov_b32_e32 v6, v5                                          ; 7E0C0305
    s_nop 1                                                       ; BF800001
    v_mov_b32_dpp v6, v6  row_shr:4 row_mask:0xf bank_mask:0xf    ; 7E0C02FA FF011406
    v_or_b32_e32 v5, v5, v6                                       ; 280A0D05
    v_mov_b32_e32 v6, v5                                          ; 7E0C0305
    s_nop 1                                                       ; BF800001
    v_mov_b32_dpp v6, v6  row_shr:8 row_mask:0xf bank_mask:0xf    ; 7E0C02FA FF011806
    v_or_b32_e32 v5, v5, v6                                       ; 280A0D05
    v_mov_b32_e32 v6, v5                                          ; 7E0C0305
    s_nop 1                                                       ; BF800001
    v_mov_b32_dpp v6, v6  row_bcast:15 row_mask:0xa bank_mask:0xf ; 7E0C02FA AF014206
    v_or_b32_e32 v5, v5, v6                                       ; 280A0D05
    v_mov_b32_e32 v6, v5                                          ; 7E0C0305
    s_nop 1                                                       ; BF800001
    v_mov_b32_dpp v6, v6  row_bcast:31 row_mask:0xc bank_mask:0xf ; 7E0C02FA CF014306
    v_or_b32_e32 v5, v5, v6                                       ; 280A0D05
    ; Restore exec and read the reduced mask from lane 63 into s0. The value is
    ; then copied to v5 and read straight back into s0 (an SGPR -> VGPR -> SGPR
    ; round trip).
    s_mov_b64 exec, s[0:1]                                        ; BEFE0100
    v_readlane_b32 s0, v5, 63                                     ; D2890000 00017F05
    v_mov_b32_e32 v5, s0                                          ; 7E0A0200
    v_readfirstlane_b32 s0, v5                                    ; 7E000505
    ; Skip the loop entirely when the mask in s0 is zero.
    s_cmp_eq_u32 s0, 0                                            ; BF068000
    s_cbranch_scc1 BB0_3                                          ; BF850000
    ; Fill in words 1-3 of s[8:11], the resource operand of the scalar buffer
    ; loads in the loop (s8 was set from s2 at the top of the shader).
    s_and_b32 s9, s9, 0xffff                                      ; 8609FF09 0000FFFF
    s_mov_b32 s11, 0x24fac                                        ; BE8B00FF 00024FAC
    s_mov_b32 s10, -1                                             ; BE8A00C1
BB0_2:
    ; Loop body: one iteration per set bit in s0. s1 = index of the lowest set bit.
    s_ff1_i32_b32 s1, s0                                          ; BE811000
    ; The three byte offsets (s1*48, s1*48+16, s1*48+32) are computed in VGPRs
    ; and moved back to SGPRs with v_readfirstlane before the scalar loads.
    ; This is the VGPR ping-pong described in the issue text.
    v_mul_u32_u24_e64 v5, s1, 48                                  ; D1080005 00016001
    v_mad_u32_u24 v6, s1, 48, 16                                  ; D1C30006 02416001
    v_mad_u32_u24 v7, s1, 48, 32                                  ; D1C30007 02816001
    v_readfirstlane_b32 s2, v5                                    ; 7E040505
    v_readfirstlane_b32 s3, v6                                    ; 7E060506
    v_readfirstlane_b32 s20, v7                                   ; 7E280507
    ; Three separate x4 scalar loads into s[12:15], s[16:19] and s[20:23].
    s_buffer_load_dwordx4 s[12:15], s[8:11], s2                   ; C0280304 00000002
    s_buffer_load_dwordx4 s[16:19], s[8:11], s3                   ; C0280404 00000003
    s_buffer_load_dwordx4 s[20:23], s[8:11], s20                  ; C0280504 00000014
    ; Clear the bit just processed: s0 ^= (1 << s1).
    s_lshl_b32 s1, 1, s1                                          ; 8E010181
    s_xor_b32 s0, s1, s0                                          ; 88000001
    s_waitcnt vmcnt(0) lgkmcnt(0)                                 ; BF8C0070
    ; Per component of v[1:4]: v += (s[12..15] * v) * (v - s[16..19]) * (v + s[20..23]).
    v_mul_f32_e32 v5, s12, v1                                     ; 0A0A020C
    v_subrev_f32_e32 v9, s16, v1                                  ; 06120210
    v_mul_f32_e32 v6, s13, v2                                     ; 0A0C040D
    v_subrev_f32_e32 v10, s17, v2                                 ; 06140411
    v_mul_f32_e32 v7, s14, v3                                     ; 0A0E060E
    v_subrev_f32_e32 v11, s18, v3                                 ; 06160612
    v_mul_f32_e32 v8, s15, v4                                     ; 0A10080F
    v_subrev_f32_e32 v12, s19, v4                                 ; 06180813
    v_mul_f32_e32 v5, v5, v9                                      ; 0A0A1305
    v_add_f32_e32 v13, s20, v1                                    ; 021A0214
    v_add_f32_e32 v14, s21, v2                                    ; 021C0415
    v_mul_f32_e32 v6, v6, v10                                     ; 0A0C1506
    v_add_f32_e32 v15, s22, v3                                    ; 021E0616
    v_mul_f32_e32 v7, v7, v11                                     ; 0A0E1707
    v_add_f32_e32 v9, s23, v4                                     ; 02120817
    v_mul_f32_e32 v8, v8, v12                                     ; 0A101908
    ; Loop again while any bit remains set in s0.
    s_cmp_lg_u32 s0, 0                                            ; BF078000
    v_mac_f32_e32 v1, v5, v13                                     ; 2C021B05
    v_mac_f32_e32 v2, v6, v14                                     ; 2C041D06
    v_mac_f32_e32 v3, v7, v15                                     ; 2C061F07
    v_mac_f32_e32 v4, v8, v9                                      ; 2C081308
    s_cbranch_scc1 BB0_2                                          ; BF850000
BB0_3:
    ; Store v[1:4] using the same resource (s[4:7]) and offset (v0) as the
    ; initial buffer_load_dwordx4, then end the program.
    s_waitcnt vmcnt(0)                                            ; BF8C0F70
    buffer_store_dwordx4 v[1:4], v0, s[4:7], 0 offen              ; E07C1000 80010100
    s_endpgm                                                      ; BF810000
=== STATISTICS ===

VGPRs: 16
SGPRs: 26
Scratch usage (bytes): 0
LDS usage (bytes): 0

Windows (Driver 18.11.2):

; -------- Disassembly --------------------
; Windows driver disassembly of the same compute shader, exactly as posted.
; Only the full-line ';' notes are annotations; the trailing '//' fields are the
; byte offset and encoding printed by the disassembler.
shader main
  asic(VI)
  type(CS)

  ; Setup: v0 = s5 * 64 + v0 in a single v_mad_u32_u24. One s_load_dwordx8
  ; fetches s[4:11], used below as the two buffer resources s[4:7] and s[8:11].
  ; v4 = v0 << 4 is the byte offset for the x4 load and the final store.
  s_getpc_b64   s[0:1]                                  // 000000000000: BE801C80
  v_mad_u32_u24  v0, s5, 64, v0                         // 000000000004: D1C30000 04018005
  s_mov_b32     s0, s4                                  // 00000000000C: BE800004
  s_load_dwordx8  s[4:11], s[0:1], 0x00                 // 000000000010: C00E0100 00000000
  v_lshlrev_b32  v4, 4, v0                              // 000000000018: 24080084
  s_waitcnt     lgkmcnt(0)                              // 00000000001C: BF8C007F
  buffer_load_format_x  v5, v0, s[4:7], 0 idxen         // 000000000020: E0002000 80010500
  buffer_load_dwordx4  v[0:3], v4, s[8:11], 0 offen     // 000000000028: E05C1000 80020004
  s_waitcnt     vmcnt(1)                                // 000000000030: BF8C0F71
  ; Copy the loaded value to v13. s_orn2_saveexec_b64 saves exec in s[0:1] and
  ; sets exec = 0 | ~exec, so the following write of 0 lands in the lanes that
  ; were inactive. s_nand_b64 exec, 0, 0 then yields ~(0 & 0), i.e. all lanes on.
  v_mov_b32     v13, v5                                 // 000000000034: 7E1A0305
  s_orn2_saveexec_b64  s[0:1], 0                        // 000000000038: BE802480
  v_mov_b32     v13, 0                                  // 00000000003C: 7E1A0280
  s_nand_b64    exec, 0, 0                              // 000000000040: 8BFE8080
  s_nop         0x0000                                  // 000000000044: BF800000
  ; Six reduction steps (row_shr 1/2/4/8, then row_bcast 15 and 31). Each step
  ; is one v_or_b32 carrying the DPP modifier, with no separate move.
  v_or_b32      v13, v13, v13 row_shr:1                 // 000000000048: 281A1AFA FF01110D
  s_nop         0x0001                                  // 000000000050: BF800001
  v_or_b32      v13, v13, v13 row_shr:2                 // 000000000054: 281A1AFA FF01120D
  s_nop         0x0001                                  // 00000000005C: BF800001
  v_or_b32      v13, v13, v13 row_shr:4                 // 000000000060: 281A1AFA FF01140D
  s_nop         0x0001                                  // 000000000068: BF800001
  v_or_b32      v13, v13, v13 row_shr:8                 // 00000000006C: 281A1AFA FF01180D
  s_nop         0x0001                                  // 000000000074: BF800001
  v_or_b32      v13, v13, v13 row_bcast:15 row_mask:0xa // 000000000078: 281A1AFA AF01420D
  s_nop         0x0001                                  // 000000000080: BF800001
  v_or_b32      v13, v13, v13 row_bcast:31 row_mask:0xc // 000000000084: 281A1AFA CF01430D
  ; Restore exec and read the reduced mask from lane 63 into s0. It stays in an
  ; SGPR from here on.
  s_mov_b64     exec, s[0:1]                            // 00000000008C: BEFE0100
  v_readlane_b32  s0, v13, 63                           // 000000000090: D2890000 00017F0D
  s_nop         0x0000                                  // 000000000098: BF800000
  s_nop         0x0000                                  // 00000000009C: BF800000
label_0028:
  ; Loop head: exit once no bits remain set in s0.
  s_cmp_eq_i32  s0, 0                                   // 0000000000A0: BF008000
  s_cbranch_scc1  label_0052                            // 0000000000A4: BF850028
  ; s4 = index of the lowest set bit; s5 = s4 * 48 is computed on the scalar
  ; unit. s[12:15] is assembled from s2, the masked s3 and two constants on
  ; every iteration, and is used as the resource operand of the scalar loads.
  s_and_b32     s1, s3, 0x0000ffff                      // 0000000000A8: 8601FF03 0000FFFF
  s_ff1_i32_b32  s4, s0                                 // 0000000000B0: BE841000
  s_andn2_b32   s1, s1, 0x3fff0000                      // 0000000000B4: 8901FF01 3FFF0000
  s_mul_i32     s5, s4, 48                              // 0000000000BC: 9205B004
  s_mov_b32     s12, s2                                 // 0000000000C0: BE8C0002
  s_mov_b32     s13, s1                                 // 0000000000C4: BE8D0001
  s_movk_i32    s14, 0xffff                             // 0000000000C8: B00EFFFF
  s_mov_b32     s15, 0x00024fac                         // 0000000000CC: BE8F00FF 00024FAC
  ; One x8 load at offset s5 and one x4 load at offset s5 + 32. The x4 load
  ; writes s[12:15], the same registers that hold its resource operand.
  s_buffer_load_dwordx8  s[16:23], s[12:15], s5         // 0000000000D4: C02C0406 00000005
  s_add_u32     s1, s5, 32                              // 0000000000DC: 8001A005
  s_buffer_load_dwordx4  s[12:15], s[12:15], s1         // 0000000000E0: C0280306 00000001
  ; Clear the bit just processed: s0 ^= (1 << s4).
  s_lshl_b32    s1, 1, s4                               // 0000000000E8: 8E010481
  s_xor_b32     s0, s0, s1                              // 0000000000EC: 88000100
  s_waitcnt     vmcnt(0) & lgkmcnt(0)                   // 0000000000F0: BF8C0070
  ; Per component of v[0:3]: v += (s[16..19] * v) * (v - s[20..23]) * (v + s[12..15]).
  v_mul_f32     v5, s16, v0                             // 0000000000F4: 0A0A0010
  v_mul_f32     v6, s17, v1                             // 0000000000F8: 0A0C0211
  v_mul_f32     v7, s18, v2                             // 0000000000FC: 0A0E0412
  v_mul_f32     v8, s19, v3                             // 000000000100: 0A100613
  v_subrev_f32  v9, s20, v0                             // 000000000104: 06120014
  v_subrev_f32  v10, s21, v1                            // 000000000108: 06140215
  v_subrev_f32  v11, s22, v2                            // 00000000010C: 06160416
  v_subrev_f32  v12, s23, v3                            // 000000000110: 06180617
  v_mul_f32     v5, v5, v9                              // 000000000114: 0A0A1305
  v_mul_f32     v6, v6, v10                             // 000000000118: 0A0C1506
  v_mul_f32     v7, v7, v11                             // 00000000011C: 0A0E1707
  v_mul_f32     v8, v8, v12                             // 000000000120: 0A101908
  v_add_f32     v9, s12, v0                             // 000000000124: 0212000C
  v_add_f32     v10, s13, v1                            // 000000000128: 0214020D
  v_add_f32     v11, s14, v2                            // 00000000012C: 0216040E
  v_add_f32     v12, s15, v3                            // 000000000130: 0218060F
  v_mac_f32     v0, v5, v9                              // 000000000134: 2C001305
  v_mac_f32     v1, v6, v10                             // 000000000138: 2C021506
  v_mac_f32     v2, v7, v11                             // 00000000013C: 2C041707
  v_mac_f32     v3, v8, v12                             // 000000000140: 2C061908
  s_branch      label_0028                              // 000000000144: BF82FFD6
label_0052:
  ; Store v[0:3] using the same resource (s[8:11]) and offset (v4) as the
  ; initial buffer_load_dwordx4 (this store carries the glc flag), then end.
  s_waitcnt     vmcnt(0)                                // 000000000148: BF8C0F70
  buffer_store_dwordx4  v[0:3], v4, s[8:11], 0 offen glc // 00000000014C: E07C5000 80020004
  s_endpgm                                              // 000000000154: BF810000
end

; ----------------- CS Data ------------------------

codeLenInByte        = 344 bytes;
pm4CrcCodeLength     = 344 bytes;
pm4Crc               = babbcfbd424767d0

; launchModeFlags    = 0x00000400
externalLogicalBinding = TRUE;

userElementCount     = 0;
extUserElementCount  = 0;
NumVgprs             = 14;
NumSgprs             = 26;
FloatMode            = 192;
IeeeMode             = 0;
FlatPtr32            = 0;
ScratchSize          = 0 dwords/thread;
LDSByteSize          = 0 bytes/workgroup (compile time only);
ScratchWaveOffsetReg = s65535;

; Optimization Report
Intent           = SI_OPT_INTENT_UNSET

; Shader Stats
NumInst         = 69
uNumVALUInst    = 31
uNumVMemInst    = 3
uNumDSInst      = 0
uNumExportInst  = 0
uNumCFlowInst   = 16
uNumBranchInst   = 3
uNumSALUInst    = 16
uNumSMemInst    = 3
uNumAtomicOper    = 0
uNumLocalAtomicOper    = 0
uNumGlobalAtomicOper    = 0
uNumLocalLoads    = 0
uNumGlobalLoads    = 5
uNumLocalStores    = 0
uNumGlobalStores    = 1
uNumWait    = 4
uNumTexReads    = 0
uNumTexWrites    = 0
uNumF16Inst    = 0
uNumF32Inst    = 20
uNumFpInst    = 20
uNumInt16Inst    = 0
uNumInt32Inst    = 7
uNumIntInst    = 8
uNumCompilerSpills   = 0
uNumCompilerVectorSpills    = 0
uNumCompilerScalarSpills    = 0
uNumSMovInst    = 6
uNumVMovInst    = 2
uNumMovInst    = 8
MemSizePerThread (dwords/thread)    =0
; Scheduler stats
Highest schedule scores for top 3 basic blocks: 
                = 85.20 52.80 8.00 

;COMPUTE_PGM_RSRC2       = 0x0000008A
COMPUTE_PGM_RSRC2:USER_SGPR      = 5
COMPUTE_PGM_RSRC2:TGID_X_EN      = 1
NumThreadX                       = 64
NumThreadY                       = 1
NumThreadZ                       = 1
=== STATISTICS ===

VGPRs: 14
SGPRs: 26
Scratch usage (bytes): 0
LDS usage (bytes): 0

For a more complex example, I tried adding subgroupOr() to my clustered shader: https://github.com/Themaister/Granite/blob/master/assets/shaders/lights/clusterer.h#L105

We have a similar difference where AMDVLK will ping-pong between SGPR and VGPR in the loop, while AMD Windows stays in SGPR.

AMDVLK:

Highlight:

...
    v_mov_b32_dpp v16, v16  row_shr:1 row_mask:0xf bank_mask:0xf        ; 7E2002FA FF011110
    v_or_b32_e32 v16, v16, v45                                          ; 28205B10
    v_mov_b32_e32 v17, v16                                              ; 7E220310
    s_nop 1                                                             ; BF800001
    v_mov_b32_dpp v17, v17  row_shr:2 row_mask:0xf bank_mask:0xf        ; 7E2202FA FF011211
    v_or_b32_e32 v16, v16, v17                                          ; 28202310
    v_mov_b32_e32 v17, v16                                              ; 7E220310
    s_nop 1                                                             ; BF800001
    v_mov_b32_dpp v17, v17  row_shr:4 row_mask:0xf bank_mask:0xf        ; 7E2202FA FF011411
    v_or_b32_e32 v16, v16, v17                                          ; 28202310
    v_mov_b32_e32 v17, v16                                              ; 7E220310
    s_nop 1                                                             ; BF800001
    v_mov_b32_dpp v17, v17  row_shr:8 row_mask:0xf bank_mask:0xf        ; 7E2202FA FF011811
    v_or_b32_e32 v16, v16, v17                                          ; 28202310
    v_mov_b32_e32 v17, v16                                              ; 7E220310
    s_nop 1                                                             ; BF800001
    v_mov_b32_dpp v17, v17  row_bcast:15 row_mask:0xa bank_mask:0xf     ; 7E2202FA AF014211
    v_or_b32_e32 v16, v16, v17                                          ; 28202310
    v_mov_b32_e32 v17, v16                                              ; 7E220310
    s_nop 1                                                             ; BF800001
    v_mov_b32_dpp v17, v17  row_bcast:31 row_mask:0xc bank_mask:0xf     ; 7E2202FA CF014311
    v_or_b32_e32 v16, v16, v17                                          ; 28202310
    s_mov_b64 exec, s[12:13]                                            ; BEFE010C
    v_readlane_b32 s3, v16, 63                                          ; D2890003 00017F10
    v_mov_b32_e32 v16, s3                                               ; 7E200203
    v_readfirstlane_b32 s3, v16                                         ; 7E060510
    s_cmp_eq_u32 s3, 0                                                  ; BF068003
    v_mov_b32_e32 v16, 0                                                ; 7E200280
    v_mov_b32_e32 v17, 0                                                ; 7E220280
    s_cbranch_scc1 BB1_13                                               ; BF850000
    s_load_dwordx8 s[20:27], s[0:1], 0xc0                               ; C00E0500 000000C0
    s_load_dwordx4 s[12:15], s[0:1], 0xe0                               ; C00A0300 000000E0
    v_mov_b32_e32 v23, s3                                               ; 7E2E0203
    v_mov_b32_e32 v22, 0                                                ; 7E2C0280
    s_movk_i32 s3, 0x50                                                 ; B0030050
    s_movk_i32 s16, 0x58                                                ; B0100058
    s_movk_i32 s17, 0x60                                                ; B0110060
    s_movk_i32 s28, 0x68                                                ; B01C0068
    s_mov_b32 s29, 0x3faaaaab                                           ; BE9D00FF 3FAAAAAB
    s_mov_b32 s30, 0x411ffffe                                           ; BE9E00FF 411FFFFE
    s_mov_b32 s31, 0x38d1b717                                           ; BE9F00FF 38D1B717
    s_mov_b32 s32, 0x3ea2f9e9                                           ; BEA000FF 3EA2F9E9
    v_mov_b32_e32 v24, 0                                                ; 7E300280
    v_mov_b32_e32 v25, 0                                                ; 7E320280
BB1_10:
    v_ffbl_b32_e32 v45, v23                                             ; 7E5A5D17
    v_lshlrev_b32_e32 v27, 6, v45                                       ; 24365A86
    v_add_u32_e32 v28, vcc, 0xc40, v27                                  ; 323836FF 00000C40
    v_readfirstlane_b32 s33, v28                                        ; 7E42051C
    v_add_u32_e32 v28, vcc, 0xc50, v27                                  ; 323836FF 00000C50
    s_buffer_load_dwordx4 s[36:39], s[8:11], s33                        ; C0280904 00000021
    v_readfirstlane_b32 s33, v28                                        ; 7E42051C
    s_buffer_load_dwordx4 s[40:43], s[8:11], s33                        ; C0280A04 00000021
    v_add_u32_e32 v28, vcc, 0xc60, v27                                  ; 323836FF 00000C60
    v_readfirstlane_b32 s33, v28                                        ; 7E42051C
    v_add_u32_e32 v27, vcc, 0xc70, v27                                  ; 323636FF 00000C70
    s_buffer_load_dwordx4 s[44:47], s[8:11], s33                        ; C0280B04 00000021
    v_readfirstlane_b32 s33, v27                                        ; 7E42051B
    s_buffer_load_dwordx4 s[48:51], s[8:11], s33                        ; C0280C04 00000021
    s_waitcnt lgkmcnt(0)                                                ; BF8C007F
...
_amdgpu_ps_main:
BB1_0:
    s_mov_b64 s[18:19], exec                                            ; BE92017E
    s_wqm_b64 exec, exec                                                ; BEFE077E
    s_getpc_b64 s[0:1]                                                  ; BE801C00
    s_mov_b32 s36, s15                                                  ; BEA4000F
    s_mov_b32 s37, s1                                                   ; BEA50001
    s_mov_b32 s21, s3                                                   ; BE950003
    s_mov_b32 s20, s2                                                   ; BE940002
    s_load_dwordx4 s[32:35], s[36:37], 0x34                             ; C00A0812 00000034
    s_load_dwordx2 s[2:3], s[36:37], 0x44                               ; C0060092 00000044
    s_mov_b32 s61, s1                                                   ; BEBD0001
    s_mov_b32 s29, s11                                                  ; BE9D000B
    s_mov_b32 s28, s10                                                  ; BE9C000A
    s_mov_b32 s27, s9                                                   ; BE9B0009
    s_waitcnt lgkmcnt(0)                                                ; BF8C007F
    s_mov_b32 s60, s3                                                   ; BEBC0003
    s_mov_b32 s26, s8                                                   ; BE9A0008
    s_mov_b32 s25, s7                                                   ; BE990007
    s_mov_b32 s24, s6                                                   ; BE980006
    s_mov_b32 s23, s5                                                   ; BE970005
    s_mov_b32 s22, s4                                                   ; BE960004
    s_load_dwordx8 s[4:11], s[60:61], 0x0                               ; C00E011E 00000000
    s_load_dwordx4 s[52:55], s[60:61], 0x20                             ; C00A0D1E 00000020
    s_load_dwordx8 s[36:43], s[60:61], 0x30                             ; C00E091E 00000030
    s_load_dwordx4 s[56:59], s[60:61], 0x50                             ; C00A0E1E 00000050
    s_load_dwordx8 s[44:51], s[60:61], 0x60                             ; C00E0B1E 00000060
    s_load_dwordx4 s[60:63], s[60:61], 0x80                             ; C00A0F1E 00000080
    s_mov_b32 m0, s16                                                   ; BEFC0010
    v_interp_p1_f32_e32 v4, v0, attr1.x                                 ; D4100400
    v_interp_p1_f32_e32 v5, v0, attr1.y                                 ; D4140500
    v_interp_p2_f32_e32 v4, v1, attr1.x                                 ; D4110401
    v_interp_p2_f32_e32 v5, v1, attr1.y                                 ; D4150501
    s_waitcnt lgkmcnt(0)                                                ; BF8C007F
    image_sample v[9:12], v[4:5], s[4:11], s[52:55] dmask:0xf           ; F0800F00 01A10904
    image_sample v[18:19], v[4:5], s[36:43], s[56:59] dmask:0x3         ; F0800300 01C91204
    image_sample v[14:17], v[4:5], s[44:51], s[60:63] dmask:0xf         ; F0800F00 01EB0E04
    s_mov_b32 s3, 0xffff                                                ; BE8300FF 0000FFFF
    s_and_b32 s5, s33, s3                                               ; 86050321
    s_mov_b32 s7, 0x24fac                                               ; BE8700FF 00024FAC
    s_mov_b32 s6, -1                                                    ; BE8600C1
    s_mov_b32 s4, s32                                                   ; BE840020
    s_mov_b64 s[10:11], s[6:7]                                          ; BE8A0106
    s_mov_b32 s0, s2                                                    ; BE800002
    s_mov_b64 s[8:9], s[4:5]                                            ; BE880104
    s_and_b32 s2, s14, s3                                               ; 8602030E
    s_mov_b32 s8, s13                                                   ; BE88000D
    s_mov_b32 s9, s2                                                    ; BE890002
    s_mov_b32 s30, s12                                                  ; BE9E000C
    s_buffer_load_dwordx2 s[2:3], s[8:11], 0x200                        ; C0260084 00000200
    s_buffer_load_dword s12, s[8:11], 0x208                             ; C0220304 00000208
    s_buffer_load_dwordx2 s[14:15], s[8:11], 0x210                      ; C0260384 00000210
    s_buffer_load_dword s8, s[8:11], 0x218                              ; C0220204 00000218
    v_interp_p1_f32_e32 v4, v0, attr0.y                                 ; D4100100
    v_interp_p1_f32_e32 v5, v0, attr0.x                                 ; D4140000
    v_interp_p2_f32_e32 v4, v1, attr0.y                                 ; D4110101
    s_buffer_load_dword s9, s[4:7], 0xb0                                ; C0220242 000000B0
    v_interp_p1_f32_e32 v7, v0, attr0.z                                 ; D41C0200
    s_waitcnt lgkmcnt(0)                                                ; BF8C007F
    v_subrev_f32_e32 v8, s3, v4                                         ; 06100803
    v_interp_p2_f32_e32 v5, v1, attr0.x                                 ; D4150001
    v_interp_p2_f32_e32 v7, v1, attr0.z                                 ; D41D0201
    v_subrev_f32_e32 v13, s2, v5                                        ; 061A0A02
    v_mul_f32_e32 v8, s15, v8                                           ; 0A10100F
    v_mac_f32_e32 v8, s14, v13                                          ; 2C101A0E
    s_waitcnt vmcnt(0)                                                  ; BF8C0F70
    v_subrev_f32_e32 v14, s12, v7                                       ; 061C0E0C
    v_mac_f32_e32 v8, s8, v14                                           ; 2C101C08
    v_mul_f32_e32 v14, s9, v8                                           ; 0A1C1009
    v_cmp_gt_f32_e32 vcc, 1.0, v14                                      ; 7C881CF2
    v_mov_b32_e32 v13, 0                                                ; 7E1A0280
    s_and_saveexec_b64 s[8:9], vcc                                      ; BE88206A
    s_cbranch_execz BB1_3                                               ; BF880000
BB1_1:
    s_buffer_load_dwordx16 s[36:51], s[4:7], 0x30                       ; C0320902 00000030
    s_load_dwordx8 s[52:59], s[0:1], 0x30                               ; C00E0D00 00000030
    s_load_dwordx4 s[60:63], s[0:1], 0x50                               ; C00A0F00 00000050
    s_waitcnt lgkmcnt(0)                                                ; BF8C007F
    v_mul_f32_e32 v13, s43, v4                                          ; 0A1A082B
    v_mac_f32_e32 v13, s39, v5                                          ; 2C1A0A27
    v_mac_f32_e32 v13, s47, v7                                          ; 2C1A0E2F
    v_add_f32_e32 v13, s51, v13                                         ; 021A1A33
    v_mul_f32_e32 v17, s41, v4                                          ; 0A220829
    v_mul_f32_e32 v20, s40, v4                                          ; 0A280828
    v_rcp_f32_e32 v13, v13                                              ; 7E1A450D
    v_mac_f32_e32 v17, s37, v5                                          ; 2C220A25
    v_mac_f32_e32 v20, s36, v5                                          ; 2C280A24
    v_mac_f32_e32 v17, s45, v7                                          ; 2C220E2D
    v_mac_f32_e32 v20, s44, v7                                          ; 2C280E2C
    v_add_f32_e32 v20, s48, v20                                         ; 02282830
    v_add_f32_e32 v17, s49, v17                                         ; 02222231
    v_mul_f32_e32 v21, v17, v13                                         ; 0A2A1B11
    v_mul_f32_e32 v20, v20, v13                                         ; 0A281B14
    image_sample_lz v[20:23], v[20:21], s[52:59], s[60:63] dmask:0xf    ; F09C0F00 01ED1414
    v_mul_f32_e32 v17, s42, v4                                          ; 0A22082A
    v_mac_f32_e32 v17, s38, v5                                          ; 2C220A26
    v_mac_f32_e32 v17, s46, v7                                          ; 2C220E2E
    v_add_f32_e32 v17, s50, v17                                         ; 02222232
    v_mul_f32_e32 v17, v17, v13                                         ; 0A221B11
    v_mov_b32_e32 v13, 1.0                                              ; 7E1A02F2
    s_waitcnt vmcnt(0)                                                  ; BF8C0F70
    v_cmp_gt_f32_e32 vcc, v17, v20                                      ; 7C882911
    s_and_b64 exec, exec, vcc                                           ; 86FE6A7E
BB1_2:
    v_mad_f32 v13, -v20, v20, v21                                       ; D1C1000D 24562914
    v_max_f32_e32 v21, 0x3727c5ac, v13                                  ; 162A1AFF 3727C5AC
    v_sub_f32_e32 v13, v17, v20                                         ; 041A2911
    v_mad_f32 v13, v13, v13, v21                                        ; D1C1000D 04561B0D
    v_rcp_f32_e32 v17, v13                                              ; 7E22450D
    v_mov_b32_e32 v13, 0xbeaaaaab                                       ; 7E1A02FF BEAAAAAB
    s_mov_b32 s10, 0x3faaaaab                                           ; BE8A00FF 3FAAAAAB
    v_mul_f32_e32 v17, v21, v17                                         ; 0A222315
    v_mac_f32_e64 v13, v17, s10 clamp                                   ; D116800D 00001511
BB1_3:
    s_or_b64 exec, exec, s[8:9]                                         ; 87FE087E
    s_mov_b32 s8, 0x3f400000                                            ; BE8800FF 3F400000
    v_cmp_lt_f32_e32 vcc, s8, v14                                       ; 7C821C08
    v_mov_b32_e32 v47, 0                                                ; 7E5E0280
    s_and_saveexec_b64 s[8:9], vcc                                      ; BE88206A
    s_cbranch_execz BB1_6                                               ; BF880000
BB1_4:
    ; Transforms the point (x = v5, y = v4, z = v7) by a 4x4 matrix read from the
    ; buffer s[4:7], samples a texture at the projected xy, and narrows exec to the
    ; lanes whose projected z is greater than the first sampled channel.
    ; NOTE(review): this has the shape of a shadow-map lookup - confirm against the
    ; shader source.
    ; 16 dwords at byte offset 0x70: s[36:39] scale x, s[40:43] scale y,
    ; s[44:47] scale z, s[48:51] are added unscaled.
    s_buffer_load_dwordx16 s[36:51], s[4:7], 0x70                       ; C0320902 00000070
    ; 8 + 4 dwords from the table at s[0:1]; used below as the image resource and
    ; the sampler operand of image_sample_lz.
    s_load_dwordx8 s[52:59], s[0:1], 0x0                                ; C00E0D00 00000000
    s_load_dwordx4 s[60:63], s[0:1], 0x20                               ; C00A0F00 00000020
    ; Result for lanes that enter this block but are masked off before BB1_5.
    v_mov_b32_e32 v47, 1.0                                              ; 7E5E02F2
    ; All three scalar loads must have landed before their SGPRs are read.
    s_waitcnt lgkmcnt(0)                                                ; BF8C007F
    ; v21 = w = s43*y + s39*x + s47*z + s51
    v_mul_f32_e32 v21, s43, v4                                          ; 0A2A082B
    v_mac_f32_e32 v21, s39, v5                                          ; 2C2A0A27
    v_mac_f32_e32 v21, s47, v7                                          ; 2C2A0E2F
    v_add_f32_e32 v21, s51, v21                                         ; 022A2A33
    ; v17 / v20 accumulate the x / y rows; v24 = 1/w is computed in between.
    v_mul_f32_e32 v17, s40, v4                                          ; 0A220828
    v_mul_f32_e32 v20, s41, v4                                          ; 0A280829
    v_rcp_f32_e32 v24, v21                                              ; 7E304515
    v_mac_f32_e32 v17, s36, v5                                          ; 2C220A24
    v_mac_f32_e32 v20, s37, v5                                          ; 2C280A25
    v_mac_f32_e32 v17, s44, v7                                          ; 2C220E2C
    v_mac_f32_e32 v20, s45, v7                                          ; 2C280E2D
    ; v21 is free again after the rcp and now receives the finished y row.
    v_add_f32_e32 v21, s49, v20                                         ; 022A2831
    v_add_f32_e32 v17, s48, v17                                         ; 02222230
    ; Perspective divide: (v20, v21) = (x', y') / w.
    v_mul_f32_e32 v20, v17, v24                                         ; 0A283111
    v_mul_f32_e32 v21, v21, v24                                         ; 0A2A3115
    ; Four channels into v[20:23], overwriting the coordinate registers.
    image_sample_lz v[20:23], v[20:21], s[52:59], s[60:63] dmask:0xf    ; F09C0F00 01ED1414
    ; While the sample is in flight: v17 = (s42*y + s38*x + s46*z + s50) / w.
    v_mul_f32_e32 v17, s42, v4                                          ; 0A22082A
    v_mac_f32_e32 v17, s38, v5                                          ; 2C220A26
    v_mac_f32_e32 v17, s46, v7                                          ; 2C220E2E
    v_add_f32_e32 v17, s50, v17                                         ; 02222232
    v_mul_f32_e32 v17, v17, v24                                         ; 0A223111
    ; Wait for the sample result before reading v20.
    s_waitcnt vmcnt(0)                                                  ; BF8C0F70
    ; Keep only lanes with v17 > v20. The other lanes keep v47 = 1.0 and are
    ; re-enabled at BB1_6.
    v_cmp_gt_f32_e32 vcc, v17, v20                                      ; 7C882911
    s_and_b64 exec, exec, vcc                                           ; 86FE6A7E
BB1_5:
    ; Runs only in lanes where v17 > v20 (exec was narrowed at the end of BB1_4).
    ; With m1 = v20 and m2 = v21 (first two sampled channels) and d = v17 - m1:
    ;   var = max(m2 - m1*m1, 1e-5)
    ;   p   = var / (d*d + var)
    ;   v47 = clamp(-1/3 + p * 4/3)
    ; NOTE(review): this matches a Chebyshev / variance-shadow-map style bound with
    ; a rescale of the result - confirm against the shader source.
    ; v21 = m2 - m1*m1
    v_mad_f32 v21, -v20, v20, v21                                       ; D1C10015 24562914
    ; 0x3727c5ac is about 1e-5; keeps the variance strictly positive.
    v_max_f32_e32 v21, 0x3727c5ac, v21                                  ; 162A2AFF 3727C5AC
    ; v17 = d, then d*d + var, then its reciprocal.
    v_sub_f32_e32 v17, v17, v20                                         ; 04222911
    v_mad_f32 v17, v17, v17, v21                                        ; D1C10011 04562311
    v_rcp_f32_e32 v17, v17                                              ; 7E224511
    ; 0xbeaaaaab is about -1/3 (accumulator seed); 0x3faaaaab is about 4/3 (scale).
    v_mov_b32_e32 v47, 0xbeaaaaab                                       ; 7E5E02FF BEAAAAAB
    s_mov_b32 s10, 0x3faaaaab                                           ; BE8A00FF 3FAAAAAB
    ; v17 = p
    v_mul_f32_e32 v17, v21, v17                                         ; 0A222315
    ; v47 = clamp(v47 + p * s10); the VOP3 clamp modifier saturates to [0, 1].
    v_mac_f32_e64 v47, v17, s10 clamp                                   ; D116802F 00001511
BB1_6:
    ; Join point for the BB1_4/BB1_5 region, then setup and entry test for the
    ; optional sample in BB1_7.
    ; Re-enable the lanes saved by the s_and_saveexec at the end of BB1_3.
    s_or_b64 exec, exec, s[8:9]                                         ; 87FE087E
    ; Assemble four dwords in s[8:11]; BB1_7 and the later loop pass them as the
    ; resource operand of s_buffer_load. s8 = s34, s9 = low 16 bits of s35,
    ; s10 = -1, s11 = 0x24fac.
    ; NOTE(review): presumably a buffer descriptor built from a 48-bit base
    ; address in s[34:35] with an unbounded size - confirm.
    s_and_b32 s9, s35, 0xffff                                           ; 8609FF23 0000FFFF
    s_mov_b32 s11, 0x24fac                                              ; BE8B00FF 00024FAC
    s_mov_b32 s10, -1                                                   ; BE8A00C1
    s_mov_b32 s8, s34                                                   ; BE880022
    ; Defaults for v[45:46], the destination of the sample in BB1_7, for lanes
    ; that skip it.
    v_mov_b32_e32 v45, 0                                                ; 7E5A0280
    v_mov_b32_e32 v46, 0                                                ; 7E5C0280
    ; s[14:15] = current exec, exec &= s[18:19] (a lane mask computed before this
    ; excerpt). BB1_8 ORs s[14:15] back in.
    s_and_saveexec_b64 s[14:15], s[18:19]                               ; BE8E2012
    ; Skip BB1_7 when no lane is left. The encoded branch offset is 0 in this dump.
    s_cbranch_execz BB1_8                                               ; BF880000
BB1_7:
    ; Transforms (x = v5, y = v4, z = v7) by a 4x4 matrix at offset 0 of the
    ; buffer s[8:11], derives a level from ceil(log2(z')) clamped to [-1, 7],
    ; builds a four-component address in v[20:23] and samples two channels into
    ; v[45:46].
    ; NOTE(review): the level / address math looks like a lookup into a
    ; log-distributed 3D structure - confirm against the shader source.
    s_buffer_load_dwordx16 s[32:47], s[8:11], 0x0                       ; C0320804 00000000
    ; 0x40e00000 is 7.0f (upper bound of the level clamp); 0x3a83126f is about 0.001.
    v_mov_b32_e32 v17, 0x40e00000                                       ; 7E2202FF 40E00000
    s_mov_b32 s13, 0x3a83126f                                           ; BE8D00FF 3A83126F
    s_waitcnt lgkmcnt(0)                                                ; BF8C007F
    ; v23 = z' = s38*y + s34*x + s42*z + s46
    v_mul_f32_e32 v20, s38, v4                                          ; 0A280826
    v_mac_f32_e32 v20, s34, v5                                          ; 2C280A22
    v_mac_f32_e32 v20, s42, v7                                          ; 2C280E2A
    v_add_f32_e32 v23, s46, v20                                         ; 022E282E
    ; v20 = log2(max(z', 1e-4)); 0x38d1b717 is about 1e-4 and keeps the log finite.
    v_max_f32_e32 v20, 0x38d1b717, v23                                  ; 16282EFF 38D1B717
    v_log_f32_e32 v20, v20                                              ; 7E284314
    ; Start the y' (v21) and x' (v22) rows while the log is computed.
    v_mul_f32_e32 v21, s37, v4                                          ; 0A2A0825
    v_mul_f32_e32 v22, s36, v4                                          ; 0A2C0824
    v_mac_f32_e32 v21, s33, v5                                          ; 2C2A0A21
    ; v17 = level = median(ceil(log2 z'), -1, 7), i.e. a clamp to [-1, 7].
    v_ceil_f32_e32 v20, v20                                             ; 7E283B14
    v_med3_f32 v17, v20, -1.0, v17                                      ; D1D60011 0445E714
    ; v24 = 2^min(-level, 0): a scale of at most 1.
    v_min_f32_e64 v20, -v17, 0                                          ; D10A0014 20010111
    v_exp_f32_e32 v24, v20                                              ; 7E304114
    ; Finish the rows: v20 = y', v21 = x'.
    v_mac_f32_e32 v22, s32, v5                                          ; 2C2C0A20
    v_mac_f32_e32 v21, s41, v7                                          ; 2C2A0E29
    v_mac_f32_e32 v22, s40, v7                                          ; 2C2C0E28
    v_add_f32_e32 v20, s45, v21                                         ; 02282A2D
    v_add_f32_e32 v21, s44, v22                                         ; 022A2C2C
    ; Scale by v24 and remap with *0.5 + 0.5: v20 = x coordinate, v21 = y coordinate.
    v_mul_f32_e32 v20, v20, v24                                         ; 0A283114
    v_mul_f32_e32 v22, v21, v24                                         ; 0A2C3115
    v_mad_f32 v21, v20, 0.5, 0.5                                        ; D1C10015 03C1E114
    v_mad_f32 v20, v22, 0.5, 0.5                                        ; D1C10014 03C1E116
    ; v22 = z' * scale; v23 = max(-level, 0) - 1.
    v_mul_f32_e32 v22, v23, v24                                         ; 0A2C3117
    v_max_f32_e64 v23, -v17, 0                                          ; D10B0017 20010111
    v_add_f32_e32 v23, -1.0, v23                                        ; 022E2EF3
    ; The matrix SGPRs are fully consumed, so s[32:43] is reused for the image
    ; resource (8 dwords) and sampler (4 dwords) read from the table at s[0:1].
    s_load_dwordx8 s[32:39], s[0:1], 0x90                               ; C00E0800 00000090
    s_load_dwordx4 s[40:43], s[0:1], 0xb0                               ; C00A0A00 000000B0
    ; v23 += 2 * v22, then clamp to [0.001, 0.999] (0x3f7fbe77 is about 0.999).
    v_mac_f32_e32 v23, 2.0, v22                                         ; 2C2E2CF4
    v_mov_b32_e32 v22, 0x3f7fbe77                                       ; 7E2C02FF 3F7FBE77
    v_med3_f32 v22, v23, s13, v22                                       ; D1D60016 04581B17
    ; v22 = ((level + 1) + clamped fraction) / 9; 0x3de38e39 is about 1/9.
    v_add_f32_e32 v17, 1.0, v17                                         ; 022222F2
    v_add_f32_e32 v17, v17, v22                                         ; 02222D11
    v_mul_f32_e32 v22, 0x3de38e39, v17                                  ; 0A2C22FF 3DE38E39
    ; Resource and sampler must be loaded before the sample issues.
    s_waitcnt lgkmcnt(0)                                                ; BF8C007F
    ; Two channels (dmask 0x3) into v[45:46]. The result is not waited on in this
    ; block; the s_waitcnt vmcnt(0) that covers it is in BB1_8.
    image_sample_lz v[45:46], v[20:23], s[32:39], s[40:43] dmask:0x3    ; F09C0300 01482D14
BB1_8:
    ; Join point after the conditional sample in BB1_7, followed by one long
    ; straight-line section:
    ;   1. interpolate attr2.xyz and attr3.xyzw and normalise both 3-vectors,
    ;   2. combine them with v18/v19 into a unit vector (v0, v1, v31),
    ;   3. precompute per-pixel constants (v6, v15, v32..v44) for the loop at BB1_10,
    ;   4. OR-reduce v45 across the whole wave and skip the loop if the result is 0.
    ; NOTE(review): steps 1-3 have the shape of tangent-space normal mapping plus
    ; PBR material setup - confirm against the shader source.
    ; Re-enable the lanes masked off by the s_and_saveexec at the end of BB1_6.
    s_or_b64 exec, exec, s[14:15]                                       ; 87FE0E7E
    ; The v_interp_* instructions read m0 implicitly.
    s_mov_b32 m0, s16                                                   ; BEFC0010
    ; Two-pass interpolation, v0 weighting the p1 pass and v1 the p2 pass:
    ;   attr3.x/.y/.z/.w -> v17, v20, v21, v22      attr2.x/.y/.z -> v23, v24, v0
    ; v0 and v1 are themselves overwritten, so they are consumed last in each pass.
    v_interp_p1_f32_e32 v20, v0, attr3.y                                ; D4500D00
    v_interp_p1_f32_e32 v24, v0, attr2.y                                ; D4600900
    v_interp_p1_f32_e32 v17, v0, attr3.x                                ; D4440C00
    v_interp_p1_f32_e32 v21, v0, attr3.z                                ; D4540E00
    v_interp_p1_f32_e32 v22, v0, attr3.w                                ; D4580F00
    v_interp_p1_f32_e32 v23, v0, attr2.x                                ; D45C0800
    v_interp_p1_f32_e32 v0, v0, attr2.z                                 ; D4000A00
    v_interp_p2_f32_e32 v20, v1, attr3.y                                ; D4510D01
    v_interp_p2_f32_e32 v24, v1, attr2.y                                ; D4610901
    v_interp_p2_f32_e32 v17, v1, attr3.x                                ; D4450C01
    ; v25 starts accumulating |attr3.xyz|^2 as soon as attr3.y is complete.
    v_mul_f32_e32 v25, v20, v20                                         ; 0A322914
    v_interp_p2_f32_e32 v21, v1, attr3.z                                ; D4550E01
    v_interp_p2_f32_e32 v22, v1, attr3.w                                ; D4590F01
    v_interp_p2_f32_e32 v23, v1, attr2.x                                ; D45D0801
    v_interp_p2_f32_e32 v0, v1, attr2.z                                 ; D4010A01
    ; v1 = 1/|attr2.xyz|, v25 = 1/|attr3.xyz|
    v_mul_f32_e32 v1, v24, v24                                          ; 0A023118
    v_mac_f32_e32 v1, v23, v23                                          ; 2C022F17
    v_mac_f32_e32 v25, v17, v17                                         ; 2C322311
    v_mac_f32_e32 v1, v0, v0                                            ; 2C020100
    v_mac_f32_e32 v25, v21, v21                                         ; 2C322B15
    v_rsq_f32_e32 v1, v1                                                ; 7E024901
    v_rsq_f32_e32 v25, v25                                              ; 7E324919
    ; Remap v18 and v19 from [0, 1] to [-1, 1]: v = 2*v - 1.
    v_mad_f32 v18, v18, 2.0, -1.0                                       ; D1C10012 03CDE912
    v_mad_f32 v19, v19, 2.0, -1.0                                       ; D1C10013 03CDE913
    ; A = normalised attr2 = (v23, v24, v0); B = normalised attr3.xyz = (v1, v17, v20).
    v_mul_legacy_f32_e32 v23, v23, v1                                   ; 082E0317
    v_mul_legacy_f32_e32 v24, v24, v1                                   ; 08300318
    v_mul_legacy_f32_e32 v0, v0, v1                                     ; 08000300
    v_mul_legacy_f32_e32 v1, v17, v25                                   ; 08023311
    v_mul_legacy_f32_e32 v17, v20, v25                                  ; 08223314
    v_mul_legacy_f32_e32 v20, v21, v25                                  ; 08283315
    ; C = cross(A, B) in (v21, v25, v26) ...
    v_mul_f32_e32 v21, v17, v0                                          ; 0A2A0111
    v_mul_f32_e32 v25, v20, v23                                         ; 0A322F14
    v_mul_f32_e32 v26, v1, v24                                          ; 0A343101
    v_mad_f32 v21, v24, v20, -v21                                       ; D1C10015 84562918
    v_mad_f32 v25, v0, v1, -v25                                         ; D1C10019 84660300
    v_mad_f32 v26, v23, v17, -v26                                       ; D1C1001A 846A2317
    ; ... scaled by attr3.w (v22), leaving C in (v21, v25, v22).
    v_mul_f32_e32 v21, v22, v21                                         ; 0A2A2B16
    v_mul_f32_e32 v25, v22, v25                                         ; 0A323316
    v_mul_f32_e32 v22, v22, v26                                         ; 0A2C3516
    ; B *= s30 * v18
    v_mul_f32_e32 v26, s30, v18                                         ; 0A34241E
    v_mul_f32_e32 v1, v1, v26                                           ; 0A023501
    v_mul_f32_e32 v17, v17, v26                                         ; 0A223511
    v_mul_f32_e32 v20, v20, v26                                         ; 0A283514
    ; v26 = s30 * v19; v18 = sqrt(max(0, 1 - v19^2 - v18^2))
    v_mul_f32_e32 v26, s30, v19                                         ; 0A34261E
    v_mad_f32 v19, -v19, v19, 1.0                                       ; D1C10013 23CA2713
    v_mad_f32 v18, -v18, v18, v19                                       ; D1C10012 244E2512
    v_max_f32_e32 v18, 0, v18                                           ; 16242480
    v_sqrt_f32_e32 v18, v18                                             ; 7E244F12
    ; (v1, v17, v20) = B + C * v26 + A * v18
    v_mac_f32_e32 v17, v25, v26                                         ; 2C223519
    v_mac_f32_e32 v1, v21, v26                                          ; 2C023515
    v_mac_f32_e32 v20, v22, v26                                         ; 2C283516
    v_mac_f32_e32 v17, v24, v18                                         ; 2C222518
    v_mac_f32_e32 v20, v0, v18                                          ; 2C282500
    v_mac_f32_e32 v1, v23, v18                                          ; 2C022517
    ; v0 = reciprocal length of that vector.
    v_mul_f32_e32 v0, v17, v17                                          ; 0A002311
    v_mac_f32_e32 v0, v1, v1                                            ; 2C000301
    v_mac_f32_e32 v0, v20, v20                                          ; 2C002914
    v_rsq_f32_e32 v0, v0                                                ; 7E004900
    ; v19 = bit-reverse(1) = 0x80000000, the float sign bit.
    v_bfrev_b32_e32 v19, 1                                              ; 7E265881
    ; y component of the vector (s2 - v5, s3 - v4, s12 - v7) built below.
    v_sub_f32_e32 v21, s3, v4                                           ; 042A0803
    ; vcc = (v6 == 0); it selects the sign-flipped vector in the three v_cndmask below.
    ; NOTE(review): v6 is presumably a front-facing flag - confirm.
    v_cmp_eq_u32_e32 vcc, 0, v6                                         ; 7D940C80
    ; Normalise, then flip the sign of every component where vcc is set.
    ; The final unit vector is (v0, v1, v31).
    v_mul_legacy_f32_e32 v1, v1, v0                                     ; 08020101
    v_mul_legacy_f32_e32 v17, v17, v0                                   ; 08220111
    v_mul_legacy_f32_e32 v18, v20, v0                                   ; 08240114
    v_xor_b32_e32 v0, v1, v19                                           ; 2A002701
    v_xor_b32_e32 v20, v17, v19                                         ; 2A282711
    v_cndmask_b32_e32 v0, v1, v0, vcc                                   ; 00000101
    v_cndmask_b32_e32 v1, v17, v20, vcc                                 ; 00022911
    ; D = (v20, v21, v22) = (s2 - v5, s3 - v4, s12 - v7); v23 = 1/|D|.
    v_sub_f32_e32 v20, s2, v5                                           ; 04280A02
    v_mul_f32_e32 v6, v21, v21                                          ; 0A0C2B15
    v_sub_f32_e32 v22, s12, v7                                          ; 042C0E0C
    v_mac_f32_e32 v6, v20, v20                                          ; 2C0C2914
    v_mac_f32_e32 v6, v22, v22                                          ; 2C0C2D16
    v_rsq_f32_e32 v23, v6                                               ; 7E2E4906
    ; m = s29 * v16 in v17; v6 = 1 - m.
    v_mul_f32_e32 v17, s29, v16                                         ; 0A22201D
    v_mad_f32 v6, -s29, v16, 1.0                                        ; D1C10006 23CA201D
    ; Third component of the conditional sign flip -> v31.
    v_xor_b32_e32 v16, v18, v19                                         ; 2A202712
    v_cndmask_b32_e32 v31, v18, v16, vcc                                ; 003E2112
    ; (v36, v37, v35) = 0.04 + (s20*v9 - 0.04, s21*v10 - 0.04, s22*v11 - 0.04) * m
    ; 0xbd23d70a is about -0.04; 0x3d23d70a is about +0.04.
    v_mov_b32_e32 v16, 0xbd23d70a                                       ; 7E2002FF BD23D70A
    v_mad_f32 v18, s20, v9, v16                                         ; D1C10012 04421214
    v_mad_f32 v19, s21, v10, v16                                        ; D1C10013 04421415
    v_mac_f32_e32 v16, s22, v11                                         ; 2C201616
    v_mov_b32_e32 v35, 0x3d23d70a                                       ; 7E4602FF 3D23D70A
    v_mac_f32_e32 v35, v16, v17                                         ; 2C462310
    ; r = s28 * v15
    v_mul_f32_e32 v15, s28, v15                                         ; 0A1E1E1C
    ; Unit D is assembled as (v32, v33, v34), interleaved with the work below.
    v_mul_legacy_f32_e32 v33, v21, v23                                  ; 08422F15
    v_madak_f32 v36, v18, v17, 0x3d23d70a                               ; 30482312 3D23D70A
    v_madak_f32 v37, v19, v17, 0x3d23d70a                               ; 304A2313 3D23D70A
    ; v16 = 0.25 + 0.75*r and v17 = 1.25 + 0.75*r
    ; (0x3f400000 = 0.75, 0x3e800000 = 0.25, 0x3fa00000 = 1.25).
    s_mov_b32 s2, 0x3f400000                                            ; BE8200FF 3F400000
    v_mov_b32_e32 v16, 0x3e800000                                       ; 7E2002FF 3E800000
    v_mov_b32_e32 v17, 0x3fa00000                                       ; 7E2202FF 3FA00000
    v_mac_f32_e32 v16, s2, v15                                          ; 2C201E02
    v_mac_f32_e32 v17, s2, v15                                          ; 2C221E02
    v_mul_legacy_f32_e32 v32, v20, v23                                  ; 08402F14
    ; v15 = dot(unit D, (v0, v1, v31)); v38 = (0.25 + 0.75*r)^4.
    v_mul_f32_e32 v15, v33, v1                                          ; 0A1E0321
    v_mul_f32_e32 v16, v16, v16                                         ; 0A202110
    v_mul_legacy_f32_e32 v34, v22, v23                                  ; 08442F16
    v_mac_f32_e32 v15, v32, v0                                          ; 2C1E0120
    v_mul_f32_e32 v38, v16, v16                                         ; 0A4C2110
    v_mac_f32_e32 v15, v34, v31                                         ; 2C1E3F22
    ; s2 = about 0.001 from here on; BB1_12 keeps using it as a clamp floor.
    s_mov_b32 s2, 0x3a83126f                                            ; BE8200FF 3A83126F
    ; k = 0.125 * (1.25 + 0.75*r)^2: v39 = k, v40 = 1 - k,
    ; v41 = clamp(dot, 0.001, 1) * (1 - k) + k.
    v_mul_f32_e32 v16, v17, v17                                         ; 0A202311
    s_mov_b32 s3, 0x3e000000                                            ; BE8300FF 3E000000
    v_med3_f32 v15, v15, s2, 1.0                                        ; D1D6000F 03C8050F
    v_mul_f32_e32 v39, s3, v16                                          ; 0A4E2003
    v_mad_f32 v40, -v16, s3, 1.0                                        ; D1C10028 23C80710
    v_mad_f32 v41, v40, v15, v39                                        ; D1C10029 049E1F28
    ; Invert exec, write v45 = 0 in the lanes that were inactive, invert back, so
    ; those lanes contribute 0 to the OR reduction below. The vmcnt wait sits in
    ; between because the image_sample in BB1_7 may still be writing v45.
    s_not_b64 exec, exec                                                ; BEFE057E
    s_waitcnt vmcnt(0)                                                  ; BF8C0F70
    v_mov_b32_e32 v45, 0                                                ; 7E5A0280
    s_not_b64 exec, exec                                                ; BEFE057E
    ; Copy s20..s35 into v15..v30, interleaved with the remaining constants:
    ; v42 = s20*v9, v10 *= s21, v11 *= s22, v9 = 0, v43 = 1 - v36, v44 = 1 - v37.
    ; NOTE(review): v15, v16 and v17 are overwritten further down this block
    ; before any read, so at least those three copies are dead here.
    v_mov_b32_e32 v15, s20                                              ; 7E1E0214
    v_mul_f32_e32 v42, s20, v9                                          ; 0A541214
    v_mul_f32_e32 v10, s21, v10                                         ; 0A141415
    v_mul_f32_e32 v11, s22, v11                                         ; 0A161616
    v_mov_b32_e32 v9, 0                                                 ; 7E120280
    v_sub_f32_e32 v43, 1.0, v36                                         ; 045648F2
    v_sub_f32_e32 v44, 1.0, v37                                         ; 04584AF2
    v_mov_b32_e32 v16, s21                                              ; 7E200215
    v_mov_b32_e32 v17, s22                                              ; 7E220216
    v_mov_b32_e32 v18, s23                                              ; 7E240217
    v_mov_b32_e32 v19, s24                                              ; 7E260218
    v_mov_b32_e32 v20, s25                                              ; 7E280219
    v_mov_b32_e32 v21, s26                                              ; 7E2A021A
    v_mov_b32_e32 v22, s27                                              ; 7E2C021B
    v_mov_b32_e32 v23, s28                                              ; 7E2E021C
    v_mov_b32_e32 v24, s29                                              ; 7E30021D
    v_mov_b32_e32 v25, s30                                              ; 7E32021E
    v_mov_b32_e32 v26, s31                                              ; 7E34021F
    v_mov_b32_e32 v27, s32                                              ; 7E360220
    v_mov_b32_e32 v28, s33                                              ; 7E380221
    v_mov_b32_e32 v29, s34                                              ; 7E3A0222
    v_mov_b32_e32 v30, s35                                              ; 7E3C0223
    ; v15 = 1 - v35
    v_sub_f32_e32 v15, 1.0, v35                                         ; 041E46F2
    ; Wave-wide bitwise-OR scan of v45. exec is saved in s[12:13] and forced to
    ; all ones so every lane takes part. Each step copies the running value, pulls
    ; a neighbour's copy in through DPP (shift right by 1/2/4/8 within each row of
    ; 16 lanes, then broadcast lane 15 and lane 31 into the following rows) and ORs
    ; it in. Lane 63 ends up holding the OR over all 64 lanes. The s_nop 1 before
    ; each DPP move separates it from the VALU write of its source register.
    s_or_saveexec_b64 s[12:13], -1                                      ; BE8C21C1
    v_mov_b32_e32 v16, v45                                              ; 7E20032D
    s_nop 1                                                             ; BF800001
    v_mov_b32_dpp v16, v16  row_shr:1 row_mask:0xf bank_mask:0xf        ; 7E2002FA FF011110
    v_or_b32_e32 v16, v16, v45                                          ; 28205B10
    v_mov_b32_e32 v17, v16                                              ; 7E220310
    s_nop 1                                                             ; BF800001
    v_mov_b32_dpp v17, v17  row_shr:2 row_mask:0xf bank_mask:0xf        ; 7E2202FA FF011211
    v_or_b32_e32 v16, v16, v17                                          ; 28202310
    v_mov_b32_e32 v17, v16                                              ; 7E220310
    s_nop 1                                                             ; BF800001
    v_mov_b32_dpp v17, v17  row_shr:4 row_mask:0xf bank_mask:0xf        ; 7E2202FA FF011411
    v_or_b32_e32 v16, v16, v17                                          ; 28202310
    v_mov_b32_e32 v17, v16                                              ; 7E220310
    s_nop 1                                                             ; BF800001
    v_mov_b32_dpp v17, v17  row_shr:8 row_mask:0xf bank_mask:0xf        ; 7E2202FA FF011811
    v_or_b32_e32 v16, v16, v17                                          ; 28202310
    v_mov_b32_e32 v17, v16                                              ; 7E220310
    s_nop 1                                                             ; BF800001
    v_mov_b32_dpp v17, v17  row_bcast:15 row_mask:0xa bank_mask:0xf     ; 7E2202FA AF014211
    v_or_b32_e32 v16, v16, v17                                          ; 28202310
    v_mov_b32_e32 v17, v16                                              ; 7E220310
    s_nop 1                                                             ; BF800001
    v_mov_b32_dpp v17, v17  row_bcast:31 row_mask:0xc bank_mask:0xf     ; 7E2202FA CF014311
    v_or_b32_e32 v16, v16, v17                                          ; 28202310
    ; Restore the real exec mask.
    s_mov_b64 exec, s[12:13]                                            ; BEFE010C
    ; s3 = reduced mask from lane 63. The copy back into v16 and the
    ; readfirstlane that follow reproduce the same value in s3: this is the
    ; SGPR -> VGPR -> SGPR round trip the issue report complains about.
    v_readlane_b32 s3, v16, 63                                          ; D2890003 00017F10
    v_mov_b32_e32 v16, s3                                               ; 7E200203
    v_readfirstlane_b32 s3, v16                                         ; 7E060510
    ; scc = (mask == 0)
    s_cmp_eq_u32 s3, 0                                                  ; BF068003
    ; v16 and v17 are zeroed on both paths out of this block.
    v_mov_b32_e32 v16, 0                                                ; 7E200280
    v_mov_b32_e32 v17, 0                                                ; 7E220280
    ; Empty mask: skip the loop. The encoded branch offset is 0 in this dump.
    s_cbranch_scc1 BB1_13                                               ; BF850000
    ; Preheader of the loop at BB1_10 (unlabelled fall-through, reached only when
    ; the reduced mask in s3 is non-zero). Loads loop-invariant operands and
    ; materialises the constants the loop body reads.
    ; Image resource (8 dwords) and sampler (4 dwords) for the image_sample_lz in
    ; BB1_10, read from the table at s[0:1].
    s_load_dwordx8 s[20:27], s[0:1], 0xc0                               ; C00E0500 000000C0
    s_load_dwordx4 s[12:15], s[0:1], 0xe0                               ; C00A0300 000000E0
    ; v23 = working copy of the mask. It comes from an SGPR, so every lane holds
    ; the same value. s3 is reused as a constant immediately below.
    v_mov_b32_e32 v23, s3                                               ; 7E2E0203
    v_mov_b32_e32 v22, 0                                                ; 7E2C0280
    ; Byte offsets 0x50 / 0x58 / 0x60 / 0x68 that BB1_10 adds to index*48 to
    ; address per-entry data in the buffer s[8:11].
    s_movk_i32 s3, 0x50                                                 ; B0030050
    s_movk_i32 s16, 0x58                                                ; B0100058
    s_movk_i32 s17, 0x60                                                ; B0110060
    s_movk_i32 s28, 0x68                                                ; B01C0068
    ; Float constants: s29 is about 4/3, s30 about 9.999998, s31 about 1e-4,
    ; s32 about 0.3183.
    ; NOTE(review): s32 is presumably meant as 1/pi - confirm. s30 is not read
    ; anywhere in this excerpt.
    s_mov_b32 s29, 0x3faaaaab                                           ; BE9D00FF 3FAAAAAB
    s_mov_b32 s30, 0x411ffffe                                           ; BE9E00FF 411FFFFE
    s_mov_b32 s31, 0x38d1b717                                           ; BE9F00FF 38D1B717
    s_mov_b32 s32, 0x3ea2f9e9                                           ; BEA000FF 3EA2F9E9
    ; Zero-initialise v24 and v25 (together with v22 above).
    ; NOTE(review): presumably the loop's running sums - their uses are outside
    ; this excerpt.
    v_mov_b32_e32 v24, 0                                                ; 7E300280
    v_mov_b32_e32 v25, 0                                                ; 7E320280
BB1_10:
    ; Loop header: one iteration per set bit of the mask in v23 (BB1_12 clears the
    ; bit handled here). For the selected index it loads a 4x4 matrix and a
    ; 48-byte record from the buffer s[8:11], projects (x = v5, y = v4, z = v7),
    ; samples a texture at the projected xy, and narrows exec to the lanes that
    ; need the correction computed in BB1_11.
    ; Every scalar-load offset is computed in a VGPR and moved to an SGPR with
    ; v_readfirstlane, although v23 (and so the index) is the same in all lanes.
    ; This is the VGPR/SGPR ping-pong called out in the issue report.
    ; v45 = index of the lowest set bit of the mask; v27 = index * 64.
    v_ffbl_b32_e32 v45, v23                                             ; 7E5A5D17
    v_lshlrev_b32_e32 v27, 6, v45                                       ; 24365A86
    ; Four 16-byte loads at 0xc40/0xc50/0xc60/0xc70 + index*64 fill s[36:51],
    ; 64 contiguous bytes fetched as four separate x4 loads.
    v_add_u32_e32 v28, vcc, 0xc40, v27                                  ; 323836FF 00000C40
    v_readfirstlane_b32 s33, v28                                        ; 7E42051C
    v_add_u32_e32 v28, vcc, 0xc50, v27                                  ; 323836FF 00000C50
    s_buffer_load_dwordx4 s[36:39], s[8:11], s33                        ; C0280904 00000021
    v_readfirstlane_b32 s33, v28                                        ; 7E42051C
    s_buffer_load_dwordx4 s[40:43], s[8:11], s33                        ; C0280A04 00000021
    v_add_u32_e32 v28, vcc, 0xc60, v27                                  ; 323836FF 00000C60
    v_readfirstlane_b32 s33, v28                                        ; 7E42051C
    v_add_u32_e32 v27, vcc, 0xc70, v27                                  ; 323636FF 00000C70
    s_buffer_load_dwordx4 s[44:47], s[8:11], s33                        ; C0280B04 00000021
    v_readfirstlane_b32 s33, v27                                        ; 7E42051B
    s_buffer_load_dwordx4 s[48:51], s[8:11], s33                        ; C0280C04 00000021
    s_waitcnt lgkmcnt(0)                                                ; BF8C007F
    ; v29 = w = s43*y + s39*x + s47*z + s51, replaced in place by 1/w.
    ; v27 / v28 accumulate the x / y rows in between.
    v_mul_f32_e32 v29, s43, v4                                          ; 0A3A082B
    v_mac_f32_e32 v29, s39, v5                                          ; 2C3A0A27
    v_mac_f32_e32 v29, s47, v7                                          ; 2C3A0E2F
    v_mul_f32_e32 v27, s40, v4                                          ; 0A360828
    v_add_f32_e32 v29, s51, v29                                         ; 023A3A33
    v_mul_f32_e32 v28, s41, v4                                          ; 0A380829
    v_rcp_f32_e32 v29, v29                                              ; 7E3A451D
    v_mac_f32_e32 v27, s36, v5                                          ; 2C360A24
    v_mac_f32_e32 v28, s37, v5                                          ; 2C380A25
    v_mac_f32_e32 v27, s44, v7                                          ; 2C360E2C
    v_mac_f32_e32 v28, s45, v7                                          ; 2C380E2D
    v_add_f32_e32 v27, s48, v27                                         ; 02363630
    v_add_f32_e32 v28, s49, v28                                         ; 02383831
    ; Perspective divide, then sample four channels into v[48:51].
    v_mul_f32_e32 v27, v27, v29                                         ; 0A363B1B
    v_mul_f32_e32 v28, v28, v29                                         ; 0A383B1C
    image_sample_lz v[48:51], v[27:28], s[20:27], s[12:15] dmask:0xf    ; F09C0F00 0065301B
    ; While the sample is in flight, fetch the 48-byte record for this index:
    ;   s[38:39] <- index*48 + 0x50 (2 dwords)    s36 <- index*48 + 0x58
    ;   s[34:35] <- index*48 + 0x60 (2 dwords)    s33 <- index*48 + 0x68
    ; s33..s35 and s37 first hold the byte offsets and are then overwritten by
    ; loaded data. The last load takes its offset from s37 for that reason.
    v_mad_u32_u24 v27, v45, 48, s3                                      ; D1C3001B 000D612D
    v_mad_u32_u24 v28, v45, 48, s16                                     ; D1C3001C 0041612D
    v_mad_u32_u24 v29, v45, 48, s17                                     ; D1C3001D 0045612D
    v_readfirstlane_b32 s33, v27                                        ; 7E42051B
    v_mad_u32_u24 v30, v45, 48, s28                                     ; D1C3001E 0071612D
    v_readfirstlane_b32 s34, v28                                        ; 7E44051C
    v_readfirstlane_b32 s35, v29                                        ; 7E46051D
    s_buffer_load_dwordx2 s[38:39], s[8:11], s33                        ; C0240984 00000021
    v_readfirstlane_b32 s37, v30                                        ; 7E4A051E
    s_buffer_load_dword s36, s[8:11], s34                               ; C0200904 00000022
    s_buffer_load_dwordx2 s[34:35], s[8:11], s35                        ; C0240884 00000023
    s_buffer_load_dword s33, s[8:11], s37                               ; C0200844 00000025
    s_waitcnt lgkmcnt(0)                                                ; BF8C007F
    ; v_subrev computes src1 - src0: (v28, v29, v30) = (x - s38, y - s39, z - s36).
    v_subrev_f32_e32 v29, s39, v4                                       ; 063A0827
    v_subrev_f32_e32 v28, s38, v5                                       ; 06380A26
    v_subrev_f32_e32 v30, s36, v7                                       ; 063C0E24
    ; The sample result must be complete before v50 (one of its destination
    ; registers) is overwritten and v48 is read.
    s_waitcnt vmcnt(0)                                                  ; BF8C0F70
    ; v50 = dot((v28, v29, v30), (s34, s35, s33))
    v_mul_f32_e32 v50, s35, v29                                         ; 0A643A23
    v_mac_f32_e32 v50, s34, v28                                         ; 2C643822
    v_mac_f32_e32 v50, s33, v30                                         ; 2C643C21
    ; v26 = index * 48, kept for further record addressing in BB1_12.
    v_mul_u32_u24_e32 v26, 48, v45                                      ; 10345AB0
    ; Default v27 = 1.0 for lanes that skip BB1_11.
    v_mov_b32_e32 v27, 1.0                                              ; 7E3602F2
    ; Only lanes with v50 > v48 (first sampled channel) run BB1_11. The previous
    ; exec is saved in s[40:41] and restored at BB1_12. There is no execz branch
    ; here, so BB1_11 is always fetched even when exec ends up empty.
    v_cmp_gt_f32_e32 vcc, v50, v48                                      ; 7C886132
    s_and_saveexec_b64 s[40:41], vcc                                    ; BEA8206A
BB1_11:
    ; Runs only in lanes where v50 > v48 (exec was narrowed at the end of BB1_10).
    ; Same computation as BB1_5, on the loop's registers. With m1 = v48 and
    ; m2 = v49 (first two sampled channels) and d = v50 - m1:
    ;   var = max(m2 - m1*m1, 1e-5)
    ;   p   = var / (d*d + var)
    ;   v27 = clamp(-1/3 + p * 4/3)
    ; v27 = var (0x3727c5ac is about 1e-5)
    v_mad_f32 v27, -v48, v48, v49                                       ; D1C1001B 24C66130
    v_max_f32_e32 v27, 0x3727c5ac, v27                                  ; 163636FF 3727C5AC
    ; v48 = d, then d*d + var, then its reciprocal, then p.
    v_sub_f32_e32 v48, v50, v48                                         ; 04606132
    v_mad_f32 v48, v48, v48, v27                                        ; D1C10030 046E6130
    v_rcp_f32_e32 v48, v48                                              ; 7E604530
    v_mul_f32_e32 v48, v27, v48                                         ; 0A60611B
    ; Seed the accumulator with about -1/3, then v27 = clamp(v27 + p * s29).
    ; s29 holds 0x3faaaaab (about 4/3), set before the loop; the VOP3 clamp
    ; modifier saturates to [0, 1].
    v_mov_b32_e32 v27, 0xbeaaaaab                                       ; 7E3602FF BEAAAAAB
    v_mac_f32_e64 v27, v48, s29 clamp                                   ; D116801B 00003B30
BB1_12:
    s_or_b64 exec, exec, s[40:41]                                       ; 87FE287E
    v_sub_f32_e32 v49, s39, v4                                          ; 04620827
    v_sub_f32_e32 v48, s38, v5                                          ; 04600A26
    v_mul_f32_e32 v50, v49, v49                                         ; 0A646331
    v_mac_f32_e32 v50, v48, v48                                         ; 2C646130
    v_sub_f32_e32 v51, s36, v7                                          ; 04660E24
    v_mac_f32_e32 v50, v51, v51                                         ; 2C646733
    v_rsq_f32_e32 v52, v50                                              ; 7E684932
    v_lshlrev_b32_e64 v45, v45, 1                                       ; D112002D 0001032D
    v_xor_b32_e32 v23, v45, v23                                         ; 2A2E2F2D
    v_mul_legacy_f32_e32 v49, v49, v52                                  ; 08626931
    v_add_f32_e32 v53, v33, v49                                         ; 026A6321
    v_mul_legacy_f32_e32 v48, v48, v52                                  ; 08606930
    v_mul_legacy_f32_e32 v51, v51, v52                                  ; 08666933
    v_add_f32_e32 v52, v32, v48                                         ; 02686120
    v_mul_f32_e32 v54, v53, v53                                         ; 0A6C6B35
    v_mac_f32_e32 v54, v52, v52                                         ; 2C6C6934
    v_add_f32_e32 v55, v34, v51                                         ; 026E6722
    v_mac_f32_e32 v54, v55, v55                                         ; 2C6C6F37
    v_rsq_f32_e32 v54, v54                                              ; 7E6C4936
    v_mul_f32_e32 v49, v1, v49                                          ; 0A626301
    v_mac_f32_e32 v49, v0, v48                                          ; 2C626100
    v_mac_f32_e32 v49, v31, v51                                         ; 2C62671F
    v_mul_legacy_f32_e32 v45, v52, v54                                  ; 085A6D34
    v_mul_legacy_f32_e32 v52, v53, v54                                  ; 08686D35
    v_mul_f32_e32 v48, v33, v52                                         ; 0A606921
    v_mul_legacy_f32_e32 v53, v55, v54                                  ; 086A6D37
    v_mac_f32_e32 v48, v32, v45                                         ; 2C605B20
    v_mul_f32_e32 v51, v1, v52                                          ; 0A666901
    v_mac_f32_e32 v48, v34, v53                                         ; 2C606B22
    v_mac_f32_e32 v51, v0, v45                                          ; 2C665B00
    v_med3_f32 v48, v48, s2, 1.0                                        ; D1D60030 03C80530
    v_sub_f32_e32 v48, 1.0, v48                                         ; 046060F2
    v_mac_f32_e32 v51, v31, v53                                         ; 2C666B1F
    v_med3_f32 v45, v51, s31, 1.0                                       ; D1D6002D 03C83F33
    v_log_f32_e32 v48, v48                                              ; 7E604330
    v_mad_f32 v51, v38, v45, -v45                                       ; D1C10033 84B65B26
    v_med3_f32 v49, v49, s2, 1.0                                        ; D1D60031 03C80531
    v_mad_f32 v45, v45, v51, 1.0                                        ; D1C1002D 03CA672D
    v_mad_f32 v51, v40, v49, v39                                        ; D1C10033 049E6328
    v_mul_f32_e32 v51, v41, v51                                         ; 0A666729
    v_mul_legacy_f32_e32 v48, 0x40a00000, v48                           ; 086060FF 40A00000
    v_mul_f32_e32 v45, v45, v45                                         ; 0A5A5B2D
    v_max_f32_e32 v51, s2, v51                                          ; 16666602
    v_exp_f32_e32 v48, v48                                              ; 7E604130
    v_mul_f32_e32 v45, 0x40490f5d, v45                                  ; 0A5A5AFF 40490F5D
    v_rcp_f32_e32 v51, v51                                              ; 7E664533
    v_rcp_f32_e32 v45, v45                                              ; 7E5A452D
    v_mad_f32 v52, v43, v48, v36                                        ; D1C10034 0492612B
    v_mad_f32 v53, v44, v48, v37                                        ; D1C10035 0496612C
    v_mul_f32_e32 v51, 0x3e800000, v51                                  ; 0A6666FF 3E800000
    v_mad_f32 v48, v15, v48, v35                                        ; D1C10030 048E610F
    v_mul_f32_e32 v45, v38, v45                                         ; 0A5A5B26
    v_mul_f32_e32 v54, v51, v52                                         ; 0A6C6933
    v_mul_f32_e32 v55, v51, v53                                         ; 0A6E6B33
    v_mul_f32_e32 v51, v51, v48                                         ; 0A666133
    v_mul_f32_e32 v54, v54, v45                                         ; 0A6C5B36
    v_mul_f32_e32 v55, v55, v45                                         ; 0A6E5B37
    v_mul_f32_e32 v45, v51, v45                                         ; 0A5A5B33
    v_sub_f32_e32 v51, 1.0, v52                                         ; 046668F2
    v_sub_f32_e32 v52, 1.0, v53                                         ; 04686AF2
    v_mul_f32_e32 v53, v29, v29                                         ; 0A6A3B1D
    v_sub_f32_e32 v48, 1.0, v48                                         ; 046060F2
    v_mac_f32_e32 v53, v28, v28                                         ; 2C6A391C
    v_mul_f32_e32 v48, v49, v48                                         ; 0A606131
    v_mac_f32_e32 v53, v30, v30                                         ; 2C6A3D1E
    v_rsq_f32_e32 v53, v53                                              ; 7E6A4935
    v_mul_f32_e32 v48, s32, v48                                         ; 0A606020
    v_mul_f32_e32 v48, v11, v48                                         ; 0A60610B
    v_mul_f32_e32 v48, v6, v48                                          ; 0A606106
    v_mac_f32_e32 v48, v49, v45                                         ; 2C605B31
    v_or_b32_e32 v45, 12, v26                                           ; 285A348C
    v_mul_legacy_f32_e32 v29, v29, v53                                  ; 083A6B1D
    v_add_u32_e32 v45, vcc, 64, v45                                     ; 325A5AC0
    v_mul_f32_e32 v29, s35, v29                                         ; 0A3A3A23
    v_readfirstlane_b32 s35, v45                                        ; 7E46052D
    v_add_u32_e32 v45, vcc, 0x5c, v26                                   ; 325A34FF 0000005C
    v_readfirstlane_b32 s36, v45                                        ; 7E48052D
    s_buffer_load_dword s35, s[8:11], s35                               ; C02008C4 00000023
    s_buffer_load_dword s36, s[8:11], s36                               ; C0200904 00000024
    v_mul_legacy_f32_e32 v28, v28, v53                                  ; 08386B1C
    v_mul_legacy_f32_e32 v30, v30, v53                                  ; 083C6B1E
    v_mac_f32_e32 v29, s34, v28                                         ; 2C3A3822
    s_waitcnt lgkmcnt(0)                                                ; BF8C007F
    v_mov_b32_e32 v45, s35                                              ; 7E5A0223
    v_sub_f32_e32 v45, s36, v45                                         ; 045A5A24
    v_rcp_f32_e32 v45, v45                                              ; 7E5A452D
    v_mac_f32_e32 v29, s33, v30                                         ; 2C3A3C21
    v_subrev_f32_e32 v28, s35, v29                                      ; 06383A23
    v_add_u32_e32 v30, vcc, 0x6c, v26                                   ; 323C34FF 0000006C
    v_mul_f32_e64 v28, v28, v45 clamp                                   ; D105801C 00025B1C
    v_readfirstlane_b32 s33, v30                                        ; 7E42051E
    v_mul_f32_e32 v29, v28, v28                                         ; 0A3A391C
    v_madak_f32 v28, -2.0, v28, 0x40400000                              ; 303838F5 40400000
    v_mul_f32_e32 v28, v29, v28                                         ; 0A38391D
    v_sqrt_f32_e32 v29, v50                                             ; 7E3A4F32
    s_buffer_load_dword s33, s[8:11], s33                               ; C0200844 00000021
    v_mov_b32_e32 v30, 0xbf666666                                       ; 7E3C02FF BF666666
    v_mov_b32_e32 v45, 0xc0400000                                       ; 7E5A02FF C0400000
    v_max_f32_e32 v29, 0x3dcccccd, v29                                  ; 163A3AFF 3DCCCCCD
    v_mul_f32_e32 v51, v49, v51                                         ; 0A666731
    s_waitcnt lgkmcnt(0)                                                ; BF8C007F
    v_mac_f32_e32 v30, s33, v29                                         ; 2C3C3A21
    v_mul_f32_e64 v30, v30, s30 clamp                                   ; D105801E 00003D1E
    v_mac_f32_e32 v45, 2.0, v30                                         ; 2C5A3CF4
    v_mul_f32_e32 v30, v30, v30                                         ; 0A3C3D1E
    v_mad_f32 v30, v30, v45, 1.0                                        ; D1C1001E 03CA5B1E
    v_mul_f32_e32 v27, v27, v30                                         ; 0A363D1B
    v_add_u32_e32 v30, vcc, 64, v26                                     ; 323C34C0
    v_mul_f32_e32 v29, v29, v29                                         ; 0A3A3B1D
    v_readfirstlane_b32 s33, v30                                        ; 7E42051E
    v_add_u32_e32 v26, vcc, 0x48, v26                                   ; 323434FF 00000048
    s_buffer_load_dwordx2 s[34:35], s[8:11], s33                        ; C0240884 00000021
    v_readfirstlane_b32 s33, v26                                        ; 7E42051A
    v_rcp_f32_e32 v29, v29                                              ; 7E3A451D
    s_buffer_load_dword s33, s[8:11], s33                               ; C0200844 00000021
    v_mul_f32_e32 v51, s32, v51                                         ; 0A666620
    v_mul_f32_e32 v52, v49, v52                                         ; 0A686931
    v_mul_f32_e32 v51, v42, v51                                         ; 0A66672A
    v_mul_f32_e32 v52, s32, v52                                         ; 0A686820
    v_mul_f32_e32 v27, v28, v27                                         ; 0A36371C
    v_mul_f32_e32 v51, v6, v51                                          ; 0A666706
    v_mul_f32_e32 v52, v10, v52                                         ; 0A68690A
    v_mul_f32_e32 v27, v29, v27                                         ; 0A36371D
    v_mul_f32_e32 v52, v6, v52                                          ; 0A686906
    v_mac_f32_e32 v51, v49, v54                                         ; 2C666D31
    s_waitcnt lgkmcnt(0)                                                ; BF8C007F
    v_mul_f32_e32 v28, s34, v27                                         ; 0A383622
    v_mac_f32_e32 v22, v51, v28                                         ; 2C2C3933
    v_mul_f32_e32 v26, s33, v27                                         ; 0A343621
    v_mac_f32_e32 v52, v49, v55                                         ; 2C686F31
    v_mul_f32_e32 v28, s35, v27                                         ; 0A383623
    v_cmp_ne_u32_e32 vcc, 0, v23                                        ; 7D9A2E80
    v_mac_f32_e32 v24, v52, v28                                         ; 2C303934
    v_mac_f32_e32 v25, v48, v26                                         ; 2C323530
    s_and_b64 vcc, exec, vcc                                            ; 86EA6A7E
    v_mov_b32_e32 v9, v25                                               ; 7E120319
    v_mov_b32_e32 v16, v24                                              ; 7E200318
    v_mov_b32_e32 v17, v22                                              ; 7E220316
    s_cbranch_vccnz BB1_10                                              ; BF870000
; BB1_13: fall-through target once the BB1_10 loop's back-edge is not taken
; (other predecessors, if any, are outside this excerpt). Loads more scalars
; from the buffer described by s[4:7], evaluates one more term into v14/v18/v13,
; then ORs v46 across the whole wave with DPP and branches to BB1_18 when the
; combined mask is zero; otherwise sets up constants for the BB1_15 loop.
; NOTE(review): the normalise / med3 / (1-x)^5 / rcp pattern resembles a
; Fresnel + specular lighting term - confirm against the shader source.
BB1_13:
    ; Scalar loads at constant byte offsets 0xc0..0xd8 from the buffer at s[4:7].
    s_buffer_load_dword s3, s[4:7], 0xd8                                ; C02200C2 000000D8
    s_buffer_load_dwordx2 s[12:13], s[4:7], 0xc0                        ; C0260302 000000C0
    s_buffer_load_dword s16, s[4:7], 0xc8                               ; C0220402 000000C8
    s_buffer_load_dwordx2 s[14:15], s[4:7], 0xd0                        ; C0260382 000000D0
    v_mul_f32_e32 v12, v18, v12                                         ; 0A181912
    s_waitcnt lgkmcnt(0)                                                ; BF8C007F
    ; (v22, v23, v18) = (s14, s15, s3) + (v32, v33, v34); v24 = rsq of its squared
    ; length, used further down to scale the three components.
    v_add_f32_e32 v18, s3, v34                                          ; 02244403
    v_madak_f32 v14, 4.0, v14, 0xc0400000                               ; 301C1CF6 C0400000
    v_max_f32_e64 v14, v14, v14 clamp                                   ; D10B800E 00021D0E
    v_add_f32_e32 v23, s15, v33                                         ; 022E420F
    v_add_f32_e32 v22, s14, v32                                         ; 022C400E
    v_mul_f32_e32 v24, v23, v23                                         ; 0A302F17
    v_mac_f32_e32 v24, v22, v22                                         ; 2C302D16
    v_mac_f32_e32 v24, v18, v18                                         ; 2C302512
    v_rsq_f32_e32 v24, v24                                              ; 7E304918
    v_sub_f32_e32 v25, v47, v13                                         ; 04321B2F
    v_mac_f32_e32 v13, v14, v25                                         ; 2C1A330E
    s_mov_b32 s17, 0x38d1b717                                           ; BE9100FF 38D1B717
    v_mul_legacy_f32_e32 v14, v22, v24                                  ; 081C3116
    v_mul_legacy_f32_e32 v22, v23, v24                                  ; 082C3117
    v_mul_f32_e32 v23, v33, v22                                         ; 0A2E2D21
    v_mul_f32_e32 v22, v1, v22                                          ; 0A2C2D01
    v_mul_legacy_f32_e32 v18, v18, v24                                  ; 08243112
    v_mac_f32_e32 v22, v0, v14                                          ; 2C2C1D00
    v_mac_f32_e32 v22, v31, v18                                         ; 2C2C251F
    v_mac_f32_e32 v23, v32, v14                                         ; 2C2E1D20
    v_med3_f32 v14, v22, s17, 1.0                                       ; D1D6000E 03C82316
    v_mac_f32_e32 v23, v34, v18                                         ; 2C2E2522
    v_mad_f32 v18, v38, v14, -v14                                       ; D1C10012 843A1D26
    v_mad_f32 v14, v14, v18, 1.0                                        ; D1C1000E 03CA250E
    v_med3_f32 v18, v23, s2, 1.0                                        ; D1D60012 03C80517
    ; v23 = exp2(5.0 * log2(1.0 - v18)), i.e. (1 - x)^5; 0x40a00000 is 5.0f.
    ; The multiply and exp are a few lines below, interleaved with other work.
    v_sub_f32_e32 v18, 1.0, v18                                         ; 042424F2
    v_log_f32_e32 v18, v18                                              ; 7E244312
    v_mul_f32_e32 v22, s15, v1                                          ; 0A2C020F
    v_mac_f32_e32 v22, s14, v0                                          ; 2C2C000E
    v_mac_f32_e32 v22, s3, v31                                          ; 2C2C3E03
    v_med3_f32 v22, v22, s2, 1.0                                        ; D1D60016 03C80516
    v_mul_legacy_f32_e32 v18, 0x40a00000, v18                           ; 082424FF 40A00000
    v_exp_f32_e32 v23, v18                                              ; 7E2E4112
    v_mad_f32 v18, v40, v22, v39                                        ; D1C10012 049E2D28
    v_mul_f32_e32 v14, v14, v14                                         ; 0A1C1D0E
    v_mul_f32_e32 v18, v41, v18                                         ; 0A242529
    v_max_f32_e32 v18, s2, v18                                          ; 16242402
    v_mul_f32_e32 v14, 0x40490f5d, v14                                  ; 0A1C1CFF 40490F5D
    v_rcp_f32_e32 v14, v14                                              ; 7E1C450E
    v_rcp_f32_e32 v18, v18                                              ; 7E244512
    v_mul_f32_e32 v24, s12, v22                                         ; 0A302C0C
    v_mul_f32_e32 v24, v24, v13                                         ; 0A301B18
    v_mul_f32_e32 v25, v38, v14                                         ; 0A321D26
    v_mul_f32_e32 v26, 0x3e800000, v18                                  ; 0A3424FF 3E800000
    v_mad_f32 v14, v43, v23, v36                                        ; D1C1000E 04922F2B
    v_mul_f32_e32 v18, v26, v14                                         ; 0A241D1A
    v_sub_f32_e32 v14, 1.0, v14                                         ; 041C1CF2
    v_mul_f32_e32 v14, v14, v24                                         ; 0A1C310E
    ; s2 = 0x3ea2f9e9 (~0.3183) and v27 = 0x3dcccccd (0.1f) feed the mads below.
    s_mov_b32 s2, 0x3ea2f9e9                                            ; BE8200FF 3EA2F9E9
    v_mov_b32_e32 v27, 0x3dcccccd                                       ; 7E3602FF 3DCCCCCD
    v_mad_f32 v14, s2, v14, v27                                         ; D1C1000E 046E1C02
    v_mul_f32_e32 v14, v42, v14                                         ; 0A1C1D2A
    v_mul_f32_e32 v18, v18, v25                                         ; 0A243312
    v_mul_f32_e32 v14, v6, v14                                          ; 0A1C1D06
    v_mac_f32_e32 v14, v18, v24                                         ; 2C1C3112
    v_mul_f32_e32 v18, s13, v22                                         ; 0A242C0D
    v_mul_f32_e32 v22, s16, v22                                         ; 0A2C2C10
    v_mul_f32_e32 v24, v18, v13                                         ; 0A301B12
    v_mad_f32 v18, v44, v23, v37                                        ; D1C10012 04962F2C
    v_mul_f32_e32 v22, v22, v13                                         ; 0A2C1B16
    v_mad_f32 v13, v15, v23, v35                                        ; D1C1000D 048E2F0F
    v_mul_f32_e32 v28, v26, v18                                         ; 0A38251A
    v_mul_f32_e32 v23, v26, v13                                         ; 0A2E1B1A
    v_sub_f32_e32 v18, 1.0, v18                                         ; 042424F2
    v_sub_f32_e32 v13, 1.0, v13                                         ; 041A1AF2
    v_mul_f32_e32 v18, v18, v24                                         ; 0A243112
    v_mul_f32_e32 v13, v13, v22                                         ; 0A1A2D0D
    v_mad_f32 v18, s2, v18, v27                                         ; D1C10012 046E2402
    v_mac_f32_e32 v27, s2, v13                                          ; 2C361A02
    v_mul_f32_e32 v18, v10, v18                                         ; 0A24250A
    v_mul_f32_e32 v13, v11, v27                                         ; 0A1A370B
    v_mul_f32_e32 v28, v28, v25                                         ; 0A38331C
    v_mul_f32_e32 v18, v6, v18                                          ; 0A242506
    v_mul_f32_e32 v23, v23, v25                                         ; 0A2E3317
    v_mul_f32_e32 v13, v6, v13                                          ; 0A1A1B06
    v_mac_f32_e32 v18, v28, v24                                         ; 2C24311C
    v_mac_f32_e32 v13, v23, v22                                         ; 2C1A2D17
    ; Invert exec, zero v46 in the lanes that were inactive, invert exec back,
    ; so those lanes contribute 0 to the OR below.
    s_not_b64 exec, exec                                                ; BEFE057E
    v_mov_b32_e32 v46, 0                                                ; 7E5C0280
    s_not_b64 exec, exec                                                ; BEFE057E
    ; Save exec in s[2:3] and enable every lane for the cross-lane reduction.
    s_or_saveexec_b64 s[2:3], -1                                        ; BE8221C1
    ; OR-scan of v46 into v22 via DPP: row_shr 1/2/4/8, then row_bcast:15 and
    ; row_bcast:31. Each step is a DPP v_mov into a temporary followed by a
    ; separate v_or; the issue text contrasts this with the Windows compiler,
    ; which emits a single v_or carrying the DPP modifier.
    v_mov_b32_e32 v22, v46                                              ; 7E2C032E
    s_nop 1                                                             ; BF800001
    v_mov_b32_dpp v22, v22  row_shr:1 row_mask:0xf bank_mask:0xf        ; 7E2C02FA FF011116
    v_or_b32_e32 v22, v22, v46                                          ; 282C5D16
    v_mov_b32_e32 v23, v22                                              ; 7E2E0316
    s_nop 1                                                             ; BF800001
    v_mov_b32_dpp v23, v23  row_shr:2 row_mask:0xf bank_mask:0xf        ; 7E2E02FA FF011217
    v_or_b32_e32 v22, v22, v23                                          ; 282C2F16
    v_mov_b32_e32 v23, v22                                              ; 7E2E0316
    s_nop 1                                                             ; BF800001
    v_mov_b32_dpp v23, v23  row_shr:4 row_mask:0xf bank_mask:0xf        ; 7E2E02FA FF011417
    v_or_b32_e32 v22, v22, v23                                          ; 282C2F16
    v_mov_b32_e32 v23, v22                                              ; 7E2E0316
    s_nop 1                                                             ; BF800001
    v_mov_b32_dpp v23, v23  row_shr:8 row_mask:0xf bank_mask:0xf        ; 7E2E02FA FF011817
    v_or_b32_e32 v22, v22, v23                                          ; 282C2F16
    v_mov_b32_e32 v23, v22                                              ; 7E2E0316
    s_nop 1                                                             ; BF800001
    v_mov_b32_dpp v23, v23  row_bcast:15 row_mask:0xa bank_mask:0xf     ; 7E2E02FA AF014217
    v_or_b32_e32 v22, v22, v23                                          ; 282C2F16
    v_mov_b32_e32 v23, v22                                              ; 7E2E0316
    s_nop 1                                                             ; BF800001
    v_mov_b32_dpp v23, v23  row_bcast:31 row_mask:0xc bank_mask:0xf     ; 7E2E02FA CF014317
    v_or_b32_e32 v22, v22, v23                                          ; 282C2F16
    ; Restore the saved exec, then mask it with s[18:19].
    s_mov_b64 exec, s[2:3]                                              ; BEFE0102
    s_and_b64 exec, exec, s[18:19]                                      ; 86FE127E
    ; Lane 63 of v22 -> s2 (the end of the scan). The s2 -> v22 -> s2 round trip
    ; that follows appears redundant - NOTE(review): confirm it is dead code.
    v_readlane_b32 s2, v22, 63                                          ; D2890002 00017F16
    v_mov_b32_e32 v22, s2                                               ; 7E2C0202
    v_readfirstlane_b32 s2, v22                                         ; 7E040516
    ; Combined mask is zero: skip the BB1_15 loop entirely.
    s_cmp_eq_u32 s2, 0                                                  ; BF068002
    s_cbranch_scc1 BB1_18                                               ; BF850000
    ; Loop preamble: s[12:19] / s[20:23] are the resource and sampler operands
    ; of the image_sample_lz in BB1_15.
    s_load_dwordx8 s[12:19], s[0:1], 0xf0                               ; C00E0300 000000F0
    s_load_dwordx4 s[20:23], s[0:1], 0x110                              ; C00A0500 00000110
    ; v22 = bitmask consumed by the loop; s2/s3 = base byte offsets 0x650/0x658;
    ; s24..s28 = float literals (~1.3333, ~10.0, 0.001, 0.0001, ~0.3183).
    v_mov_b32_e32 v22, s2                                               ; 7E2C0202
    s_movk_i32 s2, 0x650                                                ; B0020650
    s_movk_i32 s3, 0x658                                                ; B0030658
    s_mov_b32 s24, 0x3faaaaab                                           ; BE9800FF 3FAAAAAB
    s_mov_b32 s25, 0x411ffffe                                           ; BE9900FF 411FFFFE
    s_mov_b32 s26, 0x3a83126f                                           ; BE9A00FF 3A83126F
    s_mov_b32 s27, 0x38d1b717                                           ; BE9B00FF 38D1B717
    s_mov_b32 s28, 0x3ea2f9e9                                           ; BE9C00FF 3EA2F9E9
; BB1_15: loop head. v22 is a bitmask; each trip handles its lowest set bit
; (index in v23). Per-index data is read from the buffer at s[8:11] at
; 48-byte-strided offsets, plus one dword at a 32-byte-strided offset.
; NOTE(review): the cube-face math followed by an array sample looks like a
; cube-map shadow lookup - confirm against the shader source.
BB1_15:
    ; v23 = index of the lowest set bit of v22.
    v_ffbl_b32_e32 v23, v22                                             ; 7E2E5D16
    ; Offsets are computed in VGPRs and moved to an SGPR with v_readfirstlane
    ; before each s_buffer_load (the VGPR/SGPR ping-pong described in the issue).
    ; s[30:31] = buf[s2 + 48*v23], s29 = buf[s3 + 48*v23].
    v_mad_u32_u24 v25, v23, 48, s2                                      ; D1C30019 00096117
    v_readfirstlane_b32 s29, v25                                        ; 7E3A0519
    v_mad_u32_u24 v25, v23, 48, s3                                      ; D1C30019 000D6117
    s_buffer_load_dwordx2 s[30:31], s[8:11], s29                        ; C0240784 0000001D
    v_readfirstlane_b32 s29, v25                                        ; 7E3A0519
    s_buffer_load_dword s29, s[8:11], s29                               ; C0200744 0000001D
    ; Offset ((v23 << 5) | 16) + 0x1440 for the third load, issued after the wait.
    v_lshlrev_b32_e32 v25, 5, v23                                       ; 24322E85
    v_or_b32_e32 v25, 16, v25                                           ; 28323290
    v_add_u32_e32 v25, vcc, 0x1440, v25                                 ; 323232FF 00001440
    s_waitcnt lgkmcnt(0)                                                ; BF8C007F
    ; (v26, v27, v28) = (v5, v4, v7) - (s30, s31, s29).
    v_subrev_f32_e32 v26, s30, v5                                       ; 06340A1E
    v_subrev_f32_e32 v28, s29, v7                                       ; 06380E1D
    v_readfirstlane_b32 s29, v25                                        ; 7E3A0519
    s_buffer_load_dword s29, s[8:11], s29                               ; C0200744 0000001D
    v_subrev_f32_e32 v27, s31, v4                                       ; 0636081F
    ; Cube-map helpers on (v26, v27, v28): major axis (v30, then 1/|v30|),
    ; face id (v48), and the two face coordinates (v29, v47).
    v_cubema_f32 v30, v26, v27, v28                                     ; D1C7001E 0472371A
    v_rcp_f32_e64 v30, |v30|                                            ; D162011E 0000011E
    v_cubeid_f32 v48, v26, v27, v28                                     ; D1C40030 0472371A
    s_waitcnt lgkmcnt(0)                                                ; BF8C007F
    ; v48 = face id + 8.0 * rndne(s29); v45 = v29 * v30 + 1.5;
    ; v46 = v47 * v30 + 1.5; v47 = rndne(v48). v[45:48] is the sample address.
    v_rndne_f32_e32 v25, s29                                            ; 7E323C1D
    v_cubesc_f32 v29, v26, v27, v28                                     ; D1C5001D 0472371A
    v_mac_f32_e32 v48, 0x41000000, v25                                  ; 2C6032FF 41000000
    v_cubetc_f32 v47, v26, v27, v28                                     ; D1C6002F 0472371A
    v_mov_b32_e32 v46, 0x3fc00000                                       ; 7E5C02FF 3FC00000
    v_madak_f32 v45, v29, v30, 0x3fc00000                               ; 305A3D1D 3FC00000
    v_mac_f32_e32 v46, v47, v30                                         ; 2C5C3D2F
    v_rndne_f32_e32 v47, v48                                            ; 7E5E3D30
    image_sample_lz v[45:48], v[45:48], s[12:19], s[20:23] dmask:0xf da ; F09C4F00 00A32D2D
    ; v29 = max(|v26|, |v27|, |v28|); v24 = 48 * v23 (byte offset reused in
    ; BB1_17); v25 defaults to 1.0 for lanes that skip BB1_16.
    v_max3_f32 v29, |v26|, |v27|, |v28|                                 ; D1D3071D 0472371A
    v_mul_u32_u24_e32 v24, 48, v23                                      ; 10302EB0
    v_mov_b32_e32 v25, 1.0                                              ; 7E3202F2
    ; Wait for the sample, then keep only lanes where v29 > v45 (first sampled
    ; channel); the previous exec is saved in s[30:31] and restored in BB1_17.
    s_waitcnt vmcnt(0)                                                  ; BF8C0F70
    v_cmp_gt_f32_e32 vcc, v29, v45                                      ; 7C885B1D
    s_and_saveexec_b64 s[30:31], vcc                                    ; BE9E206A
; BB1_16: runs only in lanes that passed the v29 > v45 compare at the end of
; BB1_15. Overwrites v25 (preset to 1.0) with clamp(s24 * p - 1/3), where
;   var = max(v46 - v45*v45, 1e-5), d = v29 - v45, p = var / (d*d + var)
; and s24 = 0x3faaaaab (~1.3333).
; NOTE(review): this has the shape of a variance-shadow-map (Chebyshev)
; estimate with light-bleed reduction - confirm against the shader source.
BB1_16:
    v_mad_f32 v25, -v45, v45, v46                                       ; D1C10019 24BA5B2D
    v_max_f32_e32 v25, 0x3727c5ac, v25                                  ; 163232FF 3727C5AC
    v_sub_f32_e32 v29, v29, v45                                         ; 043A5B1D
    v_mad_f32 v29, v29, v29, v25                                        ; D1C1001D 04663B1D
    v_rcp_f32_e32 v29, v29                                              ; 7E3A451D
    v_mul_f32_e32 v29, v25, v29                                         ; 0A3A3B19
    ; v25 = clamp(-0.3333 + p * s24); 0xbeaaaaab is ~-1/3.
    v_mov_b32_e32 v25, 0xbeaaaaab                                       ; 7E3202FF BEAAAAAB
    v_mac_f32_e64 v25, v29, s24 clamp                                   ; D1168019 0000311D
; BB1_17: rejoin point after the conditional BB1_16. Restores exec, clears the
; bit handled this trip from v22, evaluates a shading term for the current
; index and accumulates it into v17/v16/v9, then loops to BB1_15 while any bit
; remains in v22.
; NOTE(review): the arithmetic mirrors the per-light term seen in the earlier
; blocks of this listing (normalise, med3 clamps, (1-x)^5, rcp) - confirm.
BB1_17:
    ; Re-enable the lanes masked off by s_and_saveexec in BB1_15.
    s_or_b64 exec, exec, s[30:31]                                       ; 87FE1E7E
    ; v29 = v26^2 + v27^2 + v28^2, v30 = rsq(v29).
    v_mul_f32_e32 v29, v27, v27                                         ; 0A3A371B
    v_mac_f32_e32 v29, v26, v26                                         ; 2C3A351A
    v_mac_f32_e32 v29, v28, v28                                         ; 2C3A391C
    v_rsq_f32_e32 v30, v29                                              ; 7E3C491D
    ; v22 ^= (1 << v23): drop the bit handled this trip.
    v_lshlrev_b32_e64 v23, v23, 1                                       ; D1120017 00010317
    v_xor_b32_e32 v22, v23, v22                                         ; 2A2C2D17
    ; (v26, v27, v28) = -(v26, v27, v28) * v30, interleaved with the sum
    ; (v30, v45, v47) = (v32, v33, v34) + that vector, which is then scaled by
    ; rsq of its own squared length (v46).
    v_mul_legacy_f32_e64 v27, -v27, v30                                 ; D104001B 20023D1B
    v_add_f32_e32 v45, v33, v27                                         ; 025A3721
    v_mul_legacy_f32_e64 v26, -v26, v30                                 ; D104001A 20023D1A
    v_mul_legacy_f32_e64 v28, -v28, v30                                 ; D104001C 20023D1C
    v_add_f32_e32 v30, v32, v26                                         ; 023C3520
    v_mul_f32_e32 v46, v45, v45                                         ; 0A5C5B2D
    v_mac_f32_e32 v46, v30, v30                                         ; 2C5C3D1E
    v_add_f32_e32 v47, v34, v28                                         ; 025E3922
    v_mac_f32_e32 v46, v47, v47                                         ; 2C5C5F2F
    v_rsq_f32_e32 v46, v46                                              ; 7E5C492E
    v_mul_f32_e32 v27, v1, v27                                          ; 0A363701
    v_mac_f32_e32 v27, v0, v26                                          ; 2C363500
    v_mac_f32_e32 v27, v31, v28                                         ; 2C36391F
    v_mul_legacy_f32_e32 v45, v45, v46                                  ; 085A5D2D
    v_mul_legacy_f32_e32 v30, v30, v46                                  ; 083C5D1E
    v_mul_f32_e32 v26, v33, v45                                         ; 0A345B21
    v_mul_legacy_f32_e32 v46, v47, v46                                  ; 085C5D2F
    v_mac_f32_e32 v26, v32, v30                                         ; 2C343D20
    v_mul_f32_e32 v28, v1, v45                                          ; 0A385B01
    v_mac_f32_e32 v26, v34, v46                                         ; 2C345D22
    v_mac_f32_e32 v28, v0, v30                                          ; 2C383D00
    v_med3_f32 v26, v26, s26, 1.0                                       ; D1D6001A 03C8351A
    v_mac_f32_e32 v28, v31, v46                                         ; 2C385D1F
    ; v26 = exp2(5.0 * log2(1.0 - v26)), i.e. (1 - x)^5; 0x40a00000 is 5.0f.
    v_sub_f32_e32 v26, 1.0, v26                                         ; 043434F2
    v_log_f32_e32 v26, v26                                              ; 7E34431A
    v_med3_f32 v28, v28, s27, 1.0                                       ; D1D6001C 03C8371C
    v_mad_f32 v30, v38, v28, -v28                                       ; D1C1001E 84723926
    v_med3_f32 v27, v27, s26, 1.0                                       ; D1D6001B 03C8351B
    v_mad_f32 v28, v28, v30, 1.0                                        ; D1C1001C 03CA3D1C
    v_mad_f32 v30, v40, v27, v39                                        ; D1C1001E 049E3728
    v_mul_f32_e32 v30, v41, v30                                         ; 0A3C3D29
    v_mul_legacy_f32_e32 v26, 0x40a00000, v26                           ; 083434FF 40A00000
    v_max_f32_e32 v30, s26, v30                                         ; 163C3C1A
    v_exp_f32_e32 v26, v26                                              ; 7E34411A
    v_mul_f32_e32 v28, v28, v28                                         ; 0A38391C
    v_rcp_f32_e32 v30, v30                                              ; 7E3C451E
    v_mul_f32_e32 v28, 0x40490f5d, v28                                  ; 0A3838FF 40490F5D
    v_rcp_f32_e32 v28, v28                                              ; 7E38451C
    v_mad_f32 v45, v43, v26, v36                                        ; D1C1002D 0492352B
    v_mad_f32 v46, v44, v26, v37                                        ; D1C1002E 0496352C
    v_mul_f32_e32 v30, 0x3e800000, v30                                  ; 0A3C3CFF 3E800000
    v_mad_f32 v26, v15, v26, v35                                        ; D1C1001A 048E350F
    v_mul_f32_e32 v47, v30, v45                                         ; 0A5E5B1E
    v_mul_f32_e32 v48, v30, v46                                         ; 0A605D1E
    v_mul_f32_e32 v30, v30, v26                                         ; 0A3C351E
    v_sub_f32_e32 v26, 1.0, v26                                         ; 043434F2
    v_mul_f32_e32 v28, v38, v28                                         ; 0A383926
    v_mul_f32_e32 v26, v27, v26                                         ; 0A34351B
    v_mul_f32_e32 v26, s28, v26                                         ; 0A34341C
    v_mul_f32_e32 v47, v47, v28                                         ; 0A5E392F
    v_mul_f32_e32 v48, v48, v28                                         ; 0A603930
    v_mul_f32_e32 v28, v30, v28                                         ; 0A38391E
    v_sub_f32_e32 v30, 1.0, v45                                         ; 043C5AF2
    v_sub_f32_e32 v45, 1.0, v46                                         ; 045A5CF2
    v_mul_f32_e32 v30, v27, v30                                         ; 0A3C3D1B
    v_mul_f32_e32 v45, v27, v45                                         ; 0A5A5B1B
    v_mul_f32_e32 v26, v11, v26                                         ; 0A34350B
    v_mul_f32_e32 v30, s28, v30                                         ; 0A3C3C1C
    v_mul_f32_e32 v45, s28, v45                                         ; 0A5A5A1C
    v_mul_f32_e32 v26, v6, v26                                          ; 0A343506
    v_mul_f32_e32 v30, v42, v30                                         ; 0A3C3D2A
    v_mul_f32_e32 v45, v10, v45                                         ; 0A5A5B0A
    v_mac_f32_e32 v26, v27, v28                                         ; 2C34391B
    ; s29 = buf[0x66c + v24] (v24 = 48 * index), again via VGPR add +
    ; v_readfirstlane rather than scalar address math.
    v_add_u32_e32 v28, vcc, 0x66c, v24                                  ; 323830FF 0000066C
    v_readfirstlane_b32 s29, v28                                        ; 7E3A051C
    v_mul_f32_e32 v30, v6, v30                                          ; 0A3C3D06
    v_mul_f32_e32 v45, v6, v45                                          ; 0A5A5B06
    v_mac_f32_e32 v30, v27, v47                                         ; 2C3C5F1B
    v_mac_f32_e32 v45, v27, v48                                         ; 2C5A611B
    v_sqrt_f32_e32 v27, v29                                             ; 7E364F1D
    s_buffer_load_dword s29, s[8:11], s29                               ; C0200744 0000001D
    v_mov_b32_e32 v28, 0xbf666666                                       ; 7E3802FF BF666666
    v_max_f32_e32 v27, 0x3dcccccd, v27                                  ; 163636FF 3DCCCCCD
    s_waitcnt lgkmcnt(0)                                                ; BF8C007F
    ; t = clamp((s29 * v27 - 0.9) * s25); v25 *= t*t*(2*t - 3) + 1.
    v_mac_f32_e32 v28, s29, v27                                         ; 2C38361D
    v_mul_f32_e64 v28, v28, s25 clamp                                   ; D105801C 0000331C
    v_mul_f32_e32 v29, v28, v28                                         ; 0A3A391C
    v_madak_f32 v28, 2.0, v28, 0xc0400000                               ; 303838F4 C0400000
    v_mad_f32 v28, v29, v28, 1.0                                        ; D1C1001C 03CA391D
    v_mul_f32_e32 v25, v25, v28                                         ; 0A323919
    ; s[30:31] = buf[0x640 + v24], s29 = buf[0x648 + v24].
    v_add_u32_e32 v28, vcc, 0x640, v24                                  ; 323830FF 00000640
    v_mul_f32_e32 v27, v27, v27                                         ; 0A36371B
    v_readfirstlane_b32 s29, v28                                        ; 7E3A051C
    v_add_u32_e32 v24, vcc, 0x648, v24                                  ; 323030FF 00000648
    v_rcp_f32_e32 v27, v27                                              ; 7E36451B
    s_buffer_load_dwordx2 s[30:31], s[8:11], s29                        ; C0240784 0000001D
    v_readfirstlane_b32 s29, v24                                        ; 7E3A0518
    s_buffer_load_dword s29, s[8:11], s29                               ; C0200744 0000001D
    v_mul_f32_e32 v25, v27, v25                                         ; 0A32331B
    ; vcc = lanes that still have bits left in v22 (masked with exec below).
    v_cmp_ne_u32_e32 vcc, 0, v22                                        ; 7D9A2C80
    s_waitcnt lgkmcnt(0)                                                ; BF8C007F
    ; Accumulate: v17 += v30 * (s30 * v25), v16 += v45 * (s31 * v25),
    ; v9 += v26 * (s29 * v25).
    v_mul_f32_e32 v27, s30, v25                                         ; 0A36321E
    v_mac_f32_e32 v17, v30, v27                                         ; 2C22371E
    v_mul_f32_e32 v27, s31, v25                                         ; 0A36321F
    v_mul_f32_e32 v24, s29, v25                                         ; 0A30321D
    v_mac_f32_e32 v16, v45, v27                                         ; 2C20372D
    s_and_b64 vcc, exec, vcc                                            ; 86EA6A7E
    v_mac_f32_e32 v9, v26, v24                                          ; 2C12311A
    ; Back-edge: next trip while any active lane has a non-zero v22.
    s_cbranch_vccnz BB1_15                                              ; BF870000
; BB1_18: epilogue, reached from the early-out in BB1_13 or by falling out of
; the BB1_15..BB1_17 loop. Sums the accumulated terms, takes one more image
; sample, blends the two, packs the result to fp16 and exports it to MRT0.
BB1_18:
    ; Last scalar loads. The x8 load overwrites s[4:11] (including the s[4:7]
    ; buffer operand used just above) and the x4 load overwrites its own base
    ; s[0:1]; they become the resource and sampler of the sample below.
    s_buffer_load_dword s14, s[4:7], 0x20                               ; C0220382 00000020
    s_buffer_load_dwordx2 s[12:13], s[4:7], 0xf8                        ; C0260302 000000F8
    ; v5 = log2(1.0 + v8).
    v_add_f32_e32 v0, 1.0, v8                                           ; 020010F2
    s_load_dwordx8 s[4:11], s[0:1], 0x60                                ; C00E0100 00000060
    v_log_f32_e32 v5, v0                                                ; 7E0A4300
    s_load_dwordx4 s[0:3], s[0:1], 0x80                                 ; C00A0000 00000080
    s_waitcnt lgkmcnt(0)                                                ; BF8C007F
    ; Sample address: v0 = s12 * v2, v1 = s13 * v3, v2 = s14 * v5.
    ; Interleaved sums: v4 = v19 + v14 + v17, v6 = v20 + v18 + v16,
    ; v7 = v21 + v13 + v9 (v2/v3 serve as temporaries before being overwritten).
    v_mul_f32_e32 v0, s12, v2                                           ; 0A00040C
    v_add_f32_e32 v2, v18, v16                                          ; 02042112
    v_mul_f32_e32 v1, s13, v3                                           ; 0A02060D
    v_add_f32_e32 v3, v13, v9                                           ; 0206130D
    v_add_f32_e32 v6, v20, v2                                           ; 020C0514
    v_mul_f32_e32 v2, s14, v5                                           ; 0A040A0E
    v_add_f32_e32 v7, v21, v3                                           ; 020E0715
    image_sample_lz v[0:3], v[0:3], s[4:11], s[0:3] dmask:0xf           ; F09C0F00 00010000
    v_add_f32_e32 v4, v14, v17                                          ; 0208230E
    v_add_f32_e32 v4, v19, v4                                           ; 02080913
    s_waitcnt vmcnt(0)                                                  ; BF8C0F70
    ; Blend using the sample's fourth channel (v3):
    ; (v0, v1, v2) = (v4, v6, v7) * v3 + (v0, v1, v2).
    v_mad_f32 v0, v4, v3, v0                                            ; D1C10000 04020704
    v_mad_f32 v1, v6, v3, v1                                            ; D1C10001 04060706
    v_mad_f32 v2, v7, v3, v2                                            ; D1C10002 040A0707
    ; Pack (v0, v1) and (v2, v12) into two fp16 pairs and export them as the
    ; final (done) compressed colour write to MRT0.
    v_cvt_pkrtz_f16_f32 v0, v0, v1                                      ; D2960000 00020300
    v_cvt_pkrtz_f16_f32 v1, v2, v12                                     ; D2960001 00021902
    exp mrt0 v0, v0, v1, v1 done compr vm                               ; C4001C0F 00000100
    s_endpgm                                                            ; BF810000
=== STATISTICS ===

VGPRs: 56
SGPRs: 66
Scratch usage (bytes): 0
LDS usage (bytes): 0

Windows:

Highlight:

...
  // Excerpt from the Windows compiler output. Each step is a single v_or_b32
  // with a DPP modifier on its source: v52 |= v52 taken from another lane.
  // row_shr:1/2/4/8 combine within each 16-lane row; row_bcast:15 and
  // row_bcast:31 then carry the row results across rows, so that lane 63
  // ends up holding the OR over the wavefront.
  // NOTE(review): the s_nop 1 between steps presumably supplies the wait
  // states required before a DPP read of a just-written VGPR - confirm
  // against the ISA manual.
  v_or_b32      v52, v52, v52 row_shr:1                 // 000000000820: 286868FA FF011134
  s_nop         0x0001                                  // 000000000828: BF800001
  v_or_b32      v52, v52, v52 row_shr:2                 // 00000000082C: 286868FA FF011234
  s_nop         0x0001                                  // 000000000834: BF800001
  v_or_b32      v52, v52, v52 row_shr:4                 // 000000000838: 286868FA FF011434
  s_nop         0x0001                                  // 000000000840: BF800001
  v_or_b32      v52, v52, v52 row_shr:8                 // 000000000844: 286868FA FF011834
  s_nop         0x0001                                  // 00000000084C: BF800001
  v_or_b32      v52, v52, v52 row_bcast:15 row_mask:0xa // 000000000850: 286868FA AF014234
  s_nop         0x0001                                  // 000000000858: BF800001
  v_or_b32      v52, v52, v52 row_bcast:31 row_mask:0xc // 00000000085C: 286868FA CF014334
  // Reload exec from s[4:5] (presumably the mask saved before the reduction,
  // outside this excerpt), then read lane 63 of v52 into s4. This overwrites
  // the low half of the s[4:5] pair just consumed.
  s_mov_b64     exec, s[4:5]                            // 000000000864: BEFE0104
  v_readlane_b32  s4, v52, 63                           // 000000000868: D2890004 00017F34
  // Zero v8, v11 and v14 before entering the loop.
  v_mov_b32     v8, 0                                   // 000000000870: 7E100280
  v_mov_b32     v11, 0                                  // 000000000874: 7E160280
  v_mov_b32     v14, 0                                  // 000000000878: 7E1C0280
  s_nop         0x0000                                  // 00000000087C: BF800000
// Loop head: iterate over the set bits of the scalar mask in s4. Everything
// shown here is scalar (SGPR) work; no VGPR round trip is needed to form the
// load offsets.
label_0220:
  // Exit to label_030A once no bits remain in s4.
  s_cmp_eq_i32  s4, 0                                   // 000000000880: BF008004
  s_cbranch_scc1  label_030A                            // 000000000884: BF8500E8
  // s5 = index of the lowest set bit of s4.
  s_ff1_i32_b32  s5, s4                                 // 000000000888: BE851004
  // Two 8-dword loads from buffer s[12:15] at byte offsets
  // s5*64 + 0xc40 and s5*64 + 0xc60.
  s_lshl_b32    s9, s5, 6                               // 00000000088C: 8E098605
  s_add_u32     s10, s9, 0x00000c40                     // 000000000890: 800AFF09 00000C40
  s_buffer_load_dwordx8  s[32:39], s[12:15], s10        // 000000000898: C02C0806 0000000A
  s_add_u32     s9, s9, 0x00000c60                      // 0000000008A0: 8009FF09 00000C60
  s_buffer_load_dwordx8  s[40:47], s[12:15], s9         // 0000000008A8: C02C0A06 00000009
  // Further offsets s5*48 + 0x50 and s5*48 + 0x60; they are consumed after
  // the end of this excerpt.
  s_mul_i32     s9, s5, 48                              // 0000000008B0: 9209B005
  s_add_u32     s10, s9, 0x00000050                     // 0000000008B4: 800AFF09 00000050
  s_add_u32     s11, s9, 0x00000060                     // 0000000008BC: 800BFF09 00000060
  // Wait for the scalar buffer loads above.
  s_waitcnt     lgkmcnt(0)                              // 0000000008C4: BF8C007F
...
; -------- Disassembly --------------------
shader main
  asic(VI)
  type(PS)
                                                            // s_ps_state in s0

  s_mov_b64     s[64:65], exec                          // 000000000000: BEC0017E
  s_wqm_b64     exec, exec                              // 000000000004: BEFE077E
  s_getpc_b64   s[0:1]                                  // 000000000008: BE801C80
  s_mov_b32     m0, s15                                 // 00000000000C: BEFC000F
  s_mov_b32     s16, s13                                // 000000000010: BE90000D
  s_mov_b32     s17, s1                                 // 000000000014: BE910001
  s_mov_b32     s15, s1                                 // 000000000018: BE8F0001
  s_load_dword  s0, s[14:15], 0x48                      // 00000000001C: C0020007 00000048
  s_waitcnt     lgkmcnt(0)                              // 000000000024: BF8C007F
  s_load_dwordx8  s[20:27], s[0:1], 0x30                // 000000000028: C00E0500 00000030
  s_load_dwordx4  s[28:31], s[0:1], 0x50                // 000000000030: C00A0700 00000050
  v_interp_p1_f32  v5, v0, attr0.x                      // 000000000038: D4140000
  v_interp_p1_f32  v6, v0, attr0.y                      // 00000000003C: D4180100
  v_interp_p2_f32  v5, v1, attr0.x                      // 000000000040: D4150001
  v_interp_p2_f32  v6, v1, attr0.y                      // 000000000044: D4190101
  s_waitcnt     lgkmcnt(0)                              // 000000000048: BF8C007F
  image_sample  v[7:8], v[5:8], s[20:27], s[28:31] dmask:0x3 // 00000000004C: F0800300 00E50705
  s_load_dwordx8  s[20:27], s[0:1], 0x00                // 000000000054: C00E0500 00000000
  s_load_dwordx8  s[28:35], s[0:1], 0x60                // 00000000005C: C00E0700 00000060
  s_load_dwordx4  s[36:39], s[0:1], 0x20                // 000000000064: C00A0900 00000020
  s_load_dwordx4  s[40:43], s[0:1], 0x80                // 00000000006C: C00A0A00 00000080
  s_waitcnt     lgkmcnt(0)                              // 000000000074: BF8C007F
  image_sample  v[9:11], v[5:8], s[20:27], s[36:39] dmask:0x7 // 000000000078: F0800700 01250905
  s_nop         0x0000                                  // 000000000080: BF800000
  image_sample  v[5:6], v[5:8], s[28:35], s[40:43] dmask:0x6 // 000000000084: F0800600 01470505
  v_interp_p1_f32  v12, v0, attr1.x                     // 00000000008C: D4300400
  v_interp_p1_f32  v13, v0, attr2.x                     // 000000000090: D4340800
  s_load_dwordx2  s[0:1], s[14:15], 0x34                // 000000000094: C0060007 00000034
  v_interp_p2_f32  v12, v1, attr1.x                     // 00000000009C: D4310401
  v_interp_p1_f32  v14, v0, attr1.y                     // 0000000000A0: D4380500
  v_interp_p2_f32  v13, v1, attr2.x                     // 0000000000A4: D4350801
  v_interp_p1_f32  v15, v0, attr2.y                     // 0000000000A8: D43C0900
  v_interp_p2_f32  v14, v1, attr1.y                     // 0000000000AC: D4390501
  v_interp_p1_f32  v16, v0, attr1.z                     // 0000000000B0: D4400600
  v_mul_legacy_f32  v17, v12, v12                       // 0000000000B4: 0822190C
  v_interp_p2_f32  v15, v1, attr2.y                     // 0000000000B8: D43D0901
  v_interp_p1_f32  v18, v0, attr2.z                     // 0000000000BC: D4480A00
  v_mul_legacy_f32  v19, v13, v13                       // 0000000000C0: 08261B0D
  v_interp_p2_f32  v16, v1, attr1.z                     // 0000000000C4: D4410601
  v_mac_f32     v17, v14, v14                           // 0000000000C8: 2C221D0E
  v_interp_p2_f32  v18, v1, attr2.z                     // 0000000000CC: D4490A01
  v_mac_f32     v19, v15, v15                           // 0000000000D0: 2C261F0F
  v_mac_f32     v17, v16, v16                           // 0000000000D4: 2C222110
  v_mac_f32     v19, v18, v18                           // 0000000000D8: 2C262512
  s_load_dwordx2  s[18:19], s[14:15], 0x2c              // 0000000000DC: C0060487 0000002C
  v_rsq_f32     v17, v17                                // 0000000000E4: 7E224911
  v_rsq_f32     v19, v19                                // 0000000000E8: 7E264913
  s_waitcnt     lgkmcnt(0)                              // 0000000000EC: BF8C007F
  s_and_b32     s1, s1, 0x0000ffff                      // 0000000000F0: 8601FF01 0000FFFF
  s_andn2_b32   s21, s1, 0x3fff0000                     // 0000000000F8: 8915FF01 3FFF0000
  s_and_b32     s5, s19, 0x0000ffff                     // 000000000100: 8605FF13 0000FFFF
  s_andn2_b32   s57, s5, 0x3fff0000                     // 000000000108: 8939FF05 3FFF0000
  s_waitcnt     vmcnt(2)                                // 000000000110: BF8C0F72
  v_add_f32     v7, v7, -0.5 mul:2                      // 000000000114: D1010007 0801E307
  v_add_f32     v8, v8, -0.5 mul:2                      // 00000000011C: D1010008 0801E308
  v_mad_f32     v20, -v7, v7, 1.0                       // 000000000124: D1C10014 23CA0F07
  v_mad_f32     v20, -v8, v8, v20                       // 00000000012C: D1C10014 24521108
  v_max_f32     v20, 0, v20                             // 000000000134: 16282880
  s_mov_b32     s20, s0                                 // 000000000138: BE940000
  s_movk_i32    s22, 0xffff                             // 00000000013C: B016FFFF
  s_mov_b32     s23, 0x00024fac                         // 000000000140: BE9700FF 00024FAC
  s_buffer_load_dwordx8  s[24:31], s[20:23], 0x30       // 000000000148: C02E060A 00000030
  s_buffer_load_dwordx8  s[32:39], s[20:23], 0x70       // 000000000150: C02E080A 00000070
  v_mul_legacy_f32  v14, v14, v17                       // 000000000158: 081C230E
  v_mul_legacy_f32  v16, v16, v17                       // 00000000015C: 08202310
  v_interp_p1_f32  v21, v0, attr2.w                     // 000000000160: D4540B00
  v_mul_legacy_f32  v13, v13, v19                       // 000000000164: 081A270D
  v_mul_legacy_f32  v18, v18, v19                       // 000000000168: 08242712
  v_sqrt_f32    v20, v20                                // 00000000016C: 7E284F14
  v_mul_legacy_f32  v12, v12, v17                       // 000000000170: 0818230C
  v_interp_p2_f32  v21, v1, attr2.w                     // 000000000174: D4550B01
  v_mul_legacy_f32  v15, v15, v19                       // 000000000178: 081E270F
  v_mul_legacy_f32  v17, v14, v18                       // 00000000017C: 0822250E
  v_mul_legacy_f32  v19, v16, v13                       // 000000000180: 08261B10
  v_mul_f32     v7, s12, v7                             // 000000000184: 0A0E0E0C
  v_mul_f32     v8, s12, v8                             // 000000000188: 0A10100C
  s_buffer_load_dwordx8  s[40:47], s[20:23], 0x50       // 00000000018C: C02E0A0A 00000050
  s_buffer_load_dwordx8  s[48:55], s[20:23], 0x90       // 000000000194: C02E0C0A 00000090
  v_mul_legacy_f32  v22, v12, v15                       // 00000000019C: 082C1F0C
  v_mad_legacy_f32  v17, -v15, v16, v17                 // 0000000001A0: D1C00011 2446210F
  v_mad_legacy_f32  v19, -v18, v12, v19                 // 0000000001A8: D1C00013 244E1912
  v_mul_f32     v23, v13, v7                            // 0000000001B0: 0A2E0F0D
  v_mul_f32     v24, v21, v8                            // 0000000001B4: 0A301115
  v_mad_legacy_f32  v13, -v13, v14, v22                 // 0000000001B8: D1C0000D 245A1D0D
  v_mul_f32     v19, v21, v19                           // 0000000001C0: 0A262715
  v_mul_f32     v15, v15, v7                            // 0000000001C4: 0A1E0F0F
  v_mac_f32     v23, v24, v17                           // 0000000001C8: 2C2E2318
  s_mov_b32     s56, s18                                // 0000000001CC: BEB80012
  s_movk_i32    s58, 0xffff                             // 0000000001D0: B03AFFFF
  s_mov_b32     s59, 0x00024fac                         // 0000000001D4: BEBB00FF 00024FAC
  s_buffer_load_dwordx4  s[60:63], s[56:59], 0x200      // 0000000001DC: C02A0F1C 00000200
  v_mul_f32     v13, v21, v13                           // 0000000001E4: 0A1A1B15
  v_mul_f32     v7, v18, v7                             // 0000000001E8: 0A0E0F12
  v_mac_f32     v15, v19, v8                            // 0000000001EC: 2C1E1113
  v_mac_f32     v23, v12, v20                           // 0000000001F0: 2C2E290C
  v_interp_p1_f32  v12, v0, attr3.x                     // 0000000001F4: D4300C00
  s_buffer_load_dwordx4  s[56:59], s[56:59], 0x210      // 0000000001F8: C02A0E1C 00000210
  v_mac_f32     v7, v13, v8                             // 000000000200: 2C0E110D
  v_mac_f32     v15, v14, v20                           // 000000000204: 2C1E290E
  v_mul_legacy_f32  v8, v23, v23                        // 000000000208: 08102F17
  v_interp_p2_f32  v12, v1, attr3.x                     // 00000000020C: D4310C01
  v_interp_p1_f32  v13, v0, attr3.y                     // 000000000210: D4340D00
  v_mac_f32     v7, v16, v20                            // 000000000214: 2C0E2910
  v_mac_f32     v8, v15, v15                            // 000000000218: 2C101F0F
  v_interp_p2_f32  v13, v1, attr3.y                     // 00000000021C: D4350D01
  v_interp_p1_f32  v0, v0, attr3.z                      // 000000000220: D4000E00
  s_waitcnt     lgkmcnt(0)                              // 000000000224: BF8C007F
  v_mul_f32     v14, s27, v12                           // 000000000228: 0A1C181B
  v_mul_f32     v16, s35, v12                           // 00000000022C: 0A201823
  v_mac_f32     v8, v7, v7                              // 000000000230: 2C100F07
  v_interp_p2_f32  v0, v1, attr3.z                      // 000000000234: D4010E01
  v_mac_f32     v14, s31, v13                           // 000000000238: 2C1C1A1F
  v_mac_f32     v16, s39, v13                           // 00000000023C: 2C201A27
  s_buffer_load_dword  s0, s[20:23], 0xb0               // 000000000240: C022000A 000000B0
  v_rsq_f32     v1, v8                                  // 000000000248: 7E024908
  v_mac_f32     v14, s43, v0                            // 00000000024C: 2C1C002B
  v_mac_f32     v16, s51, v0                            // 000000000250: 2C200033
  v_add_f32     v8, s47, v14                            // 000000000254: 02101C2F
  v_add_f32     v14, s55, v16                           // 000000000258: 021C2037
  v_subrev_f32  v16, s60, v12                           // 00000000025C: 0620183C
  v_mul_f32     v17, s24, v12                           // 000000000260: 0A221818
  v_mul_f32     v18, s25, v12                           // 000000000264: 0A241819
  v_mul_f32     v19, s26, v12                           // 000000000268: 0A26181A
  v_mul_f32     v20, s32, v12                           // 00000000026C: 0A281820
  v_mul_f32     v21, s33, v12                           // 000000000270: 0A2A1821
  v_mul_f32     v22, s34, v12                           // 000000000274: 0A2C1822
  v_rcp_f32     v8, v8                                  // 000000000278: 7E104508
  v_rcp_f32     v14, v14                                // 00000000027C: 7E1C450E
  v_subrev_f32  v24, s61, v13                           // 000000000280: 06301A3D
  v_mul_f32     v16, s56, v16                           // 000000000284: 0A202038
  v_mac_f32     v17, s28, v13                           // 000000000288: 2C221A1C
  v_mac_f32     v18, s29, v13                           // 00000000028C: 2C241A1D
  v_mac_f32     v19, s30, v13                           // 000000000290: 2C261A1E
  v_mac_f32     v20, s36, v13                           // 000000000294: 2C281A24
  v_mac_f32     v21, s37, v13                           // 000000000298: 2C2A1A25
  v_mac_f32     v22, s38, v13                           // 00000000029C: 2C2C1A26
  v_subrev_f32  v25, s62, v0                            // 0000000002A0: 0632003E
  v_mac_f32     v16, s57, v24                           // 0000000002A4: 2C203039
  v_mul_legacy_f32  v23, v23, v1                        // 0000000002A8: 082E0317
  v_mul_legacy_f32  v15, v15, v1                        // 0000000002AC: 081E030F
  v_mul_legacy_f32  v1, v7, v1                          // 0000000002B0: 08020307
  v_mac_f32     v17, s40, v0                            // 0000000002B4: 2C220028
  v_mac_f32     v18, s41, v0                            // 0000000002B8: 2C240029
  v_mac_f32     v19, s42, v0                            // 0000000002BC: 2C26002A
  v_mac_f32     v20, s48, v0                            // 0000000002C0: 2C280030
  v_mac_f32     v21, s49, v0                            // 0000000002C4: 2C2A0031
  v_mac_f32     v22, s50, v0                            // 0000000002C8: 2C2C0032
  v_mac_f32     v16, s58, v25                           // 0000000002CC: 2C20323A
  v_cmp_ne_u32  vcc, -1, v4                             // 0000000002D0: 7D9A08C1
  v_xor_b32     v4, 0x80000000, v23                     // 0000000002D4: 2A082EFF 80000000
  v_xor_b32     v7, 0x80000000, v15                     // 0000000002DC: 2A0E1EFF 80000000
  v_xor_b32     v24, 0x80000000, v1                     // 0000000002E4: 2A3002FF 80000000
  v_add_f32     v17, s44, v17                           // 0000000002EC: 0222222C
  v_add_f32     v18, s45, v18                           // 0000000002F0: 0224242D
  v_add_f32     v19, s46, v19                           // 0000000002F4: 0226262E
  v_add_f32     v20, s52, v20                           // 0000000002F8: 02282834
  v_add_f32     v21, s53, v21                           // 0000000002FC: 022A2A35
  v_add_f32     v22, s54, v22                           // 000000000300: 022C2C36
  s_waitcnt     lgkmcnt(0)                              // 000000000304: BF8C007F
  v_mul_f32     v25, s0, v16                            // 000000000308: 0A322000
  s_buffer_load_dwordx4  s[24:27], s[20:23], 0xc0       // 00000000030C: C02A060A 000000C0
  s_buffer_load_dwordx4  s[28:31], s[20:23], 0xd0       // 000000000314: C02A070A 000000D0
  s_waitcnt     vmcnt(1)                                // 00000000031C: BF8C0F71
  v_mul_f32     v26, s2, v9                             // 000000000320: 0A341202
  v_mul_f32     v27, s3, v10                            // 000000000324: 0A361403
  v_mul_f32     v28, s4, v11                            // 000000000328: 0A381604
  v_cndmask_b32  v1, v1, v24, vcc                       // 00000000032C: 00023101
  v_cndmask_b32  v7, v15, v7, vcc                       // 000000000330: 000E0F0F
  v_cndmask_b32  v4, v23, v4, vcc                       // 000000000334: 00080917
  s_waitcnt     vmcnt(0)                                // 000000000338: BF8C0F70
  v_mul_f32     v15, s11, v6                            // 00000000033C: 0A1E0C0B
  v_mul_f32     v5, s10, v5                             // 000000000340: 0A0A0A0A
  v_mul_f32     v17, v17, v8                            // 000000000344: 0A221111
  v_mul_f32     v18, v18, v8                            // 000000000348: 0A241112
  v_mul_f32     v23, v19, v8                            // 00000000034C: 0A2E1113
  v_mul_f32     v20, v20, v14                           // 000000000350: 0A281D14
  v_mul_f32     v21, v21, v14                           // 000000000354: 0A2A1D15
  v_mul_f32     v24, v22, v14                           // 000000000358: 0A301D16
  v_cmp_ngt_f32  vcc, 1.0, v25                          // 00000000035C: 7C9632F2
  s_and_saveexec_b64  s[12:13], vcc                     // 000000000360: BE8C206A
  v_mov_b32     v8, 0                                   // 000000000364: 7E100280
  s_andn2_b64   exec, s[12:13], exec                    // 000000000368: 89FE7E0C
  s_cbranch_execz  label_00F8                           // 00000000036C: BF88001C
  s_load_dwordx8  s[32:39], s[16:17], 0x30              // 000000000370: C00E0808 00000030
  s_load_dwordx4  s[40:43], s[16:17], 0x50              // 000000000378: C00A0A08 00000050
  s_waitcnt     lgkmcnt(0)                              // 000000000380: BF8C007F
  image_sample_lz  v[17:18], v[17:20], s[32:39], s[40:43] dmask:0x3 // 000000000384: F09C0300 01481111
  s_waitcnt     vmcnt(0)                                // 00000000038C: BF8C0F70
  v_cmp_ngt_f32  vcc, v23, v17                          // 000000000390: 7C962317
  s_and_saveexec_b64  s[18:19], vcc                     // 000000000394: BE92206A
  v_mov_b32     v8, 1.0                                 // 000000000398: 7E1002F2
  s_andn2_b64   exec, s[18:19], exec                    // 00000000039C: 89FE7E12
  v_mad_f32     v18, -v17, v17, v18                     // 0000000003A0: D1C10012 244A2311
  s_cbranch_execz  label_00F8                           // 0000000003A8: BF88000D
  v_max_f32     v18, 0x3727c5ac, v18                    // 0000000003AC: 162424FF 3727C5AC
  v_mad_f32     v17, v19, v8, -v17                      // 0000000003B4: D1C10011 84461113
  v_mad_f32     v8, v17, v17, v18                       // 0000000003BC: D1C10008 044A2311
  v_rcp_f32     v8, v8                                  // 0000000003C4: 7E104508
  v_madak_f32   v8, v18, v8, 0xbe800000                 // 0000000003C8: 30101112 BE800000
  s_mov_b32     s1, 0x3faaaaab                          // 0000000003D0: BE8100FF 3FAAAAAB
  v_mul_f32     v8, v8, s1 clamp                        // 0000000003D8: D1058008 00000308
label_00F8:
  s_mov_b64     exec, s[12:13]                          // 0000000003E0: BEFE010C
  s_mov_b32     s1, 0x3f400000                          // 0000000003E4: BE8100FF 3F400000
  v_cmp_nlt_f32  vcc, s1, v25                           // 0000000003EC: 7C9C3201
  s_and_saveexec_b64  s[12:13], vcc                     // 0000000003F0: BE8C206A
  v_mov_b32     v14, 0                                  // 0000000003F4: 7E1C0280
  s_andn2_b64   exec, s[12:13], exec                    // 0000000003F8: 89FE7E0C
  s_cbranch_execz  label_011C                           // 0000000003FC: BF88001C
  s_load_dwordx8  s[32:39], s[16:17], 0x00              // 000000000400: C00E0808 00000000
  s_load_dwordx4  s[40:43], s[16:17], 0x20              // 000000000408: C00A0A08 00000020
  s_waitcnt     lgkmcnt(0)                              // 000000000410: BF8C007F
  image_sample_lz  v[17:18], v[20:23], s[32:39], s[40:43] dmask:0x3 // 000000000414: F09C0300 01481114
  s_waitcnt     vmcnt(0)                                // 00000000041C: BF8C0F70
  v_cmp_ngt_f32  vcc, v24, v17                          // 000000000420: 7C962318
  s_and_saveexec_b64  s[18:19], vcc                     // 000000000424: BE92206A
  v_mov_b32     v14, 1.0                                // 000000000428: 7E1C02F2
  s_andn2_b64   exec, s[18:19], exec                    // 00000000042C: 89FE7E12
  v_mad_f32     v18, -v17, v17, v18                     // 000000000430: D1C10012 244A2311
  s_cbranch_execz  label_011C                           // 000000000438: BF88000D
  v_max_f32     v18, 0x3727c5ac, v18                    // 00000000043C: 162424FF 3727C5AC
  v_mad_f32     v17, v22, v14, -v17                     // 000000000444: D1C10011 84461D16
  v_mad_f32     v14, v17, v17, v18                      // 00000000044C: D1C1000E 044A2311
  v_rcp_f32     v14, v14                                // 000000000454: 7E1C450E
  v_madak_f32   v14, v18, v14, 0xbe800000               // 000000000458: 301C1D12 BE800000
  s_mov_b32     s1, 0x3faaaaab                          // 000000000460: BE8100FF 3FAAAAAB
  v_mul_f32     v14, v14, s1 clamp                      // 000000000468: D105800E 0000030E
label_011C:
  s_mov_b64     exec, s[12:13]                          // 000000000470: BEFE010C
  s_load_dwordx2  s[12:13], s[14:15], 0x3c              // 000000000474: C0060307 0000003C
  s_waitcnt     lgkmcnt(0)                              // 00000000047C: BF8C007F
  s_and_b32     s1, s13, 0x0000ffff                     // 000000000480: 8601FF0D 0000FFFF
  s_andn2_b32   s13, s1, 0x3fff0000                     // 000000000488: 890DFF01 3FFF0000
  s_movk_i32    s14, 0xffff                             // 000000000490: B00EFFFF
  s_mov_b32     s15, 0x00024fac                         // 000000000494: BE8F00FF 00024FAC
  s_buffer_load_dwordx4  s[32:35], s[12:15], 0x00       // 00000000049C: C02A0806 00000000
  s_buffer_load_dwordx4  s[36:39], s[12:15], 0x10       // 0000000004A4: C02A0906 00000010
  s_buffer_load_dwordx4  s[40:43], s[12:15], 0x20       // 0000000004AC: C02A0A06 00000020
  s_buffer_load_dwordx4  s[44:47], s[12:15], 0x30       // 0000000004B4: C02A0B06 00000030
  s_waitcnt     lgkmcnt(0)                              // 0000000004BC: BF8C007F
  v_mul_f32     v17, s34, v12                           // 0000000004C0: 0A221822
  v_mac_f32     v17, s38, v13                           // 0000000004C4: 2C221A26
  v_mac_f32     v17, s42, v0                            // 0000000004C8: 2C22002A
  v_add_f32     v17, s46, v17                           // 0000000004CC: 0222222E
  v_max_f32     v18, 0x38d1b717, v17                    // 0000000004D0: 162422FF 38D1B717
  v_log_f32     v18, v18                                // 0000000004D8: 7E244312
  v_ceil_f32    v18, v18                                // 0000000004DC: 7E243B12
  s_mov_b32     s1, 0x40e00000                          // 0000000004E0: BE8100FF 40E00000
  v_med3_f32    v18, v18, -1.0, s1                      // 0000000004E8: D1D60012 0005E712
  v_min_f32     v19, -v18, 0                            // 0000000004F0: D10A0013 20010112
  v_exp_f32     v19, v19                                // 0000000004F8: 7E264113
  v_max_f32     v20, -v18, 0                            // 0000000004FC: D10B0014 20010112
  s_load_dwordx8  s[48:55], s[16:17], 0x90              // 000000000504: C00E0C08 00000090
  v_mul_f32     v21, s32, v12                           // 00000000050C: 0A2A1820
  v_mul_f32     v22, s33, v12                           // 000000000510: 0A2C1821
  v_mul_f32     v17, v17, v19 mul:2                     // 000000000514: D1050011 08022711
  v_add_f32     v20, -1.0, v20                          // 00000000051C: 022828F3
  s_load_dwordx4  s[32:35], s[16:17], 0xb0              // 000000000520: C00A0808 000000B0
  v_mac_f32     v21, s36, v13                           // 000000000528: 2C2A1A24
  v_mac_f32     v22, s37, v13                           // 00000000052C: 2C2C1A25
  v_add_f32     v17, v20, v17                           // 000000000530: 02222314
  v_mov_b32     v20, 0x3f7fbe77                         // 000000000534: 7E2802FF 3F7FBE77
  s_mov_b32     s1, 0x3a83126f                          // 00000000053C: BE8100FF 3A83126F
  v_mac_f32     v21, s40, v0                            // 000000000544: 2C2A0028
  v_mac_f32     v22, s41, v0                            // 000000000548: 2C2C0029
  v_med3_f32    v17, v17, s1, v20                       // 00000000054C: D1D60011 04500311
  v_add_f32     v18, 1.0, v18                           // 000000000554: 022424F2
  v_add_f32     v20, s44, v21                           // 000000000558: 02282A2C
  v_add_f32     v21, s45, v22                           // 00000000055C: 022A2C2D
  v_add_f32     v17, v17, v18                           // 000000000560: 02222511
  v_mad_f32     v18, v20, v19, 1.0 div:2                // 000000000564: D1C10012 1BCA2714
  v_mad_f32     v19, v21, v19, 1.0 div:2                // 00000000056C: D1C10013 1BCA2715
  v_mul_f32     v20, 0x3de38e39, v17                    // 000000000574: 0A2822FF 3DE38E39
  s_waitcnt     lgkmcnt(0)                              // 00000000057C: BF8C007F
  image_sample_lz  v[17:18], v[18:21], s[48:55], s[32:35] dmask:0x3 // 000000000580: F09C0300 010C1112
  v_sub_f32     v19, s60, v12                           // 000000000588: 0426183C
  v_sub_f32     v20, s61, v13                           // 00000000058C: 04281A3D
  v_mul_legacy_f32  v21, v19, v19                       // 000000000590: 082A2713
  v_sub_f32     v22, s62, v0                            // 000000000594: 042C003E
  v_mac_f32     v21, v20, v20                           // 000000000598: 2C2A2914
  v_mac_f32     v21, v22, v22                           // 00000000059C: 2C2A2D16
  v_rsq_f32     v21, v21                                // 0000000005A0: 7E2A4915
  v_mad_legacy_f32  v23, v19, v21, s28                  // 0000000005A4: D1C00017 00722B13
  v_mad_legacy_f32  v24, v20, v21, s29                  // 0000000005AC: D1C00018 00762B14
  v_mul_legacy_f32  v25, v23, v23                       // 0000000005B4: 08322F17
  v_mad_legacy_f32  v29, v22, v21, s30                  // 0000000005B8: D1C0001D 007A2B16
  v_mac_f32     v25, v24, v24                           // 0000000005C0: 2C323118
  v_mac_f32     v25, v29, v29                           // 0000000005C4: 2C323B1D
  v_rsq_f32     v25, v25                                // 0000000005C8: 7E324919
  v_mul_legacy_f32  v30, v19, v21                       // 0000000005CC: 083C2B13
  v_mul_legacy_f32  v23, v23, v25                       // 0000000005D0: 082E3317
  v_mul_legacy_f32  v31, v20, v21                       // 0000000005D4: 083E2B14
  v_mul_legacy_f32  v24, v24, v25                       // 0000000005D8: 08303318
  v_mul_f32     v32, v30, v23                           // 0000000005DC: 0A402F1E
  v_mul_legacy_f32  v33, v22, v21                       // 0000000005E0: 08422B16
  v_mul_legacy_f32  v25, v29, v25                       // 0000000005E4: 0832331D
  v_mac_f32     v32, v24, v31                           // 0000000005E8: 2C403F18
  v_mac_f32     v32, v25, v33                           // 0000000005EC: 2C404319
  v_med3_f32    v29, v32, s1, 1.0                       // 0000000005F0: D1D6001D 03C80320
  v_sub_f32     v29, 1.0, v29                           // 0000000005F8: 043A3AF2
  v_mul_f32     v32, v4, v30                            // 0000000005FC: 0A403D04
  v_mul_f32     v34, s28, v4                            // 000000000600: 0A44081C
  v_log_f32     v29, v29                                // 000000000604: 7E3A431D
  v_mov_b32     v35, 0x3fa00000                         // 000000000608: 7E4602FF 3FA00000
  s_mov_b32     s5, 0x3f400000                          // 000000000610: BE8500FF 3F400000
  v_mov_b32     v36, 0x3e800000                         // 000000000618: 7E4802FF 3E800000
  v_mac_f32     v32, v7, v31                            // 000000000620: 2C403F07
  v_mac_f32     v34, s29, v7                            // 000000000624: 2C440E1D
  v_mul_f32     v23, v4, v23                            // 000000000628: 0A2E2F04
  v_mac_f32     v35, s5, v5                             // 00000000062C: 2C460A05
  v_mac_f32     v36, s5, v5                             // 000000000630: 2C480A05
  v_mac_f32     v32, v1, v33                            // 000000000634: 2C404301
  v_mac_f32     v34, s30, v1                            // 000000000638: 2C44021E
  v_mac_f32     v23, v7, v24                            // 00000000063C: 2C2E3107
  v_mul_f32     v5, v35, v35                            // 000000000640: 0A0A4723
  s_mov_b32     s5, 0xbe000000                          // 000000000644: BE8500FF BE000000
  v_med3_f32    v24, v32, s1, 1.0                       // 00000000064C: D1D60018 03C80320
  v_med3_f32    v32, v34, s1, 1.0                       // 000000000654: D1D60020 03C80322
  v_mac_f32     v23, v1, v25                            // 00000000065C: 2C2E3301
  s_mov_b32     s1, 0x38d1b717                          // 000000000660: BE8100FF 38D1B717
  v_mul_f32     v25, v36, v36                           // 000000000668: 0A324924
  v_mad_f32     v34, v5, s5, 1.0                        // 00000000066C: D1C10022 03C80B05
  v_mul_legacy_f32  v29, 0x40a00000, v29                // 000000000674: 083A3AFF 40A00000
  v_med3_f32    v23, v23, s1, 1.0                       // 00000000067C: D1D60017 03C80317
  v_mul_f32     v25, v25, v25                           // 000000000684: 0A323319
  v_mul_f32     v24, v24, v34                           // 000000000688: 0A304518
  v_mul_f32     v35, v32, v34                           // 00000000068C: 0A464520
  v_mov_b32     v36, 0xbd23d70a                         // 000000000690: 7E4802FF BD23D70A
  v_exp_f32     v29, v29                                // 000000000698: 7E3A411D
  v_mad_f32     v37, v23, v25, -v23                     // 00000000069C: D1C10025 845E3317
  v_madmk_f32   v24, v5, 0x3e000000, v24                // 0000000006A4: 2E303105 3E000000
  v_madmk_f32   v35, v5, 0x3e000000, v35                // 0000000006AC: 2E464705 3E000000
  v_mad_f32     v9, v9, s2, v36                         // 0000000006B4: D1C10009 04900509
  v_mad_f32     v10, v10, s3, v36                       // 0000000006BC: D1C1000A 0490070A
  v_mac_f32     v36, s4, v11                            // 0000000006C4: 2C481604
  s_mov_b32     s1, 0x3d23d70a                          // 0000000006C8: BE8100FF 3D23D70A
  v_mad_f32     v11, v23, v37, 1.0                      // 0000000006D0: D1C1000B 03CA4B17
  v_mul_f32     v23, v24, v35                           // 0000000006D8: 0A2E4718
  v_mov_b32     v35, 0xbf400000                         // 0000000006DC: 7E4602FF BF400000
  v_mad_legacy_f32  v9, v9, v15, s1                     // 0000000006E4: D1C00009 00061F09
  v_mad_legacy_f32  v10, v10, v15, s1                   // 0000000006EC: D1C0000A 00061F0A
  v_mad_legacy_f32  v15, v36, v15, s1                   // 0000000006F4: D1C0000F 00061F24
  v_mul_f32     v36, 0x40490f5d, v11                    // 0000000006FC: 0A4816FF 40490F5D
  v_max_f32     v23, 0x3a83126f, v23                    // 000000000704: 162E2EFF 3A83126F
  v_mad_f32     v35, s0, v16, v35 mul:4 clamp           // 00000000070C: D1C18023 148E2000
  v_subrev_f32  v14, v8, v14                            // 000000000714: 061C1D08
  v_sub_f32     v37, 1.0, v9                            // 000000000718: 044A12F2
  v_sub_f32     v38, 1.0, v10                           // 00000000071C: 044C14F2
  v_sub_f32     v39, 1.0, v15                           // 000000000720: 044E1EF2
  v_mul_f32     v11, v11, v36                           // 000000000724: 0A16490B
  v_rcp_f32     v23, v23                                // 000000000728: 7E2E4517
  v_mad_legacy_f32  v8, v14, v35, v8                    // 00000000072C: D1C00008 0422470E
  v_mad_legacy_f32  v14, v37, v29, v9                   // 000000000734: D1C0000E 04263B25
  v_mad_legacy_f32  v35, v38, v29, v10                  // 00000000073C: D1C00023 042A3B26
  v_mad_legacy_f32  v29, v39, v29, v15                  // 000000000744: D1C0001D 043E3B27
  v_mul_f32     v36, s24, v32                           // 00000000074C: 0A484018
  v_mul_f32     v40, s25, v32                           // 000000000750: 0A504019
  v_mul_f32     v32, s26, v32                           // 000000000754: 0A40401A
  v_rcp_f32     v11, v11                                // 000000000758: 7E16450B
  v_mul_f32     v36, v8, v36                            // 00000000075C: 0A484908
  v_mul_f32     v40, v8, v40                            // 000000000760: 0A505108
  v_mul_f32     v8, v8, v32                             // 000000000764: 0A104108
  v_sub_f32     v32, 1.0, v14                           // 000000000768: 04401CF2
  v_sub_f32     v41, 1.0, v35                           // 00000000076C: 045246F2
  v_sub_f32     v42, 1.0, v29                           // 000000000770: 04543AF2
  v_mul_f32     v32, v36, v32                           // 000000000774: 0A404124
  v_mul_f32     v41, v40, v41                           // 000000000778: 0A525328
  v_mul_f32     v42, v8, v42                            // 00000000077C: 0A545508
  v_mov_b32     v43, 0x3dcccccd                         // 000000000780: 7E5602FF 3DCCCCCD
  s_mov_b32     s0, 0x3ea2f9e9                          // 000000000788: BE8000FF 3EA2F9E9
  v_mul_f32     v23, 0x3e800000, v23                    // 000000000790: 0A2E2EFF 3E800000
  v_mad_f32     v32, v32, s0, v43                       // 000000000798: D1C10020 04AC0120
  v_mad_f32     v41, v41, s0, v43                       // 0000000007A0: D1C10029 04AC0129
  v_mac_f32     v43, s0, v42                            // 0000000007A8: 2C565400
  v_mul_f32     v11, v25, v11                           // 0000000007AC: 0A161719
  v_mul_f32     v14, v14, v23                           // 0000000007B0: 0A1C2F0E
  v_mul_f32     v35, v35, v23                           // 0000000007B4: 0A462F23
  v_mul_f32     v23, v29, v23                           // 0000000007B8: 0A2E2F1D
  v_mul_f32     v29, v26, v32                           // 0000000007BC: 0A3A411A
  v_mul_f32     v32, v27, v41                           // 0000000007C0: 0A40531B
  v_mul_f32     v41, v28, v43                           // 0000000007C4: 0A52571C
  v_mad_f32     v6, -v6, s11, 1.0                       // 0000000007C8: D1C10006 23C81706
  v_mul_f32     v14, v11, v14                           // 0000000007D0: 0A1C1D0B
  v_mul_f32     v35, v11, v35                           // 0000000007D4: 0A46470B
  v_mul_f32     v11, v11, v23                           // 0000000007D8: 0A162F0B
  v_mul_f32     v23, v29, v6                            // 0000000007DC: 0A2E0D1D
  v_mul_f32     v29, v32, v6                            // 0000000007E0: 0A3A0D20
  v_mul_f32     v32, v41, v6                            // 0000000007E4: 0A400D29
  s_load_dwordx8  s[24:31], s[16:17], 0xc0              // 0000000007E8: C00E0608 000000C0
  s_load_dwordx4  s[0:3], s[16:17], 0xe0                // 0000000007F0: C00A0008 000000E0
  v_mac_f32     v23, v36, v14                           // 0000000007F8: 2C2E1D24
  v_mac_f32     v29, v40, v35                           // 0000000007FC: 2C3A4728
  v_mac_f32     v32, v8, v11                            // 000000000800: 2C401708
  // Wave-wide bitwise OR of v17 over the lanes left enabled by exec & s[64:65].
  // The result ends up in s4, which the loop at label_0220 walks bit by bit.
  s_and_b64     exec, exec, s[64:65]                    // 000000000804: 86FE407E
  s_waitcnt     vmcnt(0)                                // 000000000808: BF8C0F70
  v_mov_b32     v52, v17                                // 00000000080C: 7E680311
  // s[4:5] = exec, then exec = 0 | ~exec: the lanes that were off get 0 in v52,
  // the identity for OR, so they cannot disturb the reduction.
  s_orn2_saveexec_b64  s[4:5], 0                        // 000000000810: BE842480
  v_mov_b32     v52, 0                                  // 000000000814: 7E680280
  // exec = ~(0 & 0): every lane is enabled for the cross-lane steps.
  s_nand_b64    exec, 0, 0                              // 000000000818: 8BFE8080
  // NOTE(review): the s_nop instructions between the DPP steps look like the wait
  // states required between a VALU write of v52 and a DPP read of it - confirm
  // against the ISA hazard table.
  s_nop         0x0000                                  // 00000000081C: BF800000
  // row_shr:1/2/4/8: each lane ORs in the lane 1/2/4/8 positions below it in its
  // 16-lane row, leaving the OR of the whole row in the row's last lane.
  v_or_b32      v52, v52, v52 row_shr:1                 // 000000000820: 286868FA FF011134
  s_nop         0x0001                                  // 000000000828: BF800001
  v_or_b32      v52, v52, v52 row_shr:2                 // 00000000082C: 286868FA FF011234
  s_nop         0x0001                                  // 000000000834: BF800001
  v_or_b32      v52, v52, v52 row_shr:4                 // 000000000838: 286868FA FF011434
  s_nop         0x0001                                  // 000000000840: BF800001
  v_or_b32      v52, v52, v52 row_shr:8                 // 000000000844: 286868FA FF011834
  s_nop         0x0001                                  // 00000000084C: BF800001
  // row_bcast:15 (rows 1 and 3 written) then row_bcast:31 (rows 2 and 3 written)
  // carry the row results upward, so lane 63 holds the OR of all 64 lanes.
  v_or_b32      v52, v52, v52 row_bcast:15 row_mask:0xa // 000000000850: 286868FA AF014234
  s_nop         0x0001                                  // 000000000858: BF800001
  v_or_b32      v52, v52, v52 row_bcast:31 row_mask:0xc // 00000000085C: 286868FA CF014334
  // Restore the exec mask saved above and move the reduced value to a scalar.
  s_mov_b64     exec, s[4:5]                            // 000000000864: BEFE0104
  v_readlane_b32  s4, v52, 63                           // 000000000868: D2890004 00017F34
  // Clear v8 / v11 / v14; the label_0220 loop adds into them with v_mac_f32.
  v_mov_b32     v8, 0                                   // 000000000870: 7E100280
  v_mov_b32     v11, 0                                  // 000000000874: 7E160280
  v_mov_b32     v14, 0                                  // 000000000878: 7E1C0280
  // NOTE(review): probably alignment padding - label_0220 follows at 0x880, a
  // multiple of 64 bytes.
  s_nop         0x0000                                  // 00000000087C: BF800000
// label_0220: loop over the set bits of s4, lowest bit first. Each pass:
//   * takes s5 = index of the lowest set bit (s_ff1_i32_b32); the bit is cleared
//     with s_lshl_b32 / s_xor_b32 just before the back-branch;
//   * reads 16 dwords at offset 0xc40 + s5*64 and several fields at offsets
//     0x40..0x6f + s5*48 from the buffer described by s[12:15], using scalar
//     loads only (the addresses never pass through VGPRs);
//   * adds one contribution into v8 / v11 / v14.
// Control leaves to label_030A once s4 is zero.
// NOTE(review): the arithmetic resembles one shadowed spot light per bit
// (projected two-channel shadow lookup, cone and range falloff, shading terms).
// That is inferred from the math alone - confirm against the shader source.
label_0220:
  // Leave the loop when no bits remain.
  s_cmp_eq_i32  s4, 0                                   // 000000000880: BF008004
  s_cbranch_scc1  label_030A                            // 000000000884: BF8500E8
  // s5 = bit index; s[32:39] and s[40:47] <- 16 consecutive dwords at 0xc40 + s5*64.
  s_ff1_i32_b32  s5, s4                                 // 000000000888: BE851004
  s_lshl_b32    s9, s5, 6                               // 00000000088C: 8E098605
  s_add_u32     s10, s9, 0x00000c40                     // 000000000890: 800AFF09 00000C40
  s_buffer_load_dwordx8  s[32:39], s[12:15], s10        // 000000000898: C02C0806 0000000A
  s_add_u32     s9, s9, 0x00000c60                      // 0000000008A0: 8009FF09 00000C60
  s_buffer_load_dwordx8  s[40:47], s[12:15], s9         // 0000000008A8: C02C0A06 00000009
  // s9 = s5*48 from here on; s10 / s11 are the offsets of the two x4 loads that
  // are issued right after the image sample.
  s_mul_i32     s9, s5, 48                              // 0000000008B0: 9209B005
  s_add_u32     s10, s9, 0x00000050                     // 0000000008B4: 800AFF09 00000050
  s_add_u32     s11, s9, 0x00000060                     // 0000000008BC: 800BFF09 00000060
  s_waitcnt     lgkmcnt(0)                              // 0000000008C4: BF8C007F
  // v35 = s32*v12 + s36*v13 + s40*v0 + s44
  // v36 = s33*v12 + s37*v13 + s41*v0 + s45
  // v17 = s35*v12 + s39*v13 + s43*v0 + s47
  // v35 and v36 are then multiplied by 1/v17. The s34/s38/s42/s46 combination is
  // never computed.
  v_mul_f32     v17, s35, v12                           // 0000000008C8: 0A221823
  v_mac_f32     v17, s39, v13                           // 0000000008CC: 2C221A27
  v_mac_f32     v17, s43, v0                            // 0000000008D0: 2C22002B
  v_add_f32     v17, s47, v17                           // 0000000008D4: 0222222F
  v_mul_f32     v35, s32, v12                           // 0000000008D8: 0A461820
  v_mul_f32     v36, s33, v12                           // 0000000008DC: 0A481821
  v_rcp_f32     v17, v17                                // 0000000008E0: 7E224511
  v_mac_f32     v35, s36, v13                           // 0000000008E4: 2C461A24
  v_mac_f32     v36, s37, v13                           // 0000000008E8: 2C481A25
  v_mac_f32     v35, s40, v0                            // 0000000008EC: 2C460028
  v_mac_f32     v36, s41, v0                            // 0000000008F0: 2C480029
  v_add_f32     v35, s44, v35                           // 0000000008F4: 0246462C
  v_add_f32     v36, s45, v36                           // 0000000008F8: 0248482D
  v_mul_f32     v35, v35, v17                           // 0000000008FC: 0A462323
  v_mul_f32     v36, v36, v17                           // 000000000900: 0A482324
  // Two-channel sample (dmask:0x3) with resource s[24:31] and sampler s[0:3];
  // the result overwrites v[35:36] and is awaited at the s_waitcnt vmcnt(0) below.
  image_sample_lz  v[35:36], v[35:38], s[24:31], s[0:3] dmask:0x3 // 000000000904: F09C0300 00062323
  // s[32:35] <- 4 dwords at s5*48 + 0x50, s[36:39] <- 4 dwords at s5*48 + 0x60.
  s_buffer_load_dwordx4  s[32:35], s[12:15], s10        // 00000000090C: C0280806 0000000A
  s_buffer_load_dwordx4  s[36:39], s[12:15], s11        // 000000000914: C0280906 0000000B
  s_waitcnt     lgkmcnt(0)                              // 00000000091C: BF8C007F
  // (v17, v40, v42) = (v12, v13, v0) - (s32, s33, s34);
  // v41 = s36*v17 + s37*v40 + s38*v42.
  v_subrev_f32  v17, s32, v12                           // 000000000920: 06221820
  v_subrev_f32  v40, s33, v13                           // 000000000924: 06501A21
  v_mul_f32     v41, s36, v17                           // 000000000928: 0A522224
  v_subrev_f32  v42, s34, v0                            // 00000000092C: 06540022
  v_mac_f32     v41, s37, v40                           // 000000000930: 2C525025
  v_mac_f32     v41, s38, v42                           // 000000000934: 2C525426
  s_waitcnt     vmcnt(0)                                // 000000000938: BF8C0F70
  // Lanes where !(v41 > v35) simply get v35 = 1.0. s[10:11] keeps the incoming
  // exec mask (the offsets held in s10 / s11 are no longer needed).
  v_cmp_ngt_f32  vcc, v41, v35                          // 00000000093C: 7C964729
  s_and_saveexec_b64  s[10:11], vcc                     // 000000000940: BE8A206A
  v_mov_b32     v35, 1.0                                // 000000000944: 7E4602F2
  // exec = saved mask & ~current mask, i.e. the lanes with v41 > v35. For those:
  //   v36 = max(v36 - v35*v35, 0x3727c5ac)
  //   v35 = clamp((v36 / ((v41 - v35)^2 + v36) - 0.25) * 0x3faaaaab)
  // The branch skips the remainder when no such lane exists.
  // NOTE(review): this has the form of a variance-shadow-map (Chebyshev) test
  // with a light-bleed remap - confirm.
  s_andn2_b64   exec, s[10:11], exec                    // 000000000948: 89FE7E0A
  v_mad_f32     v36, -v35, v35, v36                     // 00000000094C: D1C10024 24924723
  s_cbranch_execz  label_0262                           // 000000000954: BF88000C
  v_max_f32     v36, 0x3727c5ac, v36                    // 000000000958: 164848FF 3727C5AC
  v_subrev_f32  v35, v35, v41                           // 000000000960: 06465323
  v_mad_f32     v35, v35, v35, v36                      // 000000000964: D1C10023 04924723
  v_rcp_f32     v35, v35                                // 00000000096C: 7E464523
  v_madak_f32   v35, v36, v35, 0xbe800000               // 000000000970: 30464724 BE800000
  s_mov_b32     s18, 0x3faaaaab                         // 000000000978: BE9200FF 3FAAAAAB
  v_mul_f32     v35, v35, s18 clamp                     // 000000000980: D1058023 00002523
label_0262:
  // Rejoin: restore the exec mask saved by s_and_saveexec_b64.
  s_mov_b64     exec, s[10:11]                          // 000000000988: BEFE010A
  // (v36, v41, v44) = (s32, s33, s34) - (v12, v13, v0), scaled by v_rsq_f32 of its
  // squared length into (v36, v46, v45). Then (v47, v48, v49) is
  // (v19, v20, v22)*v21 + that vector, rescaled the same way, and
  // v51 = v30*v47 + v31*v48 + v33*v49.
  v_sub_f32     v36, s32, v12                           // 00000000098C: 04481820
  v_sub_f32     v41, s33, v13                           // 000000000990: 04521A21
  v_mul_legacy_f32  v43, v36, v36                       // 000000000994: 08564924
  v_sub_f32     v44, s34, v0                            // 000000000998: 04580022
  v_mad_f32     v45, v41, v41, v43                      // 00000000099C: D1C1002D 04AE5329
  v_mac_f32     v45, v44, v44                           // 0000000009A4: 2C5A592C
  v_rsq_f32     v45, v45                                // 0000000009A8: 7E5A492D
  v_mul_legacy_f32  v36, v36, v45                       // 0000000009AC: 08485B24
  v_mul_legacy_f32  v46, v41, v45                       // 0000000009B0: 085C5B29
  v_mad_legacy_f32  v47, v19, v21, v36                  // 0000000009B4: D1C0002F 04922B13
  v_mul_legacy_f32  v45, v44, v45                       // 0000000009BC: 085A5B2C
  v_mad_legacy_f32  v48, v20, v21, v46                  // 0000000009C0: D1C00030 04BA2B14
  v_mul_legacy_f32  v49, v47, v47                       // 0000000009C8: 08625F2F
  v_mad_legacy_f32  v50, v22, v21, v45                  // 0000000009CC: D1C00032 04B62B16
  v_mac_f32     v49, v48, v48                           // 0000000009D4: 2C626130
  v_mac_f32     v49, v50, v50                           // 0000000009D8: 2C626532
  v_rsq_f32     v49, v49                                // 0000000009DC: 7E624931
  v_mul_legacy_f32  v47, v47, v49                       // 0000000009E0: 085E632F
  v_mul_legacy_f32  v48, v48, v49                       // 0000000009E4: 08606330
  v_mul_f32     v51, v30, v47                           // 0000000009E8: 0A665F1E
  v_mul_legacy_f32  v49, v50, v49                       // 0000000009EC: 08626332
  v_mac_f32     v51, v48, v31                           // 0000000009F0: 2C663F30
  v_mac_f32     v51, v49, v33                           // 0000000009F4: 2C664331
  // From here the scalar address math and single-dword loads (offsets s5*48 + 0x5c,
  // + 0x4c and + 0x6c into s11, s18 and s19) are interleaved with VALU work.
  // v51 is limited to [0x3a83126f, 1.0] by v_med3_f32 and then turned into
  // (1 - v51)^5 through v_log_f32, a multiply by 5.0 and v_exp_f32 (result in v45).
  // v41 becomes the length of (s32, s33, s34) - (v12, v13, v0) via v_sqrt_f32.
  s_mov_b32     s10, 0x3a83126f                         // 0000000009F8: BE8A00FF 3A83126F
  v_mul_legacy_f32  v50, v17, v17                       // 000000000A00: 08642311
  s_add_u32     s11, s9, 0x0000005c                     // 000000000A04: 800BFF09 0000005C
  v_med3_f32    v51, v51, s10, 1.0                      // 000000000A0C: D1D60033 03C81533
  s_buffer_load_dword  s11, s[12:15], s11               // 000000000A14: C02002C6 0000000B
  v_mac_f32     v50, v40, v40                           // 000000000A1C: 2C645128
  s_add_u32     s18, s9, 0x0000004c                     // 000000000A20: 8012FF09 0000004C
  v_sub_f32     v51, 1.0, v51                           // 000000000A28: 046666F2
  s_buffer_load_dword  s18, s[12:15], s18               // 000000000A2C: C0200486 00000012
  v_mac_f32     v50, v42, v42                           // 000000000A34: 2C64552A
  v_mul_f32     v36, v4, v36                            // 000000000A38: 0A484904
  v_log_f32     v51, v51                                // 000000000A3C: 7E664333
  v_mad_legacy_f32  v41, v41, v41, v43                  // 000000000A40: D1C00029 04AE5329
  v_rsq_f32     v43, v50                                // 000000000A48: 7E564932
  v_mac_f32     v36, v7, v46                            // 000000000A4C: 2C485D07
  v_mul_f32     v46, v4, v47                            // 000000000A50: 0A5C5F04
  v_mad_legacy_f32  v41, v44, v44, v41                  // 000000000A54: D1C00029 04A6592C
  s_add_u32     s19, s9, 0x0000006c                     // 000000000A5C: 8013FF09 0000006C
  v_mac_f32     v36, v1, v45                            // 000000000A64: 2C485B01
  v_mac_f32     v46, v7, v48                            // 000000000A68: 2C5C6107
  s_buffer_load_dword  s19, s[12:15], s19               // 000000000A6C: C02004C6 00000013
  v_sqrt_f32    v41, v41                                // 000000000A74: 7E524F29
  v_med3_f32    v36, v36, s10, 1.0                      // 000000000A78: D1D60024 03C81524
  v_mac_f32     v46, v1, v49                            // 000000000A80: 2C5C6301
  s_mov_b32     s10, 0x38d1b717                         // 000000000A84: BE8A00FF 38D1B717
  // s11, s18 and s19 loaded above are consumed from this point on.
  s_waitcnt     lgkmcnt(0)                              // 000000000A8C: BF8C007F
  v_mov_b32     v44, s11                                // 000000000A90: 7E58020B
  v_mul_legacy_f32  v45, 0x40a00000, v51                // 000000000A94: 085A66FF 40A00000
  v_med3_f32    v46, v46, s10, 1.0                      // 000000000A9C: D1D6002E 03C8152E
  v_mul_f32     v47, v34, v36                           // 000000000AA4: 0A5E4922
  v_mul_legacy_f32  v17, v17, v43                       // 000000000AA8: 08225711
  v_subrev_f32  v44, s18, v44                           // 000000000AAC: 06585812
  v_exp_f32     v45, v45                                // 000000000AB0: 7E5A412D
  v_mad_f32     v48, v25, v46, -v46                     // 000000000AB4: D1C10030 84BA5D19
  v_madmk_f32   v47, v5, 0x3e000000, v47                // 000000000ABC: 2E5E5F05 3E000000
  v_mul_legacy_f32  v40, v40, v43                       // 000000000AC4: 08505728
  v_mul_f32     v17, s36, v17                           // 000000000AC8: 0A222224
  v_rcp_f32     v44, v44                                // 000000000ACC: 7E58452C
  v_mad_f32     v46, v46, v48, 1.0                      // 000000000AD0: D1C1002E 03CA612E
  v_mul_f32     v47, v24, v47                           // 000000000AD8: 0A5E5F18
  v_max_f32     v41, 0x3dcccccd, v41                    // 000000000ADC: 165252FF 3DCCCCCD
  v_mul_legacy_f32  v42, v42, v43                       // 000000000AE4: 0854572A
  v_mac_f32     v17, s37, v40                           // 000000000AE8: 2C225025
  v_mov_b32     v40, 0xbf666666                         // 000000000AEC: 7E5002FF BF666666
  v_mul_f32     v43, 0x40490f5d, v46                    // 000000000AF4: 0A565CFF 40490F5D
  v_max_f32     v47, 0x3a83126f, v47                    // 000000000AFC: 165E5EFF 3A83126F
  v_mac_f32     v17, s38, v42                           // 000000000B04: 2C225426
  v_mac_f32     v40, s19, v41                           // 000000000B08: 2C505213
  s_mov_b32     s10, 0x411ffffe                         // 000000000B0C: BE8A00FF 411FFFFE
  v_mul_f32     v42, v46, v43                           // 000000000B14: 0A54572E
  v_rcp_f32     v43, v47                                // 000000000B18: 7E56452F
  v_subrev_f32  v17, s18, v17                           // 000000000B1C: 06222212
  v_mul_f32     v40, v40, s10 clamp                     // 000000000B20: D1058028 00001528
  // s9 = s5*48 + 64: offset of the x4 load whose first three dwords scale the
  // contribution at the bottom of the loop.
  s_add_u32     s9, s9, 64                              // 000000000B28: 8009C009
  v_mul_f32     v41, v41, v41                           // 000000000B2C: 0A525329
  v_mad_legacy_f32  v46, v39, v45, v15                  // 000000000B30: D1C0002E 043E5B27
  v_rcp_f32     v42, v42                                // 000000000B38: 7E54452A
  s_buffer_load_dwordx4  s[32:35], s[12:15], s9         // 000000000B3C: C0280806 00000009
  // v17 = clamp((v17 - s18) * 1/(s11 - s18)), with the reciprocal held in v44.
  // Below, v17 is reshaped as t*t*(3 - 2t) and v40 as 1 - t*t*(3 - 2t); both are
  // multiplied into v35 / v17 together with 1/(v41*v41).
  v_mul_f32     v17, v17, v44 clamp                     // 000000000B44: D1058011 00025911
  v_mul_f32     v44, v40, v40                           // 000000000B4C: 0A585128
  v_madak_f32   v40, -2.0, v40, 0x40400000              // 000000000B50: 305050F5 40400000
  v_rcp_f32     v41, v41                                // 000000000B58: 7E524529
  v_mad_legacy_f32  v47, v37, v45, v9                   // 000000000B5C: D1C0002F 04265B25
  v_mad_legacy_f32  v45, v38, v45, v10                  // 000000000B64: D1C0002D 042A5B26
  v_sub_f32     v48, 1.0, v46                           // 000000000B6C: 04605CF2
  v_mul_f32     v49, v17, v17                           // 000000000B70: 0A622311
  v_madak_f32   v17, -2.0, v17, 0x40400000              // 000000000B74: 302222F5 40400000
  v_mad_f32     v40, -v44, v40, 1.0                     // 000000000B7C: D1C10028 23CA512C
  v_sub_f32     v44, 1.0, v47                           // 000000000B84: 04585EF2
  v_sub_f32     v50, 1.0, v45                           // 000000000B88: 04645AF2
  v_mul_f32     v48, v36, v48                           // 000000000B8C: 0A606124
  v_mul_f32     v51, 0x3ea2f9e9, v36                    // 000000000B90: 0A6648FF 3EA2F9E9
  v_mul_f32     v17, v49, v17                           // 000000000B98: 0A222331
  v_mul_f32     v35, v35, v40                           // 000000000B9C: 0A465123
  v_mul_f32     v40, 0x3e800000, v43                    // 000000000BA0: 0A5056FF 3E800000
  v_mul_f32     v43, v51, v44                           // 000000000BA8: 0A565933
  v_mul_f32     v44, v51, v50                           // 000000000BAC: 0A586533
  v_mul_f32     v48, 0x3ea2f9e9, v48                    // 000000000BB0: 0A6060FF 3EA2F9E9
  v_mul_f32     v17, v17, v35                           // 000000000BB8: 0A224711
  v_mul_f32     v35, v25, v42                           // 000000000BBC: 0A465519
  v_mul_f32     v42, v47, v40                           // 000000000BC0: 0A54512F
  v_mul_f32     v45, v45, v40                           // 000000000BC4: 0A5A512D
  v_mul_f32     v40, v46, v40                           // 000000000BC8: 0A50512E
  v_mul_f32     v43, v26, v43                           // 000000000BCC: 0A56571A
  v_mul_f32     v44, v27, v44                           // 000000000BD0: 0A58591B
  v_mul_f32     v46, v28, v48                           // 000000000BD4: 0A5C611C
  v_mul_f32     v17, v17, v41                           // 000000000BD8: 0A225311
  v_mul_f32     v41, v35, v42                           // 000000000BDC: 0A525523
  v_mul_f32     v42, v35, v45                           // 000000000BE0: 0A545B23
  v_mul_f32     v35, v35, v40                           // 000000000BE4: 0A465123
  v_mul_f32     v40, v6, v43                            // 000000000BE8: 0A505706
  v_mul_f32     v43, v6, v44                            // 000000000BEC: 0A565906
  v_mul_f32     v44, v6, v46                            // 000000000BF0: 0A585D06
  // Wait for s[32:35]; (v45, v46, v17) = (s32, s33, s34) * v17.
  s_waitcnt     lgkmcnt(0)                              // 000000000BF4: BF8C007F
  v_mul_f32     v45, s32, v17                           // 000000000BF8: 0A5A2220
  v_mul_f32     v46, s33, v17                           // 000000000BFC: 0A5C2221
  v_mul_f32     v17, s34, v17                           // 000000000C00: 0A222222
  v_mac_f32     v40, v36, v41                           // 000000000C04: 2C505324
  v_mac_f32     v43, v36, v42                           // 000000000C08: 2C565524
  v_mac_f32     v44, v36, v35                           // 000000000C0C: 2C584724
  // s5 = 1 << s5; add this pass's contribution into v8 / v11 / v14; clear the
  // processed bit from s4 and go round again.
  s_lshl_b32    s5, 1, s5                               // 000000000C10: 8E050581
  v_mac_f32     v8, v45, v40                            // 000000000C14: 2C10512D
  v_mac_f32     v11, v46, v43                           // 000000000C18: 2C16572E
  v_mac_f32     v14, v17, v44                           // 000000000C1C: 2C1C5911
  s_xor_b32     s4, s4, s5                              // 000000000C20: 88040504
  s_branch      label_0220                              // 000000000C24: BF82FF16
// label_030A: exit of the label_0220 loop. Reloads the image resource / sampler
// registers for the next loop and builds its bitmask: s4 = wave-wide bitwise OR
// of v18 over the lanes enabled by exec & s[64:65].
label_030A:
  // NOTE(review): presumably drains scalar loads still in flight (for example when
  // the loop above ran zero times) before s[24:31] / s[0:3] are loaded again.
  s_waitcnt     lgkmcnt(0)                              // 000000000C28: BF8C007F
  // s[24:31] <- 8 dwords at s[16:17] + 0xf0, s[0:3] <- 4 dwords at s[16:17] + 0x110;
  // the label_0330 loop passes them to image_sample_lz as resource and sampler.
  s_load_dwordx8  s[24:31], s[16:17], 0xf0              // 000000000C2C: C00E0608 000000F0
  s_load_dwordx4  s[0:3], s[16:17], 0x110               // 000000000C34: C00A0008 00000110
  s_and_b64     exec, exec, s[64:65]                    // 000000000C3C: 86FE407E
  v_mov_b32     v52, v18                                // 000000000C40: 7E680312
  // s[4:5] = exec, then exec = 0 | ~exec: lanes that were off get 0 (OR identity).
  s_orn2_saveexec_b64  s[4:5], 0                        // 000000000C44: BE842480
  v_mov_b32     v52, 0                                  // 000000000C48: 7E680280
  // exec = ~(0 & 0): all lanes enabled for the cross-lane steps.
  s_nand_b64    exec, 0, 0                              // 000000000C4C: 8BFE8080
  // NOTE(review): the s_nop instructions between the DPP steps look like the wait
  // states required between a VALU write of v52 and a DPP read of it - confirm.
  s_nop         0x0000                                  // 000000000C50: BF800000
  // row_shr:1/2/4/8 accumulate the OR of each 16-lane row into the row's last lane.
  v_or_b32      v52, v52, v52 row_shr:1                 // 000000000C54: 286868FA FF011134
  s_nop         0x0001                                  // 000000000C5C: BF800001
  v_or_b32      v52, v52, v52 row_shr:2                 // 000000000C60: 286868FA FF011234
  s_nop         0x0001                                  // 000000000C68: BF800001
  v_or_b32      v52, v52, v52 row_shr:4                 // 000000000C6C: 286868FA FF011434
  s_nop         0x0001                                  // 000000000C74: BF800001
  v_or_b32      v52, v52, v52 row_shr:8                 // 000000000C78: 286868FA FF011834
  s_nop         0x0001                                  // 000000000C80: BF800001
  // row_bcast:15 (rows 1 and 3 written) then row_bcast:31 (rows 2 and 3 written)
  // combine the row results, so lane 63 holds the OR of all 64 lanes.
  v_or_b32      v52, v52, v52 row_bcast:15 row_mask:0xa // 000000000C84: 286868FA AF014234
  s_nop         0x0001                                  // 000000000C8C: BF800001
  v_or_b32      v52, v52, v52 row_bcast:31 row_mask:0xc // 000000000C90: 286868FA CF014334
  // Restore the saved exec mask and move the reduced value to a scalar.
  s_mov_b64     exec, s[4:5]                            // 000000000C98: BEFE0104
  v_readlane_b32  s4, v52, 63                           // 000000000C9C: D2890004 00017F34
  // NOTE(review): probably alignment padding - label_0330 follows at 0xCC0, a
  // multiple of 64 bytes.
  s_nop         0x0000                                  // 000000000CA4: BF800000
  s_nop         0x0000                                  // 000000000CA8: BF800000
  s_nop         0x0000                                  // 000000000CAC: BF800000
  s_nop         0x0000                                  // 000000000CB0: BF800000
  s_nop         0x0000                                  // 000000000CB4: BF800000
  s_nop         0x0000                                  // 000000000CB8: BF800000
  s_nop         0x0000                                  // 000000000CBC: BF800000
label_0330:
  s_cmp_eq_i32  s4, 0                                   // 000000000CC0: BF008004
  s_cbranch_scc1  label_03F6                            // 000000000CC4: BF8500C4
  s_ff1_i32_b32  s5, s4                                 // 000000000CC8: BE851004
  s_mul_i32     s9, s5, 48                              // 000000000CCC: 9209B005
  s_add_u32     s10, s9, 0x00000650                     // 000000000CD0: 800AFF09 00000650
  s_buffer_load_dwordx4  s[32:35], s[12:15], s10        // 000000000CD8: C0280806 0000000A
  s_lshl_b32    s10, s5, 5                              // 000000000CE0: 8E0A8505
  s_add_u32     s10, s10, 0x00001450                    // 000000000CE4: 800AFF0A 00001450
  s_buffer_load_dword  s10, s[12:15], s10               // 000000000CEC: C0200286 0000000A
  s_waitcnt     lgkmcnt(0)                              // 000000000CF4: BF8C007F
  v_subrev_f32  v17, s32, v12                           // 000000000CF8: 06221820
  v_subrev_f32  v18, s33, v13                           // 000000000CFC: 06241A21
  v_subrev_f32  v35, s34, v0                            // 000000000D00: 06460022
  v_cubema_f32  v36, v17, v18, v35                      // 000000000D04: D1C70024 048E2511
  v_rcp_f32     v36, abs(v36)                           // 000000000D0C: D1620124 00000124
  v_cubetc_f32  v40, v17, v18, v35                      // 000000000D14: D1C60028 048E2511
  v_cubesc_f32  v41, v17, v18, v35                      // 000000000D1C: D1C50029 048E2511
  v_cubeid_f32  v42, v17, v18, v35                      // 000000000D24: D1C4002A 048E2511
  v_rndne_f32   v43, s10                                // 000000000D2C: 7E563C0A
  v_madak_f32   v45, v40, v36, 0x3fc00000               // 000000000D30: 305A4928 3FC00000
  v_madak_f32   v44, v41, v36, 0x3fc00000               // 000000000D38: 30584929 3FC00000
  v_madmk_f32   v46, v43, 0x41000000, v42               // 000000000D40: 2E5C552B 41000000
  image_sample_lz  v[40:41], v[44:47], s[24:31], s[0:3] dmask:0x3 da // 000000000D48: F09C4300 0006282C
  v_mul_legacy_f32  v36, v17, v17                       // 000000000D50: 08482311
  v_mad_f32     v42, v18, v18, v36                      // 000000000D54: D1C1002A 04922512
  v_mac_f32     v42, v35, v35                           // 000000000D5C: 2C544723
  v_rsq_f32     v42, v42                                // 000000000D60: 7E54492A
  v_max3_f32    v43, abs(v17), abs(v18), abs(v35)       // 000000000D64: D1D3072B 048E2511
  v_mul_legacy_f32  v17, -v17, v42                      // 000000000D6C: D1040011 20025511
  v_mul_legacy_f32  v44, -v18, v42                      // 000000000D74: D104002C 20025512
  v_mul_legacy_f32  v42, -v35, v42                      // 000000000D7C: D104002A 20025523
  s_waitcnt     vmcnt(0)                                // 000000000D84: BF8C0F70
  v_cmp_ngt_f32  vcc, v43, v40                          // 000000000D88: 7C96512B
  s_and_saveexec_b64  s[10:11], vcc                     // 000000000D8C: BE8A206A
  v_mov_b32     v40, 1.0                                // 000000000D90: 7E5002F2
  s_andn2_b64   exec, s[10:11], exec                    // 000000000D94: 89FE7E0A
  v_mad_f32     v41, -v40, v40, v41                     // 000000000D98: D1C10029 24A65128
  s_cbranch_execz  label_0375                           // 000000000DA0: BF88000C
  v_max_f32     v41, 0x3727c5ac, v41                    // 000000000DA4: 165252FF 3727C5AC
  v_sub_f32     v40, v43, v40                           // 000000000DAC: 0450512B
  v_mad_f32     v40, v40, v40, v41                      // 000000000DB0: D1C10028 04A65128
  v_rcp_f32     v40, v40                                // 000000000DB8: 7E504528
  v_madak_f32   v40, v41, v40, 0xbe800000               // 000000000DBC: 30505129 BE800000
  s_mov_b32     s18, 0x3faaaaab                         // 000000000DC4: BE9200FF 3FAAAAAB
  v_mul_f32     v40, v40, s18 clamp                     // 000000000DCC: D1058028 00002528
label_0375:
  s_mov_b64     exec, s[10:11]                          // 000000000DD4: BEFE010A
  v_mad_legacy_f32  v41, v19, v21, v17                  // 000000000DD8: D1C00029 04462B13
  v_mad_legacy_f32  v43, v20, v21, v44                  // 000000000DE0: D1C0002B 04B22B14
  v_mul_legacy_f32  v45, v41, v41                       // 000000000DE8: 085A5329
  v_mad_legacy_f32  v46, v22, v21, v42                  // 000000000DEC: D1C0002E 04AA2B16
  v_mac_f32     v45, v43, v43                           // 000000000DF4: 2C5A572B
  v_mac_f32     v45, v46, v46                           // 000000000DF8: 2C5A5D2E
  v_rsq_f32     v45, v45                                // 000000000DFC: 7E5A492D
  v_mul_legacy_f32  v41, v41, v45                       // 000000000E00: 08525B29
  v_mul_legacy_f32  v43, v43, v45                       // 000000000E04: 08565B2B
  v_mul_f32     v47, v30, v41                           // 000000000E08: 0A5E531E
  v_mul_legacy_f32  v45, v46, v45                       // 000000000E0C: 085A5B2E
  v_mac_f32     v47, v43, v31                           // 000000000E10: 2C5E3F2B
  v_mac_f32     v47, v45, v33                           // 000000000E14: 2C5E432D
  s_mov_b32     s10, 0x3a83126f                         // 000000000E18: BE8A00FF 3A83126F
  v_med3_f32    v46, v47, s10, 1.0                      // 000000000E20: D1D6002E 03C8152F
  v_sub_f32     v46, 1.0, v46                           // 000000000E28: 045C5CF2
  v_mul_f32     v17, v4, v17                            // 000000000E2C: 0A222304
  v_log_f32     v46, v46                                // 000000000E30: 7E5C432E
  v_mac_f32     v17, v7, v44                            // 000000000E34: 2C225907
  v_mul_f32     v41, v4, v41                            // 000000000E38: 0A525304
  v_mad_legacy_f32  v18, v18, v18, v36                  // 000000000E3C: D1C00012 04922512
  v_mac_f32     v17, v1, v42                            // 000000000E44: 2C225501
  v_mac_f32     v41, v7, v43                            // 000000000E48: 2C525707
  v_mad_legacy_f32  v18, v35, v35, v18                  // 000000000E4C: D1C00012 044A4723
  s_add_u32     s11, s9, 0x0000066c                     // 000000000E54: 800BFF09 0000066C
  v_med3_f32    v17, v17, s10, 1.0                      // 000000000E5C: D1D60011 03C81511
  v_mac_f32     v41, v1, v45                            // 000000000E64: 2C525B01
  s_mov_b32     s10, 0x38d1b717                         // 000000000E68: BE8A00FF 38D1B717
  s_buffer_load_dword  s11, s[12:15], s11               // 000000000E70: C02002C6 0000000B
  v_sqrt_f32    v18, v18                                // 000000000E78: 7E244F12
  v_mul_legacy_f32  v35, 0x40a00000, v46                // 000000000E7C: 08465CFF 40A00000
  v_med3_f32    v36, v41, s10, 1.0                      // 000000000E84: D1D60024 03C81529
  v_mul_f32     v41, v34, v17                           // 000000000E8C: 0A522322
  v_exp_f32     v35, v35                                // 000000000E90: 7E464123
  v_mad_f32     v42, v25, v36, -v36                     // 000000000E94: D1C1002A 84924919
  v_madmk_f32   v41, v5, 0x3e000000, v41                // 000000000E9C: 2E525305 3E000000
  v_mad_f32     v36, v36, v42, 1.0                      // 000000000EA4: D1C10024 03CA5524
  v_mul_f32     v41, v24, v41                           // 000000000EAC: 0A525318
  v_mul_f32     v42, 0x40490f5d, v36                    // 000000000EB0: 0A5448FF 40490F5D
  v_max_f32     v41, 0x3a83126f, v41                    // 000000000EB8: 165252FF 3A83126F
  v_max_f32     v18, 0x3dcccccd, v18                    // 000000000EC0: 162424FF 3DCCCCCD
  v_mov_b32     v43, 0xbf666666                         // 000000000EC8: 7E5602FF BF666666
  v_mul_f32     v36, v36, v42                           // 000000000ED0: 0A485524
  v_rcp_f32     v41, v41                                // 000000000ED4: 7E524529
  s_waitcnt     lgkmcnt(0)                              // 000000000ED8: BF8C007F
  v_mac_f32     v43, s11, v18                           // 000000000EDC: 2C56240B
  s_mov_b32     s10, 0x411ffffe                         // 000000000EE0: BE8A00FF 411FFFFE
  s_add_u32     s9, s9, 0x00000640                      // 000000000EE8: 8009FF09 00000640
  v_mul_f32     v18, v18, v18                           // 000000000EF0: 0A242512
  v_mad_legacy_f32  v42, v39, v35, v15                  // 000000000EF4: D1C0002A 043E4727
  v_rcp_f32     v36, v36                                // 000000000EFC: 7E484524
  s_buffer_load_dwordx4  s[32:35], s[12:15], s9         // 000000000F00: C0280806 00000009
  v_mul_f32     v43, v43, s10 clamp                     // 000000000F08: D105802B 0000152B
  v_rcp_f32     v18, v18                                // 000000000F10: 7E244512
  v_mad_legacy_f32  v44, v37, v35, v9                   // 000000000F14: D1C0002C 04264725
  v_mad_legacy_f32  v35, v38, v35, v10                  // 000000000F1C: D1C00023 042A4726
  v_sub_f32     v45, 1.0, v42                           // 000000000F24: 045A54F2
  v_mul_f32     v46, v43, v43                           // 000000000F28: 0A5C572B
  v_madak_f32   v43, -2.0, v43, 0x40400000              // 000000000F2C: 305656F5 40400000
  v_sub_f32     v47, 1.0, v44                           // 000000000F34: 045E58F2
  v_sub_f32     v48, 1.0, v35                           // 000000000F38: 046046F2
  v_mul_f32     v45, v17, v45                           // 000000000F3C: 0A5A5B11
  v_mul_f32     v49, 0x3ea2f9e9, v17                    // 000000000F40: 0A6222FF 3EA2F9E9
  v_mad_f32     v43, -v46, v43, 1.0                     // 000000000F48: D1C1002B 23CA572E
  v_mul_f32     v41, 0x3e800000, v41                    // 000000000F50: 0A5252FF 3E800000
  v_mul_f32     v46, v49, v47                           // 000000000F58: 0A5C5F31
  v_mul_f32     v47, v49, v48                           // 000000000F5C: 0A5E6131
  v_mul_f32     v45, 0x3ea2f9e9, v45                    // 000000000F60: 0A5A5AFF 3EA2F9E9
  v_mul_f32     v40, v40, v43                           // 000000000F68: 0A505728
  v_mul_f32     v36, v25, v36                           // 000000000F6C: 0A484919
  v_mul_f32     v43, v44, v41                           // 000000000F70: 0A56532C
  v_mul_f32     v35, v35, v41                           // 000000000F74: 0A465323
  v_mul_f32     v41, v42, v41                           // 000000000F78: 0A52532A
  v_mul_f32     v42, v26, v46                           // 000000000F7C: 0A545D1A
  v_mul_f32     v44, v27, v47                           // 000000000F80: 0A585F1B
  v_mul_f32     v45, v28, v45                           // 000000000F84: 0A5A5B1C
  v_mul_f32     v18, v40, v18                           // 000000000F88: 0A242528
  v_mul_f32     v40, v36, v43                           // 000000000F8C: 0A505724
  v_mul_f32     v35, v36, v35                           // 000000000F90: 0A464724
  v_mul_f32     v36, v36, v41                           // 000000000F94: 0A485324
  v_mul_f32     v41, v6, v42                            // 000000000F98: 0A525506
  v_mul_f32     v42, v6, v44                            // 000000000F9C: 0A545906
  v_mul_f32     v43, v6, v45                            // 000000000FA0: 0A565B06
  s_waitcnt     lgkmcnt(0)                              // 000000000FA4: BF8C007F
  v_mul_f32     v44, s32, v18                           // 000000000FA8: 0A582420
  v_mul_f32     v45, s33, v18                           // 000000000FAC: 0A5A2421
  v_mul_f32     v18, s34, v18                           // 000000000FB0: 0A242422
  v_mac_f32     v41, v17, v40                           // 000000000FB4: 2C525111
  v_mac_f32     v42, v17, v35                           // 000000000FB8: 2C544711
  v_mac_f32     v43, v17, v36                           // 000000000FBC: 2C564911
  s_lshl_b32    s5, 1, s5                               // 000000000FC0: 8E050581
  v_mac_f32     v8, v44, v41                            // 000000000FC4: 2C10532C
  v_mac_f32     v11, v45, v42                           // 000000000FC8: 2C16552D
  v_mac_f32     v14, v18, v43                           // 000000000FCC: 2C1C5712
  s_xor_b32     s4, s4, s5                              // 000000000FD0: 88040504
  s_branch      label_0330                              // 000000000FD4: BF82FF3A
label_03F6:
  s_waitcnt     lgkmcnt(0)                              // 000000000FD8: BF8C007F
  s_buffer_load_dword  s0, s[20:23], 0x20               // 000000000FDC: C022000A 00000020
  s_buffer_load_dwordx2  s[2:3], s[20:23], 0xf8         // 000000000FE4: C026008A 000000F8
  v_add_f32     v0, 1.0, v16                            // 000000000FEC: 020020F2
  s_load_dwordx8  s[20:27], s[16:17], 0x60              // 000000000FF0: C00E0508 00000060
  v_log_f32     v0, v0                                  // 000000000FF8: 7E004300
  s_load_dwordx4  s[12:15], s[16:17], 0x80              // 000000000FFC: C00A0308 00000080
  s_waitcnt     lgkmcnt(0)                              // 000000001004: BF8C007F
  v_mul_f32     v1, s2, v2                              // 000000001008: 0A020402
  v_mul_f32     v2, s3, v3                              // 00000000100C: 0A040603
  v_mul_f32     v3, s0, v0                              // 000000001010: 0A060000
  image_sample_lz  v[0:3], v[1:4], s[20:27], s[12:15] dmask:0xf // 000000001014: F09C0F00 00650001
  v_add_f32     v4, v23, v8                             // 00000000101C: 02081117
  v_add_f32     v5, v29, v11                            // 000000001020: 020A171D
  v_add_f32     v6, v32, v14                            // 000000001024: 020C1D20
  v_add_f32     v4, s6, v4                              // 000000001028: 02080806
  v_add_f32     v5, s7, v5                              // 00000000102C: 020A0A07
  v_add_f32     v6, s8, v6                              // 000000001030: 020C0C08
  s_waitcnt     vmcnt(0)                                // 000000001034: BF8C0F70
  v_mac_f32     v0, v4, v3                              // 000000001038: 2C000704
  v_mac_f32     v1, v5, v3                              // 00000000103C: 2C020705
  v_mac_f32     v2, v6, v3                              // 000000001040: 2C040706
  v_cvt_pkrtz_f16_f32  v0, v0, v1                       // 000000001044: D2960000 00020300
  v_cvt_pkrtz_f16_f32  v1, v2, 0                        // 00000000104C: D2960001 00010102
  s_mov_b64     exec, s[64:65]                          // 000000001054: BEFE0140
  exp           mrt0, v0, v0, v1, v1 done compr vm      // 000000001058: C4001C0F 00000100
  s_endpgm                                              // 000000001060: BF810000
end

; ----------------- PS Data ------------------------
; Input Semantic Mappings
;   [0] generic,  usageIdx 0, channelMask  3, param0, paramSlot0, DefaultVal={0,0,0,0}
;   [1] generic,  usageIdx 1, channelMask  7, param1, paramSlot1, DefaultVal={0,0,0,0}
;   [2] generic,  usageIdx 2, channelMask 15, param2, paramSlot2, DefaultVal={0,0,0,0}
;   [3] generic,  usageIdx 3, channelMask  7, param3, paramSlot3, DefaultVal={0,0,0,0}

codeLenInByte        = 4196 bytes;
pm4CrcCodeLength     = 4196 bytes;
pm4Crc               = 16741eadacb189ac

; launchModeFlags    = 0x00000400
externalLogicalBinding = TRUE;

userElementCount     = 0;
extUserElementCount  = 0;
NumVgprs             = 53;
NumSgprs             = 68;
FloatMode            = 192;
IeeeMode             = 0;
FlatPtr32            = 0;
ScratchSize          = 0 dwords/thread;
LDSByteSize          = 0 bytes/workgroup (compile time only);
ScratchWaveOffsetReg = s65535;

; Optimization Report
Intent           = SI_OPT_INTENT_UNSET

; Shader Stats
NumInst         = 794
uNumVALUInst    = 585
uNumVMemInst    = 9
uNumDSInst      = 0
uNumExportInst  = 1
uNumCFlowInst   = 63
uNumBranchInst   = 11
uNumSALUInst    = 87
uNumSMemInst    = 49
uNumAtomicOper    = 0
uNumLocalAtomicOper    = 0
uNumGlobalAtomicOper    = 0
uNumLocalLoads    = 0
uNumGlobalLoads    = 49
uNumLocalStores    = 0
uNumGlobalStores    = 0
uNumWait    = 30
uNumTexReads    = 0
uNumTexWrites    = 0
uNumF16Inst    = 0
uNumF32Inst    = 504
uNumFpInst    = 504
uNumInt16Inst    = 0
uNumInt32Inst    = 15
uNumIntInst    = 15
uNumCompilerSpills   = 0
uNumCompilerVectorSpills    = 0
uNumCompilerScalarSpills    = 0
uNumSMovInst    = 38
uNumVMovInst    = 22
uNumMovInst    = 60
MemSizePerThread (dwords/thread)    =0
; Scheduler stats
Highest schedule scores for top 3 basic blocks: 
                = 689.00 638.00 578.00 

; SPI_SHADER_PGM_RSRC2_PS = 0x0000001E
SSPRP:SCRATCH_EN            = 0
SSPRP:USER_SGPR             = 15
SSPRP:TRAP_PRESENT          = 0
SSPRP:WAVE_CNT_EN           = 0
SSPRP:EXTRA_LDS_SIZE        = 0
SSPRP:EXCP_EN               = 0
; SPI_SHADER_Z_FORMAT     = 0x00000000
SPZF:Z_EXPORT_FORMAT        = 0; SPI_SHADER_ZERO
; SPI_PS_IN_CONTROL       = 0x00000004
SPIC:NUM_INTERP             = 4
SPIC:PARAM_GEN              = 0
SPIC:FOG_ADDR               = 0
SPIC:BC_OPTIMIZE_DISABLE    = 0
SPIC:PASS_FOG_THROUGH_PS    = 0
; SPI_PS_INPUT_ADDR       = 0x00001302
SPIA:PERSP_CENTER_ENA       = 1
SPIA:POS_X_FLOAT_ENA        = 1
SPIA:POS_Y_FLOAT_ENA        = 1
SPIA:FRONT_FACE_ENA         = 1
; DB_SHADER_CONTROL       = 0x00000010
DB:Z_ORDER                  = 1
DB:CONSERVATIVE_Z_EXPORT    = 0; EXPORT_ANY_Z
; CB_SHADER_MASK          = 0x0000000F
CB:OUTPUT0_ENABLE           = 15
=== STATISTICS ===

VGPRs: 53
SGPRs: 68
Scratch usage (bytes): 0
LDS usage (bytes): 0

SPIR-V: output.asm.txt

jiaolu commented 5 years ago

Windows uses AMD's internal (closed-source) compiler backend, while the Linux GPUOpen driver uses the open-source, LLVM-based AMD Lightning compiler backend. The open-source driver is therefore confined to what LLVM intrinsics can express, e.g.:

  1. The open-source compiler has to use mov.dpp to update the swizzled value, whereas in the internal compiler backend the "dpp modifier" can be applied directly to the add/min/max instruction itself.

  2. The open-source compiler does not control the execution mask directly, so it resorts to using the thread ID to turn threads on/off, etc.