doe300 / VC4C

Compiler for the VC4CL OpenCL implementation
MIT License
118 stars 37 forks source link

Removable move instructions remain #87

Closed nomaddo closed 3 years ago

nomaddo commented 6 years ago
$ ./build/VC4C -O3 -fno-unroll-loops --asm -o /tmp/hoge.s testing/bugs/68_remove_moves_of_r4.cl
(gdb) p kernel.dumpInstructions()
[D] Tue Apr 24 23:12:14 2018: Basic block ----
[D] Tue Apr 24 23:12:14 2018: label: %start_of_function
[D] Tue Apr 24 23:12:14 2018: f32* %a = register unif
[D] Tue Apr 24 23:12:14 2018: i32 %n = register unif
[D] Tue Apr 24 23:12:14 2018: i32 %i.04 = i32 0 (0) (phi)
[D] Tue Apr 24 23:12:14 2018: br %tmp.1
[D] Tue Apr 24 23:12:14 2018: nop
[D] Tue Apr 24 23:12:14 2018: nop
[D] Tue Apr 24 23:12:14 2018: nop
[D] Tue Apr 24 23:12:14 2018: Block end ----
[D] Tue Apr 24 23:12:14 2018: Basic block ----
[D] Tue Apr 24 23:12:14 2018: label: %tmp.2
[D] Tue Apr 24 23:12:14 2018: br %end_of_function
[D] Tue Apr 24 23:12:14 2018: nop
[D] Tue Apr 24 23:12:14 2018: nop
[D] Tue Apr 24 23:12:14 2018: nop
[D] Tue Apr 24 23:12:14 2018: Block end ----
[D] Tue Apr 24 23:12:14 2018: Basic block ----
[D] Tue Apr 24 23:12:14 2018: label: %tmp.1
[D] Tue Apr 24 23:12:14 2018: i32 %use_with_literal.12 = i32 %i.04                                 // this move should be removed
[D] Tue Apr 24 23:12:14 2018: i32 %index_offset.4 = shl i32 %use_with_literal.12, i32 2 (2) // ... and %use_with_literal.12 should be replaced with %i.04
[D] Tue Apr 24 23:12:14 2018: f32* %arrayidx = add f32* %a, i32 %index_offset.4
[D] Tue Apr 24 23:12:14 2018: nop
[D] Tue Apr 24 23:12:14 2018: register tmu0s = f32* %arrayidx
[D] Tue Apr 24 23:12:14 2018: nop (load_tmu0 )
[D] Tue Apr 24 23:12:14 2018: mutex_acq
[D] Tue Apr 24 23:12:14 2018: register vpw_setup = loadi vpm_setup(size: 16 words, stride: 1 rows, address: h32(0))
[D] Tue Apr 24 23:12:14 2018: register vpm = fadd register r4, f32 1.000000 (32)
[D] Tue Apr 24 23:12:14 2018: register vpw_setup = loadi vdw_setup(rows: 1, elements: 1 words, address: h32(0))
[D] Tue Apr 24 23:12:14 2018: register vpw_setup = loadi vdw_setup(stride: 0)
[D] Tue Apr 24 23:12:14 2018: register vpw_addr = f32* %arrayidx
[D] Tue Apr 24 23:12:14 2018: i32 %use_with_literal.13 = i32 %i.04
[D] Tue Apr 24 23:12:14 2018: i32 %inc = add i32 %use_with_literal.13, i32 1 (1)
[D] Tue Apr 24 23:12:14 2018: register - = register vpw_wait
[D] Tue Apr 24 23:12:14 2018: mutex_rel
[D] Tue Apr 24 23:12:14 2018: i32 %use_with_literal.14 = i32 %inc                   // this should be removed
[D] Tue Apr 24 23:12:14 2018: i32 %icomp.5 = max i32 %use_with_literal.14, i32 10 (10) // ... and %use_with_literal.14 can be replaced with %inc
[D] Tue Apr 24 23:12:14 2018: register - = xor i32 %icomp.5, i32 %inc (setf )
[D] Tue Apr 24 23:12:14 2018: bool %cmp = v8min bool 1 (1), bool 1 (1) (ifzc ) and bool %cmp = xor bool 1 (1), bool 1 (1) (ifz )
[D] Tue Apr 24 23:12:14 2018: register - = bool %cmp (setf )
[D] Tue Apr 24 23:12:14 2018: i32 %i.04 = i32 %inc (ifzc phi)
[D] Tue Apr 24 23:12:14 2018: register - = or register elem_num, bool %cmp (setf )
[D] Tue Apr 24 23:12:14 2018: br.ifzc %tmp.1 (on bool %cmp) (ifzc )
[D] Tue Apr 24 23:12:14 2018: nop
[D] Tue Apr 24 23:12:14 2018: nop
[D] Tue Apr 24 23:12:14 2018: nop
[D] Tue Apr 24 23:12:14 2018: br.ifz %tmp.2 (on bool %cmp) (ifz )
[D] Tue Apr 24 23:12:14 2018: nop
[D] Tue Apr 24 23:12:14 2018: nop
[D] Tue Apr 24 23:12:14 2018: nop
[D] Tue Apr 24 23:12:14 2018: Block end ----
[D] Tue Apr 24 23:12:14 2018: Basic block ----
[D] Tue Apr 24 23:12:14 2018: label: %end_of_function
[D] Tue Apr 24 23:12:14 2018: i32 %group_loop_size = register unif
[D] Tue Apr 24 23:12:14 2018: register - = or register elem_num, i32 %group_loop_size (setf )
[D] Tue Apr 24 23:12:14 2018: br.ifzc %start_of_function (on i32 %group_loop_size) (ifzc )
[D] Tue Apr 24 23:12:14 2018: nop
[D] Tue Apr 24 23:12:14 2018: nop
[D] Tue Apr 24 23:12:14 2018: nop
[D] Tue Apr 24 23:12:14 2018: register irq = not register qpu_num
[D] Tue Apr 24 23:12:14 2018: nop (thrend )
[D] Tue Apr 24 23:12:14 2018: nop
[D] Tue Apr 24 23:12:14 2018: nop
[D] Tue Apr 24 23:12:14 2018: Block end ----
$1 = void
doe300 commented 5 years ago

@nomaddo I think this is resolved, what do you think?

nomaddo commented 5 years ago

I don't think so.

In the latest master branch (https://github.com/doe300/VC4C/commit/7968f6731adc3b85e1aa660403e9ad1bb1e9e891), the output is as follows:

$ /cmake-build-debug/src/VC4C -O3 -fno-unroll-loops --asm ./testing/bugs/68_remove_moves_of_r4.cl
// Module with 1 kernels, global data with 0 words (64-bit each), starting at offset 1 words and 0 words of stack-frame
// Kernel 'test' with 45 instructions, offset 2, with following parameters: __global in out float* a (4 B, 1 items), int n (4 B, 1 items)
// label: %start_of_function
or ra0, unif, unif
or -, unif, unif
or r1, 0 (0), 0 (0)
// label: %tmp.1
or -, mutex_acq, mutex_acq
ldi vpr_setup, vdr_setup(rows: 1, elements: 1 words, address: h32(0,0))
or r1, r1, r1                      // This instruction is still unnecessary.
shl r0, r1, 2 (2)
add r2, ra0, r0
ldi vpr_setup, vdr_setup(memory pitch: 0 bytes)
or vpr_addr, r2, r2
add r1, r1, 1 (1)
or -, vpr_wait, vpr_wait
ldi vpr_setup, vpm_setup(num: 1, size: 16 words, stride: 1 rows, address: h32(0))
or r0, vpm, vpm
or mutex_rel, 1 (1), 1 (1)
or -, mutex_acq, mutex_acq
ldi vpw_setup, vpm_setup(size: 16 words, stride: 1 rows, address: h32(0))
ldi vpw_setup, vdw_setup(rows: 1, elements: 1 words, address: h32(0,0))
fadd vpm, r0, 1.000000 (32)
or r0, r1, r1
max r0, r0, 10 (10)
xor.setf -, r0, r1
or.ifzc r0, 1 (1), 1 (1)
ldi vpw_setup, vdw_setup(memory stride: 0 bytes)
or vpw_addr, r2, r2
xor.ifz r0, 1 (1), 1 (1)
or.setf -, r0, r0
or.ifzc r1, r1, r1
or -, vpw_wait, vpw_wait
or mutex_rel, 1 (1), 1 (1)
or.setf -, elem_num, r0
brr.ifallzc (pc+4) + -32 // to %tmp.1
nop.never 
nop.never 
nop.never 
// label: %tmp.2, label: %end_of_function
or r0, unif, unif
or.setf -, elem_num, r0
brr.ifallzc (pc+4) + -41 // to %start_of_function
nop.never 
nop.never 
nop.never 
not irq, qpu_num
nop.thrend.never 
nop.never 
nop.never 
doe300 commented 3 years ago

Solved with 41ba6a69fd6ba7e01538d39ea1fc4b995ed672c3