Status: Closed (nomaddo closed this issue 3 years ago)
@nomaddo I think this is resolved, what do you think?
I don't think so.
In the latest master branch (https://github.com/doe300/VC4C/commit/7968f6731adc3b85e1aa660403e9ad1bb1e9e891), the output is as follows:
$ /cmake-build-debug/src/VC4C -O3 -fno-unroll-loops --asm ./testing/bugs/68_remove_moves_of_r4.cl
// Module with 1 kernels, global data with 0 words (64-bit each), starting at offset 1 words and 0 words of stack-frame
// Kernel 'test' with 45 instructions, offset 2, with following parameters: __global in out float* a (4 B, 1 items), int n (4 B, 1 items)
// label: %start_of_function
or ra0, unif, unif
or -, unif, unif
or r1, 0 (0), 0 (0)
// label: %tmp.1
or -, mutex_acq, mutex_acq
ldi vpr_setup, vdr_setup(rows: 1, elements: 1 words, address: h32(0,0))
or r1, r1, r1 // Still unnecessary instruction here.
shl r0, r1, 2 (2)
add r2, ra0, r0
ldi vpr_setup, vdr_setup(memory pitch: 0 bytes)
or vpr_addr, r2, r2
add r1, r1, 1 (1)
or -, vpr_wait, vpr_wait
ldi vpr_setup, vpm_setup(num: 1, size: 16 words, stride: 1 rows, address: h32(0))
or r0, vpm, vpm
or mutex_rel, 1 (1), 1 (1)
or -, mutex_acq, mutex_acq
ldi vpw_setup, vpm_setup(size: 16 words, stride: 1 rows, address: h32(0))
ldi vpw_setup, vdw_setup(rows: 1, elements: 1 words, address: h32(0,0))
fadd vpm, r0, 1.000000 (32)
or r0, r1, r1
max r0, r0, 10 (10)
xor.setf -, r0, r1
or.ifzc r0, 1 (1), 1 (1)
ldi vpw_setup, vdw_setup(memory stride: 0 bytes)
or vpw_addr, r2, r2
xor.ifz r0, 1 (1), 1 (1)
or.setf -, r0, r0
or.ifzc r1, r1, r1
or -, vpw_wait, vpw_wait
or mutex_rel, 1 (1), 1 (1)
or.setf -, elem_num, r0
brr.ifallzc (pc+4) + -32 // to %tmp.1
nop.never
nop.never
nop.never
// label: %tmp.2, label: %end_of_function
or r0, unif, unif
or.setf -, elem_num, r0
brr.ifallzc (pc+4) + -41 // to %start_of_function
nop.never
nop.never
nop.never
not irq, qpu_num
nop.thrend.never
nop.never
nop.never
Solved with 41ba6a69fd6ba7e01538d39ea1fc4b995ed672c3