bagel99 / llvm-my66000

This is a fork of the LLVM project. The code in branch my66000 supports Mitch Alsup's MY66000. The code in branch mcore supports the Motorola MCore.
http://llvm.org
Other
2 stars 2 forks source link

Putting addresses into registers #39

Open tkoenig1 opened 1 year ago

tkoenig1 commented 1 year ago

The following code (somewhat artificial)

/* Both arrays are only declared here (incomplete array types); their
   definitions are not part of this snippet. */
extern double a[],b[];

/*
 * Stores b[j] + x into each of a[0] .. a[7].
 *
 * The index into b is rotated within each group of four elements
 * (3,0,1,2 for a[0..3] and 7,4,5,6 for a[4..7]), so all eight statements
 * read a distinct element of b and write a distinct element of a.
 */
void foo(double x)
{
  /* first group: a[0..3] <- b[3], b[0], b[1], b[2] */
  a[0] = b[3] + x;
  a[1] = b[0] + x;
  a[2] = b[1] + x;
  a[3] = b[2] + x;  
  /* second group: a[4..7] <- b[7], b[4], b[5], b[6] */
  a[4] = b[7] + x;
  a[5] = b[4] + x;
  a[6] = b[5] + x;
  a[7] = b[6] + x;  
}

is currently translated to

        ldd     r2,[ip,b+24]
        fadd    r2,r2,r1
        std     r2,[ip,a]
        ldd     r2,[ip,b]
        fadd    r2,r2,r1
        std     r2,[ip,a+8]
        ldd     r2,[ip,b+8]
        fadd    r2,r2,r1
        std     r2,[ip,a+16]
        ldd     r2,[ip,b+16]
        fadd    r2,r2,r1
        std     r2,[ip,a+24]
        ldd     r2,[ip,b+56]
        fadd    r2,r2,r1
        std     r2,[ip,a+32]
        ldd     r2,[ip,b+32]
        fadd    r2,r2,r1
        std     r2,[ip,a+40]
        ldd     r2,[ip,b+40]
        fadd    r2,r2,r1
        std     r2,[ip,a+48]
        ldd     r2,[ip,b+48]
        fadd    r1,r2,r1
        std     r1,[ip,a+56]
        ret

where every load and store potentially carries its own 32-bit or 64-bit address constant, which uses up a lot of code space.

It would probably be better to load the addresses of a and b into registers and then use those as base registers.

This is similar to issue #2, but this time with addresses instead of constants.

By comparison, rv64cg translates this to

# rv64cg compiler output for foo(double x).
# The address of b is loaded once into a0 and the address of a once into a1
# (each via an auipc/ld pair using a GOT-relative relocation); every later
# load and store then uses a small immediate offset from that base register.
# fa0 is added to every loaded element -- presumably it holds the argument x.
# Offsets are byte offsets: with 8-byte doubles, 24(a0) corresponds to b[3].
foo:                                    # @foo
.LBB0_1:                                # Label of block must be emitted
        # a0 = address of b, fetched through its GOT entry
        auipc   a0, %got_pcrel_hi(b)
        ld      a0, %pcrel_lo(.LBB0_1)(a0)
        fld     ft0, 24(a0)
.LBB0_2:                                # Label of block must be emitted
        # a1 = address of a, fetched through its GOT entry
        auipc   a1, %got_pcrel_hi(a)
        ld      a1, %pcrel_lo(.LBB0_2)(a1)
        # From here on the loads (fld), adds (fadd.d) and stores (fsd) of
        # successive elements are interleaved, alternating between ft0 and ft1.
        fld     ft1, 0(a0)
        fadd.d  ft0, ft0, fa0
        fsd     ft0, 0(a1)
        fld     ft0, 8(a0)
        fadd.d  ft1, ft1, fa0
        fsd     ft1, 8(a1)
        fld     ft1, 16(a0)
        fadd.d  ft0, ft0, fa0
        fsd     ft0, 16(a1)
        fld     ft0, 56(a0)
        fadd.d  ft1, ft1, fa0
        fsd     ft1, 24(a1)
        fld     ft1, 32(a0)
        fadd.d  ft0, ft0, fa0
        fsd     ft0, 32(a1)
        fld     ft0, 40(a0)
        fadd.d  ft1, ft1, fa0
        # The last load of b (offset 48) goes into a third register, ft2,
        # while ft0 and ft1 still hold values that have not been stored yet.
        fld     ft2, 48(a0)
        fsd     ft1, 40(a1)
        fadd.d  ft0, ft0, fa0
        fsd     ft0, 48(a1)
        fadd.d  ft0, ft2, fa0
        fsd     ft0, 56(a1)
        ret

which is shorter in code size.