beehive-lab / TornadoVM

TornadoVM: A practical and efficient heterogeneous programming framework for managed languages
https://www.tornadovm.org
Apache License 2.0
1.17k stars 110 forks source link

[fix] Added fp64 kind for radians operations in PTX compiler #488

Closed stratika closed 1 month ago

stratika commented 1 month ago

Description

This PR fixes radians operations on double (fp64) inputs when the PTX backend is used.

Problem description

PTX has no dedicated radians instruction, so the compiler implements the operation as (pi / 180) * degrees. However, the compiler contained a check that allowed this lowering only for fp32 operands.

In this PR, I added support for fp64 types.

Backend/s tested

Mark the backends affected by this PR.

OS tested

Mark the OS where this PR is tested.

Did you check on FPGAs?

If it is applicable, check your changes on FPGAs.

How to test the new patch?

Build TornadoVM with PTX:

make BACKEND=ptx

Run:

tornado-test -V --printKernel --fast uk.ac.manchester.tornado.unittests.math.TestTornadoMathCollection#testTornadoMathRadiansDouble

Expected output:

WARNING: Using incubator modules: jdk.incubator.vector
.version 7.6 
.target sm_86 
.address_size 64 

// Kernel: multiplies f64 elements of the buffer `a` in place by a constant
// (the degrees -> radians factor described above as pi / 180).
// Parameters:
//   kernel_context - 64-bit global pointer; loaded into rud0 but not used
//                    by any later instruction in this kernel.
//   a              - 64-bit global pointer to the f64 data that is read and
//                    overwritten.
// Each thread starts at its global x index and advances by the total number
// of threads in x until the index reaches 128.
.visible .entry s0_t0_testtornadoradians_arrays_doublearray(.param .u64 .ptr .global .align 8 kernel_context, .param .u64 .ptr .global .align 8 a) {
    // Register banks: rsi = s32, rfd = f64, rpb = predicate, rui = u32,
    // rsd = s64, rud = u64. `name<N>` declares name0 .. name(N-1).
    .reg .s32 rsi<6>;
    .reg .f64 rfd<3>;
    .reg .pred rpb<2>;
    .reg .u32 rui<5>;
    .reg .s64 rsd<3>;
    .reg .u64 rud<5>;

BLOCK_0:
    // Load both kernel arguments: rud0 = kernel_context, rud1 = a.
    ld.param.u64    rud0, [kernel_context];
    ld.param.u64    rud1, [a];
    // rsi0 = %nctaid.x * %ntid.x (CTAs in the grid * threads per CTA),
    // computed as a 64-bit product and narrowed to s32. Used as the loop step.
    mov.u32 rui0, %nctaid.x;
    mov.u32 rui1, %ntid.x;
    mul.wide.u32    rud2, rui0, rui1;
    cvt.s32.u64 rsi0, rud2;
    // rsi1 = %ctaid.x * %ntid.x + %tid.x: this thread's global index in x.
    mov.u32 rui2, %tid.x;
    mov.u32 rui3, %ctaid.x;
    mad.lo.s32  rsi1, rui3, rui1, rui2;

BLOCK_1:
    // rsi2 is the loop induction variable i, initialised to the global index.
    mov.s32 rsi2, rsi1;
LOOP_COND_1:
    // Exit when i >= 128.
    // NOTE(review): 128 is presumably the test array's length baked in by the
    // compiler - confirm against the unit test.
    setp.lt.s32 rpb0, rsi2, 128;
    @!rpb0 bra  BLOCK_3;

BLOCK_2:
    // Element address = a + ((i + 3) << 3), i.e. 8-byte slots with the index
    // biased by 3.
    // NOTE(review): the +3 presumably skips a 24-byte array header that
    // precedes the payload - confirm against TornadoVM's array layout.
    add.s32 rsi3, rsi2, 3;
    cvt.s64.s32 rsd0, rsi3;
    shl.b64 rsd1, rsd0, 3;
    add.u64 rud3, rud1, rsd1;
    ld.global.f64   rfd0, [rud3];
    // Scale by the f64 hex literal 0D3F91DF4720000000 (approximately
    // 0.0174533) using round-to-nearest-even.
    // NOTE(review): the low 29 mantissa bits of this literal are zero, so it
    // is exactly representable as a float32; the nearest double to pi / 180
    // should be 0D3F91DF46A2529D39 - confirm whether the fp64 path is meant
    // to emit the full-precision constant.
    mul.rn.f64  rfd1, 0D3F91DF4720000000, rfd0;
    // Write the result back to the same address (in-place update).
    st.global.f64   [rud3], rfd1;
    // i += total thread count, then re-test the loop condition.
    add.s32 rsi4, rsi0, rsi2;
    mov.s32 rsi2, rsi4;
    bra.uni LOOP_COND_1;

BLOCK_3:
    ret;
}

Test: class uk.ac.manchester.tornado.unittests.math.TestTornadoMathCollection#testTornadoMathRadiansDouble
    Running test: testTornadoMathRadiansDouble ................  [PASS] 
Test ran: 1, Failed: 0, Unsupported: 0