ROCm / clr

MIT License
85 stars 35 forks source link

h2rcp produces incorrect result on rocm5.6 #8

Closed Engininja2 closed 7 months ago

Engininja2 commented 11 months ago

Calling the h2rcp() in rocm5.6 looks like it's converting the underlying storage as a short into a float and doing the reciprocal on that. Instead of 1/4.0=0.25, it produces 0.000057.

I tested this with gfx1010 in the docker image rocm/dev-ubuntu-20.04:5.6-complete but targeting gfx1030 gives an identical kernel disassembly so the same error should happen.

https://github.com/ROCm-Developer-Tools/hipamd/blob/4209792929ddf54ba9530813b7879cfdee42df14/include/hip/amd_detail/amd_hip_fp16.h#L1677-L1680

#include <stdio.h>
#include <hip/hip_runtime.h>
#include <hip/hip_fp16.h>

__device__ __forceinline__ __half2 __alternate_h2rcp(__half2 x) {
    return _Float16_2{static_cast<_Float16>(__builtin_amdgcn_rcph(static_cast<__half2_raw>(x).data.x)),
        static_cast<_Float16>(__builtin_amdgcn_rcph(static_cast<__half2_raw>(x).data.y))};
}

__global__ void do_rcp(half2* result, const half2* source) 
{
    result[0] = h2rcp(source[0]);
    result[1] = __alternate_h2rcp(source[1]);
}

int main(int argc, char *argv[]) 
{
    half2 *src_d, *result_d;
    half2 *src_h, *result_h;
    size_t N = 2;
    size_t Nbytes = N * sizeof(half2);

    src_h = (half2*)malloc(Nbytes);
    result_h = (half2*)malloc(Nbytes);
    src_h[0] = __floats2half2_rn(4.0f, 9.0f);
    src_h[1] = __floats2half2_rn(4.0f, 9.0f);

    hipMalloc(&src_d, Nbytes);
    hipMalloc(&result_d, Nbytes);
    hipMemcpy(src_d, src_h, Nbytes, hipMemcpyHostToDevice);

    hipLaunchKernelGGL(do_rcp, dim3(1), dim3(1), 0, 0,
        result_d, src_d);

    hipMemcpy(result_h, result_d, Nbytes, hipMemcpyDeviceToHost);

    printf("rocm: 1/%f = %f\n", __low2float(src_h[0]), __low2float(result_h[0]));
    printf("rocm: 1/%f = %f\n", __high2float(src_h[0]), __high2float(result_h[0]));
    printf("alternate: 1/%f = %f\n", __low2float(src_h[1]), __low2float(result_h[1]));
    printf("alternate: 1/%f = %f\n", __high2float(src_h[1]), __high2float(result_h[1]));
}
cjatin commented 11 months ago

Thanks for reporting this.

Will look into this.

cjatin commented 10 months ago

I have raised PR internally to fix it. It should be fixed in the next release.

cjatin commented 7 months ago

https://github.com/ROCm-Developer-Tools/clr/commit/295963fe7af03f51013ccacc8c290eda481a71c3