Open linehill opened 1 year ago
The following case fails with Clang-17 and `-O2` on Intel hardware (intel-compute-runtime 23.26.26690.22):
// Reproducer for the isinf()/isnan() check on a finite float.
// Default build: a single kernel evaluates isinf(Val) || isnan(Val).
// With -DSEPARATE: isinf() and isnan() are evaluated in two separate kernels.
// In both variants the process exit code is the computed flag, so a correct
// run exits with zero.
#include <hip/hip_runtime.h>

#ifndef SEPARATE
// Stores in *Res whether Val is infinite or NaN (combined in one expression).
__global__ void isInfOrNan(bool *Res, float Val) {
  *Res = (isinf(Val) || isnan(Val));
}
#else
// Stores in *Res whether Val is infinite.
__global__ void isInf(bool *Res, float Val) { *Res = isinf(Val); }
// Stores in *Res whether Val is NaN.
__global__ void isNan(bool *Res, float Val) { *Res = isnan(Val); }
#endif

int main() {
#ifndef SEPARATE
  // Host flag starts as true, so it only becomes false if the kernel result
  // is actually copied back.
  bool InfOrNan = true, *ResD;
  (void)hipMalloc(&ResD, sizeof(bool));
  // 1.23f is an ordinary finite value: neither infinite nor NaN.
  isInfOrNan<<<1, 1>>>(ResD, 1.23f);
  (void)hipMemcpy(&InfOrNan, ResD, sizeof(bool), hipMemcpyDeviceToHost);
  printf("InfOrNan=%d\n", InfOrNan);
  return InfOrNan; // Should return zero.
#else
  // Host flags start as true, so they only become false if the kernel
  // results are actually copied back.
  bool Inf = true, Nan = true, *ResD;
  (void)hipMalloc(&ResD, sizeof(bool));
  // Both inputs are ordinary finite values; the same device byte is reused
  // for the two results.
  isInf<<<1, 1>>>(ResD, 1.23f);
  (void)hipMemcpy(&Inf, ResD, sizeof(bool), hipMemcpyDeviceToHost);
  isNan<<<1, 1>>>(ResD, 3.21f);
  (void)hipMemcpy(&Nan, ResD, sizeof(bool), hipMemcpyDeviceToHost);
  printf("InfOrNan=%d\n", Inf || Nan);
  return Inf || Nan; // Should return zero.
#endif
}
The case passes on AMD, and it passes on chipStar if:
- `-O0` is used, or
- `isinf` and `isnan` are computed in separate kernels (`-O2 -DSEPARATE`).
I'm guessing that the appearance of the new `llvm.is.fpclass` intrinsics since LLVM-17, with certain test-bit combinations, is causing an error somewhere.
The following case fails with Clang-17 and `-O2` on Intel hardware (intel-compute-runtime 23.26.26690.22):

The case passes on AMD, and it passes on chipStar if `-O0` is used, or if `isinf` and `isnan` are computed in separate kernels (`-O2 -DSEPARATE`).

I'm guessing that the appearance of the new `llvm.is.fpclass` intrinsics since LLVM-17, with certain test-bit combinations, is causing an error somewhere.