Clang backend error for atomic_default_mem_order(acq_rel) for NVIDIA GPU

fel-cab commented 4 months ago

The following code doesn't compile when using: clang -fopenmp -O3 --offload-arch=sm_80

fatal error: error in backend: Cannot select: 0xc05d820: i32,ch = AtomicLoad<(dereferenceable load acquire (s32) from %ir.3)> 0xc1836b0, 0xc0a6ff0, example.c:22:11
  0xc0a6ff0: i64,ch = CopyFromReg 0xc1836b0, Register:i64 %2, example.c:22:11
    0xc027260: i64 = Register %2
In function: __omp_offloading_10301_104476_main_l12_omp_outlined

https://godbolt.org/z/fMhTvfbdP

Code:

#include <omp.h>
#include <stdio.h>
#include <stdlib.h>

#pragma omp requires atomic_default_mem_order(acq_rel)

int main() {

  int x = 0, y = 0;
  int errors = 0;

#pragma omp target parallel num_threads(2) map(tofrom: x, y, errors) 
   {
       int thrd = omp_get_thread_num();
       int tmp = 0;
       if (thrd == 0) {
          x = 10;
          #pragma omp atomic write 
          y = 1;
       } else {
          #pragma omp atomic read 
          tmp = y;
       }
       if (thrd != 0) {
          tmp = 0;
          while (tmp == 0) {
            #pragma omp atomic read 
            tmp = y;
          }
          if(x != 10) errors++;
       }
   }
   if(errors > 0)
     printf("Test Failed\n");
   else
     printf("Test Pass\n");

   return errors;
}

llvmbot commented 4 months ago

@llvm/issue-subscribers-openmp

Author: Felipe Cabarcas (fel-cab)

The following code doesn't compile when using: `clang -fopenmp -O3 --offload-arch=sm_80` ``` fatal error: error in backend: Cannot select: 0xc05d820: i32,ch = AtomicLoad<(dereferenceable load acquire (s32) from %ir.3)> 0xc1836b0, 0xc0a6ff0, example.c:22:11 0xc0a6ff0: i64,ch = CopyFromReg 0xc1836b0, Register:i64 %2, example.c:22:11 0xc027260: i64 = Register %2 In function: __omp_offloading_10301_104476_main_l12_omp_outlined ``` https://godbolt.org/z/fMhTvfbdP Code: ``` #include <omp.h> #include <stdio.h> #include <stdlib.h> #pragma omp requires atomic_default_mem_order(acq_rel) int main() { int x = 0, y = 0; int errors = 0; #pragma omp target parallel num_threads(2) map(tofrom: x, y, errors) { int thrd = omp_get_thread_num(); int tmp = 0; if (thrd == 0) { x = 10; #pragma omp atomic write y = 1; } else { #pragma omp atomic read tmp = y; } if (thrd != 0) { tmp = 0; while (tmp == 0) { #pragma omp atomic read tmp = y; } if(x != 10) errors++; } } if(errors > 0) printf("Test Failed\n"); else printf("Test Pass\n"); return errors; } ```

llvmbot commented 4 months ago

@llvm/issue-subscribers-offload

Author: Felipe Cabarcas (fel-cab)

The following code doesn't compile when using: `clang -fopenmp -O3 --offload-arch=sm_80` ``` fatal error: error in backend: Cannot select: 0xc05d820: i32,ch = AtomicLoad<(dereferenceable load acquire (s32) from %ir.3)> 0xc1836b0, 0xc0a6ff0, example.c:22:11 0xc0a6ff0: i64,ch = CopyFromReg 0xc1836b0, Register:i64 %2, example.c:22:11 0xc027260: i64 = Register %2 In function: __omp_offloading_10301_104476_main_l12_omp_outlined ``` https://godbolt.org/z/fMhTvfbdP Code: ``` #include <omp.h> #include <stdio.h> #include <stdlib.h> #pragma omp requires atomic_default_mem_order(acq_rel) int main() { int x = 0, y = 0; int errors = 0; #pragma omp target parallel num_threads(2) map(tofrom: x, y, errors) { int thrd = omp_get_thread_num(); int tmp = 0; if (thrd == 0) { x = 10; #pragma omp atomic write y = 1; } else { #pragma omp atomic read tmp = y; } if (thrd != 0) { tmp = 0; while (tmp == 0) { #pragma omp atomic read tmp = y; } if(x != 10) errors++; } } if(errors > 0) printf("Test Failed\n"); else printf("Test Pass\n"); return errors; } ```

jhuber6 commented 4 months ago

This is a never-ending issue because no one has had the time to implement all the atomics in the NVPTX backend. See https://github.com/llvm/llvm-project/issues/48651, https://github.com/llvm/llvm-project/issues/61411, and https://github.com/llvm/llvm-project/issues/54854.

llvm / llvm-project

Clang backend error for atomic_default_mem_order(acq_rel) for NVIDIA GPU #94024