flang-compiler / f18-llvm-project

Fork of llvm/llvm-project for f18. In sync with f18-mlir and f18.
http://llvm.org
28 stars 16 forks source link

Implementation status of OpenMP Threadprivate directive #1136

Open PeixinQiao opened 2 years ago

PeixinQiao commented 2 years ago

https://github.com/flang-compiler/f18-llvm-project/pull/1135 supports non-character scalar, character, array, character array, pointer, allocatable, pointer, derived type, common block, non-SAVEd non-initialized non-character scalar in main program (this is special case).

It supports use association and argument association, but don't support host assocation currently.

PeixinQiao commented 2 years ago

Test case 1: integer:

program test
  use omp_lib
  integer :: tid, y
  integer, save :: x

  !$omp threadprivate(x)

  x = -1
  y = -1

  !$omp parallel private(tid) num_threads(2)
  tid = omp_get_thread_num()
  print *,"tid = ", tid, ",x = ", x, ",y = ", y
  x = x + tid + 10
  y = y + tid + 10
  print *,"changed: tid = ", tid, ",x = ", x, ",y = ", y
  !$omp end parallel

  tid = omp_get_thread_num()
  print *,"middle: tid = ", tid, ",x = ", x, ",y = ", y

  !$omp parallel private(tid) num_threads(2)
  tid = omp_get_thread_num()
  print *,"second loop: tid = ", tid, ",x = ", x, ",y = ", y
  x = x + tid + 10
  y = y + tid + 10
  print *,"second loop changed: tid = ", tid, ",x = ", x, ",y = ", y
  !$omp end parallel
end

Expected result:

tid = 1,x = 0,y = -1 changed: tid = 1,x = 11,y = 10 tid = 0,x = -1,y = 10 changed: tid = 0,x = 9,y = 20 middle: tid = 0,x = 9,y = 20 second loop: tid = 0,x = 9,y = 20 second loop changed: tid = 0,x = 19,y = 30 second loop: tid = 1,x = 11,y = 30 second loop changed: tid = 1,x = 22,y = 41

PeixinQiao commented 2 years ago

Test case 2: real

program test
  use omp_lib
  integer :: tid
  real, save :: x, y

  !$omp threadprivate(x)

  x = -1
  y = -1

  !$omp parallel private(tid) num_threads(2)
  tid = omp_get_thread_num()
  print *,"tid = ", tid, ",x = ", x, ",y = ", y
  x = x + tid + 10
  y = y + tid + 10
  print *,"changed: tid = ", tid, ",x = ", x, ",y = ", y
  !$omp end parallel

  tid = omp_get_thread_num()
  print *,"middle: tid = ", tid, ",x = ", x, ",y = ", y

  !$omp parallel private(tid) num_threads(2)
  tid = omp_get_thread_num()
  print *,"second loop: tid = ", tid, ",x = ", x, ",y = ", y
  x = x + tid + 10
  y = y + tid + 10
  print *,"second loop changed: tid = ", tid, ",x = ", x, ",y = ", y
  !$omp end parallel
end

Expected result:

tid = 0,x = -1.,y = -1. changed: tid = 0,x = 9.,y = 9. tid = 1,x = 0.,y = 9. changed: tid = 1,x = 11.,y = 20. middle: tid = 0,x = 9.,y = 20. second loop: tid = 0,x = 9.,y = 20. second loop changed: tid = 0,x = 19.,y = 30. second loop: tid = 1,x = 11.,y = 30. second loop changed: tid = 1,x = 22.,y = 41.

PeixinQiao commented 2 years ago

Test case 3: complex

program test
  use omp_lib
  integer :: tid
  complex, save :: x, y

  !$omp threadprivate(x)

  x = cmplx(-1, -1)
  y = cmplx(-1, -1)

  !$omp parallel private(tid) num_threads(2)
  tid = omp_get_thread_num()
  print *,"tid = ", tid, ",x = ", x, ",y = ", y
  x = x + tid + cmplx(10, 10)
  y = y + tid + cmplx(10, 10)
  print *,"changed: tid = ", tid, ",x = ", x, ",y = ", y
  !$omp end parallel

  tid = omp_get_thread_num()
  print *,"middle: tid = ", tid, ",x = ", x, ",y = ", y

  !$omp parallel private(tid) num_threads(2)
  tid = omp_get_thread_num()
  print *,"second loop: tid = ", tid, ",x = ", x, ",y = ", y
  x = x + tid + cmplx(10, 10)
  y = y + tid + cmplx(10, 10)
  print *,"second loop changed: tid = ", tid, ",x = ", x, ",y = ", y
  !$omp end parallel
end

Expected result:

tid = 1,x = (0.,0.),y = (-1.,-1.) changed: tid = 1,x = (11.,10.),y = (10.,9.) tid = 0,x = (-1.,-1.),y = (10.,9.) changed: tid = 0,x = (9.,9.),y = (20.,19.) middle: tid = 0,x = (9.,9.),y = (20.,19.) second loop: tid = 0,x = (9.,9.),y = (20.,19.) second loop changed: tid = 0,x = (19.,19.),y = (30.,29.) second loop: tid = 1,x = (11.,10.),y = (30.,29.) second loop changed: tid = 1,x = (22.,20.),y = (41.,39.)

PeixinQiao commented 2 years ago

Test case 4: logical

program test
  use omp_lib
  integer :: tid
  logical, save :: x, y

  !$omp threadprivate(x)

  x = .false.
  y = .false.

  !$omp parallel private(tid) num_threads(2)
  tid = omp_get_thread_num()
  print *,"tid = ", tid, ",x = ", x, ",y = ", y
  !$omp end parallel

  tid = omp_get_thread_num()
  print *,"middle: tid = ", tid, ",x = ", x, ",y = ", y
  x = .true.
  y = .true.
  print *,"middle changed: tid = ", tid, ",x = ", x, ",y = ", y

  !$omp parallel private(tid) num_threads(2)
  tid = omp_get_thread_num()
  print *,"second loop: tid = ", tid, ",x = ", x, ",y = ", y
  !$omp end parallel

  print *,"final: tid = ", tid, ",x = ", x, ",y = ", y
end

Expected result:

tid = 0,x = F ,y = F tid = 1,x = F ,y = F middle: tid = 0,x = F ,y = F middle changed: tid = 0,x = T ,y = T second loop: tid = 0,x = T ,y = T second loop: tid = 1,x = F ,y = T final: tid = 0,x = T ,y = T

PeixinQiao commented 2 years ago

Test case 5: array:

program test
  use omp_lib
  integer :: tid
  integer :: x(5), y(5)

  !$omp threadprivate(x)

  x = (/1,1,1,1,1/)
  y = (/1,1,1,1,1/)

  !$omp parallel private(tid) num_threads(2)
  tid = omp_get_thread_num()
  print *,"tid = ", tid, ",x = ", x, ",y = ", y
  x(1) = x(1) + 1
  x(2:3) = x(4:5) + 2
  x = x + 1
  print *,"changed: tid = ", tid, ",x = ", x(1), x(2), x(3), x(4), x(5), ",y = ", y
  !$omp end parallel

  tid = omp_get_thread_num()
  print *,"middle: tid = ", tid, ",x = ", x, ",y = ", y

  !$omp parallel private(tid) num_threads(2)
  tid = omp_get_thread_num()
  print *,"second loop: tid = ", tid, ",x = ", x(1), x(2), x(3), x(4), x(5), ",y = ", y
  !$omp end parallel
end

Expected result:

tid = 0,x = 1 1 1 1 1,y = 1 1 1 1 1 tid = 1,x = 0 0 0 0 0,y = 1 1 1 1 1 changed: tid = 0,x = 3 4 4 2 2,y = 1 1 1 1 1 changed: tid = 1,x = 2 3 3 1 1,y = 1 1 1 1 1 middle: tid = 0,x = 3 4 4 2 2,y = 1 1 1 1 1 second loop: tid = 0,x = 3 4 4 2 2,y = 1 1 1 1 1 second loop: tid = 1,x = 2 3 3 1 1,y = 1 1 1 1 1

PeixinQiao commented 2 years ago

Test case 6: character:

program test
  use omp_lib
  integer :: tid
  character(len=5) :: x, y

  !$omp threadprivate(x)

  x = "aaaaa"
  y = "xxxxx"

  !$omp parallel private(tid) num_threads(2)
  tid = omp_get_thread_num()
  print *,"tid = ", tid, ",x = ", x, ",y = ", y
  x(2:3) = "bb"
  print *,"changed: tid = ", tid, ",x = ", x, ",y = ", y
  !$omp end parallel

  tid = omp_get_thread_num()
  print *,"middle: tid = ", tid, ",x = ", x, ",y = ", y

  !$omp parallel private(tid) num_threads(2)
  tid = omp_get_thread_num()
  print *,"second loop: tid = ", tid, ",x = ", x, ",y = ", y
  !$omp end parallel
end

Expected result:

tid = 1,x = ,y = xxxxx changed: tid = 1,x = bb,y = xxxxx tid = 0,x = aaaaa,y = xxxxx changed: tid = 0,x = abbaa,y = xxxxx middle: tid = 0,x = abbaa,y = xxxxx second loop: tid = 0,x = abbaa,y = xxxxx second loop: tid = 1,x = bb,y = xxxxx

kiranchandramohan commented 2 years ago

Thanks for the detailed status and tracking the progress for threadprivate. I will create a similar one for data-sharing clauses as well. Support for commonblock is required for OpenMP 1.0 both in threadprivate as well as the data-sharing clauses. We should discuss this sometime this month/next month.

PeixinQiao commented 2 years ago

@kiranchandramohan OK. Maybe we can discuss in Thursday's meeting this week.

PeixinQiao commented 2 years ago

Test case 7: common block:

program test
  use omp_lib
  integer :: tid, a
  real :: b(2)
  complex :: c
  logical :: d
  character(5) :: e, f(2)
  common /blk/ a, b, c, d, e, f

  !$omp threadprivate(/blk/)

  a = 1
  b(2) = 1
  c = 1
  d = .true.
  e = "xx"
  f(2) = "xx"

  !$omp parallel private(tid) num_threads(2)
  tid = omp_get_thread_num()
  print *,"tid = ", tid, ": ", a, b, c, d, e, f
  a = 2
  b(2) = 2
  c = 2
  d = .false.
  e = "yy"
  f(2) = "yy"
  print *,"changed: tid = ", tid, ": ", a, b, c, d, e, f
  !$omp end parallel

  tid = omp_get_thread_num()
  print *,"middle: tid = ", tid, ": ", a, b, c, d, e, f

  !$omp parallel private(tid) num_threads(2)
  tid = omp_get_thread_num()
  print *,"second loop: tid = ", tid, ": ", a, b, c, d, e, f
  !$omp end parallel
end

Expected results:

 tid =  0:  1 0. 1. (1.,0.) T xx   xx   
 changed: tid =  0:  2 0. 2. (2.,0.) F yy   yy   
 tid =  1:  0 0. 0. (0.,0.) F 
 changed: tid =  1:  2 0. 2. (2.,0.) F yy   yy   
 middle: tid =  0:  2 0. 2. (2.,0.) F yy   yy   
 second loop: tid =  0:  2 0. 2. (2.,0.) F yy   yy   
 second loop: tid =  1:  2 0. 2. (2.,0.) F yy   yy   
PeixinQiao commented 2 years ago

Test case 8: derived type:

program test
  use omp_lib
  integer :: tid
  type my_type
    integer :: t_i
    integer :: t_arr(5)
  end type my_type
  type(my_type), save :: x, y

  !$omp threadprivate(x)

  x%t_i = -1
  y%t_i = -1

  !$omp parallel private(tid) num_threads(2)
  tid = omp_get_thread_num()
  print *,"tid = ", tid, ",x = ", x, ",y = ", y
  x%t_i = 2
  print *,"changed: tid = ", tid, ",x = ", x, ",y = ", y
  !$omp end parallel

  tid = omp_get_thread_num()
  print *,"middle: tid = ", tid, ",x = ", x, ",y = ", y

  !$omp parallel private(tid) num_threads(2)
  tid = omp_get_thread_num()
  print *,"second loop: tid = ", tid, ",x = ", x, ",y = ", y
  !$omp end parallel
end

Expected results:

 tid =  1,x =  0 0 0 0 0 0,y =  -1 0 0 0 0 0
 changed: tid =  1,x =  2 0 0 0 0 0,y =  -1 0 0 0 0 0
 tid =  0,x =  -1 0 0 0 0 0,y =  -1 0 0 0 0 0
 changed: tid =  0,x =  2 0 0 0 0 0,y =  -1 0 0 0 0 0
 middle: tid =  0,x =  2 0 0 0 0 0,y =  -1 0 0 0 0 0
 second loop: tid =  0,x =  2 0 0 0 0 0,y =  -1 0 0 0 0 0
 second loop: tid =  1,x =  2 0 0 0 0 0,y =  -1 0 0 0 0 0
PeixinQiao commented 2 years ago

Test case 9: char array:

program test
  use omp_lib
  integer :: tid
  character(len=5) :: x(2), y(2)

  !$omp threadprivate(x)

  x(1) = "xxxxx"
  y(1) = "yyyyy"
  x(2) = "aaaaa"
  y(2) = "zzzzz"

  !$omp parallel private(tid) num_threads(2)
  tid = omp_get_thread_num()
  print *,"tid = ", tid, ",x = ", x, ",y = ", y
  x(2) = "bb"
  !x(5) = "e"
  print *,"changed: tid = ", tid, ",x = ", x, ",y = ", y
  !$omp end parallel

  tid = omp_get_thread_num()
  print *,"middle: tid = ", tid, ",x = ", x, ",y = ", y

  !$omp parallel private(tid) num_threads(2)
  tid = omp_get_thread_num()
  print *,"second loop: tid = ", tid, ",x = ", x, ",y = ", y
  !$omp end parallel
end

Expected results:

 tid =  1,x = ,y = yyyyyzzzzz
 changed: tid =  1,x = bb   ,y = yyyyyzzzzz
 tid =  0,x = xxxxxaaaaa,y = yyyyyzzzzz
 changed: tid =  0,x = xxxxxbb   ,y = yyyyyzzzzz
 middle: tid =  0,x = xxxxxbb   ,y = yyyyyzzzzz
 second loop: tid =  0,x = xxxxxbb   ,y = yyyyyzzzzz
 second loop: tid =  1,x = bb   ,y = yyyyyzzzzz
PeixinQiao commented 2 years ago

Test case 10: allocatable && pointer:

program main
  use omp_lib
  integer, allocatable, save :: x, x2(:)
  integer, pointer, save :: y, y2(:)
  integer, target :: z, z2(2)
  integer :: tid

  !$omp threadprivate(x, x2, y, y2)

  allocate(x)
  allocate(x2(2))

  x = 2
  x2(1) = 2
  x2(2) = 2
  z = 2
  z2(1) = 2
  z2(2) = 2
  y=>z
  y2=>z2
  print *, x, x2, y, y2
  !$omp parallel num_threads(2) private(tid)
    tid = omp_get_thread_num()
    if (tid .gt. 0) then
      allocate(x)
      allocate(x2(2))
      x = 1
      x2(1) = 1
      x2(2) = 1
      z = 1
      z2(1) = 1
      z2(2) = 1
      y=>z
      y2=>z2
    endif

    print *, "tid = ", tid, ": ", x, x2, y, y2
  !$omp end parallel

  tid = omp_get_thread_num()
  print *, "tid = ", tid, ": ", x, x2, y, y2

  !$omp parallel num_threads(2) private(tid)
    tid = omp_get_thread_num()
    print *, "tid = ", tid, ": ", x, x2, y, y2
  !$omp end parallel
end

Expected results:

 2 2 2 2 2 2
 tid =  0:  2 2 2 2 2 2
 tid =  1:  1 1 1 1 1 1
 tid =  0:  2 2 2 1 1 1
 tid =  0:  2 2 2 1 1 1
 tid =  1:  1 1 1 1 1 1
PeixinQiao commented 2 years ago

Test case 11: use assocation for non-common block

$ cat use-assoc.f90
module mod1
  use omp_lib
  integer :: x
  real :: y
  complex :: z
  logical :: l
  real, pointer :: a
  !$omp threadprivate(x,y,z,l,a)

contains
  subroutine sub()
    real, target :: b = 5.0
    !$omp parallel num_threads(2)
      if (omp_get_thread_num() == 1) then
        a=>b
      endif
      print *, x, y, z, l, a
    !$omp end parallel
  end
end
$ cat main.f90
program main
  use mod1
  real, target :: b = 4.0
  x = 1
  y = 2.0
  z = 3.0
  l = .true.
  a=>b
  call sub()
end

Expected results:

$ flang-new -fopenmp use-assoc.f90 -c
$ flang-new -fopenmp main.f90 -c
$ flang-new -fopenmp main.o use-assoc.o 
$ ./a.out 
 0 0. (0.,0.) F 5.
 1 2. (3.,0.) T 4.
PeixinQiao commented 2 years ago

Test case 12: use assocation for common block

$ cat use-assoc2.f90
module mod1
  use omp_lib
  integer :: x
  real :: y
  complex :: z
  logical :: l
  real, pointer :: a
  common /blk/ x,y,z,l,a
  !$omp threadprivate(/blk/)

contains
  subroutine sub()
    real, target :: b = 5.0
    !$omp parallel num_threads(2)
    if (omp_get_thread_num() == 1) then
      a=>b
    endif
      print *, x, y, z, l, a
    !$omp end parallel
  end
end
$ cat main2.f90 
program main
  use mod1
  integer :: x1
  real :: y1
  complex :: z1
  logical :: l1
  real, pointer :: a1
  common /blk/ x1,y1,z1,l1,a1
  !$omp threadprivate(/blk/)
  real, target :: b = 4.0
  x1 = 1
  y1 = 2.0
  z1 = 3.0
  l1 = .true.
  a1=>b
  call sub()
end

Expected results:

$ flang-new -fopenmp use-assoc2.f90 -c
$ flang-new -fopenmp main2.f90 -c
$ flang-new -fopenmp main2.o use-assoc2.o
$ ./a.out 
 0 0. (0.,0.) F 5.
 1 2. (3.,0.) T 4.
PeixinQiao commented 2 years ago

Test case 13: host assocation

$ cat hostassoc1.f90 
program main
  use omp_lib
  integer, save :: a
  !$omp threadprivate(a)
  call sub()
contains
  subroutine sub()
    a = 2
    !$omp parallel num_threads(4)
      a = omp_get_thread_num()
      print *, a, omp_get_thread_num()
    !$omp end parallel
    !$omp parallel num_threads(4)
      print *, a, omp_get_thread_num()
    !$omp end parallel
  end
end
$ cat hostassoc2.f90 
program main
  use omp_lib
  integer :: a
  !$omp threadprivate(a)
  call sub()
contains
  subroutine sub()
    a = 2
    !$omp parallel num_threads(4)
      a = omp_get_thread_num()
      print *, a, omp_get_thread_num()
    !$omp end parallel
    !$omp parallel num_threads(4)
      print *, a, omp_get_thread_num()
    !$omp end parallel
  end
end

Expected results:

           0           0
           2           2
           3           3
           1           1
           2           2
           3           3
           1           1
           0           0

gfortran 12 and ifort 2021 support hostassoc1.f90, but do not support hostassoc2.f90. For hostassoc2.f90, gfortran 12 gives wrong running results, while ifort reports the semantic error.

PeixinQiao commented 2 years ago

Test case 14: common block in threadprivate and used in multiple program units (revised from test case in classic flang)

program mian
  use omp_lib
  integer result(4)
  common/result/result
  call sub0
  print *, result
end
subroutine sub0
  use omp_lib
  common /com/ ic1, ic2
  !$omp   threadprivate ( /com/ )
  ic1 = 2
  ic2 = 4
  call sub1
  call sub2
end
subroutine sub1
  use omp_lib
integer :: tid
common /com/ ic1, ic2
!$omp   threadprivate ( /com/ )
!$omp   parallel num_threads(2)
tid = omp_get_thread_num()
ic1 = ic1 + tid
ic2 = ic2 + tid
!$omp end parallel
end
subroutine sub2
  use omp_lib
integer :: tid
integer result(4)
common/result/result
common /com/ ic1, ic2
!$omp   threadprivate ( /com/ )
!$omp   parallel num_threads(2)
tid = omp_get_thread_num()
result(1+tid) = ic1
result(3+tid) = ic2
!$omp   endparallel
end

Expected results:

 2 1 4 1
PeixinQiao commented 2 years ago

Test case 15: argument assocation

program main
  use omp_lib
  integer, save :: a
  !$omp threadprivate(a)
  call sub(a)
contains
  subroutine sub(a)
    integer :: a
    a = 2
    !$omp parallel num_threads(4)
      a = omp_get_thread_num()
      print *, a, omp_get_thread_num()
    !$omp end parallel
    !$omp parallel num_threads(4)
      print *, a
    !$omp end parallel
  end
end

Expected results:

 3 3
 3 0
 3 1
 3 2
 3
 3
 3
 3
kiranchandramohan commented 1 year ago

@PeixinQiao Can we migrate this to llvm-project issues? Let me know if you need help.

PeixinQiao commented 1 year ago

@kiranchandramohan Should I remove all the test cases (the whole issue) or unsupported test cases?

kiranchandramohan commented 1 year ago

I think you can file an isssue (or issues) for unsupported cases in llvm-project. You can add a link to this issue from the new issue or from the excel sheet where we track OpenMP issues.

PeixinQiao commented 1 year ago

I think you can file an isssue (or issues) for unsupported cases in llvm-project. You can add a link to this issue from the new issue or from the excel sheet where we track OpenMP issues.

OK. I will do it.