stfc / PSyclone

Domain-specific compiler and code transformation system for Finite Difference/Volume/Element Earth-system models in Fortran
BSD 3-Clause "New" or "Revised" License
103 stars 28 forks source link

Change to API for get_undf to facilitate different sized haloes #2502

Open thomasmelvin opened 7 months ago

thomasmelvin commented 7 months ago

LFRic #4055: [https://code.metoffice.gov.uk/trac/lfric/ticket/4055] introduces a method to have fields of different sized haloes. It does this by changing the default size of field arrays from vector_space%get_last_dof_halo() to vector_space%get_last_dof_halo(field_halo_depth) where field_halo_depth defaults to 1 unless it is passed in the field constructor as an optional argument. The result of this is that most field have a data array that is smaller then it previously was and importantly is less than vector_space%get_undf().

Currently the lfric api specifies that if a kernel metadata specifies a GH_FIELD is used then undf is also passed in from the function space. The propsosed solution is two fold

  1. instead of undf = field_proxy%vspace%get_undf() we use undf = field_proxy%vspace%get_last_dof_halo(1)
  2. If a stencil argument is used we also pass in undf_stencil_field = stencil_field_proxy%vspace%get_last_dof_halo(depth) where depth=stencil_depth for GH_READ kernels or depth=stencil_depth+1 for GH_INC kernels and stencil_depth is the algorithm level argument defining the stencil depth
thomasmelvin commented 7 months ago

As an example if we consider the current implementation of a kenrel with metadata

type, public, extends(kernel_type) :: test_kernel_type
  private
  type(arg_type) :: meta_args(2) = (/                                                       &
       arg_type(GH_FIELD,  GH_REAL, GH_WRITE, ANY_DISCONTINUOUS_SPACE_1),                   &
       arg_type(GH_FIELD,  GH_REAL, GH_READ,  ANY_DISCONTINUOUS_SPACE_2, STENCIL(CROSS2D))  &
       /)
  integer :: operates_on = CELL_COLUMN
contains
  procedure, nopass :: test_code
end type

and an algorithm level call

    call invoke( name="test_kernel", &
                 test_kernel_type(new_field, old_field, stencil_depth) )

then the psy_layer generated code is

    SUBROUTINE invoke_test_kernel(new_field, old_field, stencil_depth)
      USE test_kernel_mod, ONLY: test_code
      USE mesh_mod, ONLY: mesh_type
      USE stencil_2D_dofmap_mod, ONLY: stencil_2D_dofmap_type, STENCIL_2D_CROSS
      TYPE(field_type), intent(in) :: new_field, old_field
      INTEGER(KIND=i_def), intent(in) :: stencil_depth
      INTEGER(KIND=i_def) cell
      INTEGER(KIND=i_def) loop0_start, loop0_stop
      INTEGER(KIND=i_def) nlayers
      TYPE(field_proxy_type) new_field_proxy, old_field_proxy
      INTEGER(KIND=i_def), pointer :: map_adspc1_new_field(:,:) => null(), map_adspc2_old_field(:,:) => null()
      INTEGER(KIND=i_def) ndf_adspc1_new_field, undf_adspc1_new_field, ndf_adspc2_old_field, undf_adspc2_old_field
      INTEGER(KIND=i_def) max_halo_depth_mesh
      TYPE(mesh_type), pointer :: mesh => null()
      INTEGER(KIND=i_def) old_field_max_branch_length
      INTEGER(KIND=i_def), pointer :: old_field_stencil_size(:,:) => null()
      INTEGER(KIND=i_def), pointer :: old_field_stencil_dofmap(:,:,:,:) => null()
      TYPE(stencil_2D_dofmap_type), pointer :: old_field_stencil_map => null()
      !
      ! Initialise field and/or operator proxies
      !
      new_field_proxy = new_field%get_proxy()
      old_field_proxy = old_field%get_proxy()
      !
      ! Initialise number of layers
      !
      nlayers = new_field_proxy%vspace%get_nlayers()
      !
      ! Create a mesh object
      !
      mesh => new_field_proxy%vspace%get_mesh()
      max_halo_depth_mesh = mesh%get_halo_depth()
      !
      ! Initialise stencil dofmaps
      !
      old_field_stencil_map => old_field_proxy%vspace%get_stencil_2D_dofmap(STENCIL_2D_CROSS,stencil_depth)
      old_field_max_branch_length = stencil_depth + 1_i_def
      old_field_stencil_dofmap => old_field_stencil_map%get_whole_dofmap()
      old_field_stencil_size => old_field_stencil_map%get_stencil_sizes()
      !
      ! Look-up dofmaps for each function space
      !
      map_adspc1_new_field => new_field_proxy%vspace%get_whole_dofmap()
      map_adspc2_old_field => old_field_proxy%vspace%get_whole_dofmap()
      !
      ! Initialise number of DoFs for adspc1_new_field
      !
      ndf_adspc1_new_field = new_field_proxy%vspace%get_ndf()
      undf_adspc1_new_field = new_field_proxy%vspace%get_undf()
      !
      ! Initialise number of DoFs for adspc2_old_field
      !
      ndf_adspc2_old_field = old_field_proxy%vspace%get_ndf()
      undf_adspc2_old_field = old_field_proxy%vspace%get_undf()
      !
      ! Set-up all of the loop bounds
      !
      loop0_start = 1
      loop0_stop = mesh%get_last_edge_cell()
      !
      ! Call kernels and communication routines
      !
      IF (old_field_proxy%is_dirty(depth=stencil_depth)) THEN
        CALL old_field_proxy%halo_exchange(depth=stencil_depth)
      END IF
      !
      !$omp parallel default(shared), private(cell)
      !$omp do schedule(static)
      DO cell=loop0_start,loop0_stop
        !
        CALL test_code(nlayers, &
                       new_field_proxy%data, &
                       old_field_proxy%data,  &
                       old_field_stencil_size(:,cell),  &
                       old_field_max_branch_length, &
                       old_field_stencil_dofmap(:,:,:,cell), &
                       ndf_adspc1_new_field, &
                       undf_adspc1_new_field, &
                       map_adspc1_new_field(:,cell), &
                       ndf_adspc2_old_field, &
                       undf_adspc2_old_field, &
                       map_adspc2_old_field(:,cell))
      END DO
      !$omp end do
      !$omp end parallel
      !
      ! Set halos dirty/clean for fields modified in the above loop(s)
      !
      CALL new_field_proxy%set_dirty()
      !
      ! End of set dirty/clean section for above loop(s)
      !
      !
    END SUBROUTINE invoke_test_kernel

and the kernel argument list is

subroutine test_code( nlayers,   &
                      new_field, &
                      old_field, &
                      smap_size, &
                      smap_max,  &
                      smap,      &
                      ndf1,      &
                      undf1,     &
                      map1,      &
                      ndf2,      &
                      undf2,     &
                      map2 )

  implicit none

  ! Arguments
  integer(kind=i_def), intent(in)                  :: nlayers
  integer(kind=i_def), intent(in)                  :: ndf1
  integer(kind=i_def), intent(in)                  :: undf1
  integer(kind=i_def), dimension(ndf1), intent(in) :: map1
  integer(kind=i_def), intent(in)                  :: ndf2
  integer(kind=i_def), intent(in)                  :: undf2
  integer(kind=i_def), dimension(ndf2), intent(in) :: map2

  integer(kind=i_def),                               intent(in) :: smap_max
  integer(kind=i_def), dimension(4),                 intent(in) :: smap_size
  integer(kind=i_def), dimension(ndf2, smap_max, 4), intent(in) :: smap

  real(kind=r_def), dimension(undf1), intent(inout) :: new_field
  real(kind=r_def), dimension(undf2), intent(in)    :: old_field
thomasmelvin commented 7 months ago

Whilst with proposed changes the algorithm and metadata are unchanged but the generated psylayer code becomes

    SUBROUTINE invoke_test_kernel(new_field, old_field, stencil_depth)
      USE test_kernel_mod, ONLY: test_code
      USE mesh_mod, ONLY: mesh_type
      USE stencil_2D_dofmap_mod, ONLY: stencil_2D_dofmap_type, STENCIL_2D_CROSS
      TYPE(field_type), intent(in) :: new_field, old_field
      INTEGER(KIND=i_def), intent(in) :: stencil_depth
      INTEGER(KIND=i_def) cell
      INTEGER(KIND=i_def) loop0_start, loop0_stop
      INTEGER(KIND=i_def) nlayers
      TYPE(field_proxy_type) new_field_proxy, old_field_proxy
      INTEGER(KIND=i_def), pointer :: map_adspc1_new_field(:,:) => null(), map_adspc2_old_field(:,:) => null()
      INTEGER(KIND=i_def) ndf_adspc1_new_field, undf_adspc1_new_field, ndf_adspc2_old_field, undf_adspc2_old_field
      INTEGER(KIND=i_def) max_halo_depth_mesh
      TYPE(mesh_type), pointer :: mesh => null()
      INTEGER(KIND=i_def) old_field_max_branch_length
      INTEGER(KIND=i_def), pointer :: old_field_stencil_size(:,:) => null()
      INTEGER(KIND=i_def), pointer :: old_field_stencil_dofmap(:,:,:,:) => null()
      TYPE(stencil_2D_dofmap_type), pointer :: old_field_stencil_map => null()
      INTEGER(KIND=i_def) :: old_field_undf
      !
      ! Initialise field and/or operator proxies
      !
      new_field_proxy = new_field%get_proxy()
      old_field_proxy = old_field%get_proxy()
      !
      ! Initialise number of layers
      !
      nlayers = new_field_proxy%vspace%get_nlayers()
      !
      ! Create a mesh object
      !
      mesh => new_field_proxy%vspace%get_mesh()
      max_halo_depth_mesh = mesh%get_halo_depth()
      !
      ! Initialise stencil dofmaps
      !
      old_field_stencil_map => old_field_proxy%vspace%get_stencil_2D_dofmap(STENCIL_2D_CROSS,stencil_depth)
      old_field_max_branch_length = stencil_depth + 1_i_def
      old_field_stencil_dofmap => old_field_stencil_map%get_whole_dofmap()
      old_field_stencil_size => old_field_stencil_map%get_stencil_sizes()
      !
      ! Look-up dofmaps for each function space
      !
      map_adspc1_new_field => new_field_proxy%vspace%get_whole_dofmap()
      map_adspc2_old_field => old_field_proxy%vspace%get_whole_dofmap()
      !
      ! Initialise number of DoFs for adspc1_new_field
      !
      ndf_adspc1_new_field = new_field_proxy%vspace%get_ndf()
      undf_adspc1_new_field = new_field_proxy%vspace%get_last_dof_halo(1)
      !
      ! Initialise number of DoFs for adspc2_old_field
      !
      ndf_adspc2_old_field = old_field_proxy%vspace%get_ndf()
      undf_adspc2_old_field = old_field_proxy%vspace%get_last_dof_halo(1)
      undf_old_field = old_field_proxy%vspace%get_last_dof_halo(stencil_depth)
      !
      ! Set-up all of the loop bounds
      !
      loop0_start = 1
      loop0_stop = mesh%get_last_edge_cell()
      !
      ! Call kernels and communication routines
      !
      IF (old_field_proxy%is_dirty(depth=stencil_depth)) THEN
        CALL old_field_proxy%halo_exchange(depth=stencil_depth)
      END IF
      !
      !$omp parallel default(shared), private(cell)
      !$omp do schedule(static)
      DO cell=loop0_start,loop0_stop
        !
        CALL test_code(nlayers, &
                       new_field_proxy%data,  &
                       old_field_proxy%data,  &
                       old_field_stencil_size(:,cell),  &
                       old_field_max_branch_length, &
                       old_field_stencil_dofmap(:,:,:,cell),  &
                       old_field_undf, &
                       ndf_adspc1_new_field,  &
                       undf_adspc1_new_field,  &
                       map_adspc1_new_field(:,cell),  &
                       ndf_adspc2_old_field,  &
                       undf_adspc2_old_field, &
                       map_adspc2_old_field(:,cell))
      END DO
      !$omp end do
      !$omp end parallel
      !
      ! Set halos dirty/clean for fields modified in the above loop(s)
      !
      CALL new_field_proxy%set_dirty()
      !
      ! End of set dirty/clean section for above loop(s)
      !
      !
    END SUBROUTINE invoke_test_kernel

and the kernel arguement list becomes

subroutine test_code( nlayers,   &
                      new_field, &
                      old_field, &
                      smap_size, &
                      smap_max,  &
                      smap,      &
                      old_field_undf, &
                      ndf1,      &
                      undf1,     &
                      map1,      &
                      ndf2,      &
                      undf2,     &
                      map2 )

  implicit none

  ! Arguments
  integer(kind=i_def), intent(in)                  :: nlayers
  integer(kind=i_def), intent(in)                  :: ndf1
  integer(kind=i_def), intent(in)                  :: undf1
  integer(kind=i_def), dimension(ndf1), intent(in) :: map1
  integer(kind=i_def), intent(in)                  :: ndf2
  integer(kind=i_def), intent(in)                  :: undf2
  integer(kind=i_def), dimension(ndf2), intent(in) :: map2
  integer(kind=i_def)                              :: old_field_undf

  integer(kind=i_def),                               intent(in) :: smap_max
  integer(kind=i_def), dimension(4),                 intent(in) :: smap_size
  integer(kind=i_def), dimension(ndf2, smap_max, 4), intent(in) :: smap

  real(kind=r_def), dimension(undf1),          intent(inout) :: new_field
  real(kind=r_def), dimension(old_field_undf), intent(in)    :: old_field
thomasmelvin commented 7 months ago

And if the metdata was for GH_INC:

type, public, extends(kernel_type) :: test_kernel_type
  private
  type(arg_type) :: meta_args(2) = (/                                                      &
       arg_type(GH_FIELD,  GH_REAL, GH_INC,   ANY_SPACE_1),                                &
       arg_type(GH_FIELD,  GH_REAL, GH_READ,  ANY_DISCONTINUOUS_SPACE_2, STENCIL(CROSS2D)) &
       /)
  integer :: operates_on = CELL_COLUMN
contains
  procedure, nopass :: test_code
end type

the psycode would be

    SUBROUTINE invoke_test_kernel(new_field, old_field, stencil_depth)
      USE test_kernel_mod, ONLY: test_code
      USE mesh_mod, ONLY: mesh_type
      USE stencil_2D_dofmap_mod, ONLY: stencil_2D_dofmap_type, STENCIL_2D_CROSS
      TYPE(field_type), intent(in) :: new_field, old_field
      INTEGER(KIND=i_def), intent(in) :: stencil_depth
      INTEGER colour
      INTEGER(KIND=i_def) cell
      INTEGER(KIND=i_def) loop1_start
      INTEGER(KIND=i_def) loop0_start, loop0_stop
      INTEGER(KIND=i_def) nlayers
      TYPE(field_proxy_type) new_field_proxy, old_field_proxy
      INTEGER(KIND=i_def), pointer :: map_adspc2_old_field(:,:) => null(), map_aspc1_new_field(:,:) => null()
      INTEGER(KIND=i_def) ndf_aspc1_new_field, undf_aspc1_new_field, ndf_adspc2_old_field, undf_adspc2_old_field
      INTEGER(KIND=i_def), allocatable :: last_halo_cell_all_colours(:,:)
      INTEGER(KIND=i_def) ncolour
      INTEGER(KIND=i_def), pointer :: cmap(:,:)
      INTEGER(KIND=i_def) max_halo_depth_mesh
      TYPE(mesh_type), pointer :: mesh => null()
      INTEGER(KIND=i_def) old_field_max_branch_length
      INTEGER(KIND=i_def), pointer :: old_field_stencil_size(:,:) => null()
      INTEGER(KIND=i_def), pointer :: old_field_stencil_dofmap(:,:,:,:) => null()
      TYPE(stencil_2D_dofmap_type), pointer :: old_field_stencil_map => null()
      INTEGER(KIND=i_def) :: old_field_undf
      !
      ! Initialise field and/or operator proxies
      !
      new_field_proxy = new_field%get_proxy()
      old_field_proxy = old_field%get_proxy()
      !
      ! Initialise number of layers
      !
      nlayers = new_field_proxy%vspace%get_nlayers()
      !
      ! Create a mesh object
      !
      mesh => new_field_proxy%vspace%get_mesh()
      max_halo_depth_mesh = mesh%get_halo_depth()
      !
      ! Get the colourmap
      !
      ncolour = mesh%get_ncolours()
      cmap => mesh%get_colour_map()
      !
      ! Initialise stencil dofmaps
      !
      old_field_stencil_map => old_field_proxy%vspace%get_stencil_2D_dofmap(STENCIL_2D_CROSS,stencil_depth)
      old_field_max_branch_length = stencil_depth + 1_i_def
      old_field_stencil_dofmap => old_field_stencil_map%get_whole_dofmap()
      old_field_stencil_size => old_field_stencil_map%get_stencil_sizes()
      !
      ! Look-up dofmaps for each function space
      !
      map_aspc1_new_field => new_field_proxy%vspace%get_whole_dofmap()
      map_adspc2_old_field => old_field_proxy%vspace%get_whole_dofmap()
      !
      ! Initialise number of DoFs for aspc1_new_field
      !
      ndf_aspc1_new_field = new_field_proxy%vspace%get_ndf()
      undf_aspc1_new_field = new_field_proxy%vspace%get_last_dof_halo(1)
      !
      ! Initialise number of DoFs for adspc2_old_field
      !
      ndf_adspc2_old_field = old_field_proxy%vspace%get_ndf()
      undf_adspc2_old_field = old_field_proxy%vspace%get_last_dof_halo(1)
      old_field_undf = old_field_proxy%vspace%get_last_dof_halo(stencil_depth + 1)
      !
      ! Initialise mesh properties
      !
      last_halo_cell_all_colours = mesh%get_last_halo_cell_all_colours()
      !
      ! Set-up all of the loop bounds
      !
      loop0_start = 1
      loop0_stop = ncolour
      loop1_start = 1
      !
      ! Call kernels and communication routines
      !
      IF (old_field_proxy%is_dirty(depth=stencil_depth+1)) THEN
        CALL old_field_proxy%halo_exchange(depth=stencil_depth+1)
      END IF
      !
      DO colour=loop0_start,loop0_stop
        !$omp parallel default(shared), private(cell)
        !$omp do schedule(static)
        DO cell=loop1_start,last_halo_cell_all_colours(colour,1)
          !
          CALL test_code(nlayers, &
                         new_field_proxy%data,  &
                         old_field_proxy%data,  &
                         old_field_stencil_size(:,cmap(colour, cell)),  &
                         old_field_max_branch_length, &
                         old_field_stencil_dofmap(:,:,:,cmap(colour, cell)),  &
                         old_field_undf, &
                         ndf_aspc1_new_field,  &
                         undf_aspc1_new_field,  &
                         map_aspc1_new_field(:,cmap(colour, cell)), &
                         ndf_adspc2_old_field,  &
                         undf_adspc2_old_field,  &
                         map_adspc2_old_field(:,cmap(colour, cell)))
        END DO
        !$omp end do
        !$omp end parallel
      END DO
      !
      ! Set halos dirty/clean for fields modified in the above loop
      !
      CALL new_field_proxy%set_dirty()
      !
      !
    END SUBROUTINE invoke_test_kernel

and the kernel remains unchanged