Open thomasmelvin opened 7 months ago
As an example if we consider the current implementation of a kenrel with metadata
type, public, extends(kernel_type) :: test_kernel_type
private
type(arg_type) :: meta_args(2) = (/ &
arg_type(GH_FIELD, GH_REAL, GH_WRITE, ANY_DISCONTINUOUS_SPACE_1), &
arg_type(GH_FIELD, GH_REAL, GH_READ, ANY_DISCONTINUOUS_SPACE_2, STENCIL(CROSS2D)) &
/)
integer :: operates_on = CELL_COLUMN
contains
procedure, nopass :: test_code
end type
and an algorithm level call
call invoke( name="test_kernel", &
test_kernel_type(new_field, old_field, stencil_depth) )
then the psy_layer generated code is
SUBROUTINE invoke_test_kernel(new_field, old_field, stencil_depth)
USE test_kernel_mod, ONLY: test_code
USE mesh_mod, ONLY: mesh_type
USE stencil_2D_dofmap_mod, ONLY: stencil_2D_dofmap_type, STENCIL_2D_CROSS
TYPE(field_type), intent(in) :: new_field, old_field
INTEGER(KIND=i_def), intent(in) :: stencil_depth
INTEGER(KIND=i_def) cell
INTEGER(KIND=i_def) loop0_start, loop0_stop
INTEGER(KIND=i_def) nlayers
TYPE(field_proxy_type) new_field_proxy, old_field_proxy
INTEGER(KIND=i_def), pointer :: map_adspc1_new_field(:,:) => null(), map_adspc2_old_field(:,:) => null()
INTEGER(KIND=i_def) ndf_adspc1_new_field, undf_adspc1_new_field, ndf_adspc2_old_field, undf_adspc2_old_field
INTEGER(KIND=i_def) max_halo_depth_mesh
TYPE(mesh_type), pointer :: mesh => null()
INTEGER(KIND=i_def) old_field_max_branch_length
INTEGER(KIND=i_def), pointer :: old_field_stencil_size(:,:) => null()
INTEGER(KIND=i_def), pointer :: old_field_stencil_dofmap(:,:,:,:) => null()
TYPE(stencil_2D_dofmap_type), pointer :: old_field_stencil_map => null()
!
! Initialise field and/or operator proxies
!
new_field_proxy = new_field%get_proxy()
old_field_proxy = old_field%get_proxy()
!
! Initialise number of layers
!
nlayers = new_field_proxy%vspace%get_nlayers()
!
! Create a mesh object
!
mesh => new_field_proxy%vspace%get_mesh()
max_halo_depth_mesh = mesh%get_halo_depth()
!
! Initialise stencil dofmaps
!
old_field_stencil_map => old_field_proxy%vspace%get_stencil_2D_dofmap(STENCIL_2D_CROSS,stencil_depth)
old_field_max_branch_length = stencil_depth + 1_i_def
old_field_stencil_dofmap => old_field_stencil_map%get_whole_dofmap()
old_field_stencil_size => old_field_stencil_map%get_stencil_sizes()
!
! Look-up dofmaps for each function space
!
map_adspc1_new_field => new_field_proxy%vspace%get_whole_dofmap()
map_adspc2_old_field => old_field_proxy%vspace%get_whole_dofmap()
!
! Initialise number of DoFs for adspc1_new_field
!
ndf_adspc1_new_field = new_field_proxy%vspace%get_ndf()
undf_adspc1_new_field = new_field_proxy%vspace%get_undf()
!
! Initialise number of DoFs for adspc2_old_field
!
ndf_adspc2_old_field = old_field_proxy%vspace%get_ndf()
undf_adspc2_old_field = old_field_proxy%vspace%get_undf()
!
! Set-up all of the loop bounds
!
loop0_start = 1
loop0_stop = mesh%get_last_edge_cell()
!
! Call kernels and communication routines
!
IF (old_field_proxy%is_dirty(depth=stencil_depth)) THEN
CALL old_field_proxy%halo_exchange(depth=stencil_depth)
END IF
!
!$omp parallel default(shared), private(cell)
!$omp do schedule(static)
DO cell=loop0_start,loop0_stop
!
CALL test_code(nlayers, &
new_field_proxy%data, &
old_field_proxy%data, &
old_field_stencil_size(:,cell), &
old_field_max_branch_length, &
old_field_stencil_dofmap(:,:,:,cell), &
ndf_adspc1_new_field, &
undf_adspc1_new_field, &
map_adspc1_new_field(:,cell), &
ndf_adspc2_old_field, &
undf_adspc2_old_field, &
map_adspc2_old_field(:,cell))
END DO
!$omp end do
!$omp end parallel
!
! Set halos dirty/clean for fields modified in the above loop(s)
!
CALL new_field_proxy%set_dirty()
!
! End of set dirty/clean section for above loop(s)
!
!
END SUBROUTINE invoke_test_kernel
and the kernel argument list is
subroutine test_code( nlayers, &
new_field, &
old_field, &
smap_size, &
smap_max, &
smap, &
ndf1, &
undf1, &
map1, &
ndf2, &
undf2, &
map2 )
implicit none
! Arguments
integer(kind=i_def), intent(in) :: nlayers
integer(kind=i_def), intent(in) :: ndf1
integer(kind=i_def), intent(in) :: undf1
integer(kind=i_def), dimension(ndf1), intent(in) :: map1
integer(kind=i_def), intent(in) :: ndf2
integer(kind=i_def), intent(in) :: undf2
integer(kind=i_def), dimension(ndf2), intent(in) :: map2
integer(kind=i_def), intent(in) :: smap_max
integer(kind=i_def), dimension(4), intent(in) :: smap_size
integer(kind=i_def), dimension(ndf2, smap_max, 4), intent(in) :: smap
real(kind=r_def), dimension(undf1), intent(inout) :: new_field
real(kind=r_def), dimension(undf2), intent(in) :: old_field
Whilst with proposed changes the algorithm and metadata are unchanged but the generated psylayer code becomes
SUBROUTINE invoke_test_kernel(new_field, old_field, stencil_depth)
USE test_kernel_mod, ONLY: test_code
USE mesh_mod, ONLY: mesh_type
USE stencil_2D_dofmap_mod, ONLY: stencil_2D_dofmap_type, STENCIL_2D_CROSS
TYPE(field_type), intent(in) :: new_field, old_field
INTEGER(KIND=i_def), intent(in) :: stencil_depth
INTEGER(KIND=i_def) cell
INTEGER(KIND=i_def) loop0_start, loop0_stop
INTEGER(KIND=i_def) nlayers
TYPE(field_proxy_type) new_field_proxy, old_field_proxy
INTEGER(KIND=i_def), pointer :: map_adspc1_new_field(:,:) => null(), map_adspc2_old_field(:,:) => null()
INTEGER(KIND=i_def) ndf_adspc1_new_field, undf_adspc1_new_field, ndf_adspc2_old_field, undf_adspc2_old_field
INTEGER(KIND=i_def) max_halo_depth_mesh
TYPE(mesh_type), pointer :: mesh => null()
INTEGER(KIND=i_def) old_field_max_branch_length
INTEGER(KIND=i_def), pointer :: old_field_stencil_size(:,:) => null()
INTEGER(KIND=i_def), pointer :: old_field_stencil_dofmap(:,:,:,:) => null()
TYPE(stencil_2D_dofmap_type), pointer :: old_field_stencil_map => null()
INTEGER(KIND=i_def) :: old_field_undf
!
! Initialise field and/or operator proxies
!
new_field_proxy = new_field%get_proxy()
old_field_proxy = old_field%get_proxy()
!
! Initialise number of layers
!
nlayers = new_field_proxy%vspace%get_nlayers()
!
! Create a mesh object
!
mesh => new_field_proxy%vspace%get_mesh()
max_halo_depth_mesh = mesh%get_halo_depth()
!
! Initialise stencil dofmaps
!
old_field_stencil_map => old_field_proxy%vspace%get_stencil_2D_dofmap(STENCIL_2D_CROSS,stencil_depth)
old_field_max_branch_length = stencil_depth + 1_i_def
old_field_stencil_dofmap => old_field_stencil_map%get_whole_dofmap()
old_field_stencil_size => old_field_stencil_map%get_stencil_sizes()
!
! Look-up dofmaps for each function space
!
map_adspc1_new_field => new_field_proxy%vspace%get_whole_dofmap()
map_adspc2_old_field => old_field_proxy%vspace%get_whole_dofmap()
!
! Initialise number of DoFs for adspc1_new_field
!
ndf_adspc1_new_field = new_field_proxy%vspace%get_ndf()
undf_adspc1_new_field = new_field_proxy%vspace%get_last_dof_halo(1)
!
! Initialise number of DoFs for adspc2_old_field
!
ndf_adspc2_old_field = old_field_proxy%vspace%get_ndf()
undf_adspc2_old_field = old_field_proxy%vspace%get_last_dof_halo(1)
undf_old_field = old_field_proxy%vspace%get_last_dof_halo(stencil_depth)
!
! Set-up all of the loop bounds
!
loop0_start = 1
loop0_stop = mesh%get_last_edge_cell()
!
! Call kernels and communication routines
!
IF (old_field_proxy%is_dirty(depth=stencil_depth)) THEN
CALL old_field_proxy%halo_exchange(depth=stencil_depth)
END IF
!
!$omp parallel default(shared), private(cell)
!$omp do schedule(static)
DO cell=loop0_start,loop0_stop
!
CALL test_code(nlayers, &
new_field_proxy%data, &
old_field_proxy%data, &
old_field_stencil_size(:,cell), &
old_field_max_branch_length, &
old_field_stencil_dofmap(:,:,:,cell), &
old_field_undf, &
ndf_adspc1_new_field, &
undf_adspc1_new_field, &
map_adspc1_new_field(:,cell), &
ndf_adspc2_old_field, &
undf_adspc2_old_field, &
map_adspc2_old_field(:,cell))
END DO
!$omp end do
!$omp end parallel
!
! Set halos dirty/clean for fields modified in the above loop(s)
!
CALL new_field_proxy%set_dirty()
!
! End of set dirty/clean section for above loop(s)
!
!
END SUBROUTINE invoke_test_kernel
and the kernel arguement list becomes
subroutine test_code( nlayers, &
new_field, &
old_field, &
smap_size, &
smap_max, &
smap, &
old_field_undf, &
ndf1, &
undf1, &
map1, &
ndf2, &
undf2, &
map2 )
implicit none
! Arguments
integer(kind=i_def), intent(in) :: nlayers
integer(kind=i_def), intent(in) :: ndf1
integer(kind=i_def), intent(in) :: undf1
integer(kind=i_def), dimension(ndf1), intent(in) :: map1
integer(kind=i_def), intent(in) :: ndf2
integer(kind=i_def), intent(in) :: undf2
integer(kind=i_def), dimension(ndf2), intent(in) :: map2
integer(kind=i_def) :: old_field_undf
integer(kind=i_def), intent(in) :: smap_max
integer(kind=i_def), dimension(4), intent(in) :: smap_size
integer(kind=i_def), dimension(ndf2, smap_max, 4), intent(in) :: smap
real(kind=r_def), dimension(undf1), intent(inout) :: new_field
real(kind=r_def), dimension(old_field_undf), intent(in) :: old_field
And if the metdata was for GH_INC
:
type, public, extends(kernel_type) :: test_kernel_type
private
type(arg_type) :: meta_args(2) = (/ &
arg_type(GH_FIELD, GH_REAL, GH_INC, ANY_SPACE_1), &
arg_type(GH_FIELD, GH_REAL, GH_READ, ANY_DISCONTINUOUS_SPACE_2, STENCIL(CROSS2D)) &
/)
integer :: operates_on = CELL_COLUMN
contains
procedure, nopass :: test_code
end type
the psycode would be
SUBROUTINE invoke_test_kernel(new_field, old_field, stencil_depth)
USE test_kernel_mod, ONLY: test_code
USE mesh_mod, ONLY: mesh_type
USE stencil_2D_dofmap_mod, ONLY: stencil_2D_dofmap_type, STENCIL_2D_CROSS
TYPE(field_type), intent(in) :: new_field, old_field
INTEGER(KIND=i_def), intent(in) :: stencil_depth
INTEGER colour
INTEGER(KIND=i_def) cell
INTEGER(KIND=i_def) loop1_start
INTEGER(KIND=i_def) loop0_start, loop0_stop
INTEGER(KIND=i_def) nlayers
TYPE(field_proxy_type) new_field_proxy, old_field_proxy
INTEGER(KIND=i_def), pointer :: map_adspc2_old_field(:,:) => null(), map_aspc1_new_field(:,:) => null()
INTEGER(KIND=i_def) ndf_aspc1_new_field, undf_aspc1_new_field, ndf_adspc2_old_field, undf_adspc2_old_field
INTEGER(KIND=i_def), allocatable :: last_halo_cell_all_colours(:,:)
INTEGER(KIND=i_def) ncolour
INTEGER(KIND=i_def), pointer :: cmap(:,:)
INTEGER(KIND=i_def) max_halo_depth_mesh
TYPE(mesh_type), pointer :: mesh => null()
INTEGER(KIND=i_def) old_field_max_branch_length
INTEGER(KIND=i_def), pointer :: old_field_stencil_size(:,:) => null()
INTEGER(KIND=i_def), pointer :: old_field_stencil_dofmap(:,:,:,:) => null()
TYPE(stencil_2D_dofmap_type), pointer :: old_field_stencil_map => null()
INTEGER(KIND=i_def) :: old_field_undf
!
! Initialise field and/or operator proxies
!
new_field_proxy = new_field%get_proxy()
old_field_proxy = old_field%get_proxy()
!
! Initialise number of layers
!
nlayers = new_field_proxy%vspace%get_nlayers()
!
! Create a mesh object
!
mesh => new_field_proxy%vspace%get_mesh()
max_halo_depth_mesh = mesh%get_halo_depth()
!
! Get the colourmap
!
ncolour = mesh%get_ncolours()
cmap => mesh%get_colour_map()
!
! Initialise stencil dofmaps
!
old_field_stencil_map => old_field_proxy%vspace%get_stencil_2D_dofmap(STENCIL_2D_CROSS,stencil_depth)
old_field_max_branch_length = stencil_depth + 1_i_def
old_field_stencil_dofmap => old_field_stencil_map%get_whole_dofmap()
old_field_stencil_size => old_field_stencil_map%get_stencil_sizes()
!
! Look-up dofmaps for each function space
!
map_aspc1_new_field => new_field_proxy%vspace%get_whole_dofmap()
map_adspc2_old_field => old_field_proxy%vspace%get_whole_dofmap()
!
! Initialise number of DoFs for aspc1_new_field
!
ndf_aspc1_new_field = new_field_proxy%vspace%get_ndf()
undf_aspc1_new_field = new_field_proxy%vspace%get_last_dof_halo(1)
!
! Initialise number of DoFs for adspc2_old_field
!
ndf_adspc2_old_field = old_field_proxy%vspace%get_ndf()
undf_adspc2_old_field = old_field_proxy%vspace%get_last_dof_halo(1)
old_field_undf = old_field_proxy%vspace%get_last_dof_halo(stencil_depth + 1)
!
! Initialise mesh properties
!
last_halo_cell_all_colours = mesh%get_last_halo_cell_all_colours()
!
! Set-up all of the loop bounds
!
loop0_start = 1
loop0_stop = ncolour
loop1_start = 1
!
! Call kernels and communication routines
!
IF (old_field_proxy%is_dirty(depth=stencil_depth+1)) THEN
CALL old_field_proxy%halo_exchange(depth=stencil_depth+1)
END IF
!
DO colour=loop0_start,loop0_stop
!$omp parallel default(shared), private(cell)
!$omp do schedule(static)
DO cell=loop1_start,last_halo_cell_all_colours(colour,1)
!
CALL test_code(nlayers, &
new_field_proxy%data, &
old_field_proxy%data, &
old_field_stencil_size(:,cmap(colour, cell)), &
old_field_max_branch_length, &
old_field_stencil_dofmap(:,:,:,cmap(colour, cell)), &
old_field_undf, &
ndf_aspc1_new_field, &
undf_aspc1_new_field, &
map_aspc1_new_field(:,cmap(colour, cell)), &
ndf_adspc2_old_field, &
undf_adspc2_old_field, &
map_adspc2_old_field(:,cmap(colour, cell)))
END DO
!$omp end do
!$omp end parallel
END DO
!
! Set halos dirty/clean for fields modified in the above loop
!
CALL new_field_proxy%set_dirty()
!
!
END SUBROUTINE invoke_test_kernel
and the kernel remains unchanged
LFRic #4055: [https://code.metoffice.gov.uk/trac/lfric/ticket/4055] introduces a method to have fields of different sized haloes. It does this by changing the default size of field arrays from
vector_space%get_last_dof_halo()
tovector_space%get_last_dof_halo(field_halo_depth)
wherefield_halo_depth
defaults to 1 unless it is passed in the field constructor as an optional argument. The result of this is that most field have a data array that is smaller then it previously was and importantly is less thanvector_space%get_undf()
.Currently the lfric api specifies that if a kernel metadata specifies a
GH_FIELD
is used thenundf
is also passed in from the function space. The propsosed solution is two foldundf = field_proxy%vspace%get_undf()
we useundf = field_proxy%vspace%get_last_dof_halo(1)
undf_stencil_field = stencil_field_proxy%vspace%get_last_dof_halo(depth)
wheredepth=stencil_depth
forGH_READ
kernels ordepth=stencil_depth+1
forGH_INC
kernels andstencil_depth
is the algorithm level argument defining the stencil depth