We tried many changes to the file, but most of them had little effect, because the code is a direct implementation of a physical formula.
We therefore turned to parallel computing to speed it up.
We added the OpenACC directive "!$acc loop" before most of the loops so that they run in parallel.
Secondly, we found that the argsort function is essentially a selection sort, which has O(n^2) time complexity. We replaced it with a quicksort, which runs in O(n log n) time on average, to reduce the cost of sorting.
Original function:
! Return the permutation that arranges a in ascending order.
!
! The input is not modified: the sort runs on a private copy while the same
! exchanges are applied to the index vector.  The method is a selection sort
! (one minloc scan of the still-unsorted tail per pass), so the number of
! comparisons grows quadratically with size(a).
!
!   a : values to rank
!   b : 1-based positions in a, listed in order of increasing value
pure function argsort(a) result(b)
  implicit none
  real(rk8) , intent(in) :: a(:)
  integer(ik4) , dimension(size(a)) :: b
  real(rk8) , dimension(size(a)) :: work
  real(rk8) :: held_value
  integer :: nelem , pos , smallest , held_index

  nelem = size(a)
  work = a

  ! Start from the identity permutation.
  b = [(pos, pos = 1, nelem)]

  ! With zero or one element the identity permutation is already the answer
  ! (the loop below would not execute in either case).
  if ( nelem <= 1 ) return

  passes: do pos = 1 , nelem - 1
    ! minloc is relative to the section work(pos:), hence the shift by pos-1.
    smallest = pos - 1 + minloc(work(pos:), 1)
    if ( smallest == pos ) cycle passes

    ! Exchange the values and mirror the exchange in the index vector.
    held_value = work(pos)
    work(pos) = work(smallest)
    work(smallest) = held_value

    held_index = b(pos)
    b(pos) = b(smallest)
    b(smallest) = held_index
  end do passes
end function argsort
Modified function:
! Return the permutation that arranges a in ascending order.
!
! On exit b holds 1-based positions in a, listed in order of increasing
! value.  Elements of a that compare equal are listed in increasing position,
! so the result is fully determined by the input.  a itself is not modified.
!
!   a : values to rank
!   b : sorting permutation, size(a) elements
pure function argsort(a) result(b)
  implicit none
  real(rk8), intent(in) :: a(:)
  integer(ik4), dimension(size(a)) :: b
  integer :: i, n

  n = size(a)
  ! Identity permutation; an empty input yields an empty constructor.
  b = [(int(i, ik4), i = 1, n)]
  ! quickSort is a no-op unless the range holds at least two elements.
  call quickSort(a, b, 1, n)
end function argsort

! Sort the index range b(low:high) so that a(b(low:high)) is ascending.
!
! Only the smaller side of each partition is handled by a recursive call; the
! larger side is processed by the enclosing loop.  Every recursive call
! therefore covers at most half of its parent's range, which keeps the
! recursion depth logarithmic in the range size.
!
!   a         : keys, read only
!   b         : index vector, permuted in place within low:high
!   low, high : inclusive bounds of the range to sort
pure recursive subroutine quickSort(a, b, low, high)
  implicit none
  real(rk8), intent(in) :: a(:)
  integer(ik4), intent(inout) :: b(:)
  integer, intent(in) :: low, high
  integer :: lo, hi, pi

  lo = low
  hi = high
  do while (lo < hi)
    call quickSortPartition(a, b, lo, hi, pi)
    if (pi - lo < hi - pi) then
      call quickSort(a, b, lo, pi - 1)
      lo = pi + 1
    else
      call quickSort(a, b, pi + 1, hi)
      hi = pi - 1
    end if
  end do
end subroutine quickSort

! Partition b(low:high) around a pivot and return the pivot's final position.
!
! Thin wrapper kept so that existing references to the function form keep
! working.  It cannot be declared pure because it modifies its argument b;
! pure code must call quickSortPartition directly.
function partition(a, b, low, high) result(pi)
  implicit none
  real(rk8), intent(in) :: a(:)
  integer(ik4), intent(inout) :: b(:)
  integer, intent(in) :: low, high
  integer :: pi

  call quickSortPartition(a, b, low, high, pi)
end function partition

! Partition b(low:high) around the entry found in the middle of the range.
!
! Entries are ordered by key a(b(.)), with the index b(.) itself as the
! tie-break.  Provided b(low:high) holds distinct indices this is a strict
! ordering, so equal keys do not all fall on one side of the pivot.
!
!   a         : keys, read only
!   b         : index vector, permuted in place within low:high
!   low, high : inclusive bounds of the range (caller ensures low < high)
!   pi        : final position of the pivot; entries before it order
!               below the pivot, entries after it order above
pure subroutine quickSortPartition(a, b, low, high, pi)
  implicit none
  real(rk8), intent(in) :: a(:)
  integer(ik4), intent(inout) :: b(:)
  integer, intent(in) :: low, high
  integer, intent(out) :: pi
  integer :: i, j, mid
  integer(ik4) :: pivot_index, held
  real(rk8) :: pivot

  ! Take the middle entry as pivot and park it at the end of the range.
  ! A middle pivot splits already-sorted input evenly.
  mid = low + (high - low) / 2
  pivot_index = b(mid)
  b(mid) = b(high)
  b(high) = pivot_index
  pivot = a(pivot_index)

  i = low - 1
  do j = low, high - 1
    ! Exact equality is intended here: it only decides the tie-break.
    if (a(b(j)) < pivot .or. &
        (a(b(j)) == pivot .and. b(j) < pivot_index)) then
      i = i + 1
      held = b(i)
      b(i) = b(j)
      b(j) = held
    end if
  end do

  ! Move the pivot between the two groups.
  pi = i + 1
  b(high) = b(pi)
  b(pi) = pivot_index
end subroutine quickSortPartition
We tried many changes to the file, but most of them had little effect, because the code is a direct implementation of a physical formula. We therefore turned to parallel computing to speed it up. We added the OpenACC directive "!$acc loop" before most of the loops so that they run in parallel.
Secondly, we found that the argsort function is essentially a selection sort, which has O(n^2) time complexity. We replaced it with a quicksort, which runs in O(n log n) time on average, to reduce the cost of sorting. Original function: pure function argsort(a) result(b) implicit none real(rk8) , intent(in) :: a(:) integer(ik4) , dimension(size(a)) :: b integer :: n , i , imin , temp1 real(rk8) :: temp2 real(rk8) , dimension(size(a)) :: a2 a2 = a n = size(a) do i = 1 , n b(i) = i end do if ( n == 1 ) return do i = 1 , n-1 imin = minloc(a2(i:),1) + i - 1 if ( imin /= i ) then temp2 = a2(i) a2(i) = a2(imin) a2(imin) = temp2 temp1 = b(i) b(i) = b(imin) b(imin) = temp1 end if end do end function argsort
Modified function: pure function argsort(a) result(b) implicit none real(rk8), intent(in) :: a(:) integer(ik4), dimension(size(a)) :: b integer :: n
end subroutine nogtom