Open willirath opened 3 years ago
Hi Willi, I agree, the monthly means from 5d output do not make much sense. That's why I tried to improve cdfmoy_freq
a while ago (based on CDFTOOLS-3.0) by taking into account the part of the 7th bin that contributes to Jan. and the part of that bin that contributes to Feb. etc... I attach the code below (sorry, that was before I was using git...). This is still not a real monthly mean as I boldly state in my comments to the code, but I guess as close as you can get from 5d output.
PROGRAM cdfmoy_freq_realm
!!======================================================================
!! *** PROGRAM cdfmoy_freq_realm
!!=====================================================================
!! ** Purpose : Mainly in case of forcing file (gathered as yearly file)
!! compute annual mean, monthl mean or diurnal means.
!!
!! ** Method : Detect the frequency of the input file according to the
!! number of fields in the file.
!!
!! History : 2.1 : 06/2007 : P. Mathiot : Original code from cdfmoy
!! 3.0 : 06/2011 : J.M. Molines : Doctor norm + Lic.
!! 3.0 : 10/2011 : P. Mathiot : Add seasonal option and
!! allow file with 73 time steps
!! : 05/2015 : J.M. Molines : Rewrite to be compliant with XIOS
!! : 11/2016 : J.K. Rieck : Compute real monthly means
!!----------------------------------------------------------------------
USE cdfio
USE modcdfnames
!!----------------------------------------------------------------------
!! CDFTOOLS_3.0 , MEOM 2011
!! $Id: cdfmoy_freq.f90 853 2015-09-15 18:08:02Z molines $
!! Copyright (c) 2011, J.-M. Molines
!! Software governed by the CeCILL licence (Licence/CDFTOOLSCeCILL.txt)
!!----------------------------------------------------------------------
IMPLICIT NONE
INTEGER(KIND=4) :: jk, jvar ! dummy loop index
INTEGER(KIND=4) :: jv, jtt ! dummy loop index
INTEGER(KIND=4) :: jframe ! dummy loop index
INTEGER(KIND=4) :: it1, it2 ! box limits
INTEGER(KIND=4) :: ierr, itt ! working integer
INTEGER(KIND=4) :: narg, iargc !
INTEGER(KIND=4) :: ijarg, ijm, njmj
INTEGER(KIND=4) :: npiglo, npjglo ! size of the domain
INTEGER(KIND=4) :: npk ,npt ! size of the domain
INTEGER(KIND=4) :: ip, nf ! working integer
INTEGER(KIND=4) :: nvars ! Number of variables in a file
INTEGER(KIND=4) :: nframes ! Number of frames in the output file
INTEGER(KIND=4) :: ndyr, nhyr ! Days and hours per year
INTEGER(KIND=4) :: nhfri ! input freq in hours
INTEGER(KIND=4) :: ncout, ncout2
INTEGER(KIND=4), DIMENSION( 12) :: njm ! month vector
INTEGER(KIND=4), DIMENSION(:), ALLOCATABLE :: id_var, ipk, id_varout, ibox
REAL(KIND=4), DIMENSION(:), ALLOCATABLE :: iboxr
REAL(KIND=4), DIMENSION( 12) :: njmr ! month vector
REAL(KIND=4), DIMENSION(:), ALLOCATABLE :: rest
REAL(KIND=4) :: rest1, restr, rest11
REAL(KIND=4) :: rest2
REAL(KIND=4) :: nhfrir
REAL(KIND=4) :: nhfrd ! input freq in days
REAL(KIND=4), DIMENSION(:,:), ALLOCATABLE :: v2d, rmean
REAL(KIND=4), DIMENSION(:,:,:), ALLOCATABLE :: v3d
REAL(KIND=4), DIMENSION(:,:,:,:), ALLOCATABLE :: v4d
REAL(KIND=4), DIMENSION(:), ALLOCATABLE :: time, time_mean
REAL(KIND=8), DIMENSION(:,:), ALLOCATABLE :: dtab ! Arrays for cumulated values
REAL(KIND=8) :: dtotal_time
CHARACTER(LEN=256) :: cf_in !
CHARACTER(LEN=256) :: cf_out='cdfmoy_' ! file name
CHARACTER(LEN=256) :: cv_dep
CHARACTER(LEN=256) :: cfreq_o ! output frequency
CHARACTER(LEN=256) :: cldum ! dummy character arguments
CHARACTER(LEN=256), DIMENSION(:), ALLOCATABLE :: cv_names ! array of var nam
CHARACTER(LEN=2 ), DIMENSION(4) :: cfreq_a=(/'h ','d ','mo','y '/) ! authorized keys for frequency specif
CHARACTER(LEN=2 ) :: cfr_id ! current output freq id
TYPE (variable), DIMENSION(:), ALLOCATABLE :: stypvar
LOGICAL :: lcaltmean=.TRUE.
LOGICAL :: lleap
LOGICAL :: lerr
LOGICAL :: lnc4 = .FALSE.
LOGICAL :: lv3d = .FALSE.
LOGICAL :: lv4d = .FALSE.
!!----------------------------------------------------------------------
CALL ReadCdfNames()
narg= iargc()
IF ( narg == 0 ) THEN
PRINT *,' usage : cdfmoy_freq -i IN-file -f averaging-length [ -v3d] [-v4d] '
PRINT *,' [-nc4] [-o output root] '
PRINT *,' '
PRINT *,' PURPOSE :'
PRINT *,' This program takes a file covering 1 year of data (evenly spaced)'
PRINT *,' and sub-samples the data by performing box averages, which span is given'
PRINT *,' as argument. The original data sampling can be hours, days or monthes '
PRINT *,' or even seasons.'
PRINT *,' The program recognizes leap years, and when feb. 29 is found, it is '
PRINT *,' included in the current ''box'' (averaging length is thus increased'
PRINT *,' by 1 day.'
PRINT *,' '
PRINT *,' ARGUMENTS :'
PRINT *,' -i IN-file : gives the name of the yearly file containing either 365 '
PRINT *,' or 366 days'
PRINT *,' -f averaging-length : Set the time size of the averaging box. '
PRINT *,' Averaging length is specified using XIOS convention (e.g. 1d,'
PRINT *,' 5d, 1mo, 1y ; 4mo stands for seasonal means )'
PRINT *,' '
PRINT *,' OPTIONS :'
PRINT *,' [-v3d] : use 3d variable (x,y,t) : save execution time, increase memory'
PRINT *,' [-v4d] : use 4d variable (x,y,z,t): save execution time, increase memory'
PRINT *,' [-nc4] : use netcdf4 with chunking and deflation for the output file'
PRINT *,' [-o output_root] : specify the root of the output file name instead '
PRINT *,' of ',TRIM(cf_out),'. Final name will have <freq> appened'
PRINT *,' to the root.'
PRINT *,' '
PRINT *,' REQUIRED FILES :'
PRINT *,' none.'
PRINT *,' '
PRINT *,' OUTPUT : '
PRINT *,' netcdf file : cdfmoy_output<freq>.nc'
PRINT *,' variables : same as variables in input file.'
PRINT *,' '
PRINT *,' SEE ALSO :'
PRINT *,' cdfmoy, cdfmoy_weighted'
PRINT *,' '
STOP
ENDIF
! parse command line
ijarg = 1
DO WHILE ( ijarg <= narg )
CALL getarg(ijarg, cldum) ; ijarg = ijarg +1
SELECT CASE ( cldum )
CASE ( '-i' ) ; CALL getarg(ijarg, cf_in ) ; ijarg = ijarg + 1
CASE ( '-f' ) ; CALL getarg(ijarg, cfreq_o ) ; ijarg = ijarg + 1
CASE ( '-o' ) ; CALL getarg(ijarg, cf_out ) ; ijarg = ijarg + 1
CASE ( '-nc4' ) ; lnc4=.TRUE.
CASE ( '-v3d' ) ; lv3d=.TRUE.
CASE ( '-v4d' ) ; lv4d=.TRUE.
CASE DEFAULT
PRINT *,' +++ ERROR : Option ', TRIM(cldum) ,' not understood !'
STOP 1
END SELECT
END DO
IF ( chkfile ( cf_in ) ) STOP ! missing file
! parse the cfreqo to determine the output frequency.
! Allowed syntax is nf<cfr_id> where nf is an integer >0, <cfr_id> is h, d, mo or y
cfr_id='--'
DO jk =1, 4
ip=INDEX(cfreq_o, cfreq_a(jk) )
IF ( ip /= 0 ) THEN
cfr_id=cfreq_o(ip:)
READ(cfreq_o(1:ip-1), *) nf
EXIT
ENDIF
ENDDO
SELECT CASE ( cfr_id )
CASE ( '--' )
PRINT *, ' +++ ERROR : Cannot determine the output frequency'
PRINT *, ' You should use a character string such as 6h, 5d, 1mo, 1y '
CASE ( 'd' )
IF ( nf /= 1 .AND. nf/=5 ) THEN
PRINT *, ' +++ ERROR : only 1d or 5d are acceptable !'
STOP
ENDIF
CASE ( 'y' )
IF ( nf > 1 ) THEN
PRINT *, ' +++ ERROR : Cannot have output freq > 1 y !'
STOP
ENDIF
END SELECT
! get domain size from the input file
npiglo= getdim (cf_in, cn_x )
npjglo= getdim (cf_in, cn_y )
npk = getdim (cf_in, cn_z, cdtrue=cv_dep, kstatus=ierr)
IF (ierr /= 0 ) THEN
npk = getdim (cf_in,'z',cdtrue=cv_dep,kstatus=ierr)
IF (ierr /= 0 ) THEN
npk = getdim (cf_in,'sigma',cdtrue=cv_dep,kstatus=ierr)
IF ( ierr /= 0 ) THEN
PRINT *,' assume file with no depth'
npk=0
ENDIF
ENDIF
ENDIF
npt = getdim (cf_in, cn_t)
! Now look at input file and try to look for input frequency
! Note that we know that the file contains either 365d or 366 days of data
! We suppose that input file freq is a multiple of 1 hour.
lleap = .FALSE.
ndyr = 365 ! number of days per year
nhyr = ndyr*24 ! number of hours per year
IF ( MOD( nhyr, npt ) /= 0 ) THEN
ndyr = 366 ! try leap year
nhyr = ndyr*24 ! number of hours per leap year
IF ( MOD( nhyr, npt ) /= 0 ) THEN
PRINT *," +++ ERROR : npt do not fit in 365 nor 366 days "
STOP
ELSE
lleap=.TRUE.
ENDIF
ENDIF
nhfri = 24*ndyr/npt ! input frequency in hours
nhfrd = ndyr/npt ! input frequency in days
PRINT *, 'INPUT FILE : ', TRIM(cf_in)
PRINT *, 'NPIGLO = ', npiglo
PRINT *, 'NPJGLO = ', npjglo
PRINT *, 'NPK = ', npk
PRINT *, 'NPT = ', npt
PRINT *, ' '
PRINT *, ' LEAP YEAR : ', lleap
PRINT *, ' INPUT FREQ : ', nhfri,' hours '
! Now determines the number of frames in the output file and detect impossible case
! Also determines the number of input frames in boxes. (can be variable)
! number of day by month
njm(:)= (/ 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 /)
IF ( lleap ) THEN
njm(:) = (/ 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 /)
ENDIF
njmr(:)=njm(:)
lerr = .TRUE.
SELECT CASE ( cfr_id )
CASE ( 'h' )
IF ( MOD( nf, nhfri ) == 0 ) THEN
nframes = ndyr*24/nf
ALLOCATE (ibox( nframes) )
ibox(:)=nf/nhfri ; lerr=.FALSE.
ENDIF
CASE ( 'd' ) ! note : 365 = 73 *5 ( 366 = 73 *5 + 1 )
IF ( MOD ( nf*24, nhfri ) == 0 ) THEN
IF ( nf == 1 ) nframes = ndyr
IF ( nf == 5 ) nframes = 73 ! in case of leap year frame#12 will have 6day average
ALLOCATE (ibox( nframes) )
lerr=.FALSE.
ibox(:)=nf*24/nhfri
IF ( lleap .AND. nf == 5 ) ibox(12)=6*24/nhfri
ENDIF
CASE ( 'mo' )
!##################################
!JM : do not work if nhfri > 24 !!!
!##################################
IF ( MOD( 12, nf ) == 0 ) THEN
nframes=12/nf ; lerr=.FALSE.
ALLOCATE (ibox( nframes) ) ! 12 6 4 3 2
ALLOCATE (iboxr( nframes) )
ALLOCATE (rest( nframes) )
rest(:)=0.d0
nhfrir=nhfri
SELECT CASE ( nframes )
CASE ( 12 )
ibox(:) = njm(:)*24/nhfri
iboxr(:)= ibox(:)
rest(:)= (njmr(:)*24/nhfrir)-iboxr(:)
CASE ( 6 )
DO jframe= 1, nframes
ijm=jframe*2-1
ibox(jframe) = (njm(ijm)+njm(ijm+1)) *24/nhfri
ENDDO
CASE ( 4 )
DO jframe= 1, nframes
ijm=jframe*3-2
ibox(jframe) = (njm(ijm)+njm(ijm+1)+njm(ijm+2)) *24/nhfri
ENDDO
CASE ( 3 )
DO jframe= 1, nframes
ijm=jframe*4-3
ibox(jframe) = (njm(ijm)+njm(ijm+1)+njm(ijm+2)+njm(ijm+3)) *24/nhfri
ENDDO
CASE ( 2 )
DO jframe= 1, nframes
ijm=jframe*6-5
ibox(jframe) = (njm(ijm)+njm(ijm+1)+njm(ijm+2)+njm(ijm+3)+njm(ijm+4)+njm(ijm+5)) *24/nhfri
ENDDO
END SELECT
ENDIF
CASE ( 'y' ) ! 1y average all is to be taken !
nframes = 1
ALLOCATE (ibox( nframes) )
ibox(:) = npt
lerr = .FALSE.
END SELECT
IF ( lerr ) THEN
PRINT *, ' +++ ERROR : Input and output frequency incompatible.'
PRINT *, ' Input : ', nhfri,' hours '
PRINT *, ' Output : ', nf,' hours '
STOP
ENDIF
ALLOCATE( dtab(npiglo,npjglo), v2d(npiglo,npjglo) )
ALLOCATE( rmean(npiglo,npjglo) )
IF (lv3d) ALLOCATE( v3d(npiglo,npjglo,npt) )
nvars = getnvar(cf_in)
PRINT *,' nvars =', nvars
ALLOCATE (cv_names(nvars) )
ALLOCATE (stypvar(nvars) )
ALLOCATE (id_var(nvars), ipk(nvars), id_varout(nvars))
ALLOCATE( time( npt), time_mean(nframes) )
! get list of variable names and collect attributes in stypvar (optional)
cv_names(:)=getvarname(cf_in, nvars, stypvar)
id_var(:) = (/(jv, jv=1,nvars)/)
! ipk gives the number of level or 0 if not a T[Z]YX variable
ipk(:) = getipk (cf_in, nvars, cdep=cv_dep)
!
WHERE( ipk == 0 ) cv_names='none'
stypvar(:)%cname = cv_names
DO jv=1, nvars
stypvar(jv)%ichunk=(/npiglo,npjglo, 1, 1 /)
ENDDO
! create output file taking the sizes in cf_in
cf_out = TRIM(cf_out)//'_'//TRIM(cfreq_o)//'.nc'
ncout = create (cf_out, cf_in, npiglo, npjglo, npk, cdep=cv_dep , ld_nc4=lnc4 )
ierr = createvar (ncout, stypvar, nvars, ipk, id_varout , ld_nc4=lnc4 )
ierr = putheadervar(ncout, cf_in, npiglo, npjglo, npk, cdep=cv_dep )
time(:)=getvar1d(cf_in, cn_vtimec, npt)
DO jvar = 1,nvars
IF ( cv_names(jvar) == cn_vlon2d .OR. &
cv_names(jvar) == cn_vlat2d .OR. cv_names(jvar) == 'none') THEN
! skip these variable
ELSE
PRINT *,' Working with ', TRIM(cv_names(jvar))
IF (lv4d) ALLOCATE( v4d(npiglo, npjglo,ipk(jvar),npt) )
IF (lv4d) v4d(:,:,:,:) = getvar4d(cf_in, cv_names(jvar),npiglo, npjglo,ipk(jvar),npt)
DO jk=1,ipk(jvar)
! initialisation
dtab(:,:) = 0.d0 ; dtotal_time = 0.d0
! time loop
it1=1
itt=1
rest1=rest(1)
rest2=1.d0
IF ( lv3d ) v3d(:,:,:)=getvar3dt(cf_in, cv_names(jvar),jk,npiglo, npjglo, npt)
DO jframe = 1, nframes
it2=it1+ibox(jframe)-1
DO jtt=it1, it2
! load data
IF ( lv4d) THEN
v2d(:,:) = v4d(:,:,jk,jtt)
ELSE
IF ( lv3d ) THEN
v2d(:,:) = v3d(:,:,jtt)
ELSE
v2d(:,:) = getvar(cf_in, cv_names(jvar), jk, npiglo, npjglo, ktime=jtt )
ENDIF
ENDIF
IF ( jtt == it1 ) THEN
dtab(:,:) = dtab(:,:) + v2d(:,:)*rest2
ELSE
dtab(:,:) = dtab(:,:) + v2d(:,:)*1.d0
ENDIF
IF ( lcaltmean ) THEN
dtotal_time = dtotal_time + time(jtt)
ENDIF
itt=itt+1
ENDDO
IF ( NINT(rest1*10) == 0 .AND. ( jframe == 3 .OR. jframe == 12 ) ) THEN
rest11 = 1.d0
IF ( lv4d) THEN
v2d(:,:) = v4d(:,:,jk,jtt)
ELSE
IF ( lv3d ) THEN
v2d(:,:) = v3d(:,:,jtt)
ELSE
v2d(:,:) = getvar(cf_in, cv_names(jvar), jk, npiglo, npjglo, ktime=jtt )
ENDIF
ENDIF
dtab(:,:) = dtab(:,:) + v2d(:,:)*rest11
it2 = it2+1
ELSE
IF ( lv4d) THEN
v2d(:,:) = v4d(:,:,jk,jtt)
ELSE
IF ( lv3d ) THEN
v2d(:,:) = v3d(:,:,jtt)
ELSE
v2d(:,:) = getvar(cf_in, cv_names(jvar), jk, npiglo, npjglo, ktime=jtt )
ENDIF
ENDIF
dtab(:,:) = dtab(:,:) + v2d(:,:)*rest1
ENDIF
rmean(:,:) = dtab(:,:)/(iboxr(jframe) + rest(jframe))
ierr = putvar(ncout, id_varout(jvar) ,rmean, jk, npiglo, npjglo, ktime=jframe)
IF ( lcaltmean ) THEN
time_mean(jframe) = dtotal_time/ibox(jframe)
ENDIF
dtab(:,:) = 0.d0 ; dtotal_time = 0.d0; rmean(:,:) = 0.d0
it1 = it2 + 1
rest2=1.d0-rest1
restr=(njm(jframe+1)-(rest2*nhfrd))
rest1=(MOD( restr , nhfrd )/nhfrd)
!
ENDDO ! loop to next time
lcaltmean=.FALSE.
ENDDO ! loop to next level
IF (lv4d) DEALLOCATE( v4d )
END IF
END DO ! loop to next var in file
ierr = putvar1d(ncout, time_mean, nframes , 'T')
ierr = closeout(ncout)
END PROGRAM cdfmoy_freq_realm
If I'm reading the code of
cdfmoy_freq
correctly, there is a bug in how it associates the input periods to target averaging periods if the boundaries of the input periods do not exactly match those of the desired outputs.For the case of
5d
inputs and1mo
outputs, it will do the following (consideringnoleap
here):Based on the month lenghts in
njm
https://github.com/meom-group/CDFTOOLS/blob/22e3eb01077ec828332679ad2a2f24d28afb9dbe/src/cdfmoy_freq.f90#L212 and based on the number of hours in the input binsnhfri
https://github.com/meom-group/CDFTOOLS/blob/22e3eb01077ec828332679ad2a2f24d28afb9dbe/src/cdfmoy_freq.f90#L198 the number of full bins within each monthibox
is calculated: https://github.com/meom-group/CDFTOOLS/blob/22e3eb01077ec828332679ad2a2f24d28afb9dbe/src/cdfmoy_freq.f90#L244The resulting list of full bins per month is (I read this as a floor division, but this may be where I'm wrong)
(/ 6, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 /)
(eleven sixes and one five for February).Then, the resulting
ibox
is used to construct the iteration that groups input time bins without taking into account the truncation that happened in constructingibox
https://github.com/meom-group/CDFTOOLS/blob/22e3eb01077ec828332679ad2a2f24d28afb9dbe/src/cdfmoy_freq.f90#L317So this will lead to the first 6 bins (Jan-01 to Jan-30) to be counted for the January mean, the following 5 bins (Jan-31 to Feb-24) to go into the February mean, the following 6 bins (Feb-25 to Mar-26) to go into March, ..., and the 65th to 71st bins (Nov-21 to Dec-21) to be counted for December.