USGS-R / Rainmaker

Repo being permanently moved to: https://code.usgs.gov/water/analysis-tools/Rainmaker
https://code.usgs.gov/water/analysis-tools/Rainmaker
Other
18 stars 14 forks source link

RMevents_sample does not capture preceding rain events #64

Open lukeloken opened 4 years ago

lukeloken commented 4 years ago

I noticed that RMevents_sample does not correctly include rain for the period immediately preceding the start times. The bug appears if the first row of the precipitation dataframe beginRow has a calculated time difference greater than ieSec. If so, the code will not allow for rain preceding the start time to contribute to the rain totals. I updated the function and the code below should fix the issue.

I also changed the df argument so the initial data.frame is not overwritten on the first line. This makes it easier to diagnose issues.

https://github.com/USGS-R/Rainmaker/blob/0b5509d79fc8db459c469bb55feefce91707de82/R/RMevents_sample.R#L75

RMevents_sample <- function(df.orig,
                            ieHr=6,
                            rain="rain",
                            time="pdate",
                            dfsamples,
                            bdate="bpdate",
                            edate="epdate"){
  df <- df.orig
  df <- rbind(df[1,],df[df[,rain]>0,])
  timediff <- difftime(df[2:(nrow(df)),time],df[1:(nrow(df)-1),time],units="secs")
  timediff_min <- difftime(df[2:(nrow(df)),time],df[1:(nrow(df)-1),time],units="mins")
  df$timediff <- c(NA,timediff)
  df$timediff_min <- c(NA, timediff_min)
  #  dfsamples$Braindate <- dfsamples$bpdate
  #  dfsamples$Eraindate <- dfsamples$epdate

  ieSec <- ieHr * 3600 # compute interevent period in seconds to use with POSIX
  rainDepth <- numeric()
  startRainDates <- numeric()
  endRainDates <- numeric()
  tipsbystorm <- data.frame()

  rain_timezone <- lubridate::tz(df[,time])

  # for (i in 1:27){
    for (i in 1:nrow(dfsamples)){
      beginRow <- max(which(df[, time] < dfsamples[i, bdate])) + 1
      # this fails if you have an event that preceeds the rain record
    if(i ==1 & is.infinite(beginRow)) {
      startRainDates <- NA
      endRainDates <- NA
      rainDepth <- NA
      event <- 0
      next
    }

    if (i > 1 & is.infinite(beginRow)) {
      startRainDates <- c(startRainDates, NA)
      endRainDates <- c(endRainDates, NA)
      rainDepth <- c(rainDepth, NA)
      next
    }

    endRow <- max(which(df[,time]<dfsamples[i,edate]))
    subdf <- df[c(1:(beginRow-1)),]

    if (length(which(subdf$timediff>ieSec)) > 0) {
    startRainRow <- max(which(subdf$timediff>ieSec))
    } else {startRainRow = 1}

    # if end of last precedding storm is beyond ieSec use sample start time
    # Otherwise use the start of the last preceeding storm
    if (difftime(dfsamples[i,bdate], max(subdf[,time]), units = "secs") > ieSec) {
      BD <- df[beginRow,time]
    } else {
      BD <- subdf[startRainRow,time]
    }
    subdf2 <- df[c(startRainRow:endRow),]

    if(sum(subdf2[,rain] > 0) > 0) {
      ED <- subdf2[max(which(subdf2[,rain] > 0)), time]

      if(ED < BD) ED <- BD + 60

    } else {
      ED <- BD + 60
    }
    eventRows <- which(df.orig[,time] >= BD & df.orig[, time] <= ED)
    eventRows_tips <- which(df[,time] >= BD & df[, time] <= ED)

    eventRain <- ifelse(length(eventRows) > 0, sum(df.orig[eventRows, rain]), 0)
    rainDepth <- c(rainDepth, eventRain)

    # get data frame of all rain from this event, add event id column
    sub_tips <- df[eventRows_tips, ]

    startRainDates[i] <- BD
    endRainDates[i] <- ED

    if(i == 1) {

      if (nrow(sub_tips) > 0) {
        event <- 1
      } else {
        event <- 0
      }
      if (nrow(sub_tips) > 0){
        sub_tips$event <- event
      }
      tipsbystorm <- sub_tips
    } else {

      if (nrow(sub_tips) > 0) {
        event <- event + 1
      } else {
        event <- event
      }
      if (nrow(sub_tips) > 0){
        sub_tips$event <- event
      }

      tipsbystorm <- rbind(tipsbystorm, sub_tips)
    }
  }

  dfsamples$StartDate <- as.POSIXct(startRainDates, origin = '1970-01-01', tz = rain_timezone)
  dfsamples$EndDate <- as.POSIXct(endRainDates, origin = '1970-01-01', tz = rain_timezone)
  dfsamples$rain <- rainDepth
  dfsamples$stormnum <- 1:nrow(dfsamples)

  dfsamples <- dfsamples[,c('stormnum', 'StartDate', 'EndDate', 'rain')]
  timeInterval <- min(timediff_min, na.rm = T)
  tipsbystorm <- tipsbystorm[,c(rain, time, 'timediff_min', 'event')]
  names(tipsbystorm)[3] <- 'dif_time'

  out <- list(dfsamples, dfsamples, tipsbystorm, timeInterval)
  names(out) <- c('storms2', 'storms', 'tipsbystorm', 'timeInterval')

  return(out)
}