Open jordansread opened 7 years ago
library(dataRetrieval)
library(dplyr)
library(ggplot2)
# bounding box based on National Weather Service Des Moines, IA
# County Warning Area (DMX CWA), map of major flooding (Fig 6 in this
# report: https://www.weather.gov/media/dmx/SigEvents/2008_Central_Iowa_Floods.pdf).
# Also included Cedar Rapids which is not in the DMX CWA.
ia_flooding_bbox <- c(-94.294625, 41.015733, -91.424186, 43.102420)
# According to the report from above, heavy rainfall fell from late May
# into early June causing the flooding.
ia_flooding_start <- "2008-05-20"
ia_flooding_end <- "2008-07-05"
# find NWIS site ids for stream sites with streamflow data
ia_flooding_sites <- whatNWISsites(bBox=ia_flooding_bbox)
sites_fv <- ia_flooding_sites %>%
filter(site_tp_cd == "ST") # only stream sites
sites_fv_q <- whatNWISdata(sites_fv$site_no) %>%
filter(parm_cd == "00060")
# get streamflow data for these sites
ia_q <- readNWISuv(sites_fv_q$site_no,
parameterCd = "00060",
startDate = ia_flooding_start,
endDate = ia_flooding_end) %>%
renameNWISColumns()
# add month and day numeric columns
ia_q_md <- ia_q %>%
mutate(month_nu = as.numeric(format(dateTime, "%m")),
day_nu = as.numeric(format(dateTime, "%d")))
# get stats to compare if how high above "normal" floods
# during the selected time period were
# 10 limit query for stat service
req_index <- seq(1,nrow(sites_fv_q),by=10)
sites_fv_q_stat <- data.frame()
for(i in req_index) {
sites_fv_q_10 <- sites_fv_q$site_no[i:(i+9)]
current_sites <- readNWISstat(siteNumbers = sites_fv_q_10,
parameterCd = "00060",
statReportType = "daily",
statType = c("p50","p75","p90","mean"),
startDate = ia_flooding_start,
endDate = ia_flooding_end)
sites_fv_q_stat <- rbind(sites_fv_q_stat, current_sites)
}
# join the stats w/ the actual flow data
ia_q_stat <- left_join(ia_q_md, sites_fv_q_stat)
# add columns to compare the observed streamflow to the stat
ia_q_compare <- ia_q_stat %>% mutate(`is>mean` = Flow_Inst > mean_va,
`is>p50` = Flow_Inst > p50_va,
`is>p75` = Flow_Inst > p75_va,
`is>p90` = Flow_Inst > p90_va) %>%
rowwise() %>%
# consider the flow "very high" if it's above 75 or 90th percentiles
mutate(vhigh = all(`is>p75`, `is>p90`, na.rm=TRUE)) %>%
ungroup() %>%
filter(vhigh) # keep only the ones that are "very high"
# summarize each site number by how many "very high" days of flow
# it had during the selected time period
ia_q_compare_sum <- ia_q_compare %>%
group_by(site_no) %>%
summarize(ndays_vhigh = n()) %>%
arrange(desc(ndays_vhigh))
# keep only the upper half of sites with very high flow
ia_q_select_high <- head(ia_q_compare_sum, round(nrow(ia_q_compare_sum)/2, digits=0))
ia_q_select_ids <- ia_q_select_high$site_no
ia_q_select_ids
# filter the data by those sites to see a simple grid of hydrographs
ia_q_select <- ia_q %>% filter(site_no %in% ia_q_select_ids)
# simple hydrograph plotting to show that time period
# appropriately captured length of flood
ia_flooding_hydrograph <- ggplot(ia_q_select, aes(dateTime, Flow_Inst)) +
geom_point(size=0.5) +
facet_wrap(~site_no, scales="free_y")
ia_flooding_hydrograph
rough hydrographs created at end to verify that the start/end dates were appropriate:
great work @lindsaycarr
@lindsaycarr can you adjust your code to output the site ids as strings (the look to be numeric, so the leading zero gets dropped)?
@jread-usgs just ran this code and values in ia_q_select$site_no
are character. Where are you seeing potential numerics with dropped leading zeros?
in the file you uploaded to the JIRA, the strings should probably be quoted so they aren't dropped by excel or other readers.
Ah OK, yes. Will update that momentarily
New CSV file is attached to the ticket now.