USGS-R / delaware-model-prep

Data and scripts for collecting and formatting data in the Delaware River Basin in prep for ML and DA modeling
2 stars 13 forks source link

Add delaware SNTemp/PGDL/obs time series plots to 8_viz #61

Open jordansread opened 3 years ago

jordansread commented 3 years ago

I don't have this repo up to date and I think there is some work I need to do to make sure I don't have conflicts w/ return lines and task makefiles, so creating this issue right now as a placeholder.

Turn this into a visualization output that ties into the pipeline:


# assumes you have `compare` from Sam's script, the PGDL outputs, SNTemp outputs, and obs

viz_time_performance <- function(compare, start = "2011-01-20", end = "2012-07-27", reach, fileout){
  png(fileout, width = 12.9, height = 7.5, units = 'in', res = 250) # not quite to the edge

  #layout(matrix(c(1,1,1,2),byrow = TRUE))

  par(omi = c(0.1,0,0.3,0), mai = c(0.6,1,0,0), las = 1, mgp = c(3.3,0.8,0))

  plot(NA, NA, xlim = as.Date(c(start, end)), ylim = c(-0.5,30), xlab = "",
       ylab = 'Water temperature (°C)', axes = FALSE, xaxs = 'i', yaxs = 'i', cex.lab = 2)

  axis(2, at = seq(-50,30, by = 5), las = 1, tck = -0.01, cex.axis = 1.5, lwd = 1.5)

  date_ticks <- seq(lubridate::floor_date(as.Date(start), unit = 'month'), to = lubridate::ceiling_date(as.Date(end), unit = 'month'), 'month')

  axis(1, date_ticks, labels = NA, tck = -0.01, lwd = 1.5)
  par(mgp = c(3.3,0.2,0))
  axis(1, date_ticks+15, labels = format(date_ticks, '%b'), tck = NA, lwd = NA, cex.axis = 1.1)

  date_ticks <- seq(lubridate::floor_date(as.Date(start), unit = 'year'), to = lubridate::ceiling_date(as.Date(end), unit = 'year'), 'year')

  axis(1, date_ticks, labels = NA, tck = -0.05, lwd = 1.5)
  par(mgp = c(3.3,2,0))
  axis(1, date_ticks+365/2, labels = format(date_ticks, '%Y'), cex.axis = 2, tck = NA, lwd = NA)

  x_mod <- compare %>% filter(seg_id_nat == reach) %>% pull(date)
  y_SN <- compare %>% filter(seg_id_nat == reach) %>% pull(sntemp)
  y_PG <- compare %>% filter(seg_id_nat == reach) %>% pull(pgrnn_all)

  x_obs <- obs %>% filter(seg_id_nat == reach) %>% pull(date)
  y_obs <- obs %>% filter(seg_id_nat == reach) %>% pull(temp_c)

  points(x_mod, y_SN, pch = 20, col = '#1b9e7766', type = "l", lwd = 3, cex = 0.3) # 40% alpha 66

  points(x_mod, y_PG, pch = 20, col = '#7570b3', type = "l", lwd = 3, cex = 0.3)

  points(x_obs, y_obs, pch = 20, col = 'black')

  dev.off()
}

viz_sample_time <- function(compare, fileout){
  # sort obs in order of samples

  png("8_visualize/out/DRB_samples.png", width = 13.33, height = 7.5, units = 'in', res = 250)

  names <- c(LETTERS, paste(LETTERS, LETTERS, sep = ''))

  #layout(matrix(c(1,1,1,2),byrow = TRUE))
  model_obs <- obs %>% filter(date <= as.Date('2016-09-30'), date >= as.Date('1980-10-01'))
  par(omi = c(0,0,0,0), mai = c(0.3,0,0,0), las = 1, mgp = c(3.3,-0.2,0))
  rank <- model_obs %>% group_by(seg_id_nat) %>% tally %>% arrange(desc(n))
  start <- '1980-10-01'
  end <- '2017-12-30'
  plot(NA, NA, xlim = as.Date(c(start, end)), ylim = c(0,1), xlab = "",
       ylab = '', axes = FALSE, xaxs = 'i', yaxs = 'i')

  date_ticks <- seq(lubridate::floor_date(as.Date(start), unit = 'year'), to = lubridate::floor_date(as.Date(end), unit = 'year'), 'year') 
  axis(1, date_ticks, labels = NA, tck = -0.01, lwd = 1.5)
  tcks <- date_ticks[seq(1, to = length(date_ticks), by = 2)]
  axis(1, tcks+365/2, labels = format(tcks, '%Y'), cex.axis = 0.7, tck = NA, lwd = NA)
  tcks <- date_ticks[seq(2, to = length(date_ticks), by = 2)] %>% head(-1) # hack to get rid of 2017
  axis(1, tcks+365/2, labels = format(tcks, '%Y'), cex.axis = 0.7, tck = NA, lwd = NA)

  top <- 1
  height <- 1/(length(rank$seg_id_nat))

  for (site in rank$seg_id_nat){
    this_site <- filter(model_obs, seg_id_nat == site)
    for (date in this_site$date){
      rect(date - 0.75, ybottom = top-height, ytop = top, xright = date+0.75, 
           col = 'black',  border = NA) #scales::alpha('lightblue', 0.5),
    }

    # labels <- filter(model_obs, seg_id_nat == site) %>% pull(subseg_id) %>% head(1) %>% 
    #   stringr::str_remove('_1') %>% paste0('s', .)
    text(x = as.Date(end)-200, y = top+height/6, labels = names[1], pos = 1, col = 'black', cex = 1.2)
    top <- top - height
    names <- tail(names, -1)
  }

  dev.off()
}

viz_sample_time <- function(compare, fileout){
  # sort obs in order of samples

  png("8_visualize/out/DRB_samples_599_1-2033.png", width = 13.33, height = 7.5, units = 'in', res = 250)

  names <- c(LETTERS, paste(LETTERS, LETTERS, sep = ''))

  #layout(matrix(c(1,1,1,2),byrow = TRUE))
  model_obs <- obs %>% filter(date <= as.Date('2016-09-30'), date >= as.Date('1980-10-01'))
  par(omi = c(0,0,0,0), mai = c(0.3,0,0,0), las = 1, mgp = c(3.3,-0.2,0))
  rank <- model_obs %>% group_by(seg_id_nat) %>% tally %>% arrange(desc(n))
  start <- '1980-10-01'
  end <- '2017-12-30'
  plot(NA, NA, xlim = as.Date(c(start, end)), ylim = c(0,1), xlab = "",
       ylab = '', axes = FALSE, xaxs = 'i', yaxs = 'i')

  #axis(2, at = seq(-50,30, by = 5), las = 1, tck = -0.01, cex.axis = 1.5, lwd = 1.5)

  date_ticks <- seq(lubridate::floor_date(as.Date(start), unit = 'year'), to = lubridate::floor_date(as.Date(end), unit = 'year'), 'year') 
  axis(1, date_ticks, labels = NA, tck = -0.01, lwd = 1.5)
  tcks <- date_ticks[seq(1, to = length(date_ticks), by = 2)]
  axis(1, tcks+365/2, labels = format(tcks, '%Y'), cex.axis = 0.7, tck = NA, lwd = NA)
  tcks <- date_ticks[seq(2, to = length(date_ticks), by = 2)] %>% head(-1) # hack to get rid of 2017
  axis(1, tcks+365/2, labels = format(tcks, '%Y'), cex.axis = 0.7, tck = NA, lwd = NA)

  top <- 1
  height <- 1/(length(rank$seg_id_nat))

  for (site in rank$seg_id_nat){
    this_site <- filter(model_obs, seg_id_nat == site)
    if (site == '2033'){
      for (date in this_site$date){
        rect(date - 0.75, ybottom = top-height, ytop = top, xright = date+0.75, 
             col = 'black',  border = NA) #scales::alpha('lightblue', 0.5),
      }

      # labels <- filter(model_obs, seg_id_nat == site) %>% pull(subseg_id) %>% head(1) %>% 
      #   stringr::str_remove('_1') %>% paste0('s', .)
      # 
      text(x = as.Date(end)-200, y = top+height/6, labels = names[1], pos = 1, cex = 1.2)
    } 
    top <- top - height
    names <- tail(names, -1)
  }

  dev.off()
}

viz_sample_time <- function(compare, fileout){
  # sort obs in order of samples

  png("8_visualize/out/DRB_samples_only_border.png", width = 13.33, height = 7.5, units = 'in', res = 250)

  names <- c(LETTERS, paste(LETTERS, LETTERS, sep = ''))

  #layout(matrix(c(1,1,1,2),byrow = TRUE))
  model_obs <- obs %>% filter(date <= as.Date('2016-09-30'), date >= as.Date('1980-10-01'))
  par(omi = c(0,0,0,0), mai = c(0.3,0,0,0), las = 1, mgp = c(3.3,-0.2,0))
  rank <- model_obs %>% group_by(seg_id_nat) %>% tally %>% arrange(desc(n))
  start <- '1980-10-01'
  end <- '2017-12-30'
  plot(NA, NA, xlim = as.Date(c(start, end)), ylim = c(0,1), xlab = "",
       ylab = '', axes = FALSE, xaxs = 'i', yaxs = 'i')

  #axis(2, at = seq(-50,30, by = 5), las = 1, tck = -0.01, cex.axis = 1.5, lwd = 1.5)

  top <- 1
  height <- 1/(length(rank$seg_id_nat))

  for (site in rank$seg_id_nat){
    text(x = as.Date(end)-200, y = top+height/6, labels = names[1], pos = 1, col = 'black', cex = 1.2)
    top <- top - height
    names <- tail(names, -1)
  }

  dev.off()
}

# need test, train, predict data

get_WRR_data <- function(depths = c(0, 10, 18), exper_n = 2){

  library(dplyr)
  library(readr)
  library(stringr)
  library(tidyr)
  library(sbtools) # see https://github.com/USGS-R/sbtools

  test_data <- item_file_download('5d925066e4b0c4f70d0d0599', names = 'me_test.csv', destinations = tempfile(fileext = '.csv'), overwrite_file = TRUE) %>% 
    readr::read_csv(col_types = 'Ddddc') %>% filter(exper_type == 'season') %>% 
    group_by(date, depth) %>% summarize(temp = mean(temp)) %>% mutate(test = TRUE)

  train_data <- item_file_download('5d8a837fe4b0c4f70d0ae8ac', names = 'me_season_training.csv', destinations = tempfile(fileext = '.csv'), overwrite_file = TRUE) %>% 
    readr::read_csv(col_types = 'Ddddc') %>% 
    group_by(date, depth) %>% summarize(temp = mean(temp)) %>% mutate(test = FALSE)

  pgdl_data <- item_file_download('5d915cb2e4b0c4f70d0ce523', names = 'me_season_predict_pgdl.csv', destinations = tempfile(fileext = '.csv'), overwrite_file = TRUE) %>% 
    readr::read_csv() %>% pivot_longer(cols = c(-date, -exper_id, -exper_n), names_prefix = 'temp_', names_to = 'depth', values_to = 'pgdl') %>% 
    mutate(depth = as.numeric(depth))

  dl_data <- item_file_download('5d915cb2e4b0c4f70d0ce523', names = 'me_season_predict_dl.csv', destinations = tempfile(fileext = '.csv'), overwrite_file = TRUE) %>% 
    readr::read_csv() %>% pivot_longer(cols = c(-date, -exper_id, -exper_n), names_prefix = 'temp_', names_to = 'depth', values_to = 'dl') %>% 
    mutate(depth = as.numeric(depth))

  pb_data <- item_file_download('5d915cb2e4b0c4f70d0ce523', names = 'me_season_predict_pb.csv', destinations = tempfile(fileext = '.csv'), overwrite_file = TRUE) %>% 
    readr::read_csv() %>% pivot_longer(cols = c(-date, -exper_id, -exper_n), names_prefix = 'temp_', names_to = 'depth', values_to = 'pb') %>% 
    mutate(depth = as.numeric(depth))

  # combine all test and train data, flatten and remove duplicates:
  observed <- rbind(test_data, train_data) %>% filter(depth %in% depths) %>% rename(observed = temp)
  modeled <- inner_join(pgdl_data, y = dl_data, by = c('date','exper_n','exper_id','depth')) %>% 
    inner_join(pb_data, by = c('date','exper_n','exper_id','depth')) %>% 
    filter(depth %in% depths, exper_n == !!exper_n) %>% select(-exper_n, -exper_id)

  return(left_join(modeled, observed, by = c('date','depth')))
}

viz_wrr_oobounds <- function(wrr_data, start = "2011-01-20", end = "2012-07-27", fileout = '~/Downloads/wrr_oobounds.png', type = c('obs','pgdl','pb','dl')){
  type <- match.arg(type)

  text_key <- c(pb = "Process-based", dl = 'Deep learning', pgdl = 'Process-guided DL')

  wrr_data <- mutate(wrr_data, col = case_when(
    depth == 0 ~ "#FDE725FF",
    depth == 10 ~ "#21908CFF",
    depth == 18 ~ "#440154FF"
  ))
  png(fileout, width = 13.33, height = 7.5, units = 'in', res = 250, bg = NA) # not quite to the edge

  par(omi = c(0,0,0.2,0.1), mai = c(0.8,1,0.2,0), las = 1, mgp = c(3,0.8,0))

  plot(NA, NA, xlim = as.Date(c(start, end)), ylim = c(-0.5,37), xlab = "",
       ylab = 'Water temperature (°C)', axes = FALSE, xaxs = 'i', yaxs = 'i', cex.lab = 2)

  axis(2, at = seq(-50,30, by = 5), las = 1, tck = -0.01, cex.axis = 1.5, lwd = 1.5)

  date_ticks <- seq(lubridate::floor_date(as.Date(start), unit = 'month'), to = lubridate::ceiling_date(as.Date(end), unit = 'month'), 'month')

  axis(1, date_ticks, labels = NA, tck = -0.01, lwd = 1.5)
  par(mgp = c(3,0.4,0))
  axis(1, date_ticks+15, labels = format(date_ticks, '%b'), tck = NA, lwd = NA, cex.axis = 1.2)

  date_ticks <- seq(lubridate::floor_date(as.Date(start), unit = 'year'), to = lubridate::ceiling_date(as.Date(end), unit = 'year'), 'year')

  axis(1, date_ticks, labels = NA, tck = -0.05, lwd = 1.5)
  par(mgp = c(3,2.1,0))
  axis(1, date_ticks+365/2, labels = format(date_ticks, '%Y'), cex.axis = 1.5, tck = NA, lwd = NA)

  x1 <- as.Date(start)+9
  y1 <- 31.5

  if (type == 'obs'){
    points(wrr_data$date, wrr_data$observed, pch = 16, col = wrr_data$col)

    txt <- c('0m','10m','18m')
    for (i in 1:3){
      points(x1, y1-i*1.1+1, pch = 16, col = wrr_data$col[i])
      text(x1, y = y1-i*1.1+1-0.1, pos = 4, txt[i], cex = 1.4, offset = 0.7)
    }
    text(x1+30, y1-1.4, 'Observed', cex = 1.5, pos = 4)

  } else {

    # #plot the other models in grey
    # for (mod in c('pb','dl','pgdl')){
    #   if (mod == type) break
    #   points(wrr_data$date, wrr_data[[mod]], pch = 16, col = "#99999966", type ='p', cex = 0.7)
    # }
    #plot obs
    points(wrr_data$date, wrr_data$observed, pch = 16, col = paste0(substring(wrr_data$col, first = 1, last = 7), '66'))

    x_txt <- mean(as.Date(c(start, end)))+30
    for (depth in unique(wrr_data$depth)){
      this_data <- filter(wrr_data, depth == !!depth)
      points(this_data$date, this_data[[type]], pch = 16, col = this_data$col, type ='l', lwd = 3, cex = 0.3) # 40% alpha 66
      x0 <- as.Date(start)+220
      y0 <- filter(this_data, date == !!x0)[[type]]

      segments(x0 = x0, x1 = x_txt, y0 = y0, y1 = 21, col = this_data$col)
    }

    text(x_txt, 21, text_key[[type]], cex = 2, offset = 0.6, pos = 4)

    txt <- c('0m','10m','18m')
    for (i in 1:3){
      points(x1, y1-i*1.1+1, pch = 16, col =  paste0(substring(wrr_data$col[i], first = 1, last = 7), '66'))
      text(x1, y = y1-i*1.1+1-0.1, pos = 4, txt[i], cex = 1.4, offset = 0.7)
    }
    text(x1+30, y1-1.4, 'Observed', cex = 1.5, pos = 4)

    y1 <- y1-4
    txt <- c('0m','10m','18m')
    for (i in 1:3){
      points(c(x1-2, x1+2), c(y1-i*1.1+1, y1-i*1.1+1), pch = 16, col = wrr_data$col[i], type ='o', lwd = 3, cex = 0.3)
      text(x1, y = y1-i*1.1+1-0.1, pos = 4, txt[i], cex = 1.4, offset = 0.7)
    }
    text(x1+30, y1-1.4, 'Modeled', cex = 1.5, pos = 4)
  }
  dev.off()
}

see compare file

aappling-usgs commented 3 years ago

Looks like a useful starting point 👍 . Is the get_WRR_data function above actually used for the plotting? This is for stream predictions, right?

jordansread commented 3 years ago

That was for another figure given in the same talk. get_WRR_data() and viz_wrr_oobounds() aren't part of DRB and shouldn't be added. Good catch