Finalize hypso resampling

hcorson-dosch-usgs commented 2 years ago

Fixes #293

Add code to resample hypsography prior to creation of the 7_config_merge/out/nml_list.rds.ind target.

This code is set up such that:

Hypsography is only resampled for lakes with > 2 values in the H and A vectors (those lakes with additional bathymetric information beyond max depth)
No duplicate elevation-area pairings are generated
The resampled area values increase monotonically with elevation
The elevation intervals to which the area values are resampled is determined based on lake depth, with an interval of 1m for deep lakes, 0.5m for medium-deep lakes, and 0.25m for shallow lakes. The current depth thresholds (8m, 5m, and 3m, respectively) were pulled from the values used to determine max_layer_thick here but can easily be adjusted
The radaii rather than the area values are resampled with approx() and are then used to calculate the area values, per Lindsay's method used in lake-temperature-out

hcorson-dosch-usgs commented 2 years ago

Marked this as draft b/c Lindsay and I are working to assess what impact this change will have on the TOHA calculations. Lindsay's done some digging and will add her notes once Github allows her to post.

lindsayplatt commented 2 years ago

I was just exploring what impact different resolutions would have on our thermal habitat code. I thought that maybe because we are just linearly interpolating, it wouldn't matter if we had 1m or 0.5m. I put together some test code and it looks like it might? Not sure what we should do about this? @jread-usgs any thoughts?

# Set up temp profiles every 0.5m
depth_0.5m <- seq(0,5,0.5)
set.seed(9)
temp_0.5m <- sort(rnorm(11, mean=10, sd=3),decreasing = T)

# Get every other value in order to use every 1m
depth_1m <- depth_0.5m[c(T,F)]
temp_1m <- temp_0.5m[c(T,F)]

# See if we get the same value using both resolutions 
approx(temp_0.5m, depth_0.5m, xout = 12)$y
approx(temp_1m, depth_1m, xout = 12)$y

# OUTPUT
> approx(temp_0.5m, depth_0.5m, xout = 12)$y
[1] 0.8475811
> approx(temp_1m, depth_1m, xout = 12)$y
[1] 0.7261741

hcorson-dosch-usgs commented 2 years ago

For some context, here is one spot where Lindsay is resampling hypso to get the areas at specific depths as part of calculating TOHA

And I plotted up examples of what my resampling code generates in a shallow, medium, and deep lake:

hcorson-dosch-usgs commented 2 years ago

@lindsayplatt - circling back to your mockup example. One key factor - the temperature predictions output from GLM will be at 0.5m intervals regardless of how we resample the hypsography.

In case it's helpful I ran some of your code from here with a dummy dataset and a 0.5m and 1m hypso:

library(tidyverse)

resample_hypso <- function(hypso, new_depths) {

  # `rule = 2`: repeats top or bottom known hypso for any depths outside of known hypso
  hypso$radii <- sqrt(hypso$areas / pi)
  new_radii <- approx(hypso$depths, hypso$radii, xout=new_depths, rule = 2)$y

  # Now calculate area from new radii
  new_areas <- pi * new_radii^2

  # Create hypso at these new depths
  return(list(depths = new_depths, areas = new_areas))
}

get_tha <- function(wtr_df, hypso, wtr_lower, wtr_upper) {
  # run code pulled from function
  # https://github.com/USGS-R/lake-temperature-out/blob/main/2_process/src/calculate_toha.R#L162-L209
  z_wtr <- rLakeAnalyzer::get.offsets(wtr_df)
  z_max <- max(hypso$depths, z_wtr) # bottom could be in hypso or wtr
  z_surface <- 0 # always 0, might be in hypso, might not

  wtr_surface <- wtr_df[[which.min(z_wtr)]] # wtr_surface will be whatever the top-most wtr is
  wtr_bottom <- wtr_df[[which.max(z_wtr)]] # wtr_bottom will be whatever the bottom-most wtr is

  ##### Find exact depths of wtr thresholds 

  Z1_Z2 <- apply(wtr_df, MARGIN = 1, function(wtr_row) {
    # Using apply since these two actions need to be done per row
    wtr <- as.vector(t(wtr_row))
    if(length(unique(wtr)) == 1) {
      # In a well-mixed lake, it is possible for all wtr values to be the same
      # which causes approx to throw an error. Return NAs for the Zs and let
      # checks below determine if benth area is all (or partially) above or 
      # below the lake and set Zs based on that.
      Z1_Z2 <- c(NA,NA)
    } else {
      Z1_Z2 <- approx(wtr, z_wtr, xout=c(wtr_upper, wtr_lower))$y
    }
    return(Z1_Z2)
  })
  Z1 <- Z1_Z2[1,]
  Z2 <- Z1_Z2[2,]

  ##### Checks before calculating areas
  completely_below_lake <- wtr_bottom > wtr_upper # if THA is completely below lake, benthic area is 0 (too hot)
  completely_above_lake <- wtr_surface < wtr_lower # if THA is completely above lake, benthic area is 0 (too cold)
  benth_0 <- completely_below_lake | completely_above_lake

  # If completely above or completely below, the Zs need to reflect that
  Z1[completely_below_lake] <- z_max
  Z2[completely_below_lake] <- z_max
  Z1[completely_above_lake] <- z_surface
  Z2[completely_above_lake] <- z_surface

  # if THA extends above lake, use the surface as Z1 to calc THA
  extends_above_lake <- wtr_surface < wtr_upper & !completely_above_lake
  Z1[extends_above_lake] <- z_surface

  # if THA extends below lake, use the bottom as Z2 to calc THA
  extends_below_lake <- wtr_bottom > wtr_lower & !completely_below_lake
  Z2[extends_below_lake] <- z_max

  ##### Create hypso at these different depths
  A1 <- resample_hypso(hypso, Z1)[["areas"]]
  A2 <- resample_hypso(hypso, Z2)[["areas"]]

  return(list(A1=A1, A2=A2))
}

# set up inputs
wtr_df <- tibble(temp_0=sort(rnorm(11, mean=20, sd=3),decreasing = T), 
                 temp_0.5=sort(rnorm(11, mean=19, sd=3),decreasing = T),
                 temp_1=sort(rnorm(11, mean=18, sd=3),decreasing = T),
                 temp_1.5=sort(rnorm(11, mean=16, sd=3),decreasing = T),
                 temp_2=sort(rnorm(11, mean=13, sd=3),decreasing = T),
                 temp_2.5=sort(rnorm(11, mean=11, sd=3),decreasing = T),
                 temp_3=sort(rnorm(11, mean=10, sd=3),decreasing = T),
                 temp_3.5=sort(rnorm(11, mean=9, sd=3),decreasing = T),
                 temp_4=sort(rnorm(11, mean=9, sd=3),decreasing = T),
                 temp_4.5=sort(rnorm(11, mean=9, sd=3),decreasing = T),
                 temp_5=sort(rnorm(11, mean=9, sd=3),decreasing = T))

depth_0.5m <- seq(0,5,0.5)
set.seed(9)
depth_1m <- depth_0.5m[c(T,F)]

hypso_0.5m <- tibble(depths=depth_0.5m,
                     areas=sort(rnorm(11, mean=150, sd=3),decreasing = T))
interval<-1
hypso_1m <- tibble(depths=seq(floor(min(hypso_0.5m$depths)/interval)*interval, floor(max(hypso_0.5m$depths)/interval)*interval, by=interval),
                   areas=approx(hypso_0.5m$depths,hypso_0.5m$areas,xout=depths, rule=2)$y)

wtr_lower <- 11
wtr_upper <- 25

#### check created hypso using two different hypsos
results_0.5m <- get_tha(wtr_df,hypso_0.5m,wtr_lower,wtr_upper)
results_1m <- get_tha(wtr_df,hypso_1m,wtr_lower,wtr_upper)

A1_0.5m <- results_0.5m$A1
A1_1m <- results_1m$A1
A2_0.5m <- results_0.5m$A2
A2_1m <- results_1m$A2

> A1_0.5m
 [1] 153.7486 153.8327 153.8327 153.8327 153.8327 153.8327 153.8327 153.8327
 [9] 153.8327 153.8327 153.8327
> A1_1m
 [1] 153.4180 153.8327 153.8327 153.8327 153.8327 153.8327 153.8327 153.8327
 [9] 153.8327 153.8327 153.8327
> A2_0.5m
 [1] 146.4394 146.4394 147.6473 148.4014 149.0518 149.2334 149.2576 149.5047
 [9] 149.5550 149.8858 150.5272
> A2_1m
 [1] 146.4394 146.4394 147.4780 148.1243 148.8359 149.3197 149.3724 149.5303
 [9] 149.5623 150.3013 150.8115

jordansread commented 2 years ago

Cool/useful analysis.

One thing I wanted to quick add is that

One key factor - the temperature predictions output from GLM will be at 0.5m intervals regardless of how we resample the hypsography.

we can actually ask glmtools to resample GLM at any depths we want, they don't have to be regular intervals. Just adding this so it isn't viewed as a constraint that we can't control.

lindsayplatt commented 2 years ago

Coming back to this and re-wrapping my head around the problem space. Based on these checks and short analyses, it looks like there are changes to the resulting lake areas, but they are pretty minimal (the step that finds the A1 and A2 for the temperature range). I think the next stage of checking the potential impacts of resampling (and maybe more impacted by resampling) is the next part of the TOHA calcs, which is getting the cumulative benthic area of the habitat using calc_benthic_area_incl_hypso() and the cum_ba (calculated from hypso here in lake-temperature-out).

However, all that said, I may be missing a key issue. I have been using the hypso in the NML files. The GLM outputs return temp at depth intervals of 0.5, but the hypso intervals vary per lake (some are already more sparse than 1 m).

library(tidyverse)

# FUNCTIONS USED
calc_benthic_area <- function(Z1, Z2, A1, A2) {
  trap_length <- sqrt(A2 * pi) + sqrt(A1 * pi)
  trap_height <- sqrt( (Z1 - Z2)^2 + (sqrt(A2/pi) - sqrt(A1/pi))^2 )
  benthic_area <- trap_length * trap_height

  return(benthic_area)
}
resample_hypso <- function(hypso, new_depths) {

  # `rule = 2`: repeats top or bottom known hypso for any depths outside of known hypso
  hypso$radii <- sqrt(hypso$areas / pi)
  new_radii <- approx(hypso$depths, hypso$radii, xout=new_depths, rule = 2)$y

  # Now calculate area from new radii
  new_areas <- pi * new_radii^2

  # Create hypso at these new depths
  return(list(depths = new_depths, areas = new_areas))
}

all_nml <- readRDS('1_prep/in/nml_list.rds')
all_morphometry <- purrr::map(all_nml, `[`, c("latitude", "longitude", "H", "A"))
site_morphometry <- all_morphometry[["nhdhr_105954667"]]

hypso <- data.frame(H = site_morphometry$H, A = site_morphometry$A) %>% 
  mutate(depths = max(H) - H, areas = A) %>% 
  arrange(depths) %>% 
  select(depths, areas)

cum_benthic_area <- cumsum(calc_benthic_area(
  hypso$depths[-length(hypso$depths)],
  hypso$depths[-1],
  hypso$areas[-length(hypso$areas)],
  hypso$areas[-1]))

# Do that for resampled hypsos
hypso_resampled <- resample_hypso(hypso, seq(min(hypso$depths), max(hypso$depths), by = 1))
cum_benthic_area_resampled <- cumsum(calc_benthic_area(
  hypso_resampled$depths[-length(hypso_resampled$depths)],
  hypso_resampled$depths[-1],
  hypso_resampled$areas[-length(hypso_resampled$areas)],
  hypso_resampled$areas[-1]))

# Compare benthic areas before/after:
cum_benthic_area
cum_benthic_area_resampled

# OUTPUT VERY DIFFERENT
> cum_benthic_area
[1]  70943.62 195706.77 371622.21 433841.57 454889.24
> cum_benthic_area_resampled
[1]  47228.33 112530.16 192144.65 316994.65 393248.15 431131.78 453476.83

3/30/2022 EDIT: The above outputs of cumulative benthic areas are actually not as different as I originally thought. If you compare the areas at the depths that are similar, the areas are pretty similar (see table below). More to come on this in a comment below.

depth_orig	cum_benthic_area_orig	depth_resampled	cum_benthic_area_resampled
		1	47228.33
1.5240	70943.62
		2	112530.16
3.0480	195706.77	3	192144.65
		4	316994.65
4.5720	371622.21
		5	393248.15
6.0960	433841.57	6	431131.78
		7	453476.83
7.3152	454889.24

hcorson-dosch-usgs commented 2 years ago

However, all that said, I may be missing a key issue. I have been using the hypso in the NML files. The GLM outputs return temp at depth intervals of 0.5, but the hypso intervals vary per lake (some are already more sparse than 1 m).

Wait now I'm confused -- what key issue do you feel like you're missing? Moving forward you would continue to use the hypso from the NML files, as you have been. It would just be the resampled hypso instead of the original hypso. Is your thought that to date (and potentially moving forward), the resolution of the temperature outputs would not match that of the hypsography?

hcorson-dosch-usgs commented 2 years ago

Okay Lindsay and I regrouped on this today and got some clarity. Lindsay ran a code snippet to compare the cumulative benthic area calculated with the original hypsography to that calculated with the resampled hypsography. In the process we realized that I probably should use the actual minimum elevation-area pair, just as I am using the actual maximum elevation-area pair, so I'm going to make that change now

hcorson-dosch-usgs commented 2 years ago

Okay that did what we wanted it to - making the start and end values for the hypso identical to the original:

lindsayplatt commented 2 years ago

Building on my edit that the cumulative benthic areas are not as different as I originally thought once you line them up in depth order, we compared the THA/OHA calc outputs for two lakes by using the same Z1/Z1 (the depth at which the example THA/OHA range intersects our lake) and calculating the corresponding lake areas at those depths (A1 and A2), then calculating the benthic area. The habitat area values came out to be the same regardless of whether we used the original hypso or the resampled hypso!!! This matches our original assumption because ultimately, we are just linearly interpolating between known points to get the resampled hypso, so that final benthic area math should give us the same result.

# FXNS needed
# resample_hypso(): https://github.com/USGS-R/lake-temperature-out/blob/main/2_process/src/calculate_toha.R#L302-L313
# calc_benthic_area(): https://github.com/USGS-R/lake-temperature-out/blob/main/2_process/src/calculate_toha.R#L315-L321
# calc_benthic_area_incl_hypso(): https://github.com/USGS-R/lake-temperature-out/blob/main/2_process/src/calculate_toha.R#L323-L377

Z1 <- 2 # Depth in lake for the top of the O/T range
Z2 <- 3 # Depth in lake for the bottom of the O/T range

# Current lake hypso
hypso_orig <- structure(list(
  depths = c(0, 1.524, 3.048, 4.572, 6.096, 7.3152), 
  areas = c(454739.276528491, 383881.802459821, 259155.913693587, 
            83253.6667468361, 21045.3337177386, 0)), 
  class = "data.frame", row.names = c(NA, -6L))

# Create a resampled version
max_depth_orig <- max(hypso_orig$depths)
hypso_resampled <- as_tibble(resample_hypso(hypso_orig, c(seq(0,max_depth_orig,by=0.5), max_depth_orig)))

# Calc benthic area using original hypso
A1_orig <- resample_hypso(hypso_orig, Z1)[["areas"]]
A2_orig <- resample_hypso(hypso_orig, Z2)[["areas"]]
cum_ba_orig <- cumsum(calc_benthic_area(
  hypso_orig$depths[-length(hypso_orig$depths)],
  hypso_orig$depths[-1],
  hypso_orig$areas[-length(hypso_orig$areas)],
  hypso_orig$areas[-1]))
habitat_orig <- calc_benthic_area_incl_hypso(Z1, Z2, A1_orig, A2_orig, hypso_orig, cum_ba_orig)

# Calc benthic area using resampled hypso
A1_resampled <- resample_hypso(hypso_resampled, Z1)[["areas"]]
A2_resampled <- resample_hypso(hypso_resampled, Z2)[["areas"]]
cum_ba_resampled <- cumsum(calc_benthic_area(
  hypso_resampled$depths[-length(hypso_resampled$depths)],
  hypso_resampled$depths[-1],
  hypso_resampled$areas[-length(hypso_resampled$areas)],
  hypso_resampled$areas[-1]))
habitat_resampled <- calc_benthic_area_incl_hypso(Z1, Z2, A1_resampled, A2_resampled, hypso_resampled, cum_ba_resampled)

# They are the same!!
> habitat_resampled
[1] 79614.48
> habitat_orig
[1] 79614.48

We are assuming the same Z1/Z2 as input here for both hypso and hypso_resampled because running those actual calculations using example GLM output would give the same Z1/Z2 depths because those were calculated based on the GLM output (currently the 0.5 m depth intervals), not the hypso themselves (though hypso is an input to GLM).

lindsayplatt commented 2 years ago

Adding one more double check since the example above was a site where we upsampled. Wanted to create an example that could be plotted and points actually seen, so I didn't use an actual lake example; I made one up.

Upsampled (adding more depths to the hypso) results come out to be the exact same, while downsampled results (removing depths from the hypso) come out similar but not the exact same. I think this is because when we remove points from the depth profile, the slope of the line between depths changes so when we calculate the area of the trapezoid, it is different.

# Upsampled output
> habitat_orig
[1] 40.20565
> habitat_resampled
[1] 40.20565

# Downsampled output
> habitat_orig
[1] 93.96417
> habitat_resampled
[1] 90.42993

# Code used to get the plots above and results, need the functions noted in the previous comment.
# Upsample hypso example
hypso_orig <- tibble(depths = c(0,3,4,6,8), areas = c(144,120,80,36,0))
hypso_resampled <- resample_hypso(hypso_orig, seq(0,8,by=0.5))

# Downsample hypso example
set.seed(10)
hypso_orig <- tibble(depths = seq(0,6,0.5), areas = sort(sample(1:300, 13), decreasing = T))
hypso_resampled <- resample_hypso(hypso_orig, 0:6)

# Change what `hypso_orig` and `hypso_resampled` are and run the 
# following to create the plots and generate the habitat area values
habitat_orig <- calc_benthic_area_incl_hypso(
  Z1, Z2, 
  A1 = resample_hypso(hypso_orig, Z1)[["areas"]], 
  A2 = resample_hypso(hypso_orig, Z2)[["areas"]], 
  hypso = hypso_orig, 
  cum_ba = cumsum(calc_benthic_area(
    hypso_orig$depths[-length(hypso_orig$depths)],
    hypso_orig$depths[-1],
    hypso_orig$areas[-length(hypso_orig$areas)],
    hypso_orig$areas[-1])))

habitat_resampled <- calc_benthic_area_incl_hypso(
  Z1, Z2, 
  A1 = resample_hypso(hypso_resampled, Z1)[["areas"]], 
  A2 = resample_hypso(hypso_resampled, Z2)[["areas"]], 
  hypso = hypso_resampled, 
  cum_ba = cumsum(calc_benthic_area(
    hypso_resampled$depths[-length(hypso_resampled$depths)],
    hypso_resampled$depths[-1],
    hypso_resampled$areas[-length(hypso_resampled$areas)],
    hypso_resampled$areas[-1])))

plot(hypso_orig$areas, -hypso_orig$depths, type = "b", pch = 16)
points(hypso_resampled$areas, -hypso_resampled$depths, pch = 16, col = "red")
lines(hypso_resampled$areas, -hypso_resampled$depths, pch = 16, col = "red")
legend("top", col=c('black', 'red'), pch=16, legend = c('original', 'resampled'))

jordansread commented 2 years ago

Sorry! I hadn't hit submit on my review

DOI-USGS / lake-temperature-model-prep

Finalize hypso resampling #319