Closed hcorson-dosch-usgs closed 2 years ago
Marked this as draft b/c Lindsay and I are working to assess what impact this change will have on the TOHA calculations. Lindsay's done some digging and will add her notes once Github allows her to post.
I was just exploring what impact different resolutions would have on our thermal habitat code. I thought that maybe because we are just linearly interpolating, it wouldn't matter if we had 1m or 0.5m. I put together some test code and it looks like it might? Not sure what we should do about this? @jread-usgs any thoughts?
# Set up temp profiles every 0.5m
depth_0.5m <- seq(0,5,0.5)
set.seed(9)
temp_0.5m <- sort(rnorm(11, mean=10, sd=3),decreasing = T)
# Get every other value in order to use every 1m
depth_1m <- depth_0.5m[c(T,F)]
temp_1m <- temp_0.5m[c(T,F)]
# See if we get the same value using both resolutions
approx(temp_0.5m, depth_0.5m, xout = 12)$y
approx(temp_1m, depth_1m, xout = 12)$y
# OUTPUT
> approx(temp_0.5m, depth_0.5m, xout = 12)$y
[1] 0.8475811
> approx(temp_1m, depth_1m, xout = 12)$y
[1] 0.7261741
For some context, here is one spot where Lindsay is resampling hypso to get the areas at specific depths as part of calculating TOHA
And I plotted up examples of what my resampling code generates in a shallow, medium, and deep lake:
@lindsayplatt - circling back to your mockup example. One key factor - the temperature predictions output from GLM will be at 0.5m intervals regardless of how we resample the hypsography.
In case it's helpful I ran some of your code from here with a dummy dataset and a 0.5m and 1m hypso:
library(tidyverse)
resample_hypso <- function(hypso, new_depths) {
# `rule = 2`: repeats top or bottom known hypso for any depths outside of known hypso
hypso$radii <- sqrt(hypso$areas / pi)
new_radii <- approx(hypso$depths, hypso$radii, xout=new_depths, rule = 2)$y
# Now calculate area from new radii
new_areas <- pi * new_radii^2
# Create hypso at these new depths
return(list(depths = new_depths, areas = new_areas))
}
get_tha <- function(wtr_df, hypso, wtr_lower, wtr_upper) {
# run code pulled from function
# https://github.com/USGS-R/lake-temperature-out/blob/main/2_process/src/calculate_toha.R#L162-L209
z_wtr <- rLakeAnalyzer::get.offsets(wtr_df)
z_max <- max(hypso$depths, z_wtr) # bottom could be in hypso or wtr
z_surface <- 0 # always 0, might be in hypso, might not
wtr_surface <- wtr_df[[which.min(z_wtr)]] # wtr_surface will be whatever the top-most wtr is
wtr_bottom <- wtr_df[[which.max(z_wtr)]] # wtr_bottom will be whatever the bottom-most wtr is
##### Find exact depths of wtr thresholds
Z1_Z2 <- apply(wtr_df, MARGIN = 1, function(wtr_row) {
# Using apply since these two actions need to be done per row
wtr <- as.vector(t(wtr_row))
if(length(unique(wtr)) == 1) {
# In a well-mixed lake, it is possible for all wtr values to be the same
# which causes approx to throw an error. Return NAs for the Zs and let
# checks below determine if benth area is all (or partially) above or
# below the lake and set Zs based on that.
Z1_Z2 <- c(NA,NA)
} else {
Z1_Z2 <- approx(wtr, z_wtr, xout=c(wtr_upper, wtr_lower))$y
}
return(Z1_Z2)
})
Z1 <- Z1_Z2[1,]
Z2 <- Z1_Z2[2,]
##### Checks before calculating areas
completely_below_lake <- wtr_bottom > wtr_upper # if THA is completely below lake, benthic area is 0 (too hot)
completely_above_lake <- wtr_surface < wtr_lower # if THA is completely above lake, benthic area is 0 (too cold)
benth_0 <- completely_below_lake | completely_above_lake
# If completely above or completely below, the Zs need to reflect that
Z1[completely_below_lake] <- z_max
Z2[completely_below_lake] <- z_max
Z1[completely_above_lake] <- z_surface
Z2[completely_above_lake] <- z_surface
# if THA extends above lake, use the surface as Z1 to calc THA
extends_above_lake <- wtr_surface < wtr_upper & !completely_above_lake
Z1[extends_above_lake] <- z_surface
# if THA extends below lake, use the bottom as Z2 to calc THA
extends_below_lake <- wtr_bottom > wtr_lower & !completely_below_lake
Z2[extends_below_lake] <- z_max
##### Create hypso at these different depths
A1 <- resample_hypso(hypso, Z1)[["areas"]]
A2 <- resample_hypso(hypso, Z2)[["areas"]]
return(list(A1=A1, A2=A2))
}
# set up inputs
wtr_df <- tibble(temp_0=sort(rnorm(11, mean=20, sd=3),decreasing = T),
temp_0.5=sort(rnorm(11, mean=19, sd=3),decreasing = T),
temp_1=sort(rnorm(11, mean=18, sd=3),decreasing = T),
temp_1.5=sort(rnorm(11, mean=16, sd=3),decreasing = T),
temp_2=sort(rnorm(11, mean=13, sd=3),decreasing = T),
temp_2.5=sort(rnorm(11, mean=11, sd=3),decreasing = T),
temp_3=sort(rnorm(11, mean=10, sd=3),decreasing = T),
temp_3.5=sort(rnorm(11, mean=9, sd=3),decreasing = T),
temp_4=sort(rnorm(11, mean=9, sd=3),decreasing = T),
temp_4.5=sort(rnorm(11, mean=9, sd=3),decreasing = T),
temp_5=sort(rnorm(11, mean=9, sd=3),decreasing = T))
depth_0.5m <- seq(0,5,0.5)
set.seed(9)
depth_1m <- depth_0.5m[c(T,F)]
hypso_0.5m <- tibble(depths=depth_0.5m,
areas=sort(rnorm(11, mean=150, sd=3),decreasing = T))
interval<-1
hypso_1m <- tibble(depths=seq(floor(min(hypso_0.5m$depths)/interval)*interval, floor(max(hypso_0.5m$depths)/interval)*interval, by=interval),
areas=approx(hypso_0.5m$depths,hypso_0.5m$areas,xout=depths, rule=2)$y)
wtr_lower <- 11
wtr_upper <- 25
#### check created hypso using two different hypsos
results_0.5m <- get_tha(wtr_df,hypso_0.5m,wtr_lower,wtr_upper)
results_1m <- get_tha(wtr_df,hypso_1m,wtr_lower,wtr_upper)
A1_0.5m <- results_0.5m$A1
A1_1m <- results_1m$A1
A2_0.5m <- results_0.5m$A2
A2_1m <- results_1m$A2
> A1_0.5m
[1] 153.7486 153.8327 153.8327 153.8327 153.8327 153.8327 153.8327 153.8327
[9] 153.8327 153.8327 153.8327
> A1_1m
[1] 153.4180 153.8327 153.8327 153.8327 153.8327 153.8327 153.8327 153.8327
[9] 153.8327 153.8327 153.8327
> A2_0.5m
[1] 146.4394 146.4394 147.6473 148.4014 149.0518 149.2334 149.2576 149.5047
[9] 149.5550 149.8858 150.5272
> A2_1m
[1] 146.4394 146.4394 147.4780 148.1243 148.8359 149.3197 149.3724 149.5303
[9] 149.5623 150.3013 150.8115
Cool/useful analysis.
One thing I wanted to quick add is that
One key factor - the temperature predictions output from GLM will be at 0.5m intervals regardless of how we resample the hypsography.
we can actually ask glmtools
to resample GLM at any depths we want, they don't have to be regular intervals. Just adding this so it isn't viewed as a constraint that we can't control.
Coming back to this and re-wrapping my head around the problem space. Based on these checks and short analyses, it looks like there are changes to the resulting lake areas, but they are pretty minimal (the step that finds the A1
and A2
for the temperature range). I think the next stage of checking the potential impacts of resampling (and maybe more impacted by resampling) is the next part of the TOHA calcs, which is getting the cumulative benthic area of the habitat using calc_benthic_area_incl_hypso()
and the cum_ba
(calculated from hypso here in lake-temperature-out
).
However, all that said, I may be missing a key issue. I have been using the hypso in the NML files. The GLM outputs return temp at depth intervals of 0.5, but the hypso intervals vary per lake (some are already more sparse than 1 m).
library(tidyverse)
# FUNCTIONS USED
calc_benthic_area <- function(Z1, Z2, A1, A2) {
trap_length <- sqrt(A2 * pi) + sqrt(A1 * pi)
trap_height <- sqrt( (Z1 - Z2)^2 + (sqrt(A2/pi) - sqrt(A1/pi))^2 )
benthic_area <- trap_length * trap_height
return(benthic_area)
}
resample_hypso <- function(hypso, new_depths) {
# `rule = 2`: repeats top or bottom known hypso for any depths outside of known hypso
hypso$radii <- sqrt(hypso$areas / pi)
new_radii <- approx(hypso$depths, hypso$radii, xout=new_depths, rule = 2)$y
# Now calculate area from new radii
new_areas <- pi * new_radii^2
# Create hypso at these new depths
return(list(depths = new_depths, areas = new_areas))
}
all_nml <- readRDS('1_prep/in/nml_list.rds')
all_morphometry <- purrr::map(all_nml, `[`, c("latitude", "longitude", "H", "A"))
site_morphometry <- all_morphometry[["nhdhr_105954667"]]
hypso <- data.frame(H = site_morphometry$H, A = site_morphometry$A) %>%
mutate(depths = max(H) - H, areas = A) %>%
arrange(depths) %>%
select(depths, areas)
cum_benthic_area <- cumsum(calc_benthic_area(
hypso$depths[-length(hypso$depths)],
hypso$depths[-1],
hypso$areas[-length(hypso$areas)],
hypso$areas[-1]))
# Do that for resampled hypsos
hypso_resampled <- resample_hypso(hypso, seq(min(hypso$depths), max(hypso$depths), by = 1))
cum_benthic_area_resampled <- cumsum(calc_benthic_area(
hypso_resampled$depths[-length(hypso_resampled$depths)],
hypso_resampled$depths[-1],
hypso_resampled$areas[-length(hypso_resampled$areas)],
hypso_resampled$areas[-1]))
# Compare benthic areas before/after:
cum_benthic_area
cum_benthic_area_resampled
# OUTPUT VERY DIFFERENT
> cum_benthic_area
[1] 70943.62 195706.77 371622.21 433841.57 454889.24
> cum_benthic_area_resampled
[1] 47228.33 112530.16 192144.65 316994.65 393248.15 431131.78 453476.83
3/30/2022 EDIT: The above outputs of cumulative benthic areas are actually not as different as I originally thought. If you compare the areas at the depths that are similar, the areas are pretty similar (see table below). More to come on this in a comment below.
depth_orig | cum_benthic_area_orig | depth_resampled | cum_benthic_area_resampled | |
---|---|---|---|---|
1 | 47228.33 | |||
1.5240 | 70943.62 | |||
2 | 112530.16 | |||
3.0480 | 195706.77 | 3 | 192144.65 | |
4 | 316994.65 | |||
4.5720 | 371622.21 | |||
5 | 393248.15 | |||
6.0960 | 433841.57 | 6 | 431131.78 | |
7 | 453476.83 | |||
7.3152 | 454889.24 |
However, all that said, I may be missing a key issue. I have been using the hypso in the NML files. The GLM outputs return temp at depth intervals of 0.5, but the hypso intervals vary per lake (some are already more sparse than 1 m).
Wait now I'm confused -- what key issue do you feel like you're missing? Moving forward you would continue to use the hypso from the NML files, as you have been. It would just be the resampled hypso instead of the original hypso. Is your thought that to date (and potentially moving forward), the resolution of the temperature outputs would not match that of the hypsography?
Okay Lindsay and I regrouped on this today and got some clarity. Lindsay ran a code snippet to compare the cumulative benthic area calculated with the original hypsography to that calculated with the resampled hypsography. In the process we realized that I probably should use the actual minimum elevation-area pair, just as I am using the actual maximum elevation-area pair, so I'm going to make that change now
Okay that did what we wanted it to - making the start and end values for the hypso identical to the original:
Building on my edit that the cumulative benthic areas are not as different as I originally thought once you line them up in depth order, we compared the THA/OHA calc outputs for two lakes by using the same Z1/Z1 (the depth at which the example THA/OHA range intersects our lake) and calculating the corresponding lake areas at those depths (A1 and A2), then calculating the benthic area. The habitat area values came out to be the same regardless of whether we used the original hypso or the resampled hypso!!! This matches our original assumption because ultimately, we are just linearly interpolating between known points to get the resampled hypso, so that final benthic area math should give us the same result.
# FXNS needed
# resample_hypso(): https://github.com/USGS-R/lake-temperature-out/blob/main/2_process/src/calculate_toha.R#L302-L313
# calc_benthic_area(): https://github.com/USGS-R/lake-temperature-out/blob/main/2_process/src/calculate_toha.R#L315-L321
# calc_benthic_area_incl_hypso(): https://github.com/USGS-R/lake-temperature-out/blob/main/2_process/src/calculate_toha.R#L323-L377
Z1 <- 2 # Depth in lake for the top of the O/T range
Z2 <- 3 # Depth in lake for the bottom of the O/T range
# Current lake hypso
hypso_orig <- structure(list(
depths = c(0, 1.524, 3.048, 4.572, 6.096, 7.3152),
areas = c(454739.276528491, 383881.802459821, 259155.913693587,
83253.6667468361, 21045.3337177386, 0)),
class = "data.frame", row.names = c(NA, -6L))
# Create a resampled version
max_depth_orig <- max(hypso_orig$depths)
hypso_resampled <- as_tibble(resample_hypso(hypso_orig, c(seq(0,max_depth_orig,by=0.5), max_depth_orig)))
# Calc benthic area using original hypso
A1_orig <- resample_hypso(hypso_orig, Z1)[["areas"]]
A2_orig <- resample_hypso(hypso_orig, Z2)[["areas"]]
cum_ba_orig <- cumsum(calc_benthic_area(
hypso_orig$depths[-length(hypso_orig$depths)],
hypso_orig$depths[-1],
hypso_orig$areas[-length(hypso_orig$areas)],
hypso_orig$areas[-1]))
habitat_orig <- calc_benthic_area_incl_hypso(Z1, Z2, A1_orig, A2_orig, hypso_orig, cum_ba_orig)
# Calc benthic area using resampled hypso
A1_resampled <- resample_hypso(hypso_resampled, Z1)[["areas"]]
A2_resampled <- resample_hypso(hypso_resampled, Z2)[["areas"]]
cum_ba_resampled <- cumsum(calc_benthic_area(
hypso_resampled$depths[-length(hypso_resampled$depths)],
hypso_resampled$depths[-1],
hypso_resampled$areas[-length(hypso_resampled$areas)],
hypso_resampled$areas[-1]))
habitat_resampled <- calc_benthic_area_incl_hypso(Z1, Z2, A1_resampled, A2_resampled, hypso_resampled, cum_ba_resampled)
# They are the same!!
> habitat_resampled
[1] 79614.48
> habitat_orig
[1] 79614.48
We are assuming the same Z1/Z2 as input here for both hypso and hypso_resampled because running those actual calculations using example GLM output would give the same Z1/Z2 depths because those were calculated based on the GLM output (currently the 0.5 m depth intervals), not the hypso themselves (though hypso is an input to GLM).
Adding one more double check since the example above was a site where we upsampled. Wanted to create an example that could be plotted and points actually seen, so I didn't use an actual lake example; I made one up.
Upsampled (adding more depths to the hypso) results come out to be the exact same, while downsampled results (removing depths from the hypso) come out similar but not the exact same. I think this is because when we remove points from the depth profile, the slope of the line between depths changes so when we calculate the area of the trapezoid, it is different.
# Upsampled output
> habitat_orig
[1] 40.20565
> habitat_resampled
[1] 40.20565
# Downsampled output
> habitat_orig
[1] 93.96417
> habitat_resampled
[1] 90.42993
# Code used to get the plots above and results, need the functions noted in the previous comment.
# Upsample hypso example
hypso_orig <- tibble(depths = c(0,3,4,6,8), areas = c(144,120,80,36,0))
hypso_resampled <- resample_hypso(hypso_orig, seq(0,8,by=0.5))
# Downsample hypso example
set.seed(10)
hypso_orig <- tibble(depths = seq(0,6,0.5), areas = sort(sample(1:300, 13), decreasing = T))
hypso_resampled <- resample_hypso(hypso_orig, 0:6)
# Change what `hypso_orig` and `hypso_resampled` are and run the
# following to create the plots and generate the habitat area values
habitat_orig <- calc_benthic_area_incl_hypso(
Z1, Z2,
A1 = resample_hypso(hypso_orig, Z1)[["areas"]],
A2 = resample_hypso(hypso_orig, Z2)[["areas"]],
hypso = hypso_orig,
cum_ba = cumsum(calc_benthic_area(
hypso_orig$depths[-length(hypso_orig$depths)],
hypso_orig$depths[-1],
hypso_orig$areas[-length(hypso_orig$areas)],
hypso_orig$areas[-1])))
habitat_resampled <- calc_benthic_area_incl_hypso(
Z1, Z2,
A1 = resample_hypso(hypso_resampled, Z1)[["areas"]],
A2 = resample_hypso(hypso_resampled, Z2)[["areas"]],
hypso = hypso_resampled,
cum_ba = cumsum(calc_benthic_area(
hypso_resampled$depths[-length(hypso_resampled$depths)],
hypso_resampled$depths[-1],
hypso_resampled$areas[-length(hypso_resampled$areas)],
hypso_resampled$areas[-1])))
plot(hypso_orig$areas, -hypso_orig$depths, type = "b", pch = 16)
points(hypso_resampled$areas, -hypso_resampled$depths, pch = 16, col = "red")
lines(hypso_resampled$areas, -hypso_resampled$depths, pch = 16, col = "red")
legend("top", col=c('black', 'red'), pch=16, legend = c('original', 'resampled'))
Sorry! I hadn't hit submit on my review
Fixes #293
Add code to resample hypsography prior to creation of the
7_config_merge/out/nml_list.rds.ind
target.This code is set up such that:
max_layer_thick
here but can easily be adjustedapprox()
and are then used to calculate the area values, per Lindsay's method used inlake-temperature-out