traitecoevo / fg_spectral_diversity

0 stars 0 forks source link

rewrite chv formula #30

Closed adelegem closed 1 week ago

wcornwell commented 1 month ago
calculate_chv <- function(pixel_values_df, subplots, wavelengths) {
  subplot_of_interest <- deparse(substitute(subplots))

  PCA <- pixel_values_df %>%
    select(c({{wavelengths}})) %>%
    rda(scale = F)

  CHV <- data.frame(PCA$CA$u) %>%
    select(c(1:3)) %>%
    cbind({{pixel_values_df}}) %>%
    select(c('PC1', 'PC2', 'PC3', {{subplots}})) %>%
    group_split({{subplots}}) %>%
    set_names(map(., ~unique(.[[subplot_of_interest]]))) %>%
    map(~convhulln(.x[-4], option = 'FA')) %>%
    map_dbl('vol') %>%
    as.data.frame() %>%
    rownames_to_column('subplot_id') %>%
    rename(CHV = '.')

  rm(subplot_of_interest) 

  return(CHV)
}
wcornwell commented 1 month ago

@adelegem can you point me to an example input?

adelegem commented 1 month ago

calculate_chv runs through calculate_metrics so that it calculates for each site, so could do

set.seed(123)

identifier <- c("NSABHC0009", "NSABHC0010", "NSABHC0011", "NSAHBC0012")

subplot_id <- unlist(lapply(1:5, function(i) paste(i, 1:5, sep="_")))

blue <- runif(20000, min = 0, max = 1)
green <- runif(20000, min = 0, max = 1)
red <- runif(20000, min = 0, max = 1)
red_edge <- runif(20000, min = 0, max = 1)
nir <- runif(20000, min = 0, max = 1)

df <- data.frame(
  identifier = rep(identifier, each = 5000),
  subplot_id = subplot_id,
  blue = blue,
  green = green,
  red = red,
  red_edge = red_edge,
  nir = nir
)

example_metrics <- calculate_metrics(df, masked = TRUE, c('blue', 'green', 'red', 'red_edge', 'nir'))

also pushed the latest version of my code so might need to pull it before you run (i think the old one didn't have the band_wavelengths argument).

if you don't want to run all the metrics/sites you could:

example_chv <- calculate_chv(df, subplot_id, c("blue":"nir"))

let me know if thats not what you mean

wcornwell commented 1 month ago

BTW, the non-standard evaluation stuff in that function makes me feel very old.

wcornwell commented 1 month ago
library(tidyverse)

do_rda <- function(pixel_values_df, wavelengths) {
  PCA <- pixel_values_df %>%
    select(c({{wavelengths}})) %>%
    vegan::rda(scale = F)
  return(PCA)
}

calculate_chv <- function(matrix, dim) {
  CHV_df <- data.frame(matrix) %>%
    select(c(1:dim)) 
  CHV <- geometry::convhulln(CHV_df, option = 'FA')
  return(CHV)
}
wcornwell commented 1 month ago

and testing code:

# Wrapper function to vary subsample fraction and calculate CHV volume
vary_subsample_and_calculate_chv <- function(df, wavelengths, dim = 3, subsample_fractions = seq(0.1, 1, by = 0.1), n_subsamples = 10) {
  results <- tibble(subsample_fraction = double(), chv_volume = double())
  # Perform PCA/RDA on the whole sample
  pca_test <- do_rda(df, wavelengths)

  for (frac in subsample_fractions) {
    chv_volumes <- vector("numeric", n_subsamples)

    for (i in 1:n_subsamples) {
      # Subsample the dataframe
      df_subsample <- data.frame(pca_test$CA$u) %>% sample_frac(frac)

      # Calculate the CHV and store the volume
      chv_out <- calculate_chv(df_subsample, dim = dim)
      chv_volumes[i] <- chv_out$vol
    }

    # Store results in the tibble
    results <- results %>% 
      add_row(subsample_fraction = rep(frac, n_subsamples), chv_volume = chv_volumes)
  }

  return(results)
}

# Example usage
set.seed(123)
df <- data.frame(
  identifier = rep(c("NSABHC0009", "NSABHC0010", "NSABHC0011", "NSAHBC0012"), each = 5000),
  subplot_id = rep(unlist(lapply(1:4, function(i) paste(i, 1:5, sep="_"))), each = 1000),
  blue = runif(20000, min = 0, max = 1),
  green = runif(20000, min = 0, max = 1),
  red = runif(20000, min = 0, max = 1),
  red_edge = runif(20000, min = 0, max = 1),
  nir = runif(20000, min = 0, max = 1)
)

# Call the function to vary subsample fractions and calculate CHV volumes
results <- vary_subsample_and_calculate_chv(df, names(df)[3:7], dim = 3, subsample_fractions = seq(0.1, 1, by = 0.1), n_subsamples = 20)

# Plot the results
ggplot(results, aes(x = subsample_fraction, y = chv_volume)) +
  geom_point() +
  labs(title = "CHVolume vs Subsample Fraction", x = "Subsample Fraction", y = "CHVolume") +
  theme_minimal()