FrancescoGrossi-unimi commented 2 weeks ago

Using the phydro branch and setting the params_siml use_gs to true the model yield very negative le value

exmple le

`library(rsofun) library(dplyr) library(ggplot2) library(ggthemes) library(RColorBrewer)

analyse_modobs <- function( df, mod, obs, relative = FALSE, xlim = NULL, ylim = NULL, use_factor = NULL, shortsubtitle = FALSE, plot_subtitle = TRUE, plot_linmod = TRUE, ... ){

rename to 'mod' and 'obs' and remove rows with NA in mod or obs

df <- df %>% as_tibble() %>% ungroup() %>% dplyr::select(mod = mod, obs = obs) %>% tidyr::drop_na(mod, obs)

get linear regression (coefficients)

linmod <- lm(obs ~ mod, data = df)

construct metrics table using the 'yardstick' library

df_metrics <- df %>% yardstick::metrics(obs, mod) %>% dplyr::bind_rows(tibble(.metric = "n", .estimator = "standard", .estimate = summarise(df, numb = n()) %>% unlist())) %>% dplyr::bind_rows(tibble(.metric = "slope", .estimator = "standard", .estimate = coef(linmod)[2])) %>%

dplyr::bind_rows( tibble( .metric = "nse", .estimator = "standard", .estimate = hydroGOF::NSE( obs, mod, na.rm=TRUE ) ) ) %>%

dplyr::bind_rows(tibble(.metric = "mean_obs", .estimator = "standard", .estimate = summarise(df, mean = mean(obs, na.rm = TRUE)) %>% unlist())) %>%
dplyr::bind_rows(tibble(
  .metric = "prmse", .estimator = "standard",
  .estimate = dplyr::filter(., .metric == "rmse") %>% dplyr::select(.estimate) %>% unlist() /
    dplyr::filter(., .metric == "mean_obs") %>%
    dplyr::select(.estimate) %>%
    unlist()
)) %>%
dplyr::bind_rows(tibble(
  .metric = "pmae", .estimator = "standard",
  .estimate = dplyr::filter(., .metric == "mae") %>% dplyr::select(.estimate) %>% unlist() /
    dplyr::filter(., .metric == "mean_obs") %>%
    dplyr::select(.estimate) %>%
    unlist()
)) %>%
dplyr::bind_rows(tibble(.metric = "bias", .estimator = "standard", .estimate = dplyr::summarise(df, mean((mod - obs), na.rm = TRUE)) %>% unlist())) %>%
dplyr::bind_rows(tibble(.metric = "pbias", .estimator = "standard", .estimate = dplyr::summarise(df, mean((mod - obs) / obs, na.rm = TRUE)) %>% unlist()))

rsq_val <- df_metrics %>% dplyr::filter(.metric == "rsq") %>% dplyr::select(.estimate) %>% unlist() %>% unname() rmse_val <- df_metrics %>% dplyr::filter(.metric == "rmse") %>% dplyr::select(.estimate) %>% unlist() %>% unname() mae_val <- df_metrics %>% dplyr::filter(.metric == "mae") %>% dplyr::select(.estimate) %>% unlist() %>% unname() bias_val <- df_metrics %>% dplyr::filter(.metric == "bias") %>% dplyr::select(.estimate) %>% unlist() %>% unname() slope_val <- df_metrics %>% dplyr::filter(.metric == "slope") %>% dplyr::select(.estimate) %>% unlist() %>% unname() n_val <- df_metrics %>% dplyr::filter(.metric == "n") %>% dplyr::select(.estimate) %>% unlist() %>% unname()

if (relative) { rmse_val <- rmse_val / mean(df$obs, na.rm = TRUE) bias_val <- bias_val / mean(df$obs, na.rm = TRUE) }

rsq_lab <- format(rsq_val, digits = 2) rmse_lab <- format(rmse_val, digits = 3) mae_lab <- format(mae_val, digits = 3) bias_lab <- format(bias_val, digits = 3) slope_lab <- format(slope_val, digits = 3) n_lab <- format(n_val, digits = 3)

results <- tibble(rsq = rsq_val, rmse = rmse_val, mae = mae_val, bias = bias_val, slope = slope_val, n = n_val)

if (shortsubtitle) { subtitle <- bquote(italic(R)^2 == .(rsq_lab) ~ ~ RMSE == .(rmse_lab)) } else { subtitle <- bquote(italic(R)^2 == .(rsq_lab) ~ ~ RMSE == .(rmse_lab) ~ ~ bias == .(bias_lab) ~ ~ slope == .(slope_lab) ~ ~ italic(N) == .(n_lab)) }

ggplot hexbin

gg <- df %>% ggplot2::ggplot(aes(x = mod, y = obs)) + geom_hex(bins = 60) + scale_fill_gradientn( colours = colorRampPalette(c("gray65", "navy", "red", "yellow"))(5), trans = "log" ) + geom_abline(intercept = 0, slope = 1, linetype = "dotted") +

coord_fixed() +

# xlim(0,NA) +
# ylim(0,NA) +
theme_classic() +
labs(x = mod, y = obs)

if (plot_subtitle) gg <- gg + labs(subtitle = subtitle) if (plot_linmod) gg <- gg + geom_smooth(method = "lm", color = "red", size = 0.5, se = FALSE)

return(list(df_metrics = df_metrics, gg = gg, linmod = linmod, results = results)) }

p_model_drivers = readRDS(file = here::here("data/p_model_drivers_newformat.rds"))

p_model_validation = readRDS(file = here::here("data/p_model_validation_newformat.rds"))

params_modl <- list( kphio = 0.04998, # setup ORG in Stocker et al. 2020 GMD kphio_par_a = 0.0, # set to zero to disable temperature-dependence of kphio kphio_par_b = 1.0, soilm_thetastar = 0.6 * 240, # to recover old setup with soil moisture stress soilm_betao = 0.0, beta_unitcostratio = 146.0, rd_to_vcmax = 0.014, # value from Atkin et al. 2015 for C3 herbaceous tau_acclim = 30.0, kc_jmax = 0.41, whc = p_model_drivers$site_info[[1]]$whc )

p_model_drivers <- p_model_drivers |> mutate( params_siml = params_siml |> mutate(use_gs = TRUE) |> list() )

output <- rsofun::runread_pmodel_f( p_model_drivers, par = params_modl )

df_plot <- output$data[[1]] %>% select(date, gpp_mod = gpp, le_mod = le) %>% left_join( p_model_validation$data[[1]] %>% select(date, gpp_obs = gpp, le_obs = le), by = join_by(date) ) |> as_tibble()

out_le <- analyse_modobs( df_plot, "le_mod", "le_obs" )

out_le$gg + labs( title = "LE" ) `

stineb commented 2 weeks ago

I haven't checked whether I can reproduce the error. Can you format this so that the code is ready for adoption (by copy/paste)?

The current code in phydro, used with vignettes/pmodel_use_newdata.Rmd, generates sensible outputs that don't seem to match your patterns of negative values.

To bug-fix, write out values for LE and for variables used for calculating LE at each step of the code - from the forcing to the outputs.

FrancescoGrossi-unimi commented 2 weeks ago

I run the vignette and take this result. I wrtie only the necessary code to reproduce the plot which was inside the vignette

library(rsofun)
library(dplyr)
library(ggplot2)
library(ggthemes)
library(RColorBrewer)

analyse_modobs <- function(
df,
mod,
obs,
relative = FALSE,
xlim = NULL,
ylim = NULL,
use_factor = NULL,
shortsubtitle = FALSE,
plot_subtitle = TRUE,
plot_linmod = TRUE,
...
){

rename to 'mod' and 'obs' and remove rows with NA in mod or obs
df <- df %>%
as_tibble() %>%
ungroup() %>%
dplyr::select(mod = mod, obs = obs) %>%
tidyr::drop_na(mod, obs)

get linear regression (coefficients)
linmod <- lm(obs ~ mod, data = df)

construct metrics table using the 'yardstick' library
df_metrics <- df %>%
yardstick::metrics(obs, mod) %>%
dplyr::bind_rows(tibble(.metric = "n", .estimator = "standard", .estimate = summarise(df, numb = n()) %>% unlist())) %>%
dplyr::bind_rows(tibble(.metric = "slope", .estimator = "standard", .estimate = coef(linmod)[2])) %>%
# dplyr::bind_rows( tibble( .metric = "nse", .estimator = "standard", .estimate = hydroGOF::NSE( obs, mod, na.rm=TRUE ) ) ) %>%
dplyr::bind_rows(tibble(.metric = "mean_obs", .estimator = "standard", .estimate = summarise(df, mean = mean(obs, na.rm = TRUE)) %>% unlist())) %>%
dplyr::bind_rows(tibble(
.metric = "prmse", .estimator = "standard",
.estimate = dplyr::filter(., .metric == "rmse") %>% dplyr::select(.estimate) %>% unlist() /
dplyr::filter(., .metric == "mean_obs") %>%
dplyr::select(.estimate) %>%
unlist()
)) %>%
dplyr::bind_rows(tibble(
.metric = "pmae", .estimator = "standard",
.estimate = dplyr::filter(., .metric == "mae") %>% dplyr::select(.estimate) %>% unlist() /
dplyr::filter(., .metric == "mean_obs") %>%
dplyr::select(.estimate) %>%
unlist()
)) %>%
dplyr::bind_rows(tibble(.metric = "bias", .estimator = "standard", .estimate = dplyr::summarise(df, mean((mod - obs), na.rm = TRUE)) %>% unlist())) %>%
dplyr::bind_rows(tibble(.metric = "pbias", .estimator = "standard", .estimate = dplyr::summarise(df, mean((mod - obs) / obs, na.rm = TRUE)) %>% unlist()))

rsq_val <- df_metrics %>%
dplyr::filter(.metric == "rsq") %>%
dplyr::select(.estimate) %>%
unlist() %>%
unname()
rmse_val <- df_metrics %>%
dplyr::filter(.metric == "rmse") %>%
dplyr::select(.estimate) %>%
unlist() %>%
unname()
mae_val <- df_metrics %>%
dplyr::filter(.metric == "mae") %>%
dplyr::select(.estimate) %>%
unlist() %>%
unname()
bias_val <- df_metrics %>%
dplyr::filter(.metric == "bias") %>%
dplyr::select(.estimate) %>%
unlist() %>%
unname()
slope_val <- df_metrics %>%
dplyr::filter(.metric == "slope") %>%
dplyr::select(.estimate) %>%
unlist() %>%
unname()
n_val <- df_metrics %>%
dplyr::filter(.metric == "n") %>%
dplyr::select(.estimate) %>%
unlist() %>%
unname()

if (relative) {
rmse_val <- rmse_val / mean(df$obs, na.rm = TRUE)
bias_val <- bias_val / mean(df$obs, na.rm = TRUE)
}

rsq_lab <- format(rsq_val, digits = 2)
rmse_lab <- format(rmse_val, digits = 3)
mae_lab <- format(mae_val, digits = 3)
bias_lab <- format(bias_val, digits = 3)
slope_lab <- format(slope_val, digits = 3)
n_lab <- format(n_val, digits = 3)

results <- tibble(rsq = rsq_val, rmse = rmse_val, mae = mae_val, bias = bias_val, slope = slope_val, n = n_val)

if (shortsubtitle) {
subtitle <- bquote(italic(R)^2 == .(rsq_lab) ~ ~
RMSE == .(rmse_lab))
} else {
subtitle <- bquote(italic(R)^2 == .(rsq_lab) ~ ~
RMSE == .(rmse_lab) ~ ~
bias == .(bias_lab) ~ ~
slope == .(slope_lab) ~ ~
italic(N) == .(n_lab))
}

ggplot hexbin
gg <- df %>%
ggplot2::ggplot(aes(x = mod, y = obs)) +
geom_hex(bins = 60) +
scale_fill_gradientn(
colours = colorRampPalette(c("gray65", "navy", "red", "yellow"))(5),
trans = "log"
) +
geom_abline(intercept = 0, slope = 1, linetype = "dotted") +
# coord_fixed() +
# xlim(0,NA) +
# ylim(0,NA) +
theme_classic() +
labs(x = mod, y = obs)

if (plot_subtitle) gg <- gg + labs(subtitle = subtitle)
if (plot_linmod) gg <- gg + geom_smooth(method = "lm", color = "red", size = 0.5, se = FALSE)

return(list(df_metrics = df_metrics, gg = gg, linmod = linmod, results = results))
}

p_model_drivers = readRDS(file = here::here("data/p_model_drivers_newformat.rds"))

p_model_validation = readRDS(file = here::here("data/p_model_validation_newformat.rds"))

params_modl <- list(
kphio = 0.04998, # setup ORG in Stocker et al. 2020 GMD
kphio_par_a = 0.0, # set to zero to disable temperature-dependence of kphio
kphio_par_b = 1.0,
soilm_thetastar = 0.6 * 240, # to recover old setup with soil moisture stress
soilm_betao = 0.0,
beta_unitcostratio = 146.0,
rd_to_vcmax = 0.014, # value from Atkin et al. 2015 for C3 herbaceous
tau_acclim = 30.0,
kc_jmax = 0.41,
whc = p_model_drivers$site_info[[1]]$whc
)

p_model_drivers <-
p_model_drivers |>
mutate(
params_siml = params_siml |>
mutate(use_gs = TRUE) |>
list()
)

output <- rsofun::runread_pmodel_f(
p_model_drivers,
par = params_modl
)

df_plot <- output$data[[1]] %>%
select(date, gpp_mod = gpp, le_mod = le) %>%
left_join(
p_model_validation$data[[1]] %>%
select(date, gpp_obs = gpp, le_obs = le),
by = join_by(date)
) |>
as_tibble()

out_le <- analyse_modobs(
df_plot,
"le_mod",
"le_obs"
)

out_le$gg +
labs(
title = "LE"
)

fabern commented 2 weeks ago

With my current version, the files p_model_drivers_newformat.rds and p_model_validation_newformat.rds were renamed. So I can't run your code*.

If you can, which data does it load? Does it mean you are not running the latest version and/or have some additional files in your working directory? Do you get the same when starting in a completely new working directory?

*(Note that the vignette that is online had been generated with older commit and doesn't represent the latest changes. However, the vignettes inside of the vignettes-folder on the branch should be up-to-date.)

stineb commented 1 week ago

@FrancescoGrossi-unimi what was the issue?

FrancescoGrossi-unimi commented 1 week ago

I didn't have the latest version, after I reinstall it works

geco-bern / rsofun

very negative le #227