Closed stineb closed 1 year ago
Here's the code. I will not change the repo now, but it will be added for revision.
# Locate every FLUXNET2015 daily (DD) FULLSET file in ../data and read each
# one into a list element named by its file path.
vec_files <- list.files(
  "../data",
  pattern = "_FLUXNET2015_FULLSET_DD_",
  full.names = TRUE
)
# Fail early with a clear message; with no files the later site-id step would
# otherwise fail with an unrelated "object not found" error.
if (length(vec_files) == 0) {
  stop("No FLUXNET2015 FULLSET DD files found in '../data'.", call. = FALSE)
}
# `map` is exported by purrr, so use `::` (not the internal-access `:::`).
list_df <- purrr::map(vec_files, readr::read_csv)
names(list_df) <- vec_files
#' Clean one FLUXNET2015 daily (DD) data frame
#'
#' Selects the variables of interest, parses `TIMESTAMP` into a `Date`,
#' turns the -9999 missing-value code into `NA`, masks GPP on days with a
#' low NEE quality flag, and finally drops all `*_QC` columns.
#'
#' @param df A data frame holding one site's daily data. It must contain
#'   `TIMESTAMP`, `CO2_F_MDS`, `PPFD_IN`, `GPP_NT_VUT_REF`, `NEE_VUT_REF_QC`
#'   and `USTAR`; `select()` errors if any of these is missing.
#' @return A data frame with the selected columns, `TIMESTAMP` as `Date`,
#'   -9999 replaced by `NA`, and no `*_QC` columns.
clean_fluxnet_dd <- function(df) {
  # Select only the variables we're interested in.
  selected <- dplyr::select(
    df,
    dplyr::starts_with("TIMESTAMP"),
    dplyr::ends_with("_F"),
    dplyr::ends_with("_F_MDS"),
    # dplyr::ends_with("_ERA"),
    # Soil water content layers (SWC_F_MDS_1, ...). The previous prefix
    # "_SWC_F_MDS" had a stray leading underscore and never matched.
    dplyr::starts_with("SWC_F_MDS"),
    CO2_F_MDS,
    PPFD_IN,
    GPP_NT_VUT_REF,
    NEE_VUT_REF_QC,
    USTAR,
    -dplyr::starts_with("G_"),
    -dplyr::starts_with("LE_"),
    -dplyr::starts_with("H_"),
    -dplyr::contains("JSB")
  )

  # Convert the timestamp to a proper Date object.
  dated <- dplyr::mutate(selected, TIMESTAMP = lubridate::ymd(TIMESTAMP))

  # Set all -9999 to NA (heavily gap-filled data are not set to zero).
  # na_if() is applied column by column: calling it on a whole data frame
  # errors in dplyr >= 1.1.0. Only numeric columns can hold the -9999 code.
  with_na <- dplyr::mutate(
    dated,
    dplyr::across(where(is.numeric), function(col) dplyr::na_if(col, -9999))
  )

  # Rows with remaining NAs are deliberately kept (no drop_na()).

  # Filter bad data: at least 80% must be measured or good-quality gap-filled.
  masked <- dplyr::mutate(
    with_na,
    GPP_NT_VUT_REF = ifelse(NEE_VUT_REF_QC < 0.8, NA, GPP_NT_VUT_REF)
  )

  # Drop QC variables (no longer needed). NOTE(review): an earlier comment
  # said "except NEE_VUT_REF_QC", but the selection below has always removed
  # it as well; that behavior is kept -- confirm whether it should be retained.
  dplyr::select(masked, -dplyr::ends_with("_QC"))
}
# Clean every site's table, stack them into one data frame, and write it out.
df <- purrr::map(list_df, clean_fluxnet_dd) %>%
  dplyr::bind_rows(.id = "siteid") %>%
  # `siteid` initially holds the list names, i.e. the file paths. Extract the
  # site ID from the file name rather than from fixed offsets into the full
  # path: offsets 10-15 only fit a "data/" prefix, not "../data/".
  # Assumes the standard FLUXNET2015 naming "FLX_<6-char site ID>_FLUXNET2015_..."
  # -- TODO confirm against the files in ../data.
  dplyr::mutate(siteid = substr(basename(siteid), 5, 10))
readr::write_csv(df, "../data/df_for_stepwise_regression.csv")
Ok, thanks. I've adopted it.
@padasch Add code for creating the dataset `df_for_stepwise_regression.csv` in a script in `data-raw`.