BoulderCodeHub / RWDataPlyr

R package to read and manipulate data from RiverWare™
3 stars 5 forks source link

rw_scen_aggregate() should work for any RW output type #56

Open rabutler opened 7 years ago

rabutler commented 7 years ago

You should be able to pass in a slot agg list that contains some rdfs, some csvs, and some netcdfs, and it should combine the output from all the various sources

rabutler commented 7 years ago

Need to figure out how this should work for a csv file that has already been combined together, vs. a csv file that has not yet been combined together.

I think we can probably provide a check to see whether it has a "scenario" column; if it doesn't, assume it contains only one scenario's worth of data. Or maybe check whether there is more than one scenario's worth of data.

rabutler-usbr commented 6 years ago

rwcsv_aggregate() based on rdf_aggregate():

also used in testing of #85

#' Aggregate RiverWare csv output with a `rwd_agg` (prototype)
#'
#' csv-based counterpart of `rdf_aggregate()`. Reads `KeySlots.csv` from
#' `rdf_dir`, adds year/month columns, checks for NaNs, applies the rows of
#' `agg` whose `file` equals `"KeySlots.rdf"`, and returns the combined result
#' with the aggregation, file attributes, and scenario/folder as attributes.
#'
#' NOTE(review): the csv file name and the `agg$file` value it is matched
#' against are still hard coded; rows of `agg` for any other file are ignored.
#'
#' @param agg Object for which `is_rwd_agg()` returns `TRUE`.
#' @param rdf_dir Directory that contains the csv file.
#' @param scenario Optional scenario name stored in the `scen_folder`
#'   attribute; recorded as `NA` when `NULL`.
#' @param keep_cols Currently unused by this function; presumably kept so the
#'   signature mirrors `rdf_aggregate()`.
#' @param nans_are Either `"0"` or `"error"`; forwarded to the internal
#'   `check_nans()` helper.
#' @param find_all_slots Forwarded to the internal `rwtbl_apply_sam()` helper.
#'
#' @return The row-bound aggregation results with `Variable` and `Value` as
#'   the last two columns, carrying the attributes `rwd_agg` (the `agg`
#'   passed in), `rdf_atts` (per-file attributes, named by csv file), and
#'   `scen_folder` (one-row data frame of scenario and normalized folder).
rwcsv_aggregate <- function(agg,
                            rdf_dir = ".",
                            scenario = NULL,
                            keep_cols = FALSE,
                            nans_are = "0",
                            find_all_slots = TRUE) {
  if (!is_rwd_agg(agg)) {
    stop("`agg` passed to `rwcsv_aggregate()` is not a `rwd_agg`")
  }

  nans_are <- match.arg(nans_are, choices = c("0", "error"))

  # Hard-coded for now: the csv that is read, and the `agg$file` entry whose
  # rows are applied to it.
  csv_names <- "KeySlots.csv"
  agg_file <- "KeySlots.rdf"

  csv_files <- file.path(rdf_dir, csv_names)
  csv_exists <- file.exists(csv_files)

  # Every csv must exist; report exactly the ones that are missing.
  if (!all(csv_exists)) {
    stop(
      "The following csv files were not found in ",
      # mustWork = FALSE: avoid a second, noisy warning when rdf_dir itself
      # does not exist
      normalizePath(rdf_dir, mustWork = FALSE), ":\n",
      toString(csv_names[!csv_exists]),
      call. = FALSE
    )
  }

  # Read each csv, reduce it to the columns used downstream, then apply the
  # matching rows of the aggregation to it.
  rwtblsmmry <- lapply(csv_files, function(csv_file) {
    rwtbl <- read_rw_csv(csv_file)
    rwtbl <- RWDataPlyr:::add_ym_to_rdftbl(rwtbl)
    rwtbl <- dplyr::select(
      rwtbl,
      Timestep, Year, Month, TraceNumber, ObjectSlot, Value
    )

    rwtbl <- RWDataPlyr:::check_nans(rwtbl, nans_are, rdf_file = csv_file)

    file_agg <- agg[agg$file == agg_file, ]

    RWDataPlyr:::rwtbl_apply_sam(rwtbl, file_agg, find_all_slots)
  })

  # Collect per-file attributes before bind_rows() combines the tables.
  rwtbl_atts <- lapply(rwtblsmmry, RWDataPlyr:::rwtbl_get_atts)
  names(rwtbl_atts) <- csv_names

  rwtblsmmry <- dplyr::bind_rows(rwtblsmmry)

  # Move Variable and Value to the end; keep all other columns in order.
  lead_cols <- setdiff(colnames(rwtblsmmry), c("Variable", "Value"))
  rwtblsmmry <- dplyr::select(
    rwtblsmmry,
    dplyr::one_of(lead_cols, "Variable", "Value")
  )

  scen_folder <- data.frame(
    "scenario" = if (is.null(scenario)) NA_character_ else scenario,
    "folder" = normalizePath(rdf_dir),
    stringsAsFactors = FALSE
  )

  # Attach the aggregation, file attributes, and scenario info to the result.
  structure(
    rwtblsmmry,
    "rwd_agg" = agg,
    "rdf_atts" = rwtbl_atts,
    "scen_folder" = scen_folder
  )
}