bstaton1 / KuskoHarvEst

R package for producing in-season harvest/effort estimates and reports for Kuskokwim River subsistence salmon fisheries
MIT License
0 stars 0 forks source link

Create a `combine_boot()` function #191

Closed bstaton1 closed 1 year ago

bstaton1 commented 1 year ago

Currently, the code to combine bootstrap samples of harvest by gear types (and to get the total across gears) is messy, and will require much editing to make #178 work:

boot_out_total = data.frame(iter = boot_out_drift$iter, 
                            date = boot_out_drift$date,
                            gear = "total", 
                            stratum = boot_out_drift$stratum,
                            chinook = rowSums(cbind(boot_out_drift[,"chinook"], boot_out_set[,"chinook"]), na.rm = TRUE),
                            chum = rowSums(cbind(boot_out_drift[,"chum"], boot_out_set[,"chum"]), na.rm = TRUE),
                            sockeye = rowSums(cbind(boot_out_drift[,"sockeye"], boot_out_set[,"sockeye"]), na.rm = TRUE),
                            total = rowSums(cbind(boot_out_drift[,"total"], boot_out_set[,"total"]), na.rm = TRUE)
)

# combine drift, set, and total into one data frame of bootstrap output
boot_out = rbind(boot_out_total, boot_out_drift, boot_out_set)

This code differs in some cases, do a global find for boot_out_total = data.frame( to see all of the places it is called -- it is quite a lot (also in 'KuskoHarvData'!). One function (combine_boot()) with arguments about how to properly deal with species and set nets (included/not) could go a long way towards improving this.

bstaton1 commented 1 year ago

Here is a first cut at this. It should work regardless of species included in boot_out_drift and/or boot_out_set, thus streamlining future edits for coho estimation/reporting.

combine_boot = function(boot_out_drift = NULL, boot_out_set = NULL) {

  # create shortcut variables
  no_drift = is.null(boot_out_drift)
  no_set = is.null(boot_out_set)

  # if both inputs are null, return error
  if (no_drift & no_set) {
    stop ("At least one of 'boot_out_drift' or 'boot_out_set' must contain the output of KuskoHarvEst::bootstrap_harvest()")
  }

  # which columns store ID variables
  id_vars = c("iter", "date", "gear", "stratum")

  # placeholder object for if no drift estimates
  # duplicate the set net output and convert estimates to NA
  if (no_drift) {
    boot_out_drift = boot_out_set
    boot_out_drift[,!(colnames(boot_out_drift) %in% id_vars)] = NA
    boot_out_drift$gear = "drift"
  }

  # placeholder object for if no set estimates
  # duplicate the drift net output and convert estimates to NA
  if (no_set) {
    boot_out_set = boot_out_drift
    boot_out_set[,!(colnames(boot_out_set) %in% id_vars)] = NA
    boot_out_set$gear = "set"
  }

  # extract species names
  spp = colnames(boot_out_drift)[!(colnames(boot_out_drift) %in% id_vars)]

  # obtain a total across gears
  boot_out_total = cbind(
    boot_out_drift[,id_vars],
    sapply(spp, function(s) rowSums(cbind(boot_out_drift[,s], boot_out_set[,s]), na.rm = TRUE))
  )

  # combine gear types
  rbind(boot_out_total, boot_out_drift, boot_out_set)
}
bstaton1 commented 1 year ago

Here's the final version of the function:

combine_boot_out = function(boot_out_drift = NULL, boot_out_set = NULL) {

  # create shortcut variables
  no_drift = is.null(boot_out_drift)
  no_set = is.null(boot_out_set)

  # if both inputs are null, return error
  if (no_drift & no_set) {
    stop ("At least one of 'boot_out_drift' or 'boot_out_set' must contain the output of KuskoHarvEst::bootstrap_harvest()")
  }

  # which columns store ID variables
  id_vars = c("iter", "date", "gear", "stratum")

  # placeholder object for if no drift estimates
  # duplicate the set net output and convert estimates to NA
  if (no_drift) {
    boot_out_drift = boot_out_set
    boot_out_drift[,!(colnames(boot_out_drift) %in% id_vars)] = NA
    boot_out_drift$gear = "drift"
  }

  # placeholder object for if no set estimates
  # duplicate the drift net output and convert estimates to NA
  if (no_set) {
    boot_out_set = boot_out_drift
    boot_out_set[,!(colnames(boot_out_set) %in% id_vars)] = NA
    boot_out_set$gear = "set"
  }

  # extract species names
  spp = colnames(boot_out_drift)[!(colnames(boot_out_drift) %in% id_vars)]

  # obtain a total across gears
  boot_out_total = cbind(
    boot_out_drift[,id_vars],
    sapply(spp, function(s) rowSums(cbind(boot_out_drift[,s], boot_out_set[,s]), na.rm = TRUE))
  )
  boot_out_total$gear = "total"

  # combine gear types
  rbind(boot_out_total, boot_out_drift, boot_out_set)

}

To-do List

bstaton1 commented 1 year ago

Closed by #194