bstaton1 / KuskoHarvEst

R package for producing in-season harvest/effort estimates and reports for Kuskokwim River subsistence salmon fisheries
MIT License
0 stars 0 forks source link

Add p_active() and p_active_plot() functions #238

Open bstaton1 opened 1 month ago

bstaton1 commented 1 month ago

These are currently only used in KuskoHarvEst-ms, however, this project would benefit from having them integrated, for future inclusion in sensitivity analyses. This is an excellent way to visualize agreement between the interview and flight data when there are two flights, and I can't believe I haven't thought of it previously.

bstaton1 commented 1 month ago

Here I've pulled most of what I'll need together to get this going. It would be really nice to also have bstaton1/KuskoHarvUtils#10 completed as part of this as well.


# load historical flight and interview data
data("flight_data_all", package = "KuskoHarvData")
data("interview_data_all", package = "KuskoHarvData")

# all dates
dates = unique(lubridate::date(flight_data_all$start_time))

# select a date
d = 1
f_dat = subset(flight_data_all, lubridate::date(start_time) == dates[d])
i_dat = subset(interview_data_all, lubridate::date(trip_start) == dates[d] & suit_effort)

trip_start = i_dat$trip_start
trip_end = i_dat$trip_end
test_freq_mins = 5

# function to calculate the fraction of interviewed trips active at given times
p_active = function(trip_start, trip_end, test_freq_mins = 15)  {

  # make sure all records have data present
  times = data.frame(trip_start = trip_start, trip_end = trip_end)
  times = times[!is.na(times$trip_start) & !is.na(times$trip_end),]

  # extract the date
  the_date = unique(KuskoHarvUtils::basic_date(lubridate::date(times$trip_start)))

  # build the times to test the proportion of all fishers that were active at
  time_test_start = KuskoHarvUtils::combine_datetime(the_date, "00:00")
  time_test_end = KuskoHarvUtils::combine_datetime(the_date, "23:00")
  test_at = seq(0, 59, by = test_freq_mins)
  time_test = seq(time_test_start, time_test_end, by = "hour")
  time_test = lapply(time_test, function(x) x + test_at * 60)
  time_test = do.call(c, time_test)

  # function to determine if each fisher was fishing at a given time
  fishing_at_time = function(trip_start, trip_end) {
    sapply(1:length(time_test), function(i) dplyr::between(time_test[i], trip_start, trip_end))
  }

  # apply this to all trips and times and convert to a proportion
  active = sapply(1:nrow(times), function(i) fishing_at_time(times$trip_start[i], times$trip_end[i]))
  p_active = rowMeans(active)

  # create and return the output
  data.frame(time = time_test, p_active = p_active)
}

# apply the function to one date
x = p_active(i_dat$trip_start, i_dat$trip_end, test_freq_mins = 60)

# basic plot
plot(p_active ~ time, data = x, type = "l")

# plotting code used to build the figure in the manuscript
# function to create plot for one day
f = function(d, x_axis_ticks = TRUE, y_axis_ticks = TRUE) {

  # subset the meta data, interview data, and flight data for this day
  mdat = subset(M_info, date(start) == d)
  idat = subset(I_data, date(trip_start) == d)
  fdat = subset(F_data, date(start_time) == d)

  # subset the p_active data for this day
  out = p_active_all[date(p_active_all$time) == d,]

  # get pi estimates
  edat = KuskoHarvEst:::tally_effort_data(idat, fdat)
  effort_ests = KuskoHarvEst:::N_estimator(edat)
  pi_ests = effort_ests[str_detect(names(effort_ests), "pi")]

  # empty plot with correct dimensions
  plot(p_active ~ time, data = out, type = "n", ylim = c(0,1.2),
       xlim = c(min(time[hour(time) == 0]), max(time)),
       xaxt = "n", yaxt = "n", xlab = "")

  # draw the p_active distribution
  with(out, polygon(x = c(time, rev(time)),
                    y = c(p_active, rep(0, length(p_active))),
                    col = "grey85", border = "black"))

  # draw xaxis
  at_x = to_datetime(seq(4, 20, 4), KuskoHarvUtils::basic_date(d))
  # at_x = to_datetime(seq(3, 21, 3), KuskoHarvUtils::basic_date(d))
  # at_x = to_datetime(seq(4, 20, 8), KuskoHarvUtils::basic_date(d))
  if (x_axis_ticks) {
    lab_x = c("4am", "8am", "12pm", "4pm", "8pm")
    # lab_x = c("3am", "6am", "9am", "12pm", "3pm", "6pm", "9pm")
    # lab_x = c("4am", "12pm", "8pm")
    axis(side = 1, at = at_x, labels = lab_x, las = 2)
  } else {
    axis(side = 1, at = at_x, labels = FALSE, las = 2)
  }

  # draw yaxis
  at_y = seq(0, 1, by = 0.25)
  if (y_axis_ticks) {
    axis(side = 2, at = at_y, labels = KuskoHarvUtils::percentize(at_y), las = 1)
  } else {
    axis(side = 2, at = at_y, labels = FALSE)
  }

  # draw stripes for each flight
  start_i = which(out$time %in% round_date(fdat$start_time, "15 minutes"))
  end_i = which(out$time %in% round_date(fdat$end_time, "15 minutes"))
  sapply(1:length(start_i), function(i) {
    polygon(x = c(out$time[start_i[i]:end_i[i]], rev(out$time[start_i[i]:end_i[i]])),
            y = c(rep(0, length(start_i[i]:end_i[i])), rev(out$p_active[start_i[i]:end_i[i]])),
            col = scales::alpha("grey20", 0.2), border = "black")
    points(x = mean(out$time[start_i[i]:end_i[i]]), y = pi_ests[i], cex = 1.2, pch = 21, col = "black", bg = "black")
  })

  # draw the start and end times of the opener
  segments(mdat$start, 0, mdat$start, 1, lty = 2)
  segments(mdat$end, 0, mdat$end, 1, lty = 2)

  # draw a date label
  print_date = paste0(" ", year(d), " ", month(d, label = TRUE), " ", day(d))
  mtext(side = 3, line = -1.15, adj = 0, print_date, cex = 0.7)
}

# open the graphics device
png(file.path(figure_dir, "p-active-obs.png"), h = 8 * ppi, w = 7.2 * ppi, res = ppi)

# graphical parameters
par(yaxs = "i", mar = c(0.6,0.3,0,0.3), oma = c(2.25,4,0.5,0.5), cex.axis = 1, tcl = -0.25, mgp = c(2,0.3,0))
par(mfrow = c(8,5))

# is each panel in the first column or last row?
is_first_column = rep(c(T,F,F,F,F), 8)
is_last_row = c(rep(F, 35), rep(T, 5))

# loop through days, making the plot for each
ds = 1:nrow(M_info)
sapply(ds, function(i) {
  f(d = date(M_info$start)[i],
    x_axis_ticks = is_last_row[which(ds == i)],
    y_axis_ticks = is_first_column[which(ds == i)]
  )
})

# draw y-axis label
mtext(side = 2, outer = TRUE, line = 2.25, "% of Trips Active", cex = 1.2)

# close the device
dev.off()

This should be written to take arguments: