Open bstaton1 opened 1 month ago
Here I've pulled most of what I'll need together to get this going. It would be really nice to also have bstaton1/KuskoHarvUtils#10 completed as part of this as well.
# load historical flight and interview data
data("flight_data_all", package = "KuskoHarvData")
data("interview_data_all", package = "KuskoHarvData")
# all dates
dates = unique(lubridate::date(flight_data_all$start_time))
# select a date
d = 1
f_dat = subset(flight_data_all, lubridate::date(start_time) == dates[d])
i_dat = subset(interview_data_all, lubridate::date(trip_start) == dates[d] & suit_effort)
trip_start = i_dat$trip_start
trip_end = i_dat$trip_end
test_freq_mins = 5
# function to calculate the fraction of interviewed trips active at given times
p_active = function(trip_start, trip_end, test_freq_mins = 15) {
# make sure all records have data present
times = data.frame(trip_start = trip_start, trip_end = trip_end)
times = times[!is.na(times$trip_start) & !is.na(times$trip_end),]
# extract the date
the_date = unique(KuskoHarvUtils::basic_date(lubridate::date(times$trip_start)))
# build the times to test the proportion of all fishers that were active at
time_test_start = KuskoHarvUtils::combine_datetime(the_date, "00:00")
time_test_end = KuskoHarvUtils::combine_datetime(the_date, "23:00")
test_at = seq(0, 59, by = test_freq_mins)
time_test = seq(time_test_start, time_test_end, by = "hour")
time_test = lapply(time_test, function(x) x + test_at * 60)
time_test = do.call(c, time_test)
# function to determine if each fisher was fishing at a given time
fishing_at_time = function(trip_start, trip_end) {
sapply(1:length(time_test), function(i) dplyr::between(time_test[i], trip_start, trip_end))
}
# apply this to all trips and times and convert to a proportion
active = sapply(1:nrow(times), function(i) fishing_at_time(times$trip_start[i], times$trip_end[i]))
p_active = rowMeans(active)
# create and return the output
data.frame(time = time_test, p_active = p_active)
}
# apply the function to one date
x = p_active(i_dat$trip_start, i_dat$trip_end, test_freq_mins = 60)
# basic plot
plot(p_active ~ time, data = x, type = "l")
# plotting code used to build the figure in the manuscript
# function to create plot for one day
f = function(d, x_axis_ticks = TRUE, y_axis_ticks = TRUE) {
# subset the meta data, interview data, and flight data for this day
mdat = subset(M_info, date(start) == d)
idat = subset(I_data, date(trip_start) == d)
fdat = subset(F_data, date(start_time) == d)
# subset the p_active data for this day
out = p_active_all[date(p_active_all$time) == d,]
# get pi estimates
edat = KuskoHarvEst:::tally_effort_data(idat, fdat)
effort_ests = KuskoHarvEst:::N_estimator(edat)
pi_ests = effort_ests[str_detect(names(effort_ests), "pi")]
# empty plot with correct dimensions
plot(p_active ~ time, data = out, type = "n", ylim = c(0,1.2),
xlim = c(min(time[hour(time) == 0]), max(time)),
xaxt = "n", yaxt = "n", xlab = "")
# draw the p_active distribution
with(out, polygon(x = c(time, rev(time)),
y = c(p_active, rep(0, length(p_active))),
col = "grey85", border = "black"))
# draw xaxis
at_x = to_datetime(seq(4, 20, 4), KuskoHarvUtils::basic_date(d))
# at_x = to_datetime(seq(3, 21, 3), KuskoHarvUtils::basic_date(d))
# at_x = to_datetime(seq(4, 20, 8), KuskoHarvUtils::basic_date(d))
if (x_axis_ticks) {
lab_x = c("4am", "8am", "12pm", "4pm", "8pm")
# lab_x = c("3am", "6am", "9am", "12pm", "3pm", "6pm", "9pm")
# lab_x = c("4am", "12pm", "8pm")
axis(side = 1, at = at_x, labels = lab_x, las = 2)
} else {
axis(side = 1, at = at_x, labels = FALSE, las = 2)
}
# draw yaxis
at_y = seq(0, 1, by = 0.25)
if (y_axis_ticks) {
axis(side = 2, at = at_y, labels = KuskoHarvUtils::percentize(at_y), las = 1)
} else {
axis(side = 2, at = at_y, labels = FALSE)
}
# draw stripes for each flight
start_i = which(out$time %in% round_date(fdat$start_time, "15 minutes"))
end_i = which(out$time %in% round_date(fdat$end_time, "15 minutes"))
sapply(1:length(start_i), function(i) {
polygon(x = c(out$time[start_i[i]:end_i[i]], rev(out$time[start_i[i]:end_i[i]])),
y = c(rep(0, length(start_i[i]:end_i[i])), rev(out$p_active[start_i[i]:end_i[i]])),
col = scales::alpha("grey20", 0.2), border = "black")
points(x = mean(out$time[start_i[i]:end_i[i]]), y = pi_ests[i], cex = 1.2, pch = 21, col = "black", bg = "black")
})
# draw the start and end times of the opener
segments(mdat$start, 0, mdat$start, 1, lty = 2)
segments(mdat$end, 0, mdat$end, 1, lty = 2)
# draw a date label
print_date = paste0(" ", year(d), " ", month(d, label = TRUE), " ", day(d))
mtext(side = 3, line = -1.15, adj = 0, print_date, cex = 0.7)
}
# open the graphics device
png(file.path(figure_dir, "p-active-obs.png"), h = 8 * ppi, w = 7.2 * ppi, res = ppi)
# graphical parameters
par(yaxs = "i", mar = c(0.6,0.3,0,0.3), oma = c(2.25,4,0.5,0.5), cex.axis = 1, tcl = -0.25, mgp = c(2,0.3,0))
par(mfrow = c(8,5))
# is each panel in the first column or last row?
is_first_column = rep(c(T,F,F,F,F), 8)
is_last_row = c(rep(F, 35), rep(T, 5))
# loop through days, making the plot for each
ds = 1:nrow(M_info)
sapply(ds, function(i) {
f(d = date(M_info$start)[i],
x_axis_ticks = is_last_row[which(ds == i)],
y_axis_ticks = is_first_column[which(ds == i)]
)
})
# draw y-axis label
mtext(side = 2, outer = TRUE, line = 2.25, "% of Trips Active", cex = 1.2)
# close the device
dev.off()
This should be written to take arguments:
interview_data
flight_data
These are currently only used in KuskoHarvEst-ms, however, this project would benefit from having them integrated, for future inclusion in sensitivity analyses. This is an excellent way to visualize agreement between the interview and flight data when there are two flights, and I can't believe I haven't thought of it previously.