camrinbraun / tags2etuff

An R package for converting the hugely variable formats of animal tag data to a flat file format called eTUFF

add_daynight weirdness #38

Closed marosteg closed 10 months ago

marosteg commented 10 months ago

MWE for add_daynight's failure to assign daytime on certain days. The needed metadata file is attached, and you'll have to change the setwd() lines. After running this whole block (which takes a long time given the archival resolution), you should see NaN results in the resulting dailymetrics object on days 34 and 56 only, because those two calendar days lack daytime assignments entirely (a quick check is sketched after the code block):

devtools::load_all('~/Dropbox/GitHub/tags2etuff')
devtools::load_all('~/Dropbox/GitHub/HMMoce')
library(tidyverse)
library(dplyr)
library(lubridate)
library(padr)

dir <- '~/Google Drive/Shared drives/MPG_WHOI/data/Azores_deepsharks/Sharks/'
setwd('~/Dropbox/WHOI/Azores/R/')

#Read in metadata
meta <- read.csv('add_azoressharks_meta.csv',header=T)

## format dates
meta$time_coverage_start <- lubridate::parse_date_time(meta$time_coverage_start, orders='Ymd HMS', tz='UTC')
meta$time_coverage_end <- lubridate::parse_date_time(meta$time_coverage_end, orders='Ymd HMS', tz='UTC')

#Read in acoustic detections
acoustics <- read.csv('acoustic_detections.csv', header=T)
# Match PSAT ptt to acoustic tag serial number
psat.atag <- data.frame(
  ptt = meta$ptt,
  asn = c("A69-9001-17182", "A69-9001-17180", "A69-9001-17163", "A69-9001-7861",
          "A69-9001-7860", NA, "A69-9001-17177", rep(NA, times = 6))
)

#### Environmental Data ####
#### Spatial Limits ####
# SET SPATIAL LIMITS these are the lat/lon bounds of your study area (e.g.
# where you think the animal went)
sp.lim <- list(lonmin = -40, lonmax = -5, latmin = 25, latmax = 55)
## setup the spatial grid to base likelihoods on
locs.grid <- setup.locs.grid(sp.lim, res = "hycom")
# unique date vector for entire study period (all deployments)
dep.dates <- as.Date(character(0))
for (i in 1:nrow(meta)) {
  dep.dates <- c(dep.dates, seq.Date(as.Date(meta$time_coverage_start[i]), as.Date(meta$time_coverage_end[i]), by = "day"))
}
udates <- sort(unique(dep.dates))

#### SST data ####
sst.dir <- "~/Google Drive/Shared drives/MPG_WHOI/data/Azores_deepsharks/EnvData/sst/"
if (!dir.exists(sst.dir)) dir.create(sst.dir, recursive = TRUE)

for (i in 1:length(udates)) {
  if (!file.exists(paste0(sst.dir, "oi_", udates[i], ".nc"))) {
    get.env(udates[i], filename = "oi", type = "sst", sst.type = "oi",
            spatLim = sp.lim, save.dir = sst.dir)
  }
}

#### Depth-temperature data ####
hycom.dir <- "~/Google Drive/Shared drives/MPG_WHOI/data/Azores_deepsharks/EnvData/hycom/"
if (!dir.exists(hycom.dir)) dir.create(hycom.dir, recursive = TRUE)

if (udates[length(udates)] > as.Date("2018-11-20")) {
  # for dates after 2018-11-20, supply an older HYCOM file as a template raster
  old_hycom <- raster::raster('~/Google Drive/Shared drives/MPG_WHOI/data/Azores_deepsharks/EnvData/hycom/hycom_2018-01-01.nc')
  for (i in 1:length(udates)) {
    if (!file.exists(paste0(hycom.dir, "hycom_", udates[i], ".nc"))) {
      get.env(udates[i], filename = "hycom", type = "hycom", spatLim = sp.lim,
              save.dir = hycom.dir, template = old_hycom)
    }
  }
} else {
  for (i in 1:length(udates)) {
    if (!file.exists(paste0(hycom.dir, "hycom_", udates[i], ".nc"))) {
      get.env(udates[i], filename = "hycom", type = "hycom", spatLim = sp.lim,
              save.dir = hycom.dir)
    }
  }
}

glorys.dir <- "~/Google Drive/Shared drives/MPG_WHOI/data/Azores_deepsharks/EnvData/glorys/"
# GLORYS data must be acquired separately via the Motu client

#### Bathymetry ####
bathy.dir <- "~/Google Drive/Shared drives/MPG_WHOI/data/Azores_deepsharks/EnvData/bathy/"
if (!dir.exists(bathy.dir)) dir.create(bathy.dir, recursive = TRUE)
if (!file.exists(paste0(bathy.dir, "bathy.nc"))) {
  bathy <- get.bath.data(spatLim = sp.lim, save.dir = bathy.dir, res = 0.5)
} else {
  # once downloaded, read the existing .nc instead of re-downloading
  bathy <- irregular_ncToRaster(paste0(bathy.dir, "bathy.nc"), varid = "topo")
}

#### Preparing and running HMMoce ####
setwd('~/Dropbox/WHOI/Azores/R/')

shark <- list.files(dir)
shark <- shark[c(1)] # sixgill with recovered tag

  sharkID <- shark[1] # specify the individual
  data_dir <- paste(dir, sharkID, sep='')

  #### Load and prepare the tag data ####
  # SET START/END LOCATIONS: iniloc is a data frame with columns day, month,
  # year, lat, lon and rows start, end
  meta.i <- which(meta$ptt == sharkID)
  iniloc <- data.frame(matrix(c(
    day(meta[meta.i, "time_coverage_start"]), month(meta[meta.i, "time_coverage_start"]), year(meta[meta.i, "time_coverage_start"]),
    meta$geospatial_lat_start[meta.i], meta$geospatial_lon_start[meta.i],   # start
    day(meta[meta.i, "time_coverage_end"]), month(meta[meta.i, "time_coverage_end"]), year(meta[meta.i, "time_coverage_end"]),
    meta$geospatial_lat_end[meta.i], meta$geospatial_lon_end[meta.i]),      # end
    nrow = 2, ncol = 5, byrow = T))
  names(iniloc) <- list("day", "month", "year", "lat", "lon")
  iniloc[2,1] <- 23 # exclude last two days from 132057 (bad Argos pop location)
  tag <- as.POSIXct(paste(iniloc[1, 1], "/", iniloc[1, 2], "/", iniloc[1, 3],
                          sep = ""), format = "%d/%m/%Y", tz = "UTC")
  pop <- as.POSIXct(paste(iniloc[2, 1], "/", iniloc[2, 2], "/", iniloc[2, 3],
                          sep = ""), format = "%d/%m/%Y", tz = "UTC")
  # VECTOR OF DATES FROM DATA. THIS WILL BE THE TIME STEPS, T, IN THE
  # LIKELIHOODS
  dateVec <- seq.POSIXt(tag, pop, by = "24 hours")

  #### Extracting known locations from acoustic detections ####
  if (!is.na(psat.atag$asn[which(psat.atag$ptt == sharkID)])) { # only for fish with detections
    kl <- subset(acoustics, codespace == psat.atag$asn[which(psat.atag$ptt == sharkID)])
    kl$date <- NA
    # identify the date, handling the different underlying formats of User/Argos and FastGPS
    for (i in 1:nrow(kl)) {
      kl$date[i] <- as.Date(kl$transmission[i], findDateFormat(kl$transmission[i]))
    }
    # restore Date class (the loop stores dates as numeric)
    class(kl$date) <- "Date"
    # exclude data prior to deployment start and after deployment end
    kl <- kl[which(kl$date > tag),]
    kl <- kl[which(kl$date < pop),]
    # keep one detection per receiver per day (drops repeats on days with multiple detections)
    kl <- kl %>% group_by(date) %>% distinct(receiver, .keep_all = TRUE)
    # eliminate unnecessary columns and fix date class
    kl <- kl[, c("date","latitude", "longitude")]; colnames(kl) <- c("date","lat","lon")
    kl$date <- parse_date_time(kl$date,orders='ymd')
    kl <- as.data.frame(kl) # makes class appropriate for make.L down the line
  }

  iniloc[2,c("lat","lon")] <- kl[60,c("lat","lon")] # fix deployment end

  #### Depth-Temperature Data ####
  pdtFile <- paste(data_dir,paste(sharkID,"-PDTs.csv",sep=""),sep="/")
  pdt <- read.wc(pdtFile, type = "pdt", tag = tag, pop = pop, verbose = T)
  pdt <- pdt[, c("Date", "Depth", "MinTemp", "MaxTemp")]

  #### Depth-only Data (Bathymetry) ####
  # If using built-in daily summary
  mmd <- read.csv(paste(data_dir,paste(sharkID,"-MinMaxDepth.csv",sep=""),sep="/"),header=T)
  mmd <- mmd[, c("Date","MinDepth", "MaxDepth")]
  mmd$Date <- as.POSIXct(mmd$Date, format = findDateFormat(mmd$Date), tz = "UTC")
  # exclude data prior to deployment start and after deployment end
  mmd <- mmd[which(as.Date(mmd$Date) >= as.Date(tag)),]
  mmd <- mmd[which(as.Date(mmd$Date) <= as.Date(pop)),]
  mmd[is.na(mmd$MaxDepth),"MaxDepth"] <- 0
  # extracts date from datetime
  mmd$Date <- as.Date(mmd$Date, format="%Y-%m-%d")
  # collapse 6 hr summaries into 24 hr summaries
  mmd <- mmd %>% group_by(Date) %>%
    dplyr::summarise(n = n(), MinDepth = min(MinDepth, na.rm = T),
                     MaxDepth = max(MaxDepth, na.rm = T), .groups = "keep")
  mmd$Date <- as.POSIXct(as.character(mmd$Date),format="%Y-%m-%d",tz="UTC")
  # Pads days without max depth data to have max depth of 0 (excludes land in bathy likelihood)
  mmd <- padr::pad(as.data.frame(mmd),start_val=tag,end_val=pop)
  mmd[is.na(mmd$MaxDepth),"MaxDepth"] <- 0

  #### Observation Likelihoods ####
  #### 3D Depth-Temperature Likelihood ####
  # OCEAN HEAT CONTENT (INTEGRATED PDTs)
  if (!file.exists(paste0(data_dir, "/L.ohc.rds"))) {
    L.ohc <- calc.ohc.par(pdt, filename = "hycom", ohc.dir = hycom.dir, dateVec = dateVec, 
                          isotherm = "", use.se = T, ncores = 8)
    saveRDS(L.ohc,paste0(data_dir, "/L.ohc.rds"))
  } else {L.ohc <- readRDS(paste0(data_dir, "/L.ohc.rds"))}

  if (!file.exists(paste0(data_dir, "/L.hycom.rds"))) {
    L.hycom <- calc.hycom.par(pdt, filename = "hycom", hycom.dir = hycom.dir, dateVec = dateVec, 
                              use.se = T, ncores = 8)
    saveRDS(L.hycom,paste0(data_dir, "/L.hycom.rds"))
  } else {L.hycom <- readRDS(paste0(data_dir, "/L.hycom.rds"))}

  if (!file.exists(paste0(data_dir, "/L.ohc.glorys.rds"))) {
    L.ohc.glorys <- calc.ohc.glorys.parMCA(pdt, filename = "glorys", ohc.dir = glorys.dir, dateVec = dateVec,
                                           isotherm = "", use.se = T, ncores = 8)
    saveRDS(L.ohc.glorys, paste0(data_dir, "/L.ohc.glorys.rds"))
  } else {L.ohc.glorys <- readRDS(paste0(data_dir, "/L.ohc.glorys.rds"))}

  if (!file.exists(paste0(data_dir, "/L.glorys.rds"))) {
    L.glorys <- calc.glorys.parMCA(pdt, filename = "glorys", glorys.dir = glorys.dir, dateVec = dateVec,
                                   use.se = T, ncores = 8)
    saveRDS(L.glorys, paste0(data_dir, "/L.glorys.rds"))
  } else {L.glorys <- readRDS(paste0(data_dir, "/L.glorys.rds"))}

  #### Bathymetry Likelihood ####
  ## bathymetry-based likelihood: resample bathy to a more reasonable (coarser) grid
  if (!file.exists(paste0(data_dir, "/L.bathy.rds"))) {
    bathy_resamp <- raster::resample(bathy, L.ohc)
    L.bathy <- calc.bathy(mmd, bathy_resamp, dateVec, focalDim = 5, sens.err = 5,
                          lik.type = "max") # serial
    saveRDS(L.bathy,paste0(data_dir, "/L.bathy.rds"))
  } else {L.bathy <- readRDS(paste0(data_dir, "/L.bathy.rds"))}

  if (!file.exists(paste0(data_dir, "/L.bathy.mixed.rds"))) {
    # Load previously fitted HMMoce track
    load('~/Google Drive/Shared drives/MPG_WHOI/data/Azores_deepsharks/Sharks/132057/132057-HMMoce-mod3-viterbi.rda')
    # Prepare to add the track as custom columns
    customCols <- res$tr.pv
    names(customCols) <- c('DateTime', 'latitude', 'longitude')

    # Generate etuff w/ track
    etuff <- tag_to_etuff(dir=data_dir, meta=meta[which(meta$ptt == sharkID),], gpe3 = FALSE, customCols=customCols, ignore_orig_coords = TRUE)
    series <- get_series(etuff)
    series <- subset(series,as.Date(DateTime) <= meta$time_coverage_end[which(meta$ptt==sharkID)])
    series$dn <- add_daynight(series, etuff)
    dailymetrics <- as.data.frame(matrix(nrow=nrow(customCols),ncol=2))
    colnames(dailymetrics) <- c("mean.d","sd.d")

    # Calculate daily daytime depth mean and sd (NaN when a day has no "d" records)
    for (j in 1:nrow(customCols)) {
      ind.d <- which(as.Date(series$DateTime) == customCols$DateTime[j] & series$dn == "d")
      mean.d <- mean(series$depth[ind.d])
      sd.d <- sd(series$depth[ind.d])
      dailymetrics[j, ] <- c(mean.d, sd.d)
    }
  }
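
A quick sanity check (a sketch reusing the objects built in the block above) that the NaN rows are exactly the calendar days with no daytime assignments:

na.days <- which(is.nan(dailymetrics$mean.d))   # expect rows 34 and 56
# tabulate day/night labels on those calendar days; "d" should be absent
for (j in na.days) {
  print(table(series$dn[as.Date(series$DateTime) == as.Date(customCols$DateTime[j])]))
}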

Attachment: add_azoressharks_meta.csv

camrinbraun commented 10 months ago

Appears to be an issue in get_srss(), which add_daynight() relies on to get sunrise/sunset times. Note that for row 34 the day is 2017-06-20 but the computed sunrise/sunset (and hence the day_interval) fall entirely on 2017-06-19, so no records from 2017-06-20 can be classified as daytime:

> srss[34,]
          day latitude longitude        tz n_locs   DateTime             sunrise              sunset
34 2017-06-20 38.80659 -29.16954 Etc/GMT+2      1 2017-06-20 2017-06-19 06:31:29 2017-06-19 21:24:44
                                       day_interval                                   night_interval
34 2017-06-19 06:31:29 +00--2017-06-19 21:24:44 +00 2017-06-19 21:24:44 +00--2017-06-21 06:31:24 +00
camrinbraun commented 10 months ago

The issue stems from an animal moving between time zones AND from our assumption that input times to get_srss() need to be converted to local time. Maybe they do, BUT in the current iteration we are feeding in daily locations that are arbitrarily stamped at 00:00 UTC. When the animal moves out of the UTC time zone (east or west from the Azores in this case), the incoming time stamp is converted, which pushes Jan 10 2000 00:00 (now assumed to be in GMT+1, for example) back to Jan 9 2000 23:00 UTC.
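
A minimal sketch of that date shift using lubridate (illustration only, not the package code; note the reversed sign convention of the Etc/GMT* zone names, where "Etc/GMT-1" means UTC+1):

library(lubridate)
x <- ymd_hms("2000-01-10 00:00:00", tz = "UTC")  # daily location stamped at midnight UTC
x_local <- force_tz(x, "Etc/GMT-1")              # re-interpret the clock time as local GMT+1
with_tz(x_local, "UTC")                          # "2000-01-09 23:00:00 UTC" -- previous calendar day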

camrinbraun commented 10 months ago

I believe this is fixed by https://github.com/camrinbraun/tags2etuff/commit/737bee79e9fcb2451be4d636458d051039ed7ad5, which adds an error catch for the erroneous date assignment.
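
For illustration, a hypothetical sketch of such a catch (this is not the contents of the commit; it assumes an srss data frame like the one printed above, with a Date column day and POSIXct sunrise/sunset):

# if the computed sunrise lands on a different calendar day than `day`,
# shift sunrise/sunset back onto the correct day
fix_srss_day <- function(srss) {
  offset_days <- as.numeric(srss$day - as.Date(srss$sunrise))  # e.g. +1 when sunrise fell a day early
  srss$sunrise <- srss$sunrise + offset_days * 86400           # POSIXct arithmetic is in seconds
  srss$sunset  <- srss$sunset + offset_days * 86400
  srss
}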

No more NAs in the example data.