Open tomjwebb opened 8 years ago
# Combining marine regions with environmental data
# use raster::mask to clip a gridded raster to a spatialPolygon
# Get an example spatial polygon using marine regions
devtools::install_github("sckott/mregions")
library("mregions")
res <- region_shp(name = "Australian Exclusive Economic Zone")
# trim the global gridded raster to this region
r_masked <- mask(r0, res)
plot(r_masked)
# get statistics out of r0 and r_masked, e.g. mean 'temperature'
mean(r0@data@values)
# masked version contains NAs, so use na.rm = T
mean(r_masked@data@values, na.rm = T)
##############
# Gridding environmental data from NetCDF files
# Data from: http://www.esrl.noaa.gov/psd/data/gridded/data.noaa.oisst.v2.html
# Direct link to data (~53MB): https://drive.google.com/file/d/0B7OcGQ7KA64UQzRiQVB6M2d4QnM/view?usp=sharing
# install.packages("RNetCDF")
library(RNetCDF)
marine <- open.nc("sst.mnmean.nc")
print.nc(marine)
marine1 <- read.nc(marine, unpack = T)
names(marine1)
# [1] "lat" "lon" "sst" "time" "time_bnds"
lapply(marine1, length)
# $lat
# [1] 180
# $lon
# [1] 360
# $sst
# [1] 26438400
# $time
# [1] 408
# $time_bnds
# [1] 816
# NB - 360 * 180 * 408 = 26438400
# create grid - this feels a bit clunky right now
# Find spatial resolution:
range(diff(marine1$lon))
# [1] 1 1
range(diff(marine1$lat))
# [1] -1 -1
spatial_res <- 1
# Create the base grid
r <- raster(ncols = 360/spatial_res, nrows = 180/spatial_res)
# populate it with data for a given time period
x <- seq(-180, (180 - spatial_res), by = spatial_res) + 0.5*spatial_res
y <- seq(-90, (90 - spatial_res), by = spatial_res) + 0.5*spatial_res
# create full set of coordinates
xy <- expand.grid(x, y)
# add one month of temperature data
time_period <- 1
obs_per_time <- with(marine1, length(lat) * length(lon))
time_index <- 1:obs_per_time + (time_period - 1)*obs_per_time
z <- marine1$sst[time_index]
# add the variable to the rasterized grid
r0 <- rasterize(xy, field = z, r)
# plot results
plot(r0)
# can now get sst values for given spatial points as before
# locating points in both time may require converting time as given here into date
# Time is given as days since 01/01/1800, so set this as origin
origin <- as.Date("1800-1-1")
# Then date is obtained from 'time' as:
marine1$date <- origin + marine1$time
# Then use e.g. findInterval to locate your occurrence points in time,
# rasterize all required time periods and extract data accordingly
# Will work on putting this into a function…
# First (clunky) attempt to match species occurrence data to environmental data in space and time
# From SST data described above
# (Data from: http://www.esrl.noaa.gov/psd/data/gridded/data.noaa.oisst.v2.html)
# (Direct link to data (~53MB): https://drive.google.com/file/d/0B7OcGQ7KA64UQzRiQVB6M2d4QnM/view?usp=sharing)
# Time is given as days since 01/01/1800, so set this as origin
origin <- as.Date("1800-1-1")
# Then date is obtained from 'time' as:
marine1$date <- origin + marine1$time
# findInterval doesn't work on dates:
findInterval(as.Date("1990-11-15"), marine1$time)
# instead convert target date to Julian:
as.numeric(marine1$date[1] - origin)
# findInterval:
findInterval(as.numeric(as.Date("2010-11-27") - origin), marine1$time)
# works
# Example occurrence data:
library("spocc")
# get occurrence data for Mola mola
res <- occ(query = 'Mola mola', from = 'obis', limit = 200)
# extract data frame
res_df <- res$obis$data$Mola_mola
# restrict to occurrences with dates within range of env data:
res_df <- subset(res_df, !is.na(eventDate))
res_df <- subset(res_df, eventDate >= min(marine1$date))
nrow(res_df)
# 167
# for simplicity, just retain key variables here: id, lat, lon, date
res_df <- select(res_df, id:longitude, eventDate)
res_df$eventDate <- as.Date(res_df$eventDate)
# find appropriate month from SST data for each event date
res_df$month_id <- findInterval(as.numeric(res_df$eventDate - origin), marine1$time)
# Function to extract environmental data matched in space and time to occurrence records
get_env_par_space_x_time <- function(occ_dat, env_dat = marine1){
spatial_res <- abs(min(diff(env_dat$lon)))
# Create the base grid
r <- raster(ncols = 360/spatial_res, nrows = 180/spatial_res)
# populate it with data for a given time period
x <- seq(-180, (180 - spatial_res), by = spatial_res) + 0.5*spatial_res
y <- seq(-90, (90 - spatial_res), by = spatial_res) + 0.5*spatial_res
# create full set of coordinates
xy <- expand.grid(x, y)
# get appropriate time slice
time_period <- occ_dat$month_id[1]
obs_per_time <- with(env_dat, length(lat) * length(lon))
time_index <- 1:obs_per_time + (time_period - 1)*obs_per_time
z <- env_dat$sst[time_index]
# add the variable to the rasterized grid
r0 <- rasterize(xy, field = z, r)
# get value for points within occ_points
occ_points_temp <- extract(r0, select(occ_dat, longitude, latitude), cellnumbers = F)
return(list(time_slice = occ_dat$month_id, id = occ_dat$id, env_par = occ_points_temp))
}
# All month ids in occurrence dataset:
month_ids <- unique(res_df$month_id)
# Get data for one month:
get_env_par_space_x_time(occ_dat = subset(res_df, month_id == month_ids[40]))
get_env_par_space_x_time(occ_dat = subset(res_df, month_id == month_ids[12]))
# Loop across months (NB slow - takes ~3.5 minutes for ~170 records):
system.time(
matched_env_dat <- sapply(month_ids, function(month_ids){
get_env_par_space_x_time(occ_dat = subset(res_df, month_id == month_ids))
})
)
# need to match by ID to get back into df
id_match <- match(res_df$id, as.numeric(unlist(matched_env_dat[2,])))
# add time- and space-matched SST to occurrence data:
res_df$sst <- as.numeric(unlist(matched_env_dat[3,]))[id_match]
# More efficient example
# Improved time-space matching using a raster brick approach
library(raster)
# Read in NetCDF file as a raster brick - need file of this name in working directory
# Could try download.file from http://www.esrl.noaa.gov/psd/data/gridded/data.noaa.oisst.v2.html
# But not sure how to get direct link to download
marine_brick <- brick("sst.mnmean.nc", lvar = 4, varname = "sst")
dim(marine_brick)
# 180 x 360 x 408
# To plot global temperature for a given month, specify month as y here:
plot(marine_brick, y = 2)
# coordinates are 0:360, -90:90,
marine_brick@extent
# class : Extent
# xmin : 0
# xmax : 360
# ymin : -90
# ymax : 90
# So longitude runs from 0 (-180) to 360 (+180), most occurrence data will need to be adjusted
# example of extracting temperature for a given coordinate (specified in cbind) and month
extract(marine_brick, cbind(180,0), layer = 1, nl = 1)
# layer is the FIRST time slice to consider; goes from there to end of time period, or for
# nl (number of layers) months if specified
# to get dates present in the raster brick
names(marine_brick@z)
marine_brick@z[["Date"]]
# NB Date is given as days since 01/01/1800, so set this as origin
# Function to get envirnomental parameters for given point in time and space
# env_dat is the environmental data, as a raster brick
# occ_dat is a single occurrence columns including latitude, lon_adj (longitude adjusted to 0-360 scale), and eventDate
# origin is the baseline date used for Julian day calculations
get_env_par_space_x_time <- function(
env_dat = marine_brick, occ_dat, origin = as.Date("1800-1-1")){
# calculate starting julian day for each month in env_dat
month_intervals <- as.numeric(env_dat@z[["Date"]] - origin)
# calculate julan day for the focal date (eventDate in occ_dat)
focal_date <- as.numeric(occ_dat$eventDate - origin)
# extract environmental variable (SST here) for this point
env_val <- extract(
marine_brick,
cbind(occ_dat$lon_adj, occ_dat$latitude),
layer = findInterval(focal_date, month_intervals),
nl = 1
)
# Return the value
as.numeric(env_val)
}
# Get some example occupancy data
# Example occurrence data:
library("spocc")
library(dplyr)
# get occurrence data for Mola mola
res <- occ(query = 'Mola mola', from = 'obis', limit = 200)
# extract data frame
res_df <- res$obis$data$Mola_mola
# restrict to occurrences with dates within range of env data:
res_df <- subset(res_df, !is.na(eventDate))
res_df <- subset(res_df, eventDate >= min(marine_brick@z[["Date"]]))
nrow(res_df)
# 167
# for simplicity, just retain key variables here: id, lat, lon, date
res_df <- select(res_df, id:longitude, eventDate)
res_df$eventDate <- as.Date(res_df$eventDate)
# adjust longitude
res_df$lon_adj <- res_df$longitude + 180
# example for a single space-time point:
get_env_par_space_x_time(occ_dat = res_df[1,])
# example looping through each occupancy point - ~2.3s for 167 points
i <- 1:nrow(res_df)
matched_env_dat <- sapply(i, function(i){get_env_par_space_x_time(occ_dat = res_df[i,])})
# add sst back to occurrence dataset
res_df$sst <- matched_env_dat
Worth checking out the help files for raster::extract
as the buffer
and method
arguments may be useful - buffer
could be set to precision of location estimate, method
allows interpolated values from nearest raster cells