sckott / spenv

Combine environmental and spatial data
https://sckott.github.io/spenv/
Other
8 stars 1 forks source link

Code to rasterise points into grid #3

Open tomjwebb opened 8 years ago

tomjwebb commented 8 years ago
# Creating a rasterised grid from point data

# Load raster library
library(raster)

# Set the spatial resolution of the grid you require, in degrees
spatial_res <- 10
# Create the base grid
r <- raster(ncols = 360/spatial_res, nrows = 180/spatial_res)

# Populate with data (here, xy coordinates are for the centre of each grid square)
x <- seq(-180, (180 - spatial_res), by = spatial_res) + 0.5*spatial_res
y <- seq(-90, (90 - spatial_res), by = spatial_res) + 0.5*spatial_res
# create full set of coordinates
xy <- expand.grid(x, y)
# create some random data to act as an 'environmental variable'
z <- runif(r@nrows * r@ncols)

# add the variable to the rasterized grid
r0 <- rasterize(xy, field = z, r)
# plot for info if required (not recommended for small values of spatial_res)
plot(r0)

# To extract value of z for a given point in space
# create a set of occurrence points
n_points <- 50
occ_points <- data.frame(
    lon = runif(n_points, min = -180, max = 180),
    lat = runif(n_points, min = -90, max = 90)
    )

# Extract relevant value of z from grid for each point in occ_points
occ_points$z <- extract(r0, occ_points, cellnumbers = F)
tomjwebb commented 8 years ago
# Combining marine regions with environmental data
# use raster::mask to clip a gridded raster to a spatialPolygon

# Get an example spatial polygon using marine regions
devtools::install_github("sckott/mregions")
library("mregions")
res <- region_shp(name = "Australian Exclusive Economic Zone")

# trim the global gridded raster to this region
r_masked <- mask(r0, res)
plot(r_masked)

# get statistics out of r0 and r_masked, e.g. mean 'temperature'
mean(r0@data@values)
# masked version contains NAs, so use na.rm = T
mean(r_masked@data@values, na.rm = T)
tomjwebb commented 8 years ago
##############
# Gridding environmental data from NetCDF files
# Data from: http://www.esrl.noaa.gov/psd/data/gridded/data.noaa.oisst.v2.html
# Direct link to data (~53MB): https://drive.google.com/file/d/0B7OcGQ7KA64UQzRiQVB6M2d4QnM/view?usp=sharing
# install.packages("RNetCDF")
library(RNetCDF)
marine <- open.nc("sst.mnmean.nc")
print.nc(marine)
marine1 <- read.nc(marine, unpack = T)

names(marine1)
# [1] "lat"       "lon"       "sst"       "time"      "time_bnds"
lapply(marine1, length)
# $lat
# [1] 180

# $lon
# [1] 360

# $sst
# [1] 26438400

# $time
# [1] 408

# $time_bnds
# [1] 816
# NB - 360 * 180 * 408 = 26438400

# create grid - this feels a bit clunky right now
# Find spatial resolution:
range(diff(marine1$lon))
# [1] 1 1
range(diff(marine1$lat))
# [1] -1 -1
spatial_res <- 1
# Create the base grid
r <- raster(ncols = 360/spatial_res, nrows = 180/spatial_res)

# populate it with data for a given time period
x <- seq(-180, (180 - spatial_res), by = spatial_res) + 0.5*spatial_res
y <- seq(-90, (90 - spatial_res), by = spatial_res) + 0.5*spatial_res
# create full set of coordinates
xy <- expand.grid(x, y)
# add one month of temperature data
time_period <- 1
obs_per_time <- with(marine1, length(lat) * length(lon))
time_index <- 1:obs_per_time + (time_period - 1)*obs_per_time
z <- marine1$sst[time_index]

# add the variable to the rasterized grid
r0 <- rasterize(xy, field = z, r)
# plot results
plot(r0)
# can now get sst values for given spatial points as before
# locating points in both time may require converting time as given here into date
# Time is given as days since 01/01/1800, so set this as origin
origin <- as.Date("1800-1-1")
# Then date is obtained from 'time' as:
marine1$date <- origin + marine1$time
# Then use e.g. findInterval to locate your occurrence points in time,
# rasterize all required time periods and extract data accordingly

# Will work on putting this into a function…
tomjwebb commented 8 years ago
# First (clunky) attempt to match species occurrence data to environmental data in space and time
# From SST data described above
# (Data from: http://www.esrl.noaa.gov/psd/data/gridded/data.noaa.oisst.v2.html)
# (Direct link to data (~53MB): https://drive.google.com/file/d/0B7OcGQ7KA64UQzRiQVB6M2d4QnM/view?usp=sharing)

# Time is given as days since 01/01/1800, so set this as origin
origin <- as.Date("1800-1-1")
# Then date is obtained from 'time' as:
marine1$date <- origin + marine1$time
# findInterval doesn't work on dates:
findInterval(as.Date("1990-11-15"), marine1$time)

# instead convert target date to Julian:
as.numeric(marine1$date[1] - origin)
# findInterval:
findInterval(as.numeric(as.Date("2010-11-27") - origin), marine1$time)
# works

# Example occurrence data:
library("spocc")

# get occurrence data for Mola mola
res <- occ(query = 'Mola mola', from = 'obis', limit = 200)
# extract data frame
res_df <- res$obis$data$Mola_mola

# restrict to occurrences with dates within range of env data:
res_df <- subset(res_df, !is.na(eventDate))
res_df <- subset(res_df, eventDate >= min(marine1$date))
nrow(res_df)
# 167

# for simplicity, just retain key variables here: id, lat, lon, date
res_df <- select(res_df, id:longitude, eventDate)
res_df$eventDate <- as.Date(res_df$eventDate)

# find appropriate month from SST data for each event date
res_df$month_id <- findInterval(as.numeric(res_df$eventDate - origin), marine1$time)

# Function to extract environmental data matched in space and time to occurrence records
get_env_par_space_x_time <- function(occ_dat, env_dat = marine1){

    spatial_res <- abs(min(diff(env_dat$lon)))
    # Create the base grid
    r <- raster(ncols = 360/spatial_res, nrows = 180/spatial_res)

    # populate it with data for a given time period
    x <- seq(-180, (180 - spatial_res), by = spatial_res) + 0.5*spatial_res
    y <- seq(-90, (90 - spatial_res), by = spatial_res) + 0.5*spatial_res
    # create full set of coordinates
    xy <- expand.grid(x, y)

    # get appropriate time slice
    time_period <- occ_dat$month_id[1]
    obs_per_time <- with(env_dat, length(lat) * length(lon))
    time_index <- 1:obs_per_time + (time_period - 1)*obs_per_time
    z <- env_dat$sst[time_index]

    # add the variable to the rasterized grid
    r0 <- rasterize(xy, field = z, r)

    # get value for points within occ_points
    occ_points_temp <- extract(r0, select(occ_dat, longitude, latitude), cellnumbers = F)

    return(list(time_slice = occ_dat$month_id, id = occ_dat$id, env_par = occ_points_temp)) 
}

# All month ids in occurrence dataset:
month_ids <- unique(res_df$month_id)
# Get data for one month:
get_env_par_space_x_time(occ_dat = subset(res_df, month_id == month_ids[40]))
get_env_par_space_x_time(occ_dat = subset(res_df, month_id == month_ids[12]))

# Loop across months (NB slow - takes ~3.5 minutes for ~170 records):
system.time(
matched_env_dat <- sapply(month_ids, function(month_ids){
    get_env_par_space_x_time(occ_dat = subset(res_df, month_id == month_ids))
})
)

# need to match by ID to get back into df
id_match <- match(res_df$id, as.numeric(unlist(matched_env_dat[2,])))
# add time- and space-matched SST to occurrence data:
res_df$sst <- as.numeric(unlist(matched_env_dat[3,]))[id_match]
tomjwebb commented 8 years ago
# More efficient example
# Improved time-space matching using a raster brick approach

library(raster)

# Read in NetCDF file as a raster brick - need file of this name in working directory
# Could try download.file from http://www.esrl.noaa.gov/psd/data/gridded/data.noaa.oisst.v2.html
# But not sure how to get direct link to download
marine_brick <- brick("sst.mnmean.nc", lvar = 4, varname = "sst")
dim(marine_brick)
# 180 x 360 x 408

# To plot global temperature for a given month, specify month as y here:
plot(marine_brick, y = 2)

# coordinates are 0:360, -90:90,
marine_brick@extent
# class       : Extent 
# xmin        : 0 
# xmax        : 360 
# ymin        : -90 
# ymax        : 90 

# So longitude runs from 0 (-180) to 360 (+180), most occurrence data will need to be adjusted

# example of extracting temperature for a given coordinate (specified in cbind) and month
extract(marine_brick, cbind(180,0), layer = 1, nl = 1)
# layer is the FIRST time slice to consider; goes from there to end of time period, or for
# nl (number of layers) months if specified

# to get dates present in the raster brick
names(marine_brick@z)
marine_brick@z[["Date"]]
# NB Date is given as days since 01/01/1800, so set this as origin

# Function to get envirnomental parameters for given point in time and space
# env_dat is the environmental data, as a raster brick
# occ_dat is a single occurrence columns including latitude, lon_adj (longitude adjusted to 0-360 scale), and eventDate
# origin is the baseline date used for Julian day calculations
get_env_par_space_x_time <- function(
    env_dat = marine_brick, occ_dat, origin = as.Date("1800-1-1")){

    # calculate starting julian day for each month in env_dat
    month_intervals <- as.numeric(env_dat@z[["Date"]] - origin)
    # calculate julan day for the focal date (eventDate in occ_dat)
    focal_date <- as.numeric(occ_dat$eventDate - origin)

    # extract environmental variable (SST here) for this point
    env_val <- extract(
        marine_brick,
        cbind(occ_dat$lon_adj, occ_dat$latitude),
        layer = findInterval(focal_date, month_intervals),
        nl = 1
    )
    # Return the value
    as.numeric(env_val)
}

# Get some example occupancy data
# Example occurrence data:
library("spocc")
library(dplyr)

# get occurrence data for Mola mola
res <- occ(query = 'Mola mola', from = 'obis', limit = 200)
# extract data frame
res_df <- res$obis$data$Mola_mola

# restrict to occurrences with dates within range of env data:
res_df <- subset(res_df, !is.na(eventDate))
res_df <- subset(res_df, eventDate >= min(marine_brick@z[["Date"]]))
nrow(res_df)
# 167

# for simplicity, just retain key variables here: id, lat, lon, date
res_df <- select(res_df, id:longitude, eventDate)
res_df$eventDate <- as.Date(res_df$eventDate)
# adjust longitude
res_df$lon_adj <- res_df$longitude + 180

# example for a single space-time point:
get_env_par_space_x_time(occ_dat = res_df[1,])

# example looping through each occupancy point - ~2.3s for 167 points
i <- 1:nrow(res_df)
matched_env_dat <- sapply(i, function(i){get_env_par_space_x_time(occ_dat = res_df[i,])})

# add sst back to occurrence dataset
res_df$sst <- matched_env_dat
tomjwebb commented 8 years ago

Worth checking out the help files for raster::extract as the buffer and method arguments may be useful - buffer could be set to precision of location estimate, method allows interpolated values from nearest raster cells