fhui28 / CBFM

Spatio-temporal joint species distribution modeling using community-level basis functions
13 stars 2 forks source link

Modified CBFM predictions for presence-absences responses #21

Open fhui28 opened 2 years ago

fhui28 commented 2 years ago

Requires downloading additionalpredictfns.R off the main Github page

library(autoFRK)
library(FRK)
library(MASS)
library(mvabund)
library(mvtnorm)
library(ROCR)
library(sp)
library(RandomFields)
library(tidyverse)
source("additionalpredictfns.R")

##------------------------------
## **Example 1a: Fitting a CBFM to spatial multivariate presence-absence data**
## simulated from a spatial latent variable model
## Please note the data generation process (thus) differs from CBFM.
##------------------------------
set.seed(2021)
num_sites <- 1000 # 500 (units) sites for training set + 500 sites for testing.
num_spp <- 50 # Number of species
num_X <- 4 # Number of regression slopes

spp_slopes <- matrix(runif(num_spp * num_X, -1, 1), nrow = num_spp)
spp_intercepts <- runif(num_spp, -2, 0)

# Simulate spatial coordinates and environmental covariate components
# We will use this information in later examples as well
xy <- data.frame(x = runif(num_sites, 0, 5), y = runif(num_sites, 0, 5))
X <- rmvnorm(num_sites, mean = rep(0,4))
colnames(X) <- c("temp", "depth", "chla", "O2")
dat <- data.frame(xy, X)
mm <- model.matrix(~ temp + depth + chla + O2 - 1, data = dat) %>%
scale %>%
as.matrix

# Simulate latent variable component
# We will use this information in later examples as well
true_lvs <- RFsimulate(model = RMexp(var=1, scale=2),
x = xy$x, y = xy$y, n = 2)@data %>%
as.matrix
spp_loadings <- matrix(runif(num_spp * 2, -1, 1), nrow = num_spp)
set.seed(NULL)

# Simulate spatial multivariate abundance data (presence-absence)
# We will use this information in later examples as well
eta <- tcrossprod(cbind(1,mm), cbind(spp_intercepts,spp_slopes)) +
tcrossprod(true_lvs, spp_loadings)
simy <- matrix(rbinom(num_sites * num_spp, size = 1,
prob = plogis(eta)), nrow = num_sites)

# Form training and test sets
dat_train <- dat[1:500,]
dat_test <- dat[501:1000,]
simy_train <- simy[1:500,]
simy_test <- simy[501:1000,]
rm(X, mm, spp_loadings, true_lvs, xy, simy, dat)

# Set up spatial basis functions for CBFM -- Most users will start here!
# We will also use this basis functions in some later examples
num_basisfunctions <- 25 # Number of spatial basis functions to use
# Training set basis functions
train_basisfunctions <- mrts(dat_train[,c("x","y")], num_basisfunctions) %>%
as.matrix %>%
{.[,-(1)]} # Remove the first intercept column
# Testing set basis functions
test_basisfunctions <- mrts(dat_train[,c("x","y")], num_basisfunctions) %>%
predict(newx = dat_test[,c("x","y")]) %>%
as.matrix %>%
{.[,-c(1)]}

# Fit CBFM
tic <- proc.time()
useformula <- ~ temp + depth + chla + O2
fitcbfm <- CBFM(y = simy_train, formula_X = useformula, data = dat_train,
B_space = train_basisfunctions, family = binomial(), control = list(trace = 1))
toc <- proc.time()
toc - tic

## Not taking into account parameter uncertainty
predict_PA_CBFM(fitcbfm, newdata = dat_test, type = "response", new_B_space = test_basisfunctions) # Predict probabilities
predict_PA_CBFM(fitcbfm, newdata = dat_test, type = "y", new_B_space = test_basisfunctions, num_sims = 100) # An array based on simulating responses off the predicted probabilities 100 times

## Taking into account parameter uncertainty
predict_PA_CBFM(fitcbfm, newdata = dat_test, type = "response", new_B_space = test_basisfunctions, parameter_uncertainty = TRUE, num_sims = 100) # An array based on simulating parameters off their large sample distribution 100 times, then constructing predicted probabilities from each of these
predict_PA_CBFM(fitcbfm, newdata = dat_test, type = "y", new_B_space = test_basisfunctions, parameter_uncertainty = TRUE, num_sims = 100) # An array based on that builds on the above, but then further simulates responses