Example from Amael Le Squin for residual checks with STAN - to be integrated with the help
#### Example from https://cran.r-project.org/web/packages/DHARMa/vignettes/DHARMaForBayesians.html ran with Stan
library(data.table)
library(cmdstanr)
library(DHARMa)
#### Create data
set.seed(123)
dat = DHARMa::createData(200, overdispersion = 0.2)
Data = as.list(dat)
Data$nobs = nrow(dat)
Data$nGroups = length(unique(dat$group))
#### Fit data
## Common variables
n_chains = 3
maxIter = 3000
## Compile model
model = cmdstan_model("./florian_eg.stan")
## Run model
results = model$sample(data = Data, parallel_chains = n_chains, refresh = 50, chains = n_chains,
iter_warmup = round(maxIter/2), iter_sampling = round(maxIter/2), save_warmup = FALSE)
n_sampling = results$metadata()$iter_sampling
#### Compute residuals
## Get parameters
intercept_array = as.vector(results$draws("intercept")) # dimensions: iter_sampling * n_chains * 1
env_array = as.vector(results$draws("env")) # dimensions: iter_sampling * n_chains * 1
## Posterior distributions
# Posterior lambda
posteriorPredDistr = matrix(data = 0, nrow = n_sampling*n_chains, ncol = nrow(dat))
for (i in 1:nrow(dat))
posteriorPredDistr[, i] = exp(intercept_array + env_array*Data$Environment1[i])
# Posterior response variable
posteriorPredSim = results$draws("observedResponseSim") # dimension: iter_sampling * n_chains * nobs
# Reshape posteriorPredSim as a matrix nobs x n_repetition
posteriorPredSim_matrix = matrix(data = 0, nrow = nrow(dat), ncol = n_sampling*n_chains)
for (obs in 1:nrow(posteriorPredSim_matrix))
{
col_start = 1
for (chain in 1:n_chains)
{
col_end = chain*n_sampling
posteriorPredSim_matrix[obs, col_start:col_end] = posteriorPredSim[, chain, obs]
col_start = col_end + 1
}
}
## Create dharma data for the residuals
sim = createDHARMa(simulatedResponse = posteriorPredSim_matrix,
observedResponse = dat$observedResponse,
fittedPredictedResponse = apply(posteriorPredDistr, 2, median),
integerResponse = TRUE)
## Plot residuals
plot(sim)
########################################################
############### RANDOM EFFECT PART ###############
########################################################
#### Add a random effect.
## Explanations:
# By default (see ?createData), the intercept is 0, the fixed effect is 1, the number of groups is 10, the random effect is on
# the intercept and its variance is 1. We create the expected result below and then run a stan model accounting for the random effect.
expected_results = c(intercept_mean = 0, env = 1, randEff_var = 1)
#### Create stan data
## Create indexing for groups (used in Stan to compute likelihood)
# Convert data to data table and order it by group
setDT(dat)
setorder(dat, group)
# Index
ls_groups = as.integer(dat[, unique(group)])
indices = integer(Data$nGroups + 1)
for (i in 1:Data$nGroups)
indices[i] = min(which(dat[, group] == ls_groups[i]))
indices[Data$nGroups + 1] = Data$nobs + 1
## Data list for stan
Data = as.list(dat)
Data$nobs = nrow(dat)
Data$nGroups = length(unique(dat$group))
Data$indices = indices
#### Fit data
## Common variables
n_chains = 3
maxIter = 3000
## Compile model
model = cmdstan_model("./florian_eg_randEff.stan")
## Run model
results = model$sample(data = Data, parallel_chains = n_chains, refresh = 50, chains = n_chains,
iter_warmup = round(maxIter/2), iter_sampling = round(maxIter/2), save_warmup = FALSE)
results$cmdstan_diagnose()
results$print(names(expected_results))
print(expected_results)
n_sampling = results$metadata()$iter_sampling
#### Compute residuals
## Posterior distributions
# Posterior lambda
posteriorPredDistr = results$draws("lambda") # dimension: iter_sampling * n_chains * nobs
posteriorPredDistr_matrix = matrix(data = 0, nrow = n_sampling*n_chains, ncol = nrow(dat)) # One obs per column
for (i in 1:nrow(dat))
posteriorPredDistr_matrix[, i] = as.vector(posteriorPredDistr[,,i])
# Posterior response variable
posteriorPredSim = results$draws("observedResponseSim") # dimension: iter_sampling * n_chains * nobs
# Reshape posteriorPredSim as a matrix nobs x n_repetition
posteriorPredSim_matrix = matrix(data = 0, nrow = nrow(dat), ncol = n_sampling*n_chains)
for (obs in 1:nrow(posteriorPredSim_matrix))
{
col_start = 1
for (chain in 1:n_chains)
{
col_end = chain*n_sampling
posteriorPredSim_matrix[obs, col_start:col_end] = posteriorPredSim[, chain, obs]
col_start = col_end + 1
}
}
## Create dharma data for the residuals
sim = createDHARMa(simulatedResponse = posteriorPredSim_matrix,
observedResponse = dat$observedResponse,
fittedPredictedResponse = apply(posteriorPredDistr_matrix, 2, median),
integerResponse = TRUE)
## Plot residuals
plot(sim)
florian_eg.stan
data {
// Number of data
int<lower = 0> nobs;
// Observations
int observedResponse [nobs];
// Explanatory variables
vector [nobs] Environment1;
}
parameters {
real intercept;
real env;
}
model {
// Declare variables
vector [nobs] lambda = exp(intercept + env*Environment1);
// Priors
target += normal_lpdf(intercept | 0, 1e4);
target += normal_lpdf(env | 0, 1e4);
// Likelihood
target += poisson_lpmf(observedResponse | lambda);
}
generated quantities {
int<lower = 0> observedResponseSim [nobs] = poisson_rng(exp(intercept + env*Environment1));
}
florian_eg_randEff.stan
data {
// Number of data
int<lower = 1> nobs;
int<lower = 1> nGroups;
// Indexing (index of the beginning of each group)
array [nGroups + 1] int indices; // This syntax is compatible only from Stan 2.29!
// Observations
array [nobs] int observedResponse;
// Explanatory variables
vector [nobs] Environment1;
}
parameters {
real intercept_mean;
array [nGroups] real intercepts; // This syntax is compatible only from Stan 2.29!
real env;
// Random effect
real<lower = 0> randEff_var;
}
model {
// Declare variables
vector [nobs] lambda;
for (i in 1:nGroups)
lambda[indices[i]:(indices[i + 1] - 1)] = exp(intercepts[i] + env*Environment1[indices[i]:(indices[i + 1] - 1)]);
// Priors
target += normal_lpdf(intercept_mean | 0, 1e4);
target += normal_lpdf(intercepts | intercept_mean, randEff_var);
target += normal_lpdf(env | 0, 1e4);
target += gamma_lpdf(randEff_var | 1.0/1000, 1.0/1000); // Give an average of 1, and a variance of 1000
// Likelihood
target += poisson_lpmf(observedResponse | lambda);
}
generated quantities {
array [nobs] int<lower = 0> observedResponseSim;
vector<lower = 0> [nobs] lambda;
for (i in 1:nGroups)
{
observedResponseSim[indices[i]:(indices[i + 1] - 1)] = poisson_rng(exp(intercepts[i] + env*Environment1[indices[i]:(indices[i + 1] - 1)]));
lambda[indices[i]:(indices[i + 1] - 1)] = exp(intercepts[i] + env*Environment1[indices[i]:(indices[i + 1] - 1)]);
}
}
Example from Amael Le Squin for residual checks with STAN - to be integrated with the help
florian_eg.stan
florian_eg_randEff.stan