DistanceDevelopment / dsims

New simulation R library
1 stars 2 forks source link

Inconsistent truncation distances across design, detectability and analyses causing strange results #76

Open erex opened 1 year ago

erex commented 1 year ago

I've been using a simulation in the Intro workshop to demonstrate how adding group size as a covariate can resolve the size bias issue. Formerly I used DSsim, showing the desired effect of group size in the detection function to produce unbiased abundance estimates. Recently I tried to do the same using dsims. This simulation produces results with a ~13% bias.

Perhaps I am not duplicating the simulation as I think I am. Code provided below using both packages: @LHMarshall

# simulation with DSsim
library(extraDistr)
library(DSsim)
library(Distance)
eg.region <- make.region()
covariate.list <- list()
covariate.list$size <- list(list("ztruncpois", list(mean = 10)))
pop.desc <- make.population.description(covariates = covariate.list, N=200)
cov.params <- list(size = c(0.10))
detect <- make.detectability(scale.param = 10, 
                             cov.param = cov.params, 
                             truncation = 80)
plot(detect, pop.desc)
my.pop <- generate.population(pop.desc, detect, eg.region)
transects <- make.design()
simulation <- make.simulation()
# thelines <- generate.transects(simulation)
size.cov <- make.simulation(reps=150, region=eg.region, design=transects, 
                            pop=pop.desc, det=detect,
                            ddf.analyses.list=make.ddf.analysis.list(dsmodel=list(~cds(key="hn",
                                                                                       formula=~size)),truncation=80))
size.cov.sim <- run(size.cov, run.parallel = TRUE)
hist(size.cov.sim@results$expected.size[1,1,1:150], main="Computed average group size\nGroup size covariate", xlab="Group size")
abline(v=unname(covariate.list$size[[1]][[2]][1]), lwd=2, lty=3)
indiv <- size.cov.sim@results$individuals$N[1,1:6,151]
# average over simulations should be unbiased
indiv[1]

#
#   same simulation with dsims
#

detach("package:Distance", unload = TRUE)
detach("package:DSsim", unload = TRUE)
detach("package:extraDistr", unload = TRUE)

# =====================================

library(dsims)
library(Distance)
library(extraDistr)
eg.region <- make.region()
covariate.list <- list()
covariate.list$size <- list(list(distribution="ztruncpois", 
                                 mean = 10))
pop.desc <- make.population.description(region = eg.region,
                                        density = make.density(region=eg.region),
                                        covariates = covariate.list, 
                                        N=200)
cov.params <- list(size = c(0.10))
detect <- make.detectability(scale.param = 10, 
                             cov.param = cov.params, 
                             truncation = 80)
plot(detect, pop.desc)
my.pop <- generate.population(pop.desc, detect, eg.region)
transects <- make.design()
simulation <- make.simulation()
detmodel.size <- make.ds.analysis(dfmodel = list(~size),
                                  key = "hn",
                                  criteria = "AIC",
                                  truncation=80)
size.cov <- make.simulation(reps=150, design=transects, 
                            pop=pop.desc, det=detect,
                            ds.analysis = detmodel.size)
size.cov.survey <- run.survey(size.cov)
runsim.cov <- run.simulation(size.cov, run.parallel = TRUE)
hist(runsim.cov@results$expected.size[1,1,1:150], 
     main="Computed average group size\nGroup size covariate", 
     xlab="Group size", xlim=c(8,12))
abline(v=unname(covariate.list$size[[1]][[2]][1]), lwd=2, lty=3)
indiv <- runsim.cov@results$individuals$N[1,1:6,151]
# average over simulations should be unbiased (but is not)
indiv[1]
erex commented 1 year ago

Modification to code above, slightly prettier, large number of replicates, histogram of results. Same result however, DSsim results show no bias, dsims result shows ~13% positive bias.

# simulation with DSsim
library(extraDistr)
library(DSsim)
library(Distance)

nsims <- 1000

eg.region <- make.region()
covariate.list <- list()
covariate.list$size <- list(list("ztruncpois", list(mean = 10)))
pop.desc <- make.population.description(covariates = covariate.list, N=200)
cov.params <- list(size = c(0.10))
detect <- make.detectability(scale.param = 10, 
                             cov.param = cov.params, 
                             truncation = 80)
plot(detect, pop.desc)
my.pop <- generate.population(pop.desc, detect, eg.region)
transects <- make.design()
simulation <- make.simulation()
# thelines <- generate.transects(simulation)
size.cov <- make.simulation(reps=nsims, region=eg.region, design=transects, 
                            pop=pop.desc, det=detect,
                            ddf.analyses.list=make.ddf.analysis.list(dsmodel=list(~cds(key="hn",
                                                                                       formula=~size)),truncation=80))
size.cov.sim <- run(size.cov, run.parallel = TRUE)
# hist(size.cov.sim@results$expected.size[1,1,1:nsims], 
#      main="Computed average group size\nGroup size covariate", 
#      xlab="Group size")
# abline(v=unname(covariate.list$size[[1]][[2]][1]), lwd=2, lty=3)
indiv <- size.cov.sim@results$individuals$N[1,1:6,nsims+1] # just the mean
# average over simulations should be unbiased
indiv[1]

#
#   same simulation with dsims
#

detach("package:Distance", unload = TRUE)
detach("package:DSsim", unload = TRUE)
detach("package:extraDistr", unload = TRUE)

# =====================================

library(dsims)
library(Distance)
library(extraDistr)
eg.region <- make.region()
covariate.list <- list()
covariate.list$size <- list(list(distribution="ztruncpois", 
                                 mean = 10))
pop.desc <- make.population.description(region = eg.region,
                                        density = make.density(region=eg.region),
                                        covariates = covariate.list, 
                                        N=200)
cov.params <- list(size = c(0.10))
detect <- make.detectability(scale.param = 10, 
                             cov.param = cov.params, 
                             truncation = 80)
plot(detect, pop.desc)
my.pop <- generate.population(pop.desc, detect, eg.region)
transects <- make.design()
simulation <- make.simulation()
detmodel.size <- make.ds.analysis(dfmodel = list(~size),
                                  key = "hn",
                                  criteria = "AIC",
                                  truncation=80)
size.cov <- make.simulation(reps=nsims, design=transects, 
                            pop=pop.desc, det=detect,
                            ds.analysis = detmodel.size)
size.cov.survey <- run.survey(size.cov)
runsim.cov <- run.simulation(size.cov, run.parallel = TRUE)
# hist(runsim.cov@results$expected.size[1,1,1:nsims], 
#      main="Computed average group size\nGroup size covariate", 
#      xlab="Group size", xlim=c(8,12))
# abline(v=unname(covariate.list$size[[1]][[2]][1]), lwd=2, lty=3)
indiv <- runsim.cov@results$individuals$N[1,1:6,nsims+1] # just the mean
# average over simulations should be unbiased (but is not)
indiv[1]

# histogram of the overlap of the replicate estimates
dssim <- size.cov.sim@results$individuals$N[1,1,1:nsims]
dsims <- runsim.cov@results$individuals$N[1,1,1:nsims]

myblue <- rgb(0, 0, 255, max = 255, alpha = 125, names = "blue50")
myyellow <- rgb(255, 0, 0, max = 255, alpha = 125, names = "yellow50")

lower <- min(c(dssim, dsims), na.rm=TRUE)
upper <- max(c(dssim, dsims), na.rm=TRUE)
nice <- pretty(lower:upper, n=70)
hist(dssim, breaks=nice, col=myyellow, 
     main="Simulated abundance estimates",
     xlab="Abundance")
hist(dsims, breaks=nice, add=TRUE, col=myblue)
abline(v=mean(dssim, na.rm=TRUE), lwd=3, lty=3, col="salmon")
abline(v=mean(dsims, na.rm=TRUE), lwd=3, lty=3, col="blue")
LHMarshall commented 1 year ago

@erex This looks to be a truncation distance issue. Although the truncation is set to 80 in both the detectability and the analysis there is no truncation specified in the design and so the default value of 50 is used. There should be a warning when the simulation is created about this warning the user that the design truncation is different to the detectability and analysis - so that needs fixing.

Changing the design truncation to 80 all but eliminates the bias. Also note that changing the analysis truncation to 50 to match the default design truncation reduces the bias to only around 2%.

LHMarshall commented 1 year ago

A further strange example with inconsistent truncation distances. Here even though the available animals are clipped to the covered area... the covered area is the whole area due to the truncation specified in the design being half the spacing. As truncation is not specified in the analyses then detections happen at greater distances than the truncation specified in design and detectability.

library(dsims)

outer <- matrix(c(0,0,15,0,15,10,0,10,0,0),ncol=2, byrow=TRUE)
pol1 <- sf::st_polygon(list(outer))
pol2 <- sf::st_polygon(list(outer + 15))
pol3 <- sf::st_polygon(list(outer + 30))
sfc <- sf::st_sfc(pol1,pol2,pol3)
strata.names <- c("SW", "central", "NE")
mp1 <- sf::st_sf(strata = strata.names, geom = sfc)

region <- make.region(region.name = "study.area", 
                      strata.name = strata.names, 
                      shape = mp1)
plot(region)

density <- make.density(region = region,
                        x.space = 0.25,
                        constant = rep(1,3))

covs <- list()
covs$size <- list(list(distribution = "ztruncpois", mean = 25),
                  list(distribution = "ztruncpois", mean = 12),
                  list(distribution = "ztruncpois", mean = 3))
popdesc <- make.population.description(region = region,
                                       density = density,
                                       covariates = covs,
                                       N = rep(100,3),
                                       fixed.N = TRUE)

design <- make.design(region,
                      spacing = 1,
                      truncation = 0.5)

detect <- make.detectability(scale.param = 0.5,
                             truncation = 0.5)

sim <- make.simulation(reps = 10,
                       design = design,
                       population.description = popdesc,
                       detectability = detect)

survey <- run.survey(sim)
plot(survey, region)
image
LHMarshall commented 1 year ago

Actually truncation argument not being implemented at all!

analyses <- make.ds.analysis(truncation = 0.5)

sim <- make.simulation(reps = 10,
                       design = design,
                       population.description = popdesc,
                       detectability = detect,
                       ds.analysis = analyses)

survey <- run.survey(sim)
plot(survey, region)
image