DistanceDevelopment / mrds

R package for mark-recapture-distance-sampling analysis
GNU General Public License v3.0
4 stars 4 forks source link

Factor covariate ordering not checked when passed to MCDS #83

Closed LHMarshall closed 1 year ago

LHMarshall commented 1 year ago

Sometimes factor parameters are estimated in a different order in MCDS than in R this leads to incorrect parameter estimates being passed to mrds to create the final model. Reproducible example below:

# Simulate some distance data

# Generate random x values from a uniform
x <- runif(450, 0, 600)
# Generate some factor values
tip.colour <- c(rep("red",150), rep("blue",150), rep("yellow", 150))

sigma <- rep(NA,600)
sigma <- ifelse(tip.colour == "red", 150, sigma)
sigma <- ifelse(tip.colour == "blue", 250, sigma)
sigma <- ifelse(tip.colour == "yellow", 400, sigma)

p.detected <- exp(-x^2/(2*sigma^2))
plot(x, p.detected, col=tip.colour)

detected <- rbinom(length(p.detected), 1, p.detected)
index <- which(detected == 1)
n.obs <- sum(detected)

dist.data <- data.frame(Region.Label = "study_ar",
                        Area = 100,
                        Sample.Label = sample(1:10, n.obs, replace = TRUE),
                        Effort = 10,
                        object = 1:n.obs,
                        distance = x[index],
                        tip.colour = as.factor(tip.colour[index])) 

order.by.sampler <- order(dist.data$Sample.Label)
dist.data <- dist.data[order.by.sampler,]
#Re do object IDs
dist.data$object <- 1:n.obs

head(dist.data)

summary(dist.data)
attributes(dist.data$tip.colour)

dist.data$tip.colour <- relevel(dist.data$tip.colour, "blue")
sim.fit <- ds(dist.data, 
              transect="line", 
              key="hn", 
              formula = ~tip.colour,
              truncation=600,
              optimizer = "MCDS")

summary(sim.fit)
# Model       : Half-normal key function 
# AIC         :  3100.648 
# Optimisation:  MCDS.exe 
# 
# Detection function parameters
# Scale coefficient(s):  
#   estimate        se
# (Intercept)       6.0650017 0.2579102
# tip.colourred    -0.6103258 0.3110606
# tip.colouryellow -0.9536875 0.2587018
plot(sim.fit)

dist.data$tip.colour <- relevel(dist.data$tip.colour, "red")
attributes(dist.data$tip.colour)
sim.fit <- ds(dist.data, 
              transect="line", 
              key="hn", 
              formula = ~tip.colour,
              truncation=600,
              optimizer = "MCDS")

summary(sim.fit)
# Model       : Half-normal key function 
# AIC         :  3010.845 
# Optimisation:  MCDS.exe 
# 
# Detection function parameters
# Scale coefficient(s):  
#   estimate        se
# (Intercept)       6.0650017 0.3664951
# tip.colourblue   -0.9536875 0.3689361
# tip.colouryellow -0.6103258 0.3690810
plot(sim.fit)

# Double check expected out comes with R

dist.data$tip.colour <- relevel(dist.data$tip.colour, "blue")
attributes(dist.data$tip.colour)
sim.fit <- ds(dist.data, 
              transect="line", 
              key="hn", 
              formula = ~tip.colour,
              truncation=600,
              optimizer = "R")

summary(sim.fit)
# Model       : Half-normal key function 
# AIC         :  2926.819 
# Optimisation:  mrds (nlminb) 
# 
# Detection function parameters
# Scale coefficient(s):  
#   estimate         se
# (Intercept)       5.4545689 0.08871495
# tip.colourred    -0.3432753 0.13354674
# tip.colouryellow  0.6106041 0.18131084
plot(sim.fit)

dist.data$tip.colour <- relevel(dist.data$tip.colour, "red")
sim.fit <- ds(dist.data, 
              transect="line", 
              key="hn", 
              formula = ~tip.colour,
              truncation=600,
              optimizer = "R")

summary(sim.fit)
# Model       : Half-normal key function 
# AIC         :  2926.819 
# Optimisation:  mrds (nlminb) 
# 
# Detection function parameters
# Scale coefficient(s):  
#   estimate        se
# (Intercept)      5.1112936 0.0998218
# tip.colourblue   0.3432753 0.1335468
# tip.colouryellow 0.9538793 0.1869965
plot(sim.fit)

# Plots should be identical, parameter estimates should be equivalent.
LHMarshall commented 1 year ago
# Simulate some distance data
set.seed(527)

# Generate random x values from a uniform
x <- runif(450, 0, 600)
# Generate some factor values
tip.colour <- c(rep("red",150), rep("blue",150), rep("yellow", 150))

sigma <- rep(NA,600)
sigma <- ifelse(tip.colour == "red", 150, sigma)
sigma <- ifelse(tip.colour == "blue", 250, sigma)
sigma <- ifelse(tip.colour == "yellow", 400, sigma)

p.detected <- exp(-x^2/(2*sigma^2))
plot(x, p.detected, col=tip.colour)

detected <- rbinom(length(p.detected), 1, p.detected)
index <- which(detected == 1)
n.obs <- sum(detected)

dist.data <- data.frame(Region.Label = "study_ar",
                        Area = 100,
                        Sample.Label = sample(1:10, n.obs, replace = TRUE),
                        Effort = 10,
                        object = 1:n.obs,
                        distance = x[index],
                        tip.colour = as.factor(tip.colour[index])) 

order.by.sampler <- order(dist.data$Sample.Label)
dist.data <- dist.data[order.by.sampler,]
#Re do object IDs
dist.data$object <- 1:n.obs

head(dist.data)

summary(dist.data)
attributes(dist.data$tip.colour)

dist.data$tip.colour <- relevel(dist.data$tip.colour, "blue")
labels <- levels(dist.data$tip.colour)
if(length(labels) > 1){
  labels <- c(labels[2:length(labels)], labels[1])
}
cat(paste("FACTOR /NAME=tip.colour /LEVELS=", length(labels), " /LABELS=",
          paste(labels, collapse = ", "), ";", sep=""))
sim.fit <- ds(dist.data, 
              transect="line", 
              key="hn", 
              formula = ~tip.colour,
              truncation=600,
              optimizer = "MCDS")

summary(sim.fit)
# Model       : Half-normal key function 
# AIC         :  2952.413 
# Optimisation:  MCDS.exe 
# 
# Detection function parameters
# Scale coefficient(s):  
#   estimate        se
# (Intercept)       5.5511692 0.1002956
# tip.colourred    -0.5490796 0.1469813
# tip.colouryellow  0.3095405 0.1644351
plot(sim.fit)

dist.data$tip.colour <- relevel(dist.data$tip.colour, "red")
labels <- levels(dist.data$tip.colour)
if(length(labels) > 1){
  labels <- c(labels[2:length(labels)], labels[1])
}
cat(paste("FACTOR /NAME=tip.colour /LEVELS=", length(labels), " /LABELS=",
          paste(labels, collapse = ", "), ";", sep=""))
attributes(dist.data$tip.colour)
sim.fit <- ds(dist.data, 
              transect="line", 
              key="hn", 
              formula = ~tip.colour,
              truncation=600,
              optimizer = "MCDS")

summary(sim.fit)
# Model       : Half-normal key function 
# AIC         :  2952.413 
# Optimisation:  MCDS.exe 
# 
# Detection function parameters
# Scale coefficient(s):  
#   estimate        se
# (Intercept)      5.0020895 0.1074444
# tip.colourblue   0.5490795 0.1469813
# tip.colouryellow 0.8586200 0.1688904
plot(sim.fit)

dist.data$tip.colour <- relevel(dist.data$tip.colour, "yellow")
labels <- levels(dist.data$tip.colour)
if(length(labels) > 1){
  labels <- c(labels[2:length(labels)], labels[1])
}
cat(paste("FACTOR /NAME=tip.colour /LEVELS=", length(labels), " /LABELS=",
          paste(labels, collapse = ", "), ";", sep=""))
attributes(dist.data$tip.colour)
sim.fit <- ds(dist.data, 
              transect="line", 
              key="hn", 
              formula = ~tip.colour,
              truncation=600,
              optimizer = "MCDS")

summary(sim.fit)
# Model       : Half-normal key function 
# AIC         :  2952.413 
# Optimisation:  MCDS.exe 
# 
# Detection function parameters
# Scale coefficient(s):  
#   estimate        se
# (Intercept)     5.8607096 0.1303061
# tip.colourred  -0.8586199 0.1688905
# tip.colourblue -0.3095405 0.1644350
plot(sim.fit)

# Double check expected out comes with R

dist.data$tip.colour <- relevel(dist.data$tip.colour, "blue")
attributes(dist.data$tip.colour)
sim.fit <- ds(dist.data, 
              transect="line", 
              key="hn", 
              formula = ~tip.colour,
              truncation=600,
              optimizer = "R")

summary(sim.fit)
# Model       : Half-normal key function 
# AIC         :  2952.413 
# Optimisation:  mrds (nlminb) 
# 
# Detection function parameters
# Scale coefficient(s):  
#   estimate        se
# (Intercept)       5.5510488 0.1002714
# tip.colouryellow  0.3094895 0.1643849
# tip.colourred    -0.5489678 0.1469635
plot(sim.fit)

dist.data$tip.colour <- relevel(dist.data$tip.colour, "red")
sim.fit <- ds(dist.data, 
              transect="line", 
              key="hn", 
              formula = ~tip.colour,
              truncation=600,
              optimizer = "R")

summary(sim.fit)
# Model       : Half-normal key function 
# AIC         :  2952.413 
# Optimisation:  mrds (nlminb) 
# 
# Detection function parameters
# Scale coefficient(s):  
#   estimate        se
# (Intercept)      5.0020812 0.1074426
# tip.colourblue   0.5489672 0.1469634
# tip.colouryellow 0.8584573 0.1688549
plot(sim.fit)

dist.data$tip.colour <- relevel(dist.data$tip.colour, "yellow")
sim.fit <- ds(dist.data, 
              transect="line", 
              key="hn", 
              formula = ~tip.colour,
              truncation=600,
              optimizer = "R")

summary(sim.fit)
# Model       : Half-normal key function 
# AIC         :  2952.413 
# Optimisation:  mrds (nlminb) 
# 
# Detection function parameters
# Scale coefficient(s):  
#   estimate        se
# (Intercept)     5.8605382 0.1302615
# tip.colourred  -0.8584572 0.1688548
# tip.colourblue -0.3094894 0.1643850
plot(sim.fit)

# Plots should be identical, parameter estimates should be equivalent.