Below is some code to simulate the Dawid-Skene model and the basic parameters I use in my simulations.
Best,
Jeffrey
library(rater)
#' Simulate incomplete data from the Dawid-Skene model.
#'
#' @param I The number of ratings.
#' @param J The number of raters.
#' @param K The number of categories.
#' @param n_ratings The total number of ratings in the simulated data.
#' @param pi The pi parameter
#' @param theta The theta parameter
#'
#' @return A list with two components:
#' * z: The latent class of each item;
#' * data: The simulated data in a long format.
#'
simulate_ds_incomplete <- function(I, J, K,
n_ratings,
pi, theta) {
# We need there to be at least one rating per item.
stopifnot(n_ratings > I)
n_extra_ratings <- n_ratings - I
z <- sample(1:K, size = I, prob = pi, replace = TRUE)
p <- rep(1 / I, I)
n_extra_ratings_per_item <- rmultinom(1, n_extra_ratings, p)
# Add in the minimal number of ratings which are distributed perfectly
# uniformly.
n_ratings_per_item <- n_extra_ratings_per_item + 1
# NB: This is in ragged list form
rater_indexes <- lapply(n_ratings_per_item, function(x) {
# We currently sample from the raters uniformly - this could be extended.
sample(1:J, size = x, replace = TRUE)
})
# Use rater_indexes as a placeholder.
item_indexes <- rep(1:I, lengths(rater_indexes))
out <- cbind(item_indexes, unlist(rater_indexes), 0)
for (i in 1:nrow(out)) {
out[i, 3] <- sample(1:K, 1, prob = theta[out[i, 2], z[[out[i, 1]]], ])
}
colnames(out) <- c("item", "rater", "rating")
list(
z = z,
data = out
)
}
#' Build the theta parameter of the Dawid-Skene model
#'
#' @param on_diag The on diagonal entries of each matrix
#' @param J The number of raters (number of matrices)
#' @param K The number of categories (the dimensions of each matrix)
#'
#' @return A c(J, K, K) array; the theta parameter
#'
build_theta <- function(on_diag, J, K) {
stopifnot(length(on_diag) == 1)
stopifnot(is.numeric(on_diag))
theta <- array(0, dim = c(J, K, K))
for (j in 1:J) {
theta[j, ,] <- array((1 - on_diag) / (K - 1))
for (k in 1:K) {
theta[j, k, k] <- on_diag
}
}
theta
}
I <- 100
K <- 5
J <- 5
n_ratings <- 500
pi <- rep(1 / K, K)
on_diag <- 0.7
theta <- build_theta(0.7, J, K)
sim <- simulate_ds_incomplete(I, J, K, n_ratings, pi, theta)
out <- rater(sim$data, "dawid_skene")
Hi Kate,
Below is some code to simulate the Dawid-Skene model and the basic parameters I use in my simulations.
Best, Jeffrey