philchalmers / mirt

Multidimensional item response theory
https://philchalmers.github.io/mirt/

Generating item parameters from prior literature #215

Closed benlistyg closed 2 years ago

benlistyg commented 2 years ago

Hi Dr. Chalmers!

I was wondering if it would be possible to add functionality to the simdata function for generating item parameters based on values used in prior IRT simulation studies. I've done this for the (M/U)GRM, (M/U)GPCM, and (M/U)GGUM in both the traditional parametrization and slope/intercept form, and thought it might be useful to include in the examples for this function. Apologies if this isn't the best venue for this feature request. If there's a written mirt style guide, I can modify the code below and submit a PR.

Hope your semester is wrapping up well!

-Ben

# MGRM
# https://www.frontiersin.org/articles/10.3389/fpsyg.2016.00109/full
# https://www.tandfonline.com/doi/pdf/10.1080/00273171.2018.1455572?casa_token=LoPkmRJy-HEAAAAA:pLgXKDsTk9n2EqWiX1OSLiA1OclWtaDp9rNF16_DMDQZj6CfHR9h4xbgLmiOk-rBZysX1kbdBxag
# https://journals.sagepub.com/doi/pdf/10.1177/0013164420958060?casa_token=4vKSZeV4CpAAAAAA:4WICdv7fvp7osyTYwBN4J3csparhEYaX7RdDyxr_mtAqVjQFSVb_aCl-Q-vhrg-soP_CZOPXZNC5

generate_grm_item_params = function(n_dim, n_item, n_response_options){

  # Assign items to dimensions in equally sized blocks (simple structure);
  # assumes n_item is divisible by n_dim.
  generate_matrix = function(n_item, n_dim){
    split(seq_len(n_item), ceiling(seq_len(n_item) / (n_item / n_dim)))
  }

  a_matrix = matrix(0, nrow = n_item, ncol = n_dim)

  for(i in 1:n_dim){
    a_matrix[generate_matrix(n_item = n_item, n_dim = n_dim)[[i]], i] = NA
  }

  # The three-dimensional graded response model with four response categories for all items was used in this study. The a_jh's were randomly sampled from U[1.1, 2.8]

  a_matrix[is.na(a_matrix)] = runif(n = n_item, min = 1.1, max = 2.8)

  # A simple structure was assumed so that for each item only one of the three discrimination parameters was non-zero, and every dimension was represented by an equal number of items. The b_jk's ranged from [−2, 2] (De Ayala, 1994), and each was uniformly distributed along an equidistant interval within this range for each item. Thus, the three boundary parameters were sampled randomly from U[−2, −0.67], U[−0.67, 0.67], and U[0.67, 2], respectively.

  b_range = seq(-2, 2, length.out = n_response_options)

  b_matrix = matrix(NA, nrow = n_item, ncol = (n_response_options-1))

  if(n_response_options == 2){
    for(i in 1:(n_response_options-1)){
      b_matrix[,i] = runif(n = n_item, min = b_range[i], max = b_range[i+1])
    }
  } else{
    repeat{

      for(i in 1:(n_response_options-1)){

        b_matrix[,i] = runif(n = n_item, min = b_range[i], max = b_range[i+1])

      }

      # To avoid sparseness of the response matrix, only adjacent boundary parameters with distance of at least 0.5 apart were retained.

      if(all(apply(b_matrix, 1, function(x) all(diff(x) >= 0.5)))){
        break
      }
    }
  }

  # Convert to intercepts: for the GRM, d_ik = -a_i * b_ik (use mirt2traditional()
  # to double-check). Under simple structure each item has exactly one non-zero
  # slope, so a_matrix[a_matrix != 0] returns the item slopes in item order.

  d_matrix = -a_matrix[a_matrix != 0] * b_matrix

  # Inter-factor correlation set to 0.5

  rho_matrix = matrix(ncol = n_dim, nrow = n_dim, data = 0)

  rho_matrix = ifelse(row(rho_matrix)==col(rho_matrix),1,0.5)

  return(
    list(
      a = a_matrix,
      d = d_matrix,
      b = b_matrix,
      sigma = rho_matrix
    )
  )
}
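# A minimal usage sketch (not part of the generator itself): the returned values
# are intended to feed mirt::simdata(); the seed and design values below are arbitrary.

library(mirt)

set.seed(1234)
grm_pars = generate_grm_item_params(n_dim = 3, n_item = 30, n_response_options = 4)
grm_dat = simdata(a = grm_pars$a, d = grm_pars$d, N = 1000,
                  itemtype = 'graded', sigma = grm_pars$sigma)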
# MGGUM
# Wang and Wu (2016)
# Roberts, Donoghue, and Laughlin (2002)
# Nye et al. (2020)

# Item parameters were drawn from the literature; quotes from the Wang and Wu paper are inserted in their respective item parameter sections.

generate_ggum_item_params = function(n_dim, n_item, n_response_options){

  # Assign items to dimensions in equally sized blocks (simple structure);
  # assumes n_item is divisible by n_dim.
  generate_matrix = function(n_item, n_dim){
    split(seq_len(n_item), ceiling(seq_len(n_item) / (n_item / n_dim)))
  }

  a_matrix = matrix(0, nrow = n_item, ncol = n_dim)

  for(i in 1:n_dim){
    a_matrix[generate_matrix(n_item = n_item, n_dim = n_dim)[[i]], i] = NA
  }

  # True item discrimination parameters (α_i) were generated from a uniform distribution, which spanned the interval of (.5, 2)

  a_matrix[is.na(a_matrix)] = runif(n = n_item, min = 0.5, max = 2)

  b_matrix = matrix(0, nrow = n_item, ncol = n_dim)

  for(i in 1:n_dim){
    b_matrix[generate_matrix(n_item = n_item, n_dim = n_dim)[[i]], i] = NA
  }

  # Items (δ_i) were always located at equally distant positions on the latent continuum and always ranged from −2.0 to 2.0

  b_matrix[is.na(b_matrix)] = rep(seq(-2, 2, length.out = n_item / n_dim), times = n_dim)

  # The threshold parameters (τ_ik) were generated independently for each item. For a given item, the true τ_iC parameter was generated from a uniform (−1.4, −.4) distribution. Successive true τ_ik parameters were then generated with the following recursive equation:
  # τ_i(k−1) = τ_ik − .25 + e_i(k−1), for k = 2, 3, ..., C,
  # where e_i(k−1) denotes a random error term generated from a N(0, .04) distribution. The τ_ik parameters derived with this formula were not consistently ordered across the continuum within an item.

  if(n_response_options == 2){
    t_matrix = matrix(data = runif(n = n_item, min = -1.4, max = -.4), ncol = 1)
    t_matrix = -t_matrix
  }

  else{
    generate_t_matrix = function(n_item, n_response_options){

      out = replicate(n = n_item, ({

        t_vector = list()

        t_vector[[1]] = runif(n = 1, min = -1.4, max = -.4)

        e_vector = list()

        for(i in 2:(n_response_options-1)){

          e = rnorm(n = 1, mean = 0, sd = 0.04)

          # successive taus: previous tau minus .25 plus the random error (per the recursion quoted above)
          t_vector[[i]] = t_vector[[i-1]] - 0.25 + e

          e_vector[[i-1]] = e

        }

        list(unlist(t_vector), unlist(e_vector))

      })
      )

      return(t(out))

    }

    # Examine t_e_matrix to verify the recursion was applied correctly; it is not returned by default.

    t_e_matrix = generate_t_matrix(n_item = n_item, n_response_options = n_response_options)

    t_matrix = do.call(rbind, t_e_matrix[,1])

    t_matrix = -t_matrix
  }

  rho_matrix = matrix(ncol = n_dim, nrow = n_dim, data = 0)

  rho_matrix = ifelse(row(rho_matrix)==col(rho_matrix),1,0.5)

  return(  
    list(
      a = a_matrix,
      d = b_matrix,
      sigma = rho_matrix,
      t = t_matrix
    )
  )
}
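# A minimal usage sketch (not part of the generator itself), assuming the returned
# list is meant to line up with mirt::simdata()'s 'ggum' itemtype, with the item
# locations supplied through d and the thresholds through t; values are arbitrary.

library(mirt)

set.seed(1234)
ggum_pars = generate_ggum_item_params(n_dim = 2, n_item = 20, n_response_options = 4)
ggum_dat = simdata(a = ggum_pars$a, d = ggum_pars$d, N = 1000,
                   itemtype = 'ggum', sigma = ggum_pars$sigma, t = ggum_pars$t)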
# MGPCM
# https://journals.sagepub.com/doi/pdf/10.1177/0146621608327800?casa_token=I4qGT8cn2IoAAAAA:XJK6x1AM1-zoQj7f_eogNcK7jAqYZGDMgdXqTV9mQRsz_slu6YnqlqBFwsrlylkVDFfJHiEkNcho
# Matlock 2018: https://journals.sagepub.com/doi/pdf/10.1177/0013164416670211?casa_token=c3T0ITbryd4AAAAA:ZwsWwuUtwabhwq9ChflizBuZmIObQCZdIFhXL2jVq7dlVzZzRRu_M52w8r1KQl0zBEbF2L9DC_fQ 

generate_gpcm_item_params = function(n_dim, n_item, n_response_options){

  # Assign items to dimensions in equally sized blocks (simple structure);
  # assumes n_item is divisible by n_dim.
  generate_matrix = function(n_item, n_dim){
    split(seq_len(n_item), ceiling(seq_len(n_item) / (n_item / n_dim)))
  }

  a_matrix = matrix(0, nrow = n_item, ncol = n_dim)

  for(i in 1:n_dim){
    a_matrix[generate_matrix(n_item = n_item, n_dim = n_dim)[[i]], i] = NA
  }

  # Discrimination parameters for the GPCM and GRM were randomly sampled from a
  # lognormal(0, 0.5^2); 0.5^2 is read here as the variance of the log, so sdlog = 0.5

  a_matrix[is.na(a_matrix)] = rlnorm(n = n_item, meanlog = 0, sdlog = 0.5)

  # For five-category items, four location parameters of each item were randomly drawn from normal distributions with standard deviations of 1.0 and means of –1.5, –0.5, 0.5, and 1.5, respectively.

  b_range = seq(-1.5, 1.5, length.out = n_response_options-1)

  b_matrix = matrix(NA, nrow = n_item, ncol = n_response_options-1)

  if(n_response_options == 2){
    for(i in 1:(n_response_options-1)){
      b_matrix[,i] = rnorm(n = n_item, mean = 0)
    }
  } else{

    repeat{

      for(i in 1:length(b_range)){

        b_matrix[,i] = rnorm(n = n_item, mean = b_range[i])

      }

      # Re-sample until the absolute location parameters of each item sum to more than 1
      if(all(rowSums(abs(b_matrix)) > 1)){
        break
      }
    }
  }

  # Convert to intercepts.
  # The relationship between the slope/intercept values and the traditional GPCM
  # parameterization is fairly straightforward: a = a and d_k = -a * sum_{v=1}^{k} b_v
  # (with d_0 = 0 for the first category).
  # Reminder to myself: theta - b_ik is person location minus category location; the
  # category location breaks down into item location and category threshold (b_ik = b_i - tau_ik).

  if(n_response_options == 2){
    d_matrix = cbind(rep(0, n_item), -a_matrix[a_matrix != 0] * b_matrix)
  } else{
    d_matrix = cbind(rep(0, n_item), -a_matrix[a_matrix != 0] * t(apply(b_matrix, 1, cumsum)))
  }

  # Inter-factor correlations set to 0.5

  rho_matrix = matrix(ncol = n_dim, nrow = n_dim, data = 0)

  rho_matrix = ifelse(row(rho_matrix)==col(rho_matrix),1,0.5)

  return(  
    list(
      a = a_matrix,
      d = d_matrix,
      b = b_matrix,
      sigma = rho_matrix
    )
  )
}
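# A minimal usage sketch (not part of the generator itself), passing the generated
# values to mirt::simdata() with the 'gpcm' itemtype; the seed and design values
# are arbitrary.

library(mirt)

set.seed(1234)
gpcm_pars = generate_gpcm_item_params(n_dim = 3, n_item = 30, n_response_options = 5)
gpcm_dat = simdata(a = gpcm_pars$a, d = gpcm_pars$d, N = 1000,
                   itemtype = 'gpcm', sigma = gpcm_pars$sigma)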
philchalmers commented 2 years ago

Hi Ben,

I would agree that this information doesn't necessarily belong here, but in principle I like the fact that you've provided some functions for replicating previous simulation studies using IRT. So, perhaps performing the replications of the original simulations would be useful for researchers to read up on, and if you're comfortable using, say, SimDesign's workflow for performing experiments then you could add these examples to the associated SimDesign wiki.

I've included a few replications of experiments using IRT models (e.g., Kang T, Cohen AS. IRT Model Selection Methods for Dichotomous Items. Applied Psychological Measurement. 2007;31(4):331-358. doi:10.1177/0146621606292213), some of which appear in the "Into the Wild" subsection of the examples. So, if you have the time/intent I would be delighted to add some of these replication analyses to the wiki too, especially if the conclusions from these studies differ somewhat when replicated. HTH for now.
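For reference, a rough sketch (not from this thread) of how one of the generators above could slot into SimDesign's generate/analyse/summarise workflow; the design factors, fitted model, and summary statistic are placeholders.

# Sketch: plugging generate_grm_item_params() into SimDesign; values are arbitrary
library(SimDesign)
library(mirt)

Design <- createDesign(N = c(500, 1000), n_item = c(15, 30))

Generate <- function(condition, fixed_objects = NULL) {
  pars <- generate_grm_item_params(n_dim = 3, n_item = condition$n_item,
                                   n_response_options = 4)
  dat <- simdata(a = pars$a, d = pars$d, N = condition$N,
                 itemtype = 'graded', sigma = pars$sigma)
  list(dat = dat, pars = pars)
}

Analyse <- function(condition, dat, fixed_objects = NULL) {
  # placeholder analysis: fit an exploratory 3-factor graded model and record convergence
  mod <- mirt(dat$dat, 3, itemtype = 'graded', verbose = FALSE)
  c(converged = as.integer(extract.mirt(mod, 'converged')))
}

Summarise <- function(condition, results, fixed_objects = NULL) {
  colMeans(results)
}

res <- runSimulation(design = Design, replications = 10,
                     generate = Generate, analyse = Analyse, summarise = Summarise)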