tmatta / lsasim

Simulate large scale assessment data
6 stars 5 forks source link

means & correlation matrix #17

Closed wleoncio closed 3 years ago

wleoncio commented 3 years ago

0. Setup

I've tested most of the values below. Not all tests are shown in this report; I only included the ones that produce errors/warnings or inconsistent results.

# Convenience wrapper around cluster_gen(): forwards every argument it
# receives unchanged and always passes `verbose = FALSE` and
# `calc_weights = FALSE`, so the calls in this report do not have to repeat
# those two settings.
cluster_gen_2 <- function(...) {
  cluster_gen(
    ...,
    verbose = FALSE,
    calc_weights = FALSE
  )
}
# Fix the RNG seed before defining the inputs used throughout the report.
set.seed(12334)
# Candidate values for the first argument of cluster_gen_2(). In the section
# shown below only n1, n6, n7 and n10 are actually used.
# NOTE(review): presumably each element gives the number of units at one
# level of the hierarchy (e.g. school > class > student) -- confirm against
# the cluster_gen() documentation.
n1 <- c(3, 6)
n2 <- c(groups = 4, people = 2)
n3 <- c(school = 3, class = 2, student = 5)
n4 <- c(20, 50)
n5 <- list(school = 3, class = c(2, 1, 3), student = c(20, 20, 10, 30, 30, 30))
n5a <- list(school = 3, class = c(2, 3, 3), student = c(20, 20, 10, 30, 30, 30))
# ranges() is not defined in this report.
# NOTE(review): presumably a helper from lsasim -- TODO confirm.
n6 <- list(school = 3, class = c(2, 1, 3), student = ranges(10, 50))
n6a <- list(school = 3, class = c(2, 3, 3), student = ranges(10, 50))
n7 <- list(school = 10, student = ranges(10, 50))
n8 <- list(school = 3, student = c(20, 20, 10))
n8a <- list(school = 3, class = c(2, 2, 2),student = c(20, 20, 10))
n8b <- list(school = 3, class = c(2, 3, 3),student = c(20, 20, 10, 5))
n8c <- list(school = 3, class = c(2, 1, 3),student = c(20, 20, 10))
n9 <- list(school = 10, class = c(2,1,3,1,1,1,2,1,2,1), student = ranges(10, 50))
n10 <- list(country = 2, school = 10, class = c(2,1,3,1,1,1,2,1,2,1), student = ranges(10, 50))
n11 <- list(culture = 2, country = 2, school = 10, class = c(2,1,3,1,1,1,2,1,2,1), student = ranges(10, 50))
n12 <- list(culture = 2, country = 2, district = 3, school = 10, class = c(2,1,3,1,1,1,2,1,2,1), student = ranges(10, 50))
# Not used in the section shown below.
N1 <- c(100, 20)

7. means & correlation matrix

Error and warning messages

Need to provide additional instructions about how to debug the errors.

# n_X and n_W need to be specified explicitly!
set.seed(12334)

#n1 <- c(3, 6)
#n7 <- list(school = 10, student = ranges(10, 50))
m1 <- matrix(c(1, 0.2, 0.3, 0.4,
               0.2, 1, 0.5, 0.7,
               0.3, 0.5, 1, 0.8,
               0.4, 0.7, 0.8, 1), 4, 4)
mc1 <- cluster_gen_2(n1, c_mean = c(0.1, 0.5, 0.001, 234), cor_matrix = m1)
## Error: length(c_mean) cannot be larger than n_X + theta
mc4 <- cluster_gen_2(n7, c_mean = c(0.1, 0.5, 0.001, 234), cor_matrix = m1)
## Error: length(c_mean) cannot be larger than n_X + theta
#Error: length(c_mean) cannot be larger than n_X + theta

mc1_1 <- cluster_gen_2(n1, n_X=4, c_mean = c(0.1, 0.5, 0.001, 234), cor_matrix = m1)
## Error: n_X + n_W + theta must not be different from ncol(cor_matrix). The former add up to 15, whereas the latter equals 4
#Error: n_X + n_W + theta must not be different from ncol(cor_matrix). The former add up to 9, whereas the latter equals 4

set.seed(12334)
mc7 <- cluster_gen_2(n7, n_X=4, n_W=0, c_mean = c(0.1, 0.5, 0.001, 234), cor_matrix = m1)
# NOTE(review): `mc1_2` is never assigned anywhere in this report; the object
# created on the previous line is `mc7`, so this call was presumably meant to
# be summarize_clusters(mc7) -- confirm with the author. As written, the
# call fails because the object does not exist, not because of cluster_gen().
summarize_clusters(mc1_2)
## Error in lapply(X = X, FUN = FUN, ...): object 'mc1_2' not found
set.seed(12334)
mc7_1 <- cluster_gen_2(n7, n_X=2, n_W=2, c_mean = c(0.5, 0.001), cor_matrix = m1)
summarize_clusters(mc7_1) #mean: q2 0.01560; cor_matrix: q1&q4, 0.2063252; q2&q3, 0.3781356;
## Summary statistics for all schools
##        q1                q2           q3        q4
##  Min.   :-3.8964   Min.   :-4.27695   1:10583   1:10232
##  Mean   : 0.5019   Mean   : 0.01560   2: 5652   2: 5652
##  Max.   : 4.7033   Max.   : 3.70541   3: 5253   3: 4669
##                                       4: 3981   4: 4643
##  Stddev.: 1        Stddev.: 1         5: 1263   5: 1536
##
##                                       Prop.     Prop.
##                                       1:0.396   1:0.383
##                                       2:0.211   2:0.211
##                                       3:0.197   3:0.175
##                                       4:0.149   4:0.174
##                                       5:0.047   5:0.057
##
##
##
##  Heterogeneous correlation matrix
##           q1        q2        q3        q4
## q1 1.0000000 0.2063252 0.2362506 0.2784801
## q2 0.2063252 1.0000000 0.3781356 0.4940810
## q3 0.2362506 0.3781356 1.0000000 0.4311451
## q4 0.2784801 0.4940810 0.4311451 1.0000000
                          #                              q2&q4, 0.4940810; q3&q4, 0.4311451

mc7_2 <- cluster_gen_2(n7, n_X=3, n_W=1, c_mean = c(0.5, 0.001, 234), cor_matrix = m1)
summarize_clusters(mc7_2) #cor_matrix: q1&q4, 0.3291191; q3&q4, 0.6709309
## Summary statistics for all schools
##        q1                q2                  q3        q4
##  Min.   :-3.5208   Min.   :-4.084603   Min.   :230.2   1:6145
##  Mean   : 0.4973   Mean   : 0.000749   Mean   :234.0   2:4767
##  Max.   : 4.6430   Max.   : 4.319822   Max.   :238.1   3:4574
##                                                        4:3567
##  Stddev.: 1.01     Stddev.: 1          Stddev.: 1      5:1592
##
##                                                        Prop.
##                                                        1:0.298
##                                                        2:0.231
##                                                        3:0.222
##                                                        4:0.173
##                                                        5:0.077
##
##
##
##  Heterogeneous correlation matrix
##           q1        q2        q3        q4
## q1 1.0000000 0.2059226 0.2955722 0.3291191
## q2 0.2059226 1.0000000 0.5129932 0.5895139
## q3 0.2955722 0.5129932 1.0000000 0.6709309
## q4 0.3291191 0.5895139 0.6709309 1.0000000
set.seed(12334)
m2 <- matrix(c(1, 0.5, 0.6,
               0.5, 1, 0.9,
               0.6, 0.9, 1), 3, 3)
mc2 <- cluster_gen_2(n7, c_mean = c(55, 2.34, 5001), cor_matrix = m2)
## Error: length(c_mean) cannot be larger than n_X + theta
summarize_clusters(mc2)
## Error in lapply(X = X, FUN = FUN, ...): object 'mc2' not found
#Error: length(c_mean) cannot be larger than n_X + theta

set.seed(12334)
mc3 <- cluster_gen_2(n7, c_mean = c(55, 2.34, 5001), cor_matrix = m2)
## Error: length(c_mean) cannot be larger than n_X + theta
#Error: length(c_mean) cannot be larger than n_X + theta

set.seed(12334)
mc3_1 <- cluster_gen_2(n7, n_X=3, n_W=0, c_mean = c(55, 2.34, 5001), cor_matrix = m2)
summarize_clusters(mc3_1)
## Summary statistics for all schools
##        q1              q2               q3
##  Min.   :50.81   Min.   :-2.014   Min.   :4996
##  Mean   :54.99   Mean   : 2.336   Mean   :5001
##  Max.   :58.95   Max.   : 6.083   Max.   :5005
##
##  Stddev.: 1.01   Stddev.: 0.99    Stddev.: 1
##
##
##
##  Heterogeneous correlation matrix
##           q1        q2        q3
## q1 1.0000000 0.5077625 0.6110242
## q2 0.5077625 1.0000000 0.8984665
## q3 0.6110242 0.8984665 1.0000000
set.seed(12334)
mc3_2 <- cluster_gen_2(n7, n_X=2, n_W=1, c_mean = c(55, 2.34), cor_matrix = m2)
summarize_clusters(mc3_2)
## Summary statistics for all schools
##        q1              q2         q3
##  Min.   :50.81   Min.   :-2.014   1:8290
##  Mean   :54.99   Mean   : 2.335   2:8959
##  Max.   :58.95   Max.   : 6.467   3:4911
##                                   4:2429
##  Stddev.: 1.01   Stddev.: 0.99    5:2143
##
##                                   Prop.
##                                   1:0.31
##                                   2:0.335
##                                   3:0.184
##                                   4:0.091
##                                   5:0.08
##
##
##
##  Heterogeneous correlation matrix
##           q1        q2        q3
## q1 1.0000000 0.5059057 0.5056551
## q2 0.5059057 1.0000000 0.7381464
## q3 0.5056551 0.7381464 1.0000000
#          q1        q2        q3
#q1 1.0000000 0.5059057 0.5056551
#q2 0.5059057 1.0000000 0.7381464
#q3 0.5056551 0.7381464 1.0000000

m3 <- matrix(c(1, 0.55, 0.77,
               0.55, 1, 0.33,
               0.77, 0.33, 1), 3, 3)
set.seed(12334)
mc5 <- cluster_gen_2(n6, c_mean = c(0.3, 0.35, 0.4), cor_matrix = m3)
## Error: length(c_mean) cannot be larger than n_X + theta
#Error: length(c_mean) cannot be larger than n_X + theta

m4 <- matrix(c(1, 0.55,
               0.55, 1), 2, 2)
set.seed(12334)
mc6 <- cluster_gen_2(n6, c_mean = c(0.7, 355), cor_matrix = m4)
## Error: length(c_mean) cannot be larger than n_X + theta
#Error: length(c_mean) cannot be larger than n_X + theta

set.seed(12334)
mc7 <- cluster_gen_2(n10, c_mean = c(20, 0.25, 50.54), cor_matrix = m3)
## Error: length(c_mean) cannot be larger than n_X + theta
#Error: length(c_mean) cannot be larger than n_X + theta

set.seed(12334)
mc8 <- cluster_gen_2(n10, c_mean = c(213, 234), cor_matrix = m4)
## Error: length(c_mean) cannot be larger than n_X + theta
#Error: length(c_mean) cannot be larger than n_X + theta

set.seed(12334)
mc8 <- cluster_gen_2(n10, n_X=2, n_W=0, c_mean = c(213, 234), cor_matrix = m4)
summarize_clusters(mc8)
## Summary statistics for all countries
##        q1              q2
##  Min.   :210.0   Min.   :230.6
##  Mean   :212.9   Mean   :233.6
##  Max.   :214.6   Max.   :235.7
##
##  Stddev.: 1.12   Stddev.: 1.26
##
##
##
##  Heterogeneous correlation matrix
##           q1        q2
## q1 1.0000000 0.6059107
## q2 0.6059107 1.0000000
## Summary statistics for all schools
##        q1              q2
##  Min.   :211.0   Min.   :231.5
##  Mean   :212.9   Mean   :233.9
##  Max.   :214.5   Max.   :235.7
##
##  Stddev.: 0.88   Stddev.: 0.9
##
##
##
##  Heterogeneous correlation matrix
##           q1        q2
## q1 1.0000000 0.5000034
## q2 0.5000034 1.0000000
## Summary statistics for all classes
##        q1              q2
##  Min.   :209.8   Min.   :230.5
##  Mean   :213.0   Mean   :234.0
##  Max.   :215.7   Max.   :237.0
##
##  Stddev.: 1.01   Stddev.: 1
##
##
##
##  Heterogeneous correlation matrix
##           q1        q2
## q1 1.0000000 0.5888858
## q2 0.5888858 1.0000000
wleoncio commented 3 years ago

All errors addressed in the commits above.