marklhc / pinsearch

Specification search for partial factorial invariance
https://marklhc.github.io/pinsearch/
GNU General Public License v3.0
0 stars 0 forks source link

Discrepancy between pinsearch result and manually specified model #3

Closed YichiZhang2024 closed 1 year ago

YichiZhang2024 commented 1 year ago

Based on the noninvariant parameters given by pinsearch, I manually specified a model and compare this model summary with the result of pinsearch. However, this manually specified model has different fit statistics and a different number of equality constraints. See the example below. The pinsearch result shows the number of equality constraints is 121, but the manually specified model shows the number of equality constraints is 123.

library(tidyverse)
library(lavaan)
#> This is lavaan 0.6-14
#> lavaan is FREE software! Please report any bugs.
library(pinsearch)
library(reprex)
library(pinsearch)

#### Load data ####

orig_dat <- haven::read_sav(file = "https://osf.io/download/wxjsg/")
## create four groups
dat <- orig_dat %>%
    filter_at(vars(class14, audit1:audit3), ~ !is.na(.)) %>%
    filter(eth %in% 1:4) %>%
    mutate_at(vars(class1:class15, audit1:audit10), as.numeric) %>%
    mutate(campuslive = factor(campuslive),
           eth = factor(eth, 1:4),
           group = case_when(
               # white male
               eth == 1 & gender == 1 ~ "1",
               # white female
               eth == 1 & gender == 2 ~ "2",
               # asian male
               eth == 2 & gender == 1 ~ "3",
               # asian female
               eth == 2 & gender == 2 ~ "4"
           )) %>%
    mutate(group = factor(group, 1:4)) %>%
    filter(group %in% 1:4) %>%
    arrange(group)
dat$class6 <- ifelse(dat$class6 > 3, 4, dat$class6)
dat[,9:23] <- sapply(dat %>% select("class1":"class15"), FUN = function(x) ifelse(x > 3, 4, x))

#### CLASS Invariance Testing ####

# baseline model
c_mod <- "
f1 =~ class1 + class2 + class3 + class4 + class5 + class6 +
      class7 + class8 + class9 + class10 + class11 + class12 +
      class13 + class14 + class15"

# Continuous
res <- pinSearch(c_mod,
                 data = dat,
                 group = "group",
                 group.label = 1:4,
                 type = "residual.covariances")
res
#> $`Partial Invariance Fit`
#> lavaan 0.6.14 ended normally after 69 iterations
#> 
#>   Estimator                                         ML
#>   Optimization method                           NLMINB
#>   Number of model parameters                       186
#>   Number of equality constraints                   121
#> 
#>   Number of observations per group:                   
#>     1                                              133
#>     2                                              259
#>     3                                               78
#>     4                                              106
#> 
#> Model Test User Model:
#>                                                       
#>   Test statistic                               819.672
#>   Degrees of freedom                               475
#>   P-value (Chi-square)                           0.000
#>   Test statistic for each group:
#>     1                                          206.594
#>     2                                          209.551
#>     3                                          202.417
#>     4                                          201.109
#> 
#> $`Non-Invariant Items`
#>        lhs     rhs group       type
#> 1  class14             2 intercepts
#> 2   class1             2 intercepts
#> 3   class9             3 intercepts
#> 4  class15             1 intercepts
#> 5  class10             3 intercepts
#> 6  class12             3 intercepts
#> 7   class4             4 intercepts
#> 8  class14 class14     3  residuals
#> 9   class6  class6     1  residuals
#> 10 class14 class14     4  residuals
#> 11  class7  class7     1  residuals
#> 12 class11 class11     1  residuals
#> 13  class1  class1     1  residuals
#> 14 class14 class14     1  residuals

# Specifying partial strict model syntax based on pinSearch results
mod_res <- "
f1 =~ c(NA, NA, NA, NA) * class1 + c(l1, l1, l1, l1) * class1 + c(l2, l2, l2, l2) * class2 + c(l3, l3, l3, l3) * class3 + c(l4, l4, l4, l4) * class4 +
c(l5, l5, l5, l5) * class5 + c(l6, l6, l6, l6) * class6 +
c(l7, l7, l7, l7) * class7 + c(l8, l8, l8, l8) * class8 +
c(l9, l9, l9, l9) * class9 + c(l10, l10, l10, l10) * class10 +
c(l11, l11, l11, l11) * class11 + c(l12, l12, l12, l12) * class12 +
c(l13, l13, l13, l13) * class13 + c(l14, l14, l14, l14) * class14 +
c(l15, l15, l15, l15) * class15

## common factor
f1 ~ c(0, NA, NA, NA) * 1
f1 ~~ c(1, NA, NA, NA) * f1

## intercepts
class1 ~ c(I1, I1m, I1, I1) * 1
class2 ~ c(I2, I2, I2, I2) * 1
class3 ~ c(I3, I3, I3, I3) * 1
class4 ~ c(I4, I4, I4, I4m) * 1
class5 ~ c(I5, I5, I5, I5) * 1
class6 ~ c(I6, I6, I6, I6) * 1
class7 ~ c(I7, I7, I7, I7) * 1
class8 ~ c(I8, I8, I8, I8) * 1
class9 ~ c(I9, I9, I9m, I9) * 1
class10 ~ c(I10, I10, I10m, I10) * 1
class11 ~ c(I11, I11, I11, I11) * 1
class12 ~ c(I12, I12, I12m, I12) * 1
class13 ~ c(I13, I13, I13, I13) * 1
class14 ~ c(I14, I14m, I14, I14) * 1
class15 ~ c(I15m, I15, I15, I15) * 1

## unique factor variances
class1 ~~ c(U1m, U1, U1, U1) * class1
class2 ~~ c(U2, U2, U2, U2) * class2
class3 ~~ c(U3, U3, U3, U3) * class3
class4 ~~ c(U4, U4, U4, U4) * class4
class5 ~~ c(U5, U5, U5, U5) * class5
class6 ~~ c(U6m, U6, U6, U6) * class6
class7 ~~ c(U7m, U7, U7, U7) * class7
class8 ~~ c(U8, U8, U8, U8) * class8
class9 ~~ c(U9, U9, U9, U9) * class9
class10 ~~ c(U10, U10, U10, U10) * class10
class11 ~~ c(U11m, U11, U11, U11) * class11
class12 ~~ c(U12, U12, U12, U12) * class12
class13 ~~ c(U13, U13, U13, U13) * class13
class14 ~~ c(U14m, U14, U14m, U14m) * class14
class15 ~~ c(U15, U15, U15, U15) * class15
"

par_res <- cfa(model = mod_res,
               data = dat,
               group = "group",
               group.label = 1:4)
summary(par_res, fit.measures = TRUE)
#> lavaan 0.6.14 ended normally after 61 iterations
#> 
#>   Estimator                                         ML
#>   Optimization method                           NLMINB
#>   Number of model parameters                       186
#>   Number of equality constraints                   123
#> 
#>   Number of observations per group:                   
#>     1                                              133
#>     2                                              259
#>     3                                               78
#>     4                                              106
#> 
#> Model Test User Model:
#>                                                       
#>   Test statistic                               855.699
#>   Degrees of freedom                               477
#>   P-value (Chi-square)                           0.000
#>   Test statistic for each group:
#>     1                                          233.319
#>     2                                          208.600
#>     3                                          210.331
#>     4                                          203.449
#> 
#> Model Test Baseline Model:
#> 
#>   Test statistic                              3642.119
#>   Degrees of freedom                               420
#>   P-value                                        0.000
#> 
#> User Model versus Baseline Model:
#> 
#>   Comparative Fit Index (CFI)                    0.882
#>   Tucker-Lewis Index (TLI)                       0.897
#> 
#> Loglikelihood and Information Criteria:
#> 
#>   Loglikelihood user model (H0)             -11457.308
#>   Loglikelihood unrestricted model (H1)     -11029.458
#>                                                       
#>   Akaike (AIC)                               23040.616
#>   Bayesian (BIC)                             23315.050
#>   Sample-size adjusted Bayesian (SABIC)      23115.051
#> 
#> Root Mean Square Error of Approximation:
#> 
#>   RMSEA                                          0.074
#>   90 Percent confidence interval - lower         0.066
#>   90 Percent confidence interval - upper         0.082
#>   P-value H_0: RMSEA <= 0.050                    0.000
#>   P-value H_0: RMSEA >= 0.080                    0.120
#> 
#> Standardized Root Mean Square Residual:
#> 
#>   SRMR                                           0.085
#> 
#> Parameter Estimates:
#> 
#>   Standard errors                             Standard
#>   Information                                 Expected
#>   Information saturated (h1) model          Structured
#> 
#> 
#> Group 1 [1]:
#> 
#> Latent Variables:
#>                    Estimate  Std.Err  z-value  P(>|z|)
#>   f1 =~                                               
#>     class1    (l1)    0.627    0.050   12.645    0.000
#>     class2    (l2)    0.792    0.061   13.035    0.000
#>     class3    (l3)   -0.385    0.043   -8.979    0.000
#>     class4    (l4)    0.737    0.058   12.773    0.000
#>     class5    (l5)    0.593    0.052   11.360    0.000
#>     class6    (l6)    0.462    0.042   10.988    0.000
#>     class7    (l7)    0.782    0.059   13.245    0.000
#>     class8    (l8)   -0.295    0.043   -6.844    0.000
#>     class9    (l9)    0.585    0.052   11.175    0.000
#>     class10  (l10)    0.585    0.053   10.934    0.000
#>     class11  (l11)    0.640    0.050   12.736    0.000
#>     class12  (l12)    0.662    0.054   12.248    0.000
#>     class13  (l13)    0.589    0.052   11.348    0.000
#>     class14  (l14)    0.630    0.052   12.224    0.000
#>     class15  (l15)    0.633    0.057   11.178    0.000
#> 
#> Intercepts:
#>                    Estimate  Std.Err  z-value  P(>|z|)
#>     f1                0.000                           
#>    .class1    (I1)    3.586    0.069   52.219    0.000
#>    .class2    (I2)    3.142    0.081   38.783    0.000
#>    .class3    (I3)    2.671    0.056   47.627    0.000
#>    .class4    (I4)    2.868    0.078   36.972    0.000
#>    .class5    (I5)    2.514    0.069   36.404    0.000
#>    .class6    (I6)    2.056    0.056   36.592    0.000
#>    .class7    (I7)    3.446    0.078   43.971    0.000
#>    .class8    (I8)    2.619    0.056   46.691    0.000
#>    .class9    (I9)    3.196    0.070   45.675    0.000
#>    .class10  (I10)    2.836    0.071   39.725    0.000
#>    .class11  (I11)    3.506    0.066   52.819    0.000
#>    .class12  (I12)    3.256    0.072   44.959    0.000
#>    .class13  (I13)    2.451    0.069   35.706    0.000
#>    .class14  (I14)    3.389    0.075   45.245    0.000
#>    .class15 (I15m)    2.474    0.099   25.085    0.000
#> 
#> Variances:
#>                    Estimate  Std.Err  z-value  P(>|z|)
#>     f1                1.000                           
#>    .class1   (U1m)    0.357    0.049    7.284    0.000
#>    .class2    (U2)    0.615    0.041   14.956    0.000
#>    .class3    (U3)    0.826    0.050   16.618    0.000
#>    .class4    (U4)    0.609    0.040   15.203    0.000
#>    .class5    (U5)    0.809    0.050   16.113    0.000
#>    .class6   (U6m)    0.870    0.109    7.964    0.000
#>    .class7   (U7m)    0.385    0.056    6.899    0.000
#>    .class8    (U8)    1.039    0.062   16.805    0.000
#>    .class9    (U9)    0.834    0.052   16.160    0.000
#>    .class10  (U10)    0.917    0.056   16.235    0.000
#>    .class11 (U11m)    0.356    0.049    7.247    0.000
#>    .class12  (U12)    0.660    0.042   15.657    0.000
#>    .class13  (U13)    0.801    0.050   16.118    0.000
#>    .class14 (U14m)    0.664    0.056   11.768    0.000
#>    .class15  (U15)    0.892    0.055   16.076    0.000
#> 
#> 
#> Group 2 [2]:
#> 
#> Latent Variables:
#>                    Estimate  Std.Err  z-value  P(>|z|)
#>   f1 =~                                               
#>     class1    (l1)    0.627    0.050   12.645    0.000
#>     class2    (l2)    0.792    0.061   13.035    0.000
#>     class3    (l3)   -0.385    0.043   -8.979    0.000
#>     class4    (l4)    0.737    0.058   12.773    0.000
#>     class5    (l5)    0.593    0.052   11.360    0.000
#>     class6    (l6)    0.462    0.042   10.988    0.000
#>     class7    (l7)    0.782    0.059   13.245    0.000
#>     class8    (l8)   -0.295    0.043   -6.844    0.000
#>     class9    (l9)    0.585    0.052   11.175    0.000
#>     class10  (l10)    0.585    0.053   10.934    0.000
#>     class11  (l11)    0.640    0.050   12.736    0.000
#>     class12  (l12)    0.662    0.054   12.248    0.000
#>     class13  (l13)    0.589    0.052   11.348    0.000
#>     class14  (l14)    0.630    0.052   12.224    0.000
#>     class15  (l15)    0.633    0.057   11.178    0.000
#> 
#> Intercepts:
#>                    Estimate  Std.Err  z-value  P(>|z|)
#>     f1               -0.705    0.126   -5.583    0.000
#>    .class1   (I1m)    3.871    0.079   48.956    0.000
#>    .class2    (I2)    3.142    0.081   38.783    0.000
#>    .class3    (I3)    2.671    0.056   47.627    0.000
#>    .class4    (I4)    2.868    0.078   36.972    0.000
#>    .class5    (I5)    2.514    0.069   36.404    0.000
#>    .class6    (I6)    2.056    0.056   36.592    0.000
#>    .class7    (I7)    3.446    0.078   43.971    0.000
#>    .class8    (I8)    2.619    0.056   46.691    0.000
#>    .class9    (I9)    3.196    0.070   45.675    0.000
#>    .class10  (I10)    2.836    0.071   39.725    0.000
#>    .class11  (I11)    3.506    0.066   52.819    0.000
#>    .class12  (I12)    3.256    0.072   44.959    0.000
#>    .class13  (I13)    2.451    0.069   35.706    0.000
#>    .class14 (I14m)    3.746    0.078   48.000    0.000
#>    .class15  (I15)    2.807    0.081   34.524    0.000
#> 
#> Variances:
#>                    Estimate  Std.Err  z-value  P(>|z|)
#>     f1                1.201    0.200    6.003    0.000
#>    .class1    (U1)    0.609    0.044   13.754    0.000
#>    .class2    (U2)    0.615    0.041   14.956    0.000
#>    .class3    (U3)    0.826    0.050   16.618    0.000
#>    .class4    (U4)    0.609    0.040   15.203    0.000
#>    .class5    (U5)    0.809    0.050   16.113    0.000
#>    .class6    (U6)    0.491    0.035   14.110    0.000
#>    .class7    (U7)    0.694    0.052   13.343    0.000
#>    .class8    (U8)    1.039    0.062   16.805    0.000
#>    .class9    (U9)    0.834    0.052   16.160    0.000
#>    .class10  (U10)    0.917    0.056   16.235    0.000
#>    .class11  (U11)    0.610    0.044   13.711    0.000
#>    .class12  (U12)    0.660    0.042   15.657    0.000
#>    .class13  (U13)    0.801    0.050   16.118    0.000
#>    .class14  (U14)    0.538    0.052   10.418    0.000
#>    .class15  (U15)    0.892    0.055   16.076    0.000
#> 
#> 
#> Group 3 [3]:
#> 
#> Latent Variables:
#>                    Estimate  Std.Err  z-value  P(>|z|)
#>   f1 =~                                               
#>     class1    (l1)    0.627    0.050   12.645    0.000
#>     class2    (l2)    0.792    0.061   13.035    0.000
#>     class3    (l3)   -0.385    0.043   -8.979    0.000
#>     class4    (l4)    0.737    0.058   12.773    0.000
#>     class5    (l5)    0.593    0.052   11.360    0.000
#>     class6    (l6)    0.462    0.042   10.988    0.000
#>     class7    (l7)    0.782    0.059   13.245    0.000
#>     class8    (l8)   -0.295    0.043   -6.844    0.000
#>     class9    (l9)    0.585    0.052   11.175    0.000
#>     class10  (l10)    0.585    0.053   10.934    0.000
#>     class11  (l11)    0.640    0.050   12.736    0.000
#>     class12  (l12)    0.662    0.054   12.248    0.000
#>     class13  (l13)    0.589    0.052   11.348    0.000
#>     class14  (l14)    0.630    0.052   12.224    0.000
#>     class15  (l15)    0.633    0.057   11.178    0.000
#> 
#> Intercepts:
#>                    Estimate  Std.Err  z-value  P(>|z|)
#>     f1               -1.047    0.178   -5.870    0.000
#>    .class1    (I1)    3.586    0.069   52.219    0.000
#>    .class2    (I2)    3.142    0.081   38.783    0.000
#>    .class3    (I3)    2.671    0.056   47.627    0.000
#>    .class4    (I4)    2.868    0.078   36.972    0.000
#>    .class5    (I5)    2.514    0.069   36.404    0.000
#>    .class6    (I6)    2.056    0.056   36.592    0.000
#>    .class7    (I7)    3.446    0.078   43.971    0.000
#>    .class8    (I8)    2.619    0.056   46.691    0.000
#>    .class9   (I9m)    3.600    0.125   28.855    0.000
#>    .class10 (I10m)    3.176    0.129   24.545    0.000
#>    .class11  (I11)    3.506    0.066   52.819    0.000
#>    .class12 (I12m)    3.540    0.119   29.856    0.000
#>    .class13  (I13)    2.451    0.069   35.706    0.000
#>    .class14  (I14)    3.389    0.075   45.245    0.000
#>    .class15  (I15)    2.807    0.081   34.524    0.000
#> 
#> Variances:
#>                    Estimate  Std.Err  z-value  P(>|z|)
#>     f1                1.282    0.284    4.510    0.000
#>    .class1    (U1)    0.609    0.044   13.754    0.000
#>    .class2    (U2)    0.615    0.041   14.956    0.000
#>    .class3    (U3)    0.826    0.050   16.618    0.000
#>    .class4    (U4)    0.609    0.040   15.203    0.000
#>    .class5    (U5)    0.809    0.050   16.113    0.000
#>    .class6    (U6)    0.491    0.035   14.110    0.000
#>    .class7    (U7)    0.694    0.052   13.343    0.000
#>    .class8    (U8)    1.039    0.062   16.805    0.000
#>    .class9    (U9)    0.834    0.052   16.160    0.000
#>    .class10  (U10)    0.917    0.056   16.235    0.000
#>    .class11  (U11)    0.610    0.044   13.711    0.000
#>    .class12  (U12)    0.660    0.042   15.657    0.000
#>    .class13  (U13)    0.801    0.050   16.118    0.000
#>    .class14 (U14m)    0.664    0.056   11.768    0.000
#>    .class15  (U15)    0.892    0.055   16.076    0.000
#> 
#> 
#> Group 4 [4]:
#> 
#> Latent Variables:
#>                    Estimate  Std.Err  z-value  P(>|z|)
#>   f1 =~                                               
#>     class1    (l1)    0.627    0.050   12.645    0.000
#>     class2    (l2)    0.792    0.061   13.035    0.000
#>     class3    (l3)   -0.385    0.043   -8.979    0.000
#>     class4    (l4)    0.737    0.058   12.773    0.000
#>     class5    (l5)    0.593    0.052   11.360    0.000
#>     class6    (l6)    0.462    0.042   10.988    0.000
#>     class7    (l7)    0.782    0.059   13.245    0.000
#>     class8    (l8)   -0.295    0.043   -6.844    0.000
#>     class9    (l9)    0.585    0.052   11.175    0.000
#>     class10  (l10)    0.585    0.053   10.934    0.000
#>     class11  (l11)    0.640    0.050   12.736    0.000
#>     class12  (l12)    0.662    0.054   12.248    0.000
#>     class13  (l13)    0.589    0.052   11.348    0.000
#>     class14  (l14)    0.630    0.052   12.224    0.000
#>     class15  (l15)    0.633    0.057   11.178    0.000
#> 
#> Intercepts:
#>                    Estimate  Std.Err  z-value  P(>|z|)
#>     f1               -1.058    0.164   -6.436    0.000
#>    .class1    (I1)    3.586    0.069   52.219    0.000
#>    .class2    (I2)    3.142    0.081   38.783    0.000
#>    .class3    (I3)    2.671    0.056   47.627    0.000
#>    .class4   (I4m)    2.620    0.110   23.871    0.000
#>    .class5    (I5)    2.514    0.069   36.404    0.000
#>    .class6    (I6)    2.056    0.056   36.592    0.000
#>    .class7    (I7)    3.446    0.078   43.971    0.000
#>    .class8    (I8)    2.619    0.056   46.691    0.000
#>    .class9    (I9)    3.196    0.070   45.675    0.000
#>    .class10  (I10)    2.836    0.071   39.725    0.000
#>    .class11  (I11)    3.506    0.066   52.819    0.000
#>    .class12  (I12)    3.256    0.072   44.959    0.000
#>    .class13  (I13)    2.451    0.069   35.706    0.000
#>    .class14  (I14)    3.389    0.075   45.245    0.000
#>    .class15  (I15)    2.807    0.081   34.524    0.000
#> 
#> Variances:
#>                    Estimate  Std.Err  z-value  P(>|z|)
#>     f1                1.289    0.261    4.938    0.000
#>    .class1    (U1)    0.609    0.044   13.754    0.000
#>    .class2    (U2)    0.615    0.041   14.956    0.000
#>    .class3    (U3)    0.826    0.050   16.618    0.000
#>    .class4    (U4)    0.609    0.040   15.203    0.000
#>    .class5    (U5)    0.809    0.050   16.113    0.000
#>    .class6    (U6)    0.491    0.035   14.110    0.000
#>    .class7    (U7)    0.694    0.052   13.343    0.000
#>    .class8    (U8)    1.039    0.062   16.805    0.000
#>    .class9    (U9)    0.834    0.052   16.160    0.000
#>    .class10  (U10)    0.917    0.056   16.235    0.000
#>    .class11  (U11)    0.610    0.044   13.711    0.000
#>    .class12  (U12)    0.660    0.042   15.657    0.000
#>    .class13  (U13)    0.801    0.050   16.118    0.000
#>    .class14 (U14m)    0.664    0.056   11.768    0.000
#>    .class15  (U15)    0.892    0.055   16.076    0.000

#saveRDS(par_res, "ps_mod_est.rds")
## read the saved partial strict invariance model
readRDS("ps_mod_est.rds")
#> Warning in gzfile(file, "rb"): cannot open compressed file 'ps_mod_est.rds',
#> probable reason 'No such file or directory'
#> Error in gzfile(file, "rb"): cannot open the connection

Created on 2023-02-26 with reprex v2.0.2

marklhc commented 1 year ago

@YichiZhang2024 , thank you for putting the question in a reproducible example. It took me a while, but it's because you set the uniqueness for item 14 to be the same for groups 1, 3, and 4, when indeed they are all different based in pinsearch. In other words, you should have something like

class14 ~~ c(U14a, U14, U14c, U14d) * class14