tbates / umx

Making Structural Equation Modeling (SEM) in R quick & powerful
https://tbates.github.io/
45 stars 17 forks source link

Help with threshold not in ascending order, despite the lower diagonal multiplication trick #231

Closed lf-araujo closed 1 year ago

lf-araujo commented 1 year ago

Hi all,

This is a MWE of an issue that happens at times while trying to work with ordinal variables in direction of causation models (and also in MRDoC). The example here is for the DoC model. I am applying the diagonal multiplication approach, it is included in the umxThresholdMatrix code, however, in some cases, OpenMX still complains. I cannot identify the pattern where OpenMx does not complain, unfortunately, but I am able to reproduce the issue using the code below.

@tbates, @mcneale maybe you can spot where the issue lies?

The error:

out <- mxRun(model)
Error: In model 'DoC' the thresholds in column 'obese1' of matrix/algebra 'top.threshMat' are not in ascending order. The current order is:  '-0.865626303354451' and '-1.7312526067089' and ascending order is:  '-1.7312526067089' and '-0.865626303354451' . Only the first 2 element(s) of this column are inspected.

The MWE

library(umx)
library(dplyr)
library(tidyr)

# Generating data
data(twinData)
glimpse(twinData)

# Make "obese" variable with ~20% subjects categorised as overweight and 30% as obese
obesityLevels   = c('normal', 'overweight', 'obese')
cutPoints       = quantile(twinData[, "bmi1"], probs = c(.2, .3), na.rm = TRUE)
twinData$obese1 = cut(twinData$bmi1, breaks = c(-Inf, cutPoints, Inf), labels = obesityLevels) 
twinData$obese2 = cut(twinData$bmi2, breaks = c(-Inf, cutPoints, Inf), labels = obesityLevels) 
# Step 2: Make the ordinal variables into umxFactors (ordered, with the levels found in the data)
selVars = c("obese1", "obese2")
twinData <- mutate(twinData, obese1 = factor(obese1, ordered = T),
    obese2 = factor(obese2, ordered = T),
) 

mzData  = subset(twinData, zygosity %in% c("MZFF", "MZMM"))
dzData  = subset(twinData, zygosity %in% c("DZFF", "DZMM"))

pheno = c("obese", "ht")
name = "DoC"
sep = ""
covar=NULL
vnames = tvars(c(pheno), sep = sep)

  # Find ordinal variables
colTypes = umx_is_ordered(xmu_extract_column(mzData, vnames),
 summaryObject= TRUE)

if(colTypes$nOrdVars > 0){
    ty = umxThresholdMatrix(rbind(mzData, dzData), fullVarNames = colTypes$ordVarNames,
      sep = sep, method="allFree")
    ordinalPresent = TRUE
}

xmu_twin_check(
  selDVs = c(pheno),
  sep = sep, dzData = dzData, mzData = mzData, enforceSep = TRUE,
  nSib = 2, optimizer = "CSOLNP"    )

top = mxModel("top",
    mxMatrix(name = "I", type = "Iden", nrow = 2, ncol = 2),
    umxMatrixFree("B",
      type = "Full", nrow = 2, ncol = 2,
      labels = c(
        NA, "g2",
        "g1", NA
    )
  ),
    umxMatrix("psi_a", type = "Symm", ncol = 2, nrow = 2, byrow = TRUE,
      labels = c(
        "ax2",
        "ra", "ay2"
    ),
      free = c(
        T,
        T, T
    ),
      values = c(
        0.1,
        0.05, 0.1
    ),
  ),
    umxMatrixFree("psi_c", type = "Symm", ncol = 2, nrow = 2,
      labels = c(
        "cx2",
        "rc", "cy2"
    ),
  ),
    umxMatrixFree("psi_e", type = "Symm", ncol = 2, nrow = 2,
      labels = c(
        "ex2",
        "re", "ey2"
    ),
  ),
    umxMatrix("lamba", type = "Diag", nrow = 2, ncol = 2, byrow = TRUE,
      free = c(F, F), labels = c("σ1", "σ2"),
      values = c(1, 1)
  ),
    mxAlgebra("A", expression = lamba %&% (solve(I - B) %&% psi_a)),
    mxAlgebra("C", expression = lamba %&% (solve(I - B) %&% psi_c)),
    mxAlgebra("E", expression = lamba %&% (solve(I - B) %&% psi_e)),
    mxAlgebra("CovMZ", expression =  rbind(
      cbind(A + C + E, A + C),
      cbind(A + C, A + C + E)
  )),
    mxAlgebra(name = "CovDZ", expression = rbind(
      cbind(A + C + E, 0.5 %x% A + C),
      cbind(0.5 %x% A + C, A + C + E)
  )),
    mxAlgebra("VC", expression = cbind(A, C, E, A / (A + C + E),
       C / (A + C + E), E / (A + C + E)),
    dimnames = list(
        rep("VC", 2),
        rep(c("A", "C", "E", "SA", "SC", "SE"), each = 2)
    )
) ,

    #  THE SECTION BELOW IS TO CALCULATE THE MEANS, THE OVERCOMPLICATED APPROACH IS LEFT OVER OF ADDING COVARS
    # BUT SHOULDNT AFFECT THE ISSUE
    # create algebra for expected mean & threshold matrices
    mxMatrix( type="Full", nrow=1, ncol=2, free= TRUE, 
      labels=c("mean_Ph1","mean_Ph2"), 
      name="meanT1" ),
    # in mz twins, prs_twin1 == prs_twin2
    # so, twin2 in mz pairs does not have the prs variable
    mxMatrix( type="Full", nrow=1, ncol=2, free=TRUE,   
      labels=c("mean_Ph1","mean_Ph2"), 
      name="meanT2MZ"),
    # in dz twins, prs_twin1 != prs_twin2
    mxMatrix( type="Full", nrow=1, ncol=2, free=TRUE,   
      labels=c("mean_Ph1","mean_Ph2"),  
      name="meanT2DZ" )
)

     # This is leftovers of adding covars, but shouldn't affect the issue
expMeanMZ <- mxAlgebra("expMeanMZ",
  expression = cbind(top.meanT1 ,
     top.meanT2MZ )) 

expMeanDZ <- mxAlgebra("expMeanDZ",
  expression = cbind(top.meanT1 ,
     top.meanT2DZ ))  

      # Adding thresholds
top = top+ty

expMZ =  mxExpectationNormal("top.CovMZ",
  means = "expMeanMZ",
  dimnames = vnames,
  thresholds="top.threshMat",
  threshnames = colTypes$ordVarNames
)

expDZ =mxExpectationNormal("top.CovDZ",
  means =  "expMeanDZ", 
  dimnames = vnames,
  thresholds="top.threshMat",
  threshnames = colTypes$ordVarNames
)

MZ = mxModel("MZ", expMeanMZ, expMZ, mxFitFunctionML())
DZ = mxModel("DZ", expMeanDZ, expDZ, mxFitFunctionML())

    # Adding data
MZ = MZ + mxData(mzData, type = "raw") 
DZ = DZ + mxData(dzData, type = "raw") 

model = mxModel(name, top, MZ, DZ, mxFitFunctionMultigroup(c("MZ","DZ") ))
# Dropping g2 and re for identification DoC
model = umxModify(model,  update = c("g2",  "re"), autoRun = F)

out <- mxRun(model)
summary(out)
umxSummary(out)
lf-araujo commented 1 year ago

I guess the warning is not too informative. That was likely due to an unidentified model.

I wasn't fixing the first thresholds, as I should. Doing that fixed the issue (however another issue arose, will open a discussion on the forums for the new one):

library(umx)
library(dplyr)
library(tidyr)

# Generating data
data(twinData)
glimpse(twinData)

# Make "obese" variable with ~20% subjects categorised as overweight and 30% as obese
obesityLevels   = c('normal', 'overweight', 'obese')
cutPoints       = quantile(twinData[, "bmi1"], probs = c(.2, .3), na.rm = TRUE)
twinData$obese1 = cut(twinData$bmi1, breaks = c(-Inf, cutPoints, Inf), labels = obesityLevels) 
twinData$obese2 = cut(twinData$bmi2, breaks = c(-Inf, cutPoints, Inf), labels = obesityLevels) 
# Step 2: Make the ordinal variables into umxFactors (ordered, with the levels found in the data)
selVars = c("obese1", "obese2")
twinData <- mutate(twinData, obese1 = factor(obese1, ordered = T),
    obese2 = factor(obese2, ordered = T),
) 

mzData  = subset(twinData, zygosity %in% c("MZFF", "MZMM"))
dzData  = subset(twinData, zygosity %in% c("DZFF", "DZMM"))

pheno = c("obese", "ht")
name = "DoC"
sep = ""
covar=NULL
vnames = tvars(c(pheno), sep = sep)

  # Find ordinal variables
colTypes = umx_is_ordered(xmu_extract_column(mzData, vnames),
 summaryObject= TRUE)

if(colTypes$nOrdVars > 0){
    ty = umxThresholdMatrix(rbind(mzData, dzData), fullVarNames = colTypes$ordVarNames,
      sep = sep, method="allFree")
    ordinalPresent = TRUE
}

xmu_twin_check(
  selDVs = c(pheno),
  sep = sep, dzData = dzData, mzData = mzData, enforceSep = TRUE,
  nSib = 2, optimizer = "CSOLNP"    )

top = mxModel("top",
    mxMatrix(name = "I", type = "Iden", nrow = 2, ncol = 2),
    umxMatrixFree("B",
      type = "Full", nrow = 2, ncol = 2,
      labels = c(
        NA, "g2",
        "g1", NA
    )
  ),
    umxMatrix("psi_a", type = "Symm", ncol = 2, nrow = 2, byrow = TRUE,
      labels = c(
        "ax2",
        "ra", "ay2"
    ),
      free = c(
        T,
        T, T
    ),
      values = c(
        0.1,
        0.05, 0.1
    ),
  ),
    umxMatrixFree("psi_c", type = "Symm", ncol = 2, nrow = 2,
      labels = c(
        "cx2",
        "rc", "cy2"
    ),
  ),
    umxMatrixFree("psi_e", type = "Symm", ncol = 2, nrow = 2,
      labels = c(
        "ex2",
        "re", "ey2"
    ),
  ),
    umxMatrix("lamba", type = "Diag", nrow = 2, ncol = 2, byrow = TRUE,
      free = c(F, F), labels = c("σ1", "σ2"),
      values = c(1, 1)
  ),
    mxAlgebra("A", expression = lamba %&% (solve(I - B) %&% psi_a)),
    mxAlgebra("C", expression = lamba %&% (solve(I - B) %&% psi_c)),
    mxAlgebra("E", expression = lamba %&% (solve(I - B) %&% psi_e)),
    mxAlgebra("CovMZ", expression =  rbind(
      cbind(A + C + E, A + C),
      cbind(A + C, A + C + E)
  )),
    mxAlgebra(name = "CovDZ", expression = rbind(
      cbind(A + C + E, 0.5 %x% A + C),
      cbind(0.5 %x% A + C, A + C + E)
  )),
    mxAlgebra("VC", expression = cbind(A, C, E, A / (A + C + E),
       C / (A + C + E), E / (A + C + E)),
    dimnames = list(
        rep("VC", 2),
        rep(c("A", "C", "E", "SA", "SC", "SE"), each = 2)
    )
) ,

    #  THE SECTION BELOW IS TO CALCULATE THE MEANS, THE OVERCOMPLICATED APPROACH IS LEFT OVER OF ADDING COVARS
    # BUT SHOULDNT AFFECT THE ISSUE
    # create algebra for expected mean & threshold matrices
    mxMatrix( type="Full", nrow=1, ncol=2, free= TRUE, 
      labels=c("mean_Ph1","mean_Ph2"), 
      name="meanT1" ),
    # in mz twins, prs_twin1 == prs_twin2
    # so, twin2 in mz pairs does not have the prs variable
    mxMatrix( type="Full", nrow=1, ncol=2, free=TRUE,   
      labels=c("mean_Ph1","mean_Ph2"), 
      name="meanT2MZ"),
    # in dz twins, prs_twin1 != prs_twin2
    mxMatrix( type="Full", nrow=1, ncol=2, free=TRUE,   
      labels=c("mean_Ph1","mean_Ph2"),  
      name="meanT2DZ" )
)

     # This is leftovers of adding covars, but shouldn't affect the issue
expMeanMZ <- mxAlgebra("expMeanMZ",
  expression = cbind(top.meanT1 ,
     top.meanT2MZ )) 

expMeanDZ <- mxAlgebra("expMeanDZ",
  expression = cbind(top.meanT1 ,
     top.meanT2DZ ))  

      # Adding thresholds
top = top+ty

expMZ =  mxExpectationNormal("top.CovMZ",
  means = "expMeanMZ",
  dimnames = vnames,
  thresholds="top.threshMat",
  threshnames = colTypes$ordVarNames
)

expDZ =mxExpectationNormal("top.CovDZ",
  means =  "expMeanDZ", 
  dimnames = vnames,
  thresholds="top.threshMat",
  threshnames = colTypes$ordVarNames
)

MZ = mxModel("MZ", expMeanMZ, expMZ, mxFitFunctionML())
DZ = mxModel("DZ", expMeanDZ, expDZ, mxFitFunctionML())

    # Adding data
MZ = MZ + mxData(mzData, type = "raw") 
DZ = DZ + mxData(dzData, type = "raw") 

model = mxModel(name, top, MZ, DZ, mxFitFunctionMultigroup(c("MZ","DZ") ))
# Dropping g2 and re for identification DoC
model = umxModify(model,  update = c("g2",  "re"), autoRun = F)

out <- mxRun(model)
summary(out)
umxSummary(out)