pistacliffcho / icenReg_devel

Code for icenReg plus testData
4 stars 0 forks source link

Error due to NA values in responses and predictors #15

Open DrJerryTAO opened 3 months ago

DrJerryTAO commented 3 months ago

Hi @pistacliffcho, I found that icenReg does not handle na.action the way most other model-fitting packages do when NA values appear in the response or the predictors.

When any observation has a missing value in any predictor, the model-fitting functions produce the following error message.

Error in if (any(weights < 0)) stop("weights supplied cannot be less than 0") : 
missing value where TRUE/FALSE needed

It does not matter whether the response is specified using Surv() or cbind(), or whether the missing cases are given their own level with factor(gender, exclude = NULL).

ic_np(cbind(left, right) ~ gender, data = IR_diabetes)
"Works"
ic_np(
  cbind(left, right) ~ gender, 
  data = IR_diabetes |> 
    transform(gender = c(rep(NA, 20), gender[21:nrow(IR_diabetes)])))
ic_np(
  Surv(left, right, type = "interval2") ~ gender, 
  data = IR_diabetes |> 
    transform(gender = c(rep(NA, 20), gender[21:nrow(IR_diabetes)])))
ic_np(
  Surv(left, right, type = "interval2") ~ gender, 
  data = IR_diabetes |> 
    transform(gender = c(rep(NA, 20), gender[21:nrow(IR_diabetes)])) |>
    transform(gender = factor(gender, exclude = NULL)))
"Error in if (any(weights < 0)) stop(weights supplied cannot be less than 0) : 
  missing value where TRUE/FALSE needed"

A temporary solution is to remove NA cases manually.

ic_np(
  Surv(left, right, type = "interval2") ~ gender, 
  data = IR_diabetes |> 
    transform(gender = c(rep(NA, 20), gender[21:nrow(IR_diabetes)])) |>
    subset(complete.cases(gender)))

icenReg also does not allow NA values in the left limit of the response interval, and a -Inf value is allowed only when the response is specified using cbind().

ic_np( # ic_npSINGLE() error
  Surv(left, right, type = "interval2") ~ gender, 
  data = IR_diabetes |> 
    transform(left = c(rep(NA, 20), left[21:nrow(IR_diabetes)])))
ic_np( # ic_npSINGLE() error
  Surv(left, right, type = "interval2") ~ gender, 
  data = IR_diabetes |> 
    transform(left = c(rep(-Inf, 20), left[21:nrow(IR_diabetes)])))
"Error in ic_npSINGLE(thisData, maxIter = maxIter, 
 tol = tol, B = B, weights = this_w) : 
  data[,1] > data[,2].This is impossible for interval censored data"
ic_np( # yMat[] error
  cbind(left, right) ~ gender, 
  data = IR_diabetes |> 
    transform(left = c(rep(NA, 20), left[21:nrow(IR_diabetes)])))
"Error in yMat[isCensored, 1] <- yMat[isCensored, 1] + eps : 
  NAs are not allowed in subscripted assignments"
ic_np( # worked
  cbind(left, right) ~ gender, 
  data = IR_diabetes |> 
    transform(left = c(rep(-Inf, 20), left[21:nrow(IR_diabetes)])))

On the other hand, the right limit seems quite flexible, with both NA and Inf values allowed in both the cbind() and Surv() formats.

ic_np( # worked
  Surv(left, right, type = "interval2") ~ gender, 
  data = IR_diabetes |> 
    transform(right = c(rep(NA, 20), right[21:nrow(IR_diabetes)])))
ic_np( # worked
  Surv(left, right, type = "interval2") ~ gender, 
  data = IR_diabetes |> 
    transform(right = c(rep(Inf, 20), right[21:nrow(IR_diabetes)])))
ic_np( # worked
  cbind(left, right) ~ gender, 
  data = IR_diabetes |> 
    transform(right = c(rep(NA, 20), right[21:nrow(IR_diabetes)])))
ic_np( # worked
  cbind(left, right) ~ gender, 
  data = IR_diabetes |> 
    transform(right = c(rep(Inf, 20), right[21:nrow(IR_diabetes)])))

When the response is entirely missing, errors about yMat[] are produced. icenReg does permit setting missing lower limits to -Inf and missing upper limits to Inf in the cbind() format. However, I do not know whether a result that includes (-Inf, Inf) intervals can be trusted -- is it equivalent to removing the cases with missing responses?

ic_np( # Error in yMat[]
  Surv(left, right, type = "interval2") ~ gender, 
  data = IR_diabetes |> 
    transform(left = c(rep(NA, 20), left[21:nrow(IR_diabetes)])) |>
    transform(right = c(rep(NA, 20), right[21:nrow(IR_diabetes)])))
ic_np( # Error in yMat[]
  Surv(left, right, type = "interval2") ~ gender, 
  data = IR_diabetes |> 
    transform(left = c(rep(-Inf, 20), left[21:nrow(IR_diabetes)])) |>
    transform(right = c(rep(Inf, 20), right[21:nrow(IR_diabetes)])))
"Error in yMat[exact, 2] <- yMat[exact, 1] : 
  NAs are not allowed in subscripted assignments"
ic_np( # Error in yMat[]
  cbind(left, right) ~ gender, 
  data = IR_diabetes |> 
    transform(left = c(rep(NA, 20), left[21:nrow(IR_diabetes)])) |>
    transform(right = c(rep(NA, 20), right[21:nrow(IR_diabetes)])))
"Error in yMat[isCensored, 1] <- yMat[isCensored, 1] + eps : 
  NAs are not allowed in subscripted assignments"
ic_np( # Worked
  cbind(left, right) ~ gender, 
  data = IR_diabetes |> 
    transform(left = c(rep(-Inf, 20), left[21:nrow(IR_diabetes)])) |>
    transform(right = c(rep(Inf, 20), right[21:nrow(IR_diabetes)])))