Problems with the VE-Step optimization

khaledbouguila commented 1 year ago

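Two problems related to the VE-Step optimization: (1) when the VE-step is initialized with the optimal variational parameters M and S of the fitted model, it still changes them; (2) running the VE-step on all M_i at once gives different results from running it on each M_i separately.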

This is an example on trichoptera data that illustrates the problems:


# Reproducible example, part 1: run the VE-step on all training rows at once,
# starting from the variational parameters of the fitted model.
# NOTE(review): library()/data(), the `n_new` variable and the do.call() target
# were missing from the posted snippet and are reconstructed here -- confirm.
library(PLNmodels)

# choose data
data(trichoptera)

# Prepare data
PLNdata <- prepare_data(trichoptera$Abundance, trichoptera$Covariate)

# Data dimensions
n <- nrow(PLNdata$Abundance)
p <- ncol(PLNdata$Abundance)

# fitting data to a PLN model
PLNmodel <- PLN(Abundance ~ 1, data = PLNdata)

# VE (all M_i at once):
# New data
ind <- seq_len(n) # choose from train data
Newdata <- PLNdata$Abundance[ind, , drop = FALSE] # keep a matrix even for one row

# New data size
n_new <- nrow(Newdata)

# Parameters for the VE-Step
covariates <- matrix(1, n_new, 1) # intercept
offsets <- matrix(0, n_new, p) # no offsets
responses <- matrix(Newdata, nrow = n_new)
weights <- rep(1, n_new)

B <- PLNmodel$model_par$B # Fixed
Omega <- PLNmodel$model_par$Omega # Fixed
control <- PLN_param(
  backend = "nlopt",
  config_optim = list(maxeval = 1e4, ftol_rel = 1e-8, xtol_rel = 1e-6)
)

# Initialize with the optimal M, S
M_init <- matrix(PLNmodel$var_par$M[ind, ], nrow = n_new)
S_init <- matrix(PLNmodel$var_par$S[ind, ], nrow = n_new)
# M_init <- matrix(0, n_new, p)
# S_init <- matrix(0.1, n_new, p)

# The VE-Step
args <- list(
  data = list(Y = responses, X = covariates, O = offsets, w = weights),
  params = list(M = M_init, S = S_init),
  B = as.matrix(B),
  Omega = as.matrix(Omega),
  config = control$config_optim
)
# NOTE(review): presumably the internal (non-exported) VE-step optimizer of
# PLNmodels; the function name was lost from the posted snippet -- confirm.
VE <- do.call(PLNmodels:::nlopt_optimize_vestep, args)

# Result: the parameters M, S change
print(sum(abs(VE$M - M_init))) ## Should be zero
print(sum(abs(VE$S - S_init))) ## Should be zero
# loglik_vec is indexed as a plain vector (one value per observation)
print(sum(abs(VE$Ji - PLNmodel$loglik_vec[ind]))) ## Should be zero

# VE (for each M_i):
# Reproducible example, part 2: run the VE-step one observation at a time and
# compare with the all-at-once result `VE`. Relies on `PLNdata`, `PLNmodel`,
# `n`, `p`, `ind`, `B`, `Omega`, `control` and `VE` defined earlier in the script.

# Preallocate the containers for the per-observation results
VE_for <- list(
  M = matrix(NA_real_, n, p),
  S = matrix(NA_real_, n, p),
  Ji = rep(NA_real_, n)
)

# Parameters for the VE-Step that do not depend on the observation
covariates <- matrix(1, 1, 1) # intercept
offsets <- matrix(0, 1, p) # no offsets
weights <- 1

for (i in ind) {
  # New data
  Newdata <- PLNdata$Abundance[i, ]
  responses <- matrix(Newdata, nrow = 1)

  # Initialization with training data
  M_init_i <- matrix(PLNmodel$var_par$M[i, ], nrow = 1)
  S_init_i <- matrix(PLNmodel$var_par$S[i, ], nrow = 1)
  # M_init_i <- matrix(0, 1, p)
  # S_init_i <- matrix(0.1, 1, p)

  # The VE-Step
  args <- list(
    data = list(Y = responses, X = covariates, O = offsets, w = weights),
    params = list(M = M_init_i, S = S_init_i),
    B = as.matrix(B),
    Omega = as.matrix(Omega),
    config = control$config_optim
  )
  # NOTE(review): presumably the internal (non-exported) VE-step optimizer of
  # PLNmodels; the function name was lost from the posted snippet -- confirm.
  VE_ <- do.call(PLNmodels:::nlopt_optimize_vestep, args)

  # Stock results
  VE_for$M[i, ] <- VE_$M
  VE_for$S[i, ] <- VE_$S
  VE_for$Ji[i] <- VE_$Ji
}

# Different results obtained (VE for all M_i at once - VE for each M_i):
print(sum(abs(VE_for$M[ind, ] - VE$M))) ## Should be zero
print(sum(abs(VE_for$S[ind, ] - VE$S))) ## Should be zero
print(sum(abs(VE_for$Ji[ind] - VE$Ji))) ## Should be zero
jchiquet commented 1 year ago

Thanks for the report.

I am on it. It seems that there is a sign error in the objective of the VE-step.

Since we are optimizing something related to the negative log-likelihood, the trace term should be

   + 0.5 * trace(Omega * nSigma);


If anybody (e.g. @mahendra-mariadassou) can double-check/confirm this...

jchiquet commented 1 year ago

Ok, now the following piece of code


# Verification script: fit a PLN model on simulated Poisson counts, re-run the
# VE-step on the training rows from a cold start, and compare the result with
# the starting values and with the log-likelihood of the fitted model.
# NOTE(review): library() and the do.call() target were missing from the posted
# snippet and are reconstructed here -- confirm.
library(PLNmodels)

# choose data
n <- 100
p <- 2
counts <- matrix(rpois(n * p, c(5, 11)), n, p)
covariates <- matrix(1, n, 1)

# Fit PLN on data
data <- prepare_data(counts, covariates)
model <- PLN(Abundance ~ 1 + offset(log(Offset)), data = data)

# take training data
# get parameters for the VE-step
new_n <- n
ind <- seq_len(new_n)
new_data <- data[ind, , drop = FALSE]
new_responses <- new_data$Abundance
new_covariates <- matrix(new_data$V1, ncol = 1)
# the same log-offset is replicated for each of the p columns
new_offsets <- matrix(rep(log(new_data$Offset), p), ncol = p)
new_weights <- rep(1, new_n)

B <- model$model_par$B
Omega <- model$model_par$Omega

# Cold start for the variational parameters. The warm start below was
# immediately overwritten in the original snippet; kept as an alternative.
# M_init <- model$var_par$M[ind, , drop = FALSE]
# S_init <- model$var_par$S[ind, , drop = FALSE]
M_init <- matrix(0, new_n, p)
S_init <- matrix(0.1, new_n, p)

args <- list(
  data = list(
    Y = new_responses, X = new_covariates,
    O = new_offsets, w = new_weights
  ),
  ## Initialize the variational parameters with the new dimension of the data
  params = list(M = M_init, S = S_init),
  B = as.matrix(B),
  Omega = as.matrix(Omega),
  config = PLN_param()$config_optim
)
# NOTE(review): presumably the internal (non-exported) VE-step optimizer of
# PLNmodels; the function name was lost from the posted snippet -- confirm.
VE <- do.call(PLNmodels:::nlopt_optimize_vestep, args)

# Sum of squared differences between two conformable objects
mse <- function(a, b) sum((a - b)^2)
print(mse(VE$S^2, S_init^2))
print(mse(VE$M, M_init))
print(mse(VE$Ji, model$loglik_vec[ind]))

is returning, as expected

[1] 0.01935975
[1] 1.701245e-05
[1] 4.389937e-10