modal-inria / MixtComp

Model-based clustering package for mixed data
Other
12 stars 4 forks source link

MC_DETERMINISTIC (RNG seed) behaviour #6

Open Quentin62 opened 3 years ago

Quentin62 commented 3 years ago

Not really a bug, but something to be aware of for reproducibility: the RNG seed (set with the MC_DETERMINISTIC environment variable) is applied only once per R session. So, if you run mixtCompLearn several times with the same seed inside the same R session, you will get different results:

library(RMixtComp)

data(simData)

algo <- list(nbBurnInIter = 50, nbIter = 50, nbGibbsBurnInIter = 50,
             nbGibbsIter = 50,  nInitPerClass = 20, nSemTry = 20, confidenceLevel = 0.95)

Sys.setenv(MC_DETERMINISTIC = 42)
resLearn1 <- mixtCompLearn(simData$dataLearn$matrix, simData$model$unsupervised[1:3], algo,
                           nClass = 1:2, nRun = 1, nCore = 1)
Sys.unsetenv("MC_DETERMINISTIC") 

Sys.setenv(MC_DETERMINISTIC = 42)
resLearn2 <- mixtCompLearn(simData$dataLearn$matrix, simData$model$unsupervised[1:3], algo,
                           nClass = 1:2, nRun = 1, nCore = 1)
Sys.unsetenv("MC_DETERMINISTIC") 

resLearn1$mixture$lnObservedLikelihood
# [1] -1036.835
resLearn2$mixture$lnObservedLikelihood
# [1] -1040.155

If you restart R between the two runs, you get the same results:

library(RMixtComp)

data(simData)

algo <- list(nbBurnInIter = 50, nbIter = 50, nbGibbsBurnInIter = 50,
             nbGibbsIter = 50,  nInitPerClass = 20, nSemTry = 20, confidenceLevel = 0.95)

Sys.setenv(MC_DETERMINISTIC = 42)
resLearn1 <- mixtCompLearn(simData$dataLearn$matrix, simData$model$unsupervised[1:3], algo,
                           nClass = 1:2, nRun = 1, nCore = 1)
Sys.unsetenv("MC_DETERMINISTIC") 

resLearn1$mixture$lnObservedLikelihood
# [1] -1036.835

# If we close R and restart a new session

library(RMixtComp)

data(simData)

algo <- list(nbBurnInIter = 50, nbIter = 50, nbGibbsBurnInIter = 50,
             nbGibbsIter = 50,  nInitPerClass = 20, nSemTry = 20, confidenceLevel = 0.95)

Sys.setenv(MC_DETERMINISTIC = 42)
resLearn1 <- mixtCompLearn(simData$dataLearn$matrix, simData$model$unsupervised[1:3], algo,
                           nClass = 1:2, nRun = 1, nCore = 1)
Sys.unsetenv("MC_DETERMINISTIC") 

resLearn1$mixture$lnObservedLikelihood
# [1] -1036.835

If you change the seed inside a session, it has no effect:

library(RMixtComp)

data(simData)

algo <- list(nbBurnInIter = 50, nbIter = 50, nbGibbsBurnInIter = 50,
             nbGibbsIter = 50,  nInitPerClass = 20, nSemTry = 20, confidenceLevel = 0.95)

Sys.setenv(MC_DETERMINISTIC = 42)
resLearn1 <- mixtCompLearn(simData$dataLearn$matrix, simData$model$unsupervised[1:3], algo,
                           nClass = 1:2, nRun = 1, nCore = 1)
Sys.unsetenv("MC_DETERMINISTIC") 

Sys.setenv(MC_DETERMINISTIC = 50)
resLearn2 <- mixtCompLearn(simData$dataLearn$matrix, simData$model$unsupervised[1:3], algo,
                           nClass = 1:2, nRun = 1, nCore = 1)
Sys.unsetenv("MC_DETERMINISTIC") 

resLearn1$mixture$lnObservedLikelihood
# [1] -1036.835
resLearn2$mixture$lnObservedLikelihood
# [1] -1040.155  # same value as the second run in the first example: changing the seed within the same session has no effect

If we start a new session and run with a seed of 50, we get the result actually associated with that seed:

library(RMixtComp)

data(simData)

algo <- list(nbBurnInIter = 50, nbIter = 50, nbGibbsBurnInIter = 50,
             nbGibbsIter = 50,  nInitPerClass = 20, nSemTry = 20, confidenceLevel = 0.95)

Sys.setenv(MC_DETERMINISTIC = 50)
resLearn2 <- mixtCompLearn(simData$dataLearn$matrix, simData$model$unsupervised[1:3], algo,
                           nClass = 1:2, nRun = 1, nCore = 1)
Sys.unsetenv("MC_DETERMINISTIC") 

resLearn2$mixture$lnObservedLikelihood
# [1] -1036.879

If you first run without a seed and then with a seed inside the same R session, the seed has no effect:

library(RMixtComp)

data(simData)

algo <- list(nbBurnInIter = 50, nbIter = 50, nbGibbsBurnInIter = 50,
             nbGibbsIter = 50,  nInitPerClass = 20, nSemTry = 20, confidenceLevel = 0.95)

resLearn1 <- mixtCompLearn(simData$dataLearn$matrix, simData$model$unsupervised[1:3], algo,
                           nClass = 1:2, nRun = 1, nCore = 1)

Sys.setenv(MC_DETERMINISTIC = 42)
resLearn2 <- mixtCompLearn(simData$dataLearn$matrix, simData$model$unsupervised[1:3], algo,
                           nClass = 1:2, nRun = 1, nCore = 1)
Sys.unsetenv("MC_DETERMINISTIC") 

resLearn1$mixture$lnObservedLikelihood
# [1] -1037.089
resLearn2$mixture$lnObservedLikelihood
# [1] -1037.915 # not the results associated with seed 42

This is because the seed is managed in C++ with a static variable, so it keeps its state for the whole R session: https://github.com/modal-inria/MixtComp/blob/master/MixtComp/src/lib/Statistic/RNG.h