greta-dev / greta

simple and scalable statistical modelling in R
https://greta-stats.org
Other
539 stars 63 forks source link

update documentation for setting seed in MCMC #736

Open njtierney opened 3 weeks ago

njtierney commented 3 weeks ago

See https://github.com/greta-dev/greta/issues/559

And also update "note" docs in mcmc()

njtierney commented 2 weeks ago

Somewhat confusingly, it seems you can use either set.seed() or tensorflow::set_random_seed(), and the two usually give equivalent results, e.g.:

devtools::load_all(".")
#> ℹ Loading greta
#> ℹ Initialising python and checking dependencies, this may take a moment.
#> 
#> ✔ Initialising python and checking dependencies ... done!
# Minimal model used by the seed checks in this reprex: `a` and `y` are
# created with normal(), with `y` taking `a` as its first argument, and the
# model is defined on `y`.
a <- normal(0, 1)
y <- normal(a, 1)
m <- model(y)

# Reset R's seed to the same value before each calculate() call, then compare
# the two simulated values. The recorded output shows both runs printing the
# same number and all.equal() returning TRUE.
set.seed(12345)
c_one <- calculate(y, nsim = 1)
set.seed(12345)
c_two <- calculate(y, nsim = 1)
as.numeric(c_one)
#> [1] 1.236408
as.numeric(c_two)
#> [1] 1.236408
all.equal(as.numeric(c_one), as.numeric(c_two))
#> [1] TRUE

# Same comparison, but passing the seed through calculate()'s `seed` argument
# instead of calling set.seed() first. The two runs agree with each other,
# although the recorded value differs from the set.seed(12345) output.
c_one <- calculate(y, nsim = 1, seed = 12345)
c_two <- calculate(y, nsim = 1, seed = 12345)
as.numeric(c_one)
#> [1] -1.84305
as.numeric(c_two)
#> [1] -1.84305

all.equal(as.numeric(c_one), as.numeric(c_two))
#> [1] TRUE

# First mcmc() run with R's seed set to 12345 immediately beforehand
# (one chain, one posterior sample).
set.seed(12345)
one <- mcmc(m, n_samples = 1, chains = 1)
#>     warmup                                           0/1000 | eta:  ?s              warmup ==                                       50/1000 | eta: 13s              warmup ====                                    100/1000 | eta:  7s              warmup ======                                  150/1000 | eta:  5s              warmup ========                                200/1000 | eta:  4s              warmup ==========                              250/1000 | eta:  3s              warmup ===========                             300/1000 | eta:  2s              warmup =============                           350/1000 | eta:  2s              warmup ===============                         400/1000 | eta:  2s              warmup =================                       450/1000 | eta:  2s              warmup ===================                     500/1000 | eta:  1s              warmup =====================                   550/1000 | eta:  1s              warmup =======================                 600/1000 | eta:  1s              warmup =========================               650/1000 | eta:  1s              warmup ===========================             700/1000 | eta:  1s              warmup ============================            750/1000 | eta:  1s              warmup ==============================          800/1000 | eta:  0s              warmup ================================        850/1000 | eta:  0s              warmup ==================================      900/1000 | eta:  0s              warmup ====================================    950/1000 | eta:  0s              warmup ====================================== 1000/1000 | eta:  0s          
#>   sampling                                              0/1 | eta:  ?s
# Second mcmc() run after resetting R's seed to the same value, so its draw
# can be compared with `one`.
set.seed(12345)
two <- mcmc(m, n_samples = 1, chains = 1)
#>     warmup                                           0/1000 | eta:  ?s              warmup ==                                       50/1000 | eta: 10s              warmup ====                                    100/1000 | eta:  5s              warmup ======                                  150/1000 | eta:  4s              warmup ========                                200/1000 | eta:  3s              warmup ==========                              250/1000 | eta:  2s              warmup ===========                             300/1000 | eta:  2s              warmup =============                           350/1000 | eta:  2s              warmup ===============                         400/1000 | eta:  2s              warmup =================                       450/1000 | eta:  1s              warmup ===================                     500/1000 | eta:  1s              warmup =====================                   550/1000 | eta:  1s              warmup =======================                 600/1000 | eta:  1s              warmup =========================               650/1000 | eta:  1s              warmup ===========================             700/1000 | eta:  1s              warmup ============================            750/1000 | eta:  1s              warmup ==============================          800/1000 | eta:  0s              warmup ================================        850/1000 | eta:  0s              warmup ==================================      900/1000 | eta:  0s              warmup ====================================    950/1000 | eta:  0s              warmup ====================================== 1000/1000 | eta:  0s          
#>   sampling                                              0/1 | eta:  ?s
# Compare the draws from the two set.seed() runs; the recorded output shows
# the same value for both and all.equal() returning TRUE.
as.numeric(one)
#> [1] 1.609063
as.numeric(two)
#> [1] 1.609063

all.equal(as.numeric(one), as.numeric(two))
#> [1] TRUE

# First mcmc() run seeded through tensorflow::set_random_seed() rather than
# set.seed() (one chain, one posterior sample).
tensorflow::set_random_seed(12345)
one_tf <- mcmc(m, n_samples = 1, chains = 1)
#>     warmup                                           0/1000 | eta:  ?s              warmup ==                                       50/1000 | eta: 10s              warmup ====                                    100/1000 | eta:  6s              warmup ======                                  150/1000 | eta:  4s              warmup ========                                200/1000 | eta:  3s              warmup ==========                              250/1000 | eta:  3s              warmup ===========                             300/1000 | eta:  2s              warmup =============                           350/1000 | eta:  2s              warmup ===============                         400/1000 | eta:  2s              warmup =================                       450/1000 | eta:  1s              warmup ===================                     500/1000 | eta:  1s              warmup =====================                   550/1000 | eta:  1s              warmup =======================                 600/1000 | eta:  1s              warmup =========================               650/1000 | eta:  1s              warmup ===========================             700/1000 | eta:  1s              warmup ============================            750/1000 | eta:  1s              warmup ==============================          800/1000 | eta:  0s              warmup ================================        850/1000 | eta:  0s              warmup ==================================      900/1000 | eta:  0s              warmup ====================================    950/1000 | eta:  0s              warmup ====================================== 1000/1000 | eta:  0s          
#>   sampling                                              0/1 | eta:  ?s
# Second mcmc() run after resetting the seed through
# tensorflow::set_random_seed(), so its draw can be compared with `one_tf`.
tensorflow::set_random_seed(12345)
two_tf <- mcmc(m, n_samples = 1, chains = 1)
#>     warmup                                           0/1000 | eta:  ?s              warmup ==                                       50/1000 | eta: 10s              warmup ====                                    100/1000 | eta:  5s              warmup ======                                  150/1000 | eta:  4s              warmup ========                                200/1000 | eta:  3s              warmup ==========                              250/1000 | eta:  2s              warmup ===========                             300/1000 | eta:  2s              warmup =============                           350/1000 | eta:  2s              warmup ===============                         400/1000 | eta:  2s              warmup =================                       450/1000 | eta:  1s              warmup ===================                     500/1000 | eta:  1s              warmup =====================                   550/1000 | eta:  1s              warmup =======================                 600/1000 | eta:  1s              warmup =========================               650/1000 | eta:  1s              warmup ===========================             700/1000 | eta:  1s              warmup ============================            750/1000 | eta:  1s              warmup ==============================          800/1000 | eta:  0s              warmup ================================        850/1000 | eta:  0s              warmup ==================================      900/1000 | eta:  0s              warmup ====================================    950/1000 | eta:  0s              warmup ====================================== 1000/1000 | eta:  0s          
#>   sampling                                              0/1 | eta:  ?s
# Compare the draws from the two tensorflow::set_random_seed() runs; the
# recorded output shows the same value for both and all.equal() returning
# TRUE.
as.numeric(one_tf)
#> [1] 1.609063
as.numeric(two_tf)
#> [1] 1.609063

all.equal(as.numeric(one_tf), as.numeric(two_tf))
#> [1] TRUE

Created on 2024-11-07 with reprex v2.1.1

Session info ``` r sessioninfo::session_info() #> ─ Session info ─────────────────────────────────────────────────────────────── #> setting value #> version R version 4.4.1 Patched (2024-07-08 r86915) #> os macOS Sonoma 14.5 #> system aarch64, darwin20 #> ui X11 #> language (EN) #> collate en_US.UTF-8 #> ctype en_US.UTF-8 #> tz Australia/Hobart #> date 2024-11-07 #> pandoc 3.2 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/aarch64/ (via rmarkdown) #> #> ─ Packages ─────────────────────────────────────────────────────────────────── #> ! package * version date (UTC) lib source #> abind 1.4-5 2016-07-21 [1] CRAN (R 4.4.0) #> backports 1.5.0 2024-05-23 [1] CRAN (R 4.4.0) #> base64enc 0.1-3 2015-07-28 [1] CRAN (R 4.4.0) #> brio 1.1.5 2024-04-24 [1] CRAN (R 4.4.0) #> cachem 1.1.0 2024-05-16 [1] CRAN (R 4.4.0) #> callr 3.7.6 2024-03-25 [1] CRAN (R 4.4.0) #> cli 3.6.3 2024-06-21 [1] CRAN (R 4.4.0) #> coda 0.19-4.1 2024-01-31 [1] CRAN (R 4.4.0) #> codetools 0.2-20 2024-03-31 [2] CRAN (R 4.4.1) #> crayon 1.5.3 2024-06-20 [1] CRAN (R 4.4.0) #> desc 1.4.3 2023-12-10 [1] CRAN (R 4.4.0) #> devtools 2.4.5 2022-10-11 [1] CRAN (R 4.4.0) #> digest 0.6.37 2024-08-19 [1] CRAN (R 4.4.1) #> ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.4.0) #> evaluate 1.0.1 2024-10-10 [1] CRAN (R 4.4.1) #> fastmap 1.2.0 2024-05-15 [1] CRAN (R 4.4.0) #> fs 1.6.5 2024-10-30 [1] CRAN (R 4.4.1) #> future 1.34.0 2024-07-29 [1] CRAN (R 4.4.0) #> globals 0.16.3 2024-03-08 [1] CRAN (R 4.4.0) #> glue 1.8.0 2024-09-30 [1] CRAN (R 4.4.1) #> P greta * 0.5.0 2024-11-06 [?] 
load_all() #> hms 1.1.3 2023-03-21 [1] CRAN (R 4.4.0) #> htmltools 0.5.8.1 2024-04-04 [1] CRAN (R 4.4.0) #> htmlwidgets 1.6.4 2023-12-06 [1] CRAN (R 4.4.0) #> httpuv 1.6.15 2024-03-26 [1] CRAN (R 4.4.0) #> jsonlite 1.8.9 2024-09-20 [1] CRAN (R 4.4.1) #> knitr 1.48 2024-07-07 [1] CRAN (R 4.4.0) #> later 1.3.2 2023-12-06 [1] CRAN (R 4.4.0) #> lattice 0.22-6 2024-03-20 [2] CRAN (R 4.4.1) #> lifecycle 1.0.4 2023-11-07 [1] CRAN (R 4.4.0) #> listenv 0.9.1 2024-01-29 [1] CRAN (R 4.4.0) #> magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.4.0) #> Matrix 1.7-0 2024-04-26 [2] CRAN (R 4.4.1) #> memoise 2.0.1 2021-11-26 [1] CRAN (R 4.4.0) #> mime 0.12 2021-09-28 [1] CRAN (R 4.4.0) #> miniUI 0.1.1.1 2018-05-18 [1] CRAN (R 4.4.0) #> parallelly 1.38.0 2024-07-27 [1] CRAN (R 4.4.0) #> pkgbuild 1.4.5 2024-10-28 [1] CRAN (R 4.4.1) #> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.4.0) #> pkgload 1.4.0 2024-06-28 [1] CRAN (R 4.4.0) #> png 0.1-8 2022-11-29 [1] CRAN (R 4.4.0) #> prettyunits 1.2.0 2023-09-24 [1] CRAN (R 4.4.0) #> processx 3.8.4 2024-03-16 [1] CRAN (R 4.4.0) #> profvis 0.3.8 2023-05-02 [1] CRAN (R 4.4.0) #> progress 1.2.3 2023-12-06 [1] CRAN (R 4.4.0) #> promises 1.3.0 2024-04-05 [1] CRAN (R 4.4.0) #> ps 1.8.1 2024-10-28 [1] CRAN (R 4.4.1) #> purrr 1.0.2 2023-08-10 [1] CRAN (R 4.4.0) #> R6 2.5.1 2021-08-19 [1] CRAN (R 4.4.0) #> Rcpp 1.0.13-1 2024-11-02 [1] CRAN (R 4.4.1) #> remotes 2.5.0 2024-03-17 [1] CRAN (R 4.4.0) #> reprex 2.1.1 2024-07-06 [1] CRAN (R 4.4.0) #> reticulate 1.38.0 2024-06-19 [1] CRAN (R 4.4.0) #> rlang 1.1.4 2024-06-04 [1] CRAN (R 4.4.0) #> rmarkdown 2.28 2024-08-17 [1] CRAN (R 4.4.0) #> rprojroot 2.0.4 2023-11-05 [1] CRAN (R 4.4.0) #> rstudioapi 0.16.0 2024-03-24 [1] CRAN (R 4.4.0) #> sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.4.0) #> shiny 1.9.1 2024-08-01 [1] CRAN (R 4.4.0) #> stringi 1.8.4 2024-05-06 [1] CRAN (R 4.4.0) #> stringr 1.5.1 2023-11-14 [1] CRAN (R 4.4.0) #> tensorflow 2.16.0 2024-04-15 [1] CRAN (R 4.4.0) #> testthat * 3.2.1.1 2024-04-14 [1] CRAN (R 
4.4.0) #> tfautograph 0.3.2 2021-09-17 [1] CRAN (R 4.4.0) #> tfruns 1.5.3 2024-04-19 [1] CRAN (R 4.4.0) #> urlchecker 1.0.1 2021-11-30 [1] CRAN (R 4.4.0) #> usethis 3.0.0 2024-07-29 [1] CRAN (R 4.4.0) #> vctrs 0.6.5 2023-12-01 [1] CRAN (R 4.4.0) #> whisker 0.4.1 2022-12-05 [1] CRAN (R 4.4.0) #> withr 3.0.2 2024-10-28 [1] CRAN (R 4.4.1) #> xfun 0.49 2024-10-31 [1] CRAN (R 4.4.1) #> xtable 1.8-4 2019-04-21 [1] CRAN (R 4.4.0) #> yaml 2.3.10 2024-07-26 [1] CRAN (R 4.4.0) #> yesno 0.1.3 2024-07-26 [1] CRAN (R 4.4.0) #> #> [1] /Users/nick/Library/R/arm64/4.4/library #> [2] /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library #> #> P ── Loaded and on-disk path mismatch. #> #> ─ Python configuration ─────────────────────────────────────────────────────── #> python: /Users/nick/Library/r-miniconda-arm64/envs/greta-env-tf2/bin/python #> libpython: /Users/nick/Library/r-miniconda-arm64/envs/greta-env-tf2/lib/libpython3.10.dylib #> pythonhome: /Users/nick/Library/r-miniconda-arm64/envs/greta-env-tf2:/Users/nick/Library/r-miniconda-arm64/envs/greta-env-tf2 #> version: 3.10.14 | packaged by conda-forge | (main, Mar 20 2024, 12:51:49) [Clang 16.0.6 ] #> numpy: /Users/nick/Library/r-miniconda-arm64/envs/greta-env-tf2/lib/python3.10/site-packages/numpy #> numpy_version: 1.26.4 #> tensorflow: /Users/nick/Library/r-miniconda-arm64/envs/greta-env-tf2/lib/python3.10/site-packages/tensorflow #> #> NOTE: Python version was forced by use_python() function #> #> ────────────────────────────────────────────────────────────────────────────── ```

However, when writing a test for this in test_seed.R, I see a discrepancy: running the code locally (interactively) I get the same samples for both tensorflow::set_random_seed() and set.seed(), but when running it as a test I do not... something to come back to, perhaps.


# Checks that mcmc() draws are reproducible when the seed is reset to the same
# value before each run, once via set.seed() and once via
# tensorflow::set_random_seed(), and then compares draws across the two
# seeding mechanisms.
test_that("mcmc samples are the same when the R seed is the same, also with tf set seed", {
  skip_if_not(check_tf_version())
  a <- normal(0, 1)
  y <- normal(a, 1)
  m <- model(y)

  # Same R seed before each run: the two draws are expected to match.
  set.seed(12345)
  one <- mcmc(m, warmup = 10, n_samples = 1, chains = 1)
  set.seed(12345)
  two <- mcmc(m, warmup = 10, n_samples = 1, chains = 1)

  expect_equal(
    as.numeric(one),
    as.numeric(two)
  )

  # Same seed set through tensorflow before each run: the two draws are
  # expected to match.
  tensorflow::set_random_seed(12345)
  one_tf <- mcmc(m, warmup = 10, n_samples = 1, chains = 1)
  tensorflow::set_random_seed(12345)
  two_tf <- mcmc(m, warmup = 10, n_samples = 1, chains = 1)

  expect_equal(
    as.numeric(one_tf),
    as.numeric(two_tf)
  )

  # Cross-mechanism comparison. Each draw is converted on its own so that
  # expect_equal() receives both an object and an expected value. Writing
  # `as.numeric(one, one_tf)` hands both draws to as.numeric() and leaves
  # expect_equal() without `expected`, so the call fails for that reason
  # rather than because the values differ.
  # this fails
  expect_equal(as.numeric(one), as.numeric(one_tf))
  expect_equal(as.numeric(two), as.numeric(two_tf))

  # this doesn't
  # NOTE(review): these exact-identity checks assert the opposite of the
  # expect_equal() calls above (up to tolerance). Keep only the group that
  # reflects the intended contract once the cross-seed behaviour is settled.
  mcmc_matches_tf_one <- identical(as.numeric(one), as.numeric(one_tf))
  mcmc_matches_tf_two <- identical(as.numeric(two), as.numeric(two_tf))

  expect_false(mcmc_matches_tf_one)

  expect_false(mcmc_matches_tf_two)

})