IQSS / Amelia

Amelia: A Package for Missing Data
http://gking.harvard.edu/amelia
61 stars 17 forks source link

reproducibility with parallel processing #21

Open Aariq opened 4 years ago

Aariq commented 4 years ago

set.seed() does not seem to work when using parallel = "multicore". I assume that's because there's no way to pass the seed on to the parallel jobs. I'm not sure whether this is a bug or simply a limitation of using parallel processing with Amelia.

# Reprex: run amelia() twice under the same seed, first without the
# `parallel` argument and then with parallel = "multicore", and compare the
# first imputed data set of each pair of runs.
library(Amelia)
#> Warning: package 'Amelia' was built under R version 4.0.2
#> Loading required package: Rcpp
#> ## 
#> ## Amelia II: Multiple Imputation
#> ## (Version 1.7.6, built: 2019-11-24)
#> ## Copyright (C) 2005-2020 James Honaker, Gary King and Matthew Blackwell
#> ## Refer to http://gking.harvard.edu/amelia/ for more information
#> ##
# parallel is attached for detectCores(), used in the multicore runs below.
library(parallel)
# Load the africa data set that all four amelia() calls impute.
data(africa)

# Reproducible:
# Same seed and same call (no `parallel` argument) before each run; the
# imputed values printed for run 1 and run 2 below are identical.
set.seed(123)
a.out1 <- amelia(x = africa, cs = "country", ts = "year", logs = "gdp_pc", p2s = 0)

set.seed(123)
a.out2 <- amelia(x = africa, cs = "country", ts = "year", logs = "gdp_pc", p2s = 0)

## original
# Rows 38-42 of the raw data; the NA cells in gdp_pc and trade are the ones
# that appear filled in in the imputation output below.
africa[38:42, ]
#>    year  country gdp_pc  infl trade    civlib population
#> 38 1989  Burundi    532 11.66    NA 0.1666667    5330730
#> 39 1990  Burundi    550  7.00    NA 0.1666667    5487000
#> 40 1991  Burundi    560  9.00 38.42 0.1666667    5643320
#> 41 1972 Cameroon    815  8.09 46.48 0.5000000    6835870
#> 42 1973 Cameroon     NA 10.38    NA 0.5000000    7021850

## run 1
a.out1$imputations[[1]][38:42, ]
#>    year  country   gdp_pc  infl    trade    civlib population
#> 38 1989  Burundi  532.000 11.66 34.01444 0.1666667    5330730
#> 39 1990  Burundi  550.000  7.00 28.77401 0.1666667    5487000
#> 40 1991  Burundi  560.000  9.00 38.42000 0.1666667    5643320
#> 41 1972 Cameroon  815.000  8.09 46.48000 0.5000000    6835870
#> 42 1973 Cameroon 1534.801 10.38 85.77617 0.5000000    7021850

## run 2
a.out2$imputations[[1]][38:42, ]
#>    year  country   gdp_pc  infl    trade    civlib population
#> 38 1989  Burundi  532.000 11.66 34.01444 0.1666667    5330730
#> 39 1990  Burundi  550.000  7.00 28.77401 0.1666667    5487000
#> 40 1991  Burundi  560.000  9.00 38.42000 0.1666667    5643320
#> 41 1972 Cameroon  815.000  8.09 46.48000 0.5000000    6835870
#> 42 1973 Cameroon 1534.801 10.38 85.77617 0.5000000    7021850

# Not Reproducible:
# Same seed and data as above, but with parallel = "multicore" and
# ncpus = detectCores() - 1; the imputed values printed for run 1 and run 2
# below differ from each other.
# NOTE(review): detectCores() - 1 presumably leaves one core free; on a
# single-core machine this would be 0, and detectCores() may return NA --
# confirm how amelia() handles those values.
# NOTE(review): selecting RNGkind("L'Ecuyer-CMRG") before set.seed() may make
# forked workers reproducible -- unverified for amelia(); confirm against the
# parallel package documentation.
set.seed(123)
a.out1 <- amelia(x = africa, cs = "country", ts = "year", logs = "gdp_pc", p2s = 0, parallel = "multicore", ncpus = detectCores() - 1)

set.seed(123)
a.out2 <- amelia(x = africa, cs = "country", ts = "year", logs = "gdp_pc", p2s = 0, parallel = "multicore", ncpus = detectCores() - 1)

## run 1
a.out1$imputations[[1]][38:42, ]
#>    year  country  gdp_pc  infl    trade    civlib population
#> 38 1989  Burundi 532.000 11.66 41.76351 0.1666667    5330730
#> 39 1990  Burundi 550.000  7.00 64.16109 0.1666667    5487000
#> 40 1991  Burundi 560.000  9.00 38.42000 0.1666667    5643320
#> 41 1972 Cameroon 815.000  8.09 46.48000 0.5000000    6835870
#> 42 1973 Cameroon 871.101 10.38 64.33208 0.5000000    7021850

## run 2
a.out2$imputations[[1]][38:42, ]
#>    year  country   gdp_pc  infl    trade    civlib population
#> 38 1989  Burundi 532.0000 11.66 40.37939 0.1666667    5330730
#> 39 1990  Burundi 550.0000  7.00 25.26368 0.1666667    5487000
#> 40 1991  Burundi 560.0000  9.00 38.42000 0.1666667    5643320
#> 41 1972 Cameroon 815.0000  8.09 46.48000 0.5000000    6835870
#> 42 1973 Cameroon 570.4258 10.38 48.06267 0.5000000    7021850

Created on 2020-08-17 by the reprex package (v0.3.0)