HenrikBengtsson / doFuture

:rocket: R package: doFuture - Use Foreach to Parallelize via Future Framework
https://doFuture.futureverse.org
84 stars 6 forks source link

BUG(?): Nested strategies do not work with nested foreach() %dopar% {} calls #4

Closed HenrikBengtsson closed 8 years ago

HenrikBengtsson commented 8 years ago

Hmm... there might be a bug in doFuture preventing it from properly nesting future strategies. I'm not sure, it needs to be investigated further, but below is what I get when testing on a TORQUE / PBS cluster.

This was first reported by @alexvorobiev in Issue https://github.com/HenrikBengtsson/future/issues/95.

Nested futures work as expected

library("future")
library("future.BatchJobs")

plan(list(
  tweak(batchjobs_torque, resources = list(nodes="1:ppn=4")),
  tweak(multiprocess)
))

outer %<-% {
  info <- list(
    available = availableCores(),
    all       = availableCores(which="all"),
    pid       = Sys.getpid()
  )

  inner1 %<-% {
    Sys.sleep(10)
    list(
      available = availableCores(),
      all       = availableCores(which="all"),
      pid       = Sys.getpid()
    )
  }

  inner2 %<-% {
    Sys.sleep(10)
    list(
      available = availableCores(),
      all       = availableCores(which="all"),
      pid       = Sys.getpid()
    )
  }

  c(outer=info, inner1=inner1, inner2=inner2)
}

outputs

List of 9
 $ outer.available  : Named int 4
  ..- attr(*, "names")= chr "PBS"
 $ outer.all        : Named int [1:2] 64 4
  ..- attr(*, "names")= chr [1:2] "system" "PBS"
 $ outer.pid        : int 461160
 $ inner1.available: Named int 1
  ..- attr(*, "names")= chr "mc.cores+1"
 $ inner1.all      : Named int [1:4] 64 1 1 4
  ..- attr(*, "names")= chr [1:4] "system" "mc.cores+1" "_R_CHECK_LIMIT_CORES_" "PBS"
 $ inner1.pid      : int 461378
 $ inner2.available: Named int 1
  ..- attr(*, "names")= chr "mc.cores+1"
 $ inner2.all      : Named int [1:4] 64 1 1 4
  ..- attr(*, "names")= chr [1:4] "system" "mc.cores+1" "_R_CHECK_LIMIT_CORES_" "PBS"
 $ inner2.pid      : int 461384

Nested futures using foreach (not correct)

library("doFuture")
library("future.BatchJobs")
registerDoFuture()

plan(list(
  tweak(batchjobs_torque, resources = list(nodes="1:ppn=4")),
  tweak(multiprocess)
))

outer <- foreach(ii = 1L) %dopar% {
  info <- list(
    available = availableCores(),
    all       = availableCores(which="all"),
    pid       = Sys.getpid()
  )

  inner <- foreach(jj = 1:2) %dopar% {
    Sys.sleep(10)
    list(
      available = availableCores(),
      all       = availableCores(which="all"),
      pid       = Sys.getpid()
    )
  }

  c(outer=info, inner=inner)
}
> str(outer)
List of 1
 $ :List of 5
  ..$ outer.available: Named int 4
  .. ..- attr(*, "names")= chr "PBS"
  ..$ outer.all      : Named int [1:2] 64 4
  .. ..- attr(*, "names")= chr [1:2] "system" "PBS"
  ..$ outer.pid      : int 493503
  ..$ inner1         :List of 3
  .. ..$ available: Named int 4
  .. .. ..- attr(*, "names")= chr "PBS"
  .. ..$ all      : Named int [1:2] 64 4
  .. .. ..- attr(*, "names")= chr [1:2] "system" "PBS"
  .. ..$ pid      : int 493503
  ..$ inner2         :List of 3
  .. ..$ available: Named int 4
  .. .. ..- attr(*, "names")= chr "PBS"
  .. ..$ all      : Named int [1:2] 64 4
  .. .. ..- attr(*, "names")= chr [1:2] "system" "PBS"
  .. ..$ pid      : int 493503

Session info

> sessionInfo()
R Under development (unstable) (2016-09-21 r71333)
Platform: x86_64-pc-linux-gnu (64-bit)

locale:
[1] C

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] doFuture_0.2.1          iterators_1.0.8         foreach_1.4.3          
[4] BatchJobs_1.6           BBmisc_1.10             future.BatchJobs_0.13.0
[7] future_1.0.1           

loaded via a namespace (and not attached):
 [1] codetools_0.2-14   listenv_0.6.0      digest_0.6.10      R.methodsS3_1.7.1 
 [5] DBI_0.5-1          backports_1.0.3    magrittr_1.5       RSQLite_1.0.0     
 [9] stringi_1.1.1      R.oo_1.20.0        R.utils_2.3.0-9000 brew_1.0-6        
[13] checkmate_1.8.1    tools_3.4.0        stringr_1.1.0      compiler_3.4.0    
[17] parallel_3.4.0     sendmailR_1.2-1    base64enc_0.1-3    globals_0.7.0     
[21] fail_1.3
alexvorobiev commented 8 years ago

Running the second example under my Slurm setup produces similar output, same pid for everything:

> outer
[[1]]
[[1]]$outer.available
Slurm 
    4 

[[1]]$outer.all
system  Slurm 
    48      4 

[[1]]$outer.pid
[1] 21863

[[1]]$inner1
[[1]]$inner1$available
Slurm 
    4 

[[1]]$inner1$all
system  Slurm 
    48      4 

[[1]]$inner1$pid
[1] 21863

[[1]]$inner2
[[1]]$inner2$available
Slurm 
    4 

[[1]]$inner2$all
system  Slurm 
    48      4 

[[1]]$inner2$pid
[1] 21863

>
HenrikBengtsson commented 8 years ago

Below seems to be a reproducible example that doesn't involve future.BatchJobs:

library("doFuture")
registerDoFuture()

plan(list(
  tweak(multisession, workers = 2L),
  tweak(multisession, workers = 3L)
))

outer <- foreach(ii = 1L) %dopar% {
  info <- list(
    available = availableCores(),
    all       = availableCores(which="all"),
    pid       = Sys.getpid()
  )

  inner <- foreach(jj = 1:2) %dopar% {
    Sys.sleep(2)
    list(
      available = availableCores(),
      all       = availableCores(which="all"),
      pid       = Sys.getpid()
    )
  }

  c(outer=info, inner=inner)
}

with output:

> str(outer)
List of 1
 $ :List of 5
  ..$ outer.available: Named int 1
  .. ..- attr(*, "names")= chr "mc.cores+1"
  ..$ outer.all      : Named int [1:3] 4 1 1
  .. ..- attr(*, "names")= chr [1:3] "system" "mc.cores+1" "_R_CHECK_LIMIT_CORES_"
  ..$ outer.pid      : int 7805
  ..$ inner1         :List of 3
  .. ..$ available: Named int 1
  .. .. ..- attr(*, "names")= chr "mc.cores+1"
  .. ..$ all      : Named int [1:3] 4 1 1
  .. .. ..- attr(*, "names")= chr [1:3] "system" "mc.cores+1" "_R_CHECK_LIMIT_CORES_"
  .. ..$ pid      : int 7805
  ..$ inner2         :List of 3
  .. ..$ available: Named int 1
  .. .. ..- attr(*, "names")= chr "mc.cores+1"
  .. ..$ all      : Named int [1:3] 4 1 1
  .. .. ..- attr(*, "names")= chr [1:3] "system" "mc.cores+1" "_R_CHECK_LIMIT_CORES_"
  .. ..$ pid      : int 7805
> Sys.getpid()
[1] 7775
HenrikBengtsson commented 8 years ago

Ah... I realized why; one needs to get the foreach package to use futures also in the nested calls, i.e. specify registerDoFuture() recursively. This should be done internally by doFuture, but a workaround until that is in place is to do it manually as in:

library("doFuture")
registerDoFuture()

plan(list(
  tweak(multisession, workers = 2L),
  tweak(multisession, workers = 3L)
))

outer <- foreach(ii = 1L) %dopar% {
  registerDoFuture()  ## FIX

  info <- list(
    available = availableCores(),
    all       = availableCores(which="all"),
    pid       = Sys.getpid()
  )

  inner <- foreach(jj = 1:2) %dopar% {
    Sys.sleep(2)
    list(
      available = availableCores(),
      all       = availableCores(which="all"),
      pid       = Sys.getpid()
    )
  }

  c(outer=info, inner=inner)
}

which outputs

> str(outer)
List of 1
 $ :List of 5
  ..$ outer.available: Named int 1
  .. ..- attr(*, "names")= chr "mc.cores+1"
  ..$ outer.all      : Named int [1:3] 4 1 1
  .. ..- attr(*, "names")= chr [1:3] "system" "mc.cores+1" "_R_CHECK_LIMIT_CORES_"
  ..$ outer.pid      : int 23346
  ..$ inner1         :List of 3
  .. ..$ available: Named int 1
  .. .. ..- attr(*, "names")= chr "mc.cores+1"
  .. ..$ all      : Named int [1:3] 4 1 1
  .. .. ..- attr(*, "names")= chr [1:3] "system" "mc.cores+1" "_R_CHECK_LIMIT_CORES_"
  .. ..$ pid      : int 23370
  ..$ inner2         :List of 3
  .. ..$ available: Named int 1
  .. .. ..- attr(*, "names")= chr "mc.cores+1"
  .. ..$ all      : Named int [1:3] 4 1 1
  .. .. ..- attr(*, "names")= chr [1:3] "system" "mc.cores+1" "_R_CHECK_LIMIT_CORES_"
  .. ..$ pid      : int 23379
HenrikBengtsson commented 8 years ago

Solved in the develop branch;

source('http://callr.org/install#HenrikBengtsson/doFuture@develop')
alexvorobiev commented 8 years ago

Thanks for the fix, I will definitely try it! When are you planning to merge it to master?

HenrikBengtsson commented 8 years ago

My branch model follows the git flow ideas, so for me master == latest (CRAN) release.

In order for the next doFuture release to be submitted to CRAN, future (>= 1.1.0) had to reach CRAN first, which happened a week ago, so doFuture can now be submitted. However, would you mind taking the develop branch for a serious ride on your end before I submit it?

HenrikBengtsson commented 8 years ago

doFuture 0.3.0 is now on CRAN.

alexvorobiev commented 8 years ago

Sorry, I wasn't able to test the development branch; I am testing the new release now.