When using dynesty with multiprocessing and a fast likelihood function but a lot of data, it is easy to be hit by significant pickling overhead, which defeats the purpose of the multiprocessing.
It is easy to get around that with the simple wrapper below, which avoids sending the args/kwargs and functions over and over.
In the toy example below this speeds things up by a factor of 100 or so (try caching=True/False).
import dynesty
import numpy as np
from numpy import linalg
import multiprocessing as mp
import xpool
import time
class Model:
    """Toy multivariate-normal model carrying a deliberately large array.

    The big ``self.arr`` buffer simulates "a lot of data" attached to the
    likelihood, so that pickling the object per-call is expensive.
    """

    def __init__(self, s=(1000, 1000)):
        # Dimensionality of the Gaussian and the bulky payload array.
        self.ndim = 2
        self.arr = np.zeros(s)
        # Identity covariance, its inverse, and the log-normalization term.
        self.C2 = np.identity(self.ndim)
        self.Cinv2 = linalg.inv(self.C2)
        logdet = np.log(linalg.det(self.C2))
        self.lnorm2 = -0.5 * (self.ndim * np.log(2 * np.pi) + logdet)

    def __call__(self, x):
        """Multivariate normal log-likelihood."""
        # Tiny sleep stands in for a "fast but nonzero" likelihood cost.
        time.sleep(0.0001)
        quad = np.dot(x, self.Cinv2 @ x)
        return self.lnorm2 - 0.5 * quad

    def prior_transform(self, u):
        """Map the unit cube to the uniform prior on [-10, 10]."""
        return (2. * u - 1.) * 10.
if __name__ == '__main__':
    # Toy benchmark: run the same dynamic nested sampling with and without
    # the caching pool wrapper (flip `caching` to compare timings).
    maxiter = 2000
    M = Model()
    caching = True
    nthreads = 12

    prior_transform = M.prior_transform
    like = M
    rstate = np.random.default_rng(1)

    if caching:
        # xpool.Pool ships `like`/`prior_transform` to each worker once,
        # so the large Model payload is not re-pickled on every call.
        with xpool.Pool(nthreads, like, prior_transform) as pool:
            # Pass queue_size explicitly for consistency with the
            # non-caching branch (dynesty otherwise needs the pool to
            # advertise its size).
            dsampler2 = dynesty.DynamicNestedSampler(pool.like,
                                                     pool.prior_transform,
                                                     nlive=50,
                                                     ndim=M.ndim,
                                                     bound='single',
                                                     sample='rslice',
                                                     rstate=rstate,
                                                     pool=pool,
                                                     queue_size=nthreads)
            dsampler2.run_nested(maxiter=maxiter, use_stop=False)
    else:
        if nthreads > 1:
            pool = mp.Pool(nthreads)
        else:
            pool = None
            nthreads = None
        try:
            # Here the (large) likelihood object is pickled for every
            # batch of proposals — this is the slow path being compared.
            dsampler2 = dynesty.DynamicNestedSampler(like,
                                                     prior_transform,
                                                     nlive=50,
                                                     ndim=M.ndim,
                                                     bound='single',
                                                     sample='rslice',
                                                     rstate=rstate,
                                                     pool=pool,
                                                     queue_size=nthreads)
            dsampler2.run_nested(maxiter=maxiter, use_stop=False)
        finally:
            # Fix: the original leaked the multiprocessing pool; shut its
            # workers down cleanly.
            if pool is not None:
                pool.close()
                pool.join()
When using dynesty with multiprocessing and a fast likelihood function but a lot of data, it is easy to be hit by significant pickling overhead, which defeats the purpose of the multiprocessing. It is easy to get around that with a simple wrapper that avoids sending the args/kwargs and functions over and over.
In the toy example above this speeds things up by a factor of 100 or so (try caching=True/False).
Here's the custom Pool wrapper.
I am wondering if it is worth including as dynesty.pool or something similar.