Closed cliffckerr closed 7 months ago
Is there a specific generator you have in mind here, or a more general/efficient approach to using any RNG?
Numpy's RNG has much better performance than SciPy, including frozen:
import numpy as np
import sciris as sc
import scipy.stats as sps
reps = int(1e3)
size = int(1e5)
with sc.timer('frozen'):
unif = sps.uniform(loc=4, scale=6)
for i in range(int(reps)):
unif.rvs(size=size)
with sc.timer('regular'):
for i in range(int(reps)):
sps.uniform.rvs(loc=4, scale=6, size=size)
with sc.timer('numpy'):
rng = np.random.default_rng()
for i in range(int(reps)):
rng.uniform(low=4, high=10, size=size)
# frozen: 0.437 s
# regular: 0.436 s
# numpy: 0.267 s
Code for testing lognormal distributions:
import sciris as sc
import numpy as np
import pylab as pl
import scipy.stats as sps
ss2 = sc.importbypath('/home/cliffk/idm/starsim2/starsim') # Version 0.2.8
ss1 = sc.importbypath('/home/cliffk/idm/starsim/starsim', 'ss1') # Version 0.3.0
n = 10000
a = 0.5
b = 0.5
seed = 4
np.random.seed(seed)
s1_ln = ss1.lognorm_o(a, b, seed=seed, strict=False)
s1_ln2 = ss1.lognorm_u(a, b, seed=seed, strict=False)
s2_ln = ss2.lognorm(a, b)
s2_ln2 = ss2.lognorm_mean(a,b)
np_ln = np.random.default_rng()
sp_ln = sps.lognorm(a, np.exp(b))
mapping = sc.objdict({
's1: lognorm_o': s1_ln.rvs(n),
's1: lognorm_u': s1_ln2.rvs(n),
's2: lognorm': s2_ln.rvs(n),
's2: lognorm_mean': s2_ln2.rvs(n),
'np: lognormal': np_ln.lognormal(a, b, size=n),
'sps: lognorm': sp_ln.rvs(n),
})
sc.options(dpi=200)
for i,key,val in mapping.enumitems():
pl.subplot(3,2,i+1)
pl.hist(val, bins=101)
pl.title(key + f' {val.mean():n}')
pl.xlim(left=0, right=50)
sc.figlayout()
pl.show()
np.random.random()
is 2.5x faster than np.random.binomial()
and 3.8x faster than sps.bernoulli()
:
import numpy as np
import scipy.stats as sps
import sciris as sc
reps = range(10_000)
size = 10_000
p = 0.1
rng = np.random.default_rng()
ber = sps.bernoulli(p)
T = sc.timer()
for r in reps:
r = rng.binomial(1, p, size=size)
T.tt('binomial')
for r in reps:
r = rng.random(size=size) < p
T.tt('random')
for r in reps:
r = ber.rvs(size=size)
T.tt('bernoulli')
t = sc.objdict(T.timings)
print('binomial/random:', t.binomial/t.random)
print('bernoulli/random:', t.bernoulli/t.random)
binomial: 0.594 s
random: 0.240 s
bernoulli: 0.911 s
binomial/random: 2.477583347658078
bernoulli/random: 3.7954794086347907
Code for checking on which timestep two sims diverge:
import sciris as sc
import starsim as ss
pars = dict(diseases='sir', networks='embedding', n_agents=2000, verbose=0, rand_seed=1)
s1 = ss.Sim(pars, label='s1')
s1.initialize()
s2 = sc.dcp(s1)
s2.label = 's2'
def check_states(s1, s2, die=False):
d1 = s1.dists.dists
d2 = s2.dists.dists
keys = d1.keys()
matches = sc.objdict()
mismatches = sc.objdict()
for key in keys:
d1s = d1[key].state
d2s = d2[key].state
match = d1s == d2s
assert id(d1s) != id(d2s)
matches[key] = match
if not match:
mismatches[key] = sc.objdict(s1=d1s, s2=d2s)
if die: assert len(mismatches) == 0
return matches, mismatches
matches, mismatches = check_states(s1, s2, die=True)
for i in range(10):
sc.heading('Timestep', i)
matches, mismatches = check_states(s1, s2)
s1.step()
s2.step()
matches, mismatches = check_states(s1, s2)
out = ss.diff_sims(s1, s2, full=True, output=True)
print(out)
# assert np.allclose(s1.summary[:], s2.summary[:], rtol=0, atol=0, equal_nan=True)
Closed by #392
Hash & cache is actually slower than generating new random numbers:
import numpy as np
import sciris as sc
import numba as nb
@nb.njit
def make_rand1(n):
return np.random.random(n)
@nb.njit
def make_rand2(n):
out = np.empty(n)
for i in range(n):
out[i] = np.random.rand()
return out
# @nb.njit
def make_rand3(rands, n):
return rands[np.arange(n)]
reps = 100
n = int(1e6)
make_rand1(5)
make_rand2(5)
rng = np.random.default_rng()
rands = np.random.random(int(1e7))
with sc.timer('numpy'):
for rep in range(reps):
rng.random(n)
with sc.timer('numba1'):
for rep in range(reps):
make_rand1(n)
with sc.timer('numba2'):
for rep in range(reps):
make_rand2(n)
with sc.timer('numba3'):
for rep in range(reps):
make_rand3(rands, n)
# numpy: 0.225 s
# numba1: 0.249 s
# numba2: 0.251 s
# numba3: 0.258 s
Actually it is about 3x faster, up to 1 million numbers:
import numpy as np
import sciris as sc
import pylab as pl
rng = np.random.default_rng()
reps = 100
rands = rng.random(int(1e8))
nlist = [int(10**n) for n in np.arange(1,7.5,0.5)]
d = dict()
d['x'] = []
d['r1'] = []
d['r2'] = []
T1 = sc.timer()
T2 = sc.timer()
for n in nlist:
print(n)
d['x'].append(n)
T1.tic()
for rep in range(reps):
rng.random(n)
d['r1'].append(T1.tto())
T2.tic()
for rep in range(reps):
rands[np.arange(n)]
d['r2'].append(T2.tto())
d = sc.objdict(d)
sc.options(dpi=200)
pl.loglog(d.x, d.r1, 'o-', label='np.random.rand(n)')
pl.loglog(d.x, d.r2, 'o-', label='rands[np.arange(n)]')
pl.legend()
pl.xlabel('Number of numbers')
pl.ylabel('Time (s)')
for i in range(len(d.x)):
print(f'{d.x[i]:12,}: {d.r1[i]/d.r2[i]:n}')
10: 0.29537
31: 1.19883
100: 1.27228
316: 1.50857
1,000: 2.09693
3,162: 2.69544
10,000: 2.83491
31,622: 2.92843
100,000: 2.79662
316,227: 2.9385
1,000,000: 2.63949
3,162,277: 0.893016
10,000,000: 1.15067
(NB: y-axis is for 100 repeats!)
Nice investigation! In that last example, it looks like the cache-based approach is cheating as the random numbers are pre-computed and all that is required is a look-up, is that right? It seems the pure numpy route is doing more work, actually computing random numbers in the loop. I might be missing something?
I thought this issue was going to be about hash-based random number generators (which do not require any caching), see:
Widynski, Bernard. “Squares: A Fast Counter-Based RNG.” arXiv, March 13, 2022. https://doi.org/10.48550/arXiv.2004.06278.
A lookup is fine as long as the lookup itself is pseudorandomized. For example, if a given sim uses N random numbers, one could imagine generating N random numbers at the beginning of the sim and then just using them sequentially to avoid multiple calls to the generator (not CRN-safe, of course!). But beyond that, if you have a "sufficiently large" number of random numbers, it should be fine to reuse them -- if you use the same random number for infection probability of agent 258 on timestep 3 as death probability for agent 7244 on timestep 29, that shouldn't make any difference to the results, unless the indexing isn't sufficiently pseudorandomized (e.g. agent 258 always gets the 258th 'random' number!).
Open question is whether random number generation is a big enough proportion of overall simulation time to make this worthwhile exploring/implementing -- my sense is that it probably isn't.
Potentially a way to generate random numbers without having to generate extra.
pandas
with more direct/faster approachScipyDistribution
See also https://github.com/amath-idm/starsim/issues/299
New checklist:
test_random.py
test_rngcontainer.py
test_distributions.py
Even newer: