def make_model(data,num_draws): # 2-val case (e.g. Intelligence) is done by d
partial_dirich = pymc.Dirichlet(name='partial_dirich',theta=[1.,1.,1.]) # k-1 probs stored.
full_dirich = pymc.CompletedDirichlet(name='full_dirich',D=partial_dirich) # last prob stored.
multi = pymc.Multinomial(name='multi',value=data,n=num_draws,p=full_dirich,observed=True)
model = Model([partial_dirich,full_dirich,multi])
return model
def run_mcmc(model, **kwargs):
mcmc = pymc.MCMC(model)
mcmc.sample(**kwargs)
return mcmc
def get_counts(vals): # from 0,1,2,... result to c(0),c(1),... freq-counts.
freqs = defaultdict(int)
for val in vals:
freqs[val] += 1 # alternatively, b[val] = b.get(val,0) + 1, where get(val,0) means
# "return val if it exists in the dict as a key, otherwise 0."
return np.asarray(freqs.values())
def get_samples(experiments, evidence, num_samples=10):
# for n experiments, sample and add the freqs obtained.
ret = []
for i in xrange(experiments):
vals = [float(i['Grade']) for i in bn.randomsample(num_samples,evidence)]
ret.append(get_counts(vals))
return ret
def plot_traces(traces):
# colors = ["#348ABD","#A60628","#884732"]
colors = ['r','b','g']
plt.plot(np.arange(len(traces)),traces[:,:,0],c=colors[0])
plt.plot(np.arange(len(traces)),traces[:,:,1],c=colors[1])
plt.plot(np.arange(len(traces)),traces[:,:,2],c=colors[2])
plt.title("Traces of Posteriors")
plt.show()
def estimate_params(evidence, **kwargs):
experiments = 10
samples = get_samples(experiments, evidence, num_samples=10)
model = make_model(samples, experiments)
mc = run_mcmc(model,**kwargs)
traces = mc.trace('full_dirich')[:]
return [np.mean(traces[:,:,0]), np.mean(traces[:,:,1]), np.mean(traces[:,:,2])], traces
I then ran the following code:
cond_assignments = [['0','0'],['0','1'],['1','0'],['1','1']]
# cond_assignments = [[0,0],[0,1],[1,0],[1,1]]
ret = []
for i,j in cond_assignments:
evidence = dict(Intelligence=i, Difficulty=j)
means, traces = estimate_params(evidence, iter=500, burn=100)
ret.append([[i,j]] + means)
pd.DataFrame(ret, columns=['cond_assignment',0,1,2])
and got the error:
[-----------------100%-----------------] 500 of 500 complete in 0.1 sec<class 'pymc.distributions.Dirichlet'>
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-664-23e0d4362f67> in <module>()
4 for i,j in cond_assignments:
5 evidence = dict(Intelligence=i, Difficulty=j)
----> 6 means, traces = estimate_params(evidence, iter=500, burn=100)
7 ret.append([[i,j]] + means)
8 pd.DataFrame(ret, columns=['cond_assignment',0,1,2])
<ipython-input-659-c4ed1fa1bdff> in estimate_params(evidence, **kwargs)
39 experiments = 10
40 samples = get_samples(experiments, evidence, num_samples=10)
---> 41 model = make_model(samples, experiments)
42 mc = run_mcmc(model,**kwargs)
43 traces = mc.trace('full_dirich')[:]
<ipython-input-659-c4ed1fa1bdff> in make_model(data, num_draws)
3 full_dirich = pymc.CompletedDirichlet(name='full_dirich',D=partial_dirich) # last prob stored.
4 print type(partial_dirich)
----> 5 multi = pymc.Multinomial(name='multi',value=data,n=num_draws,p=full_dirich,observed=True)
6 model = Model([partial_dirich,full_dirich,multi])
7 return model
/Users/jacobsw/anaconda2/lib/python2.7/site-packages/pymc/distributions.pyc in __init__(self, name, n, p, trace, value, rseed, observed, cache_depth, plot, verbose, **kwds)
3269 rseed=rseed, observed=observed,
3270 cache_depth=cache_depth, plot=plot,
-> 3271 verbose=verbose, **kwds)
3272
3273
/Users/jacobsw/anaconda2/lib/python2.7/site-packages/pymc/PyMCObjects.pyc in __init__(self, logp, doc, name, parents, random, trace, value, dtype, rseed, observed, cache_depth, plot, verbose, isdata, check_logp, logp_partial_gradients)
749 'Stochastic %s: Failed to cast initial value to required dtype.\n\nOriginal error message:\n' %
750 name + inst.message)
--> 751 six.reraise(cls, new_inst, tb)
752
753 # Store the shape of the stochastic value
/Users/jacobsw/anaconda2/lib/python2.7/site-packages/pymc/PyMCObjects.pyc in __init__(self, logp, doc, name, parents, random, trace, value, dtype, rseed, observed, cache_depth, plot, verbose, isdata, check_logp, logp_partial_gradients)
740 # Convert Pandas DataFrames and Series to numpy arrays
741 value = getattr(value, 'values', value)
--> 742 self._value = np.array(value, dtype=dtype)
743 self._value.flags['W'] = False
744 else:
ValueError: Stochastic multi: Failed to cast initial value to required dtype.
Original error message:
setting an array element with a sequence.
Curiously, it works for one run occasionally, but doesn't when I ran multiple experiments.
Could anybody help with the issue? Appreciate it.
Found the bug freqs = defaultdict(int) causes the samples to have different length sometimes (i.e. some value of variables not sampled). Fixed with a dictionary with preset values).
I am following the book Probabilistic Graphical Model with Python (https://www.packtpub.com/big-data-and-business-intelligence/building-probabilistic-graphical-models-python), trying to run a Bayesian Parameter Estimation. The code for the facility functions is as follows:
I then ran the following code:
and got the error:
Curiously, it works for one run occasionally, but doesn't when I ran multiple experiments. Could anybody help with the issue? Appreciate it.