Closed ferrine closed 7 years ago
I figured out what was going on after inspecting source. I had to mask the array manually to get the result.
Can you post the fixed code? MvRandomWalk is something I wanted to try too.
Yes, no problem:) BTW NUTS is slow :D (refer #1126)
import numpy as np
import pandas as pd
import theano.tensor as tt
import pymc3 as pm
from pymc3.distributions.multivariate import get_tau_cov
class MvGaussianRandomWalk(pm.Continuous): # maybe add this neat dist to pymc3?
def __init__(self, mu=0., cov=None, tau=None, init=Flat.dist(),
*args, **kwargs):
super(MvGaussianRandomWalk, self).__init__(*args, **kwargs)
tau, cov = get_tau_cov(mu, tau=tau, cov=cov)
self.tau = tau
self.cov = cov
self.mu = mu
self.init = init
self.mean = 0.
def logp(self, x):
tau = self.tau
mu = self.mu
init = self.init
x_im1 = x[:-1]
x_i = x[1:]
innov_like = pm.MvNormal.dist(mu=x_im1 + mu, tau=tau).logp(x_i)
return init.logp(x[0]) + tt.sum(innov_like)
def covm(sd, corr): # what about adding it as built in function but corr
n_var = sd.tag.test_value.shape[0]
n_elem = int(n_var * (n_var - 1) / 2)
tri_index = np.zeros([n_var, n_var], dtype=int)
tri_index[np.triu_indices(n_var, k=1)] = np.arange(n_elem)
tri_index[np.triu_indices(n_var, k=1)[::-1]] = np.arange(n_elem)
corrm = corr[tri_index]
corrm = tt.fill_diagonal(corrm, 1)
return tt.diag(sd).dot(corrm).dot(tt.diag(sd))
data_with_unobserved = pd.read('data_with_unobserved.csv')
masked = np.ma.MaskedArray(data_with_unobserved, np.isnan(data_with_unobserved))
p = data_with_unobserved.shape[1]
with pm.Model() as model:
corr = LKJCorr('corr', n=3, p=p)
sd = Exponential('sd', lam=1, shape=(p,))
cov = Deterministic('cov', covm(sd, corr))
walk = MvGaussianRandomWalk('walk', cov=cov, observed=masked)
with model:
step = Metropolis() # neat and fast :)
#step = NUTS() # boring and slow :(
trace = sample(10000, step=step)
NUTS is always going to be slower than Metropolis; question is whether its efficient, in terms of effective samples.
sampling is 200 times slower, suppose it is not normal
something is going wrong here, what can cause such behavior?
plt.plot(range(10), data[-20:-10])
plt.plot(range(10, 20), trace['walk_missing'][-5000:].reshape(5000, 5, 10).mean(0).T);
Data looks like
Doesn't your covm
function just have to be something like this?
S = corr + corr.T
d = tt.diag(tt.diagonal(corr))
C = S - d
tt.diag(sd).dot(C).dot(tt.diag(sd))
In any case, yes, this should be a built-in function, mimicking what Stan has in quad_form_diag
.
I cant do S = corr + corr.T
, maybe there is a better solution
>>> corr.tag.test_value
array([ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0.])
BTW there is still a problem, I found that initialization of missing_data matters heavily and it is a good idea to provide test value for it. I also suffer from strange issues, every sampling algorithm tends to converge every variable to a single point, that can be the cause of NUTS slowness.
And the provided above data is wrong, I scaled it along axis 1 instead of axis 0 first time :|
The tri_index
attribute of the LKJ should let you easily reshape it to a triangular
Thanks, didn't notice it
Got it, my time series is multi collinear. I found it in some previous experiments and forgot
That will do it.
Finally, I think it is interesting to see, that works on toy example, but fails on real data:(
import matplotlib.pyplot as plt
import matplotlib as mpl
mpl.style.use('ggplot')
%matplotlib inline
import pandas as pd
import pymc3 as pm
import numpy as np
from scipy import stats
from scenario import Scenario # will be listed later
cov = np.array(
[[10, -4, 3],
[-4, 3, -1],
[ 3, -1, 7]],
dtype='float32'
)
data = pd.DataFrame(stats.multivariate_normal.rvs(cov=cov, size=100).cumsum(0), columns=['a', 'b', 'c'])
data.plot()
with pm.Model() as model:
# keys translation:
'a' means absolute values,
'd_a' innovations list or constant innovation
t - time horizon for scenario
scenario = Scenario(data, {'d_a': 1}, t=20)
# the was too much work to make initial value be the last seen point:(
scenario.initial_data.plot()
# NUTS - too slow (46 samples while writing)
# Hamiltonian wants all to be a constant
# Metropolis is neat
with model:
step = pm.Metropolis()
trace_met = pm.sample(10000, step=step)
pm.traceplot(trace_met)
plt.suptitle('Metropolis', fontsize=16);
res = scenario.trace_scenario(trace_met,
3) # 3 sigma
fig, ax = plt.subplots(figsize=(16, 9))
res['mean'].plot(ax=ax)
res['upper'][-21:]['b'].plot(ax=ax, c='r')
res['lower'][-21:]['c'].plot(ax=ax, c='g')
res['lower'][-21:]['b'].plot(ax=ax, c='r')
res['upper'][-21:]['c'].plot(ax=ax, c='g')
fig.suptitle('Scenario Metropolis', fontsize=16);
I found this thing to be very interesting and it can be a new one in pm.models
package
brunch add_mvnormalrandomwalk
from pymc3.distributions.timeseries import MvGaussianRandomWalk
import theano.tensor as tt
import pandas as pd
import numpy as np
import pymc3 as pm
class Scenario(pm.Model):
@staticmethod
def corr(lkj):
X = lkj[lkj.distribution.tri_index]
X = tt.fill_diagonal(X, 1)
return X
def __init__(self, data, future=None, t=10, walk_prior=None, **kwargs):
super(Scenario, self).__init__(name=kwargs.get('name', ''))
if future is None:
future = dict()
future = {k:np.asarray(v) for k, v in future.items()}
self.assert_all_zerodim(future)
self.future = future
lens = [v.size for v in future.values()]
if lens:
self.t = max(lens)
else:
self.t = t
self.masked, self.columns = self.get_masked(data, future, t, strict=kwargs.get('strict', True))
if walk_prior is None:
corr_vec = pm.LKJCorr('corr', n=kwargs.get('n', 1), p=self.p)
sd = pm.HalfCauchy('sd', beta=kwargs.get('beta', 1), shape=(self.p,))
cov = tt.diag(sd).dot(self.corr(corr_vec)).dot(tt.diag(sd))
MvGaussianRandomWalk('walk', cov=cov, observed=self.masked)
else:
self.Var('walk', walk_prior, self.masked)
self['walk_missing'].tag.test_value = self.missing_point
@classmethod
def get_masked(cls, data, future, t, strict=True):
keys = set(future.keys())
new_series = list()
for k, series in data.to_dict('series').items():
array = future.get(k, None)
if array is None:
array = future.get('d_{}'.format(k), None)
if array is not None:
isdelta = True
keys.remove('d_{}'.format(k))
else:
isdelta = None
else:
keys.remove(k)
isdelta = False
new_series.append(cls.prolonged(series, array, k, isdelta, t))
if keys and strict:
raise ValueError('Not all future values are used: %s' % keys)
new_df = pd.concat(new_series, 1)
return pm.model.pandas_to_array(new_df), new_df.columns
@property
def p(self):
return len(self.columns)
@staticmethod
def prolonged(series, array, k, isdelta, t):
series = np.array(series)
future = np.array([np.nan] * t)
if array is not None:
if array.size == 1:
future[:] = array
else:
m = min(t,array.size)
future[:m] = array[:m]
if isdelta:
future = future.cumsum() + series[-1]
concated = np.concatenate([series, future])
return pd.Series(concated, name=k)
def trace_scenario(self, trace, t=1):
mean = np.mean(trace['walk_missing'], 0)
sd = np.std(trace['walk_missing'], 0)
mean_df = self.masked.filled()
upper_df = self.masked.filled()
lower_df = self.masked.filled()
mean_df[self.masked.mask.nonzero()] = mean
upper_df[self.masked.mask.nonzero()] = mean + t * sd
lower_df[self.masked.mask.nonzero()] = mean - t * sd
return dict(
lower=pd.DataFrame(lower_df, columns=self.columns),
mean=pd.DataFrame(mean_df, columns=self.columns),
upper=pd.DataFrame(upper_df, columns=self.columns)
)
@staticmethod
def filled_with_last(masked):
masked = masked.copy()
last = iter(masked.filled(np.nan)).__next__()
_mask = iter(masked.mask).__next__()
_means = masked.filled(np.nan).mean(0)
last[_mask] = _means[_mask]
del _mask, _means
new_data = list()
for row, mask in zip(masked.filled(), masked.mask):
row[mask] = last[mask]
new_data.append(row)
last = row
return np.ma.MaskedArray(new_data, masked.mask)
@property
def missing_point(self):
fill = self.filled_with_last(self.masked).data[self.masked.mask.nonzero()]
return fill
@property
def initial_data(self):
return pd.DataFrame(self.filled_with_last(self.masked).data, columns=self.columns)
@staticmethod
def assert_all_zerodim(future):
if not np.all([len(v.shape) == 0 for v in future.values()]):
raise ValueError('future values must be zero dimentional')
else:
pass
Is there any better way to do such inference?
I have strange results on real data and I don't believe them. Here are 3 sigma confidence interval for free, not conditioned missing values. I have a deadline for this task soon:( and have no ideas of how I can get reliable results. @twiecki do you have any thoughts/suggestions of how to finish the task?
Can you post a notebook that runs the whole thing?
This is quite a complex model and I'm not surprised convergence is hard. I would focus on initialization and getting NUTS to run.
I'm thinking about taking differences, sample them and then integrate.
A am doing multivariate time series analysis and got a problem with modelling scenarios. The problem is as follows: 1) suppose you have a multivariate time series with shape (T, p) 2) then you want to know the distribution in periods [T+1:T+10] conditioned on scenario 3) scenario can be described with one observed variable (or distribution over it) in time series while others are unobserved so the dataset looks like:
Distribution I use for modelling dynamics
Model
When running metropolis(NUTS fails on observed even!!) on observed only, I get tight distribution for innovation covariance, sampling with observed fails, I get the following error when plotting trace:
It seems that it sampled constant or something like this
I can't figure out what I'm doing wrong but suppose that my case is supported by pymc3 data_with_unobserved.csv.zip