quantopian / pyfolio

Portfolio and risk analytics in Python
https://quantopian.github.io/pyfolio
Apache License 2.0
5.72k stars 1.78k forks source link

Bayesian tear sheet; pandas version problem #572

Open marketneutral opened 6 years ago

marketneutral commented 6 years ago

I am having trouble running the Bayesian tear sheet. Theano fails with a "Compilation failed (return status=1)". I can make a minimal repro of the exception as follows (I am filing this here and not in the PyMC3 repo b/c pyfolio requires PyMC3==3.1 which is quite old):

I set up the basic environment which is consistent with pyfolio requirements:

conda create -n testenv -y python=3.5 numpy=1.11.3 pandas=0.18.1 scipy=0.17.1 libgfortran=3.0 mkl-service pymc3=3.1 scikit-learn pip
export MKL_THREADING_LAYER=GNU
source activate testenv

Following the most basic example of PyMC3 from the docs:

import pymc3 as pm
# Minimal reproduction: fit a two-predictor linear regression on simulated data with PyMC3.
if __name__=='__main__':

    print('Running on PyMC3 v{}'.format(pm.__version__))

    import numpy as np
    import matplotlib.pyplot as plt  # imported but not used anywhere below

    # Seed NumPy's global random number generator so the simulated data is reproducible
    np.random.seed(123)

    # True parameter values used to simulate the data (intercept, noise scale, slopes)
    alpha, sigma = 1, 1
    beta = [1, 2.5]

    # Number of simulated observations
    size = 100

    # Predictor variables: two independent standard-normal draws, the second scaled by 0.2
    X1 = np.random.randn(size)
    X2 = np.random.randn(size) * 0.2

    # Simulate outcome variable: linear combination of the predictors plus Gaussian noise
    Y = alpha + beta[0]*X1 + beta[1]*X2 + np.random.randn(size)*sigma

    basic_model = pm.Model()

    with basic_model:

        # Priors for unknown model parameters
        alpha = pm.Normal('alpha', mu=0, sd=10)  # rebinds alpha; the true value 1 set above is shadowed from here on
        beta = pm.Normal('beta', mu=0, sd=10, shape=2)  # rebinds beta; shape=2 gives one coefficient per predictor
        sigma = pm.HalfNormal('sigma', sd=1)  # rebinds sigma; the true value 1 set above is shadowed from here on

        # Expected value of outcome, built from the model variables (not the true values)
        mu = alpha + beta[0]*X1 + beta[1]*X2

        # Likelihood (sampling distribution) of observations, conditioned on the simulated Y
        Y_obs = pm.Normal('Y_obs', mu=mu, sd=sigma, observed=Y)

    with basic_model:
        # draw 500 posterior samples
        trace = pm.sample(500)  # the reported traceback (test_pymc3.py, line 44) is raised from this call

    print(trace['alpha'][-5:])  # last five entries of the trace for 'alpha'

This gives the lengthy exception:

Running on PyMC3 v3.1
Auto-assigning NUTS sampler...
Initializing NUTS using ADVI...
Average Loss = 156:   5%|█▎                        | 10316/200000 [00:01<00:20, 9430.53it/s]
Convergence archived at 11100
Interrupted at 11,100 [5%]: Average Loss = 237.04

You can find the C code in this temporary file: /var/folders/sz/_qcyns_12zd21tjhs7q7_29h0000gn/T/theano_compilation_error_y0g4hays
Traceback (most recent call last):
  File "test_pymc3.py", line 44, in <module>
    trace = pm.sample(500)
  File "/anaconda3/envs/testenv/lib/python3.5/site-packages/pymc3/sampling.py", line 243, in sample
    progressbar=progressbar, **args)
  File "/anaconda3/envs/testenv/lib/python3.5/site-packages/pymc3/sampling.py", line 604, in init_nuts
    start = approx.sample(draws=njobs)
  File "/anaconda3/envs/testenv/lib/python3.5/site-packages/pymc3/variational/opvi.py", line 911, in sample
    posterior = self.random_fn(draws)
  File "/anaconda3/envs/testenv/lib/python3.5/site-packages/pymc3/memoize.py", line 16, in memoizer
    cache[key] = obj(*args, **kwargs)
  File "/anaconda3/envs/testenv/lib/python3.5/site-packages/theano/configparser.py", line 117, in res
    return f(*args, **kwargs)
  File "/anaconda3/envs/testenv/lib/python3.5/site-packages/pymc3/variational/opvi.py", line 884, in random_fn
    posterior)
  File "/anaconda3/envs/testenv/lib/python3.5/site-packages/theano/compile/function.py", line 317, in function
    output_keys=output_keys)
  File "/anaconda3/envs/testenv/lib/python3.5/site-packages/theano/compile/pfunc.py", line 486, in pfunc
    output_keys=output_keys)
  File "/anaconda3/envs/testenv/lib/python3.5/site-packages/theano/compile/function_module.py", line 1841, in orig_function
    fn = m.create(defaults)
  File "/anaconda3/envs/testenv/lib/python3.5/site-packages/theano/compile/function_module.py", line 1715, in create
    input_storage=input_storage_lists, storage_map=storage_map)
  File "/anaconda3/envs/testenv/lib/python3.5/site-packages/theano/gof/link.py", line 699, in make_thunk
    storage_map=storage_map)[:3]
  File "/anaconda3/envs/testenv/lib/python3.5/site-packages/theano/gof/vm.py", line 1091, in make_all
    impl=impl))
  File "/anaconda3/envs/testenv/lib/python3.5/site-packages/theano/gof/op.py", line 955, in make_thunk
    no_recycling)
  File "/anaconda3/envs/testenv/lib/python3.5/site-packages/theano/gof/op.py", line 858, in make_c_thunk
    output_storage=node_output_storage)
  File "/anaconda3/envs/testenv/lib/python3.5/site-packages/theano/gof/cc.py", line 1217, in make_thunk
    keep_lock=keep_lock)
  File "/anaconda3/envs/testenv/lib/python3.5/site-packages/theano/gof/cc.py", line 1157, in __compile__
    keep_lock=keep_lock)
  File "/anaconda3/envs/testenv/lib/python3.5/site-packages/theano/gof/cc.py", line 1620, in cthunk_factory
    key=key, lnk=self, keep_lock=keep_lock)
  File "/anaconda3/envs/testenv/lib/python3.5/site-packages/theano/gof/cmodule.py", line 1181, in module_from_key
    module = lnk.compile_cmodule(location)
  File "/anaconda3/envs/testenv/lib/python3.5/site-packages/theano/gof/cc.py", line 1523, in compile_cmodule
    preargs=preargs)
  File "/anaconda3/envs/testenv/lib/python3.5/site-packages/theano/gof/cmodule.py", line 2388, in compile_str
    (status, compile_stderr.replace('\n', '. ')))
Exception: ('The following error happened while compiling the node', softplus(InplaceDimShuffle{x,0}.0), '\n', "Compilation failed (return status=1): /Users/jonathan/.theano/compiledir_Darwin-17.7.0-x86_64-i386-64bit-i386-3.5.6-64/tmp630jrguy/mod.cpp:396:27: error: non-constant-expression cannot be narrowed from type 'npy_intp' (aka 'long') to 'int' in initializer list [-Wc++11-narrowing].     int init_totals[2] = {V1_n0, V3_n1};.                           ^~~~~. /Users/jonathan/.theano/compiledir_Darwin-17.7.0-x86_64-i386-64bit-i386-3.5.6-64/tmp630jrguy/mod.cpp:396:27: note: insert an explicit cast to silence this issue.     int init_totals[2] = {V1_n0, V3_n1};.                           ^~~~~.                           static_cast<int>( ). /Users/jonathan/.theano/compiledir_Darwin-17.7.0-x86_64-i386-64bit-i386-3.5.6-64/tmp630jrguy/mod.cpp:396:34: error: non-constant-expression cannot be narrowed from type 'npy_intp' (aka 'long') to 'int' in initializer list [-Wc++11-narrowing].     int init_totals[2] = {V1_n0, V3_n1};.                                  ^~~~~. /Users/jonathan/.theano/compiledir_Darwin-17.7.0-x86_64-i386-64bit-i386-3.5.6-64/tmp630jrguy/mod.cpp:396:34: note: insert an explicit cast to silence this issue.     int init_totals[2] = {V1_n0, V3_n1};.                                  ^~~~~.                                  static_cast<int>( ). /Users/jonathan/.theano/compiledir_Darwin-17.7.0-x86_64-i386-64bit-i386-3.5.6-64/tmp630jrguy/mod.cpp:408:12: error: non-constant-expression cannot be narrowed from type 'ssize_t' (aka 'long') to 'int' in initializer list [-Wc++11-narrowing].         0, V3_stride1, .            ^~~~~~~~~~. /Users/jonathan/.theano/compiledir_Darwin-17.7.0-x86_64-i386-64bit-i386-3.5.6-64/tmp630jrguy/mod.cpp:408:12: note: insert an explicit cast to silence this issue.         0, V3_stride1, .            ^~~~~~~~~~.            static_cast<int>( ). 
/Users/jonathan/.theano/compiledir_Darwin-17.7.0-x86_64-i386-64bit-i386-3.5.6-64/tmp630jrguy/mod.cpp:409:1: error: non-constant-expression cannot be narrowed from type 'ssize_t' (aka 'long') to 'int' in initializer list [-Wc++11-narrowing]. V1_stride0, V1_stride1. ^~~~~~~~~~. /Users/jonathan/.theano/compiledir_Darwin-17.7.0-x86_64-i386-64bit-i386-3.5.6-64/tmp630jrguy/mod.cpp:409:1: note: insert an explicit cast to silence this issue. V1_stride0, V1_stride1. ^~~~~~~~~~. static_cast<int>( ). /Users/jonathan/.theano/compiledir_Darwin-17.7.0-x86_64-i386-64bit-i386-3.5.6-64/tmp630jrguy/mod.cpp:409:13: error: non-constant-expression cannot be narrowed from type 'ssize_t' (aka 'long') to 'int' in initializer list [-Wc++11-narrowing]. V1_stride0, V1_stride1.             ^~~~~~~~~~. /Users/jonathan/.theano/compiledir_Darwin-17.7.0-x86_64-i386-64bit-i386-3.5.6-64/tmp630jrguy/mod.cpp:409:13: note: insert an explicit cast to silence this issue. V1_stride0, V1_stride1.             ^~~~~~~~~~.             static_cast<int>( ). 5 errors generated.. ", '[softplus(<TensorType(float64, row)>)]')

I can see that the Travis build for python=3.5, pandas=0.18.1 is passing. That's on Ubuntu and I am on Mac OS X. Do you have any suggestions on this?

pip freeze seems to match that Travis build

certifi==2018.8.24
cycler==0.10.0
h5py==2.8.0
joblib==0.12.5
kiwisolver==1.0.1
Mako==1.0.7
MarkupSafe==1.0
matplotlib==2.2.3
numpy==1.11.3
pandas==0.18.1
patsy==0.5.0
pygpu==0.7.6
pymc3==3.1
pyparsing==2.2.2
python-dateutil==2.7.3
pytz==2018.5
scikit-learn==0.19.1
scipy==0.17.1
six==1.11.0
Theano==1.0.3
tornado==5.1.1
tqdm==4.26.0
marketneutral commented 6 years ago

Solved... this is a Mac OS X issue.

https://stackoverflow.com/questions/51238578/error-non-constant-expression-cannot-be-narrowed-from-type-npy-intp-to-int

import theano
theano.config.gcc.cxxflags = "-Wno-c++11-narrowing"

or, as an environment variable set in the shell before running...

export THEANO_FLAGS='gcc.cxxflags=-Wno-c++11-narrowing'

And then it all works .... 🙈

marketneutral commented 6 years ago

I see a pandas version problem with the Bayesian tearsheet.

Pyfolio requirements are pandas>=0.18.1 and Q zipline stable is built on 0.18.1.

However, the "Alpha Beta Model" calls data_bmark = pd.concat([data, bmark], axis='columns').dropna() somewhere inside. Pandas 0.18.1 does not support passing the names ['index', 'columns'] for the axis parameter; it only supports [0,1] in pd.concat.

Can I make a PR to change that line to data_bmark = pd.concat([data, bmark], axis=1).dropna()?

Exception below:

Running alpha beta model
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-11-05367464daf0> in <module>
      2     returns,
      3     live_start_date='2017-10-22',
----> 4     benchmark_rets=benchmark
      5 )

/anaconda3/envs/env_alphatools_stable/lib/python3.5/site-packages/pyfolio/plotting.py in call_w_context(*args, **kwargs)
     50         if set_context:
     51             with plotting_context(), axes_style():
---> 52                 return func(*args, **kwargs)
     53         else:
     54             return func(*args, **kwargs)

/anaconda3/envs/env_alphatools_stable/lib/python3.5/site-packages/pyfolio/tears.py in create_bayesian_tear_sheet(returns, benchmark_rets, live_start_date, samples, return_fig, stoch_vol, progressbar)
   1273                                               bmark=benchmark_rets,
   1274                                               samples=samples,
-> 1275                                               progressbar=progressbar)
   1276         previous_time = timer("running alpha beta model", previous_time)
   1277 

/anaconda3/envs/env_alphatools_stable/lib/python3.5/site-packages/pyfolio/bayesian.py in run_model(model, returns_train, returns_test, bmark, samples, ppc, progressbar)
    560         model, trace = model_returns_t_alpha_beta(returns_train,
    561                                                   bmark, samples,
--> 562                                                   progressbar=progressbar)
    563     elif model == 't':
    564         model, trace = model_returns_t(returns_train, samples,

/anaconda3/envs/env_alphatools_stable/lib/python3.5/site-packages/pyfolio/bayesian.py in model_returns_t_alpha_beta(data, bmark, samples, progressbar)
     61     """
     62 
---> 63     data_bmark = pd.concat([data, bmark], axis='columns').dropna()
     64 
     65     with pm.Model() as model:

/anaconda3/envs/env_alphatools_stable/lib/python3.5/site-packages/pandas/tools/merge.py in concat(objs, axis, join, join_axes, ignore_index, keys, levels, names, verify_integrity, copy)
    843                        keys=keys, levels=levels, names=names,
    844                        verify_integrity=verify_integrity,
--> 845                        copy=copy)
    846     return op.get_result()
    847 

/anaconda3/envs/env_alphatools_stable/lib/python3.5/site-packages/pandas/tools/merge.py in __init__(self, objs, axis, join, join_axes, keys, levels, names, ignore_index, verify_integrity, copy)
    937 
    938         self._is_series = isinstance(sample, ABCSeries)
--> 939         if not 0 <= axis <= sample.ndim:
    940             raise AssertionError("axis must be between 0 and {0}, "
    941                                  "input was {1}".format(sample.ndim, axis))

TypeError: unorderable types: int() <= str()