stefan-jansen / machine-learning-for-trading

Code for Machine Learning for Algorithmic Trading, 2nd edition.
https://ml4trading.io
13.3k stars 4.2k forks source link

04_single_factor_zipline | PicklingError #205

Closed hawyadowin closed 3 years ago

hawyadowin commented 3 years ago

I have downloaded the latest version of the 04_single_factor_zipline notebook; however, I am seeing an error:

PicklingError: Can't pickle <enum 'ORDER STATUS'>: attribute lookup ORDER STATUS on zipline.finance.order failed

Running the following code from the cell

%%zipline --start 2015-1-1 --end 2018-1-1 --output single_factor.pickle --no-benchmark --bundle quandl

from zipline.api import (
    attach_pipeline,
    date_rules,
    time_rules,
    order_target_percent,
    pipeline_output,
    record,
    schedule_function,
    get_open_orders,
    calendars
)
from zipline.finance import commission, slippage
from zipline.pipeline import Pipeline, CustomFactor
from zipline.pipeline.factors import Returns, AverageDollarVolume
import numpy as np
import pandas as pd

# Window length of the Returns input fed to MeanReversion.
MONTH = 21
# Window length of the MeanReversion factor: twelve MONTH-sized periods.
YEAR = MONTH * 12
# Sizes of the 'longs' and 'shorts' selections in the factor pipeline.
N_LONGS = 25
N_SHORTS = N_LONGS
# Number of assets kept by the dollar-volume screen.
VOL_SCREEN = 1000

class MeanReversion(CustomFactor):
    """Mean-reversion signal built from monthly returns.

    The factor value is the latest monthly return minus the mean of the
    monthly returns in the window, divided by their standard deviation.
    """
    inputs = [Returns(window_length=MONTH)]
    window_length = YEAR

    def compute(self, today, assets, out, monthly_returns):
        """Write the standardized latest monthly return into ``out``."""
        returns = pd.DataFrame(monthly_returns)
        # Last row of the window holds the most recent monthly return.
        latest = returns.iloc[-1]
        out[:] = (latest - returns.mean()) / returns.std()

def compute_factors():
    """Build the factor pipeline.

    The columns hold the bottom N_LONGS and top N_SHORTS selections of the
    mean-reversion factor plus its descending rank; the pipeline is
    screened to the top VOL_SCREEN assets by 30-period average dollar
    volume.
    """
    signal = MeanReversion()
    liquidity = AverageDollarVolume(window_length=30)
    columns = {
        'longs': signal.bottom(N_LONGS),
        'shorts': signal.top(N_SHORTS),
        'ranking': signal.rank(ascending=False),
    }
    universe = liquidity.top(VOL_SCREEN)
    return Pipeline(columns=columns, screen=universe)

def exec_trades(data, assets, target_percent):
    """Order each asset to the given target portfolio percentage.

    Assets that cannot currently be traded, or that already have open
    orders, are skipped.
    """
    for asset in assets:
        if not data.can_trade(asset):
            continue
        if get_open_orders(asset):
            continue
        order_target_percent(asset, target_percent)

def rebalance(context, data):
    """Record factor ranks and prices, then trade toward the new holdings.

    Positions that are in neither the long nor the short selection are
    closed; longs and shorts receive equal weights of 1 / N_LONGS and
    -1 / N_SHORTS respectively.
    """
    signals = context.factor_data
    record(factor_data=signals.ranking)

    universe = signals.index
    record(prices=data.current(universe, 'price'))

    longs = universe[signals.longs]
    shorts = universe[signals.shorts]
    held = set(context.portfolio.positions.keys())
    targeted = set(longs.union(shorts))
    divest = held - targeted

    # Same order as before: close obsolete positions, then longs, then shorts.
    trades = (
        (divest, 0),
        (longs, 1 / N_LONGS),
        (shorts, -1 / N_SHORTS),
    )
    for group, weight in trades:
        exec_trades(data, assets=group, target_percent=weight)

def initialize(context):
    """Attach the factor pipeline, schedule rebalancing and set trading costs.

    ``rebalance`` is scheduled with the week-start date rule and the
    market-open time rule on the US equities calendar.
    """
    pipeline = compute_factors()
    attach_pipeline(pipeline, 'factor_pipeline')

    weekly = date_rules.week_start()
    at_open = time_rules.market_open()
    schedule_function(rebalance, weekly, at_open,
                      calendar=calendars.US_EQUITIES)

    per_share = commission.PerShare(cost=.01, min_trade_cost=0)
    context.set_commission(per_share)
    context.set_slippage(slippage.VolumeShareSlippage())

def before_trading_start(context, data):
    """Store the latest 'factor_pipeline' output on the context."""
    latest = pipeline_output('factor_pipeline')
    context.factor_data = latest

ERROR BEGINS HERE

'''---------------------------------------------------------------------------
PicklingError                             Traceback (most recent call last)
<ipython-input-6-a5efd4dc9d24> in <module>
----> 1 get_ipython().run_cell_magic('zipline', '--start 2015-1-1 --end 2018-1-1 --output single_factor.pickle --no-benchmark --bundle quandl', '\nfrom zipline.api import (\n    attach_pipeline,\n    date_rules,\n    time_rules,\n    order_target_percent,\n    pipeline_output,\n    record,\n    schedule_function,\n    get_open_orders,\n    calendars\n)\nfrom zipline.finance import commission, slippage\nfrom zipline.pipeline import Pipeline, CustomFactor\nfrom zipline.pipeline.factors import Returns, AverageDollarVolume\nimport numpy as np\nimport pandas as pd\n\nMONTH = 21\nYEAR = 12 * MONTH\nN_LONGS = N_SHORTS = 25\nVOL_SCREEN = 1000\n\n\nclass MeanReversion(CustomFactor):\n    """Compute ratio of latest monthly return to 12m average,\n       normalized by std dev of monthly returns"""\n    inputs = [Returns(window_length=MONTH)]\n    window_length = YEAR\n\n    def compute(self, today, assets, out, monthly_returns):\n        df = pd.DataFrame(monthly_returns)\n        out[:] = df.iloc[-1].sub(df.mean()).div(df.std())\n\n\ndef compute_factors():\n    """Create factor pipeline incl. 
mean reversion,\n        filtered by 30d Dollar Volume; capture factor ranks"""\n    mean_reversion = MeanReversion()\n    dollar_volume = AverageDollarVolume(window_length=30)\n    return Pipeline(columns={\'longs\': mean_reversion.bottom(N_LONGS),\n                             \'shorts\': mean_reversion.top(N_SHORTS),\n                             \'ranking\': mean_reversion.rank(ascending=False)},\n                    screen=dollar_volume.top(VOL_SCREEN))\n\n\ndef exec_trades(data, assets, target_percent):\n    """Place orders for assets using target portfolio percentage"""\n    for asset in assets:\n        if data.can_trade(asset) and not get_open_orders(asset):\n            order_target_percent(asset, target_percent)\n\n\ndef rebalance(context, data):\n    """Compute long, short and obsolete holdings; place trade orders"""\n    factor_data = context.factor_data\n    record(factor_data=factor_data.ranking)\n\n    assets = factor_data.index\n    record(prices=data.current(assets, \'price\'))\n\n    longs = assets[factor_data.longs]\n    shorts = assets[factor_data.shorts]\n    divest = set(context.portfolio.positions.keys()) - set(longs.union(shorts))\n\n    exec_trades(data, assets=divest, target_percent=0)\n    exec_trades(data, assets=longs, target_percent=1 / N_LONGS)\n    exec_trades(data, assets=shorts, target_percent=-1 / N_SHORTS)\n\n\ndef initialize(context):\n    """Setup: register pipeline, schedule rebalancing,\n        and set trading params"""\n    attach_pipeline(compute_factors(), \'factor_pipeline\')\n    schedule_function(rebalance,\n                      date_rules.week_start(),\n                      time_rules.market_open(),\n                      calendar=calendars.US_EQUITIES)\n    context.set_commission(commission.PerShare(cost=.01, min_trade_cost=0))\n    context.set_slippage(slippage.VolumeShareSlippage())\n\n\ndef before_trading_start(context, data):\n    """Run factor pipeline"""\n    context.factor_data = 
pipeline_output(\'factor_pipeline\')\n')

~\anaconda3\lib\site-packages\IPython\core\interactiveshell.py in run_cell_magic(self, magic_name, line, cell)
   2397             with self.builtin_trap:
   2398                 args = (magic_arg_s, cell)
-> 2399                 result = fn(*args, **kwargs)
   2400             return result
   2401 

~\anaconda3\lib\site-packages\zipline\__main__.py in zipline_magic(line, cell)
    333     )
    334     try:
--> 335         return run.main(
    336             # put our overrides at the start of the parameter list so that
    337             # users may pass values with higher precedence

~\anaconda3\lib\site-packages\click\core.py in main(self, args, prog_name, complete_var, standalone_mode, windows_expand_args, **extra)
   1060             try:
   1061                 with self.make_context(prog_name, args, **extra) as ctx:
-> 1062                     rv = self.invoke(ctx)
   1063                     if not standalone_mode:
   1064                         return rv

~\anaconda3\lib\site-packages\click\core.py in invoke(self, ctx)
   1402 
   1403         if self.callback is not None:
-> 1404             return ctx.invoke(self.callback, **ctx.params)
   1405 
   1406     def shell_complete(self, ctx: Context, incomplete: str) -> t.List["CompletionItem"]:

~\anaconda3\lib\site-packages\click\core.py in invoke(_Context__self, _Context__callback, *args, **kwargs)
    761         with augment_usage_errors(__self):
    762             with ctx:
--> 763                 return __callback(*args, **kwargs)
    764 
    765     def forward(

~\anaconda3\lib\site-packages\click\decorators.py in new_func(*args, **kwargs)
     24 
     25     def new_func(*args, **kwargs):  # type: ignore
---> 26         return f(get_current_context(), *args, **kwargs)
     27 
     28     return update_wrapper(t.cast(F, new_func), f)

~\anaconda3\lib\site-packages\zipline\__main__.py in run(ctx, algofile, algotext, define, data_frequency, capital_base, bundle, bundle_timestamp, benchmark_file, benchmark_symbol, benchmark_sid, no_benchmark, start, end, output, trading_calendar, print_algo, metrics_set, local_namespace, blotter)
    298     )
    299 
--> 300     return _run(
    301         initialize=None,
    302         handle_data=None,

~\anaconda3\lib\site-packages\zipline\utils\run_algo.py in _run(handle_data, initialize, before_trading_start, analyze, algofile, algotext, defines, data_frequency, capital_base, bundle, bundle_timestamp, start, end, output, trading_calendar, print_algo, metrics_set, local_namespace, environ, blotter, custom_loader, benchmark_spec)
    243         click.echo(str(perf))
    244     elif output != os.devnull:  # make the zipline magic not write any data
--> 245         perf.to_pickle(output)
    246 
    247     return perf

~\anaconda3\lib\site-packages\pandas\core\generic.py in to_pickle(self, path, compression, protocol, storage_options)
   2862         from pandas.io.pickle import to_pickle
   2863 
-> 2864         to_pickle(
   2865             self,
   2866             path,

~\anaconda3\lib\site-packages\pandas\io\pickle.py in to_pickle(obj, filepath_or_buffer, compression, protocol, storage_options)
    105         else:
    106             # letting pickle write directly to the buffer is more memory-efficient
--> 107             pickle.dump(
    108                 obj, handles.handle, protocol=protocol  # type: ignore[arg-type]
    109             )

PicklingError: Can't pickle <enum 'ORDER STATUS'>: attribute lookup ORDER STATUS on zipline.finance.order failed
'''
MBounouar commented 3 years ago

That error comes from a change I introduced in #23 to use enums; I didn't foresee a pickling issue at the time. I will do a PR fix later. If you can't wait, just use zipline.run_algorithm instead of the CLI interface: comment out the %%zipline ... line
and replace the line result = _ with the following:


# Workaround: run the backtest through the Python API instead of the
# %%zipline cell magic. `initialize` and `before_trading_start` are the
# functions defined in the notebook cell; start, end and bundle mirror the
# magic's --start, --end and --bundle options.
# No output file is passed here — presumably this is what avoids the failing
# perf.to_pickle(output) step; confirm against run_algorithm's defaults.
# NOTE(review): the magic's --no-benchmark flag has no counterpart in this
# call — confirm the default benchmark handling is acceptable.
from zipline import run_algorithm
result = run_algorithm(
    start=pd.Timestamp("2015-1-1", tz="UTC"),
    end=pd.Timestamp("2018-1-1", tz="UTC"),
    initialize=initialize,
    capital_base=10_000_000,
    before_trading_start=before_trading_start,
    bundle="quandl",
)
MBounouar commented 3 years ago

PR done; wait and see #56.

stefan-jansen commented 3 years ago

Merged a week ago.