quantopian / zipline

Zipline, a Pythonic Algorithmic Trading Library
https://www.zipline.io
Apache License 2.0
17.56k stars 4.71k forks source link

Price column in Data source #611

Closed vertangelx closed 9 years ago

vertangelx commented 9 years ago

Hi all,

I have tick data on local storage that I would like to load into Zipline. So I resampled the original data using pandas.resample into a DataFrame containing the 4 OHLC columns and a DataFrame with a volume column.

However, when loading AAPL data from Yahoo! Finance, I noticed that the resulting DataFrame contains a price column, which is not present in my resampled DataFrame.

image

How is the price column calculated? Is it simply the same value as in the close column?

ssanderson commented 9 years ago

@vertangelx yep, price is just an alias of close. You can copy it over. I think Zipline should run just fine if you only have OHLCV with no price column, though I'm not 100% sure on that.

vertangelx commented 9 years ago

And a follow-up question:

I am getting the error IndexError: index out of bounds when running the TradingAlgorithm in an IPython notebook on custom data resampled with pandas.

I included the price column together with OHLCV.

def initialize(context):
    # Register the two daily 'price' history windows (1 bar and 5 bars);
    # handle_data requests history() with exactly these arguments.
    add_history(1, '1d', 'price')
    add_history(5, '1d', 'price')

    # Call counter; handle_data prints it and increments it once per call.
    context.i = 0

def handle_data(context, data):
    # Compares the mean of the 1-bar and 5-bar daily 'price' windows for
    # 'ABC', places an order_target accordingly, and records the values.

    sym = symbol('ABC')
    # Debug output of the call counter (Python 2 print statement).
    print context.i

    # Return early on the first six calls; the seventh call (which prints 6)
    # is the first one to reach the history() lookups below.
    context.i += 1
    if context.i < 7:
        return

    # Means of each window; the results are indexed by `sym` below.
    # NOTE(review): with a 1-bar window the "short" mean is presumably just
    # the latest price -- confirm this is intended.
    short_mavg = history(1, '1d', 'price').mean()
    long_mavg = history(5, '1d', 'price').mean()

    # Target 1 when the short mean is above the long mean, 0 when below;
    # no order is placed when the two are equal.
    # NOTE(review): the target is presumably a share count -- confirm
    # against the Zipline order_target documentation.
    if short_mavg[sym] > long_mavg[sym]:
        order_target(sym, 1)
    elif short_mavg[sym] < long_mavg[sym]:
        order_target(sym, 0)

    # Record the current price and both means under these names.
    record(ABC=data[sym].price,
           short_mavg=short_mavg[sym],
           long_mavg=long_mavg[sym])

# `ohlcv` is not defined in this snippet; per the post it is the resampled
# DataFrame (OHLCV plus a copied price column). It is keyed by the same
# name, 'ABC', that handle_data passes to symbol().
data = pd.Panel({'ABC': ohlcv})
# `analyze` is not defined in this snippet -- presumably defined elsewhere
# in the notebook.
algo = zp.TradingAlgorithm(initialize=initialize, handle_data=handle_data, analyze=analyze)
# The IndexError shown in the traceback propagates out of this call.
perf = algo.run(data)
ssanderson commented 9 years ago

@vertangelx can you post the traceback for the error you're seeing?

vertangelx commented 9 years ago
0
1
2
3
4
5
6
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-5-c03d9f28c52a> in <module>()
      4 # Run
      5 algo = zp.TradingAlgorithm(initialize=initialize, handle_data=handle_data, analyze=analyze)

----> 6 perf = algo.run(data)

/Users/vertangelx/anaconda/envs/test/lib/python2.7/site-packages/zipline/algorithm.pyc in run(self, source, overwrite_sim_params, benchmark_return_source)
    421             # perf dictionary
    422             perfs = []
--> 423             for perf in self.gen:
    424                 perfs.append(perf)
    425 

/Users/vertangelx/anaconda/envs/test/lib/python2.7/site-packages/zipline/gens/tradesimulation.pyc in transform(self, stream_in)
    125                         date,
    126                         snapshot,
--> 127                         self.algo.instant_fill,
    128                     )
    129                     # Perf messages are only emitted if the snapshot contained

/Users/vertangelx/anaconda/envs/test/lib/python2.7/site-packages/zipline/gens/tradesimulation.pyc in _process_snapshot(self, dt, snapshot, instant_fill)
    212 
    213         if any_trade_occurred:
--> 214             new_orders = self._call_handle_data()
    215             for order in new_orders:
    216                 self.algo.perf_tracker.process_event(order)

/Users/vertangelx/anaconda/envs/test/lib/python2.7/site-packages/zipline/gens/tradesimulation.pyc in _call_handle_data(self)
    234         during the call.
    235         """
--> 236         self.algo.handle_data(self.current_data)
    237         orders = self.algo.blotter.new_orders
    238         self.algo.blotter.new_orders = []

/Users/vertangelx/anaconda/envs/test/lib/python2.7/site-packages/zipline/algorithm.pyc in handle_data(self, data)
    224             self.history_container.update(data, self.datetime)
    225 
--> 226         self._handle_data(self, data)
    227 
    228     def analyze(self, perf):

<ipython-input-4-79cf35da143b> in handle_data(context, data)
     25 
     26     # Compute averages
---> 27     short_mavg = history(1, '1d', 'price').mean()
     28     long_mavg = history(5, '1d', 'price').mean()
     29 

/Users/vertangelx/anaconda/envs/test/lib/python2.7/site-packages/zipline/utils/api_support.pyc in wrapped(*args, **kwargs)
     58     def wrapped(*args, **kwargs):
     59         # Get the instance and call the method
---> 60         return getattr(get_algo_instance(), f.__name__)(*args, **kwargs)
     61     # Add functor to zipline.api
     62     setattr(zipline.api, f.__name__, wrapped)

/Users/vertangelx/anaconda/envs/test/lib/python2.7/site-packages/zipline/algorithm.pyc in history(self, bar_count, frequency, field, ffill)
    846             bar_count, frequency, field, ffill)
    847         history_spec = self.history_specs[spec_key_str]
--> 848         return self.history_container.get_history(history_spec, self.datetime)
    849 
    850     ####################

/Users/vertangelx/anaconda/envs/test/lib/python2.7/site-packages/zipline/history/history_container.pyc in get_history(self, history_spec, algo_dt)
    464                 buffer_frame,
    465                 digest_frame,
--> 466                 self.last_known_prior_values,
    467             )
    468 

/Users/vertangelx/anaconda/envs/test/lib/python2.7/site-packages/zipline/history/history_container.pyc in ffill_buffer_from_prior_values(field, buffer_frame, digest_frame, pre_digest_values)
     42 
     43     # Get values which are NaN at the beginning of the period.
---> 44     first_bar = buffer_frame.iloc[0]
     45 
     46     def iter_nan_sids():

/Users/vertangelx/anaconda/envs/test/lib/python2.7/site-packages/pandas/core/indexing.pyc in __getitem__(self, key)
   1142             return self._getitem_tuple(key)
   1143         else:
-> 1144             return self._getitem_axis(key, axis=0)
   1145 
   1146     def _getitem_axis(self, key, axis=0, validate_iterable=False):

/Users/vertangelx/anaconda/envs/test/lib/python2.7/site-packages/pandas/core/indexing.pyc in _getitem_axis(self, key, axis, validate_iterable)
   1415                 self._is_valid_integer(key, axis)
   1416 
-> 1417             return self._get_loc(key, axis=axis)
   1418 
   1419     def _convert_to_indexer(self, obj, axis=0, is_setter=False):

/Users/vertangelx/anaconda/envs/test/lib/python2.7/site-packages/pandas/core/indexing.pyc in _get_loc(self, key, axis)
     88 
     89     def _get_loc(self, key, axis=0):
---> 90         return self.obj._ixs(key, axis=axis)
     91 
     92     def _slice(self, obj, axis=0, typ=None):

/Users/vertangelx/anaconda/envs/test/lib/python2.7/site-packages/pandas/core/frame.pyc in _ixs(self, i, axis)
   1600                 return self[i]
   1601             else:
-> 1602                 label = self.index[i]
   1603                 if isinstance(label, Index):
   1604                     # a location index by definition

/Users/vertangelx/anaconda/envs/test/lib/python2.7/site-packages/pandas/tseries/index.pyc in __getitem__(self, key)
   1372         arr_idx = self.view(np.ndarray)
   1373         if np.isscalar(key):
-> 1374             val = arr_idx[key]
   1375             return Timestamp(val, offset=self.offset, tz=self.tz)
   1376         else:

IndexError: index out of bounds
ssanderson commented 9 years ago

@vertangelx did you figure out what the issue was here?