Open sabirjana opened 4 years ago
Hi, I removed the CSV files where I was getting problems and was able to create the bundle; however, I am not able to use it due to the following error
`---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
pandas/_libs/index.pyx in pandas._libs.index.DatetimeEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
KeyError: 852076800000000000
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-27-d3d0e133d2f3> in <module>
7 capital_base=100000, # Set initial capital
8 data_frequency = 'daily', # Set data frequency
----> 9 bundle= 'india_stock_data' )#'random_equities') #'india_stock_data' )#'quandl') #'ac_equities_db' ) # Select bundle
C:\ProgramData\Anaconda3\envs\env_zipline\lib\site-packages\zipline\utils\run_algo.py in run_algorithm(start, end, initialize, capital_base, handle_data, before_trading_start, analyze, data_frequency, data, bundle, bundle_timestamp, trading_calendar, metrics_set, default_extension, extensions, strict_extensions, environ, blotter)
428 local_namespace=False,
429 environ=environ,
--> 430 blotter=blotter,
431 )
C:\ProgramData\Anaconda3\envs\env_zipline\lib\site-packages\zipline\utils\run_algo.py in _run(handle_data, initialize, before_trading_start, analyze, algofile, algotext, defines, data_frequency, capital_base, data, bundle, bundle_timestamp, start, end, output, trading_calendar, print_algo, metrics_set, local_namespace, environ, blotter)
167 equity_minute_reader=bundle_data.equity_minute_bar_reader,
168 equity_daily_reader=bundle_data.equity_daily_bar_reader,
--> 169 adjustment_reader=bundle_data.adjustment_reader,
170 )
171
C:\ProgramData\Anaconda3\envs\env_zipline\lib\site-packages\zipline\data\data_portal.py in __init__(self, asset_finder, trading_calendar, first_trading_day, equity_daily_reader, equity_minute_reader, future_daily_reader, future_minute_reader, adjustment_reader, last_available_session, last_available_minute, minute_history_prefetch_length, daily_history_prefetch_length)
289 self._first_trading_day
290 )
--> 291 if self._first_trading_day is not None else (None, None)
292 )
293
C:\ProgramData\Anaconda3\envs\env_zipline\lib\site-packages\trading_calendars\trading_calendar.py in open_and_close_for_session(self, session_label)
763 # http://pandas.pydata.org/pandas-docs/stable/whatsnew.html#datetime-with-tz # noqa
764 return (
--> 765 sched.at[session_label, 'market_open'].tz_localize(UTC),
766 sched.at[session_label, 'market_close'].tz_localize(UTC),
767 )
C:\ProgramData\Anaconda3\envs\env_zipline\lib\site-packages\pandas\core\indexing.py in __getitem__(self, key)
1867
1868 key = self._convert_key(key)
-> 1869 return self.obj._get_value(*key, takeable=self._takeable)
1870
1871 def __setitem__(self, key, value):
C:\ProgramData\Anaconda3\envs\env_zipline\lib\site-packages\pandas\core\frame.py in _get_value(self, index, col, takeable)
1983
1984 try:
-> 1985 return engine.get_value(series._values, index)
1986 except (TypeError, ValueError):
1987
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_value()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_value()
pandas/_libs/index.pyx in pandas._libs.index.DatetimeEngine.get_loc()
KeyError: Timestamp('1997-01-01 00:00:00+0000', tz='UTC')
My extension.py code looks like this:
from zipline.data.bundles import register, india_stock_data
# Register the custom bundle under the name used by run_algorithm(bundle=...).
# NOTE(review): 'XBOM' (Bombay Stock Exchange) must match the calendar the
# ingest function aligns data against, or session lookups will fail.
register(
'india_stock_data',
india_stock_data.india_stock_data,
calendar_name='XBOM'
)
The india_stock_data.py code is as follows:
import pandas as pd
from os import listdir
# Change the path to where you have your data.
# Every *.csv file in this folder is treated as one symbol's daily bars.
path = 'C:\\Users\\sabirj\\Desktop\\P4Finance\\data'
"""
The ingest function needs to have this exact signature,
meaning these arguments passed, as shown below.
"""
def india_stock_data(environ,
asset_db_writer,
minute_bar_writer,
daily_bar_writer,
adjustment_writer,
calendar,
start_session,
end_session,
cache,
show_progress,
output_dir):
    """Zipline bundle ingest entry point (signature is fixed by zipline).

    Reads one CSV per symbol from the module-level ``path``, writes the
    daily bars, then the asset metadata, then the (possibly empty)
    splits/dividends tables.
    """
    # One symbol per CSV file; strip the trailing '.csv' (4 characters).
    symbols = [fname[:-4] for fname in listdir(path)]
    if not symbols:
        raise ValueError("No symbols found in folder.")

    # Empty accumulator frames; process_stocks() fills metadata (and divs)
    # as the daily-bar writer consumes the generator below.
    divs = pd.DataFrame(
        columns=['sid', 'amount', 'ex_date',
                 'record_date', 'declared_date', 'pay_date'])
    splits = pd.DataFrame(columns=['sid', 'ratio', 'effective_date'])
    metadata = pd.DataFrame(
        columns=('start_date', 'end_date', 'auto_close_date',
                 'symbol', 'exchange'))

    # Valid trading days according to the selected exchange calendar.
    sessions = calendar.sessions_in_range(start_session, end_session)

    # Writing the bars drives the generator, which also populates metadata.
    daily_bar_writer.write(process_stocks(symbols, sessions, metadata, divs))

    # Metadata must be written after the generator has been exhausted.
    asset_db_writer.write(equities=metadata)
    adjustment_writer.write(splits=splits, dividends=divs)
"""
Generator function to iterate stocks,
build historical data, metadata
and dividend data
"""
def process_stocks(symbols, sessions, metadata, divs):
    """Generator over (sid, OHLCV DataFrame) pairs for daily_bar_writer.

    Side effects: fills in ``metadata`` (one row per sid) and ``divs``
    (one row per dividend payment) in place, so the caller can write
    them after the generator is exhausted.
    """
    # Loop the stocks, setting a unique Security ID (SID)
    for sid, symbol in enumerate(symbols):
        print('Loading {}...'.format(symbol))
        # Read the stock data from csv file.
        df = pd.read_csv('{}/{}.csv'.format(path, symbol), index_col=[0], parse_dates=[0])
        # First and last date present in the raw CSV (used only to slice).
        csv_start = df.index[0]
        csv_end = df.index[-1]
        # Synch to the official exchange calendar
        df = df.reindex(sessions.tz_localize(None))[csv_start:csv_end]
        # Forward fill missing data
        df.fillna(method='ffill', inplace=True)
        # Drop remaining NaN
        df.dropna(inplace=True)
        if df.empty:
            # No rows survive calendar alignment (e.g. CSV dates are all
            # non-sessions) — skip the symbol rather than crash below.
            continue
        # BUG FIX: the metadata dates must be actual calendar sessions.
        # Using the raw CSV dates (e.g. 1997-01-01, a holiday) makes zipline
        # later ask the calendar for the open/close of a non-session, which
        # raises KeyError: Timestamp('1997-01-01 ...'). Take the dates from
        # the calendar-aligned frame instead.
        start_date = df.index[0]
        end_date = df.index[-1]
        # The auto_close date is the day after the last trade.
        ac_date = end_date + pd.Timedelta(days=1)
        # Add a row to the metadata DataFrame. Don't forget to add an exchange field.
        metadata.loc[sid] = start_date, end_date, ac_date, symbol, "XBOM"
        # If there's dividend data, add that to the dividend DataFrame
        if 'dividend' in df.columns:
            # Slice off the days with dividends
            tmp = df[df['dividend'] != 0.0]['dividend']
            # BUG FIX: the original did ``divs = divs.append(div)``, which
            # rebinds the *local* name only — the caller's DataFrame stayed
            # empty and dividends were never written. Append rows in place.
            for ex_date, amount in tmp.items():
                # Column order matches the caller's divs frame:
                # sid, amount, ex_date, record_date, declared_date, pay_date.
                # record/declared/pay dates are unknown for now.
                divs.loc[len(divs)] = [sid, amount, ex_date,
                                       pd.NaT, pd.NaT, pd.NaT]
        yield sid, df
Dear Zipline Maintainers,
Before I tell you about my issue, let me describe my environment:
Environment
Now that you know a little about me, let me tell you about the issue I am having: I am getting the following error while creating a custom bundle from CSV files.