lmjohns3 / theanets

Neural network toolkit for Python
http://theanets.rtfd.org
MIT License
328 stars 73 forks source link

Why do I get this error message? IndexError: index 4 is out of bounds for axis 1 with size 4 #83

Closed gribeiro2004 closed 9 years ago

gribeiro2004 commented 9 years ago

import pandas as pd import pandas.io.data import numpy as np import datetime import theanets from sklearn import preprocessing import matplotlib.pyplot as plt

start = datetime.date(1965, 1, 1) end = datetime.date(2015, 6, 24) tickers = ['KO','PEP','^GSPC']

def get(tickers, start, end): def data(ticker): return pd.io.data.DataReader(ticker,'yahoo', start, end) datas = map(data, tickers) return pd.concat(datas, keys = tickers, names=['Ticker', 'Date'])

all_data = get(tickers, start, end)

def raw_data(): Adj_close = all_data[['Adj Close']].reset_index() Adj_close = Adj_close.pivot('Date', 'Ticker', 'Adj Close') Adj_close.columns = ['Adj Close KO', 'Adj Close PEP', 'Adj Close GSPC']

Close = all_data[['Close']].reset_index()
Close = Close.pivot('Date', 'Ticker', 'Close')
Close.columns = ['Close KO', 'Close PEP', 'Close GSPC']

High = all_data[['High']].reset_index()
High = High.pivot('Date', 'Ticker', 'High')
High.columns = ['High KO', 'High PEP', 'High GSPC']
High['High KO'] = High['High KO']*Adj_close['Adj Close KO']/Close['Close KO']
High['High PEP'] = High['High PEP']*Adj_close['Adj Close PEP']/Close['Close PEP']
High['High GSPC'] = High['High GSPC']*Adj_close['Adj Close GSPC']/Close['Close GSPC']
return Adj_close, High

Adj_close, High = raw_data()

def X_y(): Adj_close5 = Adj_close.shift(5)/Adj_close Adj_close5.columns = ['Adj Close KO5', 'Adj Close PEP5', 'Adj Close GSPC5'] Adj_close4 = Adj_close.shift(4)/Adj_close Adj_close4.columns = ['Adj Close KO4', 'Adj Close PEP4', 'Adj Close GSPC4'] Adj_close3 = Adj_close.shift(3)/Adj_close Adj_close3.columns = ['Adj Close KO3', 'Adj Close PEP3', 'Adj Close GSPC3'] Adj_close2 = Adj_close.shift(2)/Adj_close Adj_close2.columns = ['Adj Close KO2', 'Adj Close PEP2', 'Adj Close GSPC2'] Adj_close1 = Adj_close.shift(1)/Adj_close Adj_close1.columns = ['Adj Close KO1', 'Adj Close PEP1', 'Adj Close GSPC1']

X = pd.merge(Adj_close5, Adj_close4, left_index=True,right_index=True)
X = pd.merge(X, Adj_close3, left_index=True, right_index=True)
X = pd.merge(X, Adj_close2, left_index=True, right_index=True)
X = pd.merge(X, Adj_close1, left_index=True, right_index=True)

y0 = Adj_close['Adj Close KO']
y1 = High['High KO'].shift(-1)
y = (y1-y0)/y0
y = pd.DataFrame(y)
y.columns = ['Max Return']   

Data = pd.merge(y, X, left_index=True, right_index=True)
Data = Data.dropna()
y = Data['Max Return']
y = pd.DataFrame(y)
y.columns = ['Max Return']   
X = Data.drop('Max Return', axis=1)    
return X, y, Data

X, y, Data = X_y()

X = X.apply(lambda x:(x.astype(float) - min(x))/(max(x)-min(x)), axis = 0) X = X.values X=X.astype(np.float32)

y = y.values y[np.where(y>0.005)] = 1 y[np.where((y<=0.005) & (y>=0))] = 2 y[np.where((y>-0.005) & (y<0))] = 3 y[np.where(y<-0.005)] = 4 y=y.astype(np.int32)
y = y.ravel()

def split_data(X,y, slices): datasets = {} starts = np.floor(np.cumsum(len(X)*np.hstack([0,slices[:-1]])))

slices = {'training': slice(starts[0], starts[1]),
          'validation': slice(starts[1], starts[2]),
          'test': slice(starts[2], None)}
data = X,y
def slice_data(data, sl):
    return tuple(d[sl] for d in data)
for label in slices:
    datasets[label] = slice_data(data, slices[label])
return datasets

datasets = split_data(X,y,(0.6, 0.2, None))

import climate climate.enable_default_logging()

exp=theanets.Experiment(theanets.Classifier, layers=(15,2000,200,4), hidden_l1=0.1) exp.train(datasets['training'], datasets['validation'], optimize='sgd', learning_rate=0.01, momentum=0.5)

X_test, y_test = datasets['test'] y_pred = exp.network.classify(X_test)

from sklearn.metrics import confusion_matrix print(confusion_matrix(y_test, y_pred))

lmjohns3 commented 9 years ago

This is just a guess, but I think your class labels are causing this error. Change your class labels to be 0, 1, 2, and 3 (instead of 1, 2, 3, and 4) and see if that fixes it.

gribeiro2004 commented 9 years ago

Is changing class labels part of the theano settings? How can I do it?

gribeiro2004 commented 9 years ago

Got it. Worked like a charm. Thanks a lot!!!!!