phamdinhkhanh / vnquant

VietNam Data Stock Market Price
388 stars 180 forks source link

Error: IndexError: list index out of range #7

Closed giomoi closed 3 years ago

giomoi commented 3 years ago

I used your library, and the line `df = loader.download()` raises the error shown in the title. Please help. My source code:

def load_data(ticker, n_steps=50, scale=True, shuffle=True, lookup_step=1,
              test_size=0.2, feature_columns=None):
    """Build sliding-window train/test arrays from a price DataFrame.

    Args:
        ticker: Either a ticker symbol (``str``), which is downloaded through
            ``web.DataLoader``, or an already loaded ``pd.DataFrame``. A
            DataFrame argument is copied, so the caller's object is not
            modified.
        n_steps: Number of consecutive rows in each input window.
        scale: If true, each feature column is scaled with its own
            ``MinMaxScaler`` and the fitted scalers are returned.
        shuffle: Passed through to ``train_test_split``.
        lookup_step: How many rows ahead the target ``close`` value lies.
        test_size: Passed through to ``train_test_split``.
        feature_columns: Columns used as model inputs. Defaults to
            ``['close', 'volume', 'open', 'high', 'low']``.

    Returns:
        A dict with the keys ``'df'`` (copy of the unscaled frame),
        ``'column_scaler'`` (only when ``scale`` is true), ``'last_sequence'``,
        ``'X_train'``, ``'X_test'``, ``'y_train'`` and ``'y_test'``.

    Raises:
        TypeError: If ``ticker`` is neither a ``str`` nor a ``pd.DataFrame``.
        AssertionError: If a requested feature column is missing.
        ValueError: If there are too few rows to form a single window.
    """
    # A list default would be shared between calls, so resolve it here.
    if feature_columns is None:
        feature_columns = ['close', 'volume', 'open', 'high', 'low']
    else:
        feature_columns = list(feature_columns)

    # see if ticker is already a loaded stock DataFrame
    if isinstance(ticker, str):
        # Only build the loader when a download is actually needed.
        # NOTE(review): the maintainer reply in this thread says
        # data_source='VND' is no longer supported and recommends 'CAFE'.
        loader = web.DataLoader(ticker, start="2000-01-01", end="2020-06-24",
                                minimal=True, data_source="vnd")
        df = loader.download()
    elif isinstance(ticker, pd.DataFrame):
        # already loaded; work on a copy so the caller's frame stays intact
        df = ticker.copy()
    else:
        raise TypeError(
            "ticker must be a str or a pandas DataFrame, "
            f"got {type(ticker).__name__}"
        )

    # this will contain all the elements we want to return from this function
    result = {}
    # we will also return the original dataframe itself
    result['df'] = df.copy()

    # make sure that the passed feature_columns exist in the dataframe
    # (raised explicitly so the check survives `python -O`)
    for col in feature_columns:
        if col not in df.columns:
            raise AssertionError(f"'{col}' does not exist in the dataframe.")

    if scale:
        column_scaler = {}
        # scale the data (prices) from 0 to 1
        for column in feature_columns:
            scaler = preprocessing.MinMaxScaler()
            # MinMaxScaler needs a 2-D (n_samples, 1) array; reshape copes
            # with both 1-D and single-column 2-D selections.
            values = np.asarray(df[column].values).reshape(-1, 1)
            df[column] = scaler.fit_transform(values).ravel()
            column_scaler[column] = scaler
        # add the MinMaxScaler instances to the result returned
        result["column_scaler"] = column_scaler

    # add the target column (label) by shifting by `lookup_step`
    df['future'] = df['close'].shift(-lookup_step)
    # the last `lookup_step` rows contain NaN in the future column;
    # get them before dropping NaNs
    last_sequence = np.array(df[feature_columns].tail(lookup_step))
    # drop NaNs
    df.dropna(inplace=True)

    sequence_data = []
    sequences = deque(maxlen=n_steps)
    for entry, target in zip(df[feature_columns].values, df['future'].values):
        sequences.append(entry)
        if len(sequences) == n_steps:
            sequence_data.append([np.array(sequences), target])

    if not sequence_data:
        # Without this guard the reshape below fails with an opaque IndexError.
        raise ValueError(
            f"Not enough rows ({len(df)}) to build a window of "
            f"n_steps={n_steps}."
        )

    # get the last sequence by appending the last `n_steps` rows with the
    # `lookup_step` rows; for instance, if n_steps=50 and lookup_step=10,
    # last_sequence should be of 59 (that is 50+10-1) length.
    # It is used to predict future dates that are not in the dataset.
    last_sequence = list(sequences) + list(last_sequence)
    # shift the last sequence by -1
    last_sequence = np.array(pd.DataFrame(last_sequence).shift(-1).dropna())
    # add to result
    result['last_sequence'] = last_sequence

    # construct the X's and y's as numpy arrays
    X = np.array([seq for seq, _ in sequence_data])
    y = np.array([target for _, target in sequence_data])

    # reshape X to fit the neural network
    # NOTE(review): reshape reinterprets the buffer and does not swap axes
    # the way np.transpose would; confirm the layout the model expects.
    X = X.reshape((X.shape[0], X.shape[2], X.shape[1]))

    # split the dataset
    (result["X_train"], result["X_test"],
     result["y_train"], result["y_test"]) = train_test_split(
        X, y, test_size=test_size, shuffle=shuffle)
    # return the result
    return result
phamdinhkhanh commented 3 years ago

`data_source = 'VND'` is no longer supported; change it to `data_source = 'CAFE'` to download the data.