How to convert History to DataFrame?

aleksandermajos commented 6 years ago

hootnot commented 6 years ago

@hapiri Hi, I will see to provide you an example

aleksandermajos commented 6 years ago

By the way - exelent job with this Framework.It will be great to have example with convert to DataFrame , CSV and NDArray.

hootnot commented 6 years ago

I'm not a panda's guru, but it could be something like this:

from __future__ import print_function

import json
import oandapyV20
from oandapyV20 import API
import oandapyV20.endpoints.instruments as v20instruments

import numpy as np
import pandas as pd

from collections import OrderedDict

token = "..."
accountID = "..."

def DataFrameFactory(r, colmap=None, conv=None):
    def convrec(r, m):
        """convrec - convert OANDA candle record.

        return array of values, dynamically constructed, corresponding with config in mapping m.
        """
        v = []
        for keys in [x.split(":") for x in m.keys()]:
            _v = r.get(keys[0])
            for k in keys[1:]:
                _v = _v.get(k)
            v.append(_v)

        return v

    record_converter = convrec if conv is None else conv
    column_map_ohlcv = OrderedDict([
       ('time', 'D'),
       ('mid:o', 'O'),
       ('mid:h', 'H'),
       ('mid:l', 'L'),
       ('mid:c', 'C'),
       ('volume', 'V')
    ])
    cmap = column_map_ohlcv if colmap is None else colmap

    df = pd.DataFrame([list(record_converter(rec, cmap)) for rec in r.get('candles')])
    df.columns = list(cmap.values())
    #df.rename(columns=colmap, inplace=True)  # no need to use rename, cmap values are ordered
    df.set_index(pd.DatetimeIndex(df['D']), inplace=True)
    del df['D']
    df = df.apply(pd.to_numeric)  # OANDA returns string values: make all numeric
    return df

if __name__ == "__main__":
    api = API(access_token=token)
    params = {
          "count": 50,
          "granularity": "D"
    }
    instruments = ["EUR_USD", "EUR_GBP"]
    df = dict()

    for instr in instruments:
        try:
            r = v20instruments.InstrumentsCandles(instrument=instr,
                                                  params=params)
            api.request(r)
        except Exception as err:
            print("Error: {}".format(err))
            exit(2)
        else:
           df.update({instr: DataFrameFactory(r.response)})

    # Do something with the dataframes
    for I in instruments:
        print(df[I].head())

    print(df['EUR_USD'].iloc[4])
    print(df['EUR_USD'].loc["2017-08-20 21:00:00"])
    print(df['EUR_USD'].loc["2017-08-20 21:00:00"]['C'] * 10 )

will give output:

                                   O        H         L      C      V
D                                                                      
2017-08-14T21:00:00.000000000Z  1.17799  1.17928  1.16873  1.17350   46708
2017-08-15T21:00:00.000000000Z  1.17347  1.17790  1.16813  1.17676   58174
2017-08-16T21:00:00.000000000Z  1.17659  1.17902  1.16622  1.17242   61016
2017-08-17T21:00:00.000000000Z  1.17219  1.17749  1.17087  1.17607   59562
2017-08-20T21:00:00.000000000Z  1.17550  1.18280  1.17314  1.18146   33113
                                   O        H         L      C      V
D                                                                      
2017-08-14T21:00:00.000000000Z  0.90864  0.91328  0.90509  0.91184   20442
2017-08-15T21:00:00.000000000Z  0.91193  0.91430  0.90828  0.91282   21589
2017-08-16T21:00:00.000000000Z  0.91276  0.91367  0.90634  0.91105   21697
2017-08-17T21:00:00.000000000Z  0.91080  0.91494  0.90911  0.91334   20655
2017-08-20T21:00:00.000000000Z  0.91347  0.91620  0.91112  0.91591   13605

Switching the conversion from ohlcv to cv by changing:


           df.update({instr: DataFrameFactory(r.response,
                                              column_map_cv  # <--
                                              )})

                                  Close  Volume
Time                                           
2017-08-14T21:00:00.000000000Z  1.17350   46708
2017-08-15T21:00:00.000000000Z  1.17676   58174
2017-08-16T21:00:00.000000000Z  1.17242   61016
2017-08-17T21:00:00.000000000Z  1.17607   59562
2017-08-20T21:00:00.000000000Z  1.18146   33113
                                  Close  Volume
Time                                           
2017-08-14T21:00:00.000000000Z  0.91184   20442
2017-08-15T21:00:00.000000000Z  0.91282   21589
2017-08-16T21:00:00.000000000Z  0.91105   21697
2017-08-17T21:00:00.000000000Z  0.91334   20655
2017-08-20T21:00:00.000000000Z  0.91591   13605

hootnot commented 6 years ago

CSV is just a variant on the above:


def CSVFactory(response, colmap, conv, delim=","):
    for rec in response.get('candles'):
        yield delim.join([str(x) for x in list(conv(rec, colmap))])

and in main:

            for _r in CSVFactory(r.response, column_map_ohlcv, convrec):
                print(_r)

will give:

...
2017-10-12T21:00:00.000000000Z,0.89289,0.89378,0.88761,0.88985,24663
2017-10-15T21:00:00.000000000Z,0.88814,0.89228,0.88560,0.89018,17034
2017-10-16T21:00:00.000000000Z,0.89024,0.89300,0.88574,0.89198,20783
2017-10-17T21:00:00.000000000Z,0.89218,0.89470,0.88996,0.89264,17805
2017-10-18T21:00:00.000000000Z,0.89290,0.90128,0.89248,0.90060,21466
2017-10-19T21:00:00.000000000Z,0.90076,0.90226,0.89242,0.89314,22763

...
2017-08-14T21:00:00.000000000Z,1.17350,46708
2017-08-15T21:00:00.000000000Z,1.17676,58174
2017-08-16T21:00:00.000000000Z,1.17242,61016
2017-08-17T21:00:00.000000000Z,1.17607,59562
2017-08-20T21:00:00.000000000Z,1.18146,33113
2017-08-21T21:00:00.000000000Z,1.17615,36600

pass delim to set another delimiter

aleksandermajos commented 6 years ago

Thank You

hootnot commented 6 years ago

... will see to integrate under oandapyV20.contrib.... factories

ManPython commented 5 years ago

for i in range(bars): P = df['EUR_USD']['D'][i]) #D as Date time column

You can get error

File "C:\Python27\lib\site-packages\pandas\core\frame.py", line 2059, in getitem return self._getitem_column(key) File "C:\Python27\lib\site-packages\pandas\core\frame.py", line 2066, in _getitem_column return self._get_item_cache(key) File "C:\Python27\lib\site-packages\pandas\core\generic.py", line 1386, in _get_item_cache values = self._data.get(item) File "C:\Python27\lib\site-packages\pandas\core\internals.py", line 3543, in get loc = self.items.get_loc(item) File "C:\Python27\lib\site-packages\pandas\indexes\base.py", line 2136, in get_loc return self._engine.get_loc(self._maybe_cast_indexer(key)) File "pandas\index.pyx", line 132, in pandas.index.IndexEngine.get_loc (pandas\index.c:4433) File "pandas\index.pyx", line 154, in pandas.index.IndexEngine.get_loc (pandas\index.c:4279) File "pandas\src\hashtable_class_helper.pxi", line 732, in pandas.hashtable.PyObjectHashTable.get_item (pandas\hashtable.c:13742) File "pandas\src\hashtable_class_helper.pxi", line 740, in pandas.hashtable.PyObjectHashTable.get_item (pandas\hashtable.c:13696) KeyError: 'D'

If you planing some lib to use/integrate pandas should be implemented some solution to remove unicode eg for i in range(bars): P = float(df['EUR_USD']['C'][i])) #P as Price C as Close

hootnot commented 5 years ago

Hi @ManPython,

Since I use:

    df.set_index(pd.DatetimeIndex(df['D']), inplace=True)
    del df['D']

the dataframe has a datetimeindex and the 'D' column doesn't exist anymore. Rows can be accessed using loc, iloc and iterrows():

    print(df['EUR_USD'].iloc[4])    # 5th record
    print(df['EUR_USD'].loc["2017-10-18 21:00:00"])    # 5th record by date

To get rid of the float conversion I added:

    df = df.apply(pd.to_numeric)

hootnot / oanda-api-v20

How to convert History to DataFrame? #104