JECSand / yahoofinancials

A powerful financial data module used for pulling data from Yahoo Finance. This module can pull fundamental and technical data for stocks, indexes, currencies, cryptos, ETFs, Mutual Funds, U.S. Treasuries, and commodity futures.
https://pypi.python.org/pypi/yahoofinancials
MIT License
896 stars 214 forks source link

Convert get_financial_stmts data to pandas DataFrame #98

Open chinobing opened 3 years ago

chinobing commented 3 years ago

Sample Code

from yahoofinancials import YahooFinancials
import pandas as pd

def dropnested(alist):
    """Flatten a list of ``{date: {field: value}}`` mappings into columns.

    Each element of ``alist`` is iterated with ``.items()``. For every
    ``(key, value)`` pair the key, with ``'-'`` removed, is recorded as
    one date, and the pair contributes one row:

    * if ``value`` is a dict, its items are the row's fields;
    * otherwise the row has a single field named ``key`` holding ``value``.

    Columns are kept aligned with the dates: a field that is absent from
    a row gets ``None`` at that position, and a field that first shows up
    in a later row is back-filled with ``None`` for the earlier rows.
    Without this padding, rows with differing field sets produce lists of
    different lengths, which ``pandas.DataFrame.from_dict`` rejects with
    ``ValueError: arrays must all be same length``.

    Args:
        alist: iterable of mappings whose keys are strings (presumably
            ``YYYY-MM-DD`` period end dates -- confirm against the caller).

    Returns:
        A ``(outputdict, dates)`` tuple. ``outputdict`` maps each field
        name to a list with exactly ``len(dates)`` entries; ``dates`` is
        the list of keys with dashes stripped, in encounter order.
    """
    outputdict = {}
    dates = []
    for dic in alist:
        for key, value in dic.items():
            rows_so_far = len(dates)
            dates.append(key.replace('-', ''))
            row = value if isinstance(value, dict) else {key: value}
            for field, field_value in row.items():
                # A field seen for the first time is back-filled so that
                # its column lines up with the dates recorded earlier.
                column = outputdict.setdefault(field, [None] * rows_so_far)
                column.append(field_value)
            # Any column not touched by this row is one entry short.
            for column in outputdict.values():
                if len(column) == rows_so_far:
                    column.append(None)

    return outputdict, dates
# Fetch the annual income statements for every ticker in one request.
tickers = ['601012.SS','002129.SZ']
yahoo_financials = YahooFinancials(tickers)
income = yahoo_financials.get_financial_stmts('annual', 'income', reformat=True)
data = income['incomeStatementHistory']

# Build one frame per ticker and concatenate once at the end.
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0,
# and appending inside the loop copied the accumulated frame every time.
frames = []
for ticker in tickers:
    t_data = data[ticker]
    outputdict, dates = dropnested(t_data)
    _df = pd.DataFrame.from_dict(outputdict).apply(pd.to_numeric)
    _df['code'] = ticker
    # Attach the period dates as a column, then promote it to the index.
    end_date = pd.Series(dates, name='end_date')
    norm_df = pd.concat([_df, end_date], axis=1)
    norm_df = norm_df.set_index('end_date')
    frames.append(norm_df)

# pd.concat raises on an empty list, so fall back to an empty frame.
df = pd.concat(frames) if frames else pd.DataFrame()

output

image

Credits:

AwesomeCap commented 1 year ago

ValueError Traceback (most recent call last) /var/folders/5z/nfxgl_ps6qlf5tvrct964q980000gn/T/ipykernel_25037/2953129566.py in <module> 12 t_data = data[ticker] 13 outputdict, dates = dropnested(t_data) ---> 14 _df = pd.DataFrame.from_dict(outputdict).apply(pd.to_numeric) 15 _df['code'] = ticker 16 end_date = pd.Series(dates, name='end_date')

~/Library/Python/3.7/lib/python/site-packages/pandas/core/frame.py in from_dict(cls, data, orient, dtype, columns) 1307 raise ValueError("only recognize index or columns for orient") 1308 -> 1309 return cls(data, index=index, columns=columns, dtype=dtype) 1310 1311 def to_numpy(

~/Library/Python/3.7/lib/python/site-packages/pandas/core/frame.py in __init__(self, data, index, columns, dtype, copy) 466 467 elif isinstance(data, dict): --> 468 mgr = init_dict(data, index, columns, dtype=dtype) 469 elif isinstance(data, ma.MaskedArray): 470 import numpy.ma.mrecords as mrecords

~/Library/Python/3.7/lib/python/site-packages/pandas/core/internals/construction.py in init_dict(data, index, columns, dtype) 281 arr if not is_datetime64tz_dtype(arr) else arr.copy() for arr in arrays 282 ] --> 283 return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype) 284 285

~/Library/Python/3.7/lib/python/site-packages/pandas/core/internals/construction.py in arrays_to_mgr(arrays, arr_names, index, columns, dtype, verify_integrity) 76 # figure out the index, if necessary 77 if index is None: ---> 78 index = extract_index(arrays) 79 else: 80 index = ensure_index(index)

~/Library/Python/3.7/lib/python/site-packages/pandas/core/internals/construction.py in extract_index(data) 395 lengths = list(set(raw_lengths)) 396 if len(lengths) > 1: --> 397 raise ValueError("arrays must all be same length") 398 399 if have_dicts:

ValueError: arrays must all be same length

roadfoodr commented 1 year ago

@AwesomeCap in the sample code above, inside the `for ticker in tickers:` loop, replace the line that builds `_df` with:

_df = pd.DataFrame.from_dict(outputdict, orient='index').T.apply(pd.to_numeric)

See https://stackoverflow.com/a/40442094