robertmartin8 / PyPortfolioOpt

Financial portfolio optimisation in python, including classical efficient frontier, Black-Litterman, Hierarchical Risk Parity
https://pyportfolioopt.readthedocs.io/
MIT License
4.24k stars 927 forks source link

Add fama-french 3 and fama-french 5 factors (can be also 4 or 6) for expected returns #595

Open nikrizzi opened 2 months ago

nikrizzi commented 2 months ago

I am trying to make the expected-returns calculation more reliable by using the Fama-French 3-factor and 5-factor models.

Add a new function, alongside mean_historical_return, ema_historical_return and capm_return, that returns the FF3 or FF5 expected returns.

Below code could be a starting point:

import pandas as pd import numpy as np import statsmodels.api as sm import getfactormodels as gfm

df is a pandas DataFrame, the same input as for mean_historical_return, ema_historical_return or capm_return

model can be 'ff3' or 'ff5'

frequency is the annualisation factor, usually 252, or whatever you want

the return value is a Series (tickers, expected returns), the same as for mean_historical_return, ema_historical_return or capm_return

def ff_expected_returns(df, model, frequency=252):
    """Estimate annualised expected returns with a Fama-French factor model.

    Each asset's daily excess return is regressed (OLS with an intercept) on
    the daily factor returns. The expected return of an asset is then its
    factor loadings times the sample-mean factor returns, plus the sample-mean
    risk-free rate, multiplied by ``frequency``. The fitted intercept (alpha)
    is deliberately left out of the estimate.

    :param df: asset prices, one column per ticker, indexed by date (any
               index that ``pd.to_datetime`` can convert). Not modified.
    :type df: pd.DataFrame
    :param model: factor model to use, ``"ff3"`` or ``"ff5"``
    :type model: str
    :param frequency: number of periods per year used to annualise the daily
                      estimate, defaults to 252
    :type frequency: int, optional
    :raises ValueError: if ``model`` is not supported, or if ``df`` and the
                        factor data share no usable return observations
    :return: annualised expected return per ticker
    :rtype: pd.Series
    """
    factor_columns = {
        "ff3": ["Mkt-RF", "SMB", "HML"],
        "ff5": ["Mkt-RF", "SMB", "HML", "RMW", "CMA"],
    }
    # Fail early with a clear message instead of hitting an unbound variable.
    if model not in factor_columns:
        raise ValueError(
            f"model must be one of {sorted(factor_columns)}, got {model!r}"
        )

    factors = gfm.get_factors(model=model, frequency="d", start_date="2000-01-01")

    # Work on a copy so the caller's index is not replaced in place.
    prices = df.copy()
    prices.index = pd.to_datetime(prices.index)

    # Keep only the dates present in both the prices and the factor data.
    common_dates = factors.index.intersection(prices.index)
    prices = prices.loc[common_dates]
    factors = factors.loc[common_dates]

    returns = prices.pct_change().dropna()
    if returns.empty:
        raise ValueError(
            "no overlapping return observations between df and the factor data"
        )

    # NOTE(review): the / 100 assumes RF is quoted in percent - confirm the
    # units returned by gfm.get_factors before relying on this.
    daily_rf = factors["RF"].reindex(returns.index) / 100
    excess_returns = returns.sub(daily_rf, axis=0)

    # Restrict the factors to the rows that survived pct_change/dropna.
    factor_returns = factors.loc[returns.index, factor_columns[model]]
    design = sm.add_constant(factor_returns)
    result = sm.OLS(excess_returns, design).fit()

    # Row 0 holds the intercept; the remaining rows are the factor loadings,
    # one column per ticker (1-D when df has a single column).
    betas = np.asarray(result.params)[1:]

    # mean(X @ beta) == mean(X) @ beta, so the per-day predictions never need
    # to be materialised.
    expected_daily = factor_returns.mean().to_numpy() @ betas + daily_rf.mean()
    return pd.Series(expected_daily * frequency, index=prices.columns)
nikrizzi commented 2 months ago

This is the final revised code (feel free to change whatever you want; I'm a beginner in both Python and statistics):

import pandas as pd import numpy as np import statsmodels.api as sm import getfactormodels as gfm

def ff_expected_returns(df, modello, frequency=252):
    """Estimate annualised expected returns with a Fama-French factor model.

    Each asset's daily excess return is regressed (OLS with an intercept) on
    the daily factor returns. The expected return of an asset is then its
    factor loadings times the sample-mean factor returns, plus the sample-mean
    risk-free rate, multiplied by ``frequency``. The fitted intercept (alpha)
    is deliberately left out of the estimate.

    :param df: asset prices, one column per ticker, indexed by date (any
               index that ``pd.to_datetime`` can convert). Not modified.
    :type df: pd.DataFrame
    :param modello: factor model to use, ``"ff3"`` or ``"ff5"``
    :type modello: str
    :param frequency: number of periods per year used to annualise the daily
                      estimate, defaults to 252
    :type frequency: int, optional
    :raises ValueError: if ``modello`` is not supported, or if ``df`` and the
                        factor data share no usable return observations
    :return: annualised expected return per ticker
    :rtype: pd.Series
    """
    factor_columns = {
        "ff3": ["Mkt-RF", "SMB", "HML"],
        "ff5": ["Mkt-RF", "SMB", "HML", "RMW", "CMA"],
    }
    # Fail early with a clear message instead of hitting an unbound variable.
    if modello not in factor_columns:
        raise ValueError(
            f"modello must be one of {sorted(factor_columns)}, got {modello!r}"
        )

    factors = gfm.get_factors(model=modello, frequency="d", start_date="2000-01-01")

    # Work on a copy so the caller's index is not replaced in place.
    prices = df.copy()
    prices.index = pd.to_datetime(prices.index)

    # Keep only the dates present in both the prices and the factor data.
    common_dates = factors.index.intersection(prices.index)
    prices = prices.loc[common_dates]
    factors = factors.loc[common_dates]

    returns = prices.pct_change().dropna()
    if returns.empty:
        raise ValueError(
            "no overlapping return observations between df and the factor data"
        )

    # NOTE(review): the / 100 assumes RF is quoted in percent - confirm the
    # units returned by gfm.get_factors before relying on this.
    daily_rf = factors["RF"].reindex(returns.index) / 100
    excess_returns = returns.sub(daily_rf, axis=0)

    # Restrict the factors to the rows that survived pct_change/dropna.
    factor_returns = factors.loc[returns.index, factor_columns[modello]]
    design = sm.add_constant(factor_returns)
    result = sm.OLS(excess_returns, design).fit()

    # Row 0 holds the intercept; the remaining rows are the factor loadings,
    # one column per ticker (1-D when df has a single column).
    betas = np.asarray(result.params)[1:]

    # Annualise the whole daily estimate at once. Scaling only the market
    # term by ``frequency`` would mix annual and daily quantities.
    expected_daily = factor_returns.mean().to_numpy() @ betas + daily_rf.mean()
    return pd.Series(expected_daily * frequency, index=prices.columns)
nikrizzi commented 2 months ago

python import pandas as pd import numpy as np import statsmodels.api as sm import getfactormodels as gfm

def ff_expected_returns(df, modello, frequency=252):
    """Estimate annualised expected returns with a Fama-French factor model.

    Daily excess returns of every asset are regressed (OLS with an intercept)
    on the daily factor returns. An asset's expected return is its factor
    loadings times the sample-mean factor returns, plus the sample-mean
    risk-free rate, multiplied by ``frequency``. The fitted intercept (alpha)
    is deliberately excluded.

    :param df: asset prices, one column per ticker, indexed by date (any
               index that ``pd.to_datetime`` can convert). Not modified.
    :type df: pd.DataFrame
    :param modello: factor model to use, ``"ff3"`` or ``"ff5"``
    :type modello: str
    :param frequency: number of periods per year used to annualise the daily
                      estimate, defaults to 252
    :type frequency: int, optional
    :raises ValueError: if ``modello`` is not supported, or if ``df`` and the
                        factor data share no usable return observations
    :return: annualised expected return per ticker
    :rtype: pd.Series
    """
    factor_columns = {
        "ff3": ["Mkt-RF", "SMB", "HML"],
        "ff5": ["Mkt-RF", "SMB", "HML", "RMW", "CMA"],
    }
    # Reject unsupported models up front rather than failing later on an
    # unbound variable.
    if modello not in factor_columns:
        raise ValueError(
            f"modello must be one of {sorted(factor_columns)}, got {modello!r}"
        )

    factors = gfm.get_factors(model=modello, frequency="d", start_date="2000-01-01")

    # Copy first: assigning to df.index would change the caller's DataFrame.
    prices = df.copy()
    prices.index = pd.to_datetime(prices.index)

    # Keep only the dates present in both the prices and the factor data.
    common_dates = factors.index.intersection(prices.index)
    prices = prices.loc[common_dates]
    factors = factors.loc[common_dates]

    returns = prices.pct_change().dropna()
    if returns.empty:
        raise ValueError(
            "no overlapping return observations between df and the factor data"
        )

    # NOTE(review): the / 100 assumes RF is quoted in percent - confirm the
    # units returned by gfm.get_factors before relying on this.
    daily_rf = factors["RF"].reindex(returns.index) / 100
    excess_returns = returns.sub(daily_rf, axis=0)

    # Restrict the factors to the rows that survived pct_change/dropna.
    factor_returns = factors.loc[returns.index, factor_columns[modello]]
    design = sm.add_constant(factor_returns)
    result = sm.OLS(excess_returns, design).fit()

    # Row 0 holds the intercept; the remaining rows are the factor loadings,
    # one column per ticker (1-D when df has a single column).
    betas = np.asarray(result.params)[1:]

    # Apply ``frequency`` to the complete daily estimate. Multiplying only
    # the Mkt-RF term would combine annual and daily quantities.
    expected_daily = factor_returns.mean().to_numpy() @ betas + daily_rf.mean()
    return pd.Series(expected_daily * frequency, index=prices.columns)