dipanb / quant_models

Readymade functions and classes to perform a host of quantitative analysis like returns simulation, stock price evolution, Value at Risk, Bonds valuation, Vasicek model for interest rates, Black Scholes for options, Monte Carlo simulations for stocks, bonds, interest rates and options, etc
1 stars 0 forks source link

Code for life expectncy HRS, Death Model and Cocoon #1

Open dipanb opened 1 year ago

dipanb commented 1 year ago

!/usr/bin/env python

coding: utf-8

In[3]:

import pandas as pd import numpy as np import os os.chdir("Z:\Healthcare Research\Healthcare Spending\Research Papers\HRS\HRS_2018v2 - 2023\Dipan")

In[4]:

HRS_data = pd.io.stata.read_stata('randhrs1992_2018v2.dta')

In[5]:

columns = pd.DataFrame(HRS_data.columns.to_list(),columns = ['Variables'])

columns.to_csv('HRS Variables.csv',index = False)

In[6]:

HRS_keep = HRS_data[['hhidpn','r1mstat','r1mpart','r2mstat','r2mpart','r3mstat','r3mpart','r4mstat','r4mpart', 'r5mstat','r5mpart','r6mstat','r6mpart','r7mstat','r7mpart','r8mstat','r8mpart','r9mstat', 'r9mpart','r10mstat','r10mpart','r11mstat','r11mpart','r12mstat','r12mpart','r13mstat', 'r13mpart','r14mstat','r14mpart','r1mrct','r2mrct','r3mrct','r4mrct','r5mrct','r6mrct', 'r7mrct','r8mrct','r9mrct','r10mrct','r11mrct','r12mrct','r13mrct','r14mrct','r1mcurln' ,'r2mcurln','r3mcurln','r4mcurln','r5mcurln','r6mcurln','r7mcurln','r8mcurln','r9mcurln', 'r10mcurln','r11mcurln','r12mcurln','r13mcurln','r14mcurln','r1mlenm','r2mlenm','r3mlenm', 'r4mlenm','r5mlenm','r6mlenm','r7mlenm','r8mlenm','r9mlenm','r10mlenm','r11mlenm','r12mlenm', 'r13mlenm','r14mlenm','r1mdiv','r2mdiv','r3mdiv','r4mdiv','r5mdiv','r6mdiv','r7mdiv','r8mdiv', 'r9mdiv','r10mdiv','r11mdiv','r12mdiv','r13mdiv','r14mdiv','r1cenreg','r2cenreg','r3cenreg', 'r4cenreg','r5cenreg','r6cenreg','r7cenreg','r8cenreg','r9cenreg','r10cenreg','r11cenreg', 'r12cenreg','r13cenreg','r14cenreg','r1cendiv','r2cendiv','r3cendiv','r4cendiv','r5cendiv', 'r6cendiv','r7cendiv','r8cendiv','r9cendiv','r10cendiv','r11cendiv','r12cendiv','r13cendiv', 'r14cendiv','h1hhresp','h4hhresp','h5hhresp','h6hhresp','h7hhresp','h8hhresp','h9hhresp', 'h10hhresp','h11hhresp','h12hhresp','h13hhresp','h14hhresp','h1cpl','h4cpl','h5cpl','h6cpl', 'h7cpl','h8cpl','h9cpl','h10cpl','h11cpl','h12cpl','h13cpl','h14cpl','ragender','raracem', 'rabmonth','rabyear','rabdate','r1iwbeg','r2iwbeg','r3iwbeg','r4iwbeg','r5iwbeg','r6iwbeg', 'r7iwbeg','r8iwbeg','r9iwbeg','r10iwbeg','r11iwbeg','r12iwbeg','r13iwbeg','r14iwbeg','r1iwend', 'r2iwend','r3iwend','r4iwend','r5iwend','r6iwend','r7iwend','r8iwend','r9iwend','r10iwend', 'r11iwend','r12iwend','r13iwend','r14iwend','r1iwbegf','r2iwbegf','r1iwendf','r2iwendf', 'r3iwendf','r1iwmid','r2iwmid','r3iwmid','r4iwmid','r5iwmid','r6iwmid','r7iwmid', 'r8iwmid','r9iwmid','r10iwmid','r11iwmid','r12iwmid','r13iwmid','r14iwmid','r1iwmidf', 'r2iwmidf','r3iwmidf','r4iwmidf','r5iwmidf','r6iwmidf','r7iwmidf','r8iwmidf','r9iwmidf', 'r10iwmidf','r11iwmidf','r12iwmidf','r13iwmidf','r14iwmidf','r1iwstat','r2iwstat','r3iwstat', 'r4iwstat','r5iwstat','r6iwstat','r7iwstat','r8iwstat','r9iwstat','r10iwstat','r11iwstat', 'r12iwstat','r13iwstat','r14iwstat','radmonth','radyear','raddate','radage_m','radage_y', 'raedyrs','raedegrm','raeduc','rarelig','ravetrn','rameduc','rafeduc','rabplace','r2pexit', 'r3pexit','r4pexit','r5pexit','r6pexit','r7pexit','r8pexit','r9pexit','r10pexit','r11pexit', 'r12pexit','r13pexit','r14pexit','r1agem_b','r2agem_b','r3agem_b','r4agem_b','r5agem_b', 'r6agem_b','r7agem_b','r8agem_b','r9agem_b','r10agem_b','r11agem_b','r12agem_b','r13agem_b', 'r14agem_b','r1agem_e','r2agem_e','r3agem_e','r4agem_e','r5agem_e','r6agem_e','r7agem_e', 'r8agem_e','r9agem_e','r10agem_e','r11agem_e','r12agem_e','r13agem_e','r14agem_e','r1agem_m', 'r2agem_m','r3agem_m','r4agem_m','r5agem_m','r6agem_m','r7agem_m','r8agem_m','r9agem_m', 'r10agem_m','r11agem_m','r12agem_m','r13agem_m','r14agem_m','r1agey_b','r2agey_b','r3agey_b', 'r4agey_b','r5agey_b','r6agey_b','r7agey_b','r8agey_b','r9agey_b','r10agey_b','r11agey_b', 'r12agey_b','r13agey_b','r14agey_b','r1agey_e','r2agey_e','r3agey_e','r4agey_e','r5agey_e', 'r6agey_e','r7agey_e','r8agey_e','r9agey_e','r10agey_e','r11agey_e','r12agey_e','r13agey_e', 'r14agey_e','r1agey_m','r2agey_m','r3agey_m','r4agey_m','r5agey_m','r6agey_m','r7agey_m', 'r8agey_m','r9agey_m','r10agey_m','r11agey_m','r12agey_m','r13agey_m','r14agey_m','rawtsamp', 'r1wthh','r2wthh','r3wthh','r4wthh','r5wthh','r6wthh','r7wthh','r8wthh','r9wthh','r10wthh', 'r11wthh','r12wthh','r13wthh','r14wthh','r1wtresp','r2wtresp','r3wtresp','r4wtresp', 'r5wtresp','r6wtresp','r7wtresp','r8wtresp','r9wtresp','r10wtresp','r11wtresp','r12wtresp', 'r13wtresp','r14wtresp','h2cpl','h2hhresp','h3cpl','h3hhresp','racohbyr','r1iwendm', 'r2iwendm','r3iwendm','r4iwendm','r5iwendm','r6iwendm','r7iwendm','r8iwendm','r9iwendm', 'r10iwendm','r11iwendm','r12iwendm','r13iwendm','r14iwendm','r1iwendy','r2iwendy','r3iwendy', 'r4iwendy','r5iwendy','r6iwendy','r7iwendy','r8iwendy','r9iwendy','r10iwendy','r11iwendy', 'r12iwendy','r13iwendy','r14iwendy','recendiv','recenreg','remstat','rempart','reiwbeg', 'reiwmid','reiwend','reiwmidf','respagem_b','respagem_m','respagem_e','respagey_b', 'respagey_m','respagey_e','r1shlt','r2shlt','r3shlt','r4shlt','r5shlt','r6shlt', 'r7shlt','r8shlt','r9shlt','r10shlt','r11shlt','r12shlt','r13shlt','r14shlt','r1hltc5', 'r2hltc5','r3hltc5','r4hltc5','r5hltc5','r6hltc5','r3hltc5f','r4hltc5f','r5hltc5f', 'r6hltc5f','r1hltc3','r2hltc3','r3hltc3','r4hltc3','r5hltc3','r6hltc3','r7hltc3', 'r8hltc3','r9hltc3','r10hltc3','r11hltc3','r12hltc3','r13hltc3','r14hltc3','r7vgactx', 'r8vgactx','r9vgactx','r10vgactx','r11vgactx','r12vgactx','r13vgactx','r14vgactx', 'r7mdactx','r8mdactx','r9mdactx','r10mdactx','r11mdactx','r12mdactx','r13mdactx', 'r14mdactx','r7ltactx','r8ltactx','r9ltactx','r10ltactx','r11ltactx','r12ltactx', 'r13ltactx','r14ltactx','r1bmi','r2bmi','r3bmi','r4bmi','r5bmi','r6bmi','r7bmi', 'r8bmi','r9bmi','r10bmi','r11bmi','r12bmi','r13bmi','r14bmi','r1height','r2height', 'r3height','r4height','r5height','r6height','r7height','r8height','r9height','r10height', 'r11height','r12height','r13height','r14height','r1weight','r2weight','r3weight', 'r4weight','r5weight','r6weight','r7weight','r8weight','r9weight','r10weight','r11weight', 'r12weight','r13weight','r14weight','r1back','r2back','r3back','r4back','r5back','r6back', 'r7back','r8back','r9back','r10back','r11back','r12back','r13back','r14back','r1smokev', 'r2smokev','r3smokev','r4smokev','r5smokev','r6smokev','r7smokev','r8smokev','r9smokev', 'r10smokev','r11smokev','r12smokev','r13smokev','r14smokev','r1smoken','r2smoken','r3smoken', 'r4smoken','r5smoken','r6smoken','r7smoken','r8smoken','r9smoken','r10smoken','r11smoken', 'r12smoken','r13smoken','r14smoken','r1drink','r2drink','r3drink','r4drink','r5drink', 'r6drink','r7drink','r8drink','r9drink','r10drink','r11drink','r12drink','r13drink', 'r14drink','r1drinkr','r2drinkr','r3drinkd','r4drinkd','r5drinkd','r6drinkd','r7drinkd', 'r8drinkd','r9drinkd','r10drinkd','r11drinkd','r12drinkd','r13drinkd','r14drinkd', 'r3drinkn','r4drinkn','r5drinkn','r6drinkn','r7drinkn','r8drinkn','r9drinkn','r10drinkn', 'r11drinkn','r12drinkn','r13drinkn','r14drinkn','r1hibp','r2hibp','r3hibp','r4hibp','r5hibp', 'r6hibp','r7hibp','r8hibp','r9hibp','r10hibp','r11hibp','r12hibp','r13hibp','r14hibp', 'r1hibpe','r2hibpe','r3hibpe','r4hibpe','r5hibpe','r6hibpe','r7hibpe','r8hibpe','r9hibpe', 'r10hibpe','r11hibpe','r12hibpe','r13hibpe','r14hibpe','r1diab','r2diab','r3diab','r4diab', 'r5diab','r6diab','r7diab','r8diab','r9diab','r10diab','r11diab','r12diab','r13diab', 'r14diab','r1diabe','r2diabe','r3diabe','r4diabe','r5diabe','r6diabe','r7diabe','r8diabe', 'r9diabe','r10diabe','r11diabe','r12diabe','r13diabe','r14diabe','r1cancr','r2cancr', 'r3cancr','r4cancr','r5cancr','r6cancr','r7cancr','r8cancr','r9cancr','r10cancr','r11cancr', 'r12cancr','r13cancr','r14cancr','r1cancre','r2cancre','r3cancre','r4cancre','r5cancre', 'r6cancre','r7cancre','r8cancre','r9cancre','r10cancre','r11cancre','r12cancre','r13cancre', 'r14cancre','r1lung','r2lung','r3lung','r4lung','r5lung','r6lung','r7lung','r8lung', 'r9lung','r10lung','r11lung','r12lung','r13lung','r14lung','r1lunge','r2lunge','r3lunge', 'r4lunge','r5lunge','r6lunge','r7lunge','r8lunge','r9lunge','r10lunge','r11lunge','r12lunge', 'r13lunge','r14lunge','r1heart','r2heart','r3heart','r4heart','r5heart','r6heart','r7heart', 'r8heart','r9heart','r10heart','r11heart','r12heart','r13heart','r14heart','r1hearte', 'r2hearte','r3hearte','r4hearte','r5hearte','r6hearte','r7hearte','r8hearte','r9hearte', 'r10hearte','r11hearte','r12hearte','r13hearte','r14hearte','r1strok','r2strok','r3strok', 'r4strok','r5strok','r6strok','r7strok','r8strok','r9strok','r10strok','r11strok','r12strok', 'r13strok','r14strok','r1stroke','r2stroke','r3stroke','r4stroke','r5stroke','r6stroke', 'r7stroke','r8stroke','r9stroke','r10stroke','r11stroke','r12stroke','r13stroke','r14stroke', 'r1psych','r2psych','r3psych','r4psych','r5psych','r6psych','r7psych','r8psych','r9psych', 'r10psych','r11psych','r12psych','r13psych','r14psych','r1psyche','r2psyche','r3psyche', 'r4psyche','r5psyche','r6psyche','r7psyche','r8psyche','r9psyche','r10psyche','r11psyche', 'r12psyche','r13psyche','r14psyche','r1arthr','r2arthr','r3arthr','r4arthr','r5arthr', 'r6arthr','r7arthr','r8arthr','r9arthr','r10arthr','r11arthr','r12arthr','r13arthr', 'r14arthr','r1arthre','r2arthre','r3arthre','r4arthre','r5arthre','r6arthre','r7arthre', 'r8arthre','r9arthre','r10arthre','r11arthre','r12arthre','r13arthre','r14arthre', 'r4memry','r5memry','r6memry','r7memry','r8memry','r9memry','r4memrye','r5memrye', 'r6memrye','r7memrye','r8memrye','r9memrye','r10alzhe','r11alzhe','r12alzhe','r13alzhe', 'r14alzhe','r10alzhee','r11alzhee','r12alzhee','r13alzhee','r14alzhee','r10demen','r11demen', 'r12demen','r13demen','r14demen','r10demene','r11demene','r12demene','r13demene','r14demene', 'r1conde','r2conde','r3conde','r4conde','r5conde','r6conde','r7conde','r8conde','r9conde', 'r10conde','r11conde','r12conde','r13conde','r14conde','r8bpsys','r9bpsys','r10bpsys', 'r11bpsys','r12bpsys','r13bpsys','r14bpsys','r8bpdia','r9bpdia','r10bpdia','r11bpdia', 'r12bpdia','r13bpdia','r14bpdia','r8bppuls','r9bppuls','r10bppuls','r11bppuls','r12bppuls', 'r13bppuls','r14bppuls','h1atotn','h1atotb','h2atotn','h2atotb','h3atotn','h3atotb', 'h4atotn','h4atotb','h5atotn','h5atotb','h6atotn','h6atotb','h7atotn','h7atotb','h8atotn', 'h8atotb','h9atotn','h9atotb','h10atotn','h10atotb','h11atotn','h11atotb','h12atotn', 'h12atotb','h13atotn','h13atotb','h14atotn','h14atotb','r1iearn','h1icap','r2iearn','h2icap', 'r3iearn','h3icap','r4iearn','h4icap','r5iearn','h5icap','r6iearn','h6icap','r7iearn', 'h7icap','r8iearn','h8icap','r9iearn','h9icap','r10iearn','h10icap','r11iearn','h11icap', 'r12iearn','h12icap','r13iearn','h13icap','r14iearn','h14icap','r1sayret','r2sayret', 'r3sayret','r4sayret','r5sayret','r6sayret','r7sayret','r8sayret','r9sayret','r10sayret', 'r11sayret','r12sayret','r13sayret','r14sayret','r1retmon','r2retmon','r3retmon','r4retmon', 'r5retmon','r6retmon','r7retmon','r8retmon','r9retmon','r10retmon','r11retmon','r12retmon', 'r13retmon','r14retmon','r1retyr','r2retyr','r3retyr','r4retyr','r5retyr','r6retyr', 'r7retyr','r8retyr','r9retyr','r10retyr','r11retyr','r12retyr','r13retyr','r14retyr', 'r1rplnyr','r2rplnyr','r3rplnyr','r4rplnyr','r5rplnyr','r6rplnyr','r7rplnyr','r8rplnyr', 'r9rplnyr','r10rplnyr','r11rplnyr','r12rplnyr','r13rplnyr','r14rplnyr','r1jjobs', 'r2jjobs','r3jjobs','r4jjobs','r5jjobs','r6jjobs','r7jjobs','r8jjobs','r9jjobs', 'r10jjobs','r11jjobs','r12jjobs','r13jjobs','r14jjobs','r1jyears','r2jyears', 'r3jyears','r4jyears','r5jyears','r6jyears','r7jyears','r8jyears','r9jyears', 'r10jyears','r11jyears','r12jyears','r13jyears','r14jyears','h1hhres','h1child', 'r1dadliv','r1momliv','r1dadage','r1momage','h2hhres','h2child','r2dadliv','r2momliv', 'r2dadage','r2momage','h3hhres','h3child','r3dadliv','r3momliv','r3dadage','r3momage', 'h4hhres','h4child','r4dadliv','r4momliv','r4dadage','r4momage','h5hhres','h5child', 'r5dadliv','r5momliv','r5dadage','r5momage','h6hhres','h6child','r6dadliv','r6momliv', 'r6dadage','r6momage','h7hhres','h7child','r7dadliv','r7momliv','r7dadage','r7momage', 'h8hhres','h8child','r8dadliv','r8momliv','r8dadage','r8momage','h9hhres','h9child', 'r9dadliv','r9momliv','r9dadage','r9momage','h10hhres','h10child','r10dadliv','r10momliv', 'r10dadage','r10momage','h11hhres','h11child','r11dadliv','r11momliv','r11dadage', 'r11momage','h12hhres','h12child','r12dadliv','r12momliv','r12dadage','r12momage', 'h13hhres','h13child','r13dadliv','r13momliv','r13dadage','r13momage','h14hhres', 'h14child','r14dadliv','r14momliv','r14dadage','r14momage']].copy()

HRS_keep.to_csv('HRS Columns Filtered.csv')

In[7]:

########Death dataframe###################### temp_df = HRS_keep.copy()

HRS_Death = pd.DataFrame()

HRS_Death['Death_Year'] = HRS_keep['radyear']

temp_df = temp_df.loc[temp_df['radyear']>0]

In[8]:

temp_df.to_csv('HRS Death.csv')

1.Death Age and Death year

3. Find wave number exit interview

4. All demographic variables for previous wave and ever haves - death wave values not reported - fill death wave values for ever haves by last wave data

5. Sample weights

- Run regression analysis

- Calculate sum(sample wts) and count - for each year, each condition

Death data

Death_df = pd.DataFrame() Death_df[['Household_Id','Death_Year','Death_Age','Gender','Race','Years_Education', 'Highest_Degree','Education_Category','Religion', 'Veteran_Status','Mother_Years_Of_Education', 'Father_Years_Of_Education','Division_Birth','Sample_Wt', 'Cohort_Of_Respondent','Division','Region', 'Marital_Status','Partnership_Status',]] = temp_df[['hhidpn','radyear','radage_y','ragender','raracem','raedyrs', 'raedegrm','raeduc','rarelig','ravetrn', 'rameduc','rafeduc','rabplace','rawtsamp', 'racohbyr','recendiv','recenreg','remstat','rempart',]]

In[9]:

def find_death_wave(df): temp = df.copy() temp['Death_Wave'] = 0 for i in range(df.shape[0]):

print(f'row:{i+1}')

    for j in range(1,15):

print(f'wave:{j}')

        if df[f'r{j}iwstat'].iloc[i]=='5.nr,died this wv':
            temp['Death_Wave'].iloc[i] = j
            break
return temp['Death_Wave']

Death_df['Death_Wave'] = find_death_wave(temp_df)

In[10]:

temp_df['r1drinkn'] = temp_df['r1drinkr'] temp_df['r2drinkn'] = temp_df['r2drinkr']

In[11]:

Death_df['Times_Married'] = None Death_df['Length_Current_Marriage'] = None Death_df['Length_Longest_Marriage'] = None Death_df['Times_Divorced'] = None Death_df['Household_Size'] = None Death_df['Whether_Couple_HH'] = None Death_df['HH_Wt'] = None Death_df['Person_Level_Wt'] = None Death_df['Self_Report_Health'] = None Death_df['Self_Report_Health_Change_3_Category'] = None Death_df['Frequent_Vigorous_Physical_Activity'] = None Death_df['Frequent_Moderate_Physical_Activity'] = None Death_df['Frequent_Light_Physical_Activity'] = None Death_df['BMI'] = None Death_df['Height'] = None Death_df['Weight'] = None Death_df['Back_Problem'] = None Death_df['Smoke'] = None Death_df['Smoke_Now'] = None Death_df['Drink'] = None Death_df['Drink_Days_Per_Week'] = None Death_df['Drink_Per_Day'] = None Death_df['Blood_Pressure'] = None Death_df['Diabetes'] = None Death_df['Cancer'] = None Death_df['Lung_Disease'] = None Death_df['Heart_Problem'] = None Death_df['Stroke'] = None Death_df['Pyschological_Problem'] = None Death_df['Arthritis'] = None Death_df['Alzheimers'] = None Death_df['Dementia'] = None Death_df['Sum_Conditions'] = None Death_df['Systolic_BP'] = None Death_df['Diastolic_BP'] = None Death_df['Pulse'] = None Death_df['Non_Housing_Assets'] = None Death_df['Total_Assets'] = None Death_df['Income'] = None Death_df['Capital_Income_HH'] = None Death_df['Reports_Retired'] = None Death_df['Retirement_Year'] = None Death_df['Planned_Retirement_Year'] = None Death_df['Job_History'] = None Death_df['Job_Years'] = None Death_df['Household_Size'] = None Death_df['Number_Living_Children'] = None Death_df['Father_Alive'] = None Death_df['Mother_Alive'] = None Death_df['Father_Age'] = None Death_df['Mother_Age'] = None

for i in range(Death_df.shape[0]): d_wave = Death_df['Death_Wave'].iloc[i] if d_wave==1: continue d_wave = Death_df['Death_Wave'].iloc[i] Death_df['Times_Married'].iloc[i] = temp_df[f'r{d_wave-1}mrct'].iloc[i] Death_df['Length_Current_Marriage'].iloc[i] = temp_df[f'r{d_wave-1}mcurln'].iloc[i] Death_df['Length_Longest_Marriage'].iloc[i] = temp_df[f'r{d_wave-1}mlenm'].iloc[i] Death_df['Times_Divorced'].iloc[i] = temp_df[f'r{d_wave-1}mdiv'].iloc[i] Death_df['Household_Size'].iloc[i] = temp_df[f'h{d_wave-1}hhresp'].iloc[i] Death_df['Whether_Couple_HH'].iloc[i] = temp_df[f'h{d_wave-1}cpl'].iloc[i] Death_df['HH_Wt'].iloc[i] = temp_df[f'r{d_wave-1}wthh'].iloc[i] Death_df['Person_Level_Wt'].iloc[i] = temp_df[f'r{d_wave-1}wtresp'].iloc[i] Death_df['Self_Report_Health'].iloc[i] = temp_df[f'r{d_wave-1}shlt'].iloc[i] Death_df['Self_Report_Health_Change_3_Category'].iloc[i] = temp_df[f'r{d_wave-1}hltc3'].iloc[i] Death_df['BMI'].iloc[i] = temp_df[f'r{d_wave-1}bmi'].iloc[i] Death_df['Height'].iloc[i] = temp_df[f'r{d_wave-1}height'].iloc[i] Death_df['Weight'].iloc[i] = temp_df[f'r{d_wave-1}weight'].iloc[i] Death_df['Back_Problem'].iloc[i] = temp_df[f'r{d_wave-1}back'].iloc[i] Death_df['Smoke'].iloc[i] = temp_df[f'r{d_wave-1}smokev'].iloc[i] Death_df['Smoke_Now'].iloc[i] = temp_df[f'r{d_wave-1}smoken'].iloc[i] Death_df['Drink'].iloc[i] = temp_df[f'r{d_wave-1}drink'].iloc[i] Death_df['Drink_Per_Day'].iloc[i] = temp_df[f'r{d_wave-1}drinkn'].iloc[i] Death_df['Blood_Pressure'].iloc[i] = temp_df[f'r{d_wave-1}hibpe'].iloc[i] Death_df['Diabetes'].iloc[i] = temp_df[f'r{d_wave-1}diabe'].iloc[i] Death_df['Cancer'].iloc[i] = temp_df[f'r{d_wave-1}cancre'].iloc[i] Death_df['Lung_Disease'].iloc[i] = temp_df[f'r{d_wave-1}lunge'].iloc[i] Death_df['Heart_Problem'].iloc[i] = temp_df[f'r{d_wave-1}hearte'].iloc[i] Death_df['Stroke'].iloc[i] = temp_df[f'r{d_wave-1}stroke'].iloc[i] Death_df['Pyschological_Problem'].iloc[i] = temp_df[f'r{d_wave-1}psyche'].iloc[i] Death_df['Arthritis'].iloc[i] = temp_df[f'r{d_wave-1}arthre'].iloc[i] Death_df['Sum_Conditions'].iloc[i] = temp_df[f'r{d_wave-1}conde'].iloc[i] Death_df['Non_Housing_Assets'].iloc[i] = temp_df[f'h{d_wave-1}atotn'].iloc[i] Death_df['Total_Assets'].iloc[i] = temp_df[f'h{d_wave-1}atotb'].iloc[i] Death_df['Income'].iloc[i] = temp_df[f'r{d_wave-1}iearn'].iloc[i] Death_df['Capital_Income_HH'].iloc[i] = temp_df[f'h{d_wave-1}icap'].iloc[i] Death_df['Reports_Retired'].iloc[i] = temp_df[f'r{d_wave-1}sayret'].iloc[i] Death_df['Retirement_Year'].iloc[i] = temp_df[f'r{d_wave-1}retyr'].iloc[i] Death_df['Planned_Retirement_Year'].iloc[i] = temp_df[f'r{d_wave-1}rplnyr'].iloc[i] Death_df['Job_History'].iloc[i] = temp_df[f'r{d_wave-1}jjobs'].iloc[i] Death_df['Job_Years'].iloc[i] = temp_df[f'r{d_wave-1}jyears'].iloc[i] Death_df['Household_Size'].iloc[i] = temp_df[f'h{d_wave-1}hhres'].iloc[i] Death_df['Number_Living_Children'].iloc[i] = temp_df[f'h{d_wave-1}child'].iloc[i] Death_df['Father_Alive'].iloc[i] = temp_df[f'r{d_wave-1}dadliv'].iloc[i] Death_df['Mother_Alive'].iloc[i] = temp_df[f'r{d_wave-1}momliv'].iloc[i] Death_df['Father_Age'].iloc[i] = temp_df[f'r{d_wave-1}dadage'].iloc[i] Death_df['Mother_Age'].iloc[i] = temp_df[f'r{d_wave-1}momage'].iloc[i]

if d_wave<=3:
    continue
Death_df['Drink_Days_Per_Week'].iloc[i] = temp_df[f'r{d_wave-1}drinkd'].iloc[i]

if d_wave<=7:
    continue
Death_df['Frequent_Vigorous_Physical_Activity'].iloc[i] = temp_df[f'r{d_wave-1}vgactx'].iloc[i]
Death_df['Frequent_Moderate_Physical_Activity'].iloc[i] = temp_df[f'r{d_wave-1}mdactx'].iloc[i]
Death_df['Frequent_Light_Physical_Activity'].iloc[i] = temp_df[f'r{d_wave-1}ltactx'].iloc[i]

if d_wave<=8:
    continue
Death_df['Systolic_BP'].iloc[i] = temp_df[f'r{d_wave-1}bpsys'].iloc[i]
Death_df['Diastolic_BP'].iloc[i] = temp_df[f'r{d_wave-1}bpdia'].iloc[i]
Death_df['Pulse'].iloc[i] = temp_df[f'r{d_wave-1}bppuls'].iloc[i]

if d_wave<=10:
    continue
Death_df['Alzheimers'].iloc[i] = temp_df[f'r{d_wave-1}alzhee'].iloc[i]
Death_df['Dementia'].iloc[i] = temp_df[f'r{d_wave-1}demene'].iloc[i]

In[ ]:

Death_df.to_csv('HRS Death Cleaned.csv',index = False)

In[10]:

temp_df = HRS_keep.copy()

HRS_keep['radyear'].unique()

In[11]:

Incidence Data

0. For each wave - remove people who died before this wave - exit interview before this wave

1.Interview end Year

2.Ever haves - Each condition

- calculate sum(sample wts) and cout - for each year and each condtions

def find_if_death_wave(df):

temp = df.copy()

temp['Death_Wave'] = None

for i in range(df.shape[0]):

if not temp['radyear'].iloc[i]>0:

continue

for j in range(1,15):

if df[f'r{j}iwstat'].loc[i]=='5.nr,died this wv':

temp['Death_Wave'].iloc[i] = j

return temp['Death_Wave']

temp_df['Death_Wave'] = find_if_death_wave(temp_df)

In[12]:

def count_chronic(name_c,df):

count_c = []

for i in range(1,15):

if f'r{i}{name_c}' not in temp_df.columns:

count_c.append(0)

else:

count_c.append(temp_df[f'r{i}{name_c}'].value_counts()['1.yes'])

return count_c

In[13]:

Chronic_Incidence = pd.DataFrame(list(range(1,15)),columns=[''])

In[14]:

name_c = 'hibpe'

count_c = {}

sumwt_c = {}

Also include Age in this loop

for i in range(1,15):

years_list = temp_df.loc[temp_df[f'r{i}iwendy']>0][f'r{i}iwendy'].unique()

for year in years_list:

temp = temp_df.loc[temp_df[f'r{i}iwendy']==year]

for age in range(55,101):

temp1 = temp.loc[temp_df[f'r{i}agey_e']==age]

print(f'{i} {int(year)} {age} {temp1.shape[0]}')

Count

if f'{int(year)}_{age}' not in count_c.keys():

if f'r{i}{name_c}' in temp1.columns:

countc[f'{int(year)}{age}'] = temp1[f'r{i}{name_c}'].value_counts()['1.yes']

else:

countc[f'{int(year)}{age}'] = 0

else:

if f'r{i}{name_c}' in temp1.columns:

countc[f'{int(year)}{age}'] += temp1[f'r{i}{name_c}'].value_counts()['1.yes']

else:

countc[f'{int(year)}{age}'] += 0

Weight Sum

if f'{int(year)}_{age}' not in sumwt_c.keys():

if f'r{i}{name_c}' in temp1.columns:

sumwtc[f'{int(year)}{age}'] = temp1[f'rawtsamp'].loc[temp1[f'r{i}{name_c}']=='1.yes'].sum()

else:

sumwtc[f'{int(year)}{age}'] = 0

else:

if f'r{i}{name_c}' in temp1.columns:

sumwtc[f'{int(year)}{age}'] += temp1[f'rawtsamp'].loc[temp1[f'r{i}{name_c}']=='1.yes'].sum()

else:

sumwtc[f'{int(year)}{age}'] += 0

count_c_list = []

sumwt_c_list = []

for i in range(1992,2020):

for j in range(55,101):

if f'{i}_{j}' in count_c.keys():

count_c_list.append([i,j,countc[f'{i}{j}']])

if f'{i}_{j}' in sumwt_c.keys():

sumwt_c_list.append([i,j,sumwtc[f'{i}{j}']])

count_df = pd.DataFrame(count_c_list,columns = ['Year','Age','Count'])

sumwt_df = pd.DataFrame(sumwt_c_list,columns = ['Year','Age','Sum_Wt'])

sumwt_df = pd.merge(count_df, sumwt_df, how='left', left_on=['Year','Age'], right_on = ['Year','Age'])

In[22]:

Incidence Data

def incidence_chronic(name_c,df): temp_df = df.copy() count_c = {} sumwt_c = {}

#Also include Age in this loop
for i in range(1,15):
    years_list = temp_df.loc[temp_df[f'r{i}iwendy']>0][f'r{i}iwendy'].unique()
    for year in years_list:
        temp = temp_df.loc[temp_df[f'r{i}iwendy']==year]
        for age in range(55,101):
            temp1 = temp.loc[temp[f'r{i}agey_e']==age]

print(f'{i} {int(year)} {age} {temp1.shape[0]}')

            #Count
            if f'{int(year)}_{age}' not in count_c.keys():
                if f'r{i}{name_c}' in temp1.columns:
                    count_c[f'{int(year)}_{age}'] = temp1[f'r{i}{name_c}'].value_counts()['1.yes']
                else:
                    count_c[f'{int(year)}_{age}'] = 0
            else:
                if f'r{i}{name_c}' in temp1.columns:
                    count_c[f'{int(year)}_{age}'] += temp1[f'r{i}{name_c}'].value_counts()['1.yes']
                else:
                    count_c[f'{int(year)}_{age}'] += 0    

            #Weight Sum
            if f'{int(year)}_{age}' not in sumwt_c.keys():
                if f'r{i}{name_c}' in temp1.columns:
                    sumwt_c[f'{int(year)}_{age}'] = temp1[f'rawtsamp'].loc[temp1[f'r{i}{name_c}']=='1.yes'].sum()
                else:
                    sumwt_c[f'{int(year)}_{age}'] = 0
            else:
                if f'r{i}{name_c}' in temp1.columns:
                    sumwt_c[f'{int(year)}_{age}'] += temp1[f'rawtsamp'].loc[temp1[f'r{i}{name_c}']=='1.yes'].sum()
                else:
                    sumwt_c[f'{int(year)}_{age}'] += 0 

count_c_list = []
sumwt_c_list = []
for i in range(1992,2020):
    for j in range(55,101):
        if f'{i}_{j}' in count_c.keys():
            count_c_list.append([i,j,count_c[f'{i}_{j}']])
        if f'{i}_{j}' in sumwt_c.keys():
            sumwt_c_list.append([i,j,sumwt_c[f'{i}_{j}']])

count_df = pd.DataFrame(count_c_list,columns = ['Year','Age',f'{name_c}_Count'])
sumwt_df = pd.DataFrame(sumwt_c_list,columns = ['Year','Age',f'{name_c}_Sum_Wt'])

final_df = pd.merge(count_df, sumwt_df,  how='left', left_on=['Year','Age'], right_on = ['Year','Age'])
return final_df

In[23]:

Male

temp_df = HRS_keep.loc[HRS_keep['ragender']=='1.male'].copy() chronic_list = ['hibpe','diabe','cancre','lunge','hearte','stroke','psyche','arthre','alzhee','demene'] for i in chronic_list: a = incidence_chronic(i,temp_df) if i == chronic_list[0]: Incidence = a.copy() continue Incidence = pd.merge(Incidence, a, how='left', left_on=['Year','Age'], right_on = ['Year','Age']) Incidence_Male = Incidence.copy()

Female

temp_df = HRS_keep.loc[HRS_keep['ragender']=='2.female'].copy() chronic_list = ['hibpe','diabe','cancre','lunge','hearte','stroke','psyche','arthre','alzhee','demene'] for i in chronic_list: a = incidence_chronic(i,temp_df) if i == chronic_list[0]: Incidence = a.copy() continue Incidence = pd.merge(Incidence, a, how='left', left_on=['Year','Age'], right_on = ['Year','Age']) Incidence_Female = Incidence.copy()

In[25]:

Incidence.to_csv("Incidence Counts and Sum of Weights.csv",index = False)

In[26]:

Death Counts and Sum Weigts

Male

temp_df = HRS_keep.loc[HRS_keep['radyear']>0] temp_df = temp_df.loc[temp_df['ragender']=='1.male'] chronic_list = ['hibpe','diabe','cancre','lunge','hearte','stroke','psyche','arthre','alzhee','demene'] for i in chronic_list: a = incidence_chronic(i,temp_df) if i == chronic_list[0]: Death_count_df = a.copy() continue Death_count_df = pd.merge(Death_count_df, a, how='left', left_on=['Year','Age'], right_on = ['Year','Age']) Death_count_Male = Death_count_df.copy()

Male

temp_df = HRS_keep.loc[HRS_keep['radyear']>0] temp_df = temp_df.loc[temp_df['ragender']=='2.female'] chronic_list = ['hibpe','diabe','cancre','lunge','hearte','stroke','psyche','arthre','alzhee','demene'] for i in chronic_list: a = incidence_chronic(i,temp_df) if i == chronic_list[0]: Death_count_df = a.copy() continue Death_count_df = pd.merge(Death_count_df, a, how='left', left_on=['Year','Age'], right_on = ['Year','Age']) Death_count_Female = Death_count_df.copy()

Death_count_df.to_csv("Death Counts and Sum of Weights.csv",index = False)

In[28]:

with pd.ExcelWriter('Counts and Sum Weights Chronic - 8 May 2023.xlsx') as writer: Incidence_Male.to_excel(writer, sheet_name='Incidence_Male') Death_count_Male.to_excel(writer, sheet_name='Death_count_Male') Incidence_Female.to_excel(writer, sheet_name='Incidence_Female') Death_count_Female.to_excel(writer, sheet_name='Death_count_Female')

In[16]:

Total Count for Persons

Incidence Data

def total_person_count(df): temp_df = df.copy() count_c = {} sumwt_c = {}

count_c_death = {}

sumwt_c_death = {}

#Also include Age in this loop
for i in range(1,15):
    years_list = temp_df.loc[temp_df[f'r{i}iwendy']>0][f'r{i}iwendy'].unique()
    for year in years_list:
        temp = temp_df.loc[temp_df[f'r{i}iwendy']==year]
        for age in range(55,101):
            temp1 = temp.loc[temp[f'r{i}agey_e']==age]

print(f'{i} {int(year)} {age} {temp1.shape[0]}')

            #Count
            if f'{int(year)}_{age}' not in count_c.keys():
                count_c[f'{int(year)}_{age}'] = temp1.shape[0]

count_cdeath[f'{int(year)}{age}'] = temp1[f'r{i}pexit'].value_counts()['1.exit interview this wave']

            else:
                count_c[f'{int(year)}_{age}'] += temp1.shape[0] 

count_cdeath[f'{int(year)}{age}'] += temp1[f'r{i}pexit'].value_counts()['1.exit interview this wave']

            #Weight Sum
            if f'{int(year)}_{age}' not in sumwt_c.keys():
                sumwt_c[f'{int(year)}_{age}'] = temp1[f'rawtsamp'].sum()

sumwt_cdeath[f'{int(year)}{age}'] = temp1[f'rawtsamp'].loc[temp1[f'r{i}pexit']=='1.exit interview this wave'].sum()

            else:
                sumwt_c[f'{int(year)}_{age}'] += temp1[f'rawtsamp'].sum()

sumwt_cdeath[f'{int(year)}{age}'] = temp1[f'rawtsamp'].loc[temp1[f'r{i}pexit']=='1.exit interview this wave'].sum()

count_c_list = []
sumwt_c_list = []

count_c_death_list = []

sumwt_c_death_list = []

for i in range(1992,2020):
    for j in range(55,101):
        if f'{i}_{j}' in count_c.keys():
            count_c_list.append([i,j,count_c[f'{i}_{j}']])

count_c_death_list.append([i,j,count_cdeath[f'{i}{j}']])

        if f'{i}_{j}' in sumwt_c.keys():
            sumwt_c_list.append([i,j,sumwt_c[f'{i}_{j}']])

sumwt_c_death_list.append([i,j,sumwt_cdeath[f'{i}{j}']])

count_df = pd.DataFrame(count_c_list,columns = ['Year','Age',f'total_Count'])
sumwt_df = pd.DataFrame(sumwt_c_list,columns = ['Year','Age',f'total_Sum_Wt'])

count_death_df = pd.DataFrame(count_c_death_list,columns = ['Year','Age',f'Death_Count'])

sumwt_death_df = pd.DataFrame(sumwt_c_death_list,columns = ['Year','Age',f'Death_Sum_Wt'])

final_df = pd.merge(count_df, sumwt_df,  how='left', left_on=['Year','Age'], right_on = ['Year','Age'])

final_df = pd.merge(final_df, count_death_df, how='left', left_on=['Year','Age'], right_on = ['Year','Age'])

final_df = pd.merge(final_df, sumwt_death_df, how='left', left_on=['Year','Age'], right_on = ['Year','Age'])

return final_df

person_counts_male = total_person_count(HRS_keep.loc[HRS_keep['ragender']=='1.male']) person_counts_female = total_person_count(HRS_keep.loc[HRS_keep['ragender']=='2.female'])

In[17]:

Total Counts of Death

Male

temp_df = HRS_keep.loc[HRS_keep[f'radyear']>0] temp_df = temp_df.loc[temp_df['ragender']=='1.male'] a = pd.pivot_table(temp_df, index=['radyear','radage_y'],values=['rawtsamp'],aggfunc=len) a.columns = ['Death_Count'] b = pd.pivot_table(temp_df, index=['radyear','radage_y'],values=['rawtsamp'],aggfunc=np.sum) b.columns = ['Death_Sum'] temp_df = pd.concat([a,b]) temp_df = temp_df.reset_index() temp_df.columns = ['Year',"Age",'Death_Count','Death_Sum'] death_count_male = temp_df.copy()

Female

temp_df = HRS_keep.loc[HRS_keep[f'radyear']>0] temp_df = temp_df.loc[temp_df['ragender']=='2.female'] a = pd.pivot_table(temp_df, index=['radyear','radage_y'],values=['rawtsamp'],aggfunc=len) a.columns = ['Death_Count'] b = pd.pivot_table(temp_df, index=['radyear','radage_y'],values=['rawtsamp'],aggfunc=np.sum) b.columns = ['Death_Sum'] temp_df = pd.concat([a,b]) temp_df = temp_df.reset_index() temp_df.columns = ['Year',"Age",'Death_Count','Death_Sum'] death_count_female = temp_df.copy()

In[20]:

mortality_df = pd.merge(person_counts, death_counts, how='left', left_on=['Year','Age'], right_on = ['Year','Age'])

mortality_df.columns = ['Year','Age','Total_Count','Total_Sum','Death_Count','Death_Sum']

person_counts.to_csv('Total and Death Person Counts and Sum Wts.csv')

In[21]:

temp_df.to_csv('Total Death Counts.csv')

In[19]:

with pd.ExcelWriter('Total Person and Total Death Counts and Sum 8 may 2023.xlsx') as writer: person_counts_male.to_excel(writer, sheet_name='person_counts_male') death_count_male.to_excel(writer, sheet_name='death_count_male') person_counts_female.to_excel(writer, sheet_name='person_counts_female') death_count_female.to_excel(writer, sheet_name='death_count_female')

In[ ]:

In[ ]:

In[ ]:

In[ ]:

dipanb commented 1 year ago

!/usr/bin/env python

coding: utf-8

In[1]:

import pandas as pd import numpy as np import os os.chdir("Z:\Healthcare Research\Healthcare Spending\Research Papers\HRS\HRS_2018v2 - 2023\Dipan") df = pd.read_csv('HRS Columns Filtered.csv',index_col = [0]) df.reset_index(inplace=True,drop=True)

In[2]:

df['r1drinkn'] = df['r1drinkr'] df['r2drinkn'] = df['r2drinkr']

In[3]:

df['Is_Dead'] = [1 if df['radyear'].iloc[i].item()>0 else 0 for i in range(df.shape[0])]

In[45]:

df['Is_Dead'].sum()

In[4]:

df['Death_Wave'] = 100 for i in range(df.shape[0]): if df['Is_Dead'].iloc[i].item()==1: for j in range(1,15): if df[f'r{j}iwstat'].iloc[i]=='5.nr,died this wv': df['Death_Wave'].iloc[i] = j

In[47]:

df['Death_Wave'].value_counts()

In[60]:

Extended_Df = pd.DataFrame(columns = ['Household_Id','Is_Dead','Death_Wave','Death_Year','Death_Age', 'Gender','Race','Years_Education', 'Highest_Degree','Education_Category','Religion','Veteran_Status','Mother_Years_Of_Education', 'Father_Years_Of_Education','Division_Birth','Sample_Wt','Cohort_Of_Respondent', 'Division_At_Birth',

'Marital_Status','Partnership_Status',

        'Wave_No',
        'Times_Married','Length_Current_Marriage','Length_Longest_Marriage','Times_Divorced',
        'Household_Size','Whether_Couple_HH','HH_Wt','Person_Level_Wt','Self_Report_Health',
        'Self_Report_Health_Change_3_Category',

'Frequent_Vigorous_Physical_Activity','Frequent_Moderate_Physical_Activity','Frequent_Light_Physical_Activity',

        'BMI','Height','Weight','Back_Problem','Smoke','Smoke_Now','Drink','Drink_Days_Per_Week',
        'Drink_Per_Day','Blood_Pressure','Diabetes','Cancer','Lung_Disease','Heart_Problem','Stroke',
        'Pyschological_Problem','Arthritis',

'Alzheimers','Dementia',

        'Sum_Conditions',

'Systolic_BP','Diastolic_BP','Pulse',

        'Non_Housing_Assets','Total_Assets','Income','Capital_Income_HH','Reports_Retired',
        'Retirement_Year','Planned_Retirement_Year','Job_History','Job_Years','Household_Size',
        'Number_Living_Children','Father_Alive','Mother_Alive','Father_Age','Mother_Age',
        'Marital_Status','Partnership_Status','Current_Age'])

count = 0 for i in range(df.shape[0]):

for i in range(10):

print(f'{i}/{df.shape[0]-1}')
if df['Death_Wave'].iloc[i]>2:

    for wave in range(3,15):          
        if df['Death_Wave'].iloc[i]>wave:

print(df['hhidpn'].iloc[i],df['radage_y'].iloc[i])

print(f'{i} {wave}')

            c_list = []
            c_list.extend(df[['hhidpn','Is_Dead','Death_Wave','radyear','radage_y','ragender','raracem','raedyrs',
            'raedegrm','raeduc','rarelig','ravetrn','rameduc','rafeduc','rabplace','rawtsamp',
                      'racohbyr','rabplace']].iloc[i])
            b_list = []

            b_list = [wave,
                      df[f'r{wave}mrct'].iloc[i],df[f'r{wave}mcurln'].iloc[i],df[f'r{wave}mlenm'].iloc[i],
                      df[f'r{wave}mdiv'].iloc[i],df[f'h{wave}hhresp'].iloc[i],df[f'h{wave}cpl'].iloc[i],
                      df[f'r{wave}wthh'].iloc[i],df[f'r{wave}wtresp'].iloc[i],df[f'r{wave}shlt'].iloc[i],
                      df[f'r{wave}hltc3'].iloc[i],
        #               df[f'r{wave}vgactx'].iloc[i],df[f'r{wave}mdactx'].iloc[i],
        #               df[f'r{wave}ltactx'].iloc[i],
                      df[f'r{wave}bmi'].iloc[i],df[f'r{wave}height'].iloc[i],
                      df[f'r{wave}weight'].iloc[i],df[f'r{wave}back'].iloc[i],df[f'r{wave}smokev'].iloc[i],
                      df[f'r{wave}smoken'].iloc[i],df[f'r{wave}drink'].iloc[i],df[f'r{wave}drinkd'].iloc[i],
                      df[f'r{wave}drinkn'].iloc[i],df[f'r{wave}hibpe'].iloc[i],df[f'r{wave}diabe'].iloc[i],
                      df[f'r{wave}cancre'].iloc[i],df[f'r{wave}lunge'].iloc[i],df[f'r{wave}hearte'].iloc[i],
                      df[f'r{wave}stroke'].iloc[i],df[f'r{wave}psyche'].iloc[i],df[f'r{wave}arthre'].iloc[i],
        #               df[f'r{wave}alzhee'].iloc[i],df[f'r{wave}demene'].iloc[i],
                      df[f'r{wave}conde'].iloc[i],
        #               df[f'r{wave}bpsys'].iloc[i],df[f'r{wave}bpdia'].iloc[i],df[f'r{wave}bppuls'].iloc[i],
                      df[f'h{wave}atotn'].iloc[i],df[f'h{wave}atotb'].iloc[i],df[f'r{wave}iearn'].iloc[i],
                      df[f'h{wave}icap'].iloc[i],df[f'r{wave}sayret'].iloc[i],df[f'r{wave}retyr'].iloc[i],
                      df[f'r{wave}rplnyr'].iloc[i],df[f'r{wave}jjobs'].iloc[i],df[f'r{wave}jyears'].iloc[i],
                      df[f'h{wave}hhres'].iloc[i],df[f'h{wave}child'].iloc[i],df[f'r{wave}dadliv'].iloc[i],
                      df[f'r{wave}momliv'].iloc[i],df[f'r{wave}dadage'].iloc[i],df[f'r{wave}momage'].iloc[i],

                      df[f'r{wave}mstat'].iloc[i],df[f'r{wave}mpart'].iloc[i],df[f'r{wave}agey_e'].iloc[i]]

            c_list.extend(b_list)

print(f'{i} {wave} {len(c_list)}')

            Extended_Df.loc[count] = c_list
            count = count + 1

In[61]:

Extended_Df['Death_Age'].head(25)

In[6]:

Extended_Df.to_csv('HRS Expanded v2.csv')

In[63]:

Extended_Df = pd.read_csv('HRS Expanded v2.csv', index_col = 0)

In[64]:

Extended_Df = Extended_Df.loc[:,~Extended_Df.columns.duplicated()].copy()

Extended_Df.drop('Household_Id', inplace=True, axis=1)

Extended_Df.drop('Sample_Wt', inplace=True, axis=1) Extended_Df.drop('HH_Wt', inplace=True, axis=1) Extended_Df.drop('Person_Level_Wt', inplace=True, axis=1)

Extended_Df.drop('Self_Report_Health_Change_3_Category', inplace=True, axis=1) Extended_Df.drop('Self_Report_Health', inplace=True, axis=1)

Extended_Df.drop('Systolic_BP', inplace=True, axis=1)

Extended_Df.drop('Pulse', inplace=True, axis=1)

Extended_Df.drop('Diastolic_BP', inplace=True, axis=1)

Extended_Df.drop('Dementia', inplace=True, axis=1)

Extended_Df.drop('Alzheimers', inplace=True, axis=1)

Extended_Df.drop('Back_Problem', inplace=True, axis=1)

Extended_Df.drop('Frequent_Vigorous_Physical_Activity', inplace=True, axis=1)

Extended_Df.drop('Frequent_Moderate_Physical_Activity', inplace=True, axis=1)

Extended_Df.drop('Frequent_Light_Physical_Activity', inplace=True, axis=1)

Extended_Df.drop('Planned_Retirement_Year', inplace=True, axis=1) Extended_Df.drop('Retirement_Year', inplace=True, axis=1) Extended_Df.drop('Household_Size.1', inplace=True, axis=1)

In[35]:

Extended_Df1 = Extended_Df.copy()

Extended_Df.iloc[:,:].isna().sum()

Extended_Df['Current_Age'].value_counts()

Extended_Df.shape

In[65]:

Extended_Df['Death_Age'].fillna(value = 200, inplace = True) Extended_Df['Race'].fillna(value = '1.white/caucasian', inplace = True)

Extended_Df.loc[Extended_Df['Highest_Degree']=='1.ged','Highest_Degree'] = '3.hs/ged' Extended_Df.loc[Extended_Df['Highest_Degree']=='2.hs','Highest_Degree'] = '3.hs/ged'

Extended_Df.loc[Extended_Df['Years_Education']=='17.17+ yrs','Years_Education'] = '17' Extended_Df.loc[Extended_Df['Years_Education']=='0.none','Years_Education'] = '0' Extended_Df['Years_Education'].fillna(value = '-1', inplace = True)

Extended_Df['Education_Category'].fillna(value = '1.lt high-school', inplace = True)

Extended_Df['Religion'].fillna(value = '5.other', inplace = True)

Extended_Df['Veteran_Status'].fillna(value = '0.no', inplace = True)

Extended_Df['Division_Birth'].fillna(value = '11.not us/inc us terr', inplace = True)

Extended_Df['Cohort_Of_Respondent'].fillna(value = '0.not in any cohort', inplace = True)

Extended_Df['BMI'].fillna(value = Extended_Df['BMI'].mean(), inplace = True) Extended_Df['Height'].fillna(value = Extended_Df['Height'].mean(), inplace = True) Extended_Df['Weight'].fillna(value = Extended_Df['Weight'].mean(), inplace = True) Extended_Df['Length_Current_Marriage'].fillna(value = Extended_Df['Length_Current_Marriage'].mean(), inplace = True) Extended_Df['Length_Longest_Marriage'].fillna(value = Extended_Df['Length_Longest_Marriage'].mean(), inplace = True) Extended_Df['Times_Married'].fillna(value = int(Extended_Df['Times_Married'].mean()), inplace = True) Extended_Df['Household_Size'].fillna(value = int(Extended_Df['Household_Size'].mean()), inplace = True)

Extended_Df['Whether_Couple_HH'].fillna(value = 'No value provided', inplace = True)

Extended_Df['Mother_Years_Of_Education'].fillna(value = '-1', inplace = True) Extended_Df['Father_Years_Of_Education'].fillna(value = '-1', inplace = True)

Extended_Df.loc[Extended_Df['Division_At_Birth']=='10.us/na division','Division_At_Birth'] = '11.not us/inc us terr' Extended_Df['Division_At_Birth'].fillna(value = '11.not us/inc us terr', inplace = True)

Extended_Df.loc[Extended_Df['Marital_Status']=='4.separated','Marital_Status'] = '6.separated/divorced' Extended_Df.loc[Extended_Df['Marital_Status']=='5.divorced','Marital_Status'] = '6.separated/divorced' Extended_Df['Marital_Status'].fillna(value = 'No value given', inplace = True)

Extended_Df['Partnership_Status'].fillna(value = '0. no', inplace = True)

Extended_Df['Times_Divorced'].fillna(value = 0.0, inplace = True)

Extended_Df['Smoke'].fillna(value = '1.yes', inplace = True) Extended_Df['Smoke_Now'].fillna(value = '0.no', inplace = True)

Extended_Df['Drink'].fillna(value = '0.no', inplace = True) Extended_Df['Drink_Per_Day'].fillna(value = '0.0 or doesnt drink', inplace = True) Extended_Df['Drink_Days_Per_Week'].fillna(value = '0.0 or doesnt drink', inplace = True)

Extended_Df['Reports_Retired'].fillna(value = 'No value given', inplace = True)

Extended_Df['Reports_Retired'].fillna(value = 'No value given', inplace = True)

Extended_Df['Father_Alive'].fillna(value = '0.no', inplace = True) Extended_Df['Mother_Alive'].fillna(value = '0.no', inplace = True)

Extended_Df['Father_Age'].fillna(value = int(Extended_Df['Father_Age'].mean()), inplace = True) Extended_Df['Mother_Age'].fillna(value = int(Extended_Df['Mother_Age'].mean()), inplace = True)

Extended_Df['Number_Living_Children'].fillna(value = int(Extended_Df['Number_Living_Children'].mean()), inplace = True)

Extended_Df.loc[Extended_Df['Drink_Per_Day']=='0.0 or doesnt drink','Drink_Per_Day'] = '0' Extended_Df.loc[Extended_Df['Drink_Per_Day']=="0.doesn't drink",'Drink_Per_Day'] = '0' Extended_Df.loc[Extended_Df['Drink_Per_Day']=='1.lt 1/day','Drink_Per_Day'] = '1' Extended_Df.loc[Extended_Df['Drink_Per_Day']=='2.1-2/day','Drink_Per_Day'] = '2' Extended_Df.loc[Extended_Df['Drink_Per_Day']=='3.3-4/day','Drink_Per_Day'] = '3' Extended_Df.loc[Extended_Df['Drink_Per_Day']=='4.5+/day','Drink_Per_Day'] = '4' Extended_Df.loc[Extended_Df['Drink_Per_Day']=='99.all day','Drink_Per_Day'] = '45' Extended_Df['Drink_Per_Day'] = pd.to_numeric(Extended_Df['Drink_Per_Day'])

Extended_Df.loc[Extended_Df['Drink_Days_Per_Week']=='0.0 or doesnt drink','Drink_Days_Per_Week'] = '0' Extended_Df['Drink_Days_Per_Week'] = pd.to_numeric(Extended_Df['Drink_Days_Per_Week'])

Extended_Df.loc[Extended_Df['Father_Years_Of_Education']=='8.5.8+ yrs','Father_Years_Of_Education'] = '8' Extended_Df.loc[Extended_Df['Father_Years_Of_Education']=='8.5','Father_Years_Of_Education'] = '8' Extended_Df.loc[Extended_Df['Father_Years_Of_Education']=='8.5.8+ yrs','Father_Years_Of_Education'] = '8' Extended_Df.loc[Extended_Df['Father_Years_Of_Education']=='17.17+ yrs','Father_Years_Of_Education'] = '17' Extended_Df.loc[Extended_Df['Father_Years_Of_Education']=='0.none','Father_Years_Of_Education'] = '0' Extended_Df['Father_Years_Of_Education'] = pd.to_numeric(Extended_Df['Father_Years_Of_Education'])

Extended_Df.loc[Extended_Df['Mother_Years_Of_Education']=='8.5.8+ yrs','Mother_Years_Of_Education'] = '8' Extended_Df.loc[Extended_Df['Mother_Years_Of_Education']=='8.5','Mother_Years_Of_Education'] = '8' Extended_Df.loc[Extended_Df['Mother_Years_Of_Education']=='8.5.8+ yrs','Mother_Years_Of_Education'] = '8' Extended_Df.loc[Extended_Df['Mother_Years_Of_Education']=='17.17+ yrs','Mother_Years_Of_Education'] = '17' Extended_Df.loc[Extended_Df['Mother_Years_Of_Education']=='0.none','Mother_Years_Of_Education'] = '0' Extended_Df['Mother_Years_Of_Education'] = pd.to_numeric(Extended_Df['Mother_Years_Of_Education'])

Extended_Df['Years_Education'] = pd.to_numeric(Extended_Df['Years_Education'])

In[84]:

Extended_Df = Extended_Df[Extended_Df['Blood_Pressure'].notna()]

Extended_Df.iloc[:,:].isna().sum()

Extended_Df['Death_Age'].value_counts()

Extended_Df.shape

In[ ]:

In[85]:

Death_Df = Extended_Df.loc[Extended_Df['Is_Dead']==1] Non_Death_Df = Extended_Df.loc[Extended_Df['Is_Dead']==0]

In[86]:

print(Extended_Df.shape) print(Death_Df.shape) print(Non_Death_Df.shape)

In[97]:

from sklearn.model_selection import train_test_split from sklearn.ensemble import RandomForestRegressor from sklearn.metrics import r2_score

In[105]:

labels = np.array(Death_Df['Death_Age']) model_df = Death_Df.drop('Death_Age', axis = 1) train, test, train_labels, test_labels = train_test_split(model_df,labels,test_size = 0.2)

In[106]:

train.iloc[:,:].isna().sum()

In[107]:

Other_Data_Train = train[['Cohort_Of_Respondent','Death_Year','Death_Wave', 'Length_Current_Marriage','Is_Dead', 'Father_Alive', 'Mother_Alive', 'Father_Age', 'Mother_Age','Wave_No','Household_Id','Current_Age']]

train = train.drop(['Cohort_Of_Respondent','Death_Year','Death_Wave', 'Length_Current_Marriage','Is_Dead', 'Father_Alive', 'Mother_Alive', 'Father_Age', 'Mother_Age','Wave_No','Household_Id','Current_Age'],axis=1)

train = pd.get_dummies(train) train = train.astype('float64') len(train.columns.to_list())

In[108]:

Other_Data_Test = test[['Cohort_Of_Respondent','Death_Year','Death_Wave', 'Length_Current_Marriage','Is_Dead', 'Father_Alive', 'Mother_Alive', 'Father_Age', 'Mother_Age','Wave_No','Household_Id','Current_Age']]

test = test.drop(['Cohort_Of_Respondent','Death_Year','Death_Wave', 'Length_Current_Marriage','Is_Dead', 'Father_Alive', 'Mother_Alive', 'Father_Age', 'Mother_Age','Wave_No','Household_Id','Current_Age'],axis=1)

test = pd.get_dummies(test) test = test.astype('float64') len(test.columns.to_list())

In[109]:

Other_Data_Non_Death_Df = Non_Death_Df[['Cohort_Of_Respondent','Death_Year','Death_Wave', 'Length_Current_Marriage','Is_Dead', 'Father_Alive', 'Mother_Alive', 'Father_Age', 'Mother_Age','Wave_No','Death_Age', 'Household_Id','Current_Age']]

Non_Death_Df = Non_Death_Df.drop(['Cohort_Of_Respondent','Death_Year','Death_Wave', 'Length_Current_Marriage','Is_Dead', 'Father_Alive', 'Mother_Alive', 'Father_Age', 'Mother_Age','Wave_No','Death_Age', 'Household_Id','Current_Age'],axis=1)

Non_Death_Df = pd.get_dummies(Non_Death_Df) Non_Death_Df = Non_Death_Df.astype('float64') print(len(Non_Death_Df.columns.to_list()))

In[ ]:

In[110]:

rf = RandomForestRegressor(n_estimators = 1000, random_state = 42,verbose=True) rf.fit(train, train_labels)

In[111]:

predictions = rf.predict(train) errors = abs(predictions - train_labels) mape = 100 * (errors / train_labels) accuracy = 100 - np.mean(mape) accuracy

In[112]:

R2 = r2_score(train_labels, predictions) Adj_R2 = 1-(1-R2)*(train.shape[0]-1)/(train.shape[0]-train.shape[1]-1) Adj_R2

In[113]:

predictions_test = rf.predict(test) test_errors = abs(predictions_test - test_labels) mape = 100 * (test_errors / test_labels) accuracy = 100 - np.mean(mape) accuracy

In[114]:

importances = list(rf.featureimportances) feature_importances = [(feature, round(importance, 4)) for feature, importance in zip(train.columns, importances)] feature_importances = sorted(feature_importances, key = lambda x: x[1], reverse = True) feature_importances

In[116]:

test = pd.concat([test, Other_Data_Test], axis=1) test['Actual_Death_Age'] = test_labels test['Predicted_Death_Age'] = predictions_test test.head() test.to_excel("Death Model Tests Predicted v2.xlsx")

In[118]:

predictions_non_death = rf.predict(Non_Death_Df) Non_Death_Df = pd.concat([Non_Death_Df, Other_Data_Non_Death_Df], axis=1) Non_Death_Df['Predicted_Death_Age'] = predictions_non_death Non_Death_Df.to_excel("Death Model Non Death Predicted v2.xlsx")

In[ ]:

dipanb commented 1 year ago

!/usr/bin/env python

coding: utf-8

In[3]:

import pandas as pd import numpy as np import datetime as dt from dateutil.relativedelta import relativedelta import matplotlib.pyplot as plt from matplotlib import cm from mpl_toolkits.mplot3d import Axes3D import time

In[4]:

Federal Tax Values

STD_Single = 12400 STD_HOH = 18650 STD_MFJ = 24800 STD_MFS = 12400

PE_Single = 0 PE_HOH = 0 PE_MFJ = 0 PE_MFS = 0 DependentExemption = 0

Single, HOH, MFJ, MFS

Inc_MTR = [.1,.12,.22,.24,.32,.35,.37]

Inc_Br = [] Inc_Br.append([9875,40125,85525,163300,207350,518400,10000000000]) Inc_Br.append([14101,53701,85501,163300,207350,518400,10000000000]) Inc_Br.append([19750,80250,171050,326600,414700,622050,10000000000]) Inc_Br.append([9875,40125,85525,163300,207350,311025,10000000000])

Q_Br = [] Q_MTR = [0,.15,.2] Q_Br.append([0,40000,441450]) Q_Br.append([0,53600,469050]) Q_Br.append([0,80000,496600]) Q_Br.append([0,40000,248300])

Min_EarnedIncome_ChildCredit = 2500 Percentage_EIRefundable_ChildCredit = 0.15 Max_Refundable_ChildCredit = 1400

FICA Tax rates, thresholds and variables

SS_WithholdingRate = 0.062 MedicareRate = 0.0145 HighIncomeMedicareRate = 0.0235 SS_WithholdingThreshold = 128400 MedicareRateThreshold_Single = 200000 MedicareRateThreshold_HOH = 200000 MedicareRateThreshold_MFJ = 250000 MedicareRateThreshold_MFS = 125000 MinSEIforTaxation = 400

Net Investment Income Tax threshold amount and variables

NIIT_Threshold_Single = 200000 NIIT_Threshold_HOH = 200000 NIIT_Threshold_MFJ = 250000 NIIT_Threshold_MFS = 125000 NIIT_TaxRate = 0.038

Social Security taxation

SS_CombinedIncome_Contribution = 0.5 SS_Taxation_Rate1 = 0.5 SS_Taxation_Rate2 = 0.85

Social security lower limit on Combined income for taxation

TSS_LL_Single = 25000 TSS_LL_HOH = 32000 TSS_LL_MFJ = 32000 TSS_LL_MFS = 25000

Social security upper limit on Combined income for taxation

TSS_UL_Single = 34000 TSS_UL_HOH = 44000 TSS_UL_MFJ = 44000 TSS_UL_MFS = 34000

Exemption lower limit on AGI

ELL_Single = 266700 ELL_HOH = 293350 ELL_MFJ = 320000 ELL_MFS = 160000

Exemption upper limit on AGI

EUL_Single = 389200 EUL_HOH = 415850 EUL_MFJ = 442500 EUL_MFS = 221250

Itemized Deduction limits

ItemizedDeduction_Deflator_AGI = 0.03 ItemizedDeduction_Deflator = 0.8

Tax year 2018 Capital loss limit

CLL_Single = 3000 CLL_HOH = 3000 CLL_MFJ = 3000 CLL_MFS = 1500

Tax year 2018 Standard deduction Old Age amounts

STD_Single_HOH_MFS_65 = 1650 STD_MFJ_65 = 1300 STD_MFJ_65_65 = 2600

In[139]:

def IncomeTax(FilingStatus, Age, S_Age, WageIncome, SelfEmployedIncome, NetAdjustedCapitalGains, Qual_Dividend, Ord_UnearnedIncome, SocialSecurity,Adjustments, ItemizedDeduction,Child_Dependent, Non_Child_Dependent):

if (FilingStatus=="Single"):
    NFILIng = 1
    MedicareRateThreshold = MedicareRateThreshold_Single
    StandardDeduction = STD_Single
    PersonalExemption = PE_Single
    ExemptionLowerLimit = ELL_Single
    ExemptionUpperLimit = EUL_Single
    TaxableSSLowerLimit = TSS_LL_Single
    TaxableSSUpperLimit = TSS_UL_Single
    CapitalLossLimit = CLL_Single
    NIIT_Threshold = NIIT_Threshold_Single
elif (FilingStatus=="HOH"):
    NFILIng = 2
    MedicareRateThreshold = MedicareRateThreshold_HOH
    StandardDeduction = STD_HOH
    PersonalExemption = PE_HOH
    ExemptionLowerLimit = ELL_HOH
    ExemptionUpperLimit = EUL_HOH
    TaxableSSLowerLimit = TSS_LL_HOH
    TaxableSSUpperLimit = TSS_UL_HOH
    CapitalLossLimit = CLL_HOH
    NIIT_Threshold = NIIT_Threshold_HOH
elif (FilingStatus=="MFJ"):
    NFILIng = 3
    MedicareRateThreshold = MedicareRateThreshold_MFJ
    StandardDeduction = STD_MFJ
    PersonalExemption = PE_MFJ
    ExemptionLowerLimit = ELL_MFJ
    ExemptionUpperLimit = EUL_MFJ
    TaxableSSLowerLimit = TSS_LL_MFJ
    TaxableSSUpperLimit = TSS_UL_MFJ
    CapitalLossLimit = CLL_MFJ
    NIIT_Threshold = NIIT_Threshold_MFJ
elif (FilingStatus=="MFS"):
    NFILIng = 4
    MedicareRateThreshold = MedicareRateThreshold_MFS
    StandardDeduction = STD_MFS
    PersonalExemption = PE_MFS
    ExemptionLowerLimit = ELL_MFS
    ExemptionUpperLimit = EUL_MFS
    TaxableSSLowerLimit = TSS_LL_MFS
    TaxableSSUpperLimit = TSS_UL_MFS
    CapitalLossLimit = CLL_MFS
    NIIT_Threshold = NIIT_Threshold_MFS

InflationFactor = 1
SocialSecurityTax = SS_WithholdingRate * min(WageIncome, SS_WithholdingThreshold * InflationFactor)
MedicareTax = min(WageIncome, MedicareRateThreshold) * MedicareRate + max(0, WageIncome - MedicareRateThreshold) * HighIncomeMedicareRate
FICATax = SocialSecurityTax + MedicareTax

if (SelfEmployedIncome * (1 - (SS_WithholdingRate + MedicareRate)) <= MinSEIforTaxation): 
    SelfEmploymentTax = 0
else:
    SelfEmploymentTax = 2 * SS_WithholdingRate * min(max(SS_WithholdingThreshold * InflationFactor - WageIncome, 0), SelfEmploymentIncome * (1 - (SS_WithholdingRate + MedicareRate))) + 2 * MedicareRate * SelfEmploymentIncome * (1 - (SS_WithholdingRate + MedicareRate))

EarnedIncome = WageIncome + SelfEmployedIncome
Qual_UnearnedIncome = max(-CapitalLossLimit, NetAdjustedCapitalGains) + Qual_Dividend
GrossIncome = EarnedIncome + Ord_UnearnedIncome + Qual_UnearnedIncome - Qual_Dividend + SocialSecurity
AdjustedGrossIncome = GrossIncome - Adjustments
NonTaxableInterest = 0

print(EarnedIncome,Qual_UnearnedIncome,Ord_UnearnedIncome,GrossIncome,AdjustedGrossIncome)

if (SocialSecurity == 0):
    TaxableSS = 0
else:
    PostRetirementCombinedIncome = (AdjustedGrossIncome - SocialSecurity) + NonTaxableInterest + SS_CombinedIncome_Contribution * SocialSecurity
    SS_85 = min(SocialSecurity, max(PostRetirementCombinedIncome - TaxableSSUpperLimit, 0)) / SocialSecurity

    if (PostRetirementCombinedIncome < TaxableSSLowerLimit):
        SS_50 = 0
    else: 
        SS_50 = min(TaxableSSUpperLimit - TaxableSSLowerLimit, PostRetirementCombinedIncome - TaxableSSLowerLimit) / SocialSecurity

    TaxableSS = SS_85 * SocialSecurity * SS_Taxation_Rate2 + SS_50 * SocialSecurity * SS_Taxation_Rate1
    TaxableSS = min(TaxableSS, SocialSecurity * SS_Taxation_Rate2)

GrossIncome = GrossIncome - SocialSecurity + TaxableSS
AdjustedGrossIncome = GrossIncome - Adjustments

print(TaxableSS,GrossIncome,AdjustedGrossIncome)

if (FilingStatus == "MFJ"):
    if (Age >= 65 and S_Age >= 65): 
        StandardDeduction_F = ((StandardDeduction + STD_MFJ_65_65) * InflationFactor)
    elif ((Age < 65 and S_Age >= 65) or (Age >= 65 and S_Age < 65)):
        StandardDeduction_F = ((StandardDeduction + STD_MFJ_65) * InflationFactor)
    else: 
        StandardDeduction_F = (StandardDeduction * InflationFactor)
else:
    if (Age >= 65): 
        StandardDeduction_F = ((StandardDeduction + STD_Single_HOH_MFS_65) * InflationFactor)
    else: 
        StandardDeduction_F = (StandardDeduction * InflationFactor)

ItemizedDeduction_Phaseout = 0

if (AdjustedGrossIncome > (ExemptionLowerLimit * InflationFactor)): 
    ItemizedDeduction_Phaseout = min(ItemizedDeduction_Deflator_AGI * (AdjustedGrossIncome - (ExemptionLowerLimit * InflationFactor)), ItemizedDeduction_Deflator * (ItemizedDeduction))
else: 
    ItemizedDeduction_Phaseout = 0

Deduction = max(StandardDeduction, StandardDeduction_F, ItemizedDeduction - ItemizedDeduction_Phaseout)
Exemption = 0

print(Deduction)

#Taxable Income calculation
if ((Deduction + Exemption) > (EarnedIncome + Ord_UnearnedIncome - Qual_Dividend + TaxableSS - Adjustments)): 
    OtherTaxableIncome = 0
    Qual_UnearnedIncome = Qual_UnearnedIncome - (Deduction + Exemption - (EarnedIncome + Ord_UnearnedIncome - Qual_Dividend - Adjustments + TaxableSS))
else: 
    OtherTaxableIncome = EarnedIncome + Ord_UnearnedIncome - Qual_Dividend - Adjustments + TaxableSS - Deduction - Exemption

print(OtherTaxableIncome,EarnedIncome,Ord_UnearnedIncome,Qual_Dividend,TaxableSS,Deduction)

if (Qual_UnearnedIncome < 0):
    OtherTaxableIncome = max(OtherTaxableIncome + Qual_UnearnedIncome, 0)
    Qual_UnearnedIncome = 0

TaxableIncome = max(OtherTaxableIncome + Qual_UnearnedIncome, 0)

print(OtherTaxableIncome,TaxableIncome)

#Tax calculation for Qualified Unearned Income(LTCG, Qualified dividends)
if (TaxableIncome <= (Q_Br[NFILIng-1][1] * InflationFactor)): 
    CapitalGainTax = 0
elif ((TaxableIncome > (Q_Br[NFILIng-1][1] * InflationFactor)) and (TaxableIncome <= (Q_Br[NFILIng-1][2] * InflationFactor))):
    if (OtherTaxableIncome <= 0): 
        CapitalGainTax = Q_MTR[1] * (Qual_UnearnedIncome - (Q_Br[NFILIng-1][1] * InflationFactor))
    elif (OtherTaxableIncome < (Q_Br[NFILIng-1][1] * InflationFactor)): 
        CapitalGainTax = ((Q_Br[NFILIng-1][1]* InflationFactor) - OtherTaxableIncome) * Q_MTR[0] + Q_MTR[1] * (Qual_UnearnedIncome - ((Q_Br[NFILIng-1][1] * InflationFactor) - OtherTaxableIncome))
    else: 
        CapitalGainTax = (Q_Br[NFILIng-1][1] * InflationFactor) * Q_MTR[0] + Q_MTR[1] * Qual_UnearnedIncome
elif TaxableIncome > (Q_Br[NFILIng-1][2] * InflationFactor):
    if (OtherTaxableIncome <= 0):
        CapitalGainTax = (Q_Br[NFILIng-1][1] * InflationFactor) * Q_MTR[0] + Q_MTR[1] * ((Q_Br[NFILIng-1][2] * InflationFactor) - (Q_Br[NFILIng-1][1] * InflationFactor)) + Q_MTR[2] * (Qual_UnearnedIncome - (Q_Br[NFILIng-1][2] * InflationFactor))
    elif (OtherTaxableIncome < (Q_Br[NFILIng-1][1] * InflationFactor)):
        CapitalGainTax = ((Q_Br[NFILIng-1][1] * InflationFactor) - OtherTaxableIncome) * Q_MTR[0] + Q_MTR[1] * (Q_Br[NFILIng-1][2]* InflationFactor - Q_Br[NFILIng-1][1] * InflationFactor) + Q_MTR[2] * (Qual_UnearnedIncome - ((Q_Br[NFILIng-1][1] * InflationFactor - OtherTaxableIncome) +(Q_Br[NFILIng-1][2] * InflationFactor - Q_Br[NFILIng-1][1] * InflationFactor)))
    elif (OtherTaxableIncome < (Q_Br[NFILIng-1][2]* InflationFactor)):
        CapitalGainTax = (Q_Br[NFILIng-1][1] * InflationFactor) * Q_MTR[0] + Q_MTR[1] * ((Q_Br[NFILIng-1][2] * InflationFactor) - OtherTaxableIncome) + Q_MTR[2] * (Qual_UnearnedIncome - ((Q_Br[NFILIng-1][2] * InflationFactor) - OtherTaxableIncome))
    elif (OtherTaxableIncome > (Q_Br[NFILIng-1][2] * InflationFactor)):
        CapitalGainTax = Qual_UnearnedIncome * Q_MTR[2]

print(CapitalGainTax)

#Tax on Other Taxable Income - EarnedIncome + Ord_UnearnedIncome
counter = 1
Tax1 = 0
if (OtherTaxableIncome < 0): 
    Tax1 = 0
elif (OtherTaxableIncome <= (Inc_Br[NFILIng-1][counter-1] * InflationFactor)):
    Tax1 = OtherTaxableIncome * Inc_MTR[counter-1]
else: 
    while (OtherTaxableIncome > (Inc_Br[NFILIng-1][counter-1] * InflationFactor)):

print(Inc_Br[NFILIng-1][counter-1])

        if (counter > 1): 

print(counter,Tax1,Inc_Br[NFILIng-1][counter-1],Inc_Br[NFILIng-1][counter-2],Inc_MTR[counter-1])

            Tax1 = Tax1 + (Inc_Br[NFILIng-1][counter-1] - Inc_Br[NFILIng-1][counter-2]) * Inc_MTR[counter-1] * InflationFactor
        else:

print(counter,Tax1,Inc_Br[NFILIng-1][counter-1],Inc_Br[NFILIng-1][counter-2],Inc_MTR[counter-1])

            Tax1 = Tax1 + Inc_Br[NFILIng-1][counter-1] * Inc_MTR[counter-1] * InflationFactor
        #print(Tax1)    
        counter = counter + 1

print(counter,Tax1,Inc_Br[NFILIng-1][counter-2],Inc_MTR[counter-1])

    Tax1 = Tax1 + (OtherTaxableIncome - Inc_Br[NFILIng-1][counter-2] * InflationFactor) * Inc_MTR[counter-1]

print(Tax1)

print(NFILIng-1,Inc_Br[NFILIng-1],Inc_MTR)

print(OtherTaxableIncome,Inc_Br[NFILIng-1][counter-1],Inc_MTR[counter-1],Tax1)

#Federal Tax, Federal marginal rate and next Federal marginal rate
FederalTax = CapitalGainTax + Tax1

return FederalTax
#return FederalTax,Tax1,CapitalGainTax,TaxableIncome,Inc_Br[NFILIng-1][0]

In[140]:

IncomeTax(FilingStatus, Age, S_Age, WageIncome, SelfEmployedIncome, NetAdjustedCapitalGains, Qual_Dividend,

Ord_UnearnedIncome, SocialSecurity,Adjustments, ItemizedDeduction,Child_Dependent, Non_Child_Dependent)

IncomeTax("MFJ", 71, 71, 100000, 0, 10150.75, 517.5,42956.66,40000,0,0,0,0)

In[141]:

RMD_Divisor = pd.DataFrame(data = [[70,27.4],[71,26.5],[72,25.6],[73,24.7],[74,23.8],[75,22.9],[76,22],[77,21.2], [78,20.3],[79,19.5],[80,18.7],[81,17.9],[82,17.1],[83,16.3],[84,15.5],[85,14.8], [86,14.1],[87,13.4],[88,12.7],[89,12],[90,11.4],[91,10.8],[92,10.2],[93,9.6], [94,9.1],[95,8.6],[96,8.1],[97,7.6],[98,7.1],[99,6.7],[100,6.3],[101,5.9], [102,5.5],[103,5.2],[104,4.9],[105,4.5],[106,4.2],[107,3.9],[108,3.7],[109,3.4], [110,3.1],[111,2.9],[112,2.6],[113,2.4],[114,2.1],[115,1.9],[116,1.9],[117,1.9], [118,1.9],[119,1.9],[120,1.9],[121,1.9],[122,1.9],[123,1.9],[124,1.9],[125,1.9], [126,1.9],[127,1.9]], columns = ['Age','Divisor'])

In[183]:

def Simulate_One_Year(FilingStatus, Age,S_Age,TradBal_C,TradBal_S, TaxableBal,TaxableCB, RothBal, Ret_Total, Ret_Interest, Ret_Dividend, PRLT, Strategy, TradPer, TaxablePer, RothPer, ATWD, TotalATWDReq,WHPer, SocialSecurity, WageIncome, UnearnedIncome, WageWH, UIncWH, LastTax,LastWH, CFLast):

if (Age>=70):
    RMD_D_C = np.array(RMD_Divisor.loc[RMD_Divisor['Age']==Age,'Divisor'])[0]
    RMD_C = TradBal_C/RMD_D_C
else:
    RMD_C = 0
    RMD_S = 0

if (S_Age>=70):
    RMD_D_S = np.array(RMD_Divisor.loc[RMD_Divisor['Age']==S_Age,'Divisor'])[0]
    RMD_S = TradBal_S/RMD_D_S
else:
    RMD_S = 0

print(RMD_C,RMD_S)

TradPerPerm = TradPer
TaxablePerPerm = TaxablePer
RothPerPerm = RothPer

if ((TradBal_C+TradBal_S)>0):
    Trad_CFraction = TradBal_C/(TradBal_C+TradBal_S)
else:
    Trad_CFraction = 0

TradBal_C = TradBal_C * (1+Ret_Total)
TradBal_S = TradBal_S * (1+Ret_Total)
Interest = TaxableBal * Ret_Interest
Dividend = TaxableBal * Ret_Dividend
TaxableCB = max(TaxableCB + Dividend + Interest + TaxableBal*(Ret_Total-Ret_Interest-Ret_Dividend)*PRLT,0)
Distributions = Dividend + Interest + TaxableBal*(Ret_Total-Ret_Interest-Ret_Dividend)*PRLT
TaxableBal = TaxableBal * (1+Ret_Total)
RothBal = RothBal * (1+Ret_Total)

print(TradBal_C,TradBal_S,Interest,Dividend,TaxableCB,Distributions,TaxableBal,RothBal)

TradBal = TradBal_C + TradBal_S
TotalBal = TradBal + TaxableBal + RothBal

if (Strategy == "PRORATA"):
    TradPer = TradBal/TotalBal
    TaxablePer = TaxableBal/TotalBal
    RothBal = RothBal/TotalBal

TrueUp = LastTax - LastWH

if (ATWD==0):
    ATWD = -min(TrueUp, WageIncome-WageWH+UnearnedIncome-UIncWH + SocialSecurity - TotalATWDReq)

if (TradPer==0 and TaxablePer==0 and RothPer==0):
    TradPer = 1/3
    TaxablePer = 1/3
    RothPer = 1/3

print(TradPer,TaxablePer,RothPer)

if (TradPer>0 and TradBal<0.1):
    TradPer = 0
    if ((TaxablePer+RothPer) == 0):
        TaxablePer = 0.5
        RothPer = 0.5
    else:
        temp = TaxablePer + RothPer
        TaxablePer = TaxablePer/temp
        RothPer = RothPer/temp

if (TaxablePer>0 and TaxableBal<0.1):
    TaxablePer = 0
    if ((TradPer+RothPer) == 0):
        TradPer = 0.5
        RothPer = 0.5
    else:
        temp = TradPer + RothPer
        TradPer = TradPer/temp
        RothPer = RothPer/temp

if (RothPer>0 and RothBal<0.1):
    RothPer = 0
    if ((TaxablePer+TradPer) == 0):
        TaxablePer = 0.5
        TradPer = 0.5
    else:
        temp = TaxablePer + TradPer
        TaxablePer = TaxablePer/temp
        TradPer = TradPer/temp

print(TradPer,TaxablePer,RothPer)

PreTaxWD = max((ATWD + TrueUp)/(1-TradPer*WHPer),0)

print(PreTaxWD,TradBal,TaxableBal,RothBal,TradPer,TaxablePer,RothPer)

if ((TradPer*PreTaxWD)>TradBal):
    PreTaxWD = max(0,ATWD+TrueUp+TradBal*WHPer)
    R_Adj = TradPer - TradBal/PreTaxWD
    TradPer = TradBal/PreTaxWD
    if ((TaxablePer+RothPer) == 0):
        TaxablePer = TaxablePer + R_Adj/2
        RothPer = 1 - TradPer - TaxablePer
    else:
        TaxablePer = TaxablePer + R_Adj * TaxablePer/(TaxablePer+RothPer)
        RothPer = 1 - TradPer - TaxablePer

print(TradPer,TaxablePer,RothPer)

if ((TaxablePer*PreTaxWD)>TaxableBal):
    if ((TradPer+RothPer) == 0):
        PreTaxWD = max(0,(ATWD+TrueUp-TaxableBal)/(1-WHPer/2) + TaxableBal)
        R_Adj = TaxablePer - TaxableBal/PreTaxWD
        TaxablePer = TaxableBal/PreTaxWD
        TradPer = TradPer + R_Adj/2
        RothPer = 1 - TradPer - TaxablePer
    else:
        PreTaxWD = max(0,(ATWD+TrueUp-TaxableBal)/(1-WHPer*TradPer/(TradPer+RothPer)) + TaxableBal)
        R_Adj = TaxablePer - TaxableBal/PreTaxWD
        TaxablePer = TaxableBal/PreTaxWD
        TradPer = TradPer + R_Adj*TradPer/(TradPer+RothPer)
        RothPer = 1 - TradPer - TaxablePer

print(TradPer,TaxablePer,RothPer)

if ((RothPer*PreTaxWD)>RothBal):
    if ((TaxablePer+TradPer) == 0):
        PreTaxWD = max(0,(ATWD+TrueUp-RothBal)/(1-WHPer/2) + RothBal)
        R_Adj = RothPer - RothBal/PreTaxWD
        RothPer = RothBal/PreTaxWD
        TradPer = TradPer + R_Adj/2
        TaxablePer = 1 - TradPer - RothPer
    else:
        PreTaxWD = max(0,(ATWD+TrueUp-RothBal)/(1-WHPer*TradPer/(TradPer+TaxablePer)) + RothBal)
        R_Adj = RothPer - RothBal/PreTaxWD
        RothPer = RothBal/PreTaxWD
        TradPer = TradPer + R_Adj*TradPer/(TradPer+TaxablePer)
        TaxablePer = 1 - TradPer - RothPer

PreTaxWD = max((ATWD + TrueUp)/(1-TradPer*WHPer),0)

WdTrad = PreTaxWD * TradPer
WdTaxable = PreTaxWD * TaxablePer
WdRoth = PreTaxWD * RothPer

print(TradPer,TaxablePer,RothPer,PreTaxWD,WdTrad,WdTaxable,WdRoth)

if ((RMD_C+RMD_S)>WdTrad):
    WdTrad = RMD_C + RMD_S
    if ((TaxablePerPerm+RothPerPerm)>0):
        WdTaxable = max(0, (ATWD+TrueUp-WdTrad*(1-WHPer))*TaxablePerPerm/(TaxablePerPerm+RothPerPerm))  
        WdRoth = max(0,ATWD+TrueUp-WdTrad*(1-WHPer)-WdTaxable)
    else:
        WdTaxable = max(0, (ATWD+TrueUp-WdTrad*(1-WHPer))*0.5)  
        WdRoth = max(0,ATWD+TrueUp-WdTrad*(1-WHPer)-WdTaxable)        

WdTrad = min(WdTrad,TradBal)
WdTaxable = min(WdTaxable,TaxableBal)
WdRoth = min(WdRoth,RothBal)

PreTaxWDReqAftrR1 = ATWD + TrueUp - WdTrad*(1-WHPer) - WdTaxable - WdRoth

print(WdTrad,WdTaxable,WdRoth)

if (PreTaxWDReqAftrR1>0.1):
    if ((TotalBal-WdTrad-WdTaxable-WdRoth)<PreTaxWDReqAftrR1):
        WdTrad = TradBal
        WdTaxable = TaxableBal
        WdRoth = RothBal

    else:
        if ((TradBal-WdTrad)<.1):
            TradPer = 0
            if ((WdTaxable+WdRoth)==0):
                TaxablePer = 0.5
                RothPer = 0.5
            else:
                TaxablePer = WdTaxable/(WdTaxable+WdRoth)
                RothPer = 1 - TaxablePer
        elif ((TaxableBal-WdTaxable)<.1):
            TaxablePer = 0
            if ((TradPer+RothPer)==0):
                TradPer = 0.5
                RothPer = 0.5
            else:
                TradPer = TradPer/(TradPer+RothPer)
                RothPer = 1 - TradPer

            if (WdTrad>(PreTaxWD-WdTaxable)*TradPer):
                TradPer = 0
                RothPer = 1
        elif ((RothBal-WdRoth)<.1):
            RothPer = 0
            if ((TradPer+TaxablePer)==0):
                TaxablePer = 0.5
                TradPer = 0.5
            else:
                TradPer = TradPer/(TradPer+TaxablePer)
                TaxablePer = 1 - TradPer

print(WdTrad,TradPer,(PreTaxWD-WdRoth)*TradPer)

            if (WdTrad>(PreTaxWD-WdRoth)*TradPer):
                TradPer = 0
                TaxablePer = 1

        PreTaxWDReqAftrR1 = PreTaxWDReqAftrR1/(1-WHPer*TradPer)
        WdTrad = min(TradBal,WdTrad+PreTaxWDReqAftrR1*TradPer)
        WdTaxable = min(TaxableBal, WdTaxable+PreTaxWDReqAftrR1*TaxablePer)
        WdRoth = min(RothBal,WdRoth + PreTaxWDReqAftrR1*RothPer)

PreTaxWDReqAftrR2 = ATWD + TrueUp - WdTrad*(1-WHPer) - WdTaxable - WdRoth                   
if (PreTaxWDReqAftrR2>.1):
    if ((TotalBal-WdTrad-WdTaxable-WdRoth)<PreTaxWDReqAftrR1):
        WdTrad = TradBal
        WdTaxable = TaxableBal
        WdRoth = RothBal

    else:
        if ((TradBal-WdTrad)<.1 and (TaxableBal-WdTaxable)<.1):
            RothPer = 1
            TradPer = 0
            TaxablePer = 0
        elif ((TradBal-WdTrad)<.1 and (RothBal-WdRoth)<.1):
            TaxablePer = 1
            RothPer = 0
            TradPer = 0
        elif ((TaxableBal-WdTaxable)<.1 and (RothBal-WdRoth)<.1):
            TradPer = 1
            TaxablePer = 0
            RothPer = 0

PreTaxWDReqAftrR3 = PreTaxWDReqAftrR2/(1-WHPer*TradPer)
WdTrad = min(TradBal,WdTrad+PreTaxWDReqAftrR3*TradPer)
WdTaxable = min(TaxableBal, WdTaxable+PreTaxWDReqAftrR3*TaxablePer)
WdRoth = min(RothBal,WdRoth + PreTaxWDReqAftrR3*RothPer)

WdTaxableDist = min(WdTaxable,Distributions)
WdTaxableBal = WdTaxable - WdTaxableDist
if (TaxableBal==0):
    RealizedGain = 0
else:
    RealizedGain = WdTaxableBal *(1-(TaxableCB-Distributions)/(TaxableBal-Distributions)) + PRLT * TaxableBal/(1+Ret_Total) * (Ret_Total - Ret_Interest - Ret_Dividend) 

CF = min(CFLast,0) + RealizedGain
RealizedGain = max(0,CF)

Tax = IncomeTax(FilingStatus, Age, S_Age, WageIncome, 0, RealizedGain, Dividend,
          WdTrad+Interest+UnearnedIncome+Dividend, SocialSecurity,0,0,0,0)

print(FilingStatus, Age, S_Age, WageIncome, 0, RealizedGain, Dividend,

WdTrad+Interest+UnearnedIncome+Dividend, SocialSecurity,0,0,0,0,Tax)

NetIncome = WdTaxable + WdRoth + WdTrad * (1-WHPer) - TrueUp

Shortfall = ATWD - NetIncome

print(TradBal,TaxableBal,TaxableCB,RothBal,WdTaxableDist)

TradBal = TradBal - WdTrad
TradBal_C = TradBal* Trad_CFraction
TradBal_S = TradBal * (1-Trad_CFraction)
if (TaxableBal-WdTaxable)==0:
    TaxableCB = 0
else:
    TaxableCB = TaxableCB-WdTaxableDist - (TaxableCB-WdTaxableDist)*WdTaxableBal/(TaxableBal-WdTaxableDist)
TaxableBal = TaxableBal - WdTaxable
RothBal = RothBal - WdRoth

CF = RealizedGain

return (Shortfall,NetIncome,WdTrad,WdTaxable,WdRoth,Tax,TradBal_C,TradBal_S,TaxableBal,TaxableCB,RothBal,CF)

In[184]:

FilingStatus = "MFJ"

Age = 71

S_Age = 71

TradBal_C = 675000

TradBal_S = 0

TaxableBal = 75000

TaxableCB = 45000

RothBal = 0

Ret_Total = 0.0462

Ret_Interest = .0103

Ret_Dividend = .0069

PRLT = .3

Strategy = "RATIO"

TradPer = .5

TaxablePer = .3

RothPer = .2

ATWD = 62500

TotalATWDReq = 192500

WHPer = .1

SocialSecurity = 40000

WageIncome = 100000

UnearnedIncome = 0

WageWH = .1

UIncWH = .1

LastTax = 0

LastWH = 0

CFLast = 0

#

Simulate_One_Year(FilingStatus, Age,S_Age,TradBal_C,TradBal_S, TaxableBal,TaxableCB, RothBal,

Ret_Total, Ret_Interest, Ret_Dividend, PRLT,

Strategy, TradPer, TaxablePer, RothPer, ATWD, TotalATWDReq,WHPer,

SocialSecurity, WageIncome, UnearnedIncome, WageWH, UIncWH,

LastTax,LastWH, CFLast)

In[185]:

def Simulate(FilingStatus,Age,S_Age,TradBal_C,TradBal_S, TaxableBal,TaxableCB, RothBal, Ret_Total_Vec,Ret_Interest_Vec, Ret_Dividend_Vec, PRLT,Strategy, WHPer,WageWHPer,UIWHPer, ATWD_Vec, SocialSecurity_Vec,TotalATWDReq_Vec,WageIncome_Vec,UInc_Vec, TradPer, TaxablePer, RothPer, Horizon):

Longevity = 0
for i in range(1,Horizon+1):

    #print(i)

    WageWH = WageIncome_Vec[i-1]*WageWHPer
    UIncWH = UInc_Vec[i-1]*UIWHPer
    if i==1:
        LastTax = 0
        LastWH = 0
        CFLast = 0

    Shortfall,NetIncome,WdTrad,WdTaxable,WdRoth,LastTax,TradBal_C,TradBal_S,TaxableBal,TaxableCB,RothBal,CFLast = Simulate_One_Year(FilingStatus, Age,S_Age,TradBal_C,TradBal_S, TaxableBal,TaxableCB, RothBal,Ret_Total_Vec[i-1], Ret_Interest_Vec[i-1], Ret_Dividend_Vec[i-1],PRLT, Strategy, TradPer, TaxablePer, RothPer, ATWD_Vec[i-1],TotalATWDReq_Vec[i-1],WHPer,SocialSecurity_Vec[i-1], WageIncome_Vec[i-1], UInc_Vec[i-1], WageWH, UIncWH,LastTax,LastWH, CFLast)
    LastWH = WageWH + UIncWH + WdTrad*WHPer

    #print(i,"-",round(WdTrad,2),round(WdTaxable,2),round(WdRoth,2),
    #      round(LastTax,2),round(TradBal_C,2),round(TradBal_S,2),
    #      round(TaxableBal,2),round(TaxableCB,2),round(RothBal,2))

    Age = Age+1
    S_Age = S_Age+1

    if (TradBal_C+TradBal_S+TaxableBal+RothBal)<0.01:
        Longevity = Longevity + 1 - Shortfall/ATWD_Vec[i-1]
        break
    else:
        Longevity = Longevity + 1

return Longevity,(TradBal_C+TradBal_S+TaxableBal+RothBal)

In[186]:

Client_DOB = dt.datetime(1955,1,3) Spouse_DOB = dt.datetime(1955,1,3) FilingStatus = "Single" CurrentDate = dt.datetime(2020,1,1) TradBal_C = 75000 TradBal_S = 0 TaxableBal = 175000 TaxableCB = 1 RothBal = 105000 LTAA = "50%" PRLT = 0.3 WHPer = 0.1 WageWHPer = .1 UIWHPer = .1 Strategy = "RATIO"

Calculated

Age = relativedelta(CurrentDate,Client_DOB).years S_Age = relativedelta(CurrentDate,Spouse_DOB).years Horizon = 110 - (Age if FilingStatus!='MFJ' else min(Age,S_Age))

if Strategy=="ORDER": TaxablePer = .9999 TradPer = 1-TaxablePer RothPer = 0 ############################

SocialSecurity_Vec = np.repeat(0,Horizon) TotalATWDReq_Vec = np.repeat(67500,Horizon) WageIncome_Vec = np.repeat(0,Horizon) UInc_Vec = np.repeat(0,Horizon)

Calculated

ATWD_Vec = TotalATWDReq_Vec - WageIncome_Vec (1-WageWHPer) - UInc_Vec (1-UIWHPer) - SocialSecurity_Vec Returns = {'20%':[0.0303,0.0146,0.0028],'30%':[0.0363,0.0136,0.0042],'40%':[0.0412,0.012,0.0055], '50%':[0.0462,0.0103,0.0069],'60%':[0.0504,0.0086,0.0083],'70%':[0.0531,0.0063,0.0097], '85%':[0.0596,0.0035,0.0118]} Ret_Total_Vec = np.repeat(Returns[LTAA][0],Horizon) Ret_Interest_Vec = np.repeat(Returns[LTAA][1],Horizon) Ret_Dividend_Vec = np.repeat(Returns[LTAA][2],Horizon)

TradPer = 0.0 TaxablePer = 0.6 RothPer = 0.4 a = Simulate(FilingStatus,Age,S_Age,TradBal_C,TradBal_S, TaxableBal,TaxableCB, RothBal, Ret_Total_Vec,Ret_Interest_Vec, Ret_Dividend_Vec, PRLT,Strategy, WHPer,WageWHPer,UIWHPer, ATWD_Vec, SocialSecurity_Vec,TotalATWDReq_Vec,WageIncome_Vec,UInc_Vec, TradPer, TaxablePer, RothPer, Horizon)

print(10)

In[27]:

def Grid10Optim(FilingStatus, Age, S_Age, TradBal_C, TradBal_S, TaxableBal, TaxableCB, RothBal, Ret_Total_Vec, Ret_Interest_Vec, Ret_Dividend_Vec, PRLT, Strategy, WHPer, WageWHPer, UIWHPer, ATWD_Vec, SocialSecurity_Vec, TotalATWDReq_Vec, WageIncome_Vec, UInc_Vec, Horizon): max_l = 0 max_bal = 0 max_trad = 0 max_taxable = 0

for i in range(0, 11):
    for j in range(0, 11):
        if (i + j) / 10 <= 1:

            l, bal = Simulate(FilingStatus, Age, S_Age, TradBal_C, TradBal_S, TaxableBal, TaxableCB, RothBal,
                              Ret_Total_Vec, Ret_Interest_Vec, Ret_Dividend_Vec,
                              PRLT, Strategy, WHPer, WageWHPer, UIWHPer,
                              ATWD_Vec, SocialSecurity_Vec, TotalATWDReq_Vec, WageIncome_Vec, UInc_Vec,
                              i / 10, j / 10, 1 - i / 10 - j / 10, Horizon)

            if i == 0 and j == 0:
                solutions = pd.DataFrame([[l, i / 10, j / 10]], columns=['Longevity', 'Trad', 'Taxable'])
            else:
                solutions.loc[len(solutions)] = [l, i / 10, j / 10]

            if (l > max_l or (l == max_l and bal > max_bal)):
                max_trad = i / 10
                max_taxable = j / 10
                max_l = l
                max_bal = bal

#                 print(i/10,j/10,l,bal,max_l,max_bal)

return max_trad, max_taxable, round(1 - max_trad - max_taxable, 1), solutions, max_l

Client_DOB = dt.datetime(1955,1,3) Spouse_DOB = dt.datetime(1955,1,3) FilingStatus = "Single" CurrentDate = dt.datetime(2020,1,1) TradBal_C = 75000 TradBal_S = 0 TaxableBal = 175000 TaxableCB = 1 RothBal = 105000 LTAA = "50%" PRLT = 0.3 WHPer = 0.1 WageWHPer = .1 UIWHPer = .1 Strategy = "RATIO"

Calculated

Age = relativedelta(CurrentDate,Client_DOB).years S_Age = relativedelta(CurrentDate,Spouse_DOB).years Horizon = 110 - (Age if FilingStatus!='MFJ' else min(Age,S_Age))

if Strategy=="ORDER": TaxablePer = .9999 TradPer = 1-TaxablePer RothPer = 0 ############################

SocialSecurity_Vec = np.repeat(0,Horizon) TotalATWDReq_Vec = np.repeat(67500,Horizon) WageIncome_Vec = np.repeat(0,Horizon) UInc_Vec = np.repeat(0,Horizon)

Calculated

ATWD_Vec = TotalATWDReq_Vec - WageIncome_Vec (1-WageWHPer) - UInc_Vec (1-UIWHPer) - SocialSecurity_Vec Returns = {'20%':[0.0303,0.0146,0.0028],'30%':[0.0363,0.0136,0.0042],'40%':[0.0412,0.012,0.0055], '50%':[0.0462,0.0103,0.0069],'60%':[0.0504,0.0086,0.0083],'70%':[0.0531,0.0063,0.0097], '85%':[0.0596,0.0035,0.0118]} Ret_Total_Vec = np.repeat(Returns[LTAA][0],Horizon) Ret_Interest_Vec = np.repeat(Returns[LTAA][1],Horizon) Ret_Dividend_Vec = np.repeat(Returns[LTAA][2],Horizon)

TradPer = .6 TaxablePer = 0 RothPer = .40

a = Grid10Optim(FilingStatus, Age, S_Age, TradBal_C, TradBal_S, TaxableBal, TaxableCB, RothBal, Ret_Total_Vec, Ret_Interest_Vec, Ret_Dividend_Vec, PRLT, Strategy, WHPer, WageWHPer, UIWHPer, ATWD_Vec, SocialSecurity_Vec, TotalATWDReq_Vec, WageIncome_Vec, UInc_Vec, Horizon)

print(a)

In[31]:

In[32]:

In[ ]:

class standardCase: @property def filing_status(self) -> str: return self._filing_status

@filing_status.setter
def filing_status(self, filing_status):
    self._filing_status = filing_status

@property
def client_DOB(self) -> dt.datetime:
    return self._client_DOB

@client_DOB.setter
def client_DOB(self, client_DOB):
    self._client_DOB = client_DOB

@property
def spouse_DOB(self) -> dt.datetime:
    return self._spouse_DOB

@spouse_DOB.setter
def spouse_DOB(self, spouse_DOB):
    self._spouse_DOB = spouse_DOB

@property
def client_age(self) -> int:
    return self._client_age

@client_age.setter
def client_age(self, client_age):
    self._client_age = client_age

@property
def spouse_age(self) -> int:
    return self._spouse_age

@spouse_age.setter
def spouse_age(self, spouse_age):
    self._spouse_age = spouse_age

@property
def expenses(self) -> int:
    return self._expenses

@expenses.setter
def expenses(self, expenses):
    self._expenses = expenses

@property
def trad_bal(self) -> int:
    return self._trad_bal

@trad_bal.setter
def trad_bal(self, trad_bal):
    self._trad_bal = trad_bal

@property
def tax_bal(self) -> int:
    return self._tax_bal

@tax_bal.setter
def tax_bal(self, tax_bal):
    self._tax_bal = tax_bal

@property
def tax_cb(self) -> int:
    return self._tax_cb

@tax_cb.setter
def tax_cb(self, tax_cb):
    self._tax_cb = tax_cb

@property
def roth_bal(self) -> int:
    return self._roth_bal

@roth_bal.setter
def roth_bal(self, roth_bal):
    self._roth_bal = roth_bal

def __init__(self, filing_status: str = None, client_DOB: dt.datetime = None, spouse_DOB: dt.datetime = None, \
             client_age: int = None, spouse_age: int = None, expenses: int = None, \
             trad_bal: int = None, tax_bal: int = None, tax_cb: int = None, roth_bal: int = None):
    self.filing_status = filing_status
    self.client_DOB = client_DOB
    self.spouse_DOB = spouse_DOB
    self.client_age = client_age
    self.spouse_age = spouse_age
    self.expenses = expenses
    self.trad_bal = trad_bal
    self.tax_bal = tax_bal
    self.tax_cb = tax_cb
    self.roth_bal = roth_bal

def __str__(self):
    return self.__repr__()

def __repr__(self):
    return "FilingStatus: {}\n" \
           "Client DOB: {}\n" \
           "Spouse DOB: {}\n" \
           "Client Age: {}\n" \
           "Spouse Age: {}\n" \
           "Expenses: {}\n" \
           "Traditional Balance: {}\n" \
           "Taxable Balance: {}\n" \
           "Taxable Cost Basis: {}\n" \
           "Roth Balance: {}".format(self.filing_status, self.client_DOB, self.spouse_DOB, self.client_age,
                                     self.spouse_age, \
                                     self.expenses, self.trad_bal, self.tax_bal, self.tax_cb, self.roth_bal)

In[ ]:

Read in all cases

standard_cases_data = pd.read_csv("Standard_Cases.csv").set_index(["Index"]) stochastic_returns = pd.read_csv("250 Returns 50% TAM.csv").set_index(["SIM\YEAR"])

Static_Returns = {'20%': [0.0303, 0.0146, 0.0028], '30%': [0.0363, 0.0136, 0.0042], '40%': [0.0412, 0.012, 0.0055], '50%': [0.0462, 0.0103, 0.0069], '60%': [0.0504, 0.0086, 0.0083], '70%': [0.0531, 0.0063, 0.0097], '85%': [0.0596, 0.0035, 0.0118]}

number_of_cases = 1600 case_list = []

for index in range(number_of_cases): filing_status = standard_cases_data.iat[index, 0] client_DOB = standard_cases_data.iat[index, 1] spouse_DOB = standard_cases_data.iat[index, 2] client_age = standard_cases_data.iat[index, 3] spouse_age = standard_cases_data.iat[index, 4] expenses = standard_cases_data.iat[index, 20] trad_bal = standard_cases_data.iat[index, 21] tax_bal = standard_cases_data.iat[index, 22] tax_cb = standard_cases_data.iat[index, 23] roth_bal = standard_cases_data.iat[index, 24]

new_case = standardCase(filing_status, client_DOB, spouse_DOB, client_age, spouse_age, expenses, trad_bal, tax_bal,
                        tax_cb, roth_bal)
case_list.append(new_case)

########Set Constants################### CurrentDate = dt.datetime(2020, 1, 1) LTAA = "50%" PRLT = 0.3 WHPer = 0.1 WageWHPer = .1 UIWHPer = .1 Strategy = "ORDER"

start_time = time.time()

index = 0 for case in case_list: print(f"Index {str(index)} Starting!")

Calculated
Horizon = 110 - (case.client_age if case.filing_status != 'MFJ' else min(case.client_age, case.spouse_age))

if Strategy == "ORDER":
    TaxablePer = .9999
    TradPer = 1 - TaxablePer
    RothPer = 0
############################

SocialSecurity_Vec = np.repeat(0, Horizon)
TotalATWDReq_Vec = np.repeat(case.expenses, Horizon)
WageIncome_Vec = np.repeat(0, Horizon)
UInc_Vec = np.repeat(0, Horizon)

######Calculated##########################
ATWD_Vec = TotalATWDReq_Vec - WageIncome_Vec * (1 - WageWHPer) - UInc_Vec * (1 - UIWHPer) - SocialSecurity_Vec
Ret_Total_Vec = np.repeat(Static_Returns[LTAA][0], Horizon)
Ret_Interest_Vec = np.repeat(Static_Returns[LTAA][1], Horizon)
Ret_Dividend_Vec = np.repeat(Static_Returns[LTAA][2], Horizon)

##### conventional wisdom, static returns######
# longevity_obj = Simulate(
#     FilingStatus=case.filing_status,
#     Age=case.client_age,
#     S_Age=case.spouse_age,
#     TradBal_C=case.trad_bal,
#     TradBal_S=0,
#     TaxableBal=case.tax_bal,
#     TaxableCB=case.tax_cb,
#     RothBal=case.roth_bal,
#     Ret_Total_Vec=Ret_Total_Vec,
#     Ret_Interest_Vec=Ret_Interest_Vec,
#     Ret_Dividend_Vec=Ret_Dividend_Vec,
#     PRLT=PRLT,
#     Strategy=Strategy,
#     WHPer=WHPer,
#     WageWHPer=WageWHPer,
#     UIWHPer=UIWHPer,
#     ATWD_Vec=ATWD_Vec,
#     SocialSecurity_Vec=SocialSecurity_Vec,
#     TotalATWDReq_Vec=TotalATWDReq_Vec,
#     WageIncome_Vec=TotalATWDReq_Vec,
#     UInc_Vec=UInc_Vec,
#     TradPer=.9999,
#     TaxablePer=0.0001,
#     RothPer=0,
#     Horizon=Horizon)

# standard_cases_data.iat[index, 25] = longevity_obj[0]

##### conventional wisdom, 90% CL#########
results_by_sim = []
for sim in range(1, 250):
    longevity_obj = Simulate(
        FilingStatus=case.filing_status,
        Age=case.client_age,
        S_Age=case.spouse_age,
        TradBal_C=case.trad_bal,
        TradBal_S=0,
        TaxableBal=case.tax_bal,
        TaxableCB=case.tax_cb,
        RothBal=case.roth_bal,
        Ret_Total_Vec=stochastic_returns[sim - 1:sim].values[0],
        Ret_Interest_Vec=Ret_Interest_Vec,
        Ret_Dividend_Vec=Ret_Dividend_Vec,
        PRLT=PRLT,
        Strategy=Strategy,
        WHPer=WHPer,
        WageWHPer=WageWHPer,
        UIWHPer=UIWHPer,
        ATWD_Vec=ATWD_Vec,
        SocialSecurity_Vec=SocialSecurity_Vec,
        TotalATWDReq_Vec=TotalATWDReq_Vec,
        WageIncome_Vec=TotalATWDReq_Vec,
        UInc_Vec=UInc_Vec,
        TradPer=.9999,
        TaxablePer=0.0001,
        RothPer=0,
        Horizon=Horizon)
    results_by_sim.append(longevity_obj[0])

ninety_percent_confidence_longevity = np.percentile(results_by_sim, 10)
print(ninety_percent_confidence_longevity)
standard_cases_data.iat[index, 26] = ninety_percent_confidence_longevity

##### conventional wisdom, 50% CL#########

fifty_percent_confidence_longevity = np.percentile(results_by_sim, 50)
print(fifty_percent_confidence_longevity)
standard_cases_data.iat[index, 25] = fifty_percent_confidence_longevity

##### ratio withdrawals, static returns####

winning_ratio_obj = Grid10Optim(
    FilingStatus=case.filing_status,
    Age=case.client_age,
    S_Age=case.spouse_age,
    TradBal_C=case.trad_bal,
    TradBal_S=0, TaxableBal=case.tax_bal,
    TaxableCB=case.tax_cb,
    RothBal=case.roth_bal,
    Ret_Total_Vec=Ret_Total_Vec,
    Ret_Interest_Vec=Ret_Interest_Vec,
    Ret_Dividend_Vec=Ret_Dividend_Vec,
    PRLT=PRLT,
    Strategy="RATIO",
    WHPer=WHPer,
    WageWHPer=WageWHPer,
    UIWHPer=UIWHPer,
    ATWD_Vec=ATWD_Vec,
    SocialSecurity_Vec=SocialSecurity_Vec,
    TotalATWDReq_Vec=TotalATWDReq_Vec,
    WageIncome_Vec=TotalATWDReq_Vec,
    UInc_Vec=UInc_Vec,
    Horizon=Horizon)

longevity_obj = Simulate(
    FilingStatus=case.filing_status,
    Age=case.client_age,
    S_Age=case.spouse_age,
    TradBal_C=case.trad_bal,
    TradBal_S=0,
    TaxableBal=case.tax_bal,
    TaxableCB=case.tax_cb,
    RothBal=case.roth_bal,
    Ret_Total_Vec=Ret_Total_Vec,
    Ret_Interest_Vec=Ret_Interest_Vec,
    Ret_Dividend_Vec=Ret_Dividend_Vec,
    PRLT=PRLT,
    Strategy="RATIO",
    WHPer=WHPer,
    WageWHPer=WageWHPer,
    UIWHPer=UIWHPer,
    ATWD_Vec=ATWD_Vec,
    SocialSecurity_Vec=SocialSecurity_Vec,
    TotalATWDReq_Vec=TotalATWDReq_Vec,
    WageIncome_Vec=TotalATWDReq_Vec,
    UInc_Vec=UInc_Vec,
    TradPer=winning_ratio_obj[0],
    TaxablePer=winning_ratio_obj[1],
    RothPer=winning_ratio_obj[2],
    Horizon=Horizon)

# standard_cases_data.iat[index, 27] = longevity_obj[0]

#### ratio withdrawals, 90% cl####

results_by_sim = []
for sim in range(1, 250):
    longevity_obj = Simulate(
        FilingStatus=case.filing_status,
        Age=case.client_age,
        S_Age=case.spouse_age,
        TradBal_C=case.trad_bal,
        TradBal_S=0,
        TaxableBal=case.tax_bal,
        TaxableCB=case.tax_cb,
        RothBal=case.roth_bal,
        Ret_Total_Vec=stochastic_returns[sim - 1:sim].values[0],
        Ret_Interest_Vec=Ret_Interest_Vec,
        Ret_Dividend_Vec=Ret_Dividend_Vec,
        PRLT=PRLT,
        Strategy=Strategy,
        WHPer=WHPer,
        WageWHPer=WageWHPer,
        UIWHPer=UIWHPer,
        ATWD_Vec=ATWD_Vec,
        SocialSecurity_Vec=SocialSecurity_Vec,
        TotalATWDReq_Vec=TotalATWDReq_Vec,
        WageIncome_Vec=TotalATWDReq_Vec,
        UInc_Vec=UInc_Vec,
        TradPer=winning_ratio_obj[0],
        TaxablePer=winning_ratio_obj[1],
        RothPer=winning_ratio_obj[2],
        Horizon=Horizon)
    results_by_sim.append(longevity_obj[0])
ninety_percent_confidence_longevity = np.percentile(results_by_sim, 10)
standard_cases_data.iat[index, 28] = ninety_percent_confidence_longevity

##### ratio withdrawals, 50% CL#########

fifty_percent_confidence_longevity = np.percentile(results_by_sim, 50)
standard_cases_data.iat[index, 27] = fifty_percent_confidence_longevity

end_time = time.time()
print(f"{index} finished in {end_time - start_time:0.4f} seconds.")
index = index + 1

if index % 100 == 0:
    df = pd.DataFrame(standard_cases_data)
    df.to_csv("Standard_Cases_Output.csv", float_format="%.9f")

df = pd.DataFrame(standard_cases_data) df.to_csv("Standard_Cases_Output.csv", float_format="%.9f")

%%