dipanb commented 1 year ago

!/usr/bin/env python

coding: utf-8

In[3]:

import pandas as pd import numpy as np import os os.chdir("Z:\Healthcare Research\Healthcare Spending\Research Papers\HRS\HRS_2018v2 - 2023\Dipan")

In[4]:

HRS_data = pd.io.stata.read_stata('randhrs1992_2018v2.dta')

In[5]:

columns = pd.DataFrame(HRS_data.columns.to_list(),columns = ['Variables'])

columns.to_csv('HRS Variables.csv',index = False)

In[6]:

HRS_keep = HRS_data[['hhidpn','r1mstat','r1mpart','r2mstat','r2mpart','r3mstat','r3mpart','r4mstat','r4mpart', 'r5mstat','r5mpart','r6mstat','r6mpart','r7mstat','r7mpart','r8mstat','r8mpart','r9mstat', 'r9mpart','r10mstat','r10mpart','r11mstat','r11mpart','r12mstat','r12mpart','r13mstat', 'r13mpart','r14mstat','r14mpart','r1mrct','r2mrct','r3mrct','r4mrct','r5mrct','r6mrct', 'r7mrct','r8mrct','r9mrct','r10mrct','r11mrct','r12mrct','r13mrct','r14mrct','r1mcurln' ,'r2mcurln','r3mcurln','r4mcurln','r5mcurln','r6mcurln','r7mcurln','r8mcurln','r9mcurln', 'r10mcurln','r11mcurln','r12mcurln','r13mcurln','r14mcurln','r1mlenm','r2mlenm','r3mlenm', 'r4mlenm','r5mlenm','r6mlenm','r7mlenm','r8mlenm','r9mlenm','r10mlenm','r11mlenm','r12mlenm', 'r13mlenm','r14mlenm','r1mdiv','r2mdiv','r3mdiv','r4mdiv','r5mdiv','r6mdiv','r7mdiv','r8mdiv', 'r9mdiv','r10mdiv','r11mdiv','r12mdiv','r13mdiv','r14mdiv','r1cenreg','r2cenreg','r3cenreg', 'r4cenreg','r5cenreg','r6cenreg','r7cenreg','r8cenreg','r9cenreg','r10cenreg','r11cenreg', 'r12cenreg','r13cenreg','r14cenreg','r1cendiv','r2cendiv','r3cendiv','r4cendiv','r5cendiv', 'r6cendiv','r7cendiv','r8cendiv','r9cendiv','r10cendiv','r11cendiv','r12cendiv','r13cendiv', 'r14cendiv','h1hhresp','h4hhresp','h5hhresp','h6hhresp','h7hhresp','h8hhresp','h9hhresp', 'h10hhresp','h11hhresp','h12hhresp','h13hhresp','h14hhresp','h1cpl','h4cpl','h5cpl','h6cpl', 'h7cpl','h8cpl','h9cpl','h10cpl','h11cpl','h12cpl','h13cpl','h14cpl','ragender','raracem', 'rabmonth','rabyear','rabdate','r1iwbeg','r2iwbeg','r3iwbeg','r4iwbeg','r5iwbeg','r6iwbeg', 'r7iwbeg','r8iwbeg','r9iwbeg','r10iwbeg','r11iwbeg','r12iwbeg','r13iwbeg','r14iwbeg','r1iwend', 'r2iwend','r3iwend','r4iwend','r5iwend','r6iwend','r7iwend','r8iwend','r9iwend','r10iwend', 'r11iwend','r12iwend','r13iwend','r14iwend','r1iwbegf','r2iwbegf','r1iwendf','r2iwendf', 'r3iwendf','r1iwmid','r2iwmid','r3iwmid','r4iwmid','r5iwmid','r6iwmid','r7iwmid', 'r8iwmid','r9iwmid','r10iwmid','r11iwmid','r12iwmid','r13iwmid','r14iwmid','r1iwmidf', 'r2iwmidf','r3iwmidf','r4iwmidf','r5iwmidf','r6iwmidf','r7iwmidf','r8iwmidf','r9iwmidf', 'r10iwmidf','r11iwmidf','r12iwmidf','r13iwmidf','r14iwmidf','r1iwstat','r2iwstat','r3iwstat', 'r4iwstat','r5iwstat','r6iwstat','r7iwstat','r8iwstat','r9iwstat','r10iwstat','r11iwstat', 'r12iwstat','r13iwstat','r14iwstat','radmonth','radyear','raddate','radage_m','radage_y', 'raedyrs','raedegrm','raeduc','rarelig','ravetrn','rameduc','rafeduc','rabplace','r2pexit', 'r3pexit','r4pexit','r5pexit','r6pexit','r7pexit','r8pexit','r9pexit','r10pexit','r11pexit', 'r12pexit','r13pexit','r14pexit','r1agem_b','r2agem_b','r3agem_b','r4agem_b','r5agem_b', 'r6agem_b','r7agem_b','r8agem_b','r9agem_b','r10agem_b','r11agem_b','r12agem_b','r13agem_b', 'r14agem_b','r1agem_e','r2agem_e','r3agem_e','r4agem_e','r5agem_e','r6agem_e','r7agem_e', 'r8agem_e','r9agem_e','r10agem_e','r11agem_e','r12agem_e','r13agem_e','r14agem_e','r1agem_m', 'r2agem_m','r3agem_m','r4agem_m','r5agem_m','r6agem_m','r7agem_m','r8agem_m','r9agem_m', 'r10agem_m','r11agem_m','r12agem_m','r13agem_m','r14agem_m','r1agey_b','r2agey_b','r3agey_b', 'r4agey_b','r5agey_b','r6agey_b','r7agey_b','r8agey_b','r9agey_b','r10agey_b','r11agey_b', 'r12agey_b','r13agey_b','r14agey_b','r1agey_e','r2agey_e','r3agey_e','r4agey_e','r5agey_e', 'r6agey_e','r7agey_e','r8agey_e','r9agey_e','r10agey_e','r11agey_e','r12agey_e','r13agey_e', 'r14agey_e','r1agey_m','r2agey_m','r3agey_m','r4agey_m','r5agey_m','r6agey_m','r7agey_m', 'r8agey_m','r9agey_m','r10agey_m','r11agey_m','r12agey_m','r13agey_m','r14agey_m','rawtsamp', 'r1wthh','r2wthh','r3wthh','r4wthh','r5wthh','r6wthh','r7wthh','r8wthh','r9wthh','r10wthh', 'r11wthh','r12wthh','r13wthh','r14wthh','r1wtresp','r2wtresp','r3wtresp','r4wtresp', 'r5wtresp','r6wtresp','r7wtresp','r8wtresp','r9wtresp','r10wtresp','r11wtresp','r12wtresp', 'r13wtresp','r14wtresp','h2cpl','h2hhresp','h3cpl','h3hhresp','racohbyr','r1iwendm', 'r2iwendm','r3iwendm','r4iwendm','r5iwendm','r6iwendm','r7iwendm','r8iwendm','r9iwendm', 'r10iwendm','r11iwendm','r12iwendm','r13iwendm','r14iwendm','r1iwendy','r2iwendy','r3iwendy', 'r4iwendy','r5iwendy','r6iwendy','r7iwendy','r8iwendy','r9iwendy','r10iwendy','r11iwendy', 'r12iwendy','r13iwendy','r14iwendy','recendiv','recenreg','remstat','rempart','reiwbeg', 'reiwmid','reiwend','reiwmidf','respagem_b','respagem_m','respagem_e','respagey_b', 'respagey_m','respagey_e','r1shlt','r2shlt','r3shlt','r4shlt','r5shlt','r6shlt', 'r7shlt','r8shlt','r9shlt','r10shlt','r11shlt','r12shlt','r13shlt','r14shlt','r1hltc5', 'r2hltc5','r3hltc5','r4hltc5','r5hltc5','r6hltc5','r3hltc5f','r4hltc5f','r5hltc5f', 'r6hltc5f','r1hltc3','r2hltc3','r3hltc3','r4hltc3','r5hltc3','r6hltc3','r7hltc3', 'r8hltc3','r9hltc3','r10hltc3','r11hltc3','r12hltc3','r13hltc3','r14hltc3','r7vgactx', 'r8vgactx','r9vgactx','r10vgactx','r11vgactx','r12vgactx','r13vgactx','r14vgactx', 'r7mdactx','r8mdactx','r9mdactx','r10mdactx','r11mdactx','r12mdactx','r13mdactx', 'r14mdactx','r7ltactx','r8ltactx','r9ltactx','r10ltactx','r11ltactx','r12ltactx', 'r13ltactx','r14ltactx','r1bmi','r2bmi','r3bmi','r4bmi','r5bmi','r6bmi','r7bmi', 'r8bmi','r9bmi','r10bmi','r11bmi','r12bmi','r13bmi','r14bmi','r1height','r2height', 'r3height','r4height','r5height','r6height','r7height','r8height','r9height','r10height', 'r11height','r12height','r13height','r14height','r1weight','r2weight','r3weight', 'r4weight','r5weight','r6weight','r7weight','r8weight','r9weight','r10weight','r11weight', 'r12weight','r13weight','r14weight','r1back','r2back','r3back','r4back','r5back','r6back', 'r7back','r8back','r9back','r10back','r11back','r12back','r13back','r14back','r1smokev', 'r2smokev','r3smokev','r4smokev','r5smokev','r6smokev','r7smokev','r8smokev','r9smokev', 'r10smokev','r11smokev','r12smokev','r13smokev','r14smokev','r1smoken','r2smoken','r3smoken', 'r4smoken','r5smoken','r6smoken','r7smoken','r8smoken','r9smoken','r10smoken','r11smoken', 'r12smoken','r13smoken','r14smoken','r1drink','r2drink','r3drink','r4drink','r5drink', 'r6drink','r7drink','r8drink','r9drink','r10drink','r11drink','r12drink','r13drink', 'r14drink','r1drinkr','r2drinkr','r3drinkd','r4drinkd','r5drinkd','r6drinkd','r7drinkd', 'r8drinkd','r9drinkd','r10drinkd','r11drinkd','r12drinkd','r13drinkd','r14drinkd', 'r3drinkn','r4drinkn','r5drinkn','r6drinkn','r7drinkn','r8drinkn','r9drinkn','r10drinkn', 'r11drinkn','r12drinkn','r13drinkn','r14drinkn','r1hibp','r2hibp','r3hibp','r4hibp','r5hibp', 'r6hibp','r7hibp','r8hibp','r9hibp','r10hibp','r11hibp','r12hibp','r13hibp','r14hibp', 'r1hibpe','r2hibpe','r3hibpe','r4hibpe','r5hibpe','r6hibpe','r7hibpe','r8hibpe','r9hibpe', 'r10hibpe','r11hibpe','r12hibpe','r13hibpe','r14hibpe','r1diab','r2diab','r3diab','r4diab', 'r5diab','r6diab','r7diab','r8diab','r9diab','r10diab','r11diab','r12diab','r13diab', 'r14diab','r1diabe','r2diabe','r3diabe','r4diabe','r5diabe','r6diabe','r7diabe','r8diabe', 'r9diabe','r10diabe','r11diabe','r12diabe','r13diabe','r14diabe','r1cancr','r2cancr', 'r3cancr','r4cancr','r5cancr','r6cancr','r7cancr','r8cancr','r9cancr','r10cancr','r11cancr', 'r12cancr','r13cancr','r14cancr','r1cancre','r2cancre','r3cancre','r4cancre','r5cancre', 'r6cancre','r7cancre','r8cancre','r9cancre','r10cancre','r11cancre','r12cancre','r13cancre', 'r14cancre','r1lung','r2lung','r3lung','r4lung','r5lung','r6lung','r7lung','r8lung', 'r9lung','r10lung','r11lung','r12lung','r13lung','r14lung','r1lunge','r2lunge','r3lunge', 'r4lunge','r5lunge','r6lunge','r7lunge','r8lunge','r9lunge','r10lunge','r11lunge','r12lunge', 'r13lunge','r14lunge','r1heart','r2heart','r3heart','r4heart','r5heart','r6heart','r7heart', 'r8heart','r9heart','r10heart','r11heart','r12heart','r13heart','r14heart','r1hearte', 'r2hearte','r3hearte','r4hearte','r5hearte','r6hearte','r7hearte','r8hearte','r9hearte', 'r10hearte','r11hearte','r12hearte','r13hearte','r14hearte','r1strok','r2strok','r3strok', 'r4strok','r5strok','r6strok','r7strok','r8strok','r9strok','r10strok','r11strok','r12strok', 'r13strok','r14strok','r1stroke','r2stroke','r3stroke','r4stroke','r5stroke','r6stroke', 'r7stroke','r8stroke','r9stroke','r10stroke','r11stroke','r12stroke','r13stroke','r14stroke', 'r1psych','r2psych','r3psych','r4psych','r5psych','r6psych','r7psych','r8psych','r9psych', 'r10psych','r11psych','r12psych','r13psych','r14psych','r1psyche','r2psyche','r3psyche', 'r4psyche','r5psyche','r6psyche','r7psyche','r8psyche','r9psyche','r10psyche','r11psyche', 'r12psyche','r13psyche','r14psyche','r1arthr','r2arthr','r3arthr','r4arthr','r5arthr', 'r6arthr','r7arthr','r8arthr','r9arthr','r10arthr','r11arthr','r12arthr','r13arthr', 'r14arthr','r1arthre','r2arthre','r3arthre','r4arthre','r5arthre','r6arthre','r7arthre', 'r8arthre','r9arthre','r10arthre','r11arthre','r12arthre','r13arthre','r14arthre', 'r4memry','r5memry','r6memry','r7memry','r8memry','r9memry','r4memrye','r5memrye', 'r6memrye','r7memrye','r8memrye','r9memrye','r10alzhe','r11alzhe','r12alzhe','r13alzhe', 'r14alzhe','r10alzhee','r11alzhee','r12alzhee','r13alzhee','r14alzhee','r10demen','r11demen', 'r12demen','r13demen','r14demen','r10demene','r11demene','r12demene','r13demene','r14demene', 'r1conde','r2conde','r3conde','r4conde','r5conde','r6conde','r7conde','r8conde','r9conde', 'r10conde','r11conde','r12conde','r13conde','r14conde','r8bpsys','r9bpsys','r10bpsys', 'r11bpsys','r12bpsys','r13bpsys','r14bpsys','r8bpdia','r9bpdia','r10bpdia','r11bpdia', 'r12bpdia','r13bpdia','r14bpdia','r8bppuls','r9bppuls','r10bppuls','r11bppuls','r12bppuls', 'r13bppuls','r14bppuls','h1atotn','h1atotb','h2atotn','h2atotb','h3atotn','h3atotb', 'h4atotn','h4atotb','h5atotn','h5atotb','h6atotn','h6atotb','h7atotn','h7atotb','h8atotn', 'h8atotb','h9atotn','h9atotb','h10atotn','h10atotb','h11atotn','h11atotb','h12atotn', 'h12atotb','h13atotn','h13atotb','h14atotn','h14atotb','r1iearn','h1icap','r2iearn','h2icap', 'r3iearn','h3icap','r4iearn','h4icap','r5iearn','h5icap','r6iearn','h6icap','r7iearn', 'h7icap','r8iearn','h8icap','r9iearn','h9icap','r10iearn','h10icap','r11iearn','h11icap', 'r12iearn','h12icap','r13iearn','h13icap','r14iearn','h14icap','r1sayret','r2sayret', 'r3sayret','r4sayret','r5sayret','r6sayret','r7sayret','r8sayret','r9sayret','r10sayret', 'r11sayret','r12sayret','r13sayret','r14sayret','r1retmon','r2retmon','r3retmon','r4retmon', 'r5retmon','r6retmon','r7retmon','r8retmon','r9retmon','r10retmon','r11retmon','r12retmon', 'r13retmon','r14retmon','r1retyr','r2retyr','r3retyr','r4retyr','r5retyr','r6retyr', 'r7retyr','r8retyr','r9retyr','r10retyr','r11retyr','r12retyr','r13retyr','r14retyr', 'r1rplnyr','r2rplnyr','r3rplnyr','r4rplnyr','r5rplnyr','r6rplnyr','r7rplnyr','r8rplnyr', 'r9rplnyr','r10rplnyr','r11rplnyr','r12rplnyr','r13rplnyr','r14rplnyr','r1jjobs', 'r2jjobs','r3jjobs','r4jjobs','r5jjobs','r6jjobs','r7jjobs','r8jjobs','r9jjobs', 'r10jjobs','r11jjobs','r12jjobs','r13jjobs','r14jjobs','r1jyears','r2jyears', 'r3jyears','r4jyears','r5jyears','r6jyears','r7jyears','r8jyears','r9jyears', 'r10jyears','r11jyears','r12jyears','r13jyears','r14jyears','h1hhres','h1child', 'r1dadliv','r1momliv','r1dadage','r1momage','h2hhres','h2child','r2dadliv','r2momliv', 'r2dadage','r2momage','h3hhres','h3child','r3dadliv','r3momliv','r3dadage','r3momage', 'h4hhres','h4child','r4dadliv','r4momliv','r4dadage','r4momage','h5hhres','h5child', 'r5dadliv','r5momliv','r5dadage','r5momage','h6hhres','h6child','r6dadliv','r6momliv', 'r6dadage','r6momage','h7hhres','h7child','r7dadliv','r7momliv','r7dadage','r7momage', 'h8hhres','h8child','r8dadliv','r8momliv','r8dadage','r8momage','h9hhres','h9child', 'r9dadliv','r9momliv','r9dadage','r9momage','h10hhres','h10child','r10dadliv','r10momliv', 'r10dadage','r10momage','h11hhres','h11child','r11dadliv','r11momliv','r11dadage', 'r11momage','h12hhres','h12child','r12dadliv','r12momliv','r12dadage','r12momage', 'h13hhres','h13child','r13dadliv','r13momliv','r13dadage','r13momage','h14hhres', 'h14child','r14dadliv','r14momliv','r14dadage','r14momage']].copy()

HRS_keep.to_csv('HRS Columns Filtered.csv')

In[7]:

########Death dataframe###################### temp_df = HRS_keep.copy()

HRS_Death = pd.DataFrame()

HRS_Death['Death_Year'] = HRS_keep['radyear']

temp_df = temp_df.loc[temp_df['radyear']>0]

In[8]:

temp_df.to_csv('HRS Death.csv')

1.Death Age and Death year

3. Find wave number exit interview

4. All demographic variables for previous wave and ever haves - death wave values not reported - fill death wave values for ever haves by last wave data

5. Sample weights

- Run regression analysis

- Calculate sum(sample wts) and count - for each year, each condition

Death data

Death_df = pd.DataFrame() Death_df[['Household_Id','Death_Year','Death_Age','Gender','Race','Years_Education', 'Highest_Degree','Education_Category','Religion', 'Veteran_Status','Mother_Years_Of_Education', 'Father_Years_Of_Education','Division_Birth','Sample_Wt', 'Cohort_Of_Respondent','Division','Region', 'Marital_Status','Partnership_Status',]] = temp_df[['hhidpn','radyear','radage_y','ragender','raracem','raedyrs', 'raedegrm','raeduc','rarelig','ravetrn', 'rameduc','rafeduc','rabplace','rawtsamp', 'racohbyr','recendiv','recenreg','remstat','rempart',]]

In[9]:

def find_death_wave(df): temp = df.copy() temp['Death_Wave'] = 0 for i in range(df.shape[0]):

print(f'row:{i+1}')

    for j in range(1,15):

print(f'wave:{j}')

        if df[f'r{j}iwstat'].iloc[i]=='5.nr,died this wv':
            temp['Death_Wave'].iloc[i] = j
            break
return temp['Death_Wave']

Death_df['Death_Wave'] = find_death_wave(temp_df)

In[10]:

temp_df['r1drinkn'] = temp_df['r1drinkr'] temp_df['r2drinkn'] = temp_df['r2drinkr']

In[11]:

Death_df['Times_Married'] = None Death_df['Length_Current_Marriage'] = None Death_df['Length_Longest_Marriage'] = None Death_df['Times_Divorced'] = None Death_df['Household_Size'] = None Death_df['Whether_Couple_HH'] = None Death_df['HH_Wt'] = None Death_df['Person_Level_Wt'] = None Death_df['Self_Report_Health'] = None Death_df['Self_Report_Health_Change_3_Category'] = None Death_df['Frequent_Vigorous_Physical_Activity'] = None Death_df['Frequent_Moderate_Physical_Activity'] = None Death_df['Frequent_Light_Physical_Activity'] = None Death_df['BMI'] = None Death_df['Height'] = None Death_df['Weight'] = None Death_df['Back_Problem'] = None Death_df['Smoke'] = None Death_df['Smoke_Now'] = None Death_df['Drink'] = None Death_df['Drink_Days_Per_Week'] = None Death_df['Drink_Per_Day'] = None Death_df['Blood_Pressure'] = None Death_df['Diabetes'] = None Death_df['Cancer'] = None Death_df['Lung_Disease'] = None Death_df['Heart_Problem'] = None Death_df['Stroke'] = None Death_df['Pyschological_Problem'] = None Death_df['Arthritis'] = None Death_df['Alzheimers'] = None Death_df['Dementia'] = None Death_df['Sum_Conditions'] = None Death_df['Systolic_BP'] = None Death_df['Diastolic_BP'] = None Death_df['Pulse'] = None Death_df['Non_Housing_Assets'] = None Death_df['Total_Assets'] = None Death_df['Income'] = None Death_df['Capital_Income_HH'] = None Death_df['Reports_Retired'] = None Death_df['Retirement_Year'] = None Death_df['Planned_Retirement_Year'] = None Death_df['Job_History'] = None Death_df['Job_Years'] = None Death_df['Household_Size'] = None Death_df['Number_Living_Children'] = None Death_df['Father_Alive'] = None Death_df['Mother_Alive'] = None Death_df['Father_Age'] = None Death_df['Mother_Age'] = None

for i in range(Death_df.shape[0]): d_wave = Death_df['Death_Wave'].iloc[i] if d_wave==1: continue d_wave = Death_df['Death_Wave'].iloc[i] Death_df['Times_Married'].iloc[i] = temp_df[f'r{d_wave-1}mrct'].iloc[i] Death_df['Length_Current_Marriage'].iloc[i] = temp_df[f'r{d_wave-1}mcurln'].iloc[i] Death_df['Length_Longest_Marriage'].iloc[i] = temp_df[f'r{d_wave-1}mlenm'].iloc[i] Death_df['Times_Divorced'].iloc[i] = temp_df[f'r{d_wave-1}mdiv'].iloc[i] Death_df['Household_Size'].iloc[i] = temp_df[f'h{d_wave-1}hhresp'].iloc[i] Death_df['Whether_Couple_HH'].iloc[i] = temp_df[f'h{d_wave-1}cpl'].iloc[i] Death_df['HH_Wt'].iloc[i] = temp_df[f'r{d_wave-1}wthh'].iloc[i] Death_df['Person_Level_Wt'].iloc[i] = temp_df[f'r{d_wave-1}wtresp'].iloc[i] Death_df['Self_Report_Health'].iloc[i] = temp_df[f'r{d_wave-1}shlt'].iloc[i] Death_df['Self_Report_Health_Change_3_Category'].iloc[i] = temp_df[f'r{d_wave-1}hltc3'].iloc[i] Death_df['BMI'].iloc[i] = temp_df[f'r{d_wave-1}bmi'].iloc[i] Death_df['Height'].iloc[i] = temp_df[f'r{d_wave-1}height'].iloc[i] Death_df['Weight'].iloc[i] = temp_df[f'r{d_wave-1}weight'].iloc[i] Death_df['Back_Problem'].iloc[i] = temp_df[f'r{d_wave-1}back'].iloc[i] Death_df['Smoke'].iloc[i] = temp_df[f'r{d_wave-1}smokev'].iloc[i] Death_df['Smoke_Now'].iloc[i] = temp_df[f'r{d_wave-1}smoken'].iloc[i] Death_df['Drink'].iloc[i] = temp_df[f'r{d_wave-1}drink'].iloc[i] Death_df['Drink_Per_Day'].iloc[i] = temp_df[f'r{d_wave-1}drinkn'].iloc[i] Death_df['Blood_Pressure'].iloc[i] = temp_df[f'r{d_wave-1}hibpe'].iloc[i] Death_df['Diabetes'].iloc[i] = temp_df[f'r{d_wave-1}diabe'].iloc[i] Death_df['Cancer'].iloc[i] = temp_df[f'r{d_wave-1}cancre'].iloc[i] Death_df['Lung_Disease'].iloc[i] = temp_df[f'r{d_wave-1}lunge'].iloc[i] Death_df['Heart_Problem'].iloc[i] = temp_df[f'r{d_wave-1}hearte'].iloc[i] Death_df['Stroke'].iloc[i] = temp_df[f'r{d_wave-1}stroke'].iloc[i] Death_df['Pyschological_Problem'].iloc[i] = temp_df[f'r{d_wave-1}psyche'].iloc[i] Death_df['Arthritis'].iloc[i] = temp_df[f'r{d_wave-1}arthre'].iloc[i] Death_df['Sum_Conditions'].iloc[i] = temp_df[f'r{d_wave-1}conde'].iloc[i] Death_df['Non_Housing_Assets'].iloc[i] = temp_df[f'h{d_wave-1}atotn'].iloc[i] Death_df['Total_Assets'].iloc[i] = temp_df[f'h{d_wave-1}atotb'].iloc[i] Death_df['Income'].iloc[i] = temp_df[f'r{d_wave-1}iearn'].iloc[i] Death_df['Capital_Income_HH'].iloc[i] = temp_df[f'h{d_wave-1}icap'].iloc[i] Death_df['Reports_Retired'].iloc[i] = temp_df[f'r{d_wave-1}sayret'].iloc[i] Death_df['Retirement_Year'].iloc[i] = temp_df[f'r{d_wave-1}retyr'].iloc[i] Death_df['Planned_Retirement_Year'].iloc[i] = temp_df[f'r{d_wave-1}rplnyr'].iloc[i] Death_df['Job_History'].iloc[i] = temp_df[f'r{d_wave-1}jjobs'].iloc[i] Death_df['Job_Years'].iloc[i] = temp_df[f'r{d_wave-1}jyears'].iloc[i] Death_df['Household_Size'].iloc[i] = temp_df[f'h{d_wave-1}hhres'].iloc[i] Death_df['Number_Living_Children'].iloc[i] = temp_df[f'h{d_wave-1}child'].iloc[i] Death_df['Father_Alive'].iloc[i] = temp_df[f'r{d_wave-1}dadliv'].iloc[i] Death_df['Mother_Alive'].iloc[i] = temp_df[f'r{d_wave-1}momliv'].iloc[i] Death_df['Father_Age'].iloc[i] = temp_df[f'r{d_wave-1}dadage'].iloc[i] Death_df['Mother_Age'].iloc[i] = temp_df[f'r{d_wave-1}momage'].iloc[i]

if d_wave<=3:
    continue
Death_df['Drink_Days_Per_Week'].iloc[i] = temp_df[f'r{d_wave-1}drinkd'].iloc[i]

if d_wave<=7:
    continue
Death_df['Frequent_Vigorous_Physical_Activity'].iloc[i] = temp_df[f'r{d_wave-1}vgactx'].iloc[i]
Death_df['Frequent_Moderate_Physical_Activity'].iloc[i] = temp_df[f'r{d_wave-1}mdactx'].iloc[i]
Death_df['Frequent_Light_Physical_Activity'].iloc[i] = temp_df[f'r{d_wave-1}ltactx'].iloc[i]

if d_wave<=8:
    continue
Death_df['Systolic_BP'].iloc[i] = temp_df[f'r{d_wave-1}bpsys'].iloc[i]
Death_df['Diastolic_BP'].iloc[i] = temp_df[f'r{d_wave-1}bpdia'].iloc[i]
Death_df['Pulse'].iloc[i] = temp_df[f'r{d_wave-1}bppuls'].iloc[i]

if d_wave<=10:
    continue
Death_df['Alzheimers'].iloc[i] = temp_df[f'r{d_wave-1}alzhee'].iloc[i]
Death_df['Dementia'].iloc[i] = temp_df[f'r{d_wave-1}demene'].iloc[i]

In[ ]:

Death_df.to_csv('HRS Death Cleaned.csv',index = False)

In[10]:

temp_df = HRS_keep.copy()

HRS_keep['radyear'].unique()

In[11]:

Incidence Data

0. For each wave - remove people who died before this wave - exit interview before this wave

1.Interview end Year

2.Ever haves - Each condition

- calculate sum(sample wts) and cout - for each year and each condtions

def find_if_death_wave(df):

temp = df.copy()

temp['Death_Wave'] = None

for i in range(df.shape[0]):

if not temp['radyear'].iloc[i]>0:

continue

for j in range(1,15):

if df[f'r{j}iwstat'].loc[i]=='5.nr,died this wv':

temp['Death_Wave'].iloc[i] = j

return temp['Death_Wave']

temp_df['Death_Wave'] = find_if_death_wave(temp_df)

In[12]:

def count_chronic(name_c,df):

count_c = []

for i in range(1,15):

if f'r{i}{name_c}' not in temp_df.columns:

count_c.append(0)

else:

count_c.append(temp_df[f'r{i}{name_c}'].value_counts()['1.yes'])

return count_c

In[13]:

Chronic_Incidence = pd.DataFrame(list(range(1,15)),columns=[''])

In[14]:

name_c = 'hibpe'

count_c = {}

sumwt_c = {}

Also include Age in this loop

for i in range(1,15):

years_list = temp_df.loc[temp_df[f'r{i}iwendy']>0][f'r{i}iwendy'].unique()

for year in years_list:

temp = temp_df.loc[temp_df[f'r{i}iwendy']==year]

for age in range(55,101):

temp1 = temp.loc[temp_df[f'r{i}agey_e']==age]

print(f'{i} {int(year)} {age} {temp1.shape[0]}')

Count

if f'{int(year)}_{age}' not in count_c.keys():

if f'r{i}{name_c}' in temp1.columns:

countc[f'{int(year)}{age}'] = temp1[f'r{i}{name_c}'].value_counts()['1.yes']

else:

countc[f'{int(year)}{age}'] = 0

else:

if f'r{i}{name_c}' in temp1.columns:

countc[f'{int(year)}{age}'] += temp1[f'r{i}{name_c}'].value_counts()['1.yes']

else:

countc[f'{int(year)}{age}'] += 0

Weight Sum

if f'{int(year)}_{age}' not in sumwt_c.keys():

if f'r{i}{name_c}' in temp1.columns:

sumwtc[f'{int(year)}{age}'] = temp1[f'rawtsamp'].loc[temp1[f'r{i}{name_c}']=='1.yes'].sum()

else:

sumwtc[f'{int(year)}{age}'] = 0

else:

if f'r{i}{name_c}' in temp1.columns:

sumwtc[f'{int(year)}{age}'] += temp1[f'rawtsamp'].loc[temp1[f'r{i}{name_c}']=='1.yes'].sum()

else:

sumwtc[f'{int(year)}{age}'] += 0

count_c_list = []

sumwt_c_list = []

for i in range(1992,2020):

for j in range(55,101):

if f'{i}_{j}' in count_c.keys():

count_c_list.append([i,j,countc[f'{i}{j}']])

if f'{i}_{j}' in sumwt_c.keys():

sumwt_c_list.append([i,j,sumwtc[f'{i}{j}']])

count_df = pd.DataFrame(count_c_list,columns = ['Year','Age','Count'])

sumwt_df = pd.DataFrame(sumwt_c_list,columns = ['Year','Age','Sum_Wt'])

sumwt_df = pd.merge(count_df, sumwt_df, how='left', left_on=['Year','Age'], right_on = ['Year','Age'])

In[22]:

Incidence Data

def incidence_chronic(name_c,df): temp_df = df.copy() count_c = {} sumwt_c = {}

#Also include Age in this loop
for i in range(1,15):
    years_list = temp_df.loc[temp_df[f'r{i}iwendy']>0][f'r{i}iwendy'].unique()
    for year in years_list:
        temp = temp_df.loc[temp_df[f'r{i}iwendy']==year]
        for age in range(55,101):
            temp1 = temp.loc[temp[f'r{i}agey_e']==age]

print(f'{i} {int(year)} {age} {temp1.shape[0]}')

            #Count
            if f'{int(year)}_{age}' not in count_c.keys():
                if f'r{i}{name_c}' in temp1.columns:
                    count_c[f'{int(year)}_{age}'] = temp1[f'r{i}{name_c}'].value_counts()['1.yes']
                else:
                    count_c[f'{int(year)}_{age}'] = 0
            else:
                if f'r{i}{name_c}' in temp1.columns:
                    count_c[f'{int(year)}_{age}'] += temp1[f'r{i}{name_c}'].value_counts()['1.yes']
                else:
                    count_c[f'{int(year)}_{age}'] += 0    

            #Weight Sum
            if f'{int(year)}_{age}' not in sumwt_c.keys():
                if f'r{i}{name_c}' in temp1.columns:
                    sumwt_c[f'{int(year)}_{age}'] = temp1[f'rawtsamp'].loc[temp1[f'r{i}{name_c}']=='1.yes'].sum()
                else:
                    sumwt_c[f'{int(year)}_{age}'] = 0
            else:
                if f'r{i}{name_c}' in temp1.columns:
                    sumwt_c[f'{int(year)}_{age}'] += temp1[f'rawtsamp'].loc[temp1[f'r{i}{name_c}']=='1.yes'].sum()
                else:
                    sumwt_c[f'{int(year)}_{age}'] += 0 

count_c_list = []
sumwt_c_list = []
for i in range(1992,2020):
    for j in range(55,101):
        if f'{i}_{j}' in count_c.keys():
            count_c_list.append([i,j,count_c[f'{i}_{j}']])
        if f'{i}_{j}' in sumwt_c.keys():
            sumwt_c_list.append([i,j,sumwt_c[f'{i}_{j}']])

count_df = pd.DataFrame(count_c_list,columns = ['Year','Age',f'{name_c}_Count'])
sumwt_df = pd.DataFrame(sumwt_c_list,columns = ['Year','Age',f'{name_c}_Sum_Wt'])

final_df = pd.merge(count_df, sumwt_df,  how='left', left_on=['Year','Age'], right_on = ['Year','Age'])
return final_df

In[23]:

Male

temp_df = HRS_keep.loc[HRS_keep['ragender']=='1.male'].copy() chronic_list = ['hibpe','diabe','cancre','lunge','hearte','stroke','psyche','arthre','alzhee','demene'] for i in chronic_list: a = incidence_chronic(i,temp_df) if i == chronic_list[0]: Incidence = a.copy() continue Incidence = pd.merge(Incidence, a, how='left', left_on=['Year','Age'], right_on = ['Year','Age']) Incidence_Male = Incidence.copy()

Female

temp_df = HRS_keep.loc[HRS_keep['ragender']=='2.female'].copy() chronic_list = ['hibpe','diabe','cancre','lunge','hearte','stroke','psyche','arthre','alzhee','demene'] for i in chronic_list: a = incidence_chronic(i,temp_df) if i == chronic_list[0]: Incidence = a.copy() continue Incidence = pd.merge(Incidence, a, how='left', left_on=['Year','Age'], right_on = ['Year','Age']) Incidence_Female = Incidence.copy()

In[25]:

Incidence.to_csv("Incidence Counts and Sum of Weights.csv",index = False)

In[26]:

Death Counts and Sum Weigts

Male

temp_df = HRS_keep.loc[HRS_keep['radyear']>0] temp_df = temp_df.loc[temp_df['ragender']=='1.male'] chronic_list = ['hibpe','diabe','cancre','lunge','hearte','stroke','psyche','arthre','alzhee','demene'] for i in chronic_list: a = incidence_chronic(i,temp_df) if i == chronic_list[0]: Death_count_df = a.copy() continue Death_count_df = pd.merge(Death_count_df, a, how='left', left_on=['Year','Age'], right_on = ['Year','Age']) Death_count_Male = Death_count_df.copy()

Male

temp_df = HRS_keep.loc[HRS_keep['radyear']>0] temp_df = temp_df.loc[temp_df['ragender']=='2.female'] chronic_list = ['hibpe','diabe','cancre','lunge','hearte','stroke','psyche','arthre','alzhee','demene'] for i in chronic_list: a = incidence_chronic(i,temp_df) if i == chronic_list[0]: Death_count_df = a.copy() continue Death_count_df = pd.merge(Death_count_df, a, how='left', left_on=['Year','Age'], right_on = ['Year','Age']) Death_count_Female = Death_count_df.copy()

Death_count_df.to_csv("Death Counts and Sum of Weights.csv",index = False)

In[28]:

with pd.ExcelWriter('Counts and Sum Weights Chronic - 8 May 2023.xlsx') as writer: Incidence_Male.to_excel(writer, sheet_name='Incidence_Male') Death_count_Male.to_excel(writer, sheet_name='Death_count_Male') Incidence_Female.to_excel(writer, sheet_name='Incidence_Female') Death_count_Female.to_excel(writer, sheet_name='Death_count_Female')

In[16]:

Total Count for Persons

Incidence Data

def total_person_count(df): temp_df = df.copy() count_c = {} sumwt_c = {}

count_c_death = {}

sumwt_c_death = {}

#Also include Age in this loop
for i in range(1,15):
    years_list = temp_df.loc[temp_df[f'r{i}iwendy']>0][f'r{i}iwendy'].unique()
    for year in years_list:
        temp = temp_df.loc[temp_df[f'r{i}iwendy']==year]
        for age in range(55,101):
            temp1 = temp.loc[temp[f'r{i}agey_e']==age]

print(f'{i} {int(year)} {age} {temp1.shape[0]}')

            #Count
            if f'{int(year)}_{age}' not in count_c.keys():
                count_c[f'{int(year)}_{age}'] = temp1.shape[0]

count_cdeath[f'{int(year)}{age}'] = temp1[f'r{i}pexit'].value_counts()['1.exit interview this wave']

            else:
                count_c[f'{int(year)}_{age}'] += temp1.shape[0]

count_cdeath[f'{int(year)}{age}'] += temp1[f'r{i}pexit'].value_counts()['1.exit interview this wave']

            #Weight Sum
            if f'{int(year)}_{age}' not in sumwt_c.keys():
                sumwt_c[f'{int(year)}_{age}'] = temp1[f'rawtsamp'].sum()

sumwt_cdeath[f'{int(year)}{age}'] = temp1[f'rawtsamp'].loc[temp1[f'r{i}pexit']=='1.exit interview this wave'].sum()

            else:
                sumwt_c[f'{int(year)}_{age}'] += temp1[f'rawtsamp'].sum()

sumwt_cdeath[f'{int(year)}{age}'] = temp1[f'rawtsamp'].loc[temp1[f'r{i}pexit']=='1.exit interview this wave'].sum()

count_c_list = []
sumwt_c_list = []

count_c_death_list = []

sumwt_c_death_list = []

for i in range(1992,2020):
    for j in range(55,101):
        if f'{i}_{j}' in count_c.keys():
            count_c_list.append([i,j,count_c[f'{i}_{j}']])

count_c_death_list.append([i,j,count_cdeath[f'{i}{j}']])

        if f'{i}_{j}' in sumwt_c.keys():
            sumwt_c_list.append([i,j,sumwt_c[f'{i}_{j}']])

sumwt_c_death_list.append([i,j,sumwt_cdeath[f'{i}{j}']])

count_df = pd.DataFrame(count_c_list,columns = ['Year','Age',f'total_Count'])
sumwt_df = pd.DataFrame(sumwt_c_list,columns = ['Year','Age',f'total_Sum_Wt'])

count_death_df = pd.DataFrame(count_c_death_list,columns = ['Year','Age',f'Death_Count'])

sumwt_death_df = pd.DataFrame(sumwt_c_death_list,columns = ['Year','Age',f'Death_Sum_Wt'])

final_df = pd.merge(count_df, sumwt_df,  how='left', left_on=['Year','Age'], right_on = ['Year','Age'])

final_df = pd.merge(final_df, count_death_df, how='left', left_on=['Year','Age'], right_on = ['Year','Age'])

final_df = pd.merge(final_df, sumwt_death_df, how='left', left_on=['Year','Age'], right_on = ['Year','Age'])

return final_df

person_counts_male = total_person_count(HRS_keep.loc[HRS_keep['ragender']=='1.male']) person_counts_female = total_person_count(HRS_keep.loc[HRS_keep['ragender']=='2.female'])

In[17]:

Total Counts of Death

Male

temp_df = HRS_keep.loc[HRS_keep[f'radyear']>0] temp_df = temp_df.loc[temp_df['ragender']=='1.male'] a = pd.pivot_table(temp_df, index=['radyear','radage_y'],values=['rawtsamp'],aggfunc=len) a.columns = ['Death_Count'] b = pd.pivot_table(temp_df, index=['radyear','radage_y'],values=['rawtsamp'],aggfunc=np.sum) b.columns = ['Death_Sum'] temp_df = pd.concat([a,b]) temp_df = temp_df.reset_index() temp_df.columns = ['Year',"Age",'Death_Count','Death_Sum'] death_count_male = temp_df.copy()

Female

temp_df = HRS_keep.loc[HRS_keep[f'radyear']>0] temp_df = temp_df.loc[temp_df['ragender']=='2.female'] a = pd.pivot_table(temp_df, index=['radyear','radage_y'],values=['rawtsamp'],aggfunc=len) a.columns = ['Death_Count'] b = pd.pivot_table(temp_df, index=['radyear','radage_y'],values=['rawtsamp'],aggfunc=np.sum) b.columns = ['Death_Sum'] temp_df = pd.concat([a,b]) temp_df = temp_df.reset_index() temp_df.columns = ['Year',"Age",'Death_Count','Death_Sum'] death_count_female = temp_df.copy()

In[20]:

mortality_df = pd.merge(person_counts, death_counts, how='left', left_on=['Year','Age'], right_on = ['Year','Age'])

mortality_df.columns = ['Year','Age','Total_Count','Total_Sum','Death_Count','Death_Sum']

person_counts.to_csv('Total and Death Person Counts and Sum Wts.csv')

In[21]:

temp_df.to_csv('Total Death Counts.csv')

In[19]:

with pd.ExcelWriter('Total Person and Total Death Counts and Sum 8 may 2023.xlsx') as writer: person_counts_male.to_excel(writer, sheet_name='person_counts_male') death_count_male.to_excel(writer, sheet_name='death_count_male') person_counts_female.to_excel(writer, sheet_name='person_counts_female') death_count_female.to_excel(writer, sheet_name='death_count_female')

In[ ]:

dipanb commented 1 year ago

!/usr/bin/env python

coding: utf-8

In[1]:

import pandas as pd import numpy as np import os os.chdir("Z:\Healthcare Research\Healthcare Spending\Research Papers\HRS\HRS_2018v2 - 2023\Dipan") df = pd.read_csv('HRS Columns Filtered.csv',index_col = [0]) df.reset_index(inplace=True,drop=True)

In[2]:

df['r1drinkn'] = df['r1drinkr'] df['r2drinkn'] = df['r2drinkr']

In[3]:

df['Is_Dead'] = [1 if df['radyear'].iloc[i].item()>0 else 0 for i in range(df.shape[0])]

In[45]:

df['Is_Dead'].sum()

In[4]:

df['Death_Wave'] = 100 for i in range(df.shape[0]): if df['Is_Dead'].iloc[i].item()==1: for j in range(1,15): if df[f'r{j}iwstat'].iloc[i]=='5.nr,died this wv': df['Death_Wave'].iloc[i] = j

In[47]:

df['Death_Wave'].value_counts()

In[60]:

Extended_Df = pd.DataFrame(columns = ['Household_Id','Is_Dead','Death_Wave','Death_Year','Death_Age', 'Gender','Race','Years_Education', 'Highest_Degree','Education_Category','Religion','Veteran_Status','Mother_Years_Of_Education', 'Father_Years_Of_Education','Division_Birth','Sample_Wt','Cohort_Of_Respondent', 'Division_At_Birth',

'Marital_Status','Partnership_Status',

        'Wave_No',
        'Times_Married','Length_Current_Marriage','Length_Longest_Marriage','Times_Divorced',
        'Household_Size','Whether_Couple_HH','HH_Wt','Person_Level_Wt','Self_Report_Health',
        'Self_Report_Health_Change_3_Category',

'Frequent_Vigorous_Physical_Activity','Frequent_Moderate_Physical_Activity','Frequent_Light_Physical_Activity',

        'BMI','Height','Weight','Back_Problem','Smoke','Smoke_Now','Drink','Drink_Days_Per_Week',
        'Drink_Per_Day','Blood_Pressure','Diabetes','Cancer','Lung_Disease','Heart_Problem','Stroke',
        'Pyschological_Problem','Arthritis',

'Alzheimers','Dementia',

        'Sum_Conditions',

'Systolic_BP','Diastolic_BP','Pulse',

        'Non_Housing_Assets','Total_Assets','Income','Capital_Income_HH','Reports_Retired',
        'Retirement_Year','Planned_Retirement_Year','Job_History','Job_Years','Household_Size',
        'Number_Living_Children','Father_Alive','Mother_Alive','Father_Age','Mother_Age',
        'Marital_Status','Partnership_Status','Current_Age'])

count = 0 for i in range(df.shape[0]):

for i in range(10):

print(f'{i}/{df.shape[0]-1}')
if df['Death_Wave'].iloc[i]>2:

    for wave in range(3,15):          
        if df['Death_Wave'].iloc[i]>wave:

print(df['hhidpn'].iloc[i],df['radage_y'].iloc[i])

print(f'{i} {wave}')

            c_list = []
            c_list.extend(df[['hhidpn','Is_Dead','Death_Wave','radyear','radage_y','ragender','raracem','raedyrs',
            'raedegrm','raeduc','rarelig','ravetrn','rameduc','rafeduc','rabplace','rawtsamp',
                      'racohbyr','rabplace']].iloc[i])
            b_list = []

            b_list = [wave,
                      df[f'r{wave}mrct'].iloc[i],df[f'r{wave}mcurln'].iloc[i],df[f'r{wave}mlenm'].iloc[i],
                      df[f'r{wave}mdiv'].iloc[i],df[f'h{wave}hhresp'].iloc[i],df[f'h{wave}cpl'].iloc[i],
                      df[f'r{wave}wthh'].iloc[i],df[f'r{wave}wtresp'].iloc[i],df[f'r{wave}shlt'].iloc[i],
                      df[f'r{wave}hltc3'].iloc[i],
        #               df[f'r{wave}vgactx'].iloc[i],df[f'r{wave}mdactx'].iloc[i],
        #               df[f'r{wave}ltactx'].iloc[i],
                      df[f'r{wave}bmi'].iloc[i],df[f'r{wave}height'].iloc[i],
                      df[f'r{wave}weight'].iloc[i],df[f'r{wave}back'].iloc[i],df[f'r{wave}smokev'].iloc[i],
                      df[f'r{wave}smoken'].iloc[i],df[f'r{wave}drink'].iloc[i],df[f'r{wave}drinkd'].iloc[i],
                      df[f'r{wave}drinkn'].iloc[i],df[f'r{wave}hibpe'].iloc[i],df[f'r{wave}diabe'].iloc[i],
                      df[f'r{wave}cancre'].iloc[i],df[f'r{wave}lunge'].iloc[i],df[f'r{wave}hearte'].iloc[i],
                      df[f'r{wave}stroke'].iloc[i],df[f'r{wave}psyche'].iloc[i],df[f'r{wave}arthre'].iloc[i],
        #               df[f'r{wave}alzhee'].iloc[i],df[f'r{wave}demene'].iloc[i],
                      df[f'r{wave}conde'].iloc[i],
        #               df[f'r{wave}bpsys'].iloc[i],df[f'r{wave}bpdia'].iloc[i],df[f'r{wave}bppuls'].iloc[i],
                      df[f'h{wave}atotn'].iloc[i],df[f'h{wave}atotb'].iloc[i],df[f'r{wave}iearn'].iloc[i],
                      df[f'h{wave}icap'].iloc[i],df[f'r{wave}sayret'].iloc[i],df[f'r{wave}retyr'].iloc[i],
                      df[f'r{wave}rplnyr'].iloc[i],df[f'r{wave}jjobs'].iloc[i],df[f'r{wave}jyears'].iloc[i],
                      df[f'h{wave}hhres'].iloc[i],df[f'h{wave}child'].iloc[i],df[f'r{wave}dadliv'].iloc[i],
                      df[f'r{wave}momliv'].iloc[i],df[f'r{wave}dadage'].iloc[i],df[f'r{wave}momage'].iloc[i],

                      df[f'r{wave}mstat'].iloc[i],df[f'r{wave}mpart'].iloc[i],df[f'r{wave}agey_e'].iloc[i]]

            c_list.extend(b_list)

print(f'{i} {wave} {len(c_list)}')

            Extended_Df.loc[count] = c_list
            count = count + 1

In[61]:

Extended_Df['Death_Age'].head(25)

In[6]:

Extended_Df.to_csv('HRS Expanded v2.csv')

In[63]:

Extended_Df = pd.read_csv('HRS Expanded v2.csv', index_col = 0)

In[64]:

Extended_Df = Extended_Df.loc[:,~Extended_Df.columns.duplicated()].copy()

Extended_Df.drop('Household_Id', inplace=True, axis=1)

Extended_Df.drop('Sample_Wt', inplace=True, axis=1) Extended_Df.drop('HH_Wt', inplace=True, axis=1) Extended_Df.drop('Person_Level_Wt', inplace=True, axis=1)

Extended_Df.drop('Self_Report_Health_Change_3_Category', inplace=True, axis=1) Extended_Df.drop('Self_Report_Health', inplace=True, axis=1)

Extended_Df.drop('Systolic_BP', inplace=True, axis=1)

Extended_Df.drop('Pulse', inplace=True, axis=1)

Extended_Df.drop('Diastolic_BP', inplace=True, axis=1)

Extended_Df.drop('Dementia', inplace=True, axis=1)

Extended_Df.drop('Alzheimers', inplace=True, axis=1)

Extended_Df.drop('Back_Problem', inplace=True, axis=1)

Extended_Df.drop('Frequent_Vigorous_Physical_Activity', inplace=True, axis=1)

Extended_Df.drop('Frequent_Moderate_Physical_Activity', inplace=True, axis=1)

Extended_Df.drop('Frequent_Light_Physical_Activity', inplace=True, axis=1)

Extended_Df.drop('Planned_Retirement_Year', inplace=True, axis=1) Extended_Df.drop('Retirement_Year', inplace=True, axis=1) Extended_Df.drop('Household_Size.1', inplace=True, axis=1)

In[35]:

Extended_Df1 = Extended_Df.copy()

Extended_Df.iloc[:,:].isna().sum()

Extended_Df['Current_Age'].value_counts()

Extended_Df.shape

In[65]:

Extended_Df['Death_Age'].fillna(value = 200, inplace = True) Extended_Df['Race'].fillna(value = '1.white/caucasian', inplace = True)

Extended_Df.loc[Extended_Df['Highest_Degree']=='1.ged','Highest_Degree'] = '3.hs/ged' Extended_Df.loc[Extended_Df['Highest_Degree']=='2.hs','Highest_Degree'] = '3.hs/ged'

Extended_Df.loc[Extended_Df['Years_Education']=='17.17+ yrs','Years_Education'] = '17' Extended_Df.loc[Extended_Df['Years_Education']=='0.none','Years_Education'] = '0' Extended_Df['Years_Education'].fillna(value = '-1', inplace = True)

Extended_Df['Education_Category'].fillna(value = '1.lt high-school', inplace = True)

Extended_Df['Religion'].fillna(value = '5.other', inplace = True)

Extended_Df['Veteran_Status'].fillna(value = '0.no', inplace = True)

Extended_Df['Division_Birth'].fillna(value = '11.not us/inc us terr', inplace = True)

Extended_Df['Cohort_Of_Respondent'].fillna(value = '0.not in any cohort', inplace = True)

Extended_Df['BMI'].fillna(value = Extended_Df['BMI'].mean(), inplace = True) Extended_Df['Height'].fillna(value = Extended_Df['Height'].mean(), inplace = True) Extended_Df['Weight'].fillna(value = Extended_Df['Weight'].mean(), inplace = True) Extended_Df['Length_Current_Marriage'].fillna(value = Extended_Df['Length_Current_Marriage'].mean(), inplace = True) Extended_Df['Length_Longest_Marriage'].fillna(value = Extended_Df['Length_Longest_Marriage'].mean(), inplace = True) Extended_Df['Times_Married'].fillna(value = int(Extended_Df['Times_Married'].mean()), inplace = True) Extended_Df['Household_Size'].fillna(value = int(Extended_Df['Household_Size'].mean()), inplace = True)

Extended_Df['Whether_Couple_HH'].fillna(value = 'No value provided', inplace = True)

Extended_Df['Mother_Years_Of_Education'].fillna(value = '-1', inplace = True) Extended_Df['Father_Years_Of_Education'].fillna(value = '-1', inplace = True)

Extended_Df.loc[Extended_Df['Division_At_Birth']=='10.us/na division','Division_At_Birth'] = '11.not us/inc us terr' Extended_Df['Division_At_Birth'].fillna(value = '11.not us/inc us terr', inplace = True)

Extended_Df.loc[Extended_Df['Marital_Status']=='4.separated','Marital_Status'] = '6.separated/divorced' Extended_Df.loc[Extended_Df['Marital_Status']=='5.divorced','Marital_Status'] = '6.separated/divorced' Extended_Df['Marital_Status'].fillna(value = 'No value given', inplace = True)

Extended_Df['Partnership_Status'].fillna(value = '0. no', inplace = True)

Extended_Df['Times_Divorced'].fillna(value = 0.0, inplace = True)

Extended_Df['Smoke'].fillna(value = '1.yes', inplace = True) Extended_Df['Smoke_Now'].fillna(value = '0.no', inplace = True)

Extended_Df['Drink'].fillna(value = '0.no', inplace = True) Extended_Df['Drink_Per_Day'].fillna(value = '0.0 or doesnt drink', inplace = True) Extended_Df['Drink_Days_Per_Week'].fillna(value = '0.0 or doesnt drink', inplace = True)

Extended_Df['Reports_Retired'].fillna(value = 'No value given', inplace = True)

Extended_Df['Father_Alive'].fillna(value = '0.no', inplace = True) Extended_Df['Mother_Alive'].fillna(value = '0.no', inplace = True)

Extended_Df['Father_Age'].fillna(value = int(Extended_Df['Father_Age'].mean()), inplace = True) Extended_Df['Mother_Age'].fillna(value = int(Extended_Df['Mother_Age'].mean()), inplace = True)

Extended_Df['Number_Living_Children'].fillna(value = int(Extended_Df['Number_Living_Children'].mean()), inplace = True)

Extended_Df.loc[Extended_Df['Drink_Per_Day']=='0.0 or doesnt drink','Drink_Per_Day'] = '0' Extended_Df.loc[Extended_Df['Drink_Per_Day']=="0.doesn't drink",'Drink_Per_Day'] = '0' Extended_Df.loc[Extended_Df['Drink_Per_Day']=='1.lt 1/day','Drink_Per_Day'] = '1' Extended_Df.loc[Extended_Df['Drink_Per_Day']=='2.1-2/day','Drink_Per_Day'] = '2' Extended_Df.loc[Extended_Df['Drink_Per_Day']=='3.3-4/day','Drink_Per_Day'] = '3' Extended_Df.loc[Extended_Df['Drink_Per_Day']=='4.5+/day','Drink_Per_Day'] = '4' Extended_Df.loc[Extended_Df['Drink_Per_Day']=='99.all day','Drink_Per_Day'] = '45' Extended_Df['Drink_Per_Day'] = pd.to_numeric(Extended_Df['Drink_Per_Day'])

Extended_Df.loc[Extended_Df['Drink_Days_Per_Week']=='0.0 or doesnt drink','Drink_Days_Per_Week'] = '0' Extended_Df['Drink_Days_Per_Week'] = pd.to_numeric(Extended_Df['Drink_Days_Per_Week'])

Extended_Df.loc[Extended_Df['Father_Years_Of_Education']=='8.5.8+ yrs','Father_Years_Of_Education'] = '8' Extended_Df.loc[Extended_Df['Father_Years_Of_Education']=='8.5','Father_Years_Of_Education'] = '8' Extended_Df.loc[Extended_Df['Father_Years_Of_Education']=='8.5.8+ yrs','Father_Years_Of_Education'] = '8' Extended_Df.loc[Extended_Df['Father_Years_Of_Education']=='17.17+ yrs','Father_Years_Of_Education'] = '17' Extended_Df.loc[Extended_Df['Father_Years_Of_Education']=='0.none','Father_Years_Of_Education'] = '0' Extended_Df['Father_Years_Of_Education'] = pd.to_numeric(Extended_Df['Father_Years_Of_Education'])

Extended_Df.loc[Extended_Df['Mother_Years_Of_Education']=='8.5.8+ yrs','Mother_Years_Of_Education'] = '8' Extended_Df.loc[Extended_Df['Mother_Years_Of_Education']=='8.5','Mother_Years_Of_Education'] = '8' Extended_Df.loc[Extended_Df['Mother_Years_Of_Education']=='8.5.8+ yrs','Mother_Years_Of_Education'] = '8' Extended_Df.loc[Extended_Df['Mother_Years_Of_Education']=='17.17+ yrs','Mother_Years_Of_Education'] = '17' Extended_Df.loc[Extended_Df['Mother_Years_Of_Education']=='0.none','Mother_Years_Of_Education'] = '0' Extended_Df['Mother_Years_Of_Education'] = pd.to_numeric(Extended_Df['Mother_Years_Of_Education'])

Extended_Df['Years_Education'] = pd.to_numeric(Extended_Df['Years_Education'])

In[84]:

Extended_Df = Extended_Df[Extended_Df['Blood_Pressure'].notna()]

Extended_Df.iloc[:,:].isna().sum()

Extended_Df['Death_Age'].value_counts()

Extended_Df.shape

In[ ]:

In[85]:

Death_Df = Extended_Df.loc[Extended_Df['Is_Dead']==1] Non_Death_Df = Extended_Df.loc[Extended_Df['Is_Dead']==0]

In[86]:

print(Extended_Df.shape) print(Death_Df.shape) print(Non_Death_Df.shape)

In[97]:

from sklearn.model_selection import train_test_split from sklearn.ensemble import RandomForestRegressor from sklearn.metrics import r2_score

In[105]:

labels = np.array(Death_Df['Death_Age']) model_df = Death_Df.drop('Death_Age', axis = 1) train, test, train_labels, test_labels = train_test_split(model_df,labels,test_size = 0.2)

In[106]:

train.iloc[:,:].isna().sum()

In[107]:

Other_Data_Train = train[['Cohort_Of_Respondent','Death_Year','Death_Wave', 'Length_Current_Marriage','Is_Dead', 'Father_Alive', 'Mother_Alive', 'Father_Age', 'Mother_Age','Wave_No','Household_Id','Current_Age']]

train = train.drop(['Cohort_Of_Respondent','Death_Year','Death_Wave', 'Length_Current_Marriage','Is_Dead', 'Father_Alive', 'Mother_Alive', 'Father_Age', 'Mother_Age','Wave_No','Household_Id','Current_Age'],axis=1)

train = pd.get_dummies(train) train = train.astype('float64') len(train.columns.to_list())

In[108]:

Other_Data_Test = test[['Cohort_Of_Respondent','Death_Year','Death_Wave', 'Length_Current_Marriage','Is_Dead', 'Father_Alive', 'Mother_Alive', 'Father_Age', 'Mother_Age','Wave_No','Household_Id','Current_Age']]

test = test.drop(['Cohort_Of_Respondent','Death_Year','Death_Wave', 'Length_Current_Marriage','Is_Dead', 'Father_Alive', 'Mother_Alive', 'Father_Age', 'Mother_Age','Wave_No','Household_Id','Current_Age'],axis=1)

test = pd.get_dummies(test) test = test.astype('float64') len(test.columns.to_list())

In[109]:

Other_Data_Non_Death_Df = Non_Death_Df[['Cohort_Of_Respondent','Death_Year','Death_Wave', 'Length_Current_Marriage','Is_Dead', 'Father_Alive', 'Mother_Alive', 'Father_Age', 'Mother_Age','Wave_No','Death_Age', 'Household_Id','Current_Age']]

Non_Death_Df = Non_Death_Df.drop(['Cohort_Of_Respondent','Death_Year','Death_Wave', 'Length_Current_Marriage','Is_Dead', 'Father_Alive', 'Mother_Alive', 'Father_Age', 'Mother_Age','Wave_No','Death_Age', 'Household_Id','Current_Age'],axis=1)

Non_Death_Df = pd.get_dummies(Non_Death_Df) Non_Death_Df = Non_Death_Df.astype('float64') print(len(Non_Death_Df.columns.to_list()))

In[ ]:

In[110]:

rf = RandomForestRegressor(n_estimators = 1000, random_state = 42,verbose=True) rf.fit(train, train_labels)

In[111]:

predictions = rf.predict(train) errors = abs(predictions - train_labels) mape = 100 * (errors / train_labels) accuracy = 100 - np.mean(mape) accuracy

In[112]:

R2 = r2_score(train_labels, predictions) Adj_R2 = 1-(1-R2)*(train.shape[0]-1)/(train.shape[0]-train.shape[1]-1) Adj_R2

In[113]:

predictions_test = rf.predict(test) test_errors = abs(predictions_test - test_labels) mape = 100 * (test_errors / test_labels) accuracy = 100 - np.mean(mape) accuracy

In[114]:

importances = list(rf.featureimportances) feature_importances = [(feature, round(importance, 4)) for feature, importance in zip(train.columns, importances)] feature_importances = sorted(feature_importances, key = lambda x: x[1], reverse = True) feature_importances

In[116]:

test = pd.concat([test, Other_Data_Test], axis=1) test['Actual_Death_Age'] = test_labels test['Predicted_Death_Age'] = predictions_test test.head() test.to_excel("Death Model Tests Predicted v2.xlsx")

In[118]:

predictions_non_death = rf.predict(Non_Death_Df) Non_Death_Df = pd.concat([Non_Death_Df, Other_Data_Non_Death_Df], axis=1) Non_Death_Df['Predicted_Death_Age'] = predictions_non_death Non_Death_Df.to_excel("Death Model Non Death Predicted v2.xlsx")

In[ ]:

dipanb commented 1 year ago

!/usr/bin/env python

coding: utf-8

In[3]:

import pandas as pd import numpy as np import datetime as dt from dateutil.relativedelta import relativedelta import matplotlib.pyplot as plt from matplotlib import cm from mpl_toolkits.mplot3d import Axes3D import time

In[4]:

Federal Tax Values

STD_Single = 12400 STD_HOH = 18650 STD_MFJ = 24800 STD_MFS = 12400

PE_Single = 0 PE_HOH = 0 PE_MFJ = 0 PE_MFS = 0 DependentExemption = 0

Single, HOH, MFJ, MFS

Inc_MTR = [.1,.12,.22,.24,.32,.35,.37]

Inc_Br = [] Inc_Br.append([9875,40125,85525,163300,207350,518400,10000000000]) Inc_Br.append([14101,53701,85501,163300,207350,518400,10000000000]) Inc_Br.append([19750,80250,171050,326600,414700,622050,10000000000]) Inc_Br.append([9875,40125,85525,163300,207350,311025,10000000000])

Q_Br = [] Q_MTR = [0,.15,.2] Q_Br.append([0,40000,441450]) Q_Br.append([0,53600,469050]) Q_Br.append([0,80000,496600]) Q_Br.append([0,40000,248300])

Min_EarnedIncome_ChildCredit = 2500 Percentage_EIRefundable_ChildCredit = 0.15 Max_Refundable_ChildCredit = 1400

FICA Tax rates, thresholds and variables

SS_WithholdingRate = 0.062 MedicareRate = 0.0145 HighIncomeMedicareRate = 0.0235 SS_WithholdingThreshold = 128400 MedicareRateThreshold_Single = 200000 MedicareRateThreshold_HOH = 200000 MedicareRateThreshold_MFJ = 250000 MedicareRateThreshold_MFS = 125000 MinSEIforTaxation = 400

Net Investment Income Tax threshold amount and variables

NIIT_Threshold_Single = 200000 NIIT_Threshold_HOH = 200000 NIIT_Threshold_MFJ = 250000 NIIT_Threshold_MFS = 125000 NIIT_TaxRate = 0.038

Social Security taxation

SS_CombinedIncome_Contribution = 0.5 SS_Taxation_Rate1 = 0.5 SS_Taxation_Rate2 = 0.85

Social security lower limit on Combined income for taxation

TSS_LL_Single = 25000 TSS_LL_HOH = 32000 TSS_LL_MFJ = 32000 TSS_LL_MFS = 25000

Social security upper limit on Combined income for taxation

TSS_UL_Single = 34000 TSS_UL_HOH = 44000 TSS_UL_MFJ = 44000 TSS_UL_MFS = 34000

Exemption lower limit on AGI

ELL_Single = 266700 ELL_HOH = 293350 ELL_MFJ = 320000 ELL_MFS = 160000

Exemption upper limit on AGI

EUL_Single = 389200 EUL_HOH = 415850 EUL_MFJ = 442500 EUL_MFS = 221250

Itemized Deduction limits

ItemizedDeduction_Deflator_AGI = 0.03 ItemizedDeduction_Deflator = 0.8

Tax year 2018 Capital loss limit

CLL_Single = 3000 CLL_HOH = 3000 CLL_MFJ = 3000 CLL_MFS = 1500

Tax year 2018 Standard deduction Old Age amounts

STD_Single_HOH_MFS_65 = 1650 STD_MFJ_65 = 1300 STD_MFJ_65_65 = 2600

In[139]:

def IncomeTax(FilingStatus, Age, S_Age, WageIncome, SelfEmployedIncome, NetAdjustedCapitalGains, Qual_Dividend, Ord_UnearnedIncome, SocialSecurity,Adjustments, ItemizedDeduction,Child_Dependent, Non_Child_Dependent):

if (FilingStatus=="Single"):
    NFILIng = 1
    MedicareRateThreshold = MedicareRateThreshold_Single
    StandardDeduction = STD_Single
    PersonalExemption = PE_Single
    ExemptionLowerLimit = ELL_Single
    ExemptionUpperLimit = EUL_Single
    TaxableSSLowerLimit = TSS_LL_Single
    TaxableSSUpperLimit = TSS_UL_Single
    CapitalLossLimit = CLL_Single
    NIIT_Threshold = NIIT_Threshold_Single
elif (FilingStatus=="HOH"):
    NFILIng = 2
    MedicareRateThreshold = MedicareRateThreshold_HOH
    StandardDeduction = STD_HOH
    PersonalExemption = PE_HOH
    ExemptionLowerLimit = ELL_HOH
    ExemptionUpperLimit = EUL_HOH
    TaxableSSLowerLimit = TSS_LL_HOH
    TaxableSSUpperLimit = TSS_UL_HOH
    CapitalLossLimit = CLL_HOH
    NIIT_Threshold = NIIT_Threshold_HOH
elif (FilingStatus=="MFJ"):
    NFILIng = 3
    MedicareRateThreshold = MedicareRateThreshold_MFJ
    StandardDeduction = STD_MFJ
    PersonalExemption = PE_MFJ
    ExemptionLowerLimit = ELL_MFJ
    ExemptionUpperLimit = EUL_MFJ
    TaxableSSLowerLimit = TSS_LL_MFJ
    TaxableSSUpperLimit = TSS_UL_MFJ
    CapitalLossLimit = CLL_MFJ
    NIIT_Threshold = NIIT_Threshold_MFJ
elif (FilingStatus=="MFS"):
    NFILIng = 4
    MedicareRateThreshold = MedicareRateThreshold_MFS
    StandardDeduction = STD_MFS
    PersonalExemption = PE_MFS
    ExemptionLowerLimit = ELL_MFS
    ExemptionUpperLimit = EUL_MFS
    TaxableSSLowerLimit = TSS_LL_MFS
    TaxableSSUpperLimit = TSS_UL_MFS
    CapitalLossLimit = CLL_MFS
    NIIT_Threshold = NIIT_Threshold_MFS

InflationFactor = 1
SocialSecurityTax = SS_WithholdingRate * min(WageIncome, SS_WithholdingThreshold * InflationFactor)
MedicareTax = min(WageIncome, MedicareRateThreshold) * MedicareRate + max(0, WageIncome - MedicareRateThreshold) * HighIncomeMedicareRate
FICATax = SocialSecurityTax + MedicareTax

if (SelfEmployedIncome * (1 - (SS_WithholdingRate + MedicareRate)) <= MinSEIforTaxation): 
    SelfEmploymentTax = 0
else:
    SelfEmploymentTax = 2 * SS_WithholdingRate * min(max(SS_WithholdingThreshold * InflationFactor - WageIncome, 0), SelfEmploymentIncome * (1 - (SS_WithholdingRate + MedicareRate))) + 2 * MedicareRate * SelfEmploymentIncome * (1 - (SS_WithholdingRate + MedicareRate))

EarnedIncome = WageIncome + SelfEmployedIncome
Qual_UnearnedIncome = max(-CapitalLossLimit, NetAdjustedCapitalGains) + Qual_Dividend
GrossIncome = EarnedIncome + Ord_UnearnedIncome + Qual_UnearnedIncome - Qual_Dividend + SocialSecurity
AdjustedGrossIncome = GrossIncome - Adjustments
NonTaxableInterest = 0

print(EarnedIncome,Qual_UnearnedIncome,Ord_UnearnedIncome,GrossIncome,AdjustedGrossIncome)

if (SocialSecurity == 0):
    TaxableSS = 0
else:
    PostRetirementCombinedIncome = (AdjustedGrossIncome - SocialSecurity) + NonTaxableInterest + SS_CombinedIncome_Contribution * SocialSecurity
    SS_85 = min(SocialSecurity, max(PostRetirementCombinedIncome - TaxableSSUpperLimit, 0)) / SocialSecurity

    if (PostRetirementCombinedIncome < TaxableSSLowerLimit):
        SS_50 = 0
    else: 
        SS_50 = min(TaxableSSUpperLimit - TaxableSSLowerLimit, PostRetirementCombinedIncome - TaxableSSLowerLimit) / SocialSecurity

    TaxableSS = SS_85 * SocialSecurity * SS_Taxation_Rate2 + SS_50 * SocialSecurity * SS_Taxation_Rate1
    TaxableSS = min(TaxableSS, SocialSecurity * SS_Taxation_Rate2)

GrossIncome = GrossIncome - SocialSecurity + TaxableSS
AdjustedGrossIncome = GrossIncome - Adjustments

print(TaxableSS,GrossIncome,AdjustedGrossIncome)

if (FilingStatus == "MFJ"):
    if (Age >= 65 and S_Age >= 65): 
        StandardDeduction_F = ((StandardDeduction + STD_MFJ_65_65) * InflationFactor)
    elif ((Age < 65 and S_Age >= 65) or (Age >= 65 and S_Age < 65)):
        StandardDeduction_F = ((StandardDeduction + STD_MFJ_65) * InflationFactor)
    else: 
        StandardDeduction_F = (StandardDeduction * InflationFactor)
else:
    if (Age >= 65): 
        StandardDeduction_F = ((StandardDeduction + STD_Single_HOH_MFS_65) * InflationFactor)
    else: 
        StandardDeduction_F = (StandardDeduction * InflationFactor)

ItemizedDeduction_Phaseout = 0

if (AdjustedGrossIncome > (ExemptionLowerLimit * InflationFactor)): 
    ItemizedDeduction_Phaseout = min(ItemizedDeduction_Deflator_AGI * (AdjustedGrossIncome - (ExemptionLowerLimit * InflationFactor)), ItemizedDeduction_Deflator * (ItemizedDeduction))
else: 
    ItemizedDeduction_Phaseout = 0

Deduction = max(StandardDeduction, StandardDeduction_F, ItemizedDeduction - ItemizedDeduction_Phaseout)
Exemption = 0

print(Deduction)

#Taxable Income calculation
if ((Deduction + Exemption) > (EarnedIncome + Ord_UnearnedIncome - Qual_Dividend + TaxableSS - Adjustments)): 
    OtherTaxableIncome = 0
    Qual_UnearnedIncome = Qual_UnearnedIncome - (Deduction + Exemption - (EarnedIncome + Ord_UnearnedIncome - Qual_Dividend - Adjustments + TaxableSS))
else: 
    OtherTaxableIncome = EarnedIncome + Ord_UnearnedIncome - Qual_Dividend - Adjustments + TaxableSS - Deduction - Exemption

print(OtherTaxableIncome,EarnedIncome,Ord_UnearnedIncome,Qual_Dividend,TaxableSS,Deduction)

if (Qual_UnearnedIncome < 0):
    OtherTaxableIncome = max(OtherTaxableIncome + Qual_UnearnedIncome, 0)
    Qual_UnearnedIncome = 0

TaxableIncome = max(OtherTaxableIncome + Qual_UnearnedIncome, 0)

print(OtherTaxableIncome,TaxableIncome)

#Tax calculation for Qualified Unearned Income(LTCG, Qualified dividends)
if (TaxableIncome <= (Q_Br[NFILIng-1][1] * InflationFactor)): 
    CapitalGainTax = 0
elif ((TaxableIncome > (Q_Br[NFILIng-1][1] * InflationFactor)) and (TaxableIncome <= (Q_Br[NFILIng-1][2] * InflationFactor))):
    if (OtherTaxableIncome <= 0): 
        CapitalGainTax = Q_MTR[1] * (Qual_UnearnedIncome - (Q_Br[NFILIng-1][1] * InflationFactor))
    elif (OtherTaxableIncome < (Q_Br[NFILIng-1][1] * InflationFactor)): 
        CapitalGainTax = ((Q_Br[NFILIng-1][1]* InflationFactor) - OtherTaxableIncome) * Q_MTR[0] + Q_MTR[1] * (Qual_UnearnedIncome - ((Q_Br[NFILIng-1][1] * InflationFactor) - OtherTaxableIncome))
    else: 
        CapitalGainTax = (Q_Br[NFILIng-1][1] * InflationFactor) * Q_MTR[0] + Q_MTR[1] * Qual_UnearnedIncome
elif TaxableIncome > (Q_Br[NFILIng-1][2] * InflationFactor):
    if (OtherTaxableIncome <= 0):
        CapitalGainTax = (Q_Br[NFILIng-1][1] * InflationFactor) * Q_MTR[0] + Q_MTR[1] * ((Q_Br[NFILIng-1][2] * InflationFactor) - (Q_Br[NFILIng-1][1] * InflationFactor)) + Q_MTR[2] * (Qual_UnearnedIncome - (Q_Br[NFILIng-1][2] * InflationFactor))
    elif (OtherTaxableIncome < (Q_Br[NFILIng-1][1] * InflationFactor)):
        CapitalGainTax = ((Q_Br[NFILIng-1][1] * InflationFactor) - OtherTaxableIncome) * Q_MTR[0] + Q_MTR[1] * (Q_Br[NFILIng-1][2]* InflationFactor - Q_Br[NFILIng-1][1] * InflationFactor) + Q_MTR[2] * (Qual_UnearnedIncome - ((Q_Br[NFILIng-1][1] * InflationFactor - OtherTaxableIncome) +(Q_Br[NFILIng-1][2] * InflationFactor - Q_Br[NFILIng-1][1] * InflationFactor)))
    elif (OtherTaxableIncome < (Q_Br[NFILIng-1][2]* InflationFactor)):
        CapitalGainTax = (Q_Br[NFILIng-1][1] * InflationFactor) * Q_MTR[0] + Q_MTR[1] * ((Q_Br[NFILIng-1][2] * InflationFactor) - OtherTaxableIncome) + Q_MTR[2] * (Qual_UnearnedIncome - ((Q_Br[NFILIng-1][2] * InflationFactor) - OtherTaxableIncome))
    elif (OtherTaxableIncome > (Q_Br[NFILIng-1][2] * InflationFactor)):
        CapitalGainTax = Qual_UnearnedIncome * Q_MTR[2]

print(CapitalGainTax)

#Tax on Other Taxable Income - EarnedIncome + Ord_UnearnedIncome
counter = 1
Tax1 = 0
if (OtherTaxableIncome < 0): 
    Tax1 = 0
elif (OtherTaxableIncome <= (Inc_Br[NFILIng-1][counter-1] * InflationFactor)):
    Tax1 = OtherTaxableIncome * Inc_MTR[counter-1]
else: 
    while (OtherTaxableIncome > (Inc_Br[NFILIng-1][counter-1] * InflationFactor)):

print(Inc_Br[NFILIng-1][counter-1])

        if (counter > 1):

print(counter,Tax1,Inc_Br[NFILIng-1][counter-1],Inc_Br[NFILIng-1][counter-2],Inc_MTR[counter-1])

            Tax1 = Tax1 + (Inc_Br[NFILIng-1][counter-1] - Inc_Br[NFILIng-1][counter-2]) * Inc_MTR[counter-1] * InflationFactor
        else:

print(counter,Tax1,Inc_Br[NFILIng-1][counter-1],Inc_Br[NFILIng-1][counter-2],Inc_MTR[counter-1])

            Tax1 = Tax1 + Inc_Br[NFILIng-1][counter-1] * Inc_MTR[counter-1] * InflationFactor
        #print(Tax1)    
        counter = counter + 1

print(counter,Tax1,Inc_Br[NFILIng-1][counter-2],Inc_MTR[counter-1])

    Tax1 = Tax1 + (OtherTaxableIncome - Inc_Br[NFILIng-1][counter-2] * InflationFactor) * Inc_MTR[counter-1]

print(Tax1)

print(NFILIng-1,Inc_Br[NFILIng-1],Inc_MTR)

print(OtherTaxableIncome,Inc_Br[NFILIng-1][counter-1],Inc_MTR[counter-1],Tax1)

#Federal Tax, Federal marginal rate and next Federal marginal rate
FederalTax = CapitalGainTax + Tax1

return FederalTax
#return FederalTax,Tax1,CapitalGainTax,TaxableIncome,Inc_Br[NFILIng-1][0]

In[140]:

IncomeTax(FilingStatus, Age, S_Age, WageIncome, SelfEmployedIncome, NetAdjustedCapitalGains, Qual_Dividend,

Ord_UnearnedIncome, SocialSecurity,Adjustments, ItemizedDeduction,Child_Dependent, Non_Child_Dependent)

IncomeTax("MFJ", 71, 71, 100000, 0, 10150.75, 517.5,42956.66,40000,0,0,0,0)

In[141]:

RMD_Divisor = pd.DataFrame(data = [[70,27.4],[71,26.5],[72,25.6],[73,24.7],[74,23.8],[75,22.9],[76,22],[77,21.2], [78,20.3],[79,19.5],[80,18.7],[81,17.9],[82,17.1],[83,16.3],[84,15.5],[85,14.8], [86,14.1],[87,13.4],[88,12.7],[89,12],[90,11.4],[91,10.8],[92,10.2],[93,9.6], [94,9.1],[95,8.6],[96,8.1],[97,7.6],[98,7.1],[99,6.7],[100,6.3],[101,5.9], [102,5.5],[103,5.2],[104,4.9],[105,4.5],[106,4.2],[107,3.9],[108,3.7],[109,3.4], [110,3.1],[111,2.9],[112,2.6],[113,2.4],[114,2.1],[115,1.9],[116,1.9],[117,1.9], [118,1.9],[119,1.9],[120,1.9],[121,1.9],[122,1.9],[123,1.9],[124,1.9],[125,1.9], [126,1.9],[127,1.9]], columns = ['Age','Divisor'])

In[183]:

def Simulate_One_Year(FilingStatus, Age,S_Age,TradBal_C,TradBal_S, TaxableBal,TaxableCB, RothBal, Ret_Total, Ret_Interest, Ret_Dividend, PRLT, Strategy, TradPer, TaxablePer, RothPer, ATWD, TotalATWDReq,WHPer, SocialSecurity, WageIncome, UnearnedIncome, WageWH, UIncWH, LastTax,LastWH, CFLast):

if (Age>=70):
    RMD_D_C = np.array(RMD_Divisor.loc[RMD_Divisor['Age']==Age,'Divisor'])[0]
    RMD_C = TradBal_C/RMD_D_C
else:
    RMD_C = 0
    RMD_S = 0

if (S_Age>=70):
    RMD_D_S = np.array(RMD_Divisor.loc[RMD_Divisor['Age']==S_Age,'Divisor'])[0]
    RMD_S = TradBal_S/RMD_D_S
else:
    RMD_S = 0

print(RMD_C,RMD_S)

TradPerPerm = TradPer
TaxablePerPerm = TaxablePer
RothPerPerm = RothPer

if ((TradBal_C+TradBal_S)>0):
    Trad_CFraction = TradBal_C/(TradBal_C+TradBal_S)
else:
    Trad_CFraction = 0

TradBal_C = TradBal_C * (1+Ret_Total)
TradBal_S = TradBal_S * (1+Ret_Total)
Interest = TaxableBal * Ret_Interest
Dividend = TaxableBal * Ret_Dividend
TaxableCB = max(TaxableCB + Dividend + Interest + TaxableBal*(Ret_Total-Ret_Interest-Ret_Dividend)*PRLT,0)
Distributions = Dividend + Interest + TaxableBal*(Ret_Total-Ret_Interest-Ret_Dividend)*PRLT
TaxableBal = TaxableBal * (1+Ret_Total)
RothBal = RothBal * (1+Ret_Total)

print(TradBal_C,TradBal_S,Interest,Dividend,TaxableCB,Distributions,TaxableBal,RothBal)

TradBal = TradBal_C + TradBal_S
TotalBal = TradBal + TaxableBal + RothBal

if (Strategy == "PRORATA"):
    TradPer = TradBal/TotalBal
    TaxablePer = TaxableBal/TotalBal
    RothBal = RothBal/TotalBal

TrueUp = LastTax - LastWH

if (ATWD==0):
    ATWD = -min(TrueUp, WageIncome-WageWH+UnearnedIncome-UIncWH + SocialSecurity - TotalATWDReq)

if (TradPer==0 and TaxablePer==0 and RothPer==0):
    TradPer = 1/3
    TaxablePer = 1/3
    RothPer = 1/3

print(TradPer,TaxablePer,RothPer)

if (TradPer>0 and TradBal<0.1):
    TradPer = 0
    if ((TaxablePer+RothPer) == 0):
        TaxablePer = 0.5
        RothPer = 0.5
    else:
        temp = TaxablePer + RothPer
        TaxablePer = TaxablePer/temp
        RothPer = RothPer/temp

if (TaxablePer>0 and TaxableBal<0.1):
    TaxablePer = 0
    if ((TradPer+RothPer) == 0):
        TradPer = 0.5
        RothPer = 0.5
    else:
        temp = TradPer + RothPer
        TradPer = TradPer/temp
        RothPer = RothPer/temp

if (RothPer>0 and RothBal<0.1):
    RothPer = 0
    if ((TaxablePer+TradPer) == 0):
        TaxablePer = 0.5
        TradPer = 0.5
    else:
        temp = TaxablePer + TradPer
        TaxablePer = TaxablePer/temp
        TradPer = TradPer/temp

print(TradPer,TaxablePer,RothPer)

PreTaxWD = max((ATWD + TrueUp)/(1-TradPer*WHPer),0)

print(PreTaxWD,TradBal,TaxableBal,RothBal,TradPer,TaxablePer,RothPer)

if ((TradPer*PreTaxWD)>TradBal):
    PreTaxWD = max(0,ATWD+TrueUp+TradBal*WHPer)
    R_Adj = TradPer - TradBal/PreTaxWD
    TradPer = TradBal/PreTaxWD
    if ((TaxablePer+RothPer) == 0):
        TaxablePer = TaxablePer + R_Adj/2
        RothPer = 1 - TradPer - TaxablePer
    else:
        TaxablePer = TaxablePer + R_Adj * TaxablePer/(TaxablePer+RothPer)
        RothPer = 1 - TradPer - TaxablePer

print(TradPer,TaxablePer,RothPer)

if ((TaxablePer*PreTaxWD)>TaxableBal):
    if ((TradPer+RothPer) == 0):
        PreTaxWD = max(0,(ATWD+TrueUp-TaxableBal)/(1-WHPer/2) + TaxableBal)
        R_Adj = TaxablePer - TaxableBal/PreTaxWD
        TaxablePer = TaxableBal/PreTaxWD
        TradPer = TradPer + R_Adj/2
        RothPer = 1 - TradPer - TaxablePer
    else:
        PreTaxWD = max(0,(ATWD+TrueUp-TaxableBal)/(1-WHPer*TradPer/(TradPer+RothPer)) + TaxableBal)
        R_Adj = TaxablePer - TaxableBal/PreTaxWD
        TaxablePer = TaxableBal/PreTaxWD
        TradPer = TradPer + R_Adj*TradPer/(TradPer+RothPer)
        RothPer = 1 - TradPer - TaxablePer

print(TradPer,TaxablePer,RothPer)

if ((RothPer*PreTaxWD)>RothBal):
    if ((TaxablePer+TradPer) == 0):
        PreTaxWD = max(0,(ATWD+TrueUp-RothBal)/(1-WHPer/2) + RothBal)
        R_Adj = RothPer - RothBal/PreTaxWD
        RothPer = RothBal/PreTaxWD
        TradPer = TradPer + R_Adj/2
        TaxablePer = 1 - TradPer - RothPer
    else:
        PreTaxWD = max(0,(ATWD+TrueUp-RothBal)/(1-WHPer*TradPer/(TradPer+TaxablePer)) + RothBal)
        R_Adj = RothPer - RothBal/PreTaxWD
        RothPer = RothBal/PreTaxWD
        TradPer = TradPer + R_Adj*TradPer/(TradPer+TaxablePer)
        TaxablePer = 1 - TradPer - RothPer

PreTaxWD = max((ATWD + TrueUp)/(1-TradPer*WHPer),0)

WdTrad = PreTaxWD * TradPer
WdTaxable = PreTaxWD * TaxablePer
WdRoth = PreTaxWD * RothPer

print(TradPer,TaxablePer,RothPer,PreTaxWD,WdTrad,WdTaxable,WdRoth)

if ((RMD_C+RMD_S)>WdTrad):
    WdTrad = RMD_C + RMD_S
    if ((TaxablePerPerm+RothPerPerm)>0):
        WdTaxable = max(0, (ATWD+TrueUp-WdTrad*(1-WHPer))*TaxablePerPerm/(TaxablePerPerm+RothPerPerm))  
        WdRoth = max(0,ATWD+TrueUp-WdTrad*(1-WHPer)-WdTaxable)
    else:
        WdTaxable = max(0, (ATWD+TrueUp-WdTrad*(1-WHPer))*0.5)  
        WdRoth = max(0,ATWD+TrueUp-WdTrad*(1-WHPer)-WdTaxable)        

WdTrad = min(WdTrad,TradBal)
WdTaxable = min(WdTaxable,TaxableBal)
WdRoth = min(WdRoth,RothBal)

PreTaxWDReqAftrR1 = ATWD + TrueUp - WdTrad*(1-WHPer) - WdTaxable - WdRoth

print(WdTrad,WdTaxable,WdRoth)

if (PreTaxWDReqAftrR1>0.1):
    if ((TotalBal-WdTrad-WdTaxable-WdRoth)<PreTaxWDReqAftrR1):
        WdTrad = TradBal
        WdTaxable = TaxableBal
        WdRoth = RothBal

    else:
        if ((TradBal-WdTrad)<.1):
            TradPer = 0
            if ((WdTaxable+WdRoth)==0):
                TaxablePer = 0.5
                RothPer = 0.5
            else:
                TaxablePer = WdTaxable/(WdTaxable+WdRoth)
                RothPer = 1 - TaxablePer
        elif ((TaxableBal-WdTaxable)<.1):
            TaxablePer = 0
            if ((TradPer+RothPer)==0):
                TradPer = 0.5
                RothPer = 0.5
            else:
                TradPer = TradPer/(TradPer+RothPer)
                RothPer = 1 - TradPer

            if (WdTrad>(PreTaxWD-WdTaxable)*TradPer):
                TradPer = 0
                RothPer = 1
        elif ((RothBal-WdRoth)<.1):
            RothPer = 0
            if ((TradPer+TaxablePer)==0):
                TaxablePer = 0.5
                TradPer = 0.5
            else:
                TradPer = TradPer/(TradPer+TaxablePer)
                TaxablePer = 1 - TradPer

print(WdTrad,TradPer,(PreTaxWD-WdRoth)*TradPer)

            if (WdTrad>(PreTaxWD-WdRoth)*TradPer):
                TradPer = 0
                TaxablePer = 1

        PreTaxWDReqAftrR1 = PreTaxWDReqAftrR1/(1-WHPer*TradPer)
        WdTrad = min(TradBal,WdTrad+PreTaxWDReqAftrR1*TradPer)
        WdTaxable = min(TaxableBal, WdTaxable+PreTaxWDReqAftrR1*TaxablePer)
        WdRoth = min(RothBal,WdRoth + PreTaxWDReqAftrR1*RothPer)

PreTaxWDReqAftrR2 = ATWD + TrueUp - WdTrad*(1-WHPer) - WdTaxable - WdRoth                   
if (PreTaxWDReqAftrR2>.1):
    if ((TotalBal-WdTrad-WdTaxable-WdRoth)<PreTaxWDReqAftrR1):
        WdTrad = TradBal
        WdTaxable = TaxableBal
        WdRoth = RothBal

    else:
        if ((TradBal-WdTrad)<.1 and (TaxableBal-WdTaxable)<.1):
            RothPer = 1
            TradPer = 0
            TaxablePer = 0
        elif ((TradBal-WdTrad)<.1 and (RothBal-WdRoth)<.1):
            TaxablePer = 1
            RothPer = 0
            TradPer = 0
        elif ((TaxableBal-WdTaxable)<.1 and (RothBal-WdRoth)<.1):
            TradPer = 1
            TaxablePer = 0
            RothPer = 0

PreTaxWDReqAftrR3 = PreTaxWDReqAftrR2/(1-WHPer*TradPer)
WdTrad = min(TradBal,WdTrad+PreTaxWDReqAftrR3*TradPer)
WdTaxable = min(TaxableBal, WdTaxable+PreTaxWDReqAftrR3*TaxablePer)
WdRoth = min(RothBal,WdRoth + PreTaxWDReqAftrR3*RothPer)

WdTaxableDist = min(WdTaxable,Distributions)
WdTaxableBal = WdTaxable - WdTaxableDist
if (TaxableBal==0):
    RealizedGain = 0
else:
    RealizedGain = WdTaxableBal *(1-(TaxableCB-Distributions)/(TaxableBal-Distributions)) + PRLT * TaxableBal/(1+Ret_Total) * (Ret_Total - Ret_Interest - Ret_Dividend) 

CF = min(CFLast,0) + RealizedGain
RealizedGain = max(0,CF)

Tax = IncomeTax(FilingStatus, Age, S_Age, WageIncome, 0, RealizedGain, Dividend,
          WdTrad+Interest+UnearnedIncome+Dividend, SocialSecurity,0,0,0,0)

print(FilingStatus, Age, S_Age, WageIncome, 0, RealizedGain, Dividend,

WdTrad+Interest+UnearnedIncome+Dividend, SocialSecurity,0,0,0,0,Tax)

NetIncome = WdTaxable + WdRoth + WdTrad * (1-WHPer) - TrueUp

Shortfall = ATWD - NetIncome

print(TradBal,TaxableBal,TaxableCB,RothBal,WdTaxableDist)

TradBal = TradBal - WdTrad
TradBal_C = TradBal* Trad_CFraction
TradBal_S = TradBal * (1-Trad_CFraction)
if (TaxableBal-WdTaxable)==0:
    TaxableCB = 0
else:
    TaxableCB = TaxableCB-WdTaxableDist - (TaxableCB-WdTaxableDist)*WdTaxableBal/(TaxableBal-WdTaxableDist)
TaxableBal = TaxableBal - WdTaxable
RothBal = RothBal - WdRoth

CF = RealizedGain

return (Shortfall,NetIncome,WdTrad,WdTaxable,WdRoth,Tax,TradBal_C,TradBal_S,TaxableBal,TaxableCB,RothBal,CF)

In[184]:

FilingStatus = "MFJ"

Age = 71

S_Age = 71

TradBal_C = 675000

TradBal_S = 0

TaxableBal = 75000

TaxableCB = 45000

RothBal = 0

Ret_Total = 0.0462

Ret_Interest = .0103

Ret_Dividend = .0069

PRLT = .3

Strategy = "RATIO"

TradPer = .5

TaxablePer = .3

RothPer = .2

ATWD = 62500

TotalATWDReq = 192500

WHPer = .1

SocialSecurity = 40000

WageIncome = 100000

UnearnedIncome = 0

WageWH = .1

UIncWH = .1

LastTax = 0

LastWH = 0

CFLast = 0

#

Simulate_One_Year(FilingStatus, Age,S_Age,TradBal_C,TradBal_S, TaxableBal,TaxableCB, RothBal,

Ret_Total, Ret_Interest, Ret_Dividend, PRLT,

Strategy, TradPer, TaxablePer, RothPer, ATWD, TotalATWDReq,WHPer,

SocialSecurity, WageIncome, UnearnedIncome, WageWH, UIncWH,

LastTax,LastWH, CFLast)

In[185]:

def Simulate(FilingStatus,Age,S_Age,TradBal_C,TradBal_S, TaxableBal,TaxableCB, RothBal, Ret_Total_Vec,Ret_Interest_Vec, Ret_Dividend_Vec, PRLT,Strategy, WHPer,WageWHPer,UIWHPer, ATWD_Vec, SocialSecurity_Vec,TotalATWDReq_Vec,WageIncome_Vec,UInc_Vec, TradPer, TaxablePer, RothPer, Horizon):

Longevity = 0
for i in range(1,Horizon+1):

    #print(i)

    WageWH = WageIncome_Vec[i-1]*WageWHPer
    UIncWH = UInc_Vec[i-1]*UIWHPer
    if i==1:
        LastTax = 0
        LastWH = 0
        CFLast = 0

    Shortfall,NetIncome,WdTrad,WdTaxable,WdRoth,LastTax,TradBal_C,TradBal_S,TaxableBal,TaxableCB,RothBal,CFLast = Simulate_One_Year(FilingStatus, Age,S_Age,TradBal_C,TradBal_S, TaxableBal,TaxableCB, RothBal,Ret_Total_Vec[i-1], Ret_Interest_Vec[i-1], Ret_Dividend_Vec[i-1],PRLT, Strategy, TradPer, TaxablePer, RothPer, ATWD_Vec[i-1],TotalATWDReq_Vec[i-1],WHPer,SocialSecurity_Vec[i-1], WageIncome_Vec[i-1], UInc_Vec[i-1], WageWH, UIncWH,LastTax,LastWH, CFLast)
    LastWH = WageWH + UIncWH + WdTrad*WHPer

    #print(i,"-",round(WdTrad,2),round(WdTaxable,2),round(WdRoth,2),
    #      round(LastTax,2),round(TradBal_C,2),round(TradBal_S,2),
    #      round(TaxableBal,2),round(TaxableCB,2),round(RothBal,2))

    Age = Age+1
    S_Age = S_Age+1

    if (TradBal_C+TradBal_S+TaxableBal+RothBal)<0.01:
        Longevity = Longevity + 1 - Shortfall/ATWD_Vec[i-1]
        break
    else:
        Longevity = Longevity + 1

return Longevity,(TradBal_C+TradBal_S+TaxableBal+RothBal)

In[186]:

Client_DOB = dt.datetime(1955,1,3) Spouse_DOB = dt.datetime(1955,1,3) FilingStatus = "Single" CurrentDate = dt.datetime(2020,1,1) TradBal_C = 75000 TradBal_S = 0 TaxableBal = 175000 TaxableCB = 1 RothBal = 105000 LTAA = "50%" PRLT = 0.3 WHPer = 0.1 WageWHPer = .1 UIWHPer = .1 Strategy = "RATIO"

Calculated

Age = relativedelta(CurrentDate,Client_DOB).years S_Age = relativedelta(CurrentDate,Spouse_DOB).years Horizon = 110 - (Age if FilingStatus!='MFJ' else min(Age,S_Age))

if Strategy=="ORDER": TaxablePer = .9999 TradPer = 1-TaxablePer RothPer = 0 ############################

SocialSecurity_Vec = np.repeat(0,Horizon) TotalATWDReq_Vec = np.repeat(67500,Horizon) WageIncome_Vec = np.repeat(0,Horizon) UInc_Vec = np.repeat(0,Horizon)

Calculated

ATWD_Vec = TotalATWDReq_Vec - WageIncome_Vec (1-WageWHPer) - UInc_Vec (1-UIWHPer) - SocialSecurity_Vec Returns = {'20%':[0.0303,0.0146,0.0028],'30%':[0.0363,0.0136,0.0042],'40%':[0.0412,0.012,0.0055], '50%':[0.0462,0.0103,0.0069],'60%':[0.0504,0.0086,0.0083],'70%':[0.0531,0.0063,0.0097], '85%':[0.0596,0.0035,0.0118]} Ret_Total_Vec = np.repeat(Returns[LTAA][0],Horizon) Ret_Interest_Vec = np.repeat(Returns[LTAA][1],Horizon) Ret_Dividend_Vec = np.repeat(Returns[LTAA][2],Horizon)

TradPer = 0.0 TaxablePer = 0.6 RothPer = 0.4 a = Simulate(FilingStatus,Age,S_Age,TradBal_C,TradBal_S, TaxableBal,TaxableCB, RothBal, Ret_Total_Vec,Ret_Interest_Vec, Ret_Dividend_Vec, PRLT,Strategy, WHPer,WageWHPer,UIWHPer, ATWD_Vec, SocialSecurity_Vec,TotalATWDReq_Vec,WageIncome_Vec,UInc_Vec, TradPer, TaxablePer, RothPer, Horizon)

print(10)

In[27]:

def Grid10Optim(FilingStatus, Age, S_Age, TradBal_C, TradBal_S, TaxableBal, TaxableCB, RothBal, Ret_Total_Vec, Ret_Interest_Vec, Ret_Dividend_Vec, PRLT, Strategy, WHPer, WageWHPer, UIWHPer, ATWD_Vec, SocialSecurity_Vec, TotalATWDReq_Vec, WageIncome_Vec, UInc_Vec, Horizon): max_l = 0 max_bal = 0 max_trad = 0 max_taxable = 0

for i in range(0, 11):
    for j in range(0, 11):
        if (i + j) / 10 <= 1:

            l, bal = Simulate(FilingStatus, Age, S_Age, TradBal_C, TradBal_S, TaxableBal, TaxableCB, RothBal,
                              Ret_Total_Vec, Ret_Interest_Vec, Ret_Dividend_Vec,
                              PRLT, Strategy, WHPer, WageWHPer, UIWHPer,
                              ATWD_Vec, SocialSecurity_Vec, TotalATWDReq_Vec, WageIncome_Vec, UInc_Vec,
                              i / 10, j / 10, 1 - i / 10 - j / 10, Horizon)

            if i == 0 and j == 0:
                solutions = pd.DataFrame([[l, i / 10, j / 10]], columns=['Longevity', 'Trad', 'Taxable'])
            else:
                solutions.loc[len(solutions)] = [l, i / 10, j / 10]

            if (l > max_l or (l == max_l and bal > max_bal)):
                max_trad = i / 10
                max_taxable = j / 10
                max_l = l
                max_bal = bal

#                 print(i/10,j/10,l,bal,max_l,max_bal)

return max_trad, max_taxable, round(1 - max_trad - max_taxable, 1), solutions, max_l

Client_DOB = dt.datetime(1955,1,3) Spouse_DOB = dt.datetime(1955,1,3) FilingStatus = "Single" CurrentDate = dt.datetime(2020,1,1) TradBal_C = 75000 TradBal_S = 0 TaxableBal = 175000 TaxableCB = 1 RothBal = 105000 LTAA = "50%" PRLT = 0.3 WHPer = 0.1 WageWHPer = .1 UIWHPer = .1 Strategy = "RATIO"

Calculated

Age = relativedelta(CurrentDate,Client_DOB).years S_Age = relativedelta(CurrentDate,Spouse_DOB).years Horizon = 110 - (Age if FilingStatus!='MFJ' else min(Age,S_Age))

if Strategy=="ORDER": TaxablePer = .9999 TradPer = 1-TaxablePer RothPer = 0 ############################

SocialSecurity_Vec = np.repeat(0,Horizon) TotalATWDReq_Vec = np.repeat(67500,Horizon) WageIncome_Vec = np.repeat(0,Horizon) UInc_Vec = np.repeat(0,Horizon)

Calculated

ATWD_Vec = TotalATWDReq_Vec - WageIncome_Vec (1-WageWHPer) - UInc_Vec (1-UIWHPer) - SocialSecurity_Vec Returns = {'20%':[0.0303,0.0146,0.0028],'30%':[0.0363,0.0136,0.0042],'40%':[0.0412,0.012,0.0055], '50%':[0.0462,0.0103,0.0069],'60%':[0.0504,0.0086,0.0083],'70%':[0.0531,0.0063,0.0097], '85%':[0.0596,0.0035,0.0118]} Ret_Total_Vec = np.repeat(Returns[LTAA][0],Horizon) Ret_Interest_Vec = np.repeat(Returns[LTAA][1],Horizon) Ret_Dividend_Vec = np.repeat(Returns[LTAA][2],Horizon)

TradPer = .6 TaxablePer = 0 RothPer = .40

a = Grid10Optim(FilingStatus, Age, S_Age, TradBal_C, TradBal_S, TaxableBal, TaxableCB, RothBal, Ret_Total_Vec, Ret_Interest_Vec, Ret_Dividend_Vec, PRLT, Strategy, WHPer, WageWHPer, UIWHPer, ATWD_Vec, SocialSecurity_Vec, TotalATWDReq_Vec, WageIncome_Vec, UInc_Vec, Horizon)

print(a)

In[31]:

In[32]:

In[ ]:

class standardCase: @property def filing_status(self) -> str: return self._filing_status

@filing_status.setter
def filing_status(self, filing_status):
    self._filing_status = filing_status

@property
def client_DOB(self) -> dt.datetime:
    return self._client_DOB

@client_DOB.setter
def client_DOB(self, client_DOB):
    self._client_DOB = client_DOB

@property
def spouse_DOB(self) -> dt.datetime:
    return self._spouse_DOB

@spouse_DOB.setter
def spouse_DOB(self, spouse_DOB):
    self._spouse_DOB = spouse_DOB

@property
def client_age(self) -> int:
    return self._client_age

@client_age.setter
def client_age(self, client_age):
    self._client_age = client_age

@property
def spouse_age(self) -> int:
    return self._spouse_age

@spouse_age.setter
def spouse_age(self, spouse_age):
    self._spouse_age = spouse_age

@property
def expenses(self) -> int:
    return self._expenses

@expenses.setter
def expenses(self, expenses):
    self._expenses = expenses

@property
def trad_bal(self) -> int:
    return self._trad_bal

@trad_bal.setter
def trad_bal(self, trad_bal):
    self._trad_bal = trad_bal

@property
def tax_bal(self) -> int:
    return self._tax_bal

@tax_bal.setter
def tax_bal(self, tax_bal):
    self._tax_bal = tax_bal

@property
def tax_cb(self) -> int:
    return self._tax_cb

@tax_cb.setter
def tax_cb(self, tax_cb):
    self._tax_cb = tax_cb

@property
def roth_bal(self) -> int:
    return self._roth_bal

@roth_bal.setter
def roth_bal(self, roth_bal):
    self._roth_bal = roth_bal

def __init__(self, filing_status: str = None, client_DOB: dt.datetime = None, spouse_DOB: dt.datetime = None, \
             client_age: int = None, spouse_age: int = None, expenses: int = None, \
             trad_bal: int = None, tax_bal: int = None, tax_cb: int = None, roth_bal: int = None):
    self.filing_status = filing_status
    self.client_DOB = client_DOB
    self.spouse_DOB = spouse_DOB
    self.client_age = client_age
    self.spouse_age = spouse_age
    self.expenses = expenses
    self.trad_bal = trad_bal
    self.tax_bal = tax_bal
    self.tax_cb = tax_cb
    self.roth_bal = roth_bal

def __str__(self):
    return self.__repr__()

def __repr__(self):
    return "FilingStatus: {}\n" \
           "Client DOB: {}\n" \
           "Spouse DOB: {}\n" \
           "Client Age: {}\n" \
           "Spouse Age: {}\n" \
           "Expenses: {}\n" \
           "Traditional Balance: {}\n" \
           "Taxable Balance: {}\n" \
           "Taxable Cost Basis: {}\n" \
           "Roth Balance: {}".format(self.filing_status, self.client_DOB, self.spouse_DOB, self.client_age,
                                     self.spouse_age, \
                                     self.expenses, self.trad_bal, self.tax_bal, self.tax_cb, self.roth_bal)

In[ ]:

Read in all cases

standard_cases_data = pd.read_csv("Standard_Cases.csv").set_index(["Index"]) stochastic_returns = pd.read_csv("250 Returns 50% TAM.csv").set_index(["SIM\YEAR"])

Static_Returns = {'20%': [0.0303, 0.0146, 0.0028], '30%': [0.0363, 0.0136, 0.0042], '40%': [0.0412, 0.012, 0.0055], '50%': [0.0462, 0.0103, 0.0069], '60%': [0.0504, 0.0086, 0.0083], '70%': [0.0531, 0.0063, 0.0097], '85%': [0.0596, 0.0035, 0.0118]}

number_of_cases = 1600 case_list = []

for index in range(number_of_cases): filing_status = standard_cases_data.iat[index, 0] client_DOB = standard_cases_data.iat[index, 1] spouse_DOB = standard_cases_data.iat[index, 2] client_age = standard_cases_data.iat[index, 3] spouse_age = standard_cases_data.iat[index, 4] expenses = standard_cases_data.iat[index, 20] trad_bal = standard_cases_data.iat[index, 21] tax_bal = standard_cases_data.iat[index, 22] tax_cb = standard_cases_data.iat[index, 23] roth_bal = standard_cases_data.iat[index, 24]

new_case = standardCase(filing_status, client_DOB, spouse_DOB, client_age, spouse_age, expenses, trad_bal, tax_bal,
                        tax_cb, roth_bal)
case_list.append(new_case)

########Set Constants################### CurrentDate = dt.datetime(2020, 1, 1) LTAA = "50%" PRLT = 0.3 WHPer = 0.1 WageWHPer = .1 UIWHPer = .1 Strategy = "ORDER"

start_time = time.time()

index = 0 for case in case_list: print(f"Index {str(index)} Starting!")

Calculated

Horizon = 110 - (case.client_age if case.filing_status != 'MFJ' else min(case.client_age, case.spouse_age))

if Strategy == "ORDER":
    TaxablePer = .9999
    TradPer = 1 - TaxablePer
    RothPer = 0
############################

SocialSecurity_Vec = np.repeat(0, Horizon)
TotalATWDReq_Vec = np.repeat(case.expenses, Horizon)
WageIncome_Vec = np.repeat(0, Horizon)
UInc_Vec = np.repeat(0, Horizon)

######Calculated##########################
ATWD_Vec = TotalATWDReq_Vec - WageIncome_Vec * (1 - WageWHPer) - UInc_Vec * (1 - UIWHPer) - SocialSecurity_Vec
Ret_Total_Vec = np.repeat(Static_Returns[LTAA][0], Horizon)
Ret_Interest_Vec = np.repeat(Static_Returns[LTAA][1], Horizon)
Ret_Dividend_Vec = np.repeat(Static_Returns[LTAA][2], Horizon)

##### conventional wisdom, static returns######
# longevity_obj = Simulate(
#     FilingStatus=case.filing_status,
#     Age=case.client_age,
#     S_Age=case.spouse_age,
#     TradBal_C=case.trad_bal,
#     TradBal_S=0,
#     TaxableBal=case.tax_bal,
#     TaxableCB=case.tax_cb,
#     RothBal=case.roth_bal,
#     Ret_Total_Vec=Ret_Total_Vec,
#     Ret_Interest_Vec=Ret_Interest_Vec,
#     Ret_Dividend_Vec=Ret_Dividend_Vec,
#     PRLT=PRLT,
#     Strategy=Strategy,
#     WHPer=WHPer,
#     WageWHPer=WageWHPer,
#     UIWHPer=UIWHPer,
#     ATWD_Vec=ATWD_Vec,
#     SocialSecurity_Vec=SocialSecurity_Vec,
#     TotalATWDReq_Vec=TotalATWDReq_Vec,
#     WageIncome_Vec=TotalATWDReq_Vec,
#     UInc_Vec=UInc_Vec,
#     TradPer=.9999,
#     TaxablePer=0.0001,
#     RothPer=0,
#     Horizon=Horizon)

# standard_cases_data.iat[index, 25] = longevity_obj[0]

##### conventional wisdom, 90% CL#########
results_by_sim = []
for sim in range(1, 250):
    longevity_obj = Simulate(
        FilingStatus=case.filing_status,
        Age=case.client_age,
        S_Age=case.spouse_age,
        TradBal_C=case.trad_bal,
        TradBal_S=0,
        TaxableBal=case.tax_bal,
        TaxableCB=case.tax_cb,
        RothBal=case.roth_bal,
        Ret_Total_Vec=stochastic_returns[sim - 1:sim].values[0],
        Ret_Interest_Vec=Ret_Interest_Vec,
        Ret_Dividend_Vec=Ret_Dividend_Vec,
        PRLT=PRLT,
        Strategy=Strategy,
        WHPer=WHPer,
        WageWHPer=WageWHPer,
        UIWHPer=UIWHPer,
        ATWD_Vec=ATWD_Vec,
        SocialSecurity_Vec=SocialSecurity_Vec,
        TotalATWDReq_Vec=TotalATWDReq_Vec,
        WageIncome_Vec=TotalATWDReq_Vec,
        UInc_Vec=UInc_Vec,
        TradPer=.9999,
        TaxablePer=0.0001,
        RothPer=0,
        Horizon=Horizon)
    results_by_sim.append(longevity_obj[0])

ninety_percent_confidence_longevity = np.percentile(results_by_sim, 10)
print(ninety_percent_confidence_longevity)
standard_cases_data.iat[index, 26] = ninety_percent_confidence_longevity

##### conventional wisdom, 50% CL#########

fifty_percent_confidence_longevity = np.percentile(results_by_sim, 50)
print(fifty_percent_confidence_longevity)
standard_cases_data.iat[index, 25] = fifty_percent_confidence_longevity

##### ratio withdrawals, static returns####

winning_ratio_obj = Grid10Optim(
    FilingStatus=case.filing_status,
    Age=case.client_age,
    S_Age=case.spouse_age,
    TradBal_C=case.trad_bal,
    TradBal_S=0, TaxableBal=case.tax_bal,
    TaxableCB=case.tax_cb,
    RothBal=case.roth_bal,
    Ret_Total_Vec=Ret_Total_Vec,
    Ret_Interest_Vec=Ret_Interest_Vec,
    Ret_Dividend_Vec=Ret_Dividend_Vec,
    PRLT=PRLT,
    Strategy="RATIO",
    WHPer=WHPer,
    WageWHPer=WageWHPer,
    UIWHPer=UIWHPer,
    ATWD_Vec=ATWD_Vec,
    SocialSecurity_Vec=SocialSecurity_Vec,
    TotalATWDReq_Vec=TotalATWDReq_Vec,
    WageIncome_Vec=TotalATWDReq_Vec,
    UInc_Vec=UInc_Vec,
    Horizon=Horizon)

longevity_obj = Simulate(
    FilingStatus=case.filing_status,
    Age=case.client_age,
    S_Age=case.spouse_age,
    TradBal_C=case.trad_bal,
    TradBal_S=0,
    TaxableBal=case.tax_bal,
    TaxableCB=case.tax_cb,
    RothBal=case.roth_bal,
    Ret_Total_Vec=Ret_Total_Vec,
    Ret_Interest_Vec=Ret_Interest_Vec,
    Ret_Dividend_Vec=Ret_Dividend_Vec,
    PRLT=PRLT,
    Strategy="RATIO",
    WHPer=WHPer,
    WageWHPer=WageWHPer,
    UIWHPer=UIWHPer,
    ATWD_Vec=ATWD_Vec,
    SocialSecurity_Vec=SocialSecurity_Vec,
    TotalATWDReq_Vec=TotalATWDReq_Vec,
    WageIncome_Vec=TotalATWDReq_Vec,
    UInc_Vec=UInc_Vec,
    TradPer=winning_ratio_obj[0],
    TaxablePer=winning_ratio_obj[1],
    RothPer=winning_ratio_obj[2],
    Horizon=Horizon)

# standard_cases_data.iat[index, 27] = longevity_obj[0]

#### ratio withdrawals, 90% cl####

results_by_sim = []
for sim in range(1, 250):
    longevity_obj = Simulate(
        FilingStatus=case.filing_status,
        Age=case.client_age,
        S_Age=case.spouse_age,
        TradBal_C=case.trad_bal,
        TradBal_S=0,
        TaxableBal=case.tax_bal,
        TaxableCB=case.tax_cb,
        RothBal=case.roth_bal,
        Ret_Total_Vec=stochastic_returns[sim - 1:sim].values[0],
        Ret_Interest_Vec=Ret_Interest_Vec,
        Ret_Dividend_Vec=Ret_Dividend_Vec,
        PRLT=PRLT,
        Strategy=Strategy,
        WHPer=WHPer,
        WageWHPer=WageWHPer,
        UIWHPer=UIWHPer,
        ATWD_Vec=ATWD_Vec,
        SocialSecurity_Vec=SocialSecurity_Vec,
        TotalATWDReq_Vec=TotalATWDReq_Vec,
        WageIncome_Vec=TotalATWDReq_Vec,
        UInc_Vec=UInc_Vec,
        TradPer=winning_ratio_obj[0],
        TaxablePer=winning_ratio_obj[1],
        RothPer=winning_ratio_obj[2],
        Horizon=Horizon)
    results_by_sim.append(longevity_obj[0])
ninety_percent_confidence_longevity = np.percentile(results_by_sim, 10)
standard_cases_data.iat[index, 28] = ninety_percent_confidence_longevity

##### ratio withdrawals, 50% CL#########

fifty_percent_confidence_longevity = np.percentile(results_by_sim, 50)
standard_cases_data.iat[index, 27] = fifty_percent_confidence_longevity

end_time = time.time()
print(f"{index} finished in {end_time - start_time:0.4f} seconds.")
index = index + 1

if index % 100 == 0:
    df = pd.DataFrame(standard_cases_data)
    df.to_csv("Standard_Cases_Output.csv", float_format="%.9f")

df = pd.DataFrame(standard_cases_data) df.to_csv("Standard_Cases_Output.csv", float_format="%.9f")

dipanb / quant_models

Code for life expectncy HRS, Death Model and Cocoon #1

!/usr/bin/env python

coding: utf-8

In[3]:

In[4]:

In[5]:

columns = pd.DataFrame(HRS_data.columns.to_list(),columns = ['Variables'])

columns.to_csv('HRS Variables.csv',index = False)

In[6]:

HRS_keep.to_csv('HRS Columns Filtered.csv')

In[7]:

HRS_Death = pd.DataFrame()

HRS_Death['Death_Year'] = HRS_keep['radyear']

In[8]:

temp_df.to_csv('HRS Death.csv')

1.Death Age and Death year

3. Find wave number exit interview

4. All demographic variables for previous wave and ever haves - death wave values not reported - fill death wave values for ever haves by last wave data

5. Sample weights

- Run regression analysis

- Calculate sum(sample wts) and count - for each year, each condition

Death data

In[9]:

print(f'row:{i+1}')

print(f'wave:{j}')

In[10]:

In[11]:

In[ ]:

In[10]:

temp_df = HRS_keep.copy()

HRS_keep['radyear'].unique()

In[11]:

Incidence Data

0. For each wave - remove people who died before this wave - exit interview before this wave

1.Interview end Year

2.Ever haves - Each condition

- calculate sum(sample wts) and cout - for each year and each condtions

def find_if_death_wave(df):

temp = df.copy()

temp['Death_Wave'] = None

for i in range(df.shape[0]):

if not temp['radyear'].iloc[i]>0:

continue

for j in range(1,15):

if df[f'r{j}iwstat'].loc[i]=='5.nr,died this wv':

temp['Death_Wave'].iloc[i] = j

return temp['Death_Wave']

temp_df['Death_Wave'] = find_if_death_wave(temp_df)

In[12]:

def count_chronic(name_c,df):

count_c = []

for i in range(1,15):

if f'r{i}{name_c}' not in temp_df.columns:

count_c.append(0)

else:

count_c.append(temp_df[f'r{i}{name_c}'].value_counts()['1.yes'])

return count_c

In[13]:

Chronic_Incidence = pd.DataFrame(list(range(1,15)),columns=[''])

In[14]:

name_c = 'hibpe'

count_c = {}

sumwt_c = {}

Also include Age in this loop

for i in range(1,15):

years_list = temp_df.loc[temp_df[f'r{i}iwendy']>0][f'r{i}iwendy'].unique()

for year in years_list:

temp = temp_df.loc[temp_df[f'r{i}iwendy']==year]

for age in range(55,101):

temp1 = temp.loc[temp_df[f'r{i}agey_e']==age]

print(f'{i} {int(year)} {age} {temp1.shape[0]}')

Count

if f'{int(year)}_{age}' not in count_c.keys():

if f'r{i}{name_c}' in temp1.columns:

countc[f'{int(year)}{age}'] = temp1[f'r{i}{name_c}'].value_counts()['1.yes']

else:

countc[f'{int(year)}{age}'] = 0

else:

if f'r{i}{name_c}' in temp1.columns: