Open dipanb opened 1 year ago
import pandas as pd import numpy as np import os os.chdir("Z:\Healthcare Research\Healthcare Spending\Research Papers\HRS\HRS_2018v2 - 2023\Dipan") df = pd.read_csv('HRS Columns Filtered.csv',index_col = [0]) df.reset_index(inplace=True,drop=True)
df['r1drinkn'] = df['r1drinkr'] df['r2drinkn'] = df['r2drinkr']
df['Is_Dead'] = [1 if df['radyear'].iloc[i].item()>0 else 0 for i in range(df.shape[0])]
df['Death_Wave'] = 100 for i in range(df.shape[0]): if df['Is_Dead'].iloc[i].item()==1: for j in range(1,15): if df[f'r{j}iwstat'].iloc[i]=='5.nr,died this wv': df['Death_Wave'].iloc[i] = j
Extended_Df = pd.DataFrame(columns = ['Household_Id','Is_Dead','Death_Wave','Death_Year','Death_Age', 'Gender','Race','Years_Education', 'Highest_Degree','Education_Category','Religion','Veteran_Status','Mother_Years_Of_Education', 'Father_Years_Of_Education','Division_Birth','Sample_Wt','Cohort_Of_Respondent', 'Division_At_Birth',
'Wave_No',
'Times_Married','Length_Current_Marriage','Length_Longest_Marriage','Times_Divorced',
'Household_Size','Whether_Couple_HH','HH_Wt','Person_Level_Wt','Self_Report_Health',
'Self_Report_Health_Change_3_Category',
'BMI','Height','Weight','Back_Problem','Smoke','Smoke_Now','Drink','Drink_Days_Per_Week',
'Drink_Per_Day','Blood_Pressure','Diabetes','Cancer','Lung_Disease','Heart_Problem','Stroke',
'Pyschological_Problem','Arthritis',
'Sum_Conditions',
'Non_Housing_Assets','Total_Assets','Income','Capital_Income_HH','Reports_Retired',
'Retirement_Year','Planned_Retirement_Year','Job_History','Job_Years','Household_Size',
'Number_Living_Children','Father_Alive','Mother_Alive','Father_Age','Mother_Age',
'Marital_Status','Partnership_Status','Current_Age'])
count = 0 for i in range(df.shape[0]):
print(f'{i}/{df.shape[0]-1}')
if df['Death_Wave'].iloc[i]>2:
for wave in range(3,15):
if df['Death_Wave'].iloc[i]>wave:
c_list = []
c_list.extend(df[['hhidpn','Is_Dead','Death_Wave','radyear','radage_y','ragender','raracem','raedyrs',
'raedegrm','raeduc','rarelig','ravetrn','rameduc','rafeduc','rabplace','rawtsamp',
'racohbyr','rabplace']].iloc[i])
b_list = []
b_list = [wave,
df[f'r{wave}mrct'].iloc[i],df[f'r{wave}mcurln'].iloc[i],df[f'r{wave}mlenm'].iloc[i],
df[f'r{wave}mdiv'].iloc[i],df[f'h{wave}hhresp'].iloc[i],df[f'h{wave}cpl'].iloc[i],
df[f'r{wave}wthh'].iloc[i],df[f'r{wave}wtresp'].iloc[i],df[f'r{wave}shlt'].iloc[i],
df[f'r{wave}hltc3'].iloc[i],
# df[f'r{wave}vgactx'].iloc[i],df[f'r{wave}mdactx'].iloc[i],
# df[f'r{wave}ltactx'].iloc[i],
df[f'r{wave}bmi'].iloc[i],df[f'r{wave}height'].iloc[i],
df[f'r{wave}weight'].iloc[i],df[f'r{wave}back'].iloc[i],df[f'r{wave}smokev'].iloc[i],
df[f'r{wave}smoken'].iloc[i],df[f'r{wave}drink'].iloc[i],df[f'r{wave}drinkd'].iloc[i],
df[f'r{wave}drinkn'].iloc[i],df[f'r{wave}hibpe'].iloc[i],df[f'r{wave}diabe'].iloc[i],
df[f'r{wave}cancre'].iloc[i],df[f'r{wave}lunge'].iloc[i],df[f'r{wave}hearte'].iloc[i],
df[f'r{wave}stroke'].iloc[i],df[f'r{wave}psyche'].iloc[i],df[f'r{wave}arthre'].iloc[i],
# df[f'r{wave}alzhee'].iloc[i],df[f'r{wave}demene'].iloc[i],
df[f'r{wave}conde'].iloc[i],
# df[f'r{wave}bpsys'].iloc[i],df[f'r{wave}bpdia'].iloc[i],df[f'r{wave}bppuls'].iloc[i],
df[f'h{wave}atotn'].iloc[i],df[f'h{wave}atotb'].iloc[i],df[f'r{wave}iearn'].iloc[i],
df[f'h{wave}icap'].iloc[i],df[f'r{wave}sayret'].iloc[i],df[f'r{wave}retyr'].iloc[i],
df[f'r{wave}rplnyr'].iloc[i],df[f'r{wave}jjobs'].iloc[i],df[f'r{wave}jyears'].iloc[i],
df[f'h{wave}hhres'].iloc[i],df[f'h{wave}child'].iloc[i],df[f'r{wave}dadliv'].iloc[i],
df[f'r{wave}momliv'].iloc[i],df[f'r{wave}dadage'].iloc[i],df[f'r{wave}momage'].iloc[i],
df[f'r{wave}mstat'].iloc[i],df[f'r{wave}mpart'].iloc[i],df[f'r{wave}agey_e'].iloc[i]]
c_list.extend(b_list)
Extended_Df.loc[count] = c_list
count = count + 1
Extended_Df.to_csv('HRS Expanded v2.csv')
Extended_Df = pd.read_csv('HRS Expanded v2.csv', index_col = 0)
Extended_Df = Extended_Df.loc[:,~Extended_Df.columns.duplicated()].copy()
Extended_Df.drop('Sample_Wt', inplace=True, axis=1) Extended_Df.drop('HH_Wt', inplace=True, axis=1) Extended_Df.drop('Person_Level_Wt', inplace=True, axis=1)
Extended_Df.drop('Self_Report_Health_Change_3_Category', inplace=True, axis=1) Extended_Df.drop('Self_Report_Health', inplace=True, axis=1)
Extended_Df.drop('Back_Problem', inplace=True, axis=1)
Extended_Df.drop('Planned_Retirement_Year', inplace=True, axis=1) Extended_Df.drop('Retirement_Year', inplace=True, axis=1) Extended_Df.drop('Household_Size.1', inplace=True, axis=1)
Extended_Df.shape
Extended_Df['Death_Age'].fillna(value = 200, inplace = True) Extended_Df['Race'].fillna(value = '1.white/caucasian', inplace = True)
Extended_Df.loc[Extended_Df['Highest_Degree']=='1.ged','Highest_Degree'] = '3.hs/ged' Extended_Df.loc[Extended_Df['Highest_Degree']=='2.hs','Highest_Degree'] = '3.hs/ged'
Extended_Df.loc[Extended_Df['Years_Education']=='17.17+ yrs','Years_Education'] = '17' Extended_Df.loc[Extended_Df['Years_Education']=='0.none','Years_Education'] = '0' Extended_Df['Years_Education'].fillna(value = '-1', inplace = True)
Extended_Df['Education_Category'].fillna(value = '1.lt high-school', inplace = True)
Extended_Df['Religion'].fillna(value = '5.other', inplace = True)
Extended_Df['Veteran_Status'].fillna(value = '0.no', inplace = True)
Extended_Df['Division_Birth'].fillna(value = '11.not us/inc us terr', inplace = True)
Extended_Df['Cohort_Of_Respondent'].fillna(value = '0.not in any cohort', inplace = True)
Extended_Df['BMI'].fillna(value = Extended_Df['BMI'].mean(), inplace = True) Extended_Df['Height'].fillna(value = Extended_Df['Height'].mean(), inplace = True) Extended_Df['Weight'].fillna(value = Extended_Df['Weight'].mean(), inplace = True) Extended_Df['Length_Current_Marriage'].fillna(value = Extended_Df['Length_Current_Marriage'].mean(), inplace = True) Extended_Df['Length_Longest_Marriage'].fillna(value = Extended_Df['Length_Longest_Marriage'].mean(), inplace = True) Extended_Df['Times_Married'].fillna(value = int(Extended_Df['Times_Married'].mean()), inplace = True) Extended_Df['Household_Size'].fillna(value = int(Extended_Df['Household_Size'].mean()), inplace = True)
Extended_Df['Whether_Couple_HH'].fillna(value = 'No value provided', inplace = True)
Extended_Df['Mother_Years_Of_Education'].fillna(value = '-1', inplace = True) Extended_Df['Father_Years_Of_Education'].fillna(value = '-1', inplace = True)
Extended_Df.loc[Extended_Df['Division_At_Birth']=='10.us/na division','Division_At_Birth'] = '11.not us/inc us terr' Extended_Df['Division_At_Birth'].fillna(value = '11.not us/inc us terr', inplace = True)
Extended_Df.loc[Extended_Df['Marital_Status']=='4.separated','Marital_Status'] = '6.separated/divorced' Extended_Df.loc[Extended_Df['Marital_Status']=='5.divorced','Marital_Status'] = '6.separated/divorced' Extended_Df['Marital_Status'].fillna(value = 'No value given', inplace = True)
Extended_Df['Partnership_Status'].fillna(value = '0. no', inplace = True)
Extended_Df['Times_Divorced'].fillna(value = 0.0, inplace = True)
Extended_Df['Smoke'].fillna(value = '1.yes', inplace = True) Extended_Df['Smoke_Now'].fillna(value = '0.no', inplace = True)
Extended_Df['Drink'].fillna(value = '0.no', inplace = True) Extended_Df['Drink_Per_Day'].fillna(value = '0.0 or doesnt drink', inplace = True) Extended_Df['Drink_Days_Per_Week'].fillna(value = '0.0 or doesnt drink', inplace = True)
Extended_Df['Reports_Retired'].fillna(value = 'No value given', inplace = True)
Extended_Df['Reports_Retired'].fillna(value = 'No value given', inplace = True)
Extended_Df['Father_Alive'].fillna(value = '0.no', inplace = True) Extended_Df['Mother_Alive'].fillna(value = '0.no', inplace = True)
Extended_Df['Father_Age'].fillna(value = int(Extended_Df['Father_Age'].mean()), inplace = True) Extended_Df['Mother_Age'].fillna(value = int(Extended_Df['Mother_Age'].mean()), inplace = True)
Extended_Df['Number_Living_Children'].fillna(value = int(Extended_Df['Number_Living_Children'].mean()), inplace = True)
Extended_Df.loc[Extended_Df['Drink_Per_Day']=='0.0 or doesnt drink','Drink_Per_Day'] = '0' Extended_Df.loc[Extended_Df['Drink_Per_Day']=="0.doesn't drink",'Drink_Per_Day'] = '0' Extended_Df.loc[Extended_Df['Drink_Per_Day']=='1.lt 1/day','Drink_Per_Day'] = '1' Extended_Df.loc[Extended_Df['Drink_Per_Day']=='2.1-2/day','Drink_Per_Day'] = '2' Extended_Df.loc[Extended_Df['Drink_Per_Day']=='3.3-4/day','Drink_Per_Day'] = '3' Extended_Df.loc[Extended_Df['Drink_Per_Day']=='4.5+/day','Drink_Per_Day'] = '4' Extended_Df.loc[Extended_Df['Drink_Per_Day']=='99.all day','Drink_Per_Day'] = '45' Extended_Df['Drink_Per_Day'] = pd.to_numeric(Extended_Df['Drink_Per_Day'])
Extended_Df.loc[Extended_Df['Drink_Days_Per_Week']=='0.0 or doesnt drink','Drink_Days_Per_Week'] = '0' Extended_Df['Drink_Days_Per_Week'] = pd.to_numeric(Extended_Df['Drink_Days_Per_Week'])
Extended_Df.loc[Extended_Df['Father_Years_Of_Education']=='8.5.8+ yrs','Father_Years_Of_Education'] = '8' Extended_Df.loc[Extended_Df['Father_Years_Of_Education']=='8.5','Father_Years_Of_Education'] = '8' Extended_Df.loc[Extended_Df['Father_Years_Of_Education']=='8.5.8+ yrs','Father_Years_Of_Education'] = '8' Extended_Df.loc[Extended_Df['Father_Years_Of_Education']=='17.17+ yrs','Father_Years_Of_Education'] = '17' Extended_Df.loc[Extended_Df['Father_Years_Of_Education']=='0.none','Father_Years_Of_Education'] = '0' Extended_Df['Father_Years_Of_Education'] = pd.to_numeric(Extended_Df['Father_Years_Of_Education'])
Extended_Df.loc[Extended_Df['Mother_Years_Of_Education']=='8.5.8+ yrs','Mother_Years_Of_Education'] = '8' Extended_Df.loc[Extended_Df['Mother_Years_Of_Education']=='8.5','Mother_Years_Of_Education'] = '8' Extended_Df.loc[Extended_Df['Mother_Years_Of_Education']=='8.5.8+ yrs','Mother_Years_Of_Education'] = '8' Extended_Df.loc[Extended_Df['Mother_Years_Of_Education']=='17.17+ yrs','Mother_Years_Of_Education'] = '17' Extended_Df.loc[Extended_Df['Mother_Years_Of_Education']=='0.none','Mother_Years_Of_Education'] = '0' Extended_Df['Mother_Years_Of_Education'] = pd.to_numeric(Extended_Df['Mother_Years_Of_Education'])
Extended_Df['Years_Education'] = pd.to_numeric(Extended_Df['Years_Education'])
Extended_Df = Extended_Df[Extended_Df['Blood_Pressure'].notna()]
Death_Df = Extended_Df.loc[Extended_Df['Is_Dead']==1] Non_Death_Df = Extended_Df.loc[Extended_Df['Is_Dead']==0]
print(Extended_Df.shape) print(Death_Df.shape) print(Non_Death_Df.shape)
from sklearn.model_selection import train_test_split from sklearn.ensemble import RandomForestRegressor from sklearn.metrics import r2_score
labels = np.array(Death_Df['Death_Age']) model_df = Death_Df.drop('Death_Age', axis = 1) train, test, train_labels, test_labels = train_test_split(model_df,labels,test_size = 0.2)
Other_Data_Train = train[['Cohort_Of_Respondent','Death_Year','Death_Wave', 'Length_Current_Marriage','Is_Dead', 'Father_Alive', 'Mother_Alive', 'Father_Age', 'Mother_Age','Wave_No','Household_Id','Current_Age']]
train = train.drop(['Cohort_Of_Respondent','Death_Year','Death_Wave', 'Length_Current_Marriage','Is_Dead', 'Father_Alive', 'Mother_Alive', 'Father_Age', 'Mother_Age','Wave_No','Household_Id','Current_Age'],axis=1)
train = pd.get_dummies(train) train = train.astype('float64') len(train.columns.to_list())
Other_Data_Test = test[['Cohort_Of_Respondent','Death_Year','Death_Wave', 'Length_Current_Marriage','Is_Dead', 'Father_Alive', 'Mother_Alive', 'Father_Age', 'Mother_Age','Wave_No','Household_Id','Current_Age']]
test = test.drop(['Cohort_Of_Respondent','Death_Year','Death_Wave', 'Length_Current_Marriage','Is_Dead', 'Father_Alive', 'Mother_Alive', 'Father_Age', 'Mother_Age','Wave_No','Household_Id','Current_Age'],axis=1)
test = pd.get_dummies(test) test = test.astype('float64') len(test.columns.to_list())
Other_Data_Non_Death_Df = Non_Death_Df[['Cohort_Of_Respondent','Death_Year','Death_Wave', 'Length_Current_Marriage','Is_Dead', 'Father_Alive', 'Mother_Alive', 'Father_Age', 'Mother_Age','Wave_No','Death_Age', 'Household_Id','Current_Age']]
Non_Death_Df = Non_Death_Df.drop(['Cohort_Of_Respondent','Death_Year','Death_Wave', 'Length_Current_Marriage','Is_Dead', 'Father_Alive', 'Mother_Alive', 'Father_Age', 'Mother_Age','Wave_No','Death_Age', 'Household_Id','Current_Age'],axis=1)
Non_Death_Df = pd.get_dummies(Non_Death_Df) Non_Death_Df = Non_Death_Df.astype('float64') print(len(Non_Death_Df.columns.to_list()))
rf = RandomForestRegressor(n_estimators = 1000, random_state = 42,verbose=True) rf.fit(train, train_labels)
predictions = rf.predict(train) errors = abs(predictions - train_labels) mape = 100 * (errors / train_labels) accuracy = 100 - np.mean(mape) accuracy
R2 = r2_score(train_labels, predictions) Adj_R2 = 1-(1-R2)*(train.shape[0]-1)/(train.shape[0]-train.shape[1]-1) Adj_R2
predictions_test = rf.predict(test) test_errors = abs(predictions_test - test_labels) mape = 100 * (test_errors / test_labels) accuracy = 100 - np.mean(mape) accuracy
importances = list(rf.featureimportances) feature_importances = [(feature, round(importance, 4)) for feature, importance in zip(train.columns, importances)] feature_importances = sorted(feature_importances, key = lambda x: x[1], reverse = True) feature_importances
test = pd.concat([test, Other_Data_Test], axis=1) test['Actual_Death_Age'] = test_labels test['Predicted_Death_Age'] = predictions_test test.head() test.to_excel("Death Model Tests Predicted v2.xlsx")
predictions_non_death = rf.predict(Non_Death_Df) Non_Death_Df = pd.concat([Non_Death_Df, Other_Data_Non_Death_Df], axis=1) Non_Death_Df['Predicted_Death_Age'] = predictions_non_death Non_Death_Df.to_excel("Death Model Non Death Predicted v2.xlsx")
import pandas as pd import numpy as np import datetime as dt from dateutil.relativedelta import relativedelta import matplotlib.pyplot as plt from matplotlib import cm from mpl_toolkits.mplot3d import Axes3D import time
STD_Single = 12400 STD_HOH = 18650 STD_MFJ = 24800 STD_MFS = 12400
PE_Single = 0 PE_HOH = 0 PE_MFJ = 0 PE_MFS = 0 DependentExemption = 0
Inc_MTR = [.1,.12,.22,.24,.32,.35,.37]
Inc_Br = [] Inc_Br.append([9875,40125,85525,163300,207350,518400,10000000000]) Inc_Br.append([14101,53701,85501,163300,207350,518400,10000000000]) Inc_Br.append([19750,80250,171050,326600,414700,622050,10000000000]) Inc_Br.append([9875,40125,85525,163300,207350,311025,10000000000])
Q_Br = [] Q_MTR = [0,.15,.2] Q_Br.append([0,40000,441450]) Q_Br.append([0,53600,469050]) Q_Br.append([0,80000,496600]) Q_Br.append([0,40000,248300])
Min_EarnedIncome_ChildCredit = 2500 Percentage_EIRefundable_ChildCredit = 0.15 Max_Refundable_ChildCredit = 1400
SS_WithholdingRate = 0.062 MedicareRate = 0.0145 HighIncomeMedicareRate = 0.0235 SS_WithholdingThreshold = 128400 MedicareRateThreshold_Single = 200000 MedicareRateThreshold_HOH = 200000 MedicareRateThreshold_MFJ = 250000 MedicareRateThreshold_MFS = 125000 MinSEIforTaxation = 400
NIIT_Threshold_Single = 200000 NIIT_Threshold_HOH = 200000 NIIT_Threshold_MFJ = 250000 NIIT_Threshold_MFS = 125000 NIIT_TaxRate = 0.038
SS_CombinedIncome_Contribution = 0.5 SS_Taxation_Rate1 = 0.5 SS_Taxation_Rate2 = 0.85
TSS_LL_Single = 25000 TSS_LL_HOH = 32000 TSS_LL_MFJ = 32000 TSS_LL_MFS = 25000
TSS_UL_Single = 34000 TSS_UL_HOH = 44000 TSS_UL_MFJ = 44000 TSS_UL_MFS = 34000
ELL_Single = 266700 ELL_HOH = 293350 ELL_MFJ = 320000 ELL_MFS = 160000
EUL_Single = 389200 EUL_HOH = 415850 EUL_MFJ = 442500 EUL_MFS = 221250
ItemizedDeduction_Deflator_AGI = 0.03 ItemizedDeduction_Deflator = 0.8
CLL_Single = 3000 CLL_HOH = 3000 CLL_MFJ = 3000 CLL_MFS = 1500
STD_Single_HOH_MFS_65 = 1650 STD_MFJ_65 = 1300 STD_MFJ_65_65 = 2600
def IncomeTax(FilingStatus, Age, S_Age, WageIncome, SelfEmployedIncome, NetAdjustedCapitalGains, Qual_Dividend, Ord_UnearnedIncome, SocialSecurity,Adjustments, ItemizedDeduction,Child_Dependent, Non_Child_Dependent):
if (FilingStatus=="Single"):
NFILIng = 1
MedicareRateThreshold = MedicareRateThreshold_Single
StandardDeduction = STD_Single
PersonalExemption = PE_Single
ExemptionLowerLimit = ELL_Single
ExemptionUpperLimit = EUL_Single
TaxableSSLowerLimit = TSS_LL_Single
TaxableSSUpperLimit = TSS_UL_Single
CapitalLossLimit = CLL_Single
NIIT_Threshold = NIIT_Threshold_Single
elif (FilingStatus=="HOH"):
NFILIng = 2
MedicareRateThreshold = MedicareRateThreshold_HOH
StandardDeduction = STD_HOH
PersonalExemption = PE_HOH
ExemptionLowerLimit = ELL_HOH
ExemptionUpperLimit = EUL_HOH
TaxableSSLowerLimit = TSS_LL_HOH
TaxableSSUpperLimit = TSS_UL_HOH
CapitalLossLimit = CLL_HOH
NIIT_Threshold = NIIT_Threshold_HOH
elif (FilingStatus=="MFJ"):
NFILIng = 3
MedicareRateThreshold = MedicareRateThreshold_MFJ
StandardDeduction = STD_MFJ
PersonalExemption = PE_MFJ
ExemptionLowerLimit = ELL_MFJ
ExemptionUpperLimit = EUL_MFJ
TaxableSSLowerLimit = TSS_LL_MFJ
TaxableSSUpperLimit = TSS_UL_MFJ
CapitalLossLimit = CLL_MFJ
NIIT_Threshold = NIIT_Threshold_MFJ
elif (FilingStatus=="MFS"):
NFILIng = 4
MedicareRateThreshold = MedicareRateThreshold_MFS
StandardDeduction = STD_MFS
PersonalExemption = PE_MFS
ExemptionLowerLimit = ELL_MFS
ExemptionUpperLimit = EUL_MFS
TaxableSSLowerLimit = TSS_LL_MFS
TaxableSSUpperLimit = TSS_UL_MFS
CapitalLossLimit = CLL_MFS
NIIT_Threshold = NIIT_Threshold_MFS
InflationFactor = 1
SocialSecurityTax = SS_WithholdingRate * min(WageIncome, SS_WithholdingThreshold * InflationFactor)
MedicareTax = min(WageIncome, MedicareRateThreshold) * MedicareRate + max(0, WageIncome - MedicareRateThreshold) * HighIncomeMedicareRate
FICATax = SocialSecurityTax + MedicareTax
if (SelfEmployedIncome * (1 - (SS_WithholdingRate + MedicareRate)) <= MinSEIforTaxation):
SelfEmploymentTax = 0
else:
SelfEmploymentTax = 2 * SS_WithholdingRate * min(max(SS_WithholdingThreshold * InflationFactor - WageIncome, 0), SelfEmploymentIncome * (1 - (SS_WithholdingRate + MedicareRate))) + 2 * MedicareRate * SelfEmploymentIncome * (1 - (SS_WithholdingRate + MedicareRate))
EarnedIncome = WageIncome + SelfEmployedIncome
Qual_UnearnedIncome = max(-CapitalLossLimit, NetAdjustedCapitalGains) + Qual_Dividend
GrossIncome = EarnedIncome + Ord_UnearnedIncome + Qual_UnearnedIncome - Qual_Dividend + SocialSecurity
AdjustedGrossIncome = GrossIncome - Adjustments
NonTaxableInterest = 0
if (SocialSecurity == 0):
TaxableSS = 0
else:
PostRetirementCombinedIncome = (AdjustedGrossIncome - SocialSecurity) + NonTaxableInterest + SS_CombinedIncome_Contribution * SocialSecurity
SS_85 = min(SocialSecurity, max(PostRetirementCombinedIncome - TaxableSSUpperLimit, 0)) / SocialSecurity
if (PostRetirementCombinedIncome < TaxableSSLowerLimit):
SS_50 = 0
else:
SS_50 = min(TaxableSSUpperLimit - TaxableSSLowerLimit, PostRetirementCombinedIncome - TaxableSSLowerLimit) / SocialSecurity
TaxableSS = SS_85 * SocialSecurity * SS_Taxation_Rate2 + SS_50 * SocialSecurity * SS_Taxation_Rate1
TaxableSS = min(TaxableSS, SocialSecurity * SS_Taxation_Rate2)
GrossIncome = GrossIncome - SocialSecurity + TaxableSS
AdjustedGrossIncome = GrossIncome - Adjustments
if (FilingStatus == "MFJ"):
if (Age >= 65 and S_Age >= 65):
StandardDeduction_F = ((StandardDeduction + STD_MFJ_65_65) * InflationFactor)
elif ((Age < 65 and S_Age >= 65) or (Age >= 65 and S_Age < 65)):
StandardDeduction_F = ((StandardDeduction + STD_MFJ_65) * InflationFactor)
else:
StandardDeduction_F = (StandardDeduction * InflationFactor)
else:
if (Age >= 65):
StandardDeduction_F = ((StandardDeduction + STD_Single_HOH_MFS_65) * InflationFactor)
else:
StandardDeduction_F = (StandardDeduction * InflationFactor)
ItemizedDeduction_Phaseout = 0
if (AdjustedGrossIncome > (ExemptionLowerLimit * InflationFactor)):
ItemizedDeduction_Phaseout = min(ItemizedDeduction_Deflator_AGI * (AdjustedGrossIncome - (ExemptionLowerLimit * InflationFactor)), ItemizedDeduction_Deflator * (ItemizedDeduction))
else:
ItemizedDeduction_Phaseout = 0
Deduction = max(StandardDeduction, StandardDeduction_F, ItemizedDeduction - ItemizedDeduction_Phaseout)
Exemption = 0
#Taxable Income calculation
if ((Deduction + Exemption) > (EarnedIncome + Ord_UnearnedIncome - Qual_Dividend + TaxableSS - Adjustments)):
OtherTaxableIncome = 0
Qual_UnearnedIncome = Qual_UnearnedIncome - (Deduction + Exemption - (EarnedIncome + Ord_UnearnedIncome - Qual_Dividend - Adjustments + TaxableSS))
else:
OtherTaxableIncome = EarnedIncome + Ord_UnearnedIncome - Qual_Dividend - Adjustments + TaxableSS - Deduction - Exemption
if (Qual_UnearnedIncome < 0):
OtherTaxableIncome = max(OtherTaxableIncome + Qual_UnearnedIncome, 0)
Qual_UnearnedIncome = 0
TaxableIncome = max(OtherTaxableIncome + Qual_UnearnedIncome, 0)
#Tax calculation for Qualified Unearned Income(LTCG, Qualified dividends)
if (TaxableIncome <= (Q_Br[NFILIng-1][1] * InflationFactor)):
CapitalGainTax = 0
elif ((TaxableIncome > (Q_Br[NFILIng-1][1] * InflationFactor)) and (TaxableIncome <= (Q_Br[NFILIng-1][2] * InflationFactor))):
if (OtherTaxableIncome <= 0):
CapitalGainTax = Q_MTR[1] * (Qual_UnearnedIncome - (Q_Br[NFILIng-1][1] * InflationFactor))
elif (OtherTaxableIncome < (Q_Br[NFILIng-1][1] * InflationFactor)):
CapitalGainTax = ((Q_Br[NFILIng-1][1]* InflationFactor) - OtherTaxableIncome) * Q_MTR[0] + Q_MTR[1] * (Qual_UnearnedIncome - ((Q_Br[NFILIng-1][1] * InflationFactor) - OtherTaxableIncome))
else:
CapitalGainTax = (Q_Br[NFILIng-1][1] * InflationFactor) * Q_MTR[0] + Q_MTR[1] * Qual_UnearnedIncome
elif TaxableIncome > (Q_Br[NFILIng-1][2] * InflationFactor):
if (OtherTaxableIncome <= 0):
CapitalGainTax = (Q_Br[NFILIng-1][1] * InflationFactor) * Q_MTR[0] + Q_MTR[1] * ((Q_Br[NFILIng-1][2] * InflationFactor) - (Q_Br[NFILIng-1][1] * InflationFactor)) + Q_MTR[2] * (Qual_UnearnedIncome - (Q_Br[NFILIng-1][2] * InflationFactor))
elif (OtherTaxableIncome < (Q_Br[NFILIng-1][1] * InflationFactor)):
CapitalGainTax = ((Q_Br[NFILIng-1][1] * InflationFactor) - OtherTaxableIncome) * Q_MTR[0] + Q_MTR[1] * (Q_Br[NFILIng-1][2]* InflationFactor - Q_Br[NFILIng-1][1] * InflationFactor) + Q_MTR[2] * (Qual_UnearnedIncome - ((Q_Br[NFILIng-1][1] * InflationFactor - OtherTaxableIncome) +(Q_Br[NFILIng-1][2] * InflationFactor - Q_Br[NFILIng-1][1] * InflationFactor)))
elif (OtherTaxableIncome < (Q_Br[NFILIng-1][2]* InflationFactor)):
CapitalGainTax = (Q_Br[NFILIng-1][1] * InflationFactor) * Q_MTR[0] + Q_MTR[1] * ((Q_Br[NFILIng-1][2] * InflationFactor) - OtherTaxableIncome) + Q_MTR[2] * (Qual_UnearnedIncome - ((Q_Br[NFILIng-1][2] * InflationFactor) - OtherTaxableIncome))
elif (OtherTaxableIncome > (Q_Br[NFILIng-1][2] * InflationFactor)):
CapitalGainTax = Qual_UnearnedIncome * Q_MTR[2]
#Tax on Other Taxable Income - EarnedIncome + Ord_UnearnedIncome
counter = 1
Tax1 = 0
if (OtherTaxableIncome < 0):
Tax1 = 0
elif (OtherTaxableIncome <= (Inc_Br[NFILIng-1][counter-1] * InflationFactor)):
Tax1 = OtherTaxableIncome * Inc_MTR[counter-1]
else:
while (OtherTaxableIncome > (Inc_Br[NFILIng-1][counter-1] * InflationFactor)):
if (counter > 1):
Tax1 = Tax1 + (Inc_Br[NFILIng-1][counter-1] - Inc_Br[NFILIng-1][counter-2]) * Inc_MTR[counter-1] * InflationFactor
else:
Tax1 = Tax1 + Inc_Br[NFILIng-1][counter-1] * Inc_MTR[counter-1] * InflationFactor
#print(Tax1)
counter = counter + 1
Tax1 = Tax1 + (OtherTaxableIncome - Inc_Br[NFILIng-1][counter-2] * InflationFactor) * Inc_MTR[counter-1]
#Federal Tax, Federal marginal rate and next Federal marginal rate
FederalTax = CapitalGainTax + Tax1
return FederalTax
#return FederalTax,Tax1,CapitalGainTax,TaxableIncome,Inc_Br[NFILIng-1][0]
IncomeTax("MFJ", 71, 71, 100000, 0, 10150.75, 517.5,42956.66,40000,0,0,0,0)
RMD_Divisor = pd.DataFrame(data = [[70,27.4],[71,26.5],[72,25.6],[73,24.7],[74,23.8],[75,22.9],[76,22],[77,21.2], [78,20.3],[79,19.5],[80,18.7],[81,17.9],[82,17.1],[83,16.3],[84,15.5],[85,14.8], [86,14.1],[87,13.4],[88,12.7],[89,12],[90,11.4],[91,10.8],[92,10.2],[93,9.6], [94,9.1],[95,8.6],[96,8.1],[97,7.6],[98,7.1],[99,6.7],[100,6.3],[101,5.9], [102,5.5],[103,5.2],[104,4.9],[105,4.5],[106,4.2],[107,3.9],[108,3.7],[109,3.4], [110,3.1],[111,2.9],[112,2.6],[113,2.4],[114,2.1],[115,1.9],[116,1.9],[117,1.9], [118,1.9],[119,1.9],[120,1.9],[121,1.9],[122,1.9],[123,1.9],[124,1.9],[125,1.9], [126,1.9],[127,1.9]], columns = ['Age','Divisor'])
def Simulate_One_Year(FilingStatus, Age,S_Age,TradBal_C,TradBal_S, TaxableBal,TaxableCB, RothBal, Ret_Total, Ret_Interest, Ret_Dividend, PRLT, Strategy, TradPer, TaxablePer, RothPer, ATWD, TotalATWDReq,WHPer, SocialSecurity, WageIncome, UnearnedIncome, WageWH, UIncWH, LastTax,LastWH, CFLast):
if (Age>=70):
RMD_D_C = np.array(RMD_Divisor.loc[RMD_Divisor['Age']==Age,'Divisor'])[0]
RMD_C = TradBal_C/RMD_D_C
else:
RMD_C = 0
RMD_S = 0
if (S_Age>=70):
RMD_D_S = np.array(RMD_Divisor.loc[RMD_Divisor['Age']==S_Age,'Divisor'])[0]
RMD_S = TradBal_S/RMD_D_S
else:
RMD_S = 0
TradPerPerm = TradPer
TaxablePerPerm = TaxablePer
RothPerPerm = RothPer
if ((TradBal_C+TradBal_S)>0):
Trad_CFraction = TradBal_C/(TradBal_C+TradBal_S)
else:
Trad_CFraction = 0
TradBal_C = TradBal_C * (1+Ret_Total)
TradBal_S = TradBal_S * (1+Ret_Total)
Interest = TaxableBal * Ret_Interest
Dividend = TaxableBal * Ret_Dividend
TaxableCB = max(TaxableCB + Dividend + Interest + TaxableBal*(Ret_Total-Ret_Interest-Ret_Dividend)*PRLT,0)
Distributions = Dividend + Interest + TaxableBal*(Ret_Total-Ret_Interest-Ret_Dividend)*PRLT
TaxableBal = TaxableBal * (1+Ret_Total)
RothBal = RothBal * (1+Ret_Total)
TradBal = TradBal_C + TradBal_S
TotalBal = TradBal + TaxableBal + RothBal
if (Strategy == "PRORATA"):
TradPer = TradBal/TotalBal
TaxablePer = TaxableBal/TotalBal
RothBal = RothBal/TotalBal
TrueUp = LastTax - LastWH
if (ATWD==0):
ATWD = -min(TrueUp, WageIncome-WageWH+UnearnedIncome-UIncWH + SocialSecurity - TotalATWDReq)
if (TradPer==0 and TaxablePer==0 and RothPer==0):
TradPer = 1/3
TaxablePer = 1/3
RothPer = 1/3
if (TradPer>0 and TradBal<0.1):
TradPer = 0
if ((TaxablePer+RothPer) == 0):
TaxablePer = 0.5
RothPer = 0.5
else:
temp = TaxablePer + RothPer
TaxablePer = TaxablePer/temp
RothPer = RothPer/temp
if (TaxablePer>0 and TaxableBal<0.1):
TaxablePer = 0
if ((TradPer+RothPer) == 0):
TradPer = 0.5
RothPer = 0.5
else:
temp = TradPer + RothPer
TradPer = TradPer/temp
RothPer = RothPer/temp
if (RothPer>0 and RothBal<0.1):
RothPer = 0
if ((TaxablePer+TradPer) == 0):
TaxablePer = 0.5
TradPer = 0.5
else:
temp = TaxablePer + TradPer
TaxablePer = TaxablePer/temp
TradPer = TradPer/temp
PreTaxWD = max((ATWD + TrueUp)/(1-TradPer*WHPer),0)
if ((TradPer*PreTaxWD)>TradBal):
PreTaxWD = max(0,ATWD+TrueUp+TradBal*WHPer)
R_Adj = TradPer - TradBal/PreTaxWD
TradPer = TradBal/PreTaxWD
if ((TaxablePer+RothPer) == 0):
TaxablePer = TaxablePer + R_Adj/2
RothPer = 1 - TradPer - TaxablePer
else:
TaxablePer = TaxablePer + R_Adj * TaxablePer/(TaxablePer+RothPer)
RothPer = 1 - TradPer - TaxablePer
if ((TaxablePer*PreTaxWD)>TaxableBal):
if ((TradPer+RothPer) == 0):
PreTaxWD = max(0,(ATWD+TrueUp-TaxableBal)/(1-WHPer/2) + TaxableBal)
R_Adj = TaxablePer - TaxableBal/PreTaxWD
TaxablePer = TaxableBal/PreTaxWD
TradPer = TradPer + R_Adj/2
RothPer = 1 - TradPer - TaxablePer
else:
PreTaxWD = max(0,(ATWD+TrueUp-TaxableBal)/(1-WHPer*TradPer/(TradPer+RothPer)) + TaxableBal)
R_Adj = TaxablePer - TaxableBal/PreTaxWD
TaxablePer = TaxableBal/PreTaxWD
TradPer = TradPer + R_Adj*TradPer/(TradPer+RothPer)
RothPer = 1 - TradPer - TaxablePer
if ((RothPer*PreTaxWD)>RothBal):
if ((TaxablePer+TradPer) == 0):
PreTaxWD = max(0,(ATWD+TrueUp-RothBal)/(1-WHPer/2) + RothBal)
R_Adj = RothPer - RothBal/PreTaxWD
RothPer = RothBal/PreTaxWD
TradPer = TradPer + R_Adj/2
TaxablePer = 1 - TradPer - RothPer
else:
PreTaxWD = max(0,(ATWD+TrueUp-RothBal)/(1-WHPer*TradPer/(TradPer+TaxablePer)) + RothBal)
R_Adj = RothPer - RothBal/PreTaxWD
RothPer = RothBal/PreTaxWD
TradPer = TradPer + R_Adj*TradPer/(TradPer+TaxablePer)
TaxablePer = 1 - TradPer - RothPer
PreTaxWD = max((ATWD + TrueUp)/(1-TradPer*WHPer),0)
WdTrad = PreTaxWD * TradPer
WdTaxable = PreTaxWD * TaxablePer
WdRoth = PreTaxWD * RothPer
if ((RMD_C+RMD_S)>WdTrad):
WdTrad = RMD_C + RMD_S
if ((TaxablePerPerm+RothPerPerm)>0):
WdTaxable = max(0, (ATWD+TrueUp-WdTrad*(1-WHPer))*TaxablePerPerm/(TaxablePerPerm+RothPerPerm))
WdRoth = max(0,ATWD+TrueUp-WdTrad*(1-WHPer)-WdTaxable)
else:
WdTaxable = max(0, (ATWD+TrueUp-WdTrad*(1-WHPer))*0.5)
WdRoth = max(0,ATWD+TrueUp-WdTrad*(1-WHPer)-WdTaxable)
WdTrad = min(WdTrad,TradBal)
WdTaxable = min(WdTaxable,TaxableBal)
WdRoth = min(WdRoth,RothBal)
PreTaxWDReqAftrR1 = ATWD + TrueUp - WdTrad*(1-WHPer) - WdTaxable - WdRoth
if (PreTaxWDReqAftrR1>0.1):
if ((TotalBal-WdTrad-WdTaxable-WdRoth)<PreTaxWDReqAftrR1):
WdTrad = TradBal
WdTaxable = TaxableBal
WdRoth = RothBal
else:
if ((TradBal-WdTrad)<.1):
TradPer = 0
if ((WdTaxable+WdRoth)==0):
TaxablePer = 0.5
RothPer = 0.5
else:
TaxablePer = WdTaxable/(WdTaxable+WdRoth)
RothPer = 1 - TaxablePer
elif ((TaxableBal-WdTaxable)<.1):
TaxablePer = 0
if ((TradPer+RothPer)==0):
TradPer = 0.5
RothPer = 0.5
else:
TradPer = TradPer/(TradPer+RothPer)
RothPer = 1 - TradPer
if (WdTrad>(PreTaxWD-WdTaxable)*TradPer):
TradPer = 0
RothPer = 1
elif ((RothBal-WdRoth)<.1):
RothPer = 0
if ((TradPer+TaxablePer)==0):
TaxablePer = 0.5
TradPer = 0.5
else:
TradPer = TradPer/(TradPer+TaxablePer)
TaxablePer = 1 - TradPer
if (WdTrad>(PreTaxWD-WdRoth)*TradPer):
TradPer = 0
TaxablePer = 1
PreTaxWDReqAftrR1 = PreTaxWDReqAftrR1/(1-WHPer*TradPer)
WdTrad = min(TradBal,WdTrad+PreTaxWDReqAftrR1*TradPer)
WdTaxable = min(TaxableBal, WdTaxable+PreTaxWDReqAftrR1*TaxablePer)
WdRoth = min(RothBal,WdRoth + PreTaxWDReqAftrR1*RothPer)
PreTaxWDReqAftrR2 = ATWD + TrueUp - WdTrad*(1-WHPer) - WdTaxable - WdRoth
if (PreTaxWDReqAftrR2>.1):
if ((TotalBal-WdTrad-WdTaxable-WdRoth)<PreTaxWDReqAftrR1):
WdTrad = TradBal
WdTaxable = TaxableBal
WdRoth = RothBal
else:
if ((TradBal-WdTrad)<.1 and (TaxableBal-WdTaxable)<.1):
RothPer = 1
TradPer = 0
TaxablePer = 0
elif ((TradBal-WdTrad)<.1 and (RothBal-WdRoth)<.1):
TaxablePer = 1
RothPer = 0
TradPer = 0
elif ((TaxableBal-WdTaxable)<.1 and (RothBal-WdRoth)<.1):
TradPer = 1
TaxablePer = 0
RothPer = 0
PreTaxWDReqAftrR3 = PreTaxWDReqAftrR2/(1-WHPer*TradPer)
WdTrad = min(TradBal,WdTrad+PreTaxWDReqAftrR3*TradPer)
WdTaxable = min(TaxableBal, WdTaxable+PreTaxWDReqAftrR3*TaxablePer)
WdRoth = min(RothBal,WdRoth + PreTaxWDReqAftrR3*RothPer)
WdTaxableDist = min(WdTaxable,Distributions)
WdTaxableBal = WdTaxable - WdTaxableDist
if (TaxableBal==0):
RealizedGain = 0
else:
RealizedGain = WdTaxableBal *(1-(TaxableCB-Distributions)/(TaxableBal-Distributions)) + PRLT * TaxableBal/(1+Ret_Total) * (Ret_Total - Ret_Interest - Ret_Dividend)
CF = min(CFLast,0) + RealizedGain
RealizedGain = max(0,CF)
Tax = IncomeTax(FilingStatus, Age, S_Age, WageIncome, 0, RealizedGain, Dividend,
WdTrad+Interest+UnearnedIncome+Dividend, SocialSecurity,0,0,0,0)
NetIncome = WdTaxable + WdRoth + WdTrad * (1-WHPer) - TrueUp
Shortfall = ATWD - NetIncome
TradBal = TradBal - WdTrad
TradBal_C = TradBal* Trad_CFraction
TradBal_S = TradBal * (1-Trad_CFraction)
if (TaxableBal-WdTaxable)==0:
TaxableCB = 0
else:
TaxableCB = TaxableCB-WdTaxableDist - (TaxableCB-WdTaxableDist)*WdTaxableBal/(TaxableBal-WdTaxableDist)
TaxableBal = TaxableBal - WdTaxable
RothBal = RothBal - WdRoth
CF = RealizedGain
return (Shortfall,NetIncome,WdTrad,WdTaxable,WdRoth,Tax,TradBal_C,TradBal_S,TaxableBal,TaxableCB,RothBal,CF)
#
def Simulate(FilingStatus,Age,S_Age,TradBal_C,TradBal_S, TaxableBal,TaxableCB, RothBal, Ret_Total_Vec,Ret_Interest_Vec, Ret_Dividend_Vec, PRLT,Strategy, WHPer,WageWHPer,UIWHPer, ATWD_Vec, SocialSecurity_Vec,TotalATWDReq_Vec,WageIncome_Vec,UInc_Vec, TradPer, TaxablePer, RothPer, Horizon):
Longevity = 0
for i in range(1,Horizon+1):
#print(i)
WageWH = WageIncome_Vec[i-1]*WageWHPer
UIncWH = UInc_Vec[i-1]*UIWHPer
if i==1:
LastTax = 0
LastWH = 0
CFLast = 0
Shortfall,NetIncome,WdTrad,WdTaxable,WdRoth,LastTax,TradBal_C,TradBal_S,TaxableBal,TaxableCB,RothBal,CFLast = Simulate_One_Year(FilingStatus, Age,S_Age,TradBal_C,TradBal_S, TaxableBal,TaxableCB, RothBal,Ret_Total_Vec[i-1], Ret_Interest_Vec[i-1], Ret_Dividend_Vec[i-1],PRLT, Strategy, TradPer, TaxablePer, RothPer, ATWD_Vec[i-1],TotalATWDReq_Vec[i-1],WHPer,SocialSecurity_Vec[i-1], WageIncome_Vec[i-1], UInc_Vec[i-1], WageWH, UIncWH,LastTax,LastWH, CFLast)
LastWH = WageWH + UIncWH + WdTrad*WHPer
#print(i,"-",round(WdTrad,2),round(WdTaxable,2),round(WdRoth,2),
# round(LastTax,2),round(TradBal_C,2),round(TradBal_S,2),
# round(TaxableBal,2),round(TaxableCB,2),round(RothBal,2))
Age = Age+1
S_Age = S_Age+1
if (TradBal_C+TradBal_S+TaxableBal+RothBal)<0.01:
Longevity = Longevity + 1 - Shortfall/ATWD_Vec[i-1]
break
else:
Longevity = Longevity + 1
return Longevity,(TradBal_C+TradBal_S+TaxableBal+RothBal)
Client_DOB = dt.datetime(1955,1,3) Spouse_DOB = dt.datetime(1955,1,3) FilingStatus = "Single" CurrentDate = dt.datetime(2020,1,1) TradBal_C = 75000 TradBal_S = 0 TaxableBal = 175000 TaxableCB = 1 RothBal = 105000 LTAA = "50%" PRLT = 0.3 WHPer = 0.1 WageWHPer = .1 UIWHPer = .1 Strategy = "RATIO"
Age = relativedelta(CurrentDate,Client_DOB).years S_Age = relativedelta(CurrentDate,Spouse_DOB).years Horizon = 110 - (Age if FilingStatus!='MFJ' else min(Age,S_Age))
if Strategy=="ORDER": TaxablePer = .9999 TradPer = 1-TaxablePer RothPer = 0 ############################
SocialSecurity_Vec = np.repeat(0,Horizon) TotalATWDReq_Vec = np.repeat(67500,Horizon) WageIncome_Vec = np.repeat(0,Horizon) UInc_Vec = np.repeat(0,Horizon)
ATWD_Vec = TotalATWDReq_Vec - WageIncome_Vec (1-WageWHPer) - UInc_Vec (1-UIWHPer) - SocialSecurity_Vec Returns = {'20%':[0.0303,0.0146,0.0028],'30%':[0.0363,0.0136,0.0042],'40%':[0.0412,0.012,0.0055], '50%':[0.0462,0.0103,0.0069],'60%':[0.0504,0.0086,0.0083],'70%':[0.0531,0.0063,0.0097], '85%':[0.0596,0.0035,0.0118]} Ret_Total_Vec = np.repeat(Returns[LTAA][0],Horizon) Ret_Interest_Vec = np.repeat(Returns[LTAA][1],Horizon) Ret_Dividend_Vec = np.repeat(Returns[LTAA][2],Horizon)
TradPer = 0.0 TaxablePer = 0.6 RothPer = 0.4 a = Simulate(FilingStatus,Age,S_Age,TradBal_C,TradBal_S, TaxableBal,TaxableCB, RothBal, Ret_Total_Vec,Ret_Interest_Vec, Ret_Dividend_Vec, PRLT,Strategy, WHPer,WageWHPer,UIWHPer, ATWD_Vec, SocialSecurity_Vec,TotalATWDReq_Vec,WageIncome_Vec,UInc_Vec, TradPer, TaxablePer, RothPer, Horizon)
def Grid10Optim(FilingStatus, Age, S_Age, TradBal_C, TradBal_S, TaxableBal, TaxableCB, RothBal, Ret_Total_Vec, Ret_Interest_Vec, Ret_Dividend_Vec, PRLT, Strategy, WHPer, WageWHPer, UIWHPer, ATWD_Vec, SocialSecurity_Vec, TotalATWDReq_Vec, WageIncome_Vec, UInc_Vec, Horizon): max_l = 0 max_bal = 0 max_trad = 0 max_taxable = 0
for i in range(0, 11):
for j in range(0, 11):
if (i + j) / 10 <= 1:
l, bal = Simulate(FilingStatus, Age, S_Age, TradBal_C, TradBal_S, TaxableBal, TaxableCB, RothBal,
Ret_Total_Vec, Ret_Interest_Vec, Ret_Dividend_Vec,
PRLT, Strategy, WHPer, WageWHPer, UIWHPer,
ATWD_Vec, SocialSecurity_Vec, TotalATWDReq_Vec, WageIncome_Vec, UInc_Vec,
i / 10, j / 10, 1 - i / 10 - j / 10, Horizon)
if i == 0 and j == 0:
solutions = pd.DataFrame([[l, i / 10, j / 10]], columns=['Longevity', 'Trad', 'Taxable'])
else:
solutions.loc[len(solutions)] = [l, i / 10, j / 10]
if (l > max_l or (l == max_l and bal > max_bal)):
max_trad = i / 10
max_taxable = j / 10
max_l = l
max_bal = bal
# print(i/10,j/10,l,bal,max_l,max_bal)
return max_trad, max_taxable, round(1 - max_trad - max_taxable, 1), solutions, max_l
Client_DOB = dt.datetime(1955,1,3) Spouse_DOB = dt.datetime(1955,1,3) FilingStatus = "Single" CurrentDate = dt.datetime(2020,1,1) TradBal_C = 75000 TradBal_S = 0 TaxableBal = 175000 TaxableCB = 1 RothBal = 105000 LTAA = "50%" PRLT = 0.3 WHPer = 0.1 WageWHPer = .1 UIWHPer = .1 Strategy = "RATIO"
Age = relativedelta(CurrentDate,Client_DOB).years S_Age = relativedelta(CurrentDate,Spouse_DOB).years Horizon = 110 - (Age if FilingStatus!='MFJ' else min(Age,S_Age))
if Strategy=="ORDER": TaxablePer = .9999 TradPer = 1-TaxablePer RothPer = 0 ############################
SocialSecurity_Vec = np.repeat(0,Horizon) TotalATWDReq_Vec = np.repeat(67500,Horizon) WageIncome_Vec = np.repeat(0,Horizon) UInc_Vec = np.repeat(0,Horizon)
ATWD_Vec = TotalATWDReq_Vec - WageIncome_Vec (1-WageWHPer) - UInc_Vec (1-UIWHPer) - SocialSecurity_Vec Returns = {'20%':[0.0303,0.0146,0.0028],'30%':[0.0363,0.0136,0.0042],'40%':[0.0412,0.012,0.0055], '50%':[0.0462,0.0103,0.0069],'60%':[0.0504,0.0086,0.0083],'70%':[0.0531,0.0063,0.0097], '85%':[0.0596,0.0035,0.0118]} Ret_Total_Vec = np.repeat(Returns[LTAA][0],Horizon) Ret_Interest_Vec = np.repeat(Returns[LTAA][1],Horizon) Ret_Dividend_Vec = np.repeat(Returns[LTAA][2],Horizon)
TradPer = .6 TaxablePer = 0 RothPer = .40
a = Grid10Optim(FilingStatus, Age, S_Age, TradBal_C, TradBal_S, TaxableBal, TaxableCB, RothBal, Ret_Total_Vec, Ret_Interest_Vec, Ret_Dividend_Vec, PRLT, Strategy, WHPer, WageWHPer, UIWHPer, ATWD_Vec, SocialSecurity_Vec, TotalATWDReq_Vec, WageIncome_Vec, UInc_Vec, Horizon)
print(a)
class standardCase: @property def filing_status(self) -> str: return self._filing_status
@filing_status.setter
def filing_status(self, filing_status):
self._filing_status = filing_status
@property
def client_DOB(self) -> dt.datetime:
return self._client_DOB
@client_DOB.setter
def client_DOB(self, client_DOB):
self._client_DOB = client_DOB
@property
def spouse_DOB(self) -> dt.datetime:
return self._spouse_DOB
@spouse_DOB.setter
def spouse_DOB(self, spouse_DOB):
self._spouse_DOB = spouse_DOB
@property
def client_age(self) -> int:
return self._client_age
@client_age.setter
def client_age(self, client_age):
self._client_age = client_age
@property
def spouse_age(self) -> int:
return self._spouse_age
@spouse_age.setter
def spouse_age(self, spouse_age):
self._spouse_age = spouse_age
@property
def expenses(self) -> int:
return self._expenses
@expenses.setter
def expenses(self, expenses):
self._expenses = expenses
@property
def trad_bal(self) -> int:
return self._trad_bal
@trad_bal.setter
def trad_bal(self, trad_bal):
self._trad_bal = trad_bal
@property
def tax_bal(self) -> int:
return self._tax_bal
@tax_bal.setter
def tax_bal(self, tax_bal):
self._tax_bal = tax_bal
@property
def tax_cb(self) -> int:
return self._tax_cb
@tax_cb.setter
def tax_cb(self, tax_cb):
self._tax_cb = tax_cb
@property
def roth_bal(self) -> int:
return self._roth_bal
@roth_bal.setter
def roth_bal(self, roth_bal):
self._roth_bal = roth_bal
def __init__(self, filing_status: str = None, client_DOB: dt.datetime = None, spouse_DOB: dt.datetime = None, \
client_age: int = None, spouse_age: int = None, expenses: int = None, \
trad_bal: int = None, tax_bal: int = None, tax_cb: int = None, roth_bal: int = None):
self.filing_status = filing_status
self.client_DOB = client_DOB
self.spouse_DOB = spouse_DOB
self.client_age = client_age
self.spouse_age = spouse_age
self.expenses = expenses
self.trad_bal = trad_bal
self.tax_bal = tax_bal
self.tax_cb = tax_cb
self.roth_bal = roth_bal
def __str__(self):
return self.__repr__()
def __repr__(self):
return "FilingStatus: {}\n" \
"Client DOB: {}\n" \
"Spouse DOB: {}\n" \
"Client Age: {}\n" \
"Spouse Age: {}\n" \
"Expenses: {}\n" \
"Traditional Balance: {}\n" \
"Taxable Balance: {}\n" \
"Taxable Cost Basis: {}\n" \
"Roth Balance: {}".format(self.filing_status, self.client_DOB, self.spouse_DOB, self.client_age,
self.spouse_age, \
self.expenses, self.trad_bal, self.tax_bal, self.tax_cb, self.roth_bal)
standard_cases_data = pd.read_csv("Standard_Cases.csv").set_index(["Index"]) stochastic_returns = pd.read_csv("250 Returns 50% TAM.csv").set_index(["SIM\YEAR"])
Static_Returns = {'20%': [0.0303, 0.0146, 0.0028], '30%': [0.0363, 0.0136, 0.0042], '40%': [0.0412, 0.012, 0.0055], '50%': [0.0462, 0.0103, 0.0069], '60%': [0.0504, 0.0086, 0.0083], '70%': [0.0531, 0.0063, 0.0097], '85%': [0.0596, 0.0035, 0.0118]}
number_of_cases = 1600 case_list = []
for index in range(number_of_cases): filing_status = standard_cases_data.iat[index, 0] client_DOB = standard_cases_data.iat[index, 1] spouse_DOB = standard_cases_data.iat[index, 2] client_age = standard_cases_data.iat[index, 3] spouse_age = standard_cases_data.iat[index, 4] expenses = standard_cases_data.iat[index, 20] trad_bal = standard_cases_data.iat[index, 21] tax_bal = standard_cases_data.iat[index, 22] tax_cb = standard_cases_data.iat[index, 23] roth_bal = standard_cases_data.iat[index, 24]
new_case = standardCase(filing_status, client_DOB, spouse_DOB, client_age, spouse_age, expenses, trad_bal, tax_bal,
tax_cb, roth_bal)
case_list.append(new_case)
########Set Constants################### CurrentDate = dt.datetime(2020, 1, 1) LTAA = "50%" PRLT = 0.3 WHPer = 0.1 WageWHPer = .1 UIWHPer = .1 Strategy = "ORDER"
start_time = time.time()
index = 0 for case in case_list: print(f"Index {str(index)} Starting!")
Horizon = 110 - (case.client_age if case.filing_status != 'MFJ' else min(case.client_age, case.spouse_age))
if Strategy == "ORDER":
TaxablePer = .9999
TradPer = 1 - TaxablePer
RothPer = 0
############################
SocialSecurity_Vec = np.repeat(0, Horizon)
TotalATWDReq_Vec = np.repeat(case.expenses, Horizon)
WageIncome_Vec = np.repeat(0, Horizon)
UInc_Vec = np.repeat(0, Horizon)
######Calculated##########################
ATWD_Vec = TotalATWDReq_Vec - WageIncome_Vec * (1 - WageWHPer) - UInc_Vec * (1 - UIWHPer) - SocialSecurity_Vec
Ret_Total_Vec = np.repeat(Static_Returns[LTAA][0], Horizon)
Ret_Interest_Vec = np.repeat(Static_Returns[LTAA][1], Horizon)
Ret_Dividend_Vec = np.repeat(Static_Returns[LTAA][2], Horizon)
##### conventional wisdom, static returns######
# longevity_obj = Simulate(
# FilingStatus=case.filing_status,
# Age=case.client_age,
# S_Age=case.spouse_age,
# TradBal_C=case.trad_bal,
# TradBal_S=0,
# TaxableBal=case.tax_bal,
# TaxableCB=case.tax_cb,
# RothBal=case.roth_bal,
# Ret_Total_Vec=Ret_Total_Vec,
# Ret_Interest_Vec=Ret_Interest_Vec,
# Ret_Dividend_Vec=Ret_Dividend_Vec,
# PRLT=PRLT,
# Strategy=Strategy,
# WHPer=WHPer,
# WageWHPer=WageWHPer,
# UIWHPer=UIWHPer,
# ATWD_Vec=ATWD_Vec,
# SocialSecurity_Vec=SocialSecurity_Vec,
# TotalATWDReq_Vec=TotalATWDReq_Vec,
# WageIncome_Vec=TotalATWDReq_Vec,
# UInc_Vec=UInc_Vec,
# TradPer=.9999,
# TaxablePer=0.0001,
# RothPer=0,
# Horizon=Horizon)
# standard_cases_data.iat[index, 25] = longevity_obj[0]
##### conventional wisdom, 90% CL#########
results_by_sim = []
for sim in range(1, 250):
longevity_obj = Simulate(
FilingStatus=case.filing_status,
Age=case.client_age,
S_Age=case.spouse_age,
TradBal_C=case.trad_bal,
TradBal_S=0,
TaxableBal=case.tax_bal,
TaxableCB=case.tax_cb,
RothBal=case.roth_bal,
Ret_Total_Vec=stochastic_returns[sim - 1:sim].values[0],
Ret_Interest_Vec=Ret_Interest_Vec,
Ret_Dividend_Vec=Ret_Dividend_Vec,
PRLT=PRLT,
Strategy=Strategy,
WHPer=WHPer,
WageWHPer=WageWHPer,
UIWHPer=UIWHPer,
ATWD_Vec=ATWD_Vec,
SocialSecurity_Vec=SocialSecurity_Vec,
TotalATWDReq_Vec=TotalATWDReq_Vec,
WageIncome_Vec=TotalATWDReq_Vec,
UInc_Vec=UInc_Vec,
TradPer=.9999,
TaxablePer=0.0001,
RothPer=0,
Horizon=Horizon)
results_by_sim.append(longevity_obj[0])
ninety_percent_confidence_longevity = np.percentile(results_by_sim, 10)
print(ninety_percent_confidence_longevity)
standard_cases_data.iat[index, 26] = ninety_percent_confidence_longevity
##### conventional wisdom, 50% CL#########
fifty_percent_confidence_longevity = np.percentile(results_by_sim, 50)
print(fifty_percent_confidence_longevity)
standard_cases_data.iat[index, 25] = fifty_percent_confidence_longevity
##### ratio withdrawals, static returns####
winning_ratio_obj = Grid10Optim(
FilingStatus=case.filing_status,
Age=case.client_age,
S_Age=case.spouse_age,
TradBal_C=case.trad_bal,
TradBal_S=0, TaxableBal=case.tax_bal,
TaxableCB=case.tax_cb,
RothBal=case.roth_bal,
Ret_Total_Vec=Ret_Total_Vec,
Ret_Interest_Vec=Ret_Interest_Vec,
Ret_Dividend_Vec=Ret_Dividend_Vec,
PRLT=PRLT,
Strategy="RATIO",
WHPer=WHPer,
WageWHPer=WageWHPer,
UIWHPer=UIWHPer,
ATWD_Vec=ATWD_Vec,
SocialSecurity_Vec=SocialSecurity_Vec,
TotalATWDReq_Vec=TotalATWDReq_Vec,
WageIncome_Vec=TotalATWDReq_Vec,
UInc_Vec=UInc_Vec,
Horizon=Horizon)
longevity_obj = Simulate(
FilingStatus=case.filing_status,
Age=case.client_age,
S_Age=case.spouse_age,
TradBal_C=case.trad_bal,
TradBal_S=0,
TaxableBal=case.tax_bal,
TaxableCB=case.tax_cb,
RothBal=case.roth_bal,
Ret_Total_Vec=Ret_Total_Vec,
Ret_Interest_Vec=Ret_Interest_Vec,
Ret_Dividend_Vec=Ret_Dividend_Vec,
PRLT=PRLT,
Strategy="RATIO",
WHPer=WHPer,
WageWHPer=WageWHPer,
UIWHPer=UIWHPer,
ATWD_Vec=ATWD_Vec,
SocialSecurity_Vec=SocialSecurity_Vec,
TotalATWDReq_Vec=TotalATWDReq_Vec,
WageIncome_Vec=TotalATWDReq_Vec,
UInc_Vec=UInc_Vec,
TradPer=winning_ratio_obj[0],
TaxablePer=winning_ratio_obj[1],
RothPer=winning_ratio_obj[2],
Horizon=Horizon)
# standard_cases_data.iat[index, 27] = longevity_obj[0]
#### ratio withdrawals, 90% cl####
results_by_sim = []
for sim in range(1, 250):
longevity_obj = Simulate(
FilingStatus=case.filing_status,
Age=case.client_age,
S_Age=case.spouse_age,
TradBal_C=case.trad_bal,
TradBal_S=0,
TaxableBal=case.tax_bal,
TaxableCB=case.tax_cb,
RothBal=case.roth_bal,
Ret_Total_Vec=stochastic_returns[sim - 1:sim].values[0],
Ret_Interest_Vec=Ret_Interest_Vec,
Ret_Dividend_Vec=Ret_Dividend_Vec,
PRLT=PRLT,
Strategy=Strategy,
WHPer=WHPer,
WageWHPer=WageWHPer,
UIWHPer=UIWHPer,
ATWD_Vec=ATWD_Vec,
SocialSecurity_Vec=SocialSecurity_Vec,
TotalATWDReq_Vec=TotalATWDReq_Vec,
WageIncome_Vec=TotalATWDReq_Vec,
UInc_Vec=UInc_Vec,
TradPer=winning_ratio_obj[0],
TaxablePer=winning_ratio_obj[1],
RothPer=winning_ratio_obj[2],
Horizon=Horizon)
results_by_sim.append(longevity_obj[0])
ninety_percent_confidence_longevity = np.percentile(results_by_sim, 10)
standard_cases_data.iat[index, 28] = ninety_percent_confidence_longevity
##### ratio withdrawals, 50% CL#########
fifty_percent_confidence_longevity = np.percentile(results_by_sim, 50)
standard_cases_data.iat[index, 27] = fifty_percent_confidence_longevity
end_time = time.time()
print(f"{index} finished in {end_time - start_time:0.4f} seconds.")
index = index + 1
if index % 100 == 0:
df = pd.DataFrame(standard_cases_data)
df.to_csv("Standard_Cases_Output.csv", float_format="%.9f")
df = pd.DataFrame(standard_cases_data) df.to_csv("Standard_Cases_Output.csv", float_format="%.9f")
!/usr/bin/env python
coding: utf-8
In[3]:
import pandas as pd import numpy as np import os os.chdir("Z:\Healthcare Research\Healthcare Spending\Research Papers\HRS\HRS_2018v2 - 2023\Dipan")
In[4]:
HRS_data = pd.io.stata.read_stata('randhrs1992_2018v2.dta')
In[5]:
columns = pd.DataFrame(HRS_data.columns.to_list(),columns = ['Variables'])
columns.to_csv('HRS Variables.csv',index = False)
In[6]:
HRS_keep = HRS_data[['hhidpn','r1mstat','r1mpart','r2mstat','r2mpart','r3mstat','r3mpart','r4mstat','r4mpart', 'r5mstat','r5mpart','r6mstat','r6mpart','r7mstat','r7mpart','r8mstat','r8mpart','r9mstat', 'r9mpart','r10mstat','r10mpart','r11mstat','r11mpart','r12mstat','r12mpart','r13mstat', 'r13mpart','r14mstat','r14mpart','r1mrct','r2mrct','r3mrct','r4mrct','r5mrct','r6mrct', 'r7mrct','r8mrct','r9mrct','r10mrct','r11mrct','r12mrct','r13mrct','r14mrct','r1mcurln' ,'r2mcurln','r3mcurln','r4mcurln','r5mcurln','r6mcurln','r7mcurln','r8mcurln','r9mcurln', 'r10mcurln','r11mcurln','r12mcurln','r13mcurln','r14mcurln','r1mlenm','r2mlenm','r3mlenm', 'r4mlenm','r5mlenm','r6mlenm','r7mlenm','r8mlenm','r9mlenm','r10mlenm','r11mlenm','r12mlenm', 'r13mlenm','r14mlenm','r1mdiv','r2mdiv','r3mdiv','r4mdiv','r5mdiv','r6mdiv','r7mdiv','r8mdiv', 'r9mdiv','r10mdiv','r11mdiv','r12mdiv','r13mdiv','r14mdiv','r1cenreg','r2cenreg','r3cenreg', 'r4cenreg','r5cenreg','r6cenreg','r7cenreg','r8cenreg','r9cenreg','r10cenreg','r11cenreg', 'r12cenreg','r13cenreg','r14cenreg','r1cendiv','r2cendiv','r3cendiv','r4cendiv','r5cendiv', 'r6cendiv','r7cendiv','r8cendiv','r9cendiv','r10cendiv','r11cendiv','r12cendiv','r13cendiv', 'r14cendiv','h1hhresp','h4hhresp','h5hhresp','h6hhresp','h7hhresp','h8hhresp','h9hhresp', 'h10hhresp','h11hhresp','h12hhresp','h13hhresp','h14hhresp','h1cpl','h4cpl','h5cpl','h6cpl', 'h7cpl','h8cpl','h9cpl','h10cpl','h11cpl','h12cpl','h13cpl','h14cpl','ragender','raracem', 'rabmonth','rabyear','rabdate','r1iwbeg','r2iwbeg','r3iwbeg','r4iwbeg','r5iwbeg','r6iwbeg', 'r7iwbeg','r8iwbeg','r9iwbeg','r10iwbeg','r11iwbeg','r12iwbeg','r13iwbeg','r14iwbeg','r1iwend', 'r2iwend','r3iwend','r4iwend','r5iwend','r6iwend','r7iwend','r8iwend','r9iwend','r10iwend', 'r11iwend','r12iwend','r13iwend','r14iwend','r1iwbegf','r2iwbegf','r1iwendf','r2iwendf', 'r3iwendf','r1iwmid','r2iwmid','r3iwmid','r4iwmid','r5iwmid','r6iwmid','r7iwmid', 'r8iwmid','r9iwmid','r10iwmid','r11iwmid','r12iwmid','r13iwmid','r14iwmid','r1iwmidf', 'r2iwmidf','r3iwmidf','r4iwmidf','r5iwmidf','r6iwmidf','r7iwmidf','r8iwmidf','r9iwmidf', 'r10iwmidf','r11iwmidf','r12iwmidf','r13iwmidf','r14iwmidf','r1iwstat','r2iwstat','r3iwstat', 'r4iwstat','r5iwstat','r6iwstat','r7iwstat','r8iwstat','r9iwstat','r10iwstat','r11iwstat', 'r12iwstat','r13iwstat','r14iwstat','radmonth','radyear','raddate','radage_m','radage_y', 'raedyrs','raedegrm','raeduc','rarelig','ravetrn','rameduc','rafeduc','rabplace','r2pexit', 'r3pexit','r4pexit','r5pexit','r6pexit','r7pexit','r8pexit','r9pexit','r10pexit','r11pexit', 'r12pexit','r13pexit','r14pexit','r1agem_b','r2agem_b','r3agem_b','r4agem_b','r5agem_b', 'r6agem_b','r7agem_b','r8agem_b','r9agem_b','r10agem_b','r11agem_b','r12agem_b','r13agem_b', 'r14agem_b','r1agem_e','r2agem_e','r3agem_e','r4agem_e','r5agem_e','r6agem_e','r7agem_e', 'r8agem_e','r9agem_e','r10agem_e','r11agem_e','r12agem_e','r13agem_e','r14agem_e','r1agem_m', 'r2agem_m','r3agem_m','r4agem_m','r5agem_m','r6agem_m','r7agem_m','r8agem_m','r9agem_m', 'r10agem_m','r11agem_m','r12agem_m','r13agem_m','r14agem_m','r1agey_b','r2agey_b','r3agey_b', 'r4agey_b','r5agey_b','r6agey_b','r7agey_b','r8agey_b','r9agey_b','r10agey_b','r11agey_b', 'r12agey_b','r13agey_b','r14agey_b','r1agey_e','r2agey_e','r3agey_e','r4agey_e','r5agey_e', 'r6agey_e','r7agey_e','r8agey_e','r9agey_e','r10agey_e','r11agey_e','r12agey_e','r13agey_e', 'r14agey_e','r1agey_m','r2agey_m','r3agey_m','r4agey_m','r5agey_m','r6agey_m','r7agey_m', 'r8agey_m','r9agey_m','r10agey_m','r11agey_m','r12agey_m','r13agey_m','r14agey_m','rawtsamp', 'r1wthh','r2wthh','r3wthh','r4wthh','r5wthh','r6wthh','r7wthh','r8wthh','r9wthh','r10wthh', 'r11wthh','r12wthh','r13wthh','r14wthh','r1wtresp','r2wtresp','r3wtresp','r4wtresp', 'r5wtresp','r6wtresp','r7wtresp','r8wtresp','r9wtresp','r10wtresp','r11wtresp','r12wtresp', 'r13wtresp','r14wtresp','h2cpl','h2hhresp','h3cpl','h3hhresp','racohbyr','r1iwendm', 'r2iwendm','r3iwendm','r4iwendm','r5iwendm','r6iwendm','r7iwendm','r8iwendm','r9iwendm', 'r10iwendm','r11iwendm','r12iwendm','r13iwendm','r14iwendm','r1iwendy','r2iwendy','r3iwendy', 'r4iwendy','r5iwendy','r6iwendy','r7iwendy','r8iwendy','r9iwendy','r10iwendy','r11iwendy', 'r12iwendy','r13iwendy','r14iwendy','recendiv','recenreg','remstat','rempart','reiwbeg', 'reiwmid','reiwend','reiwmidf','respagem_b','respagem_m','respagem_e','respagey_b', 'respagey_m','respagey_e','r1shlt','r2shlt','r3shlt','r4shlt','r5shlt','r6shlt', 'r7shlt','r8shlt','r9shlt','r10shlt','r11shlt','r12shlt','r13shlt','r14shlt','r1hltc5', 'r2hltc5','r3hltc5','r4hltc5','r5hltc5','r6hltc5','r3hltc5f','r4hltc5f','r5hltc5f', 'r6hltc5f','r1hltc3','r2hltc3','r3hltc3','r4hltc3','r5hltc3','r6hltc3','r7hltc3', 'r8hltc3','r9hltc3','r10hltc3','r11hltc3','r12hltc3','r13hltc3','r14hltc3','r7vgactx', 'r8vgactx','r9vgactx','r10vgactx','r11vgactx','r12vgactx','r13vgactx','r14vgactx', 'r7mdactx','r8mdactx','r9mdactx','r10mdactx','r11mdactx','r12mdactx','r13mdactx', 'r14mdactx','r7ltactx','r8ltactx','r9ltactx','r10ltactx','r11ltactx','r12ltactx', 'r13ltactx','r14ltactx','r1bmi','r2bmi','r3bmi','r4bmi','r5bmi','r6bmi','r7bmi', 'r8bmi','r9bmi','r10bmi','r11bmi','r12bmi','r13bmi','r14bmi','r1height','r2height', 'r3height','r4height','r5height','r6height','r7height','r8height','r9height','r10height', 'r11height','r12height','r13height','r14height','r1weight','r2weight','r3weight', 'r4weight','r5weight','r6weight','r7weight','r8weight','r9weight','r10weight','r11weight', 'r12weight','r13weight','r14weight','r1back','r2back','r3back','r4back','r5back','r6back', 'r7back','r8back','r9back','r10back','r11back','r12back','r13back','r14back','r1smokev', 'r2smokev','r3smokev','r4smokev','r5smokev','r6smokev','r7smokev','r8smokev','r9smokev', 'r10smokev','r11smokev','r12smokev','r13smokev','r14smokev','r1smoken','r2smoken','r3smoken', 'r4smoken','r5smoken','r6smoken','r7smoken','r8smoken','r9smoken','r10smoken','r11smoken', 'r12smoken','r13smoken','r14smoken','r1drink','r2drink','r3drink','r4drink','r5drink', 'r6drink','r7drink','r8drink','r9drink','r10drink','r11drink','r12drink','r13drink', 'r14drink','r1drinkr','r2drinkr','r3drinkd','r4drinkd','r5drinkd','r6drinkd','r7drinkd', 'r8drinkd','r9drinkd','r10drinkd','r11drinkd','r12drinkd','r13drinkd','r14drinkd', 'r3drinkn','r4drinkn','r5drinkn','r6drinkn','r7drinkn','r8drinkn','r9drinkn','r10drinkn', 'r11drinkn','r12drinkn','r13drinkn','r14drinkn','r1hibp','r2hibp','r3hibp','r4hibp','r5hibp', 'r6hibp','r7hibp','r8hibp','r9hibp','r10hibp','r11hibp','r12hibp','r13hibp','r14hibp', 'r1hibpe','r2hibpe','r3hibpe','r4hibpe','r5hibpe','r6hibpe','r7hibpe','r8hibpe','r9hibpe', 'r10hibpe','r11hibpe','r12hibpe','r13hibpe','r14hibpe','r1diab','r2diab','r3diab','r4diab', 'r5diab','r6diab','r7diab','r8diab','r9diab','r10diab','r11diab','r12diab','r13diab', 'r14diab','r1diabe','r2diabe','r3diabe','r4diabe','r5diabe','r6diabe','r7diabe','r8diabe', 'r9diabe','r10diabe','r11diabe','r12diabe','r13diabe','r14diabe','r1cancr','r2cancr', 'r3cancr','r4cancr','r5cancr','r6cancr','r7cancr','r8cancr','r9cancr','r10cancr','r11cancr', 'r12cancr','r13cancr','r14cancr','r1cancre','r2cancre','r3cancre','r4cancre','r5cancre', 'r6cancre','r7cancre','r8cancre','r9cancre','r10cancre','r11cancre','r12cancre','r13cancre', 'r14cancre','r1lung','r2lung','r3lung','r4lung','r5lung','r6lung','r7lung','r8lung', 'r9lung','r10lung','r11lung','r12lung','r13lung','r14lung','r1lunge','r2lunge','r3lunge', 'r4lunge','r5lunge','r6lunge','r7lunge','r8lunge','r9lunge','r10lunge','r11lunge','r12lunge', 'r13lunge','r14lunge','r1heart','r2heart','r3heart','r4heart','r5heart','r6heart','r7heart', 'r8heart','r9heart','r10heart','r11heart','r12heart','r13heart','r14heart','r1hearte', 'r2hearte','r3hearte','r4hearte','r5hearte','r6hearte','r7hearte','r8hearte','r9hearte', 'r10hearte','r11hearte','r12hearte','r13hearte','r14hearte','r1strok','r2strok','r3strok', 'r4strok','r5strok','r6strok','r7strok','r8strok','r9strok','r10strok','r11strok','r12strok', 'r13strok','r14strok','r1stroke','r2stroke','r3stroke','r4stroke','r5stroke','r6stroke', 'r7stroke','r8stroke','r9stroke','r10stroke','r11stroke','r12stroke','r13stroke','r14stroke', 'r1psych','r2psych','r3psych','r4psych','r5psych','r6psych','r7psych','r8psych','r9psych', 'r10psych','r11psych','r12psych','r13psych','r14psych','r1psyche','r2psyche','r3psyche', 'r4psyche','r5psyche','r6psyche','r7psyche','r8psyche','r9psyche','r10psyche','r11psyche', 'r12psyche','r13psyche','r14psyche','r1arthr','r2arthr','r3arthr','r4arthr','r5arthr', 'r6arthr','r7arthr','r8arthr','r9arthr','r10arthr','r11arthr','r12arthr','r13arthr', 'r14arthr','r1arthre','r2arthre','r3arthre','r4arthre','r5arthre','r6arthre','r7arthre', 'r8arthre','r9arthre','r10arthre','r11arthre','r12arthre','r13arthre','r14arthre', 'r4memry','r5memry','r6memry','r7memry','r8memry','r9memry','r4memrye','r5memrye', 'r6memrye','r7memrye','r8memrye','r9memrye','r10alzhe','r11alzhe','r12alzhe','r13alzhe', 'r14alzhe','r10alzhee','r11alzhee','r12alzhee','r13alzhee','r14alzhee','r10demen','r11demen', 'r12demen','r13demen','r14demen','r10demene','r11demene','r12demene','r13demene','r14demene', 'r1conde','r2conde','r3conde','r4conde','r5conde','r6conde','r7conde','r8conde','r9conde', 'r10conde','r11conde','r12conde','r13conde','r14conde','r8bpsys','r9bpsys','r10bpsys', 'r11bpsys','r12bpsys','r13bpsys','r14bpsys','r8bpdia','r9bpdia','r10bpdia','r11bpdia', 'r12bpdia','r13bpdia','r14bpdia','r8bppuls','r9bppuls','r10bppuls','r11bppuls','r12bppuls', 'r13bppuls','r14bppuls','h1atotn','h1atotb','h2atotn','h2atotb','h3atotn','h3atotb', 'h4atotn','h4atotb','h5atotn','h5atotb','h6atotn','h6atotb','h7atotn','h7atotb','h8atotn', 'h8atotb','h9atotn','h9atotb','h10atotn','h10atotb','h11atotn','h11atotb','h12atotn', 'h12atotb','h13atotn','h13atotb','h14atotn','h14atotb','r1iearn','h1icap','r2iearn','h2icap', 'r3iearn','h3icap','r4iearn','h4icap','r5iearn','h5icap','r6iearn','h6icap','r7iearn', 'h7icap','r8iearn','h8icap','r9iearn','h9icap','r10iearn','h10icap','r11iearn','h11icap', 'r12iearn','h12icap','r13iearn','h13icap','r14iearn','h14icap','r1sayret','r2sayret', 'r3sayret','r4sayret','r5sayret','r6sayret','r7sayret','r8sayret','r9sayret','r10sayret', 'r11sayret','r12sayret','r13sayret','r14sayret','r1retmon','r2retmon','r3retmon','r4retmon', 'r5retmon','r6retmon','r7retmon','r8retmon','r9retmon','r10retmon','r11retmon','r12retmon', 'r13retmon','r14retmon','r1retyr','r2retyr','r3retyr','r4retyr','r5retyr','r6retyr', 'r7retyr','r8retyr','r9retyr','r10retyr','r11retyr','r12retyr','r13retyr','r14retyr', 'r1rplnyr','r2rplnyr','r3rplnyr','r4rplnyr','r5rplnyr','r6rplnyr','r7rplnyr','r8rplnyr', 'r9rplnyr','r10rplnyr','r11rplnyr','r12rplnyr','r13rplnyr','r14rplnyr','r1jjobs', 'r2jjobs','r3jjobs','r4jjobs','r5jjobs','r6jjobs','r7jjobs','r8jjobs','r9jjobs', 'r10jjobs','r11jjobs','r12jjobs','r13jjobs','r14jjobs','r1jyears','r2jyears', 'r3jyears','r4jyears','r5jyears','r6jyears','r7jyears','r8jyears','r9jyears', 'r10jyears','r11jyears','r12jyears','r13jyears','r14jyears','h1hhres','h1child', 'r1dadliv','r1momliv','r1dadage','r1momage','h2hhres','h2child','r2dadliv','r2momliv', 'r2dadage','r2momage','h3hhres','h3child','r3dadliv','r3momliv','r3dadage','r3momage', 'h4hhres','h4child','r4dadliv','r4momliv','r4dadage','r4momage','h5hhres','h5child', 'r5dadliv','r5momliv','r5dadage','r5momage','h6hhres','h6child','r6dadliv','r6momliv', 'r6dadage','r6momage','h7hhres','h7child','r7dadliv','r7momliv','r7dadage','r7momage', 'h8hhres','h8child','r8dadliv','r8momliv','r8dadage','r8momage','h9hhres','h9child', 'r9dadliv','r9momliv','r9dadage','r9momage','h10hhres','h10child','r10dadliv','r10momliv', 'r10dadage','r10momage','h11hhres','h11child','r11dadliv','r11momliv','r11dadage', 'r11momage','h12hhres','h12child','r12dadliv','r12momliv','r12dadage','r12momage', 'h13hhres','h13child','r13dadliv','r13momliv','r13dadage','r13momage','h14hhres', 'h14child','r14dadliv','r14momliv','r14dadage','r14momage']].copy()
HRS_keep.to_csv('HRS Columns Filtered.csv')
In[7]:
########Death dataframe###################### temp_df = HRS_keep.copy()
HRS_Death = pd.DataFrame()
HRS_Death['Death_Year'] = HRS_keep['radyear']
temp_df = temp_df.loc[temp_df['radyear']>0]
In[8]:
temp_df.to_csv('HRS Death.csv')
1.Death Age and Death year
3. Find wave number exit interview
4. All demographic variables for previous wave and ever haves - death wave values not reported - fill death wave values for ever haves by last wave data
5. Sample weights
- Run regression analysis
- Calculate sum(sample wts) and count - for each year, each condition
Death data
Death_df = pd.DataFrame() Death_df[['Household_Id','Death_Year','Death_Age','Gender','Race','Years_Education', 'Highest_Degree','Education_Category','Religion', 'Veteran_Status','Mother_Years_Of_Education', 'Father_Years_Of_Education','Division_Birth','Sample_Wt', 'Cohort_Of_Respondent','Division','Region', 'Marital_Status','Partnership_Status',]] = temp_df[['hhidpn','radyear','radage_y','ragender','raracem','raedyrs', 'raedegrm','raeduc','rarelig','ravetrn', 'rameduc','rafeduc','rabplace','rawtsamp', 'racohbyr','recendiv','recenreg','remstat','rempart',]]
In[9]:
def find_death_wave(df): temp = df.copy() temp['Death_Wave'] = 0 for i in range(df.shape[0]):
print(f'row:{i+1}')
print(f'wave:{j}')
Death_df['Death_Wave'] = find_death_wave(temp_df)
In[10]:
temp_df['r1drinkn'] = temp_df['r1drinkr'] temp_df['r2drinkn'] = temp_df['r2drinkr']
In[11]:
Death_df['Times_Married'] = None Death_df['Length_Current_Marriage'] = None Death_df['Length_Longest_Marriage'] = None Death_df['Times_Divorced'] = None Death_df['Household_Size'] = None Death_df['Whether_Couple_HH'] = None Death_df['HH_Wt'] = None Death_df['Person_Level_Wt'] = None Death_df['Self_Report_Health'] = None Death_df['Self_Report_Health_Change_3_Category'] = None Death_df['Frequent_Vigorous_Physical_Activity'] = None Death_df['Frequent_Moderate_Physical_Activity'] = None Death_df['Frequent_Light_Physical_Activity'] = None Death_df['BMI'] = None Death_df['Height'] = None Death_df['Weight'] = None Death_df['Back_Problem'] = None Death_df['Smoke'] = None Death_df['Smoke_Now'] = None Death_df['Drink'] = None Death_df['Drink_Days_Per_Week'] = None Death_df['Drink_Per_Day'] = None Death_df['Blood_Pressure'] = None Death_df['Diabetes'] = None Death_df['Cancer'] = None Death_df['Lung_Disease'] = None Death_df['Heart_Problem'] = None Death_df['Stroke'] = None Death_df['Pyschological_Problem'] = None Death_df['Arthritis'] = None Death_df['Alzheimers'] = None Death_df['Dementia'] = None Death_df['Sum_Conditions'] = None Death_df['Systolic_BP'] = None Death_df['Diastolic_BP'] = None Death_df['Pulse'] = None Death_df['Non_Housing_Assets'] = None Death_df['Total_Assets'] = None Death_df['Income'] = None Death_df['Capital_Income_HH'] = None Death_df['Reports_Retired'] = None Death_df['Retirement_Year'] = None Death_df['Planned_Retirement_Year'] = None Death_df['Job_History'] = None Death_df['Job_Years'] = None Death_df['Household_Size'] = None Death_df['Number_Living_Children'] = None Death_df['Father_Alive'] = None Death_df['Mother_Alive'] = None Death_df['Father_Age'] = None Death_df['Mother_Age'] = None
for i in range(Death_df.shape[0]): d_wave = Death_df['Death_Wave'].iloc[i] if d_wave==1: continue d_wave = Death_df['Death_Wave'].iloc[i] Death_df['Times_Married'].iloc[i] = temp_df[f'r{d_wave-1}mrct'].iloc[i] Death_df['Length_Current_Marriage'].iloc[i] = temp_df[f'r{d_wave-1}mcurln'].iloc[i] Death_df['Length_Longest_Marriage'].iloc[i] = temp_df[f'r{d_wave-1}mlenm'].iloc[i] Death_df['Times_Divorced'].iloc[i] = temp_df[f'r{d_wave-1}mdiv'].iloc[i] Death_df['Household_Size'].iloc[i] = temp_df[f'h{d_wave-1}hhresp'].iloc[i] Death_df['Whether_Couple_HH'].iloc[i] = temp_df[f'h{d_wave-1}cpl'].iloc[i] Death_df['HH_Wt'].iloc[i] = temp_df[f'r{d_wave-1}wthh'].iloc[i] Death_df['Person_Level_Wt'].iloc[i] = temp_df[f'r{d_wave-1}wtresp'].iloc[i] Death_df['Self_Report_Health'].iloc[i] = temp_df[f'r{d_wave-1}shlt'].iloc[i] Death_df['Self_Report_Health_Change_3_Category'].iloc[i] = temp_df[f'r{d_wave-1}hltc3'].iloc[i] Death_df['BMI'].iloc[i] = temp_df[f'r{d_wave-1}bmi'].iloc[i] Death_df['Height'].iloc[i] = temp_df[f'r{d_wave-1}height'].iloc[i] Death_df['Weight'].iloc[i] = temp_df[f'r{d_wave-1}weight'].iloc[i] Death_df['Back_Problem'].iloc[i] = temp_df[f'r{d_wave-1}back'].iloc[i] Death_df['Smoke'].iloc[i] = temp_df[f'r{d_wave-1}smokev'].iloc[i] Death_df['Smoke_Now'].iloc[i] = temp_df[f'r{d_wave-1}smoken'].iloc[i] Death_df['Drink'].iloc[i] = temp_df[f'r{d_wave-1}drink'].iloc[i] Death_df['Drink_Per_Day'].iloc[i] = temp_df[f'r{d_wave-1}drinkn'].iloc[i] Death_df['Blood_Pressure'].iloc[i] = temp_df[f'r{d_wave-1}hibpe'].iloc[i] Death_df['Diabetes'].iloc[i] = temp_df[f'r{d_wave-1}diabe'].iloc[i] Death_df['Cancer'].iloc[i] = temp_df[f'r{d_wave-1}cancre'].iloc[i] Death_df['Lung_Disease'].iloc[i] = temp_df[f'r{d_wave-1}lunge'].iloc[i] Death_df['Heart_Problem'].iloc[i] = temp_df[f'r{d_wave-1}hearte'].iloc[i] Death_df['Stroke'].iloc[i] = temp_df[f'r{d_wave-1}stroke'].iloc[i] Death_df['Pyschological_Problem'].iloc[i] = temp_df[f'r{d_wave-1}psyche'].iloc[i] Death_df['Arthritis'].iloc[i] = temp_df[f'r{d_wave-1}arthre'].iloc[i] Death_df['Sum_Conditions'].iloc[i] = temp_df[f'r{d_wave-1}conde'].iloc[i] Death_df['Non_Housing_Assets'].iloc[i] = temp_df[f'h{d_wave-1}atotn'].iloc[i] Death_df['Total_Assets'].iloc[i] = temp_df[f'h{d_wave-1}atotb'].iloc[i] Death_df['Income'].iloc[i] = temp_df[f'r{d_wave-1}iearn'].iloc[i] Death_df['Capital_Income_HH'].iloc[i] = temp_df[f'h{d_wave-1}icap'].iloc[i] Death_df['Reports_Retired'].iloc[i] = temp_df[f'r{d_wave-1}sayret'].iloc[i] Death_df['Retirement_Year'].iloc[i] = temp_df[f'r{d_wave-1}retyr'].iloc[i] Death_df['Planned_Retirement_Year'].iloc[i] = temp_df[f'r{d_wave-1}rplnyr'].iloc[i] Death_df['Job_History'].iloc[i] = temp_df[f'r{d_wave-1}jjobs'].iloc[i] Death_df['Job_Years'].iloc[i] = temp_df[f'r{d_wave-1}jyears'].iloc[i] Death_df['Household_Size'].iloc[i] = temp_df[f'h{d_wave-1}hhres'].iloc[i] Death_df['Number_Living_Children'].iloc[i] = temp_df[f'h{d_wave-1}child'].iloc[i] Death_df['Father_Alive'].iloc[i] = temp_df[f'r{d_wave-1}dadliv'].iloc[i] Death_df['Mother_Alive'].iloc[i] = temp_df[f'r{d_wave-1}momliv'].iloc[i] Death_df['Father_Age'].iloc[i] = temp_df[f'r{d_wave-1}dadage'].iloc[i] Death_df['Mother_Age'].iloc[i] = temp_df[f'r{d_wave-1}momage'].iloc[i]
In[ ]:
Death_df.to_csv('HRS Death Cleaned.csv',index = False)
In[10]:
temp_df = HRS_keep.copy()
HRS_keep['radyear'].unique()
In[11]:
Incidence Data
0. For each wave - remove people who died before this wave - exit interview before this wave
1.Interview end Year
2.Ever haves - Each condition
- calculate sum(sample wts) and cout - for each year and each condtions
def find_if_death_wave(df):
temp = df.copy()
temp['Death_Wave'] = None
for i in range(df.shape[0]):
if not temp['radyear'].iloc[i]>0:
continue
for j in range(1,15):
if df[f'r{j}iwstat'].loc[i]=='5.nr,died this wv':
temp['Death_Wave'].iloc[i] = j
return temp['Death_Wave']
temp_df['Death_Wave'] = find_if_death_wave(temp_df)
In[12]:
def count_chronic(name_c,df):
count_c = []
for i in range(1,15):
if f'r{i}{name_c}' not in temp_df.columns:
count_c.append(0)
else:
count_c.append(temp_df[f'r{i}{name_c}'].value_counts()['1.yes'])
return count_c
In[13]:
Chronic_Incidence = pd.DataFrame(list(range(1,15)),columns=[''])
In[14]:
name_c = 'hibpe'
count_c = {}
sumwt_c = {}
Also include Age in this loop
for i in range(1,15):
years_list = temp_df.loc[temp_df[f'r{i}iwendy']>0][f'r{i}iwendy'].unique()
for year in years_list:
temp = temp_df.loc[temp_df[f'r{i}iwendy']==year]
for age in range(55,101):
temp1 = temp.loc[temp_df[f'r{i}agey_e']==age]
print(f'{i} {int(year)} {age} {temp1.shape[0]}')
Count
if f'{int(year)}_{age}' not in count_c.keys():
if f'r{i}{name_c}' in temp1.columns:
countc[f'{int(year)}{age}'] = temp1[f'r{i}{name_c}'].value_counts()['1.yes']
else:
countc[f'{int(year)}{age}'] = 0
else:
if f'r{i}{name_c}' in temp1.columns:
countc[f'{int(year)}{age}'] += temp1[f'r{i}{name_c}'].value_counts()['1.yes']
else:
countc[f'{int(year)}{age}'] += 0
Weight Sum
if f'{int(year)}_{age}' not in sumwt_c.keys():
if f'r{i}{name_c}' in temp1.columns:
sumwtc[f'{int(year)}{age}'] = temp1[f'rawtsamp'].loc[temp1[f'r{i}{name_c}']=='1.yes'].sum()
else:
sumwtc[f'{int(year)}{age}'] = 0
else:
if f'r{i}{name_c}' in temp1.columns:
sumwtc[f'{int(year)}{age}'] += temp1[f'rawtsamp'].loc[temp1[f'r{i}{name_c}']=='1.yes'].sum()
else:
sumwtc[f'{int(year)}{age}'] += 0
count_c_list = []
sumwt_c_list = []
for i in range(1992,2020):
for j in range(55,101):
if f'{i}_{j}' in count_c.keys():
count_c_list.append([i,j,countc[f'{i}{j}']])
if f'{i}_{j}' in sumwt_c.keys():
sumwt_c_list.append([i,j,sumwtc[f'{i}{j}']])
count_df = pd.DataFrame(count_c_list,columns = ['Year','Age','Count'])
sumwt_df = pd.DataFrame(sumwt_c_list,columns = ['Year','Age','Sum_Wt'])
sumwt_df = pd.merge(count_df, sumwt_df, how='left', left_on=['Year','Age'], right_on = ['Year','Age'])
In[22]:
Incidence Data
def incidence_chronic(name_c,df): temp_df = df.copy() count_c = {} sumwt_c = {}
print(f'{i} {int(year)} {age} {temp1.shape[0]}')
In[23]:
Male
temp_df = HRS_keep.loc[HRS_keep['ragender']=='1.male'].copy() chronic_list = ['hibpe','diabe','cancre','lunge','hearte','stroke','psyche','arthre','alzhee','demene'] for i in chronic_list: a = incidence_chronic(i,temp_df) if i == chronic_list[0]: Incidence = a.copy() continue Incidence = pd.merge(Incidence, a, how='left', left_on=['Year','Age'], right_on = ['Year','Age']) Incidence_Male = Incidence.copy()
Female
temp_df = HRS_keep.loc[HRS_keep['ragender']=='2.female'].copy() chronic_list = ['hibpe','diabe','cancre','lunge','hearte','stroke','psyche','arthre','alzhee','demene'] for i in chronic_list: a = incidence_chronic(i,temp_df) if i == chronic_list[0]: Incidence = a.copy() continue Incidence = pd.merge(Incidence, a, how='left', left_on=['Year','Age'], right_on = ['Year','Age']) Incidence_Female = Incidence.copy()
In[25]:
Incidence.to_csv("Incidence Counts and Sum of Weights.csv",index = False)
In[26]:
Death Counts and Sum Weigts
Male
temp_df = HRS_keep.loc[HRS_keep['radyear']>0] temp_df = temp_df.loc[temp_df['ragender']=='1.male'] chronic_list = ['hibpe','diabe','cancre','lunge','hearte','stroke','psyche','arthre','alzhee','demene'] for i in chronic_list: a = incidence_chronic(i,temp_df) if i == chronic_list[0]: Death_count_df = a.copy() continue Death_count_df = pd.merge(Death_count_df, a, how='left', left_on=['Year','Age'], right_on = ['Year','Age']) Death_count_Male = Death_count_df.copy()
Male
temp_df = HRS_keep.loc[HRS_keep['radyear']>0] temp_df = temp_df.loc[temp_df['ragender']=='2.female'] chronic_list = ['hibpe','diabe','cancre','lunge','hearte','stroke','psyche','arthre','alzhee','demene'] for i in chronic_list: a = incidence_chronic(i,temp_df) if i == chronic_list[0]: Death_count_df = a.copy() continue Death_count_df = pd.merge(Death_count_df, a, how='left', left_on=['Year','Age'], right_on = ['Year','Age']) Death_count_Female = Death_count_df.copy()
Death_count_df.to_csv("Death Counts and Sum of Weights.csv",index = False)
In[28]:
with pd.ExcelWriter('Counts and Sum Weights Chronic - 8 May 2023.xlsx') as writer: Incidence_Male.to_excel(writer, sheet_name='Incidence_Male') Death_count_Male.to_excel(writer, sheet_name='Death_count_Male') Incidence_Female.to_excel(writer, sheet_name='Incidence_Female') Death_count_Female.to_excel(writer, sheet_name='Death_count_Female')
In[16]:
Total Count for Persons
Incidence Data
def total_person_count(df): temp_df = df.copy() count_c = {} sumwt_c = {}
count_c_death = {}
sumwt_c_death = {}
print(f'{i} {int(year)} {age} {temp1.shape[0]}')
count_cdeath[f'{int(year)}{age}'] = temp1[f'r{i}pexit'].value_counts()['1.exit interview this wave']
count_cdeath[f'{int(year)}{age}'] += temp1[f'r{i}pexit'].value_counts()['1.exit interview this wave']
sumwt_cdeath[f'{int(year)}{age}'] = temp1[f'rawtsamp'].loc[temp1[f'r{i}pexit']=='1.exit interview this wave'].sum()
sumwt_cdeath[f'{int(year)}{age}'] = temp1[f'rawtsamp'].loc[temp1[f'r{i}pexit']=='1.exit interview this wave'].sum()
count_c_death_list = []
sumwt_c_death_list = []
count_c_death_list.append([i,j,count_cdeath[f'{i}{j}']])
sumwt_c_death_list.append([i,j,sumwt_cdeath[f'{i}{j}']])
count_death_df = pd.DataFrame(count_c_death_list,columns = ['Year','Age',f'Death_Count'])
sumwt_death_df = pd.DataFrame(sumwt_c_death_list,columns = ['Year','Age',f'Death_Sum_Wt'])
final_df = pd.merge(final_df, count_death_df, how='left', left_on=['Year','Age'], right_on = ['Year','Age'])
final_df = pd.merge(final_df, sumwt_death_df, how='left', left_on=['Year','Age'], right_on = ['Year','Age'])
person_counts_male = total_person_count(HRS_keep.loc[HRS_keep['ragender']=='1.male']) person_counts_female = total_person_count(HRS_keep.loc[HRS_keep['ragender']=='2.female'])
In[17]:
Total Counts of Death
Male
temp_df = HRS_keep.loc[HRS_keep[f'radyear']>0] temp_df = temp_df.loc[temp_df['ragender']=='1.male'] a = pd.pivot_table(temp_df, index=['radyear','radage_y'],values=['rawtsamp'],aggfunc=len) a.columns = ['Death_Count'] b = pd.pivot_table(temp_df, index=['radyear','radage_y'],values=['rawtsamp'],aggfunc=np.sum) b.columns = ['Death_Sum'] temp_df = pd.concat([a,b]) temp_df = temp_df.reset_index() temp_df.columns = ['Year',"Age",'Death_Count','Death_Sum'] death_count_male = temp_df.copy()
Female
temp_df = HRS_keep.loc[HRS_keep[f'radyear']>0] temp_df = temp_df.loc[temp_df['ragender']=='2.female'] a = pd.pivot_table(temp_df, index=['radyear','radage_y'],values=['rawtsamp'],aggfunc=len) a.columns = ['Death_Count'] b = pd.pivot_table(temp_df, index=['radyear','radage_y'],values=['rawtsamp'],aggfunc=np.sum) b.columns = ['Death_Sum'] temp_df = pd.concat([a,b]) temp_df = temp_df.reset_index() temp_df.columns = ['Year',"Age",'Death_Count','Death_Sum'] death_count_female = temp_df.copy()
In[20]:
mortality_df = pd.merge(person_counts, death_counts, how='left', left_on=['Year','Age'], right_on = ['Year','Age'])
mortality_df.columns = ['Year','Age','Total_Count','Total_Sum','Death_Count','Death_Sum']
person_counts.to_csv('Total and Death Person Counts and Sum Wts.csv')
In[21]:
temp_df.to_csv('Total Death Counts.csv')
In[19]:
with pd.ExcelWriter('Total Person and Total Death Counts and Sum 8 may 2023.xlsx') as writer: person_counts_male.to_excel(writer, sheet_name='person_counts_male') death_count_male.to_excel(writer, sheet_name='death_count_male') person_counts_female.to_excel(writer, sheet_name='person_counts_female') death_count_female.to_excel(writer, sheet_name='death_count_female')
In[ ]:
In[ ]:
In[ ]:
In[ ]: