ACS percentages code to borrow from

## melt to long format
df_acs_long = pd.melt(df_acs, id_vars= ['county', 'state', 'tract']).sort_values(by = 
                                    ['tract', 'county'])
## create prefix and suffix columns
df_acs_long['variable_prefix'], df_acs_long['variable_suffix'] = df_acs_long['variable'].str.split('_', 1).str

## manually indicate which prefix columns don't follow the pattern
prefixes_perc_notrelevant = ['B01002','B25031','B25107']
df_acs_long['perc_NA'] = np.where(df_acs_long.variable_prefix.isin(prefixes_perc_notrelevant), 
                                    1, 0)

prefixes_perc_extrahier = ['B25026', 'B25123']
df_acs_long['perc_extrahier'] = np.where(df_acs_long.variable_prefix.isin(prefixes_perc_extrahier),
                                        1, 0)

## group by county, tract, and variable prefix to generate 
## percentages (and later variable names)
df_acs_long_toiterate = df_acs_long[df_acs_long.perc_NA == 0].copy()
group_co_tract_varg = df_acs_long_toiterate.groupby(['county', 'tract', 'variable_prefix'])

################################## Generate percentages: auto-calc #################################

df_acs_long_percentage = []
#flag = 0
for group,data in group_co_tract_varg:
    #if flag > 100: # flag to test code
     #   break
#     print(data)
    tract = data.tract.iloc[0]
    county = data.county.iloc[0]
    prefix = data.variable_prefix.iloc[0]
    row_list_group = []
    for i in range(1,data.shape[0]):
        numerator = data.value.iloc[i]
        denominator = data.value.iloc[0].astype(float)
        percentage= numerator/denominator
        row = [county,tract,prefix]
        row = row + [data.variable_suffix.iloc[i],percentage]
        row_list_group.append(row)
#         print(row)
#         print(row_list_group)
#         print('___________________________')

#         break
    df_acs_long_percentage.append(pd.DataFrame(row_list_group))
    #flag = flag + 1

percentages_all_groups = pd.concat(df_acs_long_percentage)
percentages_all_groups.columns = ['county', 'tract', 'variable_prefix', 
                                  'variable_suffix', 'percentage']

percentages_all_groups['variable_prefix_suffix'] = percentages_all_groups.variable_prefix + "_" + percentages_all_groups.variable_suffix
percentages_all_groups.drop(columns = ['variable_prefix', 'variable_suffix'], inplace= True)

####################################### Reshape percentages to wide ##################################

## subset to a few counties/tract (so even row numbers for wide reshape) (remove after testing)

### commented out testing code
## test_forwide = percentages_all_groups[(percentages_all_groups.county == 5) & 
   ##                                   (percentages_all_groups.tract.isin([100, 200, 400]))].copy()

percentages_all_groups['county_tract'] = percentages_all_groups.county.astype(str) + "_" + percentages_all_groups.tract.astype(str)

percentages_all_groups.drop(columns = ['county', 'tract'], inplace= True)

## try the pivot -- before pivoting, creating county-tract 
percentages_wide_pivot = percentages_all_groups.pivot(index = 'county_tract',
                                 columns = 'variable_prefix_suffix',
                                 values = 'percentage')

percentages_wide_pivot_reset = percentages_wide_pivot.reset_index()

## other cols to drop
percentages_wide_pivot_reset.drop(columns = ['b01001_002e', 
                                      'b01001_026e'], inplace = True)
rebeccajohnson88 / qss20_s21_proj

ACS percentages code to borrow from #12