Closed github-actions[bot] closed 2 years ago
https://github.com/fitzgeraldja/stc_unicef_cpi/blob/268f65fb2b4dc971a59c4f7465223796c2e51cd4/src/stc_unicef_cpi/data/make_dataset.py#L60
# -*- coding: utf-8 -*-
"""Assemble the hexagon-level dataset (target variable plus predictors).

The pipeline is work in progress: several predictor sources are still
commented out below and are kept as a record of the intended steps.
"""
import os.path
from datetime import date
from functools import reduce

import geopandas as gpd
import pandas as pd

from src.stc_unicef_cpi.data.process_geotiff import geotiff_to_df
from src.stc_unicef_cpi.data.get_facebook_data import get_facebook_estimates
from src.stc_unicef_cpi.data.get_osm_data import get_road_density
from src.stc_unicef_cpi.data.get_econ_data import download_econ_data
from src.stc_unicef_cpi.data.get_cell_tower_data import get_cell_data
from src.stc_unicef_cpi.utils.geospatial import (
    create_geometry,
    get_hex_code,
    get_hex_centroid,
    get_hexes_for_ctry,
    aggregate_hexagon,
    get_lat_long,
)
from src.stc_unicef_cpi.data.get_speedtest_data import (
    get_speedtest_url,
    get_speedtest_info,
)


def read_input_unicef(path_read):
    """Read the CSV file at *path_read* and return it as a DataFrame."""
    return pd.read_csv(path_read)


def select_country(df, country_code, lat, long):
    """Return the rows of *df* for one country that have both coordinates.

    Column names are lower-cased on a new frame, so the caller's *df* is
    not modified. The returned frame therefore has lower-case column names.

    :param df: input frame; must contain a ``countrycode`` column once its
        column names are lower-cased
    :param country_code: value compared against the whitespace-stripped
        ``countrycode`` column
    :param lat: name (lower-case) of the latitude column
    :param long: name (lower-case) of the longitude column
    :return: new DataFrame with rows of the requested country where neither
        *lat* nor *long* is missing
    """
    # rename() returns a new frame instead of mutating the caller's columns
    df = df.rename(columns=str.lower)
    in_country = df["countrycode"].str.strip() == country_code
    # Non-inplace dropna avoids SettingWithCopyWarning on the filtered slice
    return df.loc[in_country].dropna(subset=[lat, long])


def aggregate_dataset(df):
    """Average the numeric columns of *df* per ``hex_code``.

    :param df: frame containing a ``hex_code`` column
    :return: one row per ``hex_code`` with the mean of every numeric column
    """
    # numeric_only=True: string columns (e.g. countrycode) cannot be averaged
    # and make pandas >= 2.0 raise TypeError when left in.
    return df.groupby(by=["hex_code"], as_index=False).mean(numeric_only=True)


def create_target_variable(country_code, lat, long, res):
    """Build the per-hexagon target data for one country.

    Reads the microdata CSV, keeps the rows of *country_code* that have
    coordinates, passes them through ``get_hex_code``, averages them per
    ``hex_code`` and finally passes the result through ``get_hex_centroid``.

    :param country_code: country code to select
    :param lat: name of the latitude column
    :param long: name of the longitude column
    :param res: resolution forwarded to ``get_hex_code``
    :return: the frame returned by ``get_hex_centroid``
    """
    # NOTE(review): path is relative to the current working directory.
    source = "../../../data/childpoverty_microdata_gps_21jun22.csv"
    df = read_input_unicef(source)
    sub = select_country(df, country_code, lat, long)
    # presumably adds the "hex_code" column used by aggregate_dataset — confirm
    sub = get_hex_code(sub, lat, long, res)
    sub = sub.reset_index(drop=True)
    sub = aggregate_dataset(sub)
    sub = get_hex_centroid(sub, "hex_code")
    return sub


def append_predictor_variables(
    country_code="NGA", country="Nigeria", lat="latnum", long="longnum", res=6
):
    """Run the predictor-variable steps that are currently enabled.

    At present this builds the target variable, requests Facebook estimates
    for the first 900 hexagon centroids of the country, downloads the
    economic data when the checked file is missing and fetches the speed
    test data. Nothing is returned yet; the merge steps are still commented
    out.

    :param country_code: country code used to select the microdata and to
        name the Facebook output file
    :param country: country name forwarded to ``get_hexes_for_ctry``
    :param lat: name of the latitude column in the microdata
    :param long: name of the longitude column in the microdata
    :param res: hexagon resolution
    """
    # TODO: Integrate satellite information to pipeline
    # TODO: Include threshold to pipeline
    sub = create_target_variable(country_code, lat, long, res)

    # countries hexes
    hexes = get_hexes_for_ctry(country, res)
    ctry = pd.DataFrame(hexes, columns=['hex_code'])
    ctry = get_hex_centroid(ctry, "hex_code")
    ctry_name = country_code.lower()
    today = date.today()
    dat_scp = today.strftime("%d-%m-%Y")
    name_out = f"fb_{ctry_name}_res{res}_{dat_scp}.parquet"

    ## Facebook connectivity metrics
    # NOTE(review): only the first 900 centroids are queried — confirm intent.
    connect_fb = get_facebook_estimates(
        ctry["hex_centroid"].values[0:900], name_out, res
    )
    # sub = sub.merge(connect_fb, on=["hex_centroid", "lat", "long"], how="left")
    #
    ## Download data if it does not exist
    # This assignment used to be commented out together with the block below,
    # which left path_data undefined for the speed test step (NameError).
    path_data = "../../../data/"
    # file_exists = os.path.exists(f"{path_data}conflict/GEDEvent_v22_1.csv")
    # if file_exists:
    #     pass
    # else:
    #     download_econ_data(path_data)
    #
    ## Critical Infrastructure
    # ci = geotiff_to_df(f"{path_data}infrastructure/CISI/010_degree/global.tif")
    # ci = create_geometry(ci, "latitude", "longitude")
    # ci = get_hex_code(ci, "latitude", "longitude")
    # ci = aggregate_hexagon(ci, "fric", "cii", "mean")
    #
    ## Conflict Zones
    # cz = pd.read_csv(f"{path_data}conflict/GEDEvent_v22_1.csv")
    # cz = cz[cz.country == country]
    # cz = create_geometry(cz, "latitude", "longitude")
    # cz = get_hex_code(cz, "latitude", "longitude")
    # cz = aggregate_hexagon(cz, "geometry", "n_conflicts", "count")
    #
    ## Open Cell Data
    # cell = get_cell_data(country)
    # cell = create_geometry(cell, "lat", "long")
    # cell = get_hex_code(cell, "lat", "long")
    # cell = aggregate_hexagon(cell, "cid", "cells", "count")
    #
    ## Road density
    # road = get_road_density(country, res)

    # Speed Test
    url, name = get_speedtest_url(service_type='mobile', year=2021, q=4)
    # NOTE(review): the file name looks copied from the conflict check above;
    # confirm which file should gate the download.
    if not os.path.exists(f"{path_data}connectivity/GEDEvent_v22_1.csv"):
        download_econ_data(path_data)
    # NOTE(review): original indentation was lost; assumed to run
    # unconditionally after the download check — confirm.
    get_speedtest_info(url, name)

    ## Aggregate Data
    # dfs = [sub, cell, ci, cz, road]
    # sub = reduce(
    #     lambda left, right: pd.merge(left, right, on="hex_code", how="left"), dfs
    # )


# NOTE(review): this runs the whole pipeline at import time; consider moving
# it under an ``if __name__ == "__main__":`` guard.
append_predictor_variables()

## Health Sites
# hh = pd.read_csv("nga_health.csv")
# hh = hh[~hh.X.isna()]
# hh = create_geometry(hh, "X", "Y")
# hh = get_hex_code(hh, "X", "Y")
# hh = aggregate_hexagon(hh, "geometry", "n_health", "count")
#
# ## Education Facilities
# edu = gpd.read_file("nga_education")
# edu = get_lat_long(edu, "geometry")
# edu = get_hex_code(edu, "lat", "long")
# edu = aggregate_hexagon(edu, "geometry", "n_education", "count")
Closed in 1345acee1e2c82fbc3e77a0d9ed03a897fbff544
https://github.com/fitzgeraldja/stc_unicef_cpi/blob/268f65fb2b4dc971a59c4f7465223796c2e51cd4/src/stc_unicef_cpi/data/make_dataset.py#L60