Closed matamadio closed 5 days ago
WorldPop superseded — move to GHS. The same reasoning applies, as GHS comes as tiles that can be processed automatically.
Need to think a better approach than tiles zip download. Alternatives to explore:
The only practical population dataset to fetch online is WorldPop — there is no API for GHSL; the only way is export from GEE, which takes too long and needs a Google Drive connection. Thus WorldPop is included as the default option for Population, but it can be overridden by providing a different file.
The reason this matters is that GHSL looks much more realistic compared to Worldpop, and that reflects in the risk estimates.
For built-up, we can easily leverage the WSF datasets (2019, evo, 3d) via STAC: https://geoservice.dlr.de/eoc/ogc/stac/v1/ I made a test notebook that harvests and mosaics the WSF-evo data depending on the selected country. It works, but it is a bit slow (a couple of minutes).
import concurrent.futures
import os
import shutil

import geopandas as gpd
import requests
from shapely.geometry import MultiPolygon, Polygon, shape
from shapely.ops import unary_union

from merge_utils import merge_tifs  # Assuming this is a custom module you've created
# Define the REST API URL
rest_api_url = "https://services.arcgis.com/iQ1dY19aHwbSDYIF/ArcGIS/rest/services/World_Bank_Global_Administrative_Divisions_VIEW/FeatureServer"
# Function to get the correct layer ID based on administrative level
def get_layer_id_for_adm(adm_level, timeout=30):
    """Return the numeric layer ID for the WB GAD layer at the given admin level.

    Queries the FeatureServer layer listing and looks for a layer named
    ``WB_GAD_ADM{adm_level}``.

    Args:
        adm_level: Administrative level (e.g. 0, 1, 2).
        timeout: Seconds to wait for the HTTP response. A ``requests`` call
            without a timeout can block indefinitely on an unresponsive server.

    Returns:
        The integer layer ID, or ``None`` if the listing could not be fetched
        or no matching layer exists (a message is printed in either case).
    """
    layers_url = f"{rest_api_url}/layers"
    response = requests.get(layers_url, params={'f': 'json'}, timeout=timeout)
    if response.status_code != 200:
        print(f"Failed to fetch layers. Status code: {response.status_code}")
        return None
    layers_info = response.json().get('layers', [])
    target_layer_name = f"WB_GAD_ADM{adm_level}"
    for layer in layers_info:
        if layer['name'] == target_layer_name:
            return layer['id']
    print(f"Layer matching {target_layer_name} not found.")
    return None
# Function to fetch the ADM data using the correct layer ID
def get_adm_data(country, adm_level, timeout=60):
    """Fetch administrative boundaries for a country as a GeoDataFrame.

    Resolves the layer ID for *adm_level* via :func:`get_layer_id_for_adm`,
    then queries the FeatureServer for all features where ``ISO_A3`` matches
    *country* and builds a GeoDataFrame from the returned GeoJSON.

    Args:
        country: ISO_A3 country code (e.g. ``'BGD'``).
        adm_level: Administrative level (e.g. 0, 1, 2).
        timeout: Seconds to wait for the HTTP response (prevents hanging
            forever on a dead server).

    Returns:
        A ``geopandas.GeoDataFrame`` with one row per admin unit, or ``None``
        on any failure (a message is printed in each failure case).
    """
    layer_id = get_layer_id_for_adm(adm_level)
    if layer_id is None:
        print("Invalid administrative level or layer mapping not found.")
        return None

    query_url = f"{rest_api_url}/{layer_id}/query"
    params = {
        'where': f"ISO_A3 = '{country}'",
        'outFields': '*',
        'f': 'geojson'
    }
    response = requests.get(query_url, params=params, timeout=timeout)
    if response.status_code != 200:
        print(f"Error fetching data: {response.status_code}")
        return None

    features = response.json().get('features', [])
    if not features:
        print("No features found for the specified query.")
        return None

    geometry = [shape(feature['geometry']) for feature in features]
    properties = [feature['properties'] for feature in features]
    return gpd.GeoDataFrame(properties, geometry=geometry)
# Function to download files
def download_file(url, dest_folder, timeout=120):
    """Stream *url* to disk inside *dest_folder* and return the local path.

    The destination file name is the last path segment of the URL. The
    response is streamed in 8 KiB chunks so large GeoTIFF tiles never have to
    fit in memory.

    Args:
        url: Direct download URL of the file.
        dest_folder: Directory to save into (created if missing).
        timeout: Seconds to wait for the server (connection/read).

    Returns:
        The path of the downloaded file.

    Raises:
        requests.HTTPError: If the server responds with an error status.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(dest_folder, exist_ok=True)
    local_filename = os.path.join(dest_folder, url.split('/')[-1])
    with requests.get(url, stream=True, timeout=timeout) as r:
        r.raise_for_status()
        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
        print(f"Downloaded {local_filename}")
    return local_filename
# Mapping of administrative levels to field names
# Maps an admin level (0 = country, 1, 2, ...) to the attribute names used by
# the WB_GAD_ADM{level} layers: 'code' is the HASC code field, 'name' the
# human-readable unit name field.
# NOTE(review): defined but not referenced in this script — presumably used by
# callers elsewhere; confirm before removing.
adm_field_mapping = {
    0: {'code': 'HASC_0', 'name': 'NAM_0'},
    1: {'code': 'HASC_1', 'name': 'NAM_1'},
    2: {'code': 'HASC_2', 'name': 'NAM_2'},
    # Add mappings for additional levels as needed
}
# Example usage: fetch ADM boundaries, search the DLR STAC catalog for
# WSF-Evolution tiles intersecting the country bbox, download and mosaic them.
country = 'BGD'  # Bangladesh's ISO_A3 code
adm_level = 2  # Administrative level (e.g., 2)

# Fetch the data
adm_data = get_adm_data(country, adm_level)
if adm_data is not None:
    # Step 1: Extract and combine the geometry into a single geometry
    ADM_area = adm_data.unary_union  # Combine all geometries into one
    # BUG FIX: the original tested isinstance(ADM_area, (MultiPolygon, shape)),
    # but `shape` is a function, not a class, so isinstance() raised TypeError
    # whenever it was reached. Wrap a bare Polygon so we always hold a
    # MultiPolygon.
    if isinstance(ADM_area, Polygon):
        ADM_area = MultiPolygon([ADM_area])

    # Convert to bounding box
    bbox = ADM_area.bounds  # (minx, miny, maxx, maxy)

    # Step 2: Construct the STAC search query
    search_query = {
        "bbox": list(bbox),  # [minx, miny, maxx, maxy]
        "collections": ["WSF_Evolution"],  # Assuming this is the correct collection name
        "limit": 100  # Adjust as needed
    }

    # Step 3: Send the POST request to the STAC API (timeout so a dead
    # endpoint cannot hang the script forever)
    stac_search_url = "https://geoservice.dlr.de/eoc/ogc/stac/v1/search"
    headers = {
        "Content-Type": "application/json"
    }
    response = requests.post(stac_search_url, headers=headers, json=search_query, timeout=120)

    # Step 4: Check the response status and parse the results
    if response.status_code == 200:
        search_results = response.json()
        items = search_results.get("features", [])
        if items:
            print(f"Found {len(items)} items.")
            # Directory to save downloaded files
            subfolder_name = f"{country}_tifs"
            download_folder = os.path.join(f"wd/data/{country}_WSF_evo/", subfolder_name)
            os.makedirs(download_folder, exist_ok=True)

            # Step 5: Download .tif assets into the subdirectory
            tif_files = []
            for item in items:
                print(f"ID: {item['id']}")
                for asset_key, asset_value in item['assets'].items():
                    if asset_value['href'].endswith('.tif'):
                        print(f"Downloading {asset_key} from {asset_value['href']}")
                        tif_files.append(download_file(asset_value['href'], download_folder))

            # Step 6: Mosaic the downloaded .tif files using the subdirectory
            if tif_files:
                print("Mosaicing downloaded .tif files...")
                merge_tifs(download_folder)

                # Step 7: Rename the mosaiced file and clean up the tile folder
                merged_tif_path = os.path.join(download_folder, f"{subfolder_name}.tif")
                output_filename = os.path.join(f"wd/data/{country}_WSF_evo/", f"{country}_WSF-evo.tif")
                os.rename(merged_tif_path, output_filename)
                print(f"Mosaiced file saved as {output_filename}")

                # Delete the tiles subfolder after mosaicing
                shutil.rmtree(download_folder)
                print(f"Deleted temporary folder: {download_folder}")
            else:
                print("No .tif files found for mosaicing.")
        else:
            print("No items found for the specified query.")
    else:
        print(f"Error {response.status_code}: {response.text}")
else:
    print("Missing ADM data!")
Task completed: WorldPop2020 and WSF2019 are fetched and processed automatically by the script as default layers.
Until now, all data had to be prepared and hand-fed to the script. The prototype for auto-fetch from the API or REST service is there for WorldPop; it only needs refinement with a "year" selector.
It should be something like:
The magic stuff (api harvesting):
I am discussing with DLR to make the same thing possible with WSF19 and WSF-Evo data, which would be cool to calculate change of risk across years.