seareport / coastlines

1 stars 0 forks source link

GSHHG: Ensure that the output is valid topographically in south polar projection, too #7

Open pmav99 opened 3 days ago

pmav99 commented 1 day ago

Maybe something like this:

import cartopy.crs as ccrs
import geopandas as gpd
import geoviews as gv
import panel as pn
import pyproj
import seareport_data as D
import shapely

STEREO = ccrs.NorthPolarStereo()
WORLD_SINUSOIDAL = pyproj.CRS(("esri", 54008))

def sort_by_area(gdf: gpd.GeoDataFrame, equal_area_crs: pyproj.CRS = WORLD_SINUSOIDAL) -> gpd.GeoDataFrame:
    index = gdf.to_crs(equal_area_crs).area.sort_values(ascending=False).index
    sorted_gdf = gdf.iloc[index].reset_index(drop=True)
    return sorted_gdf

def clip_antarctica(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    bbox = shapely.box(-180, -85.5, 180, 90)
    clipped = gpd.clip(gdf, bbox, sort=True)
    return clipped

def simplify_geometry(geo: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    # Make all geometries valid if necessary: https://stackoverflow.com/a/69283950
    if not geo.is_valid.all():
        geo = geo.make_valid().to_frame(name="geometry")
    assert geo.is_valid.all()
    # Merge what can be merged and split the MultiGeometry to simple geometries
    geo = gpd.GeoDataFrame(geometry=[geo.union_all()], crs=geo.crs).explode(ignore_index=True)
    assert geo.is_valid.all()
    return geo

def make_valid(gdf: gpd.GeoDataFrame, crs: pyproj.CRS = STEREO) -> gpd.GeoDataFrame:
    gdf_crs = gdf.to_crs(crs)
    if gdf_crs.is_valid.all():
        valid_crs = gdf_crs
    else:
        valid_crs = gdf_crs.make_valid().explode(ignore_index=True)
        assert valid_crs.is_valid.all(), f"GeoDataFrame not valid in: {crs}"
        assert valid_crs.geom_type.unique() == ["Polygon"]
    return valid_crs.to_crs(gdf.crs)

def normalize_gdf(gdf: gpd.GeoDataFrame):
    clipped = clip_antarctica(gdf)
    simplified = simplify_geometry(clipped)
    valid = make_valid(simplified)
    normalized = sort_by_area(valid)
    return normalized

for resolution in ["c", "l", "i", "h", "f"]:
    for shoreline in ("6", ):
        resolution, shoreline
        gdf = D.gshhg_df(resolution, shoreline)
        ngdf = normalize_gdf(gdf)
        ngdf.to_file(f"{resolution}{shoreline}_n.gpkg")