SpatioTemporal / STAREPandas

STAREpandas adds SpatioTemporal Adaptive Resolution Encoding (STARE) support to pandas DataFrames. https://starepandas.readthedocs.io/en/latest/
MIT License
4 stars 1 forks source link

Trixels truncated at dateline #143

Open mbauer288 opened 1 year ago

mbauer288 commented 1 year ago

Here's an example program to demonstrate this.

#! /usr/bin/env python -tt
# -*- coding: utf-8; mode: python -*-
r"""

dateline_issue.py
~~~~~~~~~~~~~~~~~

"""
# Standard Imports
import os
import pickle

# Third-Party Imports
from matplotlib import cm
from matplotlib import font_manager
from shapely.geometry import Polygon
import cartopy
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import dask
import geopandas
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas

# STARE Imports
import pystare
import starepandas

# ------------------------------------------------------------------------------

def main():
    ##
    # Tweak Pandas screen dump
    pandas.set_option("display.max_columns", None)
    pandas.set_option("display.width", 500)
    pandas.set_option("display.colheader_justify", "right")
    pandas.set_option("display.precision", 3)
    pandas.options.mode.copy_on_write = True

    ##
    # Tweak GeoPandas screen dump
    geopandas.options.display_precision = 3
    geopandas.options.use_pygeos = False

    # Mike's local copy
    # feature_data = "/Users/mbauer/tmp/data/tablespace/xcal/pickles/featuredb.pkl"
    # imerg_sidecar = "IMERG_stare.nc"

    # for some reason the FlexFS servers /bayesics folder is empty, but was something like this
    feature_data = "/tablespace/xcal/pickles/featuredb.pickle"
    imerg_sidecar = "/bayesics/p4/Source/GPM/IMERG_stare.nc"

    draw_plots = [False, True][0]

    ##
    # Read the IMERG Features file into a STAREPandas DataFrame
    with open(feature_data, 'rb') as f:
        imerg_sdf = pickle.load(f)

    ##
    # Extract a single feature/event
    #   subset_sdf =  <class 'starepandas.staredataframe.STAREDataFrame'>
    """
             index  label           timestamp                                                  x                                                  y                                         cell_areas   tot_area                                            precips  tot_precip                                               sids                                              cover                                            trixels
        0     6077     37 2021-02-05 14:30:00                [542, 542, 542, 543, 543, 544, 544]         [3233, 3234, 3235, 3234, 3235, 3233, 3234]  [100345481.30079782, 100345481.30079782, 10034...  7.032e+08  [1.355738, 2.0544746, 1.5360532, 1.3778167, 1....   5.003e+05  [904835998274304745, 904828974740792201, 90481...  [904812307752681481, 904814506775937033, 90481...  MULTIPOLYGON (((143.676 35.644, 143.460 35.663...
        1     6078     37 2021-02-05 15:00:00           [541, 541, 542, 542, 543, 543, 543, 544]   [3234, 3235, 3234, 3235, 3233, 3234, 3235, 3233]  [100219248.63592072, 100219248.63592072, 10034...  8.031e+08  [1.2717158, 1.9754196, 1.1044679, 1.495765, 1....   5.529e+05  [904827950646811209, 904933268391128745, 90482...  [904812307752681481, 904816705799192585, 90482...  MULTIPOLYGON (((143.676 35.644, 143.460 35.663...
        2     6079     37 2021-02-05 15:30:00           [540, 540, 541, 541, 542, 542, 542, 543]   [3235, 3236, 3235, 3236, 3234, 3235, 3236, 3234]  [100092710.68594712, 100092710.68594712, 10021...  8.021e+08  [1.5025604, 2.3538067, 1.3093063, 1.7862825, 1...   6.881e+05  [904919807797595113, 904934765162898217, 90493...  [904812307752681481, 904827700915470345, 90483...  MULTIPOLYGON (((143.676 35.644, 143.460 35.663...
        3     6080     37 2021-02-05 16:00:00  [534, 534, 534, 539, 539, 539, 539, 540, 540, ...  [3230, 3231, 3232, 3236, 3237, 3238, 3239, 323...  [99327093.64970006, 99327093.64970006, 9932709...  1.499e+09  [2.548102, 3.5231876, 1.6926019, 1.8397847, 1....   1.545e+06  [904296384236998153, 904297886683378313, 90429...  [904293338264371209, 904295537287626761, 90429...  MULTIPOLYGON (((143.317 36.440, 143.172 36.643...
        4     6081     37 2021-02-05 16:30:00  [532, 533, 533, 533, 534, 538, 538, 538, 538, ...  [3231, 3231, 3232, 3233, 3232, 3238, 3239, 324...  [99069463.27244306, 99198429.54881625, 9919842...  1.796e+09  [1.42, 2.77, 3.83, 1.8399999, 1.0699999, 2.0, ...   1.877e+06  [904308668621248745, 904300510651838537, 90430...  [904293338264371209, 904299935334137865, 90430...  MULTIPOLYGON (((143.317 36.440, 143.172 36.643...
        ..     ...    ...                 ...                                                ...                                                ...                                                ...        ...                                                ...         ...                                                ...                                                ...                                                ...
        199   6276     37 2021-02-09 18:00:00  [568, 568, 568, 569, 569, 569, 569, 569, 570, ...  [3597, 3598, 3599, 3595, 3596, 3597, 3598, 359...  [103519134.98180684, 103519134.98180684, 10351...  1.392e+10  [1.1695099, 1.6371919, 1.568149, 1.2314502, 1....   1.119e+07  [867223753433970025, 867209926233394793, 86714...  [867083665757175816, 867092461850198024, 86710...  MULTIPOLYGON (((179.472 32.661, 179.834 32.486...
        200   6277     37 2021-02-09 18:30:00  [571, 572, 572, 573, 573, 573, 573, 574, 574, ...  [3599, 3598, 3599, 3595, 3597, 3598, 3599, 359...  [103871730.60377292, 103988630.13166831, 10398...  1.282e+10  [1.9399999, 1.0699999, 2.1299999, 1.0, 1.59999...   1.045e+07  [867208080067029545, 867101236360906281, 86709...  [867083665757175816, 867092461850198025, 86709...  MULTIPOLYGON (((179.472 32.661, 179.834 32.486...
        201   6278     37 2021-02-09 19:00:00  [573, 574, 574, 575, 575, 575, 576, 576, 576, ...  [3599, 3597, 3599, 3597, 3598, 3599, 3597, 359...  [104105212.892217, 104221478.53028575, 1042214...  1.020e+10  [1.4010634, 1.1362529, 1.8140539, 1.2430454, 1...   1.036e+07  [868931415636743593, 867116255144589001, 86893...  [867085864780431369, 867114452082753545, 86778...  MULTIPOLYGON (((179.727 32.334, 179.653 32.574...
        202   6279     37 2021-02-09 19:30:00  [576, 577, 577, 578, 578, 579, 579, 579, 580, ...  [3599, 3598, 3599, 3598, 3599, 3597, 3598, 359...  [104453057.02328122, 104568369.17277576, 10456...  3.893e+09  [1.2154739, 1.3653224, 1.4656936, 1.0716105, 1...   3.633e+06  [868943611143033993, 868920339897876713, 86894...  [867787353198952456, 867796149291974665, 86780...  MULTIPOLYGON (((179.981 32.006, 179.769 31.702...
        203   6280     37 2021-02-09 20:00:00                          [584, 584, 585, 585, 586]                     [3598, 3599, 3598, 3599, 3599]  [105366615.73733008, 105366615.73733008, 10547...  5.273e+08  [1.0232406, 1.1071403, 1.07733, 1.2315224, 1.2...   2.991e+05  [867801516070089001, 867803219036514633, 86779...  [867796149291974665, 867800547338485769, 86780...  MULTIPOLYGON (((179.915 31.223, 179.842 31.463...
    [204 rows x 12 columns]
    """
    subset_sdf = imerg_sdf[imerg_sdf["label"].isin([37])]
    subset_sdf = subset_sdf.reset_index()
    del imerg_sdf

    # ##
    # # TMP save time debugging (comment out to "Find the SIDs for verts/geometry" after save)
    # with open('tmp.pkl', "wb") as pfile:
    #     pickle.dump(subset_sdf, pfile, pickle.HIGHEST_PROTOCOL)

    # ##
    # # TMP save time debugging (comment out above to "Find the SIDs for verts/geometry")
    # with open('tmp.pkl', 'rb') as f:
    #   subset_sdf = pickle.load(f)

    # <class 'pandas.core.series.Series'>
    single_sdf = subset_sdf.iloc[126, :]
    print(f"{type(single_sdf) = }")
    """
    sids (4060)  : [874279450188275529 ... 3584450484248280905]

    cover (617)  : [874278869849341960 ... 3584449687991615497]

    tixels       : MULTIPOLYGON (with 618 Polygons)

    Looping over these trixel Polygons I see that no trixels have longitudes past -180.
        tmp[  0] max/min lon =  +168.44 ...  +168.84
        tmp[  1] max/min lon =  +168.74 ...  +168.94
        tmp[  2] max/min lon =  +168.74 ...  +168.94
        ...
        tmp[615] max/min lon =  +176.25 ...  +176.59
        tmp[616] max/min lon =  +176.00 ...  +176.34
        tmp[617] max/min lon =  -180.00 ...  -179.97

        Min Lon  -180.00
        Max Lon  -179.97

    Now if I convert the SIDs into vertices I see something different:
        verts = starepandas.to_vertices(sids, wrap_lon=False)
            corner lats (12180,      30.74,      48.46):
            corner lons (12180,     162.82,     180.03):
            center lats (4060,       30.87,      48.37):
            center lons (4060,      162.93,     179.89):
    So the corner lons just barely cross the DL at 180.03 deg.

    But if I switch wrap_lon:
        verts = starepandas.to_vertices(sids, wrap_lon=True)
            corner lats (12180,      30.74,      48.46):
            corner lons (12180,    -179.97,     179.92):
            center lats (4060,       30.87,      48.37):
            center lons (4060,      162.93,     179.89):
    Now the corners lons span the interior between the DL +/-179 deg.
    """
    sids = single_sdf["sids"]
    cover = single_sdf["cover"]
    tixels = single_sdf["trixels"]

    print(f"sids {sids.shape}  : [{np.amin(sids)} ... {np.amax(sids)}]")
    print(f"cover {cover.shape}: [{np.amin(cover)} ... {np.amax(cover)}]")
    print(f"tixels : MULTIPOLYGON(with {len(tixels.geoms)} Polygons)")
    lons = []
    for inner_poly in range(len(tixels.geoms)):
        tmp = list(tixels.geoms[inner_poly].exterior.coords)
        lons.extend([_[0] for _ in tmp])
        tmp_lons = sorted([_[0] for _ in tmp])
        print(f"tmp[{inner_poly:3d}] max/min lon = {tmp_lons[0]:+8.2f} ... {tmp_lons[-1]:+8.2f}")
    print(f"Min lon {min(tmp_lons):+8.2f}")
    print(f"Max lon {max(tmp_lons):+8.2f}")

    verts = starepandas.to_vertices(sids, wrap_lon=False)
    print(f"\n{verts = }")
    plabs = ("corner lats", "corner lons", "center lats", "center lons")
    for pidx, part in enumerate(verts):
        print(f"\t{plabs[pidx]} ({len(part)}, {np.amin(part):10.2f}, {np.amax(part):10.2f}): {part}")

    verts = starepandas.to_vertices(sids, wrap_lon=True)
    print(f"\n{verts = }")
    plabs = ("corner lats", "corner lons", "center lats", "center lons")
    for pidx, part in enumerate(verts):
        print(f"\t{plabs[pidx]} ({len(part)}, {np.amin(part):10.2f}, {np.amax(part):10.2f}): {part}")

    if draw_plots:
        plot_dpi =  300
        plot_size = [12, 4]

        # WGS-84 Earth equatorial radius at sea level (meters)
        #   STARE uses ccrs.Globe(datum='WGS84', ellipse='WGS84') https://epsg.io/4326
        globe = ccrs.Globe(datum='WGS84', ellipse='WGS84')
        lon_0_global = 0.0
        lat_0_global = 90.0

        ##
        # Map extent
        min_lat, max_lat = 20, 70
        min_lon, max_lon = -180, 180

        # Determine the map extent (left_lon, right_lon, lower_lat, upper_lat) for the plot domain
        map_extent = (min_lon, max_lon, min_lat, max_lat)

        ##
        # Utility CRS

        # Geodetic:
        #   A 3D/spherical CRS based on latitude and longitude where geographical distance and coordinates are measured in degrees.
        geod_crs = ccrs.Geodetic(globe=globe)

        # PlateCarree Projection (AKA Equirectangular or Equidistant Cylindrical):
        #   A 2D CRS with a flat topology and Euclidean distance (m).
        flat_crs = ccrs.PlateCarree(central_longitude=lon_0_global, globe=globe)

        ##
        # Main Map CRS

        # PlateCarree Projection (AKA Equirectangular or Equidistant Cylindrical):
        #   A 2D CRS with a flat topology and Euclidean distance (m).
        map_crs = ccrs.PlateCarree(central_longitude=lon_0_global, globe=globe)
        proj_title = "PlateCarree"

        # Lambert Azimuthal Equal-Area
        #   A 2D CRS centered on (map_lat0, map_lon0), which is then the zero point of the flat topology w/ Euclidean distance (m).
        # polar_crs = ccrs.LambertAzimuthalEqualArea(central_longitude=lon_0_global, central_latitude=lat_0_global, globe=globe)

        # Tweak CRS
        map_crs._threshold /= 100.

        for index, row in subset_sdf.iterrows():
            #if index < 126:
            if index < 146:
                continue
            ##
            # Make Figure
            # --------------------------------------------------------------------------
            fig = plt.figure(figsize=(12, 4), frameon=True)
            geo_axes = plt.axes(projection=map_crs)

            ##
            # Set map extent
            geo_axes.set_xlim(left=min_lon, right=max_lon)
            geo_axes.set_ylim(bottom=min_lat, top=max_lat)

            ##
            # Color Land/Sea
            geo_axes.add_feature(cfeature.LAND, alpha=0.5)
            geo_axes.add_feature(cfeature.COASTLINE, alpha=0.5)

            dlat = 10
            par = np.arange(min_lat, max_lat + dlat, dlat)
            dlon = 30
            mer = np.arange(min_lon, max_lon + dlon, dlon)
            gl = geo_axes.gridlines(crs=map_crs, draw_labels=True, linewidth=0.5, color='k', alpha=0.5,
                                    linestyle='--', x_inline=False, y_inline=False)

            sdf_ = starepandas.STAREDataFrame({'index': [row.index]}, sids=[row.sids], trixels=[row.trixels])
            sdf_.plot(ax=geo_axes, trixels=True, boundary=True, figsize=plot_size, aspect=None, zorder=2, linewidth=0.5, color='r', alpha=0.7, transform=geod_crs)
            geo_axes.set_title(f"Feature {str(row.label)}: index {str(index)}")
            plt.show()
            plt.clf()
            plt.close('all')

        return

# ---Start of main code block.
if __name__ == '__main__':
    #
    # Local DASK Processes (https://dask.pydata.org/en/latest/scheduling.html?highlight=multiprocessing)
    #
    # To use DASK locally (i.e., cores on the machine you are working on) your python script must have a main()
    #   method and you need to import dask and issue the following directive. If not, STARE requests such as
    #       sids = starepandas.sids_from_geoseries(polys, level=10, convex=False, force_ccw=True, n_partitions=2, num_workers=4)
    #   Throws a RuntimeError:
    #        An attempt has been made to start a new process before the
    #        current process has finished its bootstrapping phase.
    #
    #        This probably means that you are not using fork to start your
    #        child processes and you have forgotten to use the proper idiom
    #        in the main module:
    #
    #            if __name__ == '__main__':
    #                freeze_support()
    #                ...
    #
    #        The "freeze_support()" line can be omitted if the program
    #        is not going to be frozen to produce an executable.
    #
    dask.config.set(scheduler='processes')

    ##
    # Run the main routine
    main()

I should add some potential dependency issues that might be at work here (e.g., Pandas 2.0).

Name Version
astropy 5.2.2
cartopy 0.21.1
gdal 3.6.3
geopandas 0.12.2
geopandas-base 0.12.2
geos 3.11.2
h5py 3.8.0
hdf4 4.2.15
hdf5 1.12.2
hdfeos2 2.20
matplotlib 3.7.1
numpy 1.24.2
pandas 2.0.0
proj 9.1.1
pygeos 0.14
pyhdf 0.10.5
pyproj 3.5.0
pyshp 2.3.1
pytest 7.3.1
python 3.11.3
shapely 2.0.1
STARE Installs
pystare 0.8.12 STARE
staremaster 0.0.4 STARE
starepandas 0.6.6 STARE
NiklasPhabian commented 10 months ago

I think this is connected to https://github.com/SpatioTemporal/STAREPandas/issues/156