Deltares / dfm_tools

A Python package for pre- and postprocessing D-FlowFM model input and output files
https://deltares.github.io/dfm_tools/
GNU General Public License v3.0
65 stars 11 forks source link

Consider adding preprocess_schism #642

Open veenstrajelmer opened 10 months ago

veenstrajelmer commented 10 months ago

Attributes in schism ugrid file from MW were not all (properly) set. Maybe future schism files have it corrected, so a function like this has no added value.

import xugrid as xu

file_nc = r"p:\11208420-habitat-kernel-dev\development_cases\Richard_EUNIS_ecotopes_20230403\SCHISM model\conv_schism_raw_2.nc"

def preprocess_schism_test(ds):
    # convert topology_dimension attribute: str to int
    topodim = int(ds["schism_mesh"].attrs["topology_dimension"])
    mesh_attrs = {"topology_dimension":topodim,
                  "name":"SCHISM_hgrid"}
    ds["schism_mesh"] = ds["schism_mesh"].assign_attrs(mesh_attrs)
    # add necessary attributes for face_node_connectivity
    fnc_attrs = {"_FillValue":-1, "start_index":1}
    ds["SCHISM_hgrid_face_nodes"] = ds["SCHISM_hgrid_face_nodes"].assign_attrs(fnc_attrs)
    # set x/y coords, seems not necessary
    # ds = ds.set_coords(["SCHISM_hgrid_node_x","SCHISM_hgrid_node_y"])
    datavar_attrs = {"mesh":"schism_mesh"}
    for varn in ds.data_vars:
        ds[varn] = ds[varn].assign_attrs(datavar_attrs)
    return ds

uds = xu.open_mfdataset(file_nc, preprocess=preprocess_schism_test)
uds.salinity.isel(time=-1, nSCHISM_vgrid_layers=0).ugrid.plot()

SCHISM issue created: https://github.com/schism-dev/schism/issues/114

Also check SHYFEM model output: https://github.com/Deltares/dfm_tools/issues/745

veenstrajelmer commented 10 months ago

Temporary backup:

import matplotlib.pyplot as plt
plt.close("all")
import xarray as xr
import numpy as np
from netCDF4 import default_fillvals
# pip install xugrid, adds ugrid and grid accessors to xarray datasets
import xugrid as xu
# convenient progressbar, this line also checks if dask is installed. This is quite essential for large netcdf files.
from dask.diagnostics import ProgressBar

# dfm_tools is optional, we use dfmt.uda_to_faces(): https://github.com/Deltares/dfm_tools/blob/9ecc5cd59875acd1a3c32506161413ef1d75f7f5/dfm_tools/xugrid_helpers.py#L414
# normally install with `pip install dfm_tools`, but use main branch since `uda_to_faces` was not released yet: `pip install git+https://github.com/deltares/dfm_tools`
import dfm_tools as dfmt

def preprocess_schism_scribedio(ds):
    """
    This preprocessing function describes the minimal changes that would have to be made 
    in the SCHISM output in order for it to be read via ugrid conventions with xugrid.
    It is probably not a complete overview.
    """
    # set varnames
    gridname = "SCHISM_hgrid"
    fnc_varn = f"{gridname}_face_nodes"
    enc_varn = f"{gridname}_edge_nodes"
    node_x = f"{gridname}_node_x"
    node_y = f"{gridname}_node_y"

    # set topology attributes to empty variable
    topo_attrs = {"cf_role": "mesh_topology",
                 "topology_dimension": 2, # has to be int, not str
                 "node_coordinates": f"{node_x} {node_y}",
                 "face_node_connectivity": fnc_varn,
                 "edge_node_connectivity": enc_varn,
                 }
    ds[gridname] = xr.DataArray(np.array(default_fillvals["i4"], dtype=np.int32), attrs=topo_attrs)

    # assign necessary attributes to connectivity variables
    fnc_attrs = {"_FillValue":-1, "start_index":1}
    ds[fnc_varn] = ds[fnc_varn].assign_attrs(fnc_attrs)
    ds[enc_varn] = ds[enc_varn].assign_attrs(fnc_attrs)

    # set node_x/node_y as coordinate variables instead of data_vars
    ds = ds.set_coords([node_x,node_y])

    # to prevent xugrid UserWarning, but this is hardcoded and it should be different for lat/lon models.
    # "UserWarning: No standard_name of ('projection_x_coordinate', 'longitude', 'projection_y_coordinate', 'latitude') in
    # ['SCHISM_hgrid_node_x', 'SCHISM_hgrid_node_y']. Using SCHISM_hgrid_node_x and SCHISM_hgrid_node_y as projected x and y coordinates."
    projected = True
    if projected:
        ds[node_x] = ds[node_x].assign_attrs({"standard_name":"projection_x_coordinate"})
        ds[node_y] = ds[node_y].assign_attrs({"standard_name":"projection_y_coordinate"})
    else:
        ds[node_x] = ds[node_x].assign_attrs({"standard_name":"longitude"})
        ds[node_y] = ds[node_y].assign_attrs({"standard_name":"latitude"})

    # add variable with coordinate system, optional but convenient for loading into QGIS and other tools
    # not yet properly read/updated by xugrid: https://github.com/Deltares/xugrid/issues/42
    if projected:
        grid_mapping_name = 'Unknown projected'
        crs_varn = 'projected_coordinate_system'
        crs_num = 25832 #UTM Zone 32N from communication with BJ
    else:
        grid_mapping_name = 'latitude_longitude'
        crs_varn = 'wgs84'
        crs_num = 4326
    crs_str = f'EPSG:{crs_num}'
    crs_attrs = {'epsg': crs_num, # epsg or EPSG_code are required for correct interpretation by QGIS
                  'EPSG_code': crs_str, # epsg or EPSG_code are required for correct interpretation by QGIS
                  'grid_mapping_name': grid_mapping_name,
                  }
    ds[crs_varn] = xr.DataArray(np.array(default_fillvals['i4'],dtype=int),dims=(),attrs=crs_attrs)

    # mesh attribute is required for d-ecoimpact #TODO: why?
    # valueable other attrs are "location" (node/face/edge), 
    # "standard_name", "long_name", "units", "grid_mapping"
    for varn in ds.data_vars:
        ds[varn] = ds[varn].assign_attrs({'mesh': gridname})

    # time requires "units" attribute to be converted by xarray and other tools
    # refdate taken from params.nml
    ds['time'] = ds['time'].assign_attrs({"units":"seconds since 2017-01-02 00:00:00"})
    ds = xr.decode_cf(ds)

    #TODO: set _FillValue attribute for data_vars, test dataset did not seem to have nans

    return ds

file_nc_pat = r"c:\Users\veenstra\Downloads\example\1_raw_outputs\*.nc"
file_nc_out = r"c:\Users\veenstra\Downloads\example\dataset_ugrid_wgs84.nc"

# open the file with xarray, add ugrid conventions with preprocessing function and convert to UgridDataset
chunks = {"time":1} # using different chunks might improve or ruin perfomance

# open_mfdataset works but it adds a time dimension in case of multiday files
# adding `data_vars="minimal"` prevents this
print("open_mfdataset for all files")
ds = xr.open_mfdataset(file_nc_pat, chunks=chunks, data_vars="minimal")

# to check if the merging was correct, we had a time dimension in the enc with combine="by_coords"
assert len(ds.SCHISM_hgrid_edge_nodes.shape) == 2

print("preprocess_schism, ugriddataset")
ds = preprocess_schism_scribedio(ds)
uds = xu.UgridDataset(ds)

# then convert coordinates, also converts standard names from projection_x_coordinate to longitude
print("convert coordinates")
uds.ugrid.set_crs("EPSG:25832") #UTM Zone 32N from communication with BJ
uds_wgs84 = uds.ugrid.to_crs("EPSG:4326")
# update crs attrs for QGIS since xugrid does not this yet upon crs conversion: https://github.com/Deltares/xugrid/issues/42
uds_wgs84 = uds_wgs84.rename({'projected_coordinate_system':'wgs84'})
crs_attrs_new = {"epsg": 4326, "EPSG_code": "EPSG:4326", "grid_mapping_name":"latitude_longitude"}
uds_wgs84['wgs84'] = uds_wgs84['wgs84'].assign_attrs(crs_attrs_new)

print("interpolate nodes to faces")
dimname = uds.grid.node_dimension
varlist_tofaces = uds.data_vars
varlist_tofaces = ["salinity"] # you can also provide a different list of variable names here, or uds.data_vars to do all
for varname in varlist_tofaces: 
    uds_wgs84[varname] = dfmt.uda_to_faces(uds_wgs84[varname])

# test with a ugrid plot to show the salinity variable was properly connected to the ugrid topology
print("plot timestep")
fig, ax = plt.subplots()
uds_wgs84.salinity.isel(time=-1, nSCHISM_vgrid_layers=0).ugrid.plot()

# write to netcdf (per chunk, this takes a while)
print("writing to ugrid compliant netcdf file, takes a while")
with ProgressBar(): # adds a convenient progress bar
    uds_wgs84.ugrid.to_netcdf(file_nc_out.replace(".nc","_test.nc"))