oceanmodeling / searvey

Sea state observational data retrieval
https://searvey.readthedocs.io/en/stable/
GNU General Public License v3.0
22 stars 11 forks source link

Support other variables (tempreture, velocity) from CO-OPS #64

Open saeed-moghimi-noaa opened 2 years ago

saeed-moghimi-noaa commented 2 years ago

@zacharyburnett @pmav99 @brey

Thanks for the great tool. I just want to report that the download process for water temperature went through nicely:

water_temperature

However it generated error on netcdf file output:


In [1]: run data_region.py
 > get data ... water_temperature
  > plot figure for  water_temperature
 > save netcdf file
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
File /mnt/c/Users/Saeed.Moghimi/Documents/work/linux_working/00-working/15-stormevents/data_region.py:48, in <module>
     46     if True:
     47         print(' > save netcdf file')
---> 48         data.to_netcdf(var + '.nc')
     51 if True:
     52     import geopandas

File ~/miniconda3/envs/adcircpy_env/lib/python3.10/site-packages/xarray/core/dataset.py:1901, in Dataset.to_netcdf(self, path, mode, format, group, engine, encoding, unlimited_dims, compute, invalid_netcdf)
   1898     encoding = {}
   1899 from ..backends.api import to_netcdf
-> 1901 return to_netcdf(
   1902     self,
   1903     path,
   1904     mode,
   1905     format=format,
   1906     group=group,
   1907     engine=engine,
   1908     encoding=encoding,
   1909     unlimited_dims=unlimited_dims,
   1910     compute=compute,
   1911     invalid_netcdf=invalid_netcdf,
   1912 )

File ~/miniconda3/envs/adcircpy_env/lib/python3.10/site-packages/xarray/backends/api.py:1072, in to_netcdf(dataset, path_or_file, mode, format, group, engine, encoding, unlimited_dims, compute, multifile, invalid_netcdf)
   1067 # TODO: figure out how to refactor this logic (here and in save_mfdataset)
   1068 # to avoid this mess of conditionals
   1069 try:
   1070     # TODO: allow this work (setting up the file for writing array data)
   1071     # to be parallelized with dask
-> 1072     dump_to_store(
   1073         dataset, store, writer, encoding=encoding, unlimited_dims=unlimited_dims
   1074     )
   1075     if autoclose:
   1076         store.close()

File ~/miniconda3/envs/adcircpy_env/lib/python3.10/site-packages/xarray/backends/api.py:1119, in dump_to_store(dataset, store, writer, encoder, encoding, unlimited_dims)
   1116 if encoder:
   1117     variables, attrs = encoder(variables, attrs)
-> 1119 store.store(variables, attrs, check_encoding, writer, unlimited_dims=unlimited_dims)

File ~/miniconda3/envs/adcircpy_env/lib/python3.10/site-packages/xarray/backends/common.py:261, in AbstractWritableDataStore.store(self, variables, attributes, check_encoding_set, writer, unlimited_dims)
    258 if writer is None:
    259     writer = ArrayWriter()
--> 261 variables, attributes = self.encode(variables, attributes)
    263 self.set_attributes(attributes)
    264 self.set_dimensions(variables, unlimited_dims=unlimited_dims)

File ~/miniconda3/envs/adcircpy_env/lib/python3.10/site-packages/xarray/backends/common.py:350, in WritableCFDataStore.encode(self, variables, attributes)
    347 def encode(self, variables, attributes):
    348     # All NetCDF files get CF encoded by default, without this attempting
    349     # to write times, for example, would fail.
--> 350     variables, attributes = cf_encoder(variables, attributes)
    351     variables = {k: self.encode_variable(v) for k, v in variables.items()}
    352     attributes = {k: self.encode_attribute(v) for k, v in attributes.items()}

File ~/miniconda3/envs/adcircpy_env/lib/python3.10/site-packages/xarray/conventions.py:859, in cf_encoder(variables, attributes)
    856 # add encoding for time bounds variables if present.
    857 _update_bounds_encoding(variables)
--> 859 new_vars = {k: encode_cf_variable(v, name=k) for k, v in variables.items()}
    861 # Remove attrs from bounds variables (issue #2921)
    862 for var in new_vars.values():

File ~/miniconda3/envs/adcircpy_env/lib/python3.10/site-packages/xarray/conventions.py:859, in <dictcomp>(.0)
    856 # add encoding for time bounds variables if present.
    857 _update_bounds_encoding(variables)
--> 859 new_vars = {k: encode_cf_variable(v, name=k) for k, v in variables.items()}
    861 # Remove attrs from bounds variables (issue #2921)
    862 for var in new_vars.values():

File ~/miniconda3/envs/adcircpy_env/lib/python3.10/site-packages/xarray/conventions.py:279, in encode_cf_variable(var, needs_copy, name)
    277 var = maybe_default_fill_value(var)
    278 var = maybe_encode_bools(var)
--> 279 var = ensure_dtype_not_object(var, name=name)
    281 for attr_name in CF_RELATED_DATA:
    282     pop_to(var.encoding, var.attrs, attr_name)

File ~/miniconda3/envs/adcircpy_env/lib/python3.10/site-packages/xarray/conventions.py:234, in ensure_dtype_not_object(var, name)
    231             inferred_dtype = np.dtype(float)
    232         fill_value = inferred_dtype.type(np.nan)
--> 234     data = _copy_with_dtype(data, dtype=inferred_dtype)
    235     data[missing] = fill_value
    236 else:

File ~/miniconda3/envs/adcircpy_env/lib/python3.10/site-packages/xarray/conventions.py:195, in _copy_with_dtype(data, dtype)
    189 """Create a copy of an array with the given dtype.
    190
    191 We use this instead of np.array() to ensure that custom object dtypes end
    192 up on the resulting array.
    193 """
    194 result = np.empty(data.shape, dtype)
--> 195 result[...] = data
    196 return result

TypeError: float() argument must be a string or a real number, not 'NAType'

In [2]:

Here is my code:

from shapely.geometry import box
from datetime import datetime,timedelta
from stormevents.coops import coops_product_within_region

import geopandas
from  matplotlib import pyplot as plt

#vars = ['water_level','water_temperature','wind','salinity']
#vars = ['wind','salinity']
vars = ['water_level','water_temperature']
vars = ['water_temperature']

#hsofs bnd
#bnd = -99.0,5,-52.8,46.3    #US mainland
bnd = -98.5 ,-84.5,24.,31.5  #GulfM
bnd = -74.5 , -73.55  , 40.35 ,   41.2  #san_newyork

now_    = datetime.now()
past_   = now_ - timedelta(0.25)

for var in vars:
    print (' > get data ...', var)
    data = coops_product_within_region(
        product = var,
        start_date = past_.isoformat(),
        end_date   = now_.isoformat(),
        region=box(*bnd)
    )

    print (' > plot figure for ' , var)
    plt.figure()
    countries = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))

    figure, axis = plt.subplots(1, 1)
    figure.set_size_inches(12, 12 / 1.61803398875)
    countries.plot(color='lightgrey', ax=axis, zorder=-1)
    plt.scatter(x=data.x,y=data.y,c=data.v.mean(axis=1))
    plt.colorbar()
    plt.xlim(*bnd[:2])
    plt.ylim(*bnd[2:])
    axis.set_title(var)
    plt.savefig(var + '.png')
    plt.close('all')

    if True:
        print(' > save netcdf file')
        data.to_netcdf(var + '.nc')

Please look into this.

brey commented 2 years ago

Hi @saeed-moghimi-noaa . The issue is that your dataset has nan values (see for instance data.q variable) that need to be converted to float for writing.

The easiest way is to convert them on the fly with

data.fillna(0).to_netcdf(var + '.nc').

You can choose any value instead of zero.

BTW, you can run the same code with searvey just by replacing

from stormevents.coops import coops_product_within_region

with

from searvey.coops import coops_product_within_region

You can install searvey with

conda install -c conda-forge searvey

saeed-moghimi-noaa commented 2 years ago

Thanks for the prompt reply @brey.

saeed-moghimi-noaa commented 2 years ago

Worked fine. I will close the ticket. Thanks to @bery.

saeed-moghimi-noaa commented 2 years ago

Final code:

from shapely.geometry import box
from datetime import datetime,timedelta
from searvey.coops import coops_product_within_region
from os.path import exists

import geopandas
from  matplotlib import pyplot as plt

vars = ['water_level','water_temperature','wind','salinity','conductivity','currents']

#bnd = -99.0,5,-52.8,46.3    #US mainland
#bnd = -98.5 ,-84.5,24.,31.5  #GulfM
bnd = -74.5 , -71.0 , 40.1 ,   41.55  #newyork

now_    = datetime.now()
past_   = now_ - timedelta(0.25)

print (' > Read US medium shoreline ... ')
if not exists('us_medium_shoreline.shp'):
    countries = geopandas.read_file('https://coast.noaa.gov/htdata/Shoreline/us_medium_shoreline.zip')
    countries.to_file('us_medium_shoreline.shp')
else:
    countries = geopandas.read_file('us_medium_shoreline.shp')

for var in vars:
    print (' > get data ...', var)
    if True:
        # See here https://api.tidesandcurrents.noaa.gov/api/prod/
        data = coops_product_within_region(
            product = var,
            start_date = past_.isoformat(),
            end_date   = now_.isoformat(),
            region     = box(*bnd),
            datum      = 'NAVD'
        )

    print (' > plot figure for ' , var)
    plt.figure()
    figure, axis = plt.subplots(1, 1)
    figure.set_size_inches(12, 12 / 1.61803398875)
    countries.plot(color='lightgrey', ax=axis, zorder=-1)
    plt.scatter(x=data.x,y=data.y,c=data.v.mean(axis=1))
    plt.colorbar()
    plt.xlim(*bnd[:2])
    plt.ylim(*bnd[2:])
    axis.set_title(var)
    plt.savefig(var + '.png')
    plt.close('all')

    if True:
        print(' > save netcdf file')
        data.fillna(-99).to_netcdf(var + '.nc')

Sample outputs:

There is work to be done on the parameters besides water level to:

Main remaining issue:

currents salinity water_level water_temperature wind conductivity

zacharyburnett commented 2 years ago

I did not consider multi-scalar outputs for plotting, that would be a good feature for searvey to support.

saeed-moghimi-noaa commented 2 years ago

@zacharyburnett Do them get included in the final netcdf file?

brey commented 2 years ago

It seems that there is some normalization taking place with variable name change. I see from the API e.g wind has different and more scalar names which are lost.

{"metadata":{"id":"8452660","name":"Newport","lat":"41.5050","lon":"-71.3267"}, "data": [{"t":"2021-08-08 15:00", "s":"8.16", "d":"7.00", "dr":"N", "g":"9.72", "f":"0,0"},{"t":"2021-08-08 15:06", "s":"7.39", "d":"8.00", "dr":"N", "g":"10.89", "f":"0,0"}]}

This is an API issue that we need to tackle. @zacharyburnett I guess this is what you mean above.

For IOC we kept all their names to avoid errors but that means it is up to the user to differentiate.