pydata / xarray

N-D labeled arrays and datasets in Python
https://xarray.dev
Apache License 2.0
3.62k stars 1.08k forks source link

Opendap access failure error #4043

Closed aragong closed 11 months ago

aragong commented 4 years ago

Hi all,

We are having some trouble with opendap access to our own thredds. I've detected that when I tried to access to small subsets of small variable arrays, opendap works perfect but now I obtained this error when I was trying to access to some ERA5 netCDF files (~8gb) with Xarray. Check my example code bellow:

import xarray as xr
import os
from datetime import datetime, timedelta
import pandas as pd
import shutil
import numpy as np
import time

tic = time.perf_counter()

# ------------------------------------------------------------------------------------------------------------------
# Inputs Example:
lonlat_box = [-4.5, -2.5, 44, 45]
date_ini =  datetime(1998,5,28,12)
date_end =  datetime(1998,6,1,12)
output_path = r'test_inputs\ERA5\data'
source_path = r'http://193.144.213.180:8080/thredds/dodsC/Wind/Wind_ERA5/Global'
# source_path = r'D:\2020_REPSOL\Codigos_input_TESEO\raw'
dl = 0.5
# ------------------------------------------------------------------------------------------------------------------

# Create results folders and paths
if os.path.exists(output_path):
    pass
else:
    os.makedirs(output_path)

# Change to (-180,180) if there is 0 to 360
if lonlat_box[0] > 180:
    lonlat_box[0] = lonlat_box[0] - 360
if lonlat_box[1] > 180:
    lonlat_box[1] = lonlat_box[1] - 360

# Check coordinates
if lonlat_box[0] < -19 or lonlat_box[1] > 5 or lonlat_box[2] < 26 or lonlat_box[3] > 56:
    print("Invalid coordinates! coordinates must be Lon:(-19º,5º) and Lat:(26º,56)")
    exit()

# Check time range
if date_ini<datetime(1992,1,1,0) or date_end>datetime(2017,12,31,23):
    print("Invalid time range! This database provide data from 01/1992 to 12/2017")
    exit()

# Create a tuple to store Lon Lat
Lon = (lonlat_box[0], lonlat_box[1])
Lat = (lonlat_box[2], lonlat_box[3])
del lonlat_box

# Create date list of files to be loaded
dates = pd.date_range(start=date_ini, end=date_end, closed=None, freq='D')
file_list = []
for date in dates:
    p = list([source_path + '/Wind_ERA5_Global_' + date.strftime("%Y") + '.' + date.strftime("%m") + '.nc'])
    file_list = file_list + p

# Delete repeated elements
file_list = list(dict.fromkeys(file_list))
print('Loading files: \n{}\n'.format("\n".join(file_list)))

# Load data
# ds = xr.open_mfdataset(file_list)
ds = xr.open_mfdataset(file_list)

# Select variables
ds = ds.get(['u', 'v'])

# from 0º,360º to -180º,180º
ds['lon'] = (ds.lon + 180) % 360 - 180
ds = ds.sortby('lon', 'lat')

# Select spatial subset [lon,lat]
ds = ds.where((ds.lon >= Lon[0] - dl) & (ds.lon <= Lon[1] + dl) & (ds.lat >= Lat[0] - dl) & (ds.lat <= Lat[1] + dl), drop=True)

# Select temporal subset
ds = ds.where((ds.time >= np.datetime64(date_ini)) & (ds.time <= np.datetime64(date_end)), drop=True)

# Create depth-layers file for 2D simulation
winds_list = []

# From xarray to dataframe
df = ds.to_dataframe()

Problem Description

If I run the process with local data the code runs perfect and there is no problem at all. I previously downloaded to my local PC two files to perform this test.

But when I used the opendap to generalize the process for any date using the opendap url

source_path = r'http://193.144.213.180:8080/thredds/dodsC/Wind/Wind_ERA5/Global'

I found this error

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
d:\2020_REPSOL\Codigos_input_TESEO\draft_code.py in 
     82 
     83 # From xarray to dataframe
---> 84 df = ds.to_dataframe()
     85 
     86 df = df.reset_index()

~\AppData\Local\Continuum\miniconda3\lib\site-packages\xarray\core\dataset.py in to_dataframe(self)
   3335         this dataset's indices.
   3336         """
-> 3337         return self._to_dataframe(self.dims)
   3338 
   3339     @classmethod

~\AppData\Local\Continuum\miniconda3\lib\site-packages\xarray\core\dataset.py in _to_dataframe(self, ordered_dims)
   3324         columns = [k for k in self.variables if k not in self.dims]
   3325         data = [self._variables[k].set_dims(ordered_dims).values.reshape(-1)
-> 3326                 for k in columns]
   3327         index = self.coords.to_index(ordered_dims)
   3328         return pd.DataFrame(OrderedDict(zip(columns, data)), index=index)

~\AppData\Local\Continuum\miniconda3\lib\site-packages\xarray\core\dataset.py in (.0)
   3324         columns = [k for k in self.variables if k not in self.dims]
   3325         data = [self._variables[k].set_dims(ordered_dims).values.reshape(-1)
-> 3326                 for k in columns]
   3327         index = self.coords.to_index(ordered_dims)
   3328         return pd.DataFrame(OrderedDict(zip(columns, data)), index=index)

~\AppData\Local\Continuum\miniconda3\lib\site-packages\xarray\core\variable.py in values(self)
    390     def values(self):
    391         """The variable's data as a numpy.ndarray"""
--> 392         return _as_array_or_item(self._data)
    393 
    394     @values.setter

~\AppData\Local\Continuum\miniconda3\lib\site-packages\xarray\core\variable.py in _as_array_or_item(data)
    211     TODO: remove this (replace with np.asarray) once these issues are fixed
    212     """
--> 213     data = np.asarray(data)
    214     if data.ndim == 0:
    215         if data.dtype.kind == 'M':

~\AppData\Local\Continuum\miniconda3\lib\site-packages\numpy\core\numeric.py in asarray(a, dtype, order)
    536 
    537     """
--> 538     return array(a, dtype, copy=False, order=order)
    539 
    540 

~\AppData\Local\Continuum\miniconda3\lib\site-packages\dask\array\core.py in __array__(self, dtype, **kwargs)
    996 
    997     def __array__(self, dtype=None, **kwargs):
--> 998         x = self.compute()
    999         if dtype and x.dtype != dtype:
   1000             x = x.astype(dtype)

~\AppData\Local\Continuum\miniconda3\lib\site-packages\dask\base.py in compute(self, **kwargs)
    154         dask.base.compute
    155         """
--> 156         (result,) = compute(self, traverse=False, **kwargs)
    157         return result
    158 

~\AppData\Local\Continuum\miniconda3\lib\site-packages\dask\base.py in compute(*args, **kwargs)
    396     keys = [x.__dask_keys__() for x in collections]
    397     postcomputes = [x.__dask_postcompute__() for x in collections]
--> 398     results = schedule(dsk, keys, **kwargs)
    399     return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
    400 

~\AppData\Local\Continuum\miniconda3\lib\site-packages\dask\threaded.py in get(dsk, result, cache, num_workers, pool, **kwargs)
     74     results = get_async(pool.apply_async, len(pool._pool), dsk, result,
     75                         cache=cache, get_id=_thread_get_id,
---> 76                         pack_exception=pack_exception, **kwargs)
     77 
     78     # Cleanup pools associated to dead threads

~\AppData\Local\Continuum\miniconda3\lib\site-packages\dask\local.py in get_async(apply_async, num_workers, dsk, result, cache, get_id, rerun_exceptions_locally, pack_exception, raise_exception, callbacks, dumps, loads, **kwargs)
    460                         _execute_task(task, data)  # Re-execute locally
    461                     else:
--> 462                         raise_exception(exc, tb)
    463                 res, worker_id = loads(res_info)
    464                 state['cache'][key] = res

~\AppData\Local\Continuum\miniconda3\lib\site-packages\dask\compatibility.py in reraise(exc, tb)
    110         if exc.__traceback__ is not tb:
    111             raise exc.with_traceback(tb)
--> 112         raise exc
    113 
    114     import pickle as cPickle

~\AppData\Local\Continuum\miniconda3\lib\site-packages\dask\local.py in execute_task(key, task_info, dumps, loads, get_id, pack_exception)
    228     try:
    229         task, data = loads(task_info)
--> 230         result = _execute_task(task, data)
    231         id = get_id()
    232         result = dumps((result, id))

~\AppData\Local\Continuum\miniconda3\lib\site-packages\dask\core.py in _execute_task(arg, cache, dsk)
    116     elif istask(arg):
    117         func, args = arg[0], arg[1:]
--> 118         args2 = [_execute_task(a, cache) for a in args]
    119         return func(*args2)
    120     elif not ishashable(arg):

~\AppData\Local\Continuum\miniconda3\lib\site-packages\dask\core.py in (.0)
    116     elif istask(arg):
    117         func, args = arg[0], arg[1:]
--> 118         args2 = [_execute_task(a, cache) for a in args]
    119         return func(*args2)
    120     elif not ishashable(arg):

~\AppData\Local\Continuum\miniconda3\lib\site-packages\dask\core.py in _execute_task(arg, cache, dsk)
    116     elif istask(arg):
    117         func, args = arg[0], arg[1:]
--> 118         args2 = [_execute_task(a, cache) for a in args]
    119         return func(*args2)
    120     elif not ishashable(arg):

~\AppData\Local\Continuum\miniconda3\lib\site-packages\dask\core.py in (.0)
    116     elif istask(arg):
    117         func, args = arg[0], arg[1:]
--> 118         args2 = [_execute_task(a, cache) for a in args]
    119         return func(*args2)
    120     elif not ishashable(arg):

~\AppData\Local\Continuum\miniconda3\lib\site-packages\dask\core.py in _execute_task(arg, cache, dsk)
    116     elif istask(arg):
    117         func, args = arg[0], arg[1:]
--> 118         args2 = [_execute_task(a, cache) for a in args]
    119         return func(*args2)
    120     elif not ishashable(arg):

~\AppData\Local\Continuum\miniconda3\lib\site-packages\dask\core.py in (.0)
    116     elif istask(arg):
    117         func, args = arg[0], arg[1:]
--> 118         args2 = [_execute_task(a, cache) for a in args]
    119         return func(*args2)
    120     elif not ishashable(arg):

~\AppData\Local\Continuum\miniconda3\lib\site-packages\dask\core.py in _execute_task(arg, cache, dsk)
    117         func, args = arg[0], arg[1:]
    118         args2 = [_execute_task(a, cache) for a in args]
--> 119         return func(*args2)
    120     elif not ishashable(arg):
    121         return arg

~\AppData\Local\Continuum\miniconda3\lib\site-packages\dask\array\core.py in getter(a, b, asarray, lock)
     80         c = a[b]
     81         if asarray:
---> 82             c = np.asarray(c)
     83     finally:
     84         if lock:

~\AppData\Local\Continuum\miniconda3\lib\site-packages\numpy\core\numeric.py in asarray(a, dtype, order)
    536 
    537     """
--> 538     return array(a, dtype, copy=False, order=order)
    539 
    540 

~\AppData\Local\Continuum\miniconda3\lib\site-packages\xarray\core\indexing.py in __array__(self, dtype)
    602 
    603     def __array__(self, dtype=None):
--> 604         return np.asarray(self.array, dtype=dtype)
    605 
    606     def __getitem__(self, key):

~\AppData\Local\Continuum\miniconda3\lib\site-packages\numpy\core\numeric.py in asarray(a, dtype, order)
    536 
    537     """
--> 538     return array(a, dtype, copy=False, order=order)
    539 
    540 

~\AppData\Local\Continuum\miniconda3\lib\site-packages\xarray\core\indexing.py in __array__(self, dtype)
    508     def __array__(self, dtype=None):
    509         array = as_indexable(self.array)
--> 510         return np.asarray(array[self.key], dtype=None)
    511 
    512     def transpose(self, order):

~\AppData\Local\Continuum\miniconda3\lib\site-packages\numpy\core\numeric.py in asarray(a, dtype, order)
    536 
    537     """
--> 538     return array(a, dtype, copy=False, order=order)
    539 
    540 

~\AppData\Local\Continuum\miniconda3\lib\site-packages\xarray\coding\variables.py in __array__(self, dtype)
     66 
     67     def __array__(self, dtype=None):
---> 68         return self.func(self.array)
     69 
     70     def __repr__(self):

~\AppData\Local\Continuum\miniconda3\lib\site-packages\xarray\coding\variables.py in _scale_offset_decoding(data, scale_factor, add_offset, dtype)
    182 
    183 def _scale_offset_decoding(data, scale_factor, add_offset, dtype):
--> 184     data = np.array(data, dtype=dtype, copy=True)
    185     if scale_factor is not None:
    186         data *= scale_factor

~\AppData\Local\Continuum\miniconda3\lib\site-packages\xarray\coding\variables.py in __array__(self, dtype)
     66 
     67     def __array__(self, dtype=None):
---> 68         return self.func(self.array)
     69 
     70     def __repr__(self):

~\AppData\Local\Continuum\miniconda3\lib\site-packages\xarray\coding\variables.py in _apply_mask(data, encoded_fill_values, decoded_fill_value, dtype)
    133 ) -> np.ndarray:
    134     """Mask all matching values in a NumPy arrays."""
--> 135     data = np.asarray(data, dtype=dtype)
    136     condition = False
    137     for fv in encoded_fill_values:

~\AppData\Local\Continuum\miniconda3\lib\site-packages\numpy\core\numeric.py in asarray(a, dtype, order)
    536 
    537     """
--> 538     return array(a, dtype, copy=False, order=order)
    539 
    540 

~\AppData\Local\Continuum\miniconda3\lib\site-packages\xarray\core\indexing.py in __array__(self, dtype)
    508     def __array__(self, dtype=None):
    509         array = as_indexable(self.array)
--> 510         return np.asarray(array[self.key], dtype=None)
    511 
    512     def transpose(self, order):

~\AppData\Local\Continuum\miniconda3\lib\site-packages\xarray\backends\netCDF4_.py in __getitem__(self, key)
     62         return indexing.explicit_indexing_adapter(
     63             key, self.shape, indexing.IndexingSupport.OUTER,
---> 64             self._getitem)
     65 
     66     def _getitem(self, key):

~\AppData\Local\Continuum\miniconda3\lib\site-packages\xarray\core\indexing.py in explicit_indexing_adapter(key, shape, indexing_support, raw_indexing_method)
    776     """
    777     raw_key, numpy_indices = decompose_indexer(key, shape, indexing_support)
--> 778     result = raw_indexing_method(raw_key.tuple)
    779     if numpy_indices.tuple:
    780         # index the loaded np.ndarray

~\AppData\Local\Continuum\miniconda3\lib\site-packages\xarray\backends\netCDF4_.py in _getitem(self, key)
     73             with self.datastore.lock:
     74                 original_array = self.get_array(needs_lock=False)
---> 75                 array = getitem(original_array, key)
     76         except IndexError:
     77             # Catch IndexError in netCDF4 and return a more informative

~\AppData\Local\Continuum\miniconda3\lib\site-packages\xarray\backends\common.py in robust_getitem(array, key, catch, max_retries, initial_delay)
     53     for n in range(max_retries + 1):
     54         try:
---> 55             return array[key]
     56         except catch:
     57             if n == max_retries:

netCDF4\_netCDF4.pyx in netCDF4._netCDF4.Variable.__getitem__()

netCDF4\_netCDF4.pyx in netCDF4._netCDF4.Variable._get()

netCDF4\_netCDF4.pyx in netCDF4._netCDF4._ensure_nc_success()

RuntimeError: NetCDF: Access failure

We thought that can be related to the opendap service config in the thredds, and we try to raise by x100 and even x1000 these parameters.

<Opendap>
    <ascLimit>50</ascLimit>
    <binLimit>500</binLimit>
    <serverVersion>opendap/3.7</serverVersion>
  </Opendap>

The result of these changes is that now the error at the end says: RuntimeError: NetCDF: file not found

We do not know how to do to properly fix opendap access to this information, any help is highly appreciated.

Thank you in advance!!

dcherian commented 4 years ago

It's unfortunate that we don't print filenames when access fails.

Are you sure all the urls you construct are actually valid?

aragong commented 4 years ago

Totally agree,

from my code the list of url are:

Loading files: 
http://193.144.213.180:8080/thredds/dodsC/Wind/Wind_ERA5/Global/Wind_ERA5_Global_1998.05.nc
http://193.144.213.180:8080/thredds/dodsC/Wind/Wind_ERA5/Global/Wind_ERA5_Global_1998.06.nc

and through the web browser I can copy paste for that dates this:

http://193.144.213.180:8080/thredds/dodsC/Wind/Wind_ERA5/Global/Wind_ERA5_Global_1998.05.nc
http://193.144.213.180:8080/thredds/dodsC/Wind/Wind_ERA5/Global/Wind_ERA5_Global_1998.06.nc

So I think the URL is properly constructed, indeed if I select only the longitude variable, which is quit small, I can perform the ds.to_dataframe() method... so I think url is fine!

ocefpaf commented 4 years ago

How are you installing netcdf4? There was a problem with the underlying libetcdf some time ago that caused access failures like that. You can try upgrading it or using another backend, like pydap.

aragong commented 4 years ago

thank you @ocefpaf ,

I installed xarray through the recommended command in the official website in my minicoda env some months-year ago:

conda install -c conda-forge xarray dask netCDF4 bottleneck

I list my versions below:

INSTALLED VERSIONS
------------------
commit: None
python: 3.6.7 (default, Feb 28 2019, 07:28:18) [MSC v.1900 64 bit (AMD64)]
python-bits: 64
OS: Windows
OS-release: 10
machine: AMD64
processor: Intel64 Family 6 Model 42 Stepping 7, GenuineIntel
byteorder: little
LC_ALL: None
LANG: None
LOCALE: None.None
libhdf5: 1.10.4
libnetcdf: 4.6.2

xarray: 0.12.1
pandas: 0.24.2
numpy: 1.16.3
scipy: 1.2.1
netCDF4: 1.5.1.2
pydap: None
h5netcdf: None
h5py: None
Nio: None
zarr: None
cftime: 1.0.3.4
nc_time_axis: 1.2.0
PseudonetCDF: None
rasterio: None
cfgrib: 0.9.6.2
iris: None
bottleneck: None
dask: 1.1.5
distributed: 1.28.1
matplotlib: 3.0.3
cartopy: 0.16.0
seaborn: None
setuptools: 41.0.1
pip: 19.1.1
conda: 4.8.2
pytest: None
IPython: 7.5.0
sphinx: None

I'v just created a new environment with python3.7 and all last versions and the result is the same error, I list this new environment below also:

INSTALLED VERSIONS
------------------
commit: None
python: 3.7.7 (default, May  6 2020, 11:45:54) [MSC v.1916 64 bit (AMD64)]
python-bits: 64
OS: Windows
OS-release: 10
machine: AMD64
processor: Intel64 Family 6 Model 42 Stepping 7, GenuineIntel
byteorder: little
LC_ALL: None
LANG: None
LOCALE: None.None
libhdf5: 1.10.4
libnetcdf: 4.7.3

xarray: 0.15.1
pandas: 1.0.3
numpy: 1.18.1
scipy: 1.4.1
netCDF4: 1.5.3
pydap: installed
h5netcdf: None
h5py: None
Nio: None
zarr: None
cftime: 1.1.2
nc_time_axis: None
PseudoNetCDF: None
rasterio: None
cfgrib: None
iris: None
bottleneck: 1.3.2
dask: 2.15.0
distributed: 2.15.2
matplotlib: None
cartopy: None
seaborn: None
numbagg: None
setuptools: 46.1.3.post20200330
pip: 20.0.2
conda: None
pytest: None
IPython: 7.13.0
sphinx: None

Thank you in advance!

ocefpaf commented 4 years ago

I installed xarray through the recommended command in the official website in my minicoda env some months-year ago

That is probably it then. I see you have libnetcdf 4.6.2, if you recreate that env you should get libnetcdf 4.7.4. Can you try it with a new clean env:

conda create --name TEST --channel conda-forge xarray dask netCDF4 bottleneck
aragong commented 4 years ago

Thank you @ocefpaf!

But it raised the same error. I also try to load "u" variable with matlab ncread through opendap and also failed! So maybe is not a problem related with python...? I am very confused!


http://193.144.213.180:8080/thredds/dodsC/Wind/Wind_ERA5/Global/Wind_ERA5_Global_1998.05.nc
http://193.144.213.180:8080/thredds/dodsC/Wind/Wind_ERA5/Global/Wind_ERA5_Global_1998.06.nc

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
d:\2020_REPSOL\Codigos_input_TESEO\user_script.py in 
     58 #                     )
     59 
---> 60 ERA5_windIHData2txt_TESEO(lonlat_box=[-4.5, -2.5, 44, 45],
     61                     date_ini=datetime(1998, 5, 28, 0),
     62                     date_end=datetime(1998, 6, 1, 12),

d:\2020_REPSOL\Codigos_input_TESEO\TESEOtools_v0.py in ERA5_windIHData2txt_TESEO(***failed resolving arguments***)
    826 
    827     # From xarray to dataframe
--> 828     df = ds.to_dataframe().reset_index()
    829     del ds
    830     print('[Processing currents 2D...]')

~\AppData\Local\Continuum\miniconda3\envs\TEST\lib\site-packages\xarray\core\dataset.py in to_dataframe(self)
   4503         this dataset's indices.
   4504         """
-> 4505         return self._to_dataframe(self.dims)
   4506 
   4507     def _set_sparse_data_from_dataframe(

~\AppData\Local\Continuum\miniconda3\envs\TEST\lib\site-packages\xarray\core\dataset.py in _to_dataframe(self, ordered_dims)
   4489     def _to_dataframe(self, ordered_dims):
   4490         columns = [k for k in self.variables if k not in self.dims]
-> 4491         data = [
   4492             self._variables[k].set_dims(ordered_dims).values.reshape(-1)
   4493             for k in columns

~\AppData\Local\Continuum\miniconda3\envs\TEST\lib\site-packages\xarray\core\dataset.py in (.0)
   4490         columns = [k for k in self.variables if k not in self.dims]
   4491         data = [
-> 4492             self._variables[k].set_dims(ordered_dims).values.reshape(-1)
   4493             for k in columns
   4494         ]

~\AppData\Local\Continuum\miniconda3\envs\TEST\lib\site-packages\xarray\core\variable.py in values(self)
    444     def values(self):
    445         """The variable's data as a numpy.ndarray"""
--> 446         return _as_array_or_item(self._data)
    447 
    448     @values.setter

~\AppData\Local\Continuum\miniconda3\envs\TEST\lib\site-packages\xarray\core\variable.py in _as_array_or_item(data)
    247     TODO: remove this (replace with np.asarray) once these issues are fixed
    248     """
--> 249     data = np.asarray(data)
    250     if data.ndim == 0:
    251         if data.dtype.kind == "M":

~\AppData\Local\Continuum\miniconda3\envs\TEST\lib\site-packages\numpy\core\_asarray.py in asarray(a, dtype, order)
     83 
     84     """
---> 85     return array(a, dtype, copy=False, order=order)
     86 
     87 

~\AppData\Local\Continuum\miniconda3\envs\TEST\lib\site-packages\dask\array\core.py in __array__(self, dtype, **kwargs)
   1334 
   1335     def __array__(self, dtype=None, **kwargs):
-> 1336         x = self.compute()
   1337         if dtype and x.dtype != dtype:
   1338             x = x.astype(dtype)

~\AppData\Local\Continuum\miniconda3\envs\TEST\lib\site-packages\dask\base.py in compute(self, **kwargs)
    164         dask.base.compute
    165         """
--> 166         (result,) = compute(self, traverse=False, **kwargs)
    167         return result
    168 

~\AppData\Local\Continuum\miniconda3\envs\TEST\lib\site-packages\dask\base.py in compute(*args, **kwargs)
    442         postcomputes.append(x.__dask_postcompute__())
    443 
--> 444     results = schedule(dsk, keys, **kwargs)
    445     return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
    446 

~\AppData\Local\Continuum\miniconda3\envs\TEST\lib\site-packages\dask\threaded.py in get(dsk, result, cache, num_workers, pool, **kwargs)
     74                 pools[thread][num_workers] = pool
     75 
---> 76     results = get_async(
     77         pool.apply_async,
     78         len(pool._pool),

~\AppData\Local\Continuum\miniconda3\envs\TEST\lib\site-packages\dask\local.py in get_async(apply_async, num_workers, dsk, result, cache, get_id, rerun_exceptions_locally, pack_exception, raise_exception, callbacks, dumps, loads, **kwargs)
    484                         _execute_task(task, data)  # Re-execute locally
    485                     else:
--> 486                         raise_exception(exc, tb)
    487                 res, worker_id = loads(res_info)
    488                 state["cache"][key] = res

~\AppData\Local\Continuum\miniconda3\envs\TEST\lib\site-packages\dask\local.py in reraise(exc, tb)
    314     if exc.__traceback__ is not tb:
    315         raise exc.with_traceback(tb)
--> 316     raise exc
    317 
    318 

~\AppData\Local\Continuum\miniconda3\envs\TEST\lib\site-packages\dask\local.py in execute_task(key, task_info, dumps, loads, get_id, pack_exception)
    220     try:
    221         task, data = loads(task_info)
--> 222         result = _execute_task(task, data)
    223         id = get_id()
    224         result = dumps((result, id))

~\AppData\Local\Continuum\miniconda3\envs\TEST\lib\site-packages\dask\core.py in _execute_task(arg, cache, dsk)
    119         # temporaries by their reference count and can execute certain
    120         # operations in-place.
--> 121         return func(*(_execute_task(a, cache) for a in args))
    122     elif not ishashable(arg):
    123         return arg

~\AppData\Local\Continuum\miniconda3\envs\TEST\lib\site-packages\dask\core.py in (.0)
    119         # temporaries by their reference count and can execute certain
    120         # operations in-place.
--> 121         return func(*(_execute_task(a, cache) for a in args))
    122     elif not ishashable(arg):
    123         return arg

~\AppData\Local\Continuum\miniconda3\envs\TEST\lib\site-packages\dask\core.py in _execute_task(arg, cache, dsk)
    119         # temporaries by their reference count and can execute certain
    120         # operations in-place.
--> 121         return func(*(_execute_task(a, cache) for a in args))
    122     elif not ishashable(arg):
    123         return arg

~\AppData\Local\Continuum\miniconda3\envs\TEST\lib\site-packages\dask\core.py in (.0)
    119         # temporaries by their reference count and can execute certain
    120         # operations in-place.
--> 121         return func(*(_execute_task(a, cache) for a in args))
    122     elif not ishashable(arg):
    123         return arg

~\AppData\Local\Continuum\miniconda3\envs\TEST\lib\site-packages\dask\core.py in _execute_task(arg, cache, dsk)
    119         # temporaries by their reference count and can execute certain
    120         # operations in-place.
--> 121         return func(*(_execute_task(a, cache) for a in args))
    122     elif not ishashable(arg):
    123         return arg

~\AppData\Local\Continuum\miniconda3\envs\TEST\lib\site-packages\dask\core.py in (.0)
    119         # temporaries by their reference count and can execute certain
    120         # operations in-place.
--> 121         return func(*(_execute_task(a, cache) for a in args))
    122     elif not ishashable(arg):
    123         return arg

~\AppData\Local\Continuum\miniconda3\envs\TEST\lib\site-packages\dask\core.py in _execute_task(arg, cache, dsk)
    119         # temporaries by their reference count and can execute certain
    120         # operations in-place.
--> 121         return func(*(_execute_task(a, cache) for a in args))
    122     elif not ishashable(arg):
    123         return arg

~\AppData\Local\Continuum\miniconda3\envs\TEST\lib\site-packages\dask\array\core.py in getter(a, b, asarray, lock)
     98         c = a[b]
     99         if asarray:
--> 100             c = np.asarray(c)
    101     finally:
    102         if lock:

~\AppData\Local\Continuum\miniconda3\envs\TEST\lib\site-packages\numpy\core\_asarray.py in asarray(a, dtype, order)
     83 
     84     """
---> 85     return array(a, dtype, copy=False, order=order)
     86 
     87 

~\AppData\Local\Continuum\miniconda3\envs\TEST\lib\site-packages\xarray\core\indexing.py in __array__(self, dtype)
    489 
    490     def __array__(self, dtype=None):
--> 491         return np.asarray(self.array, dtype=dtype)
    492 
    493     def __getitem__(self, key):

~\AppData\Local\Continuum\miniconda3\envs\TEST\lib\site-packages\numpy\core\_asarray.py in asarray(a, dtype, order)
     83 
     84     """
---> 85     return array(a, dtype, copy=False, order=order)
     86 
     87 

~\AppData\Local\Continuum\miniconda3\envs\TEST\lib\site-packages\xarray\core\indexing.py in __array__(self, dtype)
    651 
    652     def __array__(self, dtype=None):
--> 653         return np.asarray(self.array, dtype=dtype)
    654 
    655     def __getitem__(self, key):

~\AppData\Local\Continuum\miniconda3\envs\TEST\lib\site-packages\numpy\core\_asarray.py in asarray(a, dtype, order)
     83 
     84     """
---> 85     return array(a, dtype, copy=False, order=order)
     86 
     87 

~\AppData\Local\Continuum\miniconda3\envs\TEST\lib\site-packages\xarray\core\indexing.py in __array__(self, dtype)
    555     def __array__(self, dtype=None):
    556         array = as_indexable(self.array)
--> 557         return np.asarray(array[self.key], dtype=None)
    558 
    559     def transpose(self, order):

~\AppData\Local\Continuum\miniconda3\envs\TEST\lib\site-packages\numpy\core\_asarray.py in asarray(a, dtype, order)
     83 
     84     """
---> 85     return array(a, dtype, copy=False, order=order)
     86 
     87 

~\AppData\Local\Continuum\miniconda3\envs\TEST\lib\site-packages\xarray\coding\variables.py in __array__(self, dtype)
     70 
     71     def __array__(self, dtype=None):
---> 72         return self.func(self.array)
     73 
     74     def __repr__(self):

~\AppData\Local\Continuum\miniconda3\envs\TEST\lib\site-packages\xarray\coding\variables.py in _scale_offset_decoding(data, scale_factor, add_offset, dtype)
    216 
    217 def _scale_offset_decoding(data, scale_factor, add_offset, dtype):
--> 218     data = np.array(data, dtype=dtype, copy=True)
    219     if scale_factor is not None:
    220         data *= scale_factor

~\AppData\Local\Continuum\miniconda3\envs\TEST\lib\site-packages\xarray\coding\variables.py in __array__(self, dtype)
     70 
     71     def __array__(self, dtype=None):
---> 72         return self.func(self.array)
     73 
     74     def __repr__(self):

~\AppData\Local\Continuum\miniconda3\envs\TEST\lib\site-packages\xarray\coding\variables.py in _apply_mask(data, encoded_fill_values, decoded_fill_value, dtype)
    136 ) -> np.ndarray:
    137     """Mask all matching values in a NumPy arrays."""
--> 138     data = np.asarray(data, dtype=dtype)
    139     condition = False
    140     for fv in encoded_fill_values:

~\AppData\Local\Continuum\miniconda3\envs\TEST\lib\site-packages\numpy\core\_asarray.py in asarray(a, dtype, order)
     83 
     84     """
---> 85     return array(a, dtype, copy=False, order=order)
     86 
     87 

~\AppData\Local\Continuum\miniconda3\envs\TEST\lib\site-packages\xarray\core\indexing.py in __array__(self, dtype)
    555     def __array__(self, dtype=None):
    556         array = as_indexable(self.array)
--> 557         return np.asarray(array[self.key], dtype=None)
    558 
    559     def transpose(self, order):

~\AppData\Local\Continuum\miniconda3\envs\TEST\lib\site-packages\xarray\backends\netCDF4_.py in __getitem__(self, key)
     70 
     71     def __getitem__(self, key):
---> 72         return indexing.explicit_indexing_adapter(
     73             key, self.shape, indexing.IndexingSupport.OUTER, self._getitem
     74         )

~\AppData\Local\Continuum\miniconda3\envs\TEST\lib\site-packages\xarray\core\indexing.py in explicit_indexing_adapter(key, shape, indexing_support, raw_indexing_method)
    835     """
    836     raw_key, numpy_indices = decompose_indexer(key, shape, indexing_support)
--> 837     result = raw_indexing_method(raw_key.tuple)
    838     if numpy_indices.tuple:
    839         # index the loaded np.ndarray

~\AppData\Local\Continuum\miniconda3\envs\TEST\lib\site-packages\xarray\backends\netCDF4_.py in _getitem(self, key)
     83             with self.datastore.lock:
     84                 original_array = self.get_array(needs_lock=False)
---> 85                 array = getitem(original_array, key)
     86         except IndexError:
     87             # Catch IndexError in netCDF4 and return a more informative

~\AppData\Local\Continuum\miniconda3\envs\TEST\lib\site-packages\xarray\backends\common.py in robust_getitem(array, key, catch, max_retries, initial_delay)
     52     for n in range(max_retries + 1):
     53         try:
---> 54             return array[key]
     55         except catch:
     56             if n == max_retries:

netCDF4\_netCDF4.pyx in netCDF4._netCDF4.Variable.__getitem__()

netCDF4\_netCDF4.pyx in netCDF4._netCDF4.Variable._get()

netCDF4\_netCDF4.pyx in netCDF4._netCDF4._ensure_nc_success()

RuntimeError: NetCDF: Access failure```
dcherian commented 4 years ago

I would check your server logs if you can. Or avoid xarray and try with lower level pydap / netCDF4. This may be useful: https://github.com/pangeo-data/pangeo/issues/767. Maybe you're requesting too much data?

aragong commented 4 years ago

thank you @dcherian, We know that if the request is small it works fine, but we want to make big requests of data. Is any limitation using opendap?

rabernat commented 4 years ago

I have spent plenty of time debugging these sorts of issues. It really helps to take xarray out of the equation. Try making your request with just the netCDF--that's all that xarray uses under the hood. Overall your example is very complicated, which makes it hard to find the core issue.

You generally want to try something like this

import netCDF4
ncds = netCDF4.Dataset(OPENDAP_url)
data = ncds[variable_name][:]

Try playing around with the slice [:] to see under what circumstances the opendap server fails. Then use chunking in xarray to limit the size of each individual request. That's what's described in pangeo-data/pangeo#767.

A few additional comments about your code:

# Select spatial subset [lon,lat]
ds = ds.where((ds.lon >= Lon[0] - dl) & (ds.lon <= Lon[1] + dl) & (ds.lat >= Lat[0] - dl) & (ds.lat <= Lat[1] + dl), drop=True)

This is NOT how you do subsetting with xarray. Where is meant for masking. I recommend reviewing the xarray docs on indexing and selecting. Your call should be something like

ds = ds.sel(lon=slice(...), lat=slice(...))

What's the difference? where downloads all of the data from the opendap server and then fills it with NaNs outside of your selection, while sel lazily limits the size of the request from the opendap server. This could make a big difference in terms of the server's memory usage.

ds = ds.sortby('lon', 'lat')

Can you do this sorting after loading the data. It's an expensive operation and might not interact well with the opendap server.

aragong commented 4 years ago

@rabernat - Thank you! I will review the code (thank you for the extra comments, I really appreciate that) and follow your instructions to test the chunk size.

Just for my understanding, So theoretically It is not possible to make big requests without using chunking? The threads server is under our management and we want to know if these errors can be solved through any specific configuration of the service in the thredds.

Thank you in advance!

rabernat commented 4 years ago

Just for my understanding, So theoretically It is not possible to make big requests without using chunking?

This depends entirely on the TDS server configuration. See comment in https://github.com/Unidata/netcdf-c/issues/1667#issuecomment-597372065. The default limit appears to be 500 MB.

It's important to note that none of this has to do with xarray. Xarray is simply the top layer of a very deep software stack. If the TDS server could deliver larger data requests, and the netCDF4-python library could accept them, xarray would have no problem.

aragong commented 4 years ago

I followed your recommendations @rabernat, please see my test code bellow.

import xarray as xr
import os
from datetime import datetime, timedelta
import pandas as pd
import shutil
import numpy as np
import time

lonlat_box = [-4.5, -2.5, 44, 45]

# ERA5 IHdata - Local
# -------------------

ds = xr.open_mfdataset(['raw/Wind_ERA5_Global_1998.05.nc', 'raw/Wind_ERA5_Global_1998.06.nc'])
ds = ds.get('u')

# from 0º,360º to -180º,180º
ds['lon'] = (ds.lon + 180) % 360 - 180
# lat is upside down --> sort ascending
ds = ds.sortby(['lon', 'lat'])

# Make the selection
ds = ds.sel(lon=slice(lonlat_box[0], lonlat_box[1]),
            lat=slice(lonlat_box[2], lonlat_box[3]))

print(ds)

tic = time.perf_counter()
df = ds.to_dataframe()
toc = time.perf_counter()
print(f"\nLocal Network - Elapsed time: {(toc - tic)/60:0.4f} minutes\n\n")

del ds, df

# ERA5 IHdata - Opendap
# ---------------------
ds = xr.open_mfdataset(['http://193.144.213.180:8080/thredds/dodsC/Wind/Wind_ERA5/Global/Wind_ERA5_Global_1998.05.nc',
                        'http://193.144.213.180:8080/thredds/dodsC/Wind/Wind_ERA5/Global/Wind_ERA5_Global_1998.06.nc'],
                       chunks={'time': '500MB'})

ds = ds.get('u')

# from 0º,360º to -180º,180º
ds['lon'] = (ds.lon + 180) % 360 - 180
# lat is upside down --> sort ascending
ds = ds.sortby(['lon', 'lat'])

# Make the selection
ds = ds.sel(lon=slice(lonlat_box[0], lonlat_box[1]),
            lat=slice(lonlat_box[2], lonlat_box[3]))

print(ds)

tic = time.perf_counter()
df = ds.to_dataframe()
toc = time.perf_counter()
print(f"\n OpenDAP - Elapsed time: {(toc - tic)/60:0.4f} minutes\n\n")

del ds, df

Result:

<xarray.DataArray 'u' (lat: 5, lon: 9, time: 1464)>
dask.array<getitem, shape=(5, 9, 1464), dtype=float32, chunksize=(5, 9, 744), chunktype=numpy.ndarray>
Coordinates:
  * lon      (lon) float32 -4.5 -4.25 -4.0 -3.75 -3.5 -3.25 -3.0 -2.75 -2.5
  * lat      (lat) float32 44.0 44.25 44.5 44.75 45.0
  * time     (time) datetime64[ns] 1998-05-01 ... 1998-06-30T23:00:00
Attributes:
    units:      m s**-1
    long_name:  10 metre U wind component

Local Network - Elapsed time: 0.4037 minutes

<xarray.DataArray 'u' (lat: 5, lon: 9, time: 1464)>
dask.array<getitem, shape=(5, 9, 1464), dtype=float32, chunksize=(5, 9, 120), chunktype=numpy.ndarray>
Coordinates:
  * lon      (lon) float32 -4.5 -4.25 -4.0 -3.75 -3.5 -3.25 -3.0 -2.75 -2.5
  * lat      (lat) float32 44.0 44.25 44.5 44.75 45.0
  * time     (time) datetime64[ns] 1998-05-01 ... 1998-06-30T23:00:00
Attributes:
    units:      m s**-1
    long_name:  10 metre U wind component

 OpenDAP - Elapsed time: 8.1971 minutes

Using this chunk of time=500Mb the code runs properly but it is really slow compared with the response through local network. I will try to raise this limit in the Opendap configuration with our IT-team to a more reasonable limit.

rabernat commented 4 years ago

Using this chunk of time=500Mb the code runs properly but it is really slow compared with the response through local network.

You might want to experiment with smaller chunks.

In general, opendap will always introduce overhead compared to direct file access.

aragong commented 4 years ago

We tried several times with 2000MB this configuration in the thredds:

   <Opendap>
        <ascLimit>50</ascLimit>
        <binLimit>2000</binLimit>
        <serverVersion>opendap/3.7</serverVersion>
     </Opendap>

But when we request more than a chunk of time=500MB the error appears: RuntimeError: NetCDF: Access failure

You might want to experiment with smaller chunks.

I tried with 50MB and the elapsed time was huge.

Local Network - Elapsed time: 0.5819 minutes OpenDAP - Elapsed time: 37.1448 minutes

dopplershift commented 4 years ago

Probably worth raising upstream with the THREDDS team. I do wonder if there's some issues with the chunking/compression of the native .nc files that's at play here.

sgdecker commented 2 years ago

I believe I am experiencing a similar issue, although with code that I thought was smart enough to chunk the data request into smaller pieces:

import numpy as np
import xarray as xr
from dask.diagnostics import ProgressBar
import intake

wrf_url = ('https://rda.ucar.edu/thredds/catalog/files/g/ds612.0/'
           'PGW3D/2006/catalog.xml')
catalog_u = intake.open_thredds_merged(wrf_url, path=['*_U_2006060*'])
catalog_v = intake.open_thredds_merged(wrf_url, path=['*_V_2006060*'])

ds_u = catalog_u.to_dask()
ds_u['U'] = ds_u.U.chunk("auto")
ds_v = catalog_v.to_dask()
ds_v['V'] = ds_v.V.chunk("auto")
ds = xr.merge((ds_u, ds_v))

def unstagger(ds, var, coord, new_coord):
    var1 = ds[var].isel({coord: slice(None, -1)})
    var2 = ds[var].isel({coord: slice(1, None)})
    return ((var1 + var2) / 2).rename({coord: new_coord})

with ProgressBar():
    ds['U_unstaggered'] = unstagger(ds, 'U', 'west_east_stag', 'west_east')
    ds['V_unstaggered'] = unstagger(ds, 'V', 'south_north_stag', 'south_north')
    ds['speed'] = np.hypot(ds.U_unstaggered, ds.V_unstaggered)
    ds.speed.isel(bottom_top=10).sel(Time='2006-06-07T18:00').plot()

This throws an error because, according to the RDA help folks, a request for an entire variable is made, which far exceeds their server's 500 MB request limit:

rda.ucar.edu/thredds/dodsC/files/g/ds612.0/PGW3D/2006/wrf3d_d01_PGW_U_20060607.nc.dods?U%5B0:1: 7%5D%5B0:1:49%5D%5B0:1:1014%5D%5B0:1:1359%5D

Here's the error:

Traceback (most recent call last):
  File "/home/decker/classes/met325/rda_plot.py", line 29, in <module>
    ds.speed.isel(bottom_top=10).sel(Time='2006-06-07T18:00').plot()
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/xarray/plot/plot.py", line 862, in __call__
    return plot(self._da, **kwargs)
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/xarray/plot/plot.py", line 293, in plot
    darray = darray.squeeze().compute()
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/xarray/core/dataarray.py", line 951, in compute
    return new.load(**kwargs)
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/xarray/core/dataarray.py", line 925, in load
    ds = self._to_temp_dataset().load(**kwargs)
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/xarray/core/dataset.py", line 862, in load
    evaluated_data = da.compute(*lazy_data.values(), **kwargs)
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/dask/base.py", line 571, in compute
    results = schedule(dsk, keys, **kwargs)
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/dask/threaded.py", line 79, in get
    results = get_async(
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/dask/local.py", line 507, in get_async
    raise_exception(exc, tb)
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/dask/local.py", line 315, in reraise
    raise exc
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/dask/local.py", line 220, in execute_task
    result = _execute_task(task, data)
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/dask/core.py", line 119, in _execute_task
    return func(*(_execute_task(a, cache) for a in args))
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/dask/array/core.py", line 116, in getter
    c = np.asarray(c)
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/xarray/core/indexing.py", line 357, in __array__
    return np.asarray(self.array, dtype=dtype)
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/xarray/core/indexing.py", line 521, in __array__
    return np.asarray(self.array, dtype=dtype)
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/xarray/core/indexing.py", line 422, in __array__
    return np.asarray(array[self.key], dtype=None)
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/xarray/conventions.py", line 62, in __getitem__
    return np.asarray(self.array[key], dtype=self.dtype)
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/xarray/core/indexing.py", line 422, in __array__
    return np.asarray(array[self.key], dtype=None)
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/xarray/backends/pydap_.py", line 39, in __getitem__
    return indexing.explicit_indexing_adapter(
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/xarray/core/indexing.py", line 711, in explicit_indexing_adapter
    result = raw_indexing_method(raw_key.tuple)
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/xarray/backends/pydap_.py", line 47, in _getitem
    result = robust_getitem(array, key, catch=ValueError)
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/xarray/backends/common.py", line 64, in robust_getitem
    return array[key]
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/pydap/model.py", line 323, in __getitem__
    out.data = self._get_data_index(index)
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/pydap/model.py", line 353, in _get_data_index
    return self._data[index]
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/pydap/handlers/dap.py", line 170, in __getitem__
    raise_for_status(r)
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/pydap/net.py", line 38, in raise_for_status
    raise HTTPError(
webob.exc.HTTPError: 403 403

I thought smaller requests would automagically happen with this code. Is it intended that a large request be made?

max-sixty commented 11 months ago

Not much we can do here I think, so closing in an effort to keep our open issues focused