barronh closed this issue 6 years ago
Yep, I'm seeing the same thing. Here's the full traceback:
============================================================== FAILURES ===============================================================
____________________________________ NetCDF4DataTest.test_88_character_filename_segmentation_fault ____________________________________
error = <class 'Warning'>, pattern = 'segmentation fault'
@contextmanager
def raises_regex(error, pattern):
__tracebackhide__ = True # noqa: F841
with pytest.raises(error) as excinfo:
> yield
xarray/tests/__init__.py:150:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <xarray.tests.test_backends.NetCDF4DataTest testMethod=test_88_character_filename_segmentation_fault>
def test_88_character_filename_segmentation_fault(self):
# should be fixed in netcdf4 v1.3.1
with mock.patch('netCDF4.__version__', '1.2.4'):
with warnings.catch_warnings():
warnings.simplefilter("error")
with raises_regex(Warning, 'segmentation fault'):
# Need to construct 88 character filepath
> xr.Dataset().to_netcdf('a' * (88 - len(os.getcwd()) - 1))
xarray/tests/test_backends.py:1143:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <xarray.Dataset>
Dimensions: ()
Data variables:
*empty*
path = 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa', mode = 'w', format = None, group = None, engine = None
encoding = {}, unlimited_dims = None
def to_netcdf(self, path=None, mode='w', format=None, group=None,
engine=None, encoding=None, unlimited_dims=None):
"""Write dataset contents to a netCDF file.
Parameters
----------
path : str, Path or file-like object, optional
Path to which to save this dataset. File-like objects are only
supported by the scipy engine. If no path is provided, this
function returns the resulting netCDF file as bytes; in this case,
we need to use scipy, which does not support netCDF version 4 (the
default format becomes NETCDF3_64BIT).
mode : {'w', 'a'}, optional
Write ('w') or append ('a') mode. If mode='w', any existing file at
this location will be overwritten. If mode='a', existing variables
will be overwritten.
format : {'NETCDF4', 'NETCDF4_CLASSIC', 'NETCDF3_64BIT','NETCDF3_CLASSIC'}, optional
File format for the resulting netCDF file:
* NETCDF4: Data is stored in an HDF5 file, using netCDF4 API
features.
* NETCDF4_CLASSIC: Data is stored in an HDF5 file, using only
netCDF 3 compatible API features.
* NETCDF3_64BIT: 64-bit offset version of the netCDF 3 file format,
which fully supports 2+ GB files, but is only compatible with
clients linked against netCDF version 3.6.0 or later.
* NETCDF3_CLASSIC: The classic netCDF 3 file format. It does not
handle 2+ GB files very well.
All formats are supported by the netCDF4-python library.
scipy.io.netcdf only supports the last two formats.
The default format is NETCDF4 if you are saving a file to disk and
have the netCDF4-python library available. Otherwise, xarray falls
back to using scipy to write netCDF files and defaults to the
NETCDF3_64BIT format (scipy does not support netCDF4).
group : str, optional
Path to the netCDF4 group in the given file to open (only works for
format='NETCDF4'). The group(s) will be created if necessary.
engine : {'netcdf4', 'scipy', 'h5netcdf'}, optional
Engine to use when writing netCDF files. If not provided, the
default engine is chosen based on available dependencies, with a
preference for 'netcdf4' if writing to a file on disk.
encoding : dict, optional
Nested dictionary with variable names as keys and dictionaries of
variable specific encodings as values, e.g.,
``{'my_variable': {'dtype': 'int16', 'scale_factor': 0.1,
'zlib': True}, ...}``
unlimited_dims : sequence of str, optional
Dimension(s) that should be serialized as unlimited dimensions.
By default, no dimensions are treated as unlimited dimensions.
Note that unlimited_dims may also be set via
``dataset.encoding['unlimited_dims']``.
"""
if encoding is None:
encoding = {}
from ..backends.api import to_netcdf
return to_netcdf(self, path, mode, format=format, group=group,
engine=engine, encoding=encoding,
> unlimited_dims=unlimited_dims)
xarray/core/dataset.py:1131:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
dataset = <xarray.Dataset>
Dimensions: ()
Data variables:
*empty*
path_or_file = '/Users/shoyer/dev/xarray/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa', mode = 'w', format = None
group = None, engine = 'netcdf4', writer = None, encoding = {}, unlimited_dims = None
def to_netcdf(dataset, path_or_file=None, mode='w', format=None, group=None,
engine=None, writer=None, encoding=None, unlimited_dims=None):
"""This function creates an appropriate datastore for writing a dataset to
disk as a netCDF file
See `Dataset.to_netcdf` for full API docs.
The ``writer`` argument is only for the private use of save_mfdataset.
"""
if isinstance(path_or_file, path_type):
path_or_file = str(path_or_file)
if encoding is None:
encoding = {}
if path_or_file is None:
if engine is None:
engine = 'scipy'
elif engine != 'scipy':
raise ValueError('invalid engine for creating bytes with '
'to_netcdf: %r. Only the default engine '
"or engine='scipy' is supported" % engine)
elif isinstance(path_or_file, basestring):
if engine is None:
engine = _get_default_engine(path_or_file)
path_or_file = _normalize_path(path_or_file)
else: # file-like object
engine = 'scipy'
# validate Dataset keys, DataArray names, and attr keys/values
_validate_dataset_names(dataset)
_validate_attrs(dataset)
try:
store_open = WRITEABLE_STORES[engine]
except KeyError:
raise ValueError('unrecognized engine for to_netcdf: %r' % engine)
if format is not None:
format = format.upper()
# if a writer is provided, store asynchronously
sync = writer is None
# handle scheduler specific logic
> scheduler = get_scheduler()
xarray/backends/api.py:639:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
get = None, collection = None
def get_scheduler(get=None, collection=None):
""" Determine the dask scheduler that is being used.
None is returned if not dask scheduler is active.
See also
--------
dask.utils.effective_get
"""
try:
from dask.utils import effective_get
actual_get = effective_get(get, collection)
try:
> from dask.distributed import Client
xarray/backends/common.py:46:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
from __future__ import absolute_import, division, print_function
try:
> from distributed import *
../../miniconda3/envs/xarray-py36/lib/python3.6/site-packages/dask/distributed.py:5:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
from __future__ import print_function, division, absolute_import
from .config import config
> from .core import connect, rpc
../../miniconda3/envs/xarray-py36/lib/python3.6/site-packages/distributed/__init__.py:4:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
from __future__ import print_function, division, absolute_import
from collections import defaultdict, deque
from concurrent.futures import CancelledError
from functools import partial
import logging
import six
import traceback
import uuid
import weakref
from six import string_types
from toolz import assoc
from tornado import gen
from tornado.ioloop import IOLoop
from tornado.locks import Event
> from .comm import (connect, listen, CommClosedError,
normalize_address,
unparse_host_port, get_address_host_port)
../../miniconda3/envs/xarray-py36/lib/python3.6/site-packages/distributed/core.py:20:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
from __future__ import print_function, division, absolute_import
from .addressing import (parse_address, unparse_address,
normalize_address, parse_host_port,
unparse_host_port, resolve_address,
get_address_host_port, get_address_host,
get_local_address_for,
)
from .core import connect, listen, Comm, CommClosedError
def _register_transports():
from . import inproc
from . import tcp
> _register_transports()
../../miniconda3/envs/xarray-py36/lib/python3.6/site-packages/distributed/comm/__init__.py:17:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
def _register_transports():
> from . import inproc
../../miniconda3/envs/xarray-py36/lib/python3.6/site-packages/distributed/comm/__init__.py:13:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
from __future__ import print_function, division, absolute_import
from collections import deque, namedtuple
import itertools
import logging
import os
import threading
import weakref
from tornado import gen, locks
from tornado.concurrent import Future
from tornado.ioloop import IOLoop
from ..compatibility import finalize
> from ..protocol import nested_deserialize
../../miniconda3/envs/xarray-py36/lib/python3.6/site-packages/distributed/comm/inproc.py:15:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
from __future__ import print_function, division, absolute_import
from functools import partial
from .compression import compressions, default_compression
> from .core import (dumps, loads, maybe_compress, decompress, msgpack)
../../miniconda3/envs/xarray-py36/lib/python3.6/site-packages/distributed/protocol/__init__.py:6:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
from __future__ import print_function, division, absolute_import
import logging
> import msgpack
../../miniconda3/envs/xarray-py36/lib/python3.6/site-packages/distributed/protocol/core.py:5:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
from msgpack._version import version
from msgpack.exceptions import *
from collections import namedtuple
class ExtType(namedtuple('ExtType', 'code data')):
"""ExtType represents ext type in msgpack."""
def __new__(cls, code, data):
if not isinstance(code, int):
raise TypeError("code must be int")
if not isinstance(data, bytes):
raise TypeError("data must be bytes")
if not 0 <= code <= 127:
raise ValueError("code must be 0~127")
return super(ExtType, cls).__new__(cls, code, data)
import os
if os.environ.get('MSGPACK_PUREPYTHON'):
from msgpack.fallback import Packer, unpack, unpackb, Unpacker
else:
try:
> from msgpack._packer import Packer
../../miniconda3/envs/xarray-py36/lib/python3.6/site-packages/msgpack/__init__.py:25:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
> ???
E ImportWarning: can't resolve package from __spec__ or __package__, falling back on __name__ and __path__
msgpack/_packer.pyx:7: ImportWarning
During handling of the above exception, another exception occurred:
self = <xarray.tests.test_backends.NetCDF4DataTest testMethod=test_88_character_filename_segmentation_fault>
def test_88_character_filename_segmentation_fault(self):
# should be fixed in netcdf4 v1.3.1
with mock.patch('netCDF4.__version__', '1.2.4'):
with warnings.catch_warnings():
warnings.simplefilter("error")
with raises_regex(Warning, 'segmentation fault'):
# Need to construct 88 character filepath
> xr.Dataset().to_netcdf('a' * (88 - len(os.getcwd()) - 1))
xarray/tests/test_backends.py:1143:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <contextlib._GeneratorContextManager object at 0x1209dc4e0>, type = <class 'ImportWarning'>
value = ImportWarning("can't resolve package from __spec__ or __package__, falling back on __name__ and __path__",)
traceback = <traceback object at 0x120ca93c8>
def __exit__(self, type, value, traceback):
if type is None:
try:
next(self.gen)
except StopIteration:
return False
else:
raise RuntimeError("generator didn't stop")
else:
if value is None:
# Need to force instantiation so we can reliably
# tell if we get the same exception back
value = type()
try:
> self.gen.throw(type, value, traceback)
E AssertionError: exception ImportWarning("can't resolve package from __spec__ or __package__, falling back on __name__ and __path__",) did not match pattern 'segmentation fault'
Thanks for noting this. I think https://github.com/pydata/xarray/pull/2026 will fix it.
Code Sample, a copy-pastable example if possible
Problem description
Instead of passing all tests, the suite fails on the test_88_character_filename_segmentation_fault function. Oddly, the failure doesn't seem to be related to the 88-character issue itself. I traced it down to xarray/backends/common.py::get_scheduler, where an ImportWarning is not being caught on line 49.
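The failure can be reproduced without the test harness at all. A minimal sketch, assuming a fresh interpreter in an environment where importing msgpack emits the ImportWarning seen in the traceback above:

import warnings

with warnings.catch_warnings():
    # The test promotes every warning to an error, so the ImportWarning
    # raised while importing msgpack (pulled in by dask.distributed)
    # propagates out of the import instead of being silently ignored.
    warnings.simplefilter("error")
    from dask.distributed import Client  # raises ImportWarning here

This is essentially what get_scheduler does internally, which is why the test trips over the import before the netCDF4 filename warning is ever issued.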
Expected Output
All tests pass. Adding ImportWarning to the tuple of caught exceptions in get_scheduler fixes the problem, as sketched below.
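For concreteness, here is a sketch of get_scheduler with that change applied. Only the fragment visible in the traceback above is confirmed; the exact except tuple and the fallback logic are assumptions about the xarray/dask versions of the time, not a copy of PR 2026:

def get_scheduler(get=None, collection=None):
    """Determine the dask scheduler that is being used.

    None is returned if no dask scheduler is active.
    """
    try:
        from dask.utils import effective_get
        actual_get = effective_get(get, collection)
        try:
            # With warnings promoted to errors, this import can raise
            # ImportWarning (via msgpack); catching it here is the fix.
            from dask.distributed import Client
            if isinstance(actual_get.__self__, Client):
                return 'distributed'
        except (ImportError, AttributeError, ImportWarning):
            pass
        return 'threaded'
    except ImportError:
        return None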
Output of xr.show_versions()