Closed shanicetbailey closed 1 year ago
Can you post the full stack trace?
Looks the same as #16
ZeroDivisionError Traceback (most recent call last) Cell In [15], line 1 ----> 1 test.load()
File /srv/conda/envs/notebook/lib/python3.9/site-packages/xarray/core/dataarray.py:967, in DataArray.load(self, **kwargs) 949 def load(self: T_DataArray, **kwargs) -> T_DataArray: 950 """Manually trigger loading of this array's data from disk or a 951 remote source into memory and return this array. 952 (...) 965 dask.compute 966 """ --> 967 ds = self._to_temp_dataset().load(**kwargs) 968 new = self._from_temp_dataset(ds) 969 self._variable = new._variable
File /srv/conda/envs/notebook/lib/python3.9/site-packages/xarray/core/dataset.py:733, in Dataset.load(self, **kwargs) 730 import dask.array as da 732 # evaluate all the dask arrays simultaneously --> 733 evaluated_data = da.compute(*lazy_data.values(), **kwargs) 735 for k, data in zip(lazy_data, evaluated_data): 736 self.variables[k].data = data
File /srv/conda/envs/notebook/lib/python3.9/site-packages/dask/base.py:600, in compute(traverse, optimize_graph, scheduler, get, *args, **kwargs) 597 keys.append(x.__dask_keys__()) 598 postcomputes.append(x.__dask_postcompute__()) --> 600 results = schedule(dsk, keys, **kwargs) 601 return repack([f(r, a) for r, (f, a) in zip(results, postcomputes)])
File /srv/conda/envs/notebook/lib/python3.9/site-packages/distributed/client.py:3052, in Client.get(self, dsk, keys, workers, allow_other_workers, resources, sync, asynchronous, direct, retries, priority, fifo_timeout, actors, **kwargs) 3050 should_rejoin = False 3051 try: -> 3052 results = self.gather(packed, asynchronous=asynchronous, direct=direct) 3053 finally: 3054 for f in futures.values():
File /srv/conda/envs/notebook/lib/python3.9/site-packages/distributed/client.py:2226, in Client.gather(self, futures, errors, direct, asynchronous) 2224 else: 2225 local_worker = None -> 2226 return self.sync( 2227 self._gather, 2228 futures, 2229 errors=errors, 2230 direct=direct, 2231 local_worker=local_worker, 2232 asynchronous=asynchronous, 2233 )
File /srv/conda/envs/notebook/lib/python3.9/site-packages/distributed/utils.py:339, in SyncMethodMixin.sync(self, func, asynchronous, callback_timeout, *args, **kwargs) 337 return future 338 else: --> 339 return sync( 340 self.loop, func, *args, callback_timeout=callback_timeout, **kwargs 341 )
File /srv/conda/envs/notebook/lib/python3.9/site-packages/distributed/utils.py:406, in sync(loop, func, callback_timeout, *args, **kwargs) 404 if error: 405 typ, exc, tb = error --> 406 raise exc.with_traceback(tb) 407 else: 408 return result
File /srv/conda/envs/notebook/lib/python3.9/site-packages/distributed/utils.py:379, in sync.&lt;locals&gt;.f()
File /srv/conda/envs/notebook/lib/python3.9/site-packages/tornado/gen.py:762, in Runner.run(self) 759 exc_info = None 761 try: --> 762 value = future.result() 763 except Exception: 764 exc_info = sys.exc_info()
File /srv/conda/envs/notebook/lib/python3.9/site-packages/distributed/client.py:2089, in Client._gather(self, futures, errors, direct, local_worker) 2087 exc = CancelledError(key) 2088 else: -> 2089 raise exception.with_traceback(traceback) 2090 raise exc 2091 if errors == "skip":
File /srv/conda/envs/notebook/lib/python3.9/site-packages/dask/optimization.py:990, in __call__() 988 if not len(args) == len(self.inkeys): 989 raise ValueError("Expected %d args, got %d" % (len(self.inkeys), len(args))) --> 990 return core.get(self.dsk, self.outkey, dict(zip(self.inkeys, args)))
File /srv/conda/envs/notebook/lib/python3.9/site-packages/dask/core.py:149, in get() 147 for key in toposort(dsk): 148 task = dsk[key] --> 149 result = _execute_task(task, cache) 150 cache[key] = result 151 result = _execute_task(out, cache)
File /srv/conda/envs/notebook/lib/python3.9/site-packages/dask/core.py:119, in _execute_task() 115 func, args = arg[0], arg[1:] 116 # Note: Don't assign the subtask results to a variable. numpy detects 117 # temporaries by their reference count and can execute certain 118 # operations in-place. --> 119 return func(*(_execute_task(a, cache) for a in args)) 120 elif not ishashable(arg): 121 return arg
File /srv/conda/envs/notebook/lib/python3.9/site-packages/dask/core.py:119, in &lt;genexpr&gt;()
File /srv/conda/envs/notebook/lib/python3.9/site-packages/dask/core.py:113, in _execute_task() 83 """Do the actual work of collecting data and executing a function 84 85 Examples (...) 110 'foo' 111 """ 112 if isinstance(arg, list): --> 113 return [_execute_task(a, cache) for a in arg] 114 elif istask(arg): 115 func, args = arg[0], arg[1:]
File /srv/conda/envs/notebook/lib/python3.9/site-packages/dask/core.py:113, in &lt;listcomp&gt;()
File /srv/conda/envs/notebook/lib/python3.9/site-packages/dask/core.py:119, in _execute_task() 115 func, args = arg[0], arg[1:] 116 # Note: Don't assign the subtask results to a variable. numpy detects 117 # temporaries by their reference count and can execute certain 118 # operations in-place. --> 119 return func(*(_execute_task(a, cache) for a in args)) 120 elif not ishashable(arg): 121 return arg
File /srv/conda/envs/notebook/lib/python3.9/site-packages/dask/utils.py:71, in apply()
40 """Apply a function given its positional and keyword arguments.
41
42 Equivalent to func(*args, **kwargs)
(...)
68 >>> dsk = {'task-name': task} # adds the task to a low level Dask task graph
69 """
70 if kwargs:
---> 71 return func(*args, **kwargs)
72 else:
73 return func(*args)
File /srv/conda/envs/notebook/lib/python3.9/site-packages/xhistogram/core.py:236, in _bincount() 233 else: 234 weights_array = None --> 236 bin_counts = _bincount_2d_vectorized( 237 *all_arrays_reshaped, 238 bins=bins, 239 weights=weights_array, 240 density=density, 241 block_size=block_size, 242 ) 244 final_shape = kept_axes_shape + bin_counts.shape[1:] 245 bin_counts = reshape(bin_counts, final_shape)
File /srv/conda/envs/notebook/lib/python3.9/site-packages/xhistogram/core.py:185, in _bincount_2d_vectorized() 182 # total number of unique bin indices 183 N = reduce(lambda x, y: x * y, hist_shapes) --> 185 bin_counts = _dispatch_bincount( 186 bin_indices, weights, N, hist_shapes, block_size=block_size 187 ) 189 # just throw out everything outside of the bins, as np.histogram does 190 # TODO: make this optional? 191 slices = (slice(None),) + (N_inputs * (slice(1, -1),))
File /srv/conda/envs/notebook/lib/python3.9/site-packages/xhistogram/core.py:129, in _dispatch_bincount() 126 def _dispatch_bincount(bin_indices, weights, N, hist_shapes, block_size=None): 127 # block_chunks is like a dask chunk, a tuple that divides up the first 128 # axis of bin_indices --> 129 block_chunks = _determine_block_chunks(bin_indices, block_size) 130 if len(block_chunks) == 1: 131 # single global chunk, don't need a loop over chunks 132 return _bincount_2d(bin_indices, weights, N, hist_shapes)
File /srv/conda/envs/notebook/lib/python3.9/site-packages/xhistogram/core.py:117, in _determine_block_chunks() 115 block_size = min(_MAX_CHUNK_SIZE // N, M) 116 assert isinstance(block_size, int) --> 117 num_chunks = M // block_size 118 block_chunks = num_chunks * (block_size,) 119 residual = M % block_size
ZeroDivisionError: integer division or modulo by zero
@rabernat, I was just going to tag #16, and am going to try the workaround of chunking.
do you know if ds.SALT
is a dask array?
It's an xarray.DataArray
So trying to rechunk salt data ds.SALT.chunk({'time':-1, 'XC':-1, 'YC':-1, 'Z':-1})
and seems to not be producing an error...however, I am unsure because when I try to run test.load()
on the new salt variable it initially runs but the workers freeze..
I have tried with ds.SALT.chunk({'time':-1, 'XC':5, 'YC':5, 'Z':-1})
and that seemed to work, but took way longer than the run for ECCO data and with no need to rechunk. I guess I'll stick with this workaround 🤷♀️.
+1 to fixing this.
I'm going to close this as a duplicate of #16.
To get around this bug you can use block_size=None
though, see https://github.com/xgcm/xhistogram/issues/16#issuecomment-1584707121.
Hi, I am just rerunning old notebooks but am now getting an error I haven't gotten before with this code. Below I have attached a simplified version using xhistogram's
histogram
function on regular SOSE salt data and only specifyingsalt_bins
as an argument (no weights or dims specified for simplicity). Below is the error message. I have run this on regular ECCO data and it works with this template...not sure what's going on and would appreciate anyone shedding some light. Thanks! Error message: