uw-echospace / ooi-workflow

MIT License
1 stars 1 forks source link

Issue with compute_Sv #10

Open prerit162 opened 6 months ago

prerit162 commented 6 months ago

I get the error shown below when running this code:

ds_Sv = ep.calibrate.compute_Sv(final_ed).compute()
ds_Sv

final_ed is an EchoData object that was formed by running combine_echodata on 548 EchoData objects.

The size of the final_ed EchoData object was 246.2 GB.

ERROR -

CancelledError Traceback (most recent call last) Cell In[19], line 1 ----> 1 ds_Sv = ep.calibrate.compute_Sv(final_ed).compute() 2 ds_Sv

File ~/anaconda3/lib/python3.11/site-packages/echopype/calibrate/api.py:206, in compute_Sv(echodata, kwargs) 120 def compute_Sv(echodata: EchoData, kwargs) -> xr.Dataset: 121 """ 122 Compute volume backscattering strength (Sv) from raw data. 123 (...) 204 it must be set using EchoData.update_platform(). 205 """ --> 206 return _compute_cal(cal_type="Sv", echodata=echodata, **kwargs)

File ~/anaconda3/lib/python3.11/site-packages/echopype/calibrate/api.py:92, in _compute_cal(cal_type, echodata, env_params, cal_params, ecs_file, waveform_mode, encode_mode) 84 if echodata.sonar_model == "EK80": 85 ds[cal_type] = ds[cal_type].assign_attrs( 86 { 87 "waveform_mode": waveform_mode, 88 "encode_mode": encode_mode, 89 } 90 ) ---> 92 add_attrs(cal_type, cal_ds) 94 # Add provinance 95 # Provenance source files may originate from raw files (echodata.source_files) 96 # or converted files (echodata.converted_raw_path) 97 if echodata.source_file is not None:

File ~/anaconda3/lib/python3.11/site-packages/echopype/calibrate/api.py:80, in _compute_cal..add_attrs(cal_type, ds) 71 ds["range_sample"].attrs = {"long_name": "Along-range sample number, base 0"} 72 ds["echo_range"].attrs = {"long_name": "Range distance", "units": "m"} 73 ds[cal_type].attrs = { 74 "long_name": { 75 "Sv": "Volume backscattering strength (Sv re 1 m-1)", 76 "TS": "Target strength (TS re 1 m^2)", 77 }[cal_type], 78 "units": "dB", 79 "actual_range": [ ---> 80 round(float(ds[cal_type].min().values), 2), 81 round(float(ds[cal_type].max().values), 2), 82 ], 83 } 84 if echodata.sonar_model == "EK80": 85 ds[cal_type] = ds[cal_type].assign_attrs( 86 { 87 "waveform_mode": waveform_mode, 88 "encode_mode": encode_mode, 89 } 90 )

File ~/anaconda3/lib/python3.11/site-packages/xarray/core/dataarray.py:733, in DataArray.values(self) 724 @property 725 def values(self) -> np.ndarray: 726 """ 727 The array's data as a numpy.ndarray. 728 (...) 731 type does not support coercion like this (e.g. cupy). 732 """ --> 733 return self.variable.values

File ~/anaconda3/lib/python3.11/site-packages/xarray/core/variable.py:614, in Variable.values(self) 611 @property 612 def values(self): 613 """The variable's data as a numpy.ndarray""" --> 614 return _as_array_or_item(self._data)

File ~/anaconda3/lib/python3.11/site-packages/xarray/core/variable.py:314, in _as_array_or_item(data) 300 def _as_array_or_item(data): 301 """Return the given values as a numpy array, or as an individual item if 302 it's a 0d datetime64 or timedelta64 array. 303 (...) 312 TODO: remove this (replace with np.asarray) once these issues are fixed 313 """ --> 314 data = np.asarray(data) 315 if data.ndim == 0: 316 if data.dtype.kind == "M":

File ~/anaconda3/lib/python3.11/site-packages/dask/array/core.py:1701, in Array.array(self, dtype, kwargs) 1700 def array(self, dtype=None, kwargs): -> 1701 x = self.compute() 1702 if dtype and x.dtype != dtype: 1703 x = x.astype(dtype)

File ~/anaconda3/lib/python3.11/site-packages/dask/base.py:310, in DaskMethodsMixin.compute(self, kwargs) 286 def compute(self, kwargs): 287 """Compute this dask collection 288 289 This turns a lazy Dask collection into its in-memory equivalent. (...) 308 dask.compute 309 """ --> 310 (result,) = compute(self, traverse=False, **kwargs) 311 return result

File ~/anaconda3/lib/python3.11/site-packages/dask/base.py:595, in compute(traverse, optimize_graph, scheduler, get, args, kwargs) 592 keys.append(x.dask_keys()) 593 postcomputes.append(x.dask_postcompute()) --> 595 results = schedule(dsk, keys, kwargs) 596 return repack([f(r, a) for r, (f, a) in zip(results, postcomputes)])

File ~/anaconda3/lib/python3.11/site-packages/distributed/client.py:3243, in Client.get(self, dsk, keys, workers, allow_other_workers, resources, sync, asynchronous, direct, retries, priority, fifo_timeout, actors, **kwargs) 3241 should_rejoin = False 3242 try: -> 3243 results = self.gather(packed, asynchronous=asynchronous, direct=direct) 3244 finally: 3245 for f in futures.values():

File ~/anaconda3/lib/python3.11/site-packages/distributed/client.py:2368, in Client.gather(self, futures, errors, direct, asynchronous) 2366 except ValueError: 2367 local_worker = None -> 2368 return self.sync( 2369 self._gather, 2370 futures, 2371 errors=errors, 2372 direct=direct, 2373 local_worker=local_worker, 2374 asynchronous=asynchronous, 2375 )

File ~/anaconda3/lib/python3.11/site-packages/distributed/utils.py:351, in SyncMethodMixin.sync(self, func, asynchronous, callback_timeout, *args, *kwargs) 349 return future 350 else: --> 351 return sync( 352 self.loop, func, args, callback_timeout=callback_timeout, **kwargs 353 )

File ~/anaconda3/lib/python3.11/site-packages/distributed/utils.py:418, in sync(loop, func, callback_timeout, *args, **kwargs) 416 if error: 417 typ, exc, tb = error --> 418 raise exc.with_traceback(tb) 419 else: 420 return result

File ~/anaconda3/lib/python3.11/site-packages/distributed/utils.py:391, in sync..f() 389 future = wait_for(future, callback_timeout) 390 future = asyncio.ensure_future(future) --> 391 result = yield future 392 except Exception: 393 error = sys.exc_info()

File ~/anaconda3/lib/python3.11/site-packages/tornado/gen.py:767, in Runner.run(self) 765 try: 766 try: --> 767 value = future.result() 768 except Exception as e: 769 # Save the exception for later. It's important that 770 # gen.throw() not be called inside this try/except block 771 # because that makes sys.exc_info behave unexpectedly. 772 exc: Optional[Exception] = e

File ~/anaconda3/lib/python3.11/site-packages/distributed/client.py:2232, in Client._gather(self, futures, errors, direct, local_worker) 2230 else: 2231 raise exception.with_traceback(traceback) -> 2232 raise exc 2233 if errors == "skip": 2234 bad_keys.add(key)

CancelledError: ('_nanmin_skip-aggregate-43026fcdfa28064953afd5eeb691d3d9',)

leewujung commented 6 months ago

I haven't looked at this closely, but take a look at this: https://github.com/OSOceanAcoustics/echopype/issues/1212 — if it's the same issue, see if you can work out what the cause is.

prerit162 commented 6 months ago

I read through the complete thread you shared but wasn't able to work out what the exact solution might be. The EchoData object takes up a lot of memory because it holds a full year of data. Is there anything else we can do to run this function?