coecms / xmhw

Xarray version of the Marine Heatwaves code by Eric Oliver
https://xmhw.readthedocs.io/en/latest/
Apache License 2.0

ValueError: attempt to get argmax of an empty sequence #70

Open sryan288 opened 8 months ago

sryan288 commented 8 months ago

Hi Paola,

I am running the xmhw code (latest version) on high-resolution model output. Because of the large data volume, I am running it in chunks: separately for each depth level, and within each depth level in multiple horizontal chunks. I am encountering an error that I don't understand. The code runs fine for some depth levels, but for other levels, and only for specific horizontal chunks, I get the error message below. Have you encountered a similar error before? I don't see an obvious difference in the input data between the chunks that work and those that don't...

Thank you in advance for any advice or help! I'm happy to share data files if that would help you reproduce the error!

```
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Input In [9], in <cell line: 3>()
      2 inc=25
      3 for x in np.arange(75,200,inc):
----> 4         mhw, intermediate = detect(ds.sel(x=slice(x,x+inc)).drop(['nav_lon','nav_lat','deptht']).drop_indexes(('x','y')),
      5                                    clim['thresh'].sel(x=slice(x,x+inc)).drop(['nav_lon','nav_lat','deptht']).drop_indexes(('x','y')),
      6                                    clim['seas'].sel(x=slice(x,x+inc)).drop(['nav_lon','nav_lat','deptht']).drop_indexes(('x','y')),
      7                                    anynans=False,intermediate=True)
     13         print('finished mhw detection')

File ~/miniconda3/envs/xmhw_93/lib/python3.8/site-packages/xmhw/xmhw.py:454, in detect(***failed resolving arguments***)
    440     for c in ts.cell:
    441         mhwls.append(
    442             define_events(
    443                 ts.sel(cell=c),
   (...)
    452             )
    453         )
--> 454 results = dask.compute(mhwls)
    456 # Concatenate results and save as dataset
    457 # re-assign dimensions previously used to stack arrays
    458 if point:

File ~/miniconda3/envs/xmhw_93/lib/python3.8/site-packages/dask/base.py:603, in compute(traverse, optimize_graph, scheduler, get, *args, **kwargs)
    600     keys.append(x.__dask_keys__())
    601     postcomputes.append(x.__dask_postcompute__())
--> 603 results = schedule(dsk, keys, **kwargs)
    604 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])

File ~/miniconda3/envs/xmhw_93/lib/python3.8/site-packages/dask/threaded.py:89, in get(dsk, keys, cache, num_workers, pool, **kwargs)
     86     elif isinstance(pool, multiprocessing.pool.Pool):
     87         pool = MultiprocessingPoolExecutor(pool)
---> 89 results = get_async(
     90     pool.submit,
     91     pool._max_workers,
     92     dsk,
     93     keys,
     94     cache=cache,
     95     get_id=_thread_get_id,
     96     pack_exception=pack_exception,
     97     **kwargs,
     98 )
    100 # Cleanup pools associated to dead threads
    101 with pools_lock:

File ~/miniconda3/envs/xmhw_93/lib/python3.8/site-packages/dask/local.py:511, in get_async(submit, num_workers, dsk, result, cache, get_id, rerun_exceptions_locally, pack_exception, raise_exception, callbacks, dumps, loads, chunksize, **kwargs)
    509         _execute_task(task, data)  # Re-execute locally
    510     else:
--> 511         raise_exception(exc, tb)
    512 res, worker_id = loads(res_info)
    513 state["cache"][key] = res

File ~/miniconda3/envs/xmhw_93/lib/python3.8/site-packages/dask/local.py:319, in reraise(exc, tb)
    317 if exc.__traceback__ is not tb:
    318     raise exc.with_traceback(tb)
--> 319 raise exc

File ~/miniconda3/envs/xmhw_93/lib/python3.8/site-packages/dask/local.py:224, in execute_task(key, task_info, dumps, loads, get_id, pack_exception)
    222 try:
    223     task, data = loads(task_info)
--> 224     result = _execute_task(task, data)
    225     id = get_id()
    226     result = dumps((result, id))

File ~/miniconda3/envs/xmhw_93/lib/python3.8/site-packages/dask/core.py:119, in _execute_task(arg, cache, dsk)
    115     func, args = arg[0], arg[1:]
    116     # Note: Don't assign the subtask results to a variable. numpy detects
    117     # temporaries by their reference count and can execute certain
    118     # operations in-place.
--> 119     return func(*(_execute_task(a, cache) for a in args))
    120 elif not ishashable(arg):
    121     return arg

File ~/miniconda3/envs/xmhw_93/lib/python3.8/site-packages/xmhw/identify.py:400, in define_events(ts, th, se, idxarr, minDuration, joinGaps, maxGap, intermediate, tdim)
    397 del dfev
    399 # Calculate mhw properties, for each event using groupby
--> 400 dfmhw = mhw_features(df, len(idxarr) - 1, tdim, dims)
    402 # Convert back to xarray dataset
    403 mhw = xr.Dataset.from_dataframe(dfmhw, sparse=False)

File ~/miniconda3/envs/xmhw_93/lib/python3.8/site-packages/xmhw/features.py:89, in mhw_features(dftime, last, tdim, dims)
     73 """Calculate mhw properties, grouping by each event.
     74 
     75 Parameters
   (...)
     85     Includes MHW characteristics along time index
     86 """
     88 # calculate some of the mhw properties aggregating by events
---> 89 df = agg_df(dftime, tdim, dims)
     90 # calculate the rest of the mhw properties
     91 df = properties(df, dftime.loc[:,'relThresh'], dftime.loc[:,'mabs'])

File ~/miniconda3/envs/xmhw_93/lib/python3.8/site-packages/xmhw/features.py:114, in agg_df(df, tdim, dims)
     98 """Groupby events and apply different functions depending on attribute.
     99 
    100 Parameters
   (...)
    110     Includes most MHW properties by events
    111 """
    113 # using an aggregation dictionary to avoid apply.
--> 114 dfout = df.groupby("events", group_keys=True).agg(
    115     event=("events", "first"),
    116     index_start=("start", "first"),
    117     index_end=("end", "first"),
    118     time_start=(tdim, "first"),
    119     time_end=(tdim, "last"),
    120     relS_imax=("relSeas", np.argmax),
    121     # time as dataframe index, instead
    122     # of the timeseries index
    123     time_peak=("relSeas", "idxmax"),
    124     # the following are needed for onset_decline
    125     # anom_plus is (sst -seas) shifted 1 day ahead
    126     # anom_minus is (sst -seas) shifted 1 day back
    127     relS_first=("relSeas", "first"),
    128     relS_last=("relSeas", "last"),
    129     anom_first=("anom_plus", "first"),
    130     anom_last=("anom_minus", "last"),
    131     # intensity_max can be used as relSeas(index_peak)
    132     # in onset_decline
    133     intensity_max=("relSeas", "max"),
    134     intensity_mean=("relSeas", "mean"),
    135     intensity_cumulative=("relSeas", "sum"),
    136     severity_max=("severity", "max"),
    137     severity_mean=("severity", "mean"),
    138     severity_cumulative=("severity", "sum"),
    139     severity_var=("severity", "var"),
    140     relS_var=("relSeas", "var"),
    141     relT_var=("relThresh", "var"),
    142     intensity_mean_relThresh=("relThresh", "mean"),
    143     intensity_cumulative_relThresh=("relThresh", "sum"),
    144     intensity_mean_abs=("mabs", "mean"),
    145     mabs_var=("mabs", "var"),
    146     intensity_cumulative_abs=("mabs", "sum"),
    147     cats_max=("cats", "max"),
    148     duration_moderate=("duration_moderate", "sum"),
    149     duration_strong=("duration_strong", "sum"),
    150     duration_severe=("duration_severe", "sum"),
    151     duration_extreme=("duration_extreme", "sum"),
    152 ) 
    153 # adding dimensions used in stacked cell to recreate cell later
    154 # sending values to list to avoid warnings
    155 for d in dims:

File ~/miniconda3/envs/xmhw_93/lib/python3.8/site-packages/pandas/core/groupby/generic.py:869, in DataFrameGroupBy.aggregate(self, func, engine, engine_kwargs, *args, **kwargs)
    866 func = maybe_mangle_lambdas(func)
    868 op = GroupByApply(self, func, args, kwargs)
--> 869 result = op.agg()
    870 if not is_dict_like(func) and result is not None:
    871     return result

File ~/miniconda3/envs/xmhw_93/lib/python3.8/site-packages/pandas/core/apply.py:168, in Apply.agg(self)
    165     return self.apply_str()
    167 if is_dict_like(arg):
--> 168     return self.agg_dict_like()
    169 elif is_list_like(arg):
    170     # we require a list, but not a 'str'
    171     return self.agg_list_like()

File ~/miniconda3/envs/xmhw_93/lib/python3.8/site-packages/pandas/core/apply.py:481, in Apply.agg_dict_like(self)
    478     results = {key: colg.agg(how) for key, how in arg.items()}
    479 else:
    480     # key used for column selection and output
--> 481     results = {
    482         key: obj._gotitem(key, ndim=1).agg(how) for key, how in arg.items()
    483     }
    485 # set the final keys
    486 keys = list(arg.keys())

File ~/miniconda3/envs/xmhw_93/lib/python3.8/site-packages/pandas/core/apply.py:482, in <dictcomp>(.0)
    478     results = {key: colg.agg(how) for key, how in arg.items()}
    479 else:
    480     # key used for column selection and output
    481     results = {
--> 482         key: obj._gotitem(key, ndim=1).agg(how) for key, how in arg.items()
    483     }
    485 # set the final keys
    486 keys = list(arg.keys())

File ~/miniconda3/envs/xmhw_93/lib/python3.8/site-packages/pandas/core/groupby/generic.py:271, in SeriesGroupBy.aggregate(self, func, engine, engine_kwargs, *args, **kwargs)
    267 elif isinstance(func, abc.Iterable):
    268     # Catch instances of lists / tuples
    269     # but not the class list / tuple itself.
    270     func = maybe_mangle_lambdas(func)
--> 271     ret = self._aggregate_multiple_funcs(func)
    272     if relabeling:
    273         # error: Incompatible types in assignment (expression has type
    274         # "Optional[List[str]]", variable has type "Index")
    275         ret.columns = columns  # type: ignore[assignment]

File ~/miniconda3/envs/xmhw_93/lib/python3.8/site-packages/pandas/core/groupby/generic.py:326, in SeriesGroupBy._aggregate_multiple_funcs(self, arg)
    323 for idx, (name, func) in enumerate(arg):
    325     key = base.OutputKey(label=name, position=idx)
--> 326     results[key] = self.aggregate(func)
    328 if any(isinstance(x, DataFrame) for x in results.values()):
    329     from pandas import concat

File ~/miniconda3/envs/xmhw_93/lib/python3.8/site-packages/pandas/core/groupby/generic.py:287, in SeriesGroupBy.aggregate(self, func, engine, engine_kwargs, *args, **kwargs)
    284     return self._python_agg_general(func, *args, **kwargs)
    286 try:
--> 287     return self._python_agg_general(func, *args, **kwargs)
    288 except KeyError:
    289     # TODO: KeyError is raised in _python_agg_general,
    290     #  see test_groupby.test_basic
    291     result = self._aggregate_named(func, *args, **kwargs)

File ~/miniconda3/envs/xmhw_93/lib/python3.8/site-packages/pandas/core/groupby/groupby.py:1483, in GroupBy._python_agg_general(self, func, *args, **kwargs)
   1479 output: dict[base.OutputKey, ArrayLike] = {}
   1481 if self.ngroups == 0:
   1482     # agg_series below assumes ngroups > 0
-> 1483     return self._python_apply_general(f, self._selected_obj)
   1485 for idx, obj in enumerate(self._iterate_slices()):
   1486     name = obj.name

File ~/miniconda3/envs/xmhw_93/lib/python3.8/site-packages/pandas/core/groupby/groupby.py:1464, in GroupBy._python_apply_general(self, f, data, not_indexed_same)
   1438 @final
   1439 def _python_apply_general(
   1440     self,
   (...)
   1443     not_indexed_same: bool | None = None,
   1444 ) -> DataFrame | Series:
   1445     """
   1446     Apply function f in python space
   1447 
   (...)
   1462         data after applying f
   1463     """
-> 1464     values, mutated = self.grouper.apply(f, data, self.axis)
   1466     if not_indexed_same is None:
   1467         not_indexed_same = mutated or self.mutated

File ~/miniconda3/envs/xmhw_93/lib/python3.8/site-packages/pandas/core/groupby/ops.py:776, in BaseGrouper.apply(self, f, data, axis)
    766 # getattr pattern for __name__ is needed for functools.partial objects
    767 if len(group_keys) == 0 and getattr(f, "__name__", None) not in [
    768     "idxmin",
    769     "idxmax",
   (...)
    774     #  so we will not have raised even if this is an invalid dtype.
    775     #  So do one dummy call here to raise appropriate TypeError.
--> 776     f(data.iloc[:0])
    778 return result_values, mutated

File ~/miniconda3/envs/xmhw_93/lib/python3.8/site-packages/pandas/core/groupby/groupby.py:1476, in GroupBy._python_agg_general.<locals>.<lambda>(x)
   1473 @final
   1474 def _python_agg_general(self, func, *args, **kwargs):
   1475     func = com.is_builtin_func(func)
-> 1476     f = lambda x: func(x, *args, **kwargs)
   1478     # iterate through "columns" ex exclusions to populate output dict
   1479     output: dict[base.OutputKey, ArrayLike] = {}

File <__array_function__ internals>:180, in argmax(*args, **kwargs)

File ~/miniconda3/envs/xmhw_93/lib/python3.8/site-packages/numpy/core/fromnumeric.py:1216, in argmax(a, axis, out, keepdims)
   1129 """
   1130 Returns the indices of the maximum values along an axis.
   1131 
   (...)
   1213 (2, 1, 4)
   1214 """
   1215 kwds = {'keepdims': keepdims} if keepdims is not np._NoValue else {}
-> 1216 return _wrapfunc(a, 'argmax', axis=axis, out=out, **kwds)

File ~/miniconda3/envs/xmhw_93/lib/python3.8/site-packages/numpy/core/fromnumeric.py:57, in _wrapfunc(obj, method, *args, **kwds)
     54     return _wrapit(obj, method, *args, **kwds)
     56 try:
---> 57     return bound(*args, **kwds)
     58 except TypeError:
     59     # A TypeError occurs if the object does have such a method in its
     60     # class, but its signature is not identical to that of NumPy's. This
   (...)
     64     # Call _wrapit from within the except clause to ensure a potential
     65     # exception has a traceback chain.
     66     return _wrapit(obj, method, *args, **kwds)

File ~/miniconda3/envs/xmhw_93/lib/python3.8/site-packages/pandas/core/base.py:657, in IndexOpsMixin.argmax(self, axis, skipna, *args, **kwargs)
    653         return delegate.argmax()
    654 else:
    655     # error: Incompatible return value type (got "Union[int, ndarray]", expected
    656     # "int")
--> 657     return nanops.nanargmax(  # type: ignore[return-value]
    658         delegate, skipna=skipna
    659     )

File ~/miniconda3/envs/xmhw_93/lib/python3.8/site-packages/pandas/core/nanops.py:93, in disallow.__call__.<locals>._f(*args, **kwargs)
     91 try:
     92     with np.errstate(invalid="ignore"):
---> 93         return f(*args, **kwargs)
     94 except ValueError as e:
     95     # we want to transform an object array
     96     # ValueError message to the more typical TypeError
     97     # e.g. this is normally a disallowed function on
     98     # object arrays that contain strings
     99     if is_object_dtype(args[0]):

File ~/miniconda3/envs/xmhw_93/lib/python3.8/site-packages/pandas/core/nanops.py:1096, in nanargmax(values, axis, skipna, mask)
   1094 values, mask, _, _, _ = _get_values(values, True, fill_value_typ="-inf", mask=mask)
   1095 # error: Need type annotation for 'result'
-> 1096 result = values.argmax(axis)  # type: ignore[var-annotated]
   1097 result = _maybe_arg_null_out(result, axis, mask, skipna)
   1098 return result

ValueError: attempt to get argmax of an empty sequence
```
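For reference, the bottom of the traceback points at the `relS_imax=("relSeas", np.argmax)` aggregation in `features.py`. A minimal sketch of what I assume is the failure mode, with the pandas version from the traceback: aggregating an empty groupby with `np.argmax` raises the same error, which would happen if a chunk produces no events at all.

```python
import numpy as np
import pandas as pd

# Assumed failure mode: a chunk with no detected events leaves the events
# DataFrame empty, and pandas' dummy call f(data.iloc[:0]) (see
# BaseGrouper.apply in the traceback) feeds an empty Series to np.argmax.
df = pd.DataFrame({"events": pd.Series(dtype=float),
                   "relSeas": pd.Series(dtype=float)})
df.groupby("events").agg(relS_imax=("relSeas", np.argmax))
# ValueError: attempt to get argmax of an empty sequence
```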
paolap commented 8 months ago

Hi Svenya,

It sounds like some of the depth levels don't have data for those specific chunks. I'm not sure how you're running it in chunks, but maybe you could check whether the chunks you're passing are empty beforehand, for example with a pre-check like the one sketched below?
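A minimal sketch of such a pre-check, based on the loop in your traceback; the variable name `"sst"` is an assumption, so substitute the actual temperature variable in `ds`:

```python
import numpy as np

inc = 25
for x in np.arange(75, 200, inc):
    chunk = ds.sel(x=slice(x, x + inc))
    # Skip chunks that contain no finite data at all; a chunk that yields
    # no events can make the downstream aggregation fail.
    # Note: on dask-backed data, the bool() check triggers a compute.
    if not np.isfinite(chunk["sst"]).any():
        print(f"skipping empty chunk at x={x}")
        continue
    mhw, intermediate = detect(
        chunk.drop(["nav_lon", "nav_lat", "deptht"]).drop_indexes(("x", "y")),
        clim["thresh"].sel(x=slice(x, x + inc))
            .drop(["nav_lon", "nav_lat", "deptht"]).drop_indexes(("x", "y")),
        clim["seas"].sel(x=slice(x, x + inc))
            .drop(["nav_lon", "nav_lat", "deptht"]).drop_indexes(("x", "y")),
        anynans=False,
        intermediate=True,
    )
```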