LinkedEarth / PyleoTutorials

Jupyter-based, science-driven tutorials for using the LinkedEarth data-software Python ecosystem
http://linked.earth/PyleoTutorials
Apache License 2.0
13 stars 6 forks source link

Issues in L0_paleopandas.ipynb #53

Closed CommonClimate closed 3 weeks ago

CommonClimate commented 1 month ago

Series resampler does not work with pandas 2.1.3.

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[25], line 1
----> 1 co2_5kavg = co2_5k.mean() # the aggregator here is simply the mean
      2 fig, ax = co2ts.plot(color='gray')
      3 co2_5kavg.plot(ax=ax,color='C1')         

File ~/opt/miniconda3/envs/pyleo/lib/python3.11/site-packages/pyleoclim/core/series.py:4436, in SeriesResampler.__getattr__(self, attr)
   4435 def __getattr__(self, attr):
-> 4436     attr = getattr(self.series.resample(self.rule,  **self.kwargs), attr)
   4437     def func(*args, **kwargs):
   4438         series = attr(*args, **kwargs)

File ~/opt/miniconda3/envs/pyleo/lib/python3.11/site-packages/pandas/core/generic.py:9771, in NDFrame.resample(self, rule, axis, closed, label, convention, kind, on, level, origin, offset, group_keys)
   9768 else:
   9769     convention = "start"
-> 9771 return get_resampler(
   9772     cast("Series | DataFrame", self),
   9773     freq=rule,
   9774     label=label,
   9775     closed=closed,
   9776     axis=axis,
   9777     kind=kind,
   9778     convention=convention,
   9779     key=on,
   9780     level=level,
   9781     origin=origin,
   9782     offset=offset,
   9783     group_keys=group_keys,
   9784 )

File ~/opt/miniconda3/envs/pyleo/lib/python3.11/site-packages/pandas/core/resample.py:2050, in get_resampler(obj, kind, **kwds)
   2046 """
   2047 Create a TimeGrouper and return our resampler.
   2048 """
   2049 tg = TimeGrouper(obj, **kwds)  # type: ignore[arg-type]
-> 2050 return tg._get_resampler(obj, kind=kind)

File ~/opt/miniconda3/envs/pyleo/lib/python3.11/site-packages/pandas/core/resample.py:2231, in TimeGrouper._get_resampler(self, obj, kind)
   2229 _, ax, _ = self._set_grouper(obj, gpr_index=None)
   2230 if isinstance(ax, DatetimeIndex):
-> 2231     return DatetimeIndexResampler(
   2232         obj,
   2233         timegrouper=self,
   2234         kind=kind,
   2235         axis=self.axis,
   2236         group_keys=self.group_keys,
   2237         gpr_index=ax,
   2238     )
   2239 elif isinstance(ax, PeriodIndex) or kind == "period":
   2240     if isinstance(ax, PeriodIndex):
   2241         # GH#53481

File ~/opt/miniconda3/envs/pyleo/lib/python3.11/site-packages/pandas/core/resample.py:187, in Resampler.__init__(self, obj, timegrouper, axis, kind, gpr_index, group_keys, selection, include_groups)
    182 self.include_groups = include_groups
    184 self.obj, self.ax, self._indexer = self._timegrouper._set_grouper(
    185     self._convert_obj(obj), sort=True, gpr_index=gpr_index
    186 )
--> 187 self.binner, self._grouper = self._get_binner()
    188 self._selection = selection
    189 if self._timegrouper.key is not None:

File ~/opt/miniconda3/envs/pyleo/lib/python3.11/site-packages/pandas/core/resample.py:252, in Resampler._get_binner(self)
    246 @final
    247 def _get_binner(self):
    248     """
    249     Create the BinGrouper, assume that self.set_grouper(obj)
    250     has already been called.
    251     """
--> 252     binner, bins, binlabels = self._get_binner_for_time()
    253     assert len(bins) == len(binlabels)
    254     bin_grouper = BinGrouper(bins, binlabels, indexer=self._indexer)

File ~/opt/miniconda3/envs/pyleo/lib/python3.11/site-packages/pandas/core/resample.py:1741, in DatetimeIndexResampler._get_binner_for_time(self)
   1739 if self.kind == "period":
   1740     return self._timegrouper._get_time_period_bins(self.ax)
-> 1741 return self._timegrouper._get_time_bins(self.ax)

File ~/opt/miniconda3/envs/pyleo/lib/python3.11/site-packages/pandas/core/resample.py:2329, in TimeGrouper._get_time_bins(self, ax)
   2326 binner, bin_edges = self._adjust_bin_edges(binner, ax_values)
   2328 # general version, knowing nothing about relative frequencies
-> 2329 bins = lib.generate_bins_dt64(
   2330     ax_values, bin_edges, self.closed, hasnans=ax.hasnans
   2331 )
   2333 if self.closed == "right":
   2334     labels = binner

File lib.pyx:891, in pandas._libs.lib.generate_bins_dt64()

ValueError: Values falls before first bin

I tried @khider's suggestion to upgrade to the latest pandas from a wheel (labeled 3.0.0 + gibberish), but it broke my environment (spurious error messages popped up with every command, whether pandas-related or not).

Proposed solutions:

CommonClimate commented 3 weeks ago

Creating the pyleo environment from the repo's yml file, as currently implemented, fixes this issue on my machine. It seems that CI is using a higher version of pandas than mandated by Pyleoclim (2.1.4), leading to a failing test when using Series.resample().

This seems to be due to this line in the workflow file: `pip install pandas --upgrade`. @alexkjames, do you remember why that clause was added? Also, we can relax the "no correlations" thing now that that particular notebook has gotten so much leaner and meaner.