pandas-dev / pandas

Flexible and powerful data analysis / manipulation library for Python, providing labeled data structures similar to R data.frame objects, statistical functions, and much more
https://pandas.pydata.org
BSD 3-Clause "New" or "Revised" License
43.42k stars 17.85k forks source link

BUG: PeriodIndexGrouper fails with Grouper selection #14008

Open chris-b1 opened 8 years ago

chris-b1 commented 8 years ago

xref #13961

In [3]: df = pd.DataFrame({'date': pd.period_range('2015-01-01', freq='D', periods=5),
   ...:                    'a': np.arange(5)},
   ...:                    index=pd.MultiIndex.from_arrays([
   ...:                             [1,2,3,4,5],
   ...:                             pd.period_range('2015-01-01', freq='D', periods=5)],
   ...:                         names=['v','d']))

In [4]: df
Out[4]: 
              a       date
v d                       
1 2015-01-01  0 2015-01-01
2 2015-01-02  1 2015-01-02
3 2015-01-03  2 2015-01-03
4 2015-01-04  3 2015-01-04
5 2015-01-05  4 2015-01-05

In [5]: df.groupby(pd.Grouper(key='date', freq='2D')).sum()
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-5-7d905f24cc6a> in <module>()
----> 1 df.groupby(pd.Grouper(key='date', freq='2D')).sum()

C:\Users\Chris\Documents\python-dev\pandas\pandas\core\generic.py in groupby(self, by, axis, level, as_index, sort, group_keys, squeeze, **kwargs)
   3962         return groupby(self, by=by, axis=axis, level=level, as_index=as_index,
   3963                        sort=sort, group_keys=group_keys, squeeze=squeeze,
-> 3964                        **kwargs)
   3965 
   3966     def asfreq(self, freq, method=None, how=None, normalize=False):

C:\Users\Chris\Documents\python-dev\pandas\pandas\core\groupby.py in groupby(obj, by, **kwds)
   1495         raise TypeError('invalid type: %s' % type(obj))
   1496 
-> 1497     return klass(obj, by, **kwds)
   1498 
   1499 

C:\Users\Chris\Documents\python-dev\pandas\pandas\core\groupby.py in __init__(self, obj, keys, axis, level, grouper, exclusions, selection, as_index, sort, group_keys, squeeze, **kwargs)
    365                                                     level=level,
    366                                                     sort=sort,
--> 367                                                     mutated=self.mutated)
    368 
    369         self.obj = obj

C:\Users\Chris\Documents\python-dev\pandas\pandas\core\groupby.py in _get_grouper(obj, key, axis, level, sort, mutated)
   2381     # a passed-in Grouper, directly convert
   2382     if isinstance(key, Grouper):
-> 2383         binner, grouper, obj = key._get_grouper(obj)
   2384         if key.key is None:
   2385             return grouper, [], obj

C:\Users\Chris\Documents\python-dev\pandas\pandas\tseries\resample.py in _get_grouper(self, obj)
   1027     def _get_grouper(self, obj):
   1028         # create the resampler and return our binner
-> 1029         r = self._get_resampler(obj)
   1030         r._set_binner()
   1031         return r.binner, r.grouper, r.obj

C:\Users\Chris\Documents\python-dev\pandas\pandas\tseries\resample.py in _get_resampler(self, obj, kind)
   1015                                         groupby=self,
   1016                                         kind=kind,
-> 1017                                         axis=self.axis)
   1018         elif isinstance(ax, TimedeltaIndex):
   1019             return TimedeltaIndexResampler(obj,

C:\Users\Chris\Documents\python-dev\pandas\pandas\tseries\resample.py in __init__(self, obj, groupby, axis, kind, **kwargs)
     88 
     89         if self.groupby is not None:
---> 90             self.groupby._set_grouper(self._convert_obj(obj), sort=True)
     91 
     92     def __unicode__(self):

C:\Users\Chris\Documents\python-dev\pandas\pandas\tseries\resample.py in _convert_obj(self, obj)
    764         # convert to timestamp
    765         if not (self.kind is None or self.kind == 'period'):
--> 766             obj = obj.to_timestamp(how=self.convention)
    767         return obj
    768 

C:\Users\Chris\Documents\python-dev\pandas\pandas\core\frame.py in to_timestamp(self, freq, how, axis, copy)
   5174         axis = self._get_axis_number(axis)
   5175         if axis == 0:
-> 5176             new_data.set_axis(1, self.index.to_timestamp(freq=freq, how=how))
   5177         elif axis == 1:
   5178             new_data.set_axis(0, self.columns.to_timestamp(freq=freq, how=how))

AttributeError: 'MultiIndex' object has no attribute 'to_timestamp'
jreback commented 8 years ago

OK, will have to add this here: https://github.com/pydata/pandas/issues/12871

discort commented 6 years ago

@jreback @chris-b1 I get the following exception when using the example mentioned above. Could you please update the description of the issue?

Output of pd.show_versions()

INSTALLED VERSIONS
------------------
commit: 2be2ba570453ab09f0c8da46dfd8e1a179771fee
python: 3.5.3.candidate.1
python-bits: 64
OS: Darwin
OS-release: 16.7.0
machine: x86_64
processor: i386
byteorder: little
LC_ALL: en_US.UTF-8
LANG: en_US.UTF-8
LOCALE: en_US.UTF-8
pandas: 0.24.0.dev0+506.g2be2ba570
pytest: 3.7.2
pip: 18.0
setuptools: 33.1.1
Cython: 0.28.4
numpy: 1.12.0
scipy: None
pyarrow: None
xarray: None
IPython: 5.2.2
sphinx: 1.6.6
patsy: None
dateutil: 2.6.0
pytz: 2016.10
blosc: None
bottleneck: None
tables: None
numexpr: None
feather: None
matplotlib: 2.0.0
openpyxl: None
xlrd: None
xlwt: None
xlsxwriter: None
lxml: None
bs4: None
html5lib: 0.9999999
sqlalchemy: None
pymysql: None
psycopg2: None
jinja2: 2.9.5
s3fs: None
fastparquet: None
pandas_gbq: None
pandas_datareader: None
gcsfs: None
In [1]: df.groupby(pd.Grouper(key='date', freq='2D')).sum()
---------------------------------------------------------------------------
NotImplementedError                       Traceback (most recent call last)
/Users/discort/python/fun/pandas/pandas/tests/test_resample.py in <module>()
----> 1 df.groupby(pd.Grouper(key='date', freq='2D')).sum()

/Users/discort/python/fun/pandas/pandas/core/generic.py in groupby(self, by, axis, level, as_index, sort, group_keys, squeeze, observed, **kwargs)
   6996         return groupby(self, by=by, axis=axis, level=level, as_index=as_index,
   6997                        sort=sort, group_keys=group_keys, squeeze=squeeze,
-> 6998                        observed=observed, **kwargs)
   6999
   7000     def asfreq(self, freq, method=None, how=None, normalize=False,

/Users/discort/python/fun/pandas/pandas/core/groupby/groupby.py in groupby(obj, by, **kwds)
   1964         raise TypeError('invalid type: %s' % type(obj))
   1965
-> 1966     return klass(obj, by, **kwds)

/Users/discort/python/fun/pandas/pandas/core/groupby/groupby.py in __init__(self, obj, keys, axis, level, grouper, exclusions, selection, as_index, sort, group_keys, squeeze, observed, **kwargs)
    363                                                     sort=sort,
    364                                                     observed=observed,
--> 365                                                     mutated=self.mutated)
    366
    367         self.obj = obj

/Users/discort/python/fun/pandas/pandas/core/groupby/grouper.py in _get_grouper(obj, key, axis, level, sort, observed, mutated, validate)
    478     # a passed-in Grouper, directly convert
    479     if isinstance(key, Grouper):
--> 480         binner, grouper, obj = key._get_grouper(obj, validate=False)
    481         if key.key is None:
    482             return grouper, [], obj

/Users/discort/python/fun/pandas/pandas/core/resample.py in _get_grouper(self, obj, validate)
   1317     def _get_grouper(self, obj, validate=True):
   1318         # create the resampler and return our binner
-> 1319         r = self._get_resampler(obj)
   1320         r._set_binner()
   1321         return r.binner, r.grouper, r.obj

/Users/discort/python/fun/pandas/pandas/core/resample.py in _get_resampler(self, obj, kind)
   1305                                         groupby=self,
   1306                                         kind=kind,
-> 1307                                         axis=self.axis)
   1308         elif isinstance(ax, TimedeltaIndex):
   1309             return TimedeltaIndexResampler(obj,

/Users/discort/python/fun/pandas/pandas/core/resample.py in __init__(self, obj, groupby, axis, kind, **kwargs)
     78
     79         if self.groupby is not None:
---> 80             self.groupby._set_grouper(self._convert_obj(obj), sort=True)
     81
     82     def __unicode__(self):

/Users/discort/python/fun/pandas/pandas/core/resample.py in _convert_obj(self, obj)
   1050                    " with a PeriodIndex is not currently supported,"
   1051                    " use .set_index(...) to explicitly set index")
-> 1052             raise NotImplementedError(msg)
   1053
   1054         if self.loffset is not None:

NotImplementedError: Resampling from level= or on= selection with a PeriodIndex is not currently supported, use .set_index(...) to explicitly set index
fhipol commented 1 year ago

I am receiving this error also, even when I set the convention parameter.

dfm_ltv.set_index("date").groupby([pd.Grouper(key='country'),
                                   pd.Grouper(key='team'),
                                   pd.Grouper(freq="s", level="date", convention="start")]).sum()

---
NotImplementedError: Resampling from level= or on= selection with a PeriodIndex is not currently supported, use .set_index(...) to explicitly set index 

Output of pd.show_versions():

INSTALLED VERSIONS
------------------
commit : 91111fd99898d9dcaa6bf6bedb662db4108da6e6
python : 3.8.11.final.0
python-bits : 64
OS : Darwin
OS-release : 21.6.0
Version : Darwin Kernel Version 21.6.0: Mon Aug 22 20:17:10 PDT 2022; root:xnu-8020.140.49~2/RELEASE_X86_64
machine : x86_64
processor : i386
byteorder : little
LC_ALL : None
LANG : None
LOCALE : None.UTF-8
pandas : 1.5.1
numpy : 1.23.4
pytz : 2022.5
dateutil : 2.8.2
setuptools : 65.4.1
pip : 22.2.2
Cython : None
pytest : 6.0.1
hypothesis : None
sphinx : None
blosc : None
feather : None
xlsxwriter : None
lxml.etree : 4.3.3
html5lib : 1.0.1
pymysql : 0.9.3
psycopg2 : None
jinja2 : 2.11.2
IPython : 7.34.0
pandas_datareader: None
bs4 : 4.7.1
bottleneck : None
brotli : None
fastparquet : None
fsspec : None
gcsfs : None
matplotlib : 3.4.0
numba : None
numexpr : None
odfpy : None
openpyxl : 3.0.3
pandas_gbq : None
pyarrow : None
pyreadstat : None
pyxlsb : None
s3fs : None
scipy : 1.4.1
snappy : None
sqlalchemy : 1.3.17
tables : None
tabulate : None
xarray : None
xlrd : 1.2.0
xlwt : 1.3.0
zstandard : None
tzdata : None