pandas-dev / pandas

Flexible and powerful data analysis / manipulation library for Python, providing labeled data structures similar to R data.frame objects, statistical functions, and much more
https://pandas.pydata.org
BSD 3-Clause "New" or "Revised" License
43.62k stars 17.91k forks source link

BUG: error indexing non-nanosecond datetime outside of nanosecond datetime range #56940

Open arfriedman opened 9 months ago

arfriedman commented 9 months ago

Pandas version checks

Reproducible Example

from datetime import datetime
import numpy as np
import pandas as pd
years = pd.date_range(start=datetime(1001, 12, 31),
                      end=datetime(2000, 12, 31),
                      freq="A", unit="s")
ts = pd.Series(np.arange(1000), index=years)

In [2]: ts.index
Out[2]:
DatetimeIndex(['1001-12-31', '1002-12-31', '1003-12-31', '1004-12-31',
               '1005-12-31', '1006-12-31', '1007-12-31', '1008-12-31',
               '1009-12-31', '1010-12-31',
               ...
               '1991-12-31', '1992-12-31', '1993-12-31', '1994-12-31',
               '1995-12-31', '1996-12-31', '1997-12-31', '1998-12-31',
               '1999-12-31', '2000-12-31'],
              dtype='datetime64[s]', length=1000, freq='A-DEC')

In [4]: ts.loc["1600"]
---------------------------------------------------------------------------
OutOfBoundsDatetime                       Traceback (most recent call last)
Cell In[4], line 1
----> 1 ts.loc["1600"]

File ~/miniconda3/envs/Nile-Laki/lib/python3.11/site-packages/pandas/core/indexing.py:1153, in _LocationIndexer.__getitem__(self, key)
   1150 axis = self.axis or 0
   1152 maybe_callable = com.apply_if_callable(key, self.obj)
-> 1153 return self._getitem_axis(maybe_callable, axis=axis)

File ~/miniconda3/envs/Nile-Laki/lib/python3.11/site-packages/pandas/core/indexing.py:1393, in _LocIndexer._getitem_axis(self, key, axis)
   1391 # fall thru to straight lookup
   1392 self._validate_key(key, axis)
-> 1393 return self._get_label(key, axis=axis)

File ~/miniconda3/envs/Nile-Laki/lib/python3.11/site-packages/pandas/core/indexing.py:1343, in _LocIndexer._get_label(self, label, axis)
   1341 def _get_label(self, label, axis: AxisInt):
   1342     # GH#5567 this will fail if the label is not present in the axis.
-> 1343     return self.obj.xs(label, axis=axis)

File ~/miniconda3/envs/Nile-Laki/lib/python3.11/site-packages/pandas/core/generic.py:4236, in NDFrame.xs(self, key, axis, level, drop_level)
   4234             new_index = index[loc]
   4235 else:
-> 4236     loc = index.get_loc(key)
   4238     if isinstance(loc, np.ndarray):
   4239         if loc.dtype == np.bool_:

File ~/miniconda3/envs/Nile-Laki/lib/python3.11/site-packages/pandas/core/indexes/datetimes.py:611, in DatetimeIndex.get_loc(self, key)
    609 if self._can_partial_date_slice(reso):
    610     try:
--> 611         return self._partial_date_slice(reso, parsed)
    612     except KeyError as err:
    613         raise KeyError(key) from err

File ~/miniconda3/envs/Nile-Laki/lib/python3.11/site-packages/pandas/core/indexes/datetimelike.py:301, in DatetimeIndexOpsMixin._partial_date_slice(self, reso, parsed)
    298 if not self._can_partial_date_slice(reso):
    299     raise ValueError
--> 301 t1, t2 = self._parsed_string_to_bounds(reso, parsed)
    302 vals = self._data._ndarray
    303 unbox = self._data._unbox

File ~/miniconda3/envs/Nile-Laki/lib/python3.11/site-packages/pandas/core/indexes/datetimes.py:539, in DatetimeIndex._parsed_string_to_bounds(self, reso, parsed)
    524 """
    525 Calculate datetime bounds for parsed time string and its resolution.
    526
   (...)
    536 lower, upper: pd.Timestamp
    537 """
    538 per = Period(parsed, freq=reso.attr_abbrev)
--> 539 start, end = per.start_time, per.end_time
    541 # GH 24076
    542 # If an incoming date string contained a UTC offset, need to localize
    543 # the parsed date to this offset first before aligning with the index's
    544 # timezone
    545 start = start.tz_localize(parsed.tzinfo)

File period.pyx:1651, in pandas._libs.tslibs.period.PeriodMixin.start_time.__get__()

File period.pyx:2000, in pandas._libs.tslibs.period._Period.to_timestamp()

File period.pyx:1158, in pandas._libs.tslibs.period.period_ordinal_to_dt64()

File np_datetime.pyx:231, in pandas._libs.tslibs.np_datetime.check_dts_bounds()

OutOfBoundsDatetime: Out of bounds nanosecond timestamp: 1600-01-01 00:00:00

Issue Description

I receive an error trying to index a timeseries with second resolution outside the standard nanosecond range. It appears that the time series is being converted to nanosecond resolution during the indexing operation.

I believe this issue would fall under https://github.com/pandas-dev/pandas/issues/46587

Expected Behavior

The expected behavior is for indexing to work the same way it does within the standard nanosecond range:

In [3]: ts.loc["1700"]
Out[3]:
1700-12-31    699
Freq: A-DEC, dtype: int64

Installed Versions

INSTALLED VERSIONS
------------------
commit : a671b5a8bf5dd13fb19f0e88edc679bc9e15c673
python : 3.11.7.final.0
python-bits : 64
OS : Linux
OS-release : 5.15.0-91-generic
Version : #101-Ubuntu SMP Tue Nov 14 13:30:08 UTC 2023
machine : x86_64
processor : x86_64
byteorder : little
LC_ALL : None
LANG : en_US.UTF-8
LOCALE : en_US.UTF-8

pandas : 2.1.4
numpy : 1.26.3
pytz : 2023.3.post1
dateutil : 2.8.2
setuptools : 69.0.3
pip : 23.3.2
Cython : None
pytest : None
hypothesis : None
sphinx : None
blosc : None
feather : None
xlsxwriter : None
lxml.etree : 5.1.0
html5lib : None
pymysql : None
psycopg2 : None
jinja2 : 3.1.3
IPython : 8.20.0
pandas_datareader : None
bs4 : None
bottleneck : None
dataframe-api-compat: None
fastparquet : None
fsspec : 2023.12.2
gcsfs : None
matplotlib : 3.8.2
numba : 0.58.1
numexpr : None
odfpy : None
openpyxl : None
pandas_gbq : None
pyarrow : 14.0.2
pyreadstat : None
pyxlsb : None
s3fs : None
scipy : 1.11.4
sqlalchemy : None
tables : None
tabulate : None
xarray : 2023.12.0
xlrd : None
zstandard : None
tzdata : 2023.4
qtpy : None
pyqt5 : None

arfriedman commented 9 months ago

I just updated to pandas 2.2.0 and still get an OutOfBoundsDatetime error, now raised as a consequence of an OverflowError:

In [4]: ts.loc["1600"]
---------------------------------------------------------------------------
OverflowError                             Traceback (most recent call last)
File period.pyx:1169, in pandas._libs.tslibs.period.period_ordinal_to_dt64()

OverflowError: Overflow occurred in npy_datetimestruct_to_datetime

The above exception was the direct cause of the following exception:

OutOfBoundsDatetime                       Traceback (most recent call last)
Cell In[4], line 1
----> 1 ts.loc["1600"]

File ~/miniconda3/envs/Nile-Laki/lib/python3.11/site-packages/pandas/core/indexing.py:1192, in _LocationIndexer.__getitem__(self, key)
   1190 maybe_callable = com.apply_if_callable(key, self.obj)
   1191 maybe_callable = self._check_deprecated_callable_usage(key, maybe_callable)
-> 1192 return self._getitem_axis(maybe_callable, axis=axis)

File ~/miniconda3/envs/Nile-Laki/lib/python3.11/site-packages/pandas/core/indexing.py:1432, in _LocIndexer._getitem_axis(self, key, axis)
   1430 # fall thru to straight lookup
   1431 self._validate_key(key, axis)
-> 1432 return self._get_label(key, axis=axis)

File ~/miniconda3/envs/Nile-Laki/lib/python3.11/site-packages/pandas/core/indexing.py:1382, in _LocIndexer._get_label(self, label, axis)
   1380 def _get_label(self, label, axis: AxisInt):
   1381     # GH#5567 this will fail if the label is not present in the axis.
-> 1382     return self.obj.xs(label, axis=axis)

File ~/miniconda3/envs/Nile-Laki/lib/python3.11/site-packages/pandas/core/generic.py:4295, in NDFrame.xs(self, key, axis, level, drop_level)
   4293             new_index = index[loc]
   4294 else:
-> 4295     loc = index.get_loc(key)
   4297     if isinstance(loc, np.ndarray):
   4298         if loc.dtype == np.bool_:

File ~/miniconda3/envs/Nile-Laki/lib/python3.11/site-packages/pandas/core/indexes/datetimes.py:610, in DatetimeIndex.get_loc(self, key)
    608 if self._can_partial_date_slice(reso):
    609     try:
--> 610         return self._partial_date_slice(reso, parsed)
    611     except KeyError as err:
    612         raise KeyError(key) from err

File ~/miniconda3/envs/Nile-Laki/lib/python3.11/site-packages/pandas/core/indexes/datetimelike.py:324, in DatetimeIndexOpsMixin._partial_date_slice(self, reso, parsed)
    321 if not self._can_partial_date_slice(reso):
    322     raise ValueError
--> 324 t1, t2 = self._parsed_string_to_bounds(reso, parsed)
    325 vals = self._data._ndarray
    326 unbox = self._data._unbox

File ~/miniconda3/envs/Nile-Laki/lib/python3.11/site-packages/pandas/core/indexes/datetimes.py:538, in DatetimeIndex._parsed_string_to_bounds(self, reso, parsed)
    536 freq = OFFSET_TO_PERIOD_FREQSTR.get(reso.attr_abbrev, reso.attr_abbrev)
    537 per = Period(parsed, freq=freq)
--> 538 start, end = per.start_time, per.end_time
    540 # GH 24076
    541 # If an incoming date string contained a UTC offset, need to localize
    542 # the parsed date to this offset first before aligning with the index's
    543 # timezone
    544 start = start.tz_localize(parsed.tzinfo)

File period.pyx:1666, in pandas._libs.tslibs.period.PeriodMixin.start_time.__get__()

File period.pyx:1992, in pandas._libs.tslibs.period._Period.to_timestamp()

File period.pyx:1172, in pandas._libs.tslibs.period.period_ordinal_to_dt64()

OutOfBoundsDatetime: Out of bounds nanosecond timestamp: 1600-01-01 00:00:00