Open sam-s opened 4 months ago
Possibly related: Preserving DatetimeIndex freq
in MultiIndex in Pandas
Thanks for the report. Note that the issue still occurs when `inplace=False`.
Further investigations and PRs to fix are welcome.
@sam-s @rhshadrach Hi, all. Can I take this issue? Please tell me if you mind it.
Because the `delete` method of class `Index` uses `_constructor`, `freq` is lost.
inplace=False
version=3.0.0.dev0+1023.g3b48b17e52
import datetime
import pandas as pd
# Build a DatetimeIndex covering 2024-05-10 through 2024-05-15.
index = pd.date_range(start=datetime.date(2024,5,10), end=datetime.date(2024,5,15))
# index.freq == <Day>
df = pd.DataFrame({"a":[1]*len(index)}, index=index)
# df.index.freq == <Day>
# Drop rows from both ends of the index (after 2024-05-13 and before
# 2024-05-11), this time with inplace=False to show that the non-inplace
# path is affected too.
df = df.drop(index=df.index[(df.index > pd.Timestamp("2024-05-13")) |
(df.index < pd.Timestamp("2024-05-11"))], inplace=False)
# Reported result: df.index.freq == None, not <Day>
# The assertion below therefore fails while the bug is present.
assert df.index.freq == index.freq
The `drop` method of `NDFrame` calls `_drop_axis`.
`obj` still holds `index.freq` just before `_drop_axis` is called.
def drop(
    self,
    labels: IndexLabel | ListLike = None,
    *,
    axis: Axis = 0,
    index: IndexLabel | ListLike = None,
    columns: IndexLabel | ListLike = None,
    level: Level | None = None,
    inplace: bool = False,
    errors: IgnoreRaise = "raise",
) -> Self | None:
    """
    Drop the given labels from one or both axes.

    Parameters
    ----------
    labels : single label or list-like, optional
        Labels to drop along ``axis``. Mutually exclusive with ``index``
        and ``columns``.
    axis : Axis, default 0
        Axis that ``labels`` refers to; only consulted when ``labels`` is
        given.
    index : single label or list-like, optional
        Labels to drop from the "index" axis.
    columns : single label or list-like, optional
        Labels to drop from the "columns" axis. Only used when
        ``self.ndim == 2``.
    level : Level, optional
        Forwarded unchanged to ``_drop_axis``.
    inplace : bool, default False
        If True, the result is written back through ``_update_inplace``
        and None is returned.
    errors : {'ignore', 'raise'}, default 'raise'
        Forwarded unchanged to ``_drop_axis``.

    Returns
    -------
    Self or None
        The object with the labels removed, or None when ``inplace`` is
        True.

    Raises
    ------
    ValueError
        If ``labels`` is combined with ``index``/``columns``, or if none of
        the three is provided.
    """
    inplace = validate_bool_kwarg(inplace, "inplace")

    has_index_or_columns = index is not None or columns is not None
    if labels is not None:
        if has_index_or_columns:
            raise ValueError("Cannot specify both 'labels' and 'index'/'columns'")
        targets = {self._get_axis_name(axis): labels}
    elif has_index_or_columns:
        targets = {"index": index}
        if self.ndim == 2:
            targets["columns"] = columns
    else:
        raise ValueError(
            "Need to specify at least one of 'labels', 'index' or 'columns'"
        )

    obj = self
    for axis_name, axis_labels in targets.items():
        if axis_labels is None:
            continue
        # obj still holds index.freq at this point
        obj = obj._drop_axis(axis_labels, axis_name, level=level, errors=errors)
        # obj.index.freq is None from here on (observation from this issue)

    if not inplace:
        return obj
    self._update_inplace(obj)
    return None
`axis.drop` returns `new_axis` without `freq`.
# NOTE: excerpt only, as quoted in the issue discussion; the snippet stops
# after the indexer computation and the rest of the method body is not shown.
@final
def _drop_axis(
self,
labels,
axis,
level=None,
errors: IgnoreRaise = "raise",
only_slice: bool = False,
) -> Self:
"""
Drop labels from specified axis. Used in the ``drop`` method
internally.
Parameters
----------
labels : single label or list-like
axis : int or axis name
level : int or level name, default None
For MultiIndex
errors : {'ignore', 'raise'}, default 'raise'
If 'ignore', suppress error and existing labels are dropped.
only_slice : bool, default False
Whether indexing along columns should be view-only.
"""
axis_num = self._get_axis_number(axis)
# From here on ``axis`` is the axis object itself, not its name/number.
axis = self._get_axis(axis)
if axis.is_unique:
if level is not None:
if not isinstance(axis, MultiIndex):
raise AssertionError("axis must be a MultiIndex")
new_axis = axis.drop(labels, level=level, errors=errors)
else:
new_axis = axis.drop(labels, errors=errors)
indexer = axis.get_indexer(new_axis)
# new_axis.freq is None here: the index returned by axis.drop(...) no
# longer carries freq (observation reported in this issue).
`self.delete(indexer)` returns a new `Index` (same type as `self`) with `freq=None`.
def drop(
    self,
    labels: Index | np.ndarray | Iterable[Hashable],
    errors: IgnoreRaise = "raise",
) -> Index:
    """
    Return a new Index with the given labels removed.

    Parameters
    ----------
    labels : array-like or scalar
        The label or labels to remove from the Index.
    errors : {'ignore', 'raise'}, default 'raise'
        With 'ignore', labels that are not present are skipped silently and
        only the labels that do exist are dropped.

    Returns
    -------
    Index
        Will be same type as self, except for RangeIndex.

    Raises
    ------
    KeyError
        If any of the labels is not found and ``errors`` is not 'ignore'.

    See Also
    --------
    Index.dropna : Return Index without NA/NaN values.
    Index.drop_duplicates : Return Index with duplicate values removed.

    Examples
    --------
    >>> idx = pd.Index(["a", "b", "c"])
    >>> idx.drop(["a"])
    Index(['b', 'c'], dtype='object')
    """
    if not isinstance(labels, Index):
        # avoid materializing e.g. RangeIndex
        if self.dtype == "object":
            arr_dtype = "object"
        else:
            arr_dtype = None
        labels = com.index_labels_to_array(labels, dtype=arr_dtype)

    indexer = self.get_indexer_for(labels)
    # Positions of -1 mark labels that are absent from this Index.
    found = indexer != -1
    if not found.all():
        if errors != "ignore":
            raise KeyError(f"{labels[~found].tolist()} not found in axis")
        indexer = indexer[found]

    # self.delete(indexer) returns an Index without freq
    # (the behaviour reported in this issue)
    return self.delete(indexer)
Pandas version checks
[X] I have checked that this issue has not already been reported.
[X] I have confirmed this bug exists on the latest version of pandas.
[ ] I have confirmed this bug exists on the main branch of pandas.
Reproducible Example
Issue Description
`drop(inplace=True)` with two-sided limits loses `freq` from the `DatetimeIndex`.
Expected Behavior
The `df.index.freq` should be the same after `drop` as before.
Installed Versions