pandas-dev / pandas

Flexible and powerful data analysis / manipulation library for Python, providing labeled data structures similar to R data.frame objects, statistical functions, and much more
https://pandas.pydata.org
BSD 3-Clause "New" or "Revised" License
42.57k stars 17.56k forks source link

BUG: Unable to import `pandas` when `pyarrow` 16.1.0 is installed #59118

Closed dhirschfeld closed 3 days ago

dhirschfeld commented 4 days ago

Pandas version checks

Reproducible Example

In [2]: import pandas as pd
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Cell In[2], line 1
----> 1 import pandas as pd

File /opt/python/envs/dev310/lib/python3.10/site-packages/pandas/__init__.py:26
     22 del _hard_dependencies, _dependency, _missing_dependencies
     24 try:
     25     # numpy compat
---> 26     from pandas.compat import (
     27         is_numpy_dev as _is_numpy_dev,  # pyright: ignore[reportUnusedImport] # noqa: F401
     28     )
     29 except ImportError as _err:  # pragma: no cover
     30     _module = _err.name

File /opt/python/envs/dev310/lib/python3.10/site-packages/pandas/compat/__init__.py:27
     25 import pandas.compat.compressors
     26 from pandas.compat.numpy import is_numpy_dev
---> 27 from pandas.compat.pyarrow import (
     28     pa_version_under10p1,
     29     pa_version_under11p0,
     30     pa_version_under13p0,
     31     pa_version_under14p0,
     32     pa_version_under14p1,
     33     pa_version_under16p0,
     34 )
     36 if TYPE_CHECKING:
     37     from pandas._typing import F

File /opt/python/envs/dev310/lib/python3.10/site-packages/pandas/compat/pyarrow.py:10
      7 try:
      8     import pyarrow as pa
---> 10     _palv = Version(Version(pa.__version__).base_version)
     11     pa_version_under10p1 = _palv < Version("10.0.1")
     12     pa_version_under11p0 = _palv < Version("11.0.0")

AttributeError: module 'pyarrow' has no attribute '__version__'

Issue Description

It appears pyarrow no longer has a __version__ attribute:

In [3]: import pyarrow as pa

In [4]: pa.__version__
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Cell In[4], line 1
----> 1 pa.__version__

AttributeError: module 'pyarrow' has no attribute '__version__'

In [6]: import importlib

In [8]: importlib.metadata.version('pyarrow')
Out[8]: '16.1.0'

In [9]: importlib.metadata.version('pandas')
Out[9]: '2.2.2'

Expected Behavior

I can import pandas when I have pyarrow 16.1.0 installed

Installed Versions

python 3.10.14
pandas 2.2.2
pyarrow 16.1.0

Note: Packages were installed via conda-forge.

dhirschfeld commented 4 days ago

Trying to hack around it by setting pa.__version__ manually just leads to a different error: ModuleNotFoundError: No module named 'pyarrow.compute'.

Perhaps pyarrow needs to be pinned until the errors can be worked around :/

In [11]: pa.__version__ = '16.1.0'

In [12]: import pandas as pd
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
Cell In[12], line 1
----> 1 import pandas as pd

File /opt/python/envs/dev310/lib/python3.10/site-packages/pandas/__init__.py:49
     46 # let init-time option registration happen
     47 import pandas.core.config_init  # pyright: ignore[reportUnusedImport] # noqa: F401
---> 49 from pandas.core.api import (
     50     # dtype
     51     ArrowDtype,
     52     Int8Dtype,
     53     Int16Dtype,
     54     Int32Dtype,
     55     Int64Dtype,
     56     UInt8Dtype,
     57     UInt16Dtype,
     58     UInt32Dtype,
     59     UInt64Dtype,
     60     Float32Dtype,
     61     Float64Dtype,
     62     CategoricalDtype,
     63     PeriodDtype,
     64     IntervalDtype,
     65     DatetimeTZDtype,
     66     StringDtype,
     67     BooleanDtype,
     68     # missing
     69     NA,
     70     isna,
     71     isnull,
     72     notna,
     73     notnull,
     74     # indexes
     75     Index,
     76     CategoricalIndex,
     77     RangeIndex,
     78     MultiIndex,
     79     IntervalIndex,
     80     TimedeltaIndex,
     81     DatetimeIndex,
     82     PeriodIndex,
     83     IndexSlice,
     84     # tseries
     85     NaT,
     86     Period,
     87     period_range,
     88     Timedelta,
     89     timedelta_range,
     90     Timestamp,
     91     date_range,
     92     bdate_range,
     93     Interval,
     94     interval_range,
     95     DateOffset,
     96     # conversion
     97     to_numeric,
     98     to_datetime,
     99     to_timedelta,
    100     # misc
    101     Flags,
    102     Grouper,
    103     factorize,
    104     unique,
    105     value_counts,
    106     NamedAgg,
    107     array,
    108     Categorical,
    109     set_eng_float_format,
    110     Series,
    111     DataFrame,
    112 )
    114 from pandas.core.dtypes.dtypes import SparseDtype
    116 from pandas.tseries.api import infer_freq

File /opt/python/envs/dev310/lib/python3.10/site-packages/pandas/core/api.py:28
     16 from pandas.core.dtypes.missing import (
     17     isna,
     18     isnull,
     19     notna,
     20     notnull,
     21 )
     23 from pandas.core.algorithms import (
     24     factorize,
     25     unique,
     26     value_counts,
     27 )
---> 28 from pandas.core.arrays import Categorical
     29 from pandas.core.arrays.boolean import BooleanDtype
     30 from pandas.core.arrays.floating import (
     31     Float32Dtype,
     32     Float64Dtype,
     33 )

File /opt/python/envs/dev310/lib/python3.10/site-packages/pandas/core/arrays/__init__.py:1
----> 1 from pandas.core.arrays.arrow import ArrowExtensionArray
      2 from pandas.core.arrays.base import (
      3     ExtensionArray,
      4     ExtensionOpsMixin,
      5     ExtensionScalarOpsMixin,
      6 )
      7 from pandas.core.arrays.boolean import BooleanArray

File /opt/python/envs/dev310/lib/python3.10/site-packages/pandas/core/arrays/arrow/__init__.py:1
----> 1 from pandas.core.arrays.arrow.accessors import (
      2     ListAccessor,
      3     StructAccessor,
      4 )
      5 from pandas.core.arrays.arrow.array import ArrowExtensionArray
      7 __all__ = ["ArrowExtensionArray", "StructAccessor", "ListAccessor"]

File /opt/python/envs/dev310/lib/python3.10/site-packages/pandas/core/arrays/arrow/accessors.py:23
     21 if not pa_version_under10p1:
     22     import pyarrow as pa
---> 23     import pyarrow.compute as pc
     25     from pandas.core.dtypes.dtypes import ArrowDtype
     27 if TYPE_CHECKING:

ModuleNotFoundError: No module named 'pyarrow.compute'

dhirschfeld commented 3 days ago

This turned out to be caused by a corrupted package install; sorry for the noise.