vega / altair

Declarative statistical visualization library for Python
https://altair-viz.github.io/
BSD 3-Clause "New" or "Revised" License
9.39k stars 795 forks source link

test: Add missing `@skip_requires_pyarrow(requires_tzdata=True)` #3674

Closed dangotbanned closed 2 weeks ago

dangotbanned commented 2 weeks ago

I missed this one during https://github.com/vega/altair/pull/3672#discussion_r1827053789

This is already fixed in my other active PRs (https://github.com/vega/altair/pull/3664/commits/a375ab5d9d2831a911016b057f5ab57f61e3ebe7, https://github.com/vega/altair/pull/3618/commits/565f2719f1f6af53abaae6c13c367fba04378789).

The error doesn't get picked up during CI, but running the test locally on Windows produces the failure below:

F                                                                        [100%]
================================== FAILURES ===================================
_____________________ test_sanitize_pyarrow_table_columns _____________________
[gw0] win32 -- Python 3.12.3 C:\Users\danie\AppData\Local\hatch\env\virtual\altair\CXM7NV9I\doc\Scripts\python.exe

    @skip_requires_pyarrow
    def test_sanitize_pyarrow_table_columns() -> None:
        import pyarrow as pa

        # create a dataframe with various types
        df = pd.DataFrame(
            {
                "s": list("abcde"),
                "f": np.arange(5, dtype=float),
                "i": np.arange(5, dtype=int),
                "b": np.array([True, False, True, True, False]),
                "d": pd.date_range("2012-01-01", periods=5, freq="h"),
                "c": pd.Series(list("ababc"), dtype="category"),
                "p": pd.date_range("2012-01-01", periods=5, freq="h").tz_localize("UTC"),
            }
        )

        # Create pyarrow table with explicit schema so that date32 type is preserved
        pa_table = pa.Table.from_pandas(
            df,
            pa.schema(
                [
                    pa.field("s", pa.string()),
                    pa.field("f", pa.float64()),
                    pa.field("i", pa.int64()),
                    pa.field("b", pa.bool_()),
                    pa.field("d", pa.date32()),
                    pa.field("c", pa.dictionary(pa.int8(), pa.string())),
                    pa.field("p", pa.timestamp("ns", tz="UTC")),
                ]
            ),
        )
>       sanitized = sanitize_narwhals_dataframe(nw.from_native(pa_table, eager_only=True))

tests\utils\test_utils.py:154: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
altair\utils\core.py:494: in sanitize_narwhals_dataframe
    return data.select(columns)
..\..\..\AppData\Local\hatch\env\virtual\altair\CXM7NV9I\doc\Lib\site-packages\narwhals\dataframe.py:1500: in select
    return super().select(*exprs, **named_exprs)
..\..\..\AppData\Local\hatch\env\virtual\altair\CXM7NV9I\doc\Lib\site-packages\narwhals\dataframe.py:128: in select
    self._compliant_frame.select(*exprs, **named_exprs),
..\..\..\AppData\Local\hatch\env\virtual\altair\CXM7NV9I\doc\Lib\site-packages\narwhals\_arrow\dataframe.py:292: in select
    new_series = evaluate_into_exprs(self, *exprs, **named_exprs)
..\..\..\AppData\Local\hatch\env\virtual\altair\CXM7NV9I\doc\Lib\site-packages\narwhals\_expression_parsing.py:96: in evaluate_into_exprs
    series: ListOfCompliantSeries = [  # type: ignore[assignment]
..\..\..\AppData\Local\hatch\env\virtual\altair\CXM7NV9I\doc\Lib\site-packages\narwhals\_expression_parsing.py:98: in <genexpr>
    for sublist in (evaluate_into_expr(df, into_expr) for into_expr in exprs)
..\..\..\AppData\Local\hatch\env\virtual\altair\CXM7NV9I\doc\Lib\site-packages\narwhals\_expression_parsing.py:63: in evaluate_into_expr
    return expr._call(df)  # type: ignore[arg-type]
..\..\..\AppData\Local\hatch\env\virtual\altair\CXM7NV9I\doc\Lib\site-packages\narwhals\_expression_parsing.py:287: in <lambda>
    getattr(getattr(series, series_namespace), attr)(*args, **kwargs)
..\..\..\AppData\Local\hatch\env\virtual\altair\CXM7NV9I\doc\Lib\site-packages\narwhals\_arrow\series.py:756: in to_string
    pc.strftime(self._arrow_series._native_series, format)
..\..\..\AppData\Local\hatch\env\virtual\altair\CXM7NV9I\doc\Lib\site-packages\pyarrow\compute.py:264: in wrapper
    return func.call(args, options, memory_pool)
pyarrow\\_compute.pyx:393: in pyarrow._compute.Function.call
    ???
pyarrow\\error.pxi:155: in pyarrow.lib.pyarrow_internal_check_status
    ???
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

>   ???
E   pyarrow.lib.ArrowInvalid: Cannot locate timezone 'UTC': Timezone database not found at "C:\Users\danie\Downloads\tzdata"

pyarrow\\error.pxi:92: ArrowInvalid
=========================== short test summary info ===========================
FAILED tests/utils/test_utils.py::test_sanitize_pyarrow_table_columns - pyarrow.lib.ArrowInvalid: Cannot locate timezone 'UTC': Timezone database n...
============================== 1 failed in 2.19s ==============================