modin-project / modin

Modin: Scale your Pandas workflows by changing a single line of code
http://modin.readthedocs.io
Apache License 2.0
9.61k stars 649 forks source link

Properly check that `modin/pandas/test/dataframe/test_binary.py` defaults to pandas #3955

Open devin-petersohn opened 2 years ago

devin-petersohn commented 2 years ago

We need to verify that the tests expected to default to pandas actually do default to pandas.

mvashishtha commented 2 years ago

Failures when all "defaulting to pandas" warnings are treated as errors:

============================================= FAILURES ==============================================
__________________________ test_math_functions[pow-columns-series_or_list] __________________________
[gw3] darwin -- Python 3.7.10 /Users/maheshvashishtha/omnisci-datascience/bin/python3.7

other = <function <lambda> at 0x7ff391657710>, axis = 'columns', op = 'pow'

    @pytest.mark.parametrize(
        "other",
        [
            lambda df: 4,
            lambda df, axis: df.iloc[0] if axis == "columns" else list(df[df.columns[0]]),
        ],
        ids=["scalar", "series_or_list"],
    )
    @pytest.mark.parametrize("axis", ["rows", "columns"])
    @pytest.mark.parametrize(
        "op",
        [
            *("add", "radd", "sub", "rsub", "mod", "rmod", "pow", "rpow"),
            *("truediv", "rtruediv", "mul", "rmul", "floordiv", "rfloordiv"),
        ],
    )
    def test_math_functions(other, axis, op):
        data = test_data["float_nan_data"]
        if (op == "floordiv" or op == "rfloordiv") and axis == "rows":
            # lambda == "series_or_list"
            pytest.xfail(reason="different behaviour")

        if op == "rmod" and axis == "rows":
            # lambda == "series_or_list"
            pytest.xfail(reason="different behaviour")

        eval_general(
>           *create_test_dfs(data), lambda df: getattr(df, op)(other(df, axis), axis=axis)
        )

modin/pandas/test/dataframe/test_binary.py:69:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
modin/pandas/test/utils.py:741: in eval_general
    operation, md_kwargs=md_kwargs, pd_kwargs=pd_kwargs, inplace=__inplace__
modin/pandas/test/utils.py:719: in execute_callable
    md_result = fn(modin_df, **md_kwargs)
modin/pandas/test/dataframe/test_binary.py:69: in <lambda>
    *create_test_dfs(data), lambda df: getattr(df, op)(other(df, axis), axis=axis)
modin/pandas/dataframe.py:1620: in pow
    "pow", other, axis=axis, level=level, fill_value=fill_value
modin/pandas/base.py:454: in _default_to_pandas
    empty_self_str,
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

cls = <class 'modin.error_message.ErrorMessage'>
message = '`DataFrame.pow` defaulting to pandas implementation.\nTo request implementation, send an email to feature_requests@modin.org.'

    @classmethod
    def default_to_pandas(cls, message=""):
        if message != "":
            message = "{} defaulting to pandas implementation.".format(message)
        else:
            message = "Defaulting to pandas implementation."

        if not cls.printed_request_implementation:
            message = (
                "{}\n".format(message)
                + "To request implementation, send an email to "
                + "feature_requests@modin.org."
            )
            cls.printed_request_implementation = True
>       warnings.warn(message)
E       UserWarning: `DataFrame.pow` defaulting to pandas implementation.
E       To request implementation, send an email to feature_requests@modin.org.

modin/error_message.py:55: UserWarning
__________________ test_math_functions_fill_value[pow-None-check_different_index] ___________________
[gw0] darwin -- Python 3.7.10 /Users/maheshvashishtha/omnisci-datascience/bin/python3.7

other = <function <lambda> at 0x7fbfe6336560>, fill_value = None, op = 'pow'

    @pytest.mark.parametrize(
        "other",
        [lambda df: df[: -(2 ** 4)], lambda df: df[df.columns[0]].reset_index(drop=True)],
        ids=["check_missing_value", "check_different_index"],
    )
    @pytest.mark.parametrize("fill_value", [None, 3.0])
    @pytest.mark.parametrize(
        "op",
        [
            *("add", "radd", "sub", "rsub", "mod", "rmod", "pow", "rpow"),
            *("truediv", "rtruediv", "mul", "rmul", "floordiv", "rfloordiv"),
        ],
    )
    def test_math_functions_fill_value(other, fill_value, op):
        data = test_data["int_data"]
        modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)

        eval_general(
            modin_df,
            pandas_df,
>           lambda df: getattr(df, op)(other(df), axis=0, fill_value=fill_value),
        )

modin/pandas/test/dataframe/test_binary.py:93:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
modin/pandas/test/utils.py:741: in eval_general
    operation, md_kwargs=md_kwargs, pd_kwargs=pd_kwargs, inplace=__inplace__
modin/pandas/test/utils.py:719: in execute_callable
    md_result = fn(modin_df, **md_kwargs)
modin/pandas/test/dataframe/test_binary.py:93: in <lambda>
    lambda df: getattr(df, op)(other(df), axis=0, fill_value=fill_value),
modin/pandas/dataframe.py:1620: in pow
    "pow", other, axis=axis, level=level, fill_value=fill_value
modin/pandas/base.py:454: in _default_to_pandas
    empty_self_str,
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

cls = <class 'modin.error_message.ErrorMessage'>
message = '`DataFrame.pow` defaulting to pandas implementation.\nTo request implementation, send an email to feature_requests@modin.org.'

    @classmethod
    def default_to_pandas(cls, message=""):
        if message != "":
            message = "{} defaulting to pandas implementation.".format(message)
        else:
            message = "Defaulting to pandas implementation."

        if not cls.printed_request_implementation:
            message = (
                "{}\n".format(message)
                + "To request implementation, send an email to "
                + "feature_requests@modin.org."
            )
            cls.printed_request_implementation = True
>       warnings.warn(message)
E       UserWarning: `DataFrame.pow` defaulting to pandas implementation.
E       To request implementation, send an email to feature_requests@modin.org.

modin/error_message.py:55: UserWarning
_________________________ test_math_functions[rpow-columns-series_or_list] __________________________
[gw3] darwin -- Python 3.7.10 /Users/maheshvashishtha/omnisci-datascience/bin/python3.7

other = <function <lambda> at 0x7ff391657710>, axis = 'columns', op = 'rpow'

    @pytest.mark.parametrize(
        "other",
        [
            lambda df: 4,
            lambda df, axis: df.iloc[0] if axis == "columns" else list(df[df.columns[0]]),
        ],
        ids=["scalar", "series_or_list"],
    )
    @pytest.mark.parametrize("axis", ["rows", "columns"])
    @pytest.mark.parametrize(
        "op",
        [
            *("add", "radd", "sub", "rsub", "mod", "rmod", "pow", "rpow"),
            *("truediv", "rtruediv", "mul", "rmul", "floordiv", "rfloordiv"),
        ],
    )
    def test_math_functions(other, axis, op):
        data = test_data["float_nan_data"]
        if (op == "floordiv" or op == "rfloordiv") and axis == "rows":
            # lambda == "series_or_list"
            pytest.xfail(reason="different behaviour")

        if op == "rmod" and axis == "rows":
            # lambda == "series_or_list"
            pytest.xfail(reason="different behaviour")

        eval_general(
>           *create_test_dfs(data), lambda df: getattr(df, op)(other(df, axis), axis=axis)
        )

modin/pandas/test/dataframe/test_binary.py:69:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
modin/pandas/test/utils.py:741: in eval_general
    operation, md_kwargs=md_kwargs, pd_kwargs=pd_kwargs, inplace=__inplace__
modin/pandas/test/utils.py:719: in execute_callable
    md_result = fn(modin_df, **md_kwargs)
modin/pandas/test/dataframe/test_binary.py:69: in <lambda>
    *create_test_dfs(data), lambda df: getattr(df, op)(other(df, axis), axis=axis)
modin/pandas/dataframe.py:1848: in rpow
    "rpow", other, axis=axis, level=level, fill_value=fill_value
modin/pandas/base.py:454: in _default_to_pandas
    empty_self_str,
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

cls = <class 'modin.error_message.ErrorMessage'>
message = '`DataFrame.rpow` defaulting to pandas implementation.'

    @classmethod
    def default_to_pandas(cls, message=""):
        if message != "":
            message = "{} defaulting to pandas implementation.".format(message)
        else:
            message = "Defaulting to pandas implementation."

        if not cls.printed_request_implementation:
            message = (
                "{}\n".format(message)
                + "To request implementation, send an email to "
                + "feature_requests@modin.org."
            )
            cls.printed_request_implementation = True
>       warnings.warn(message)
E       UserWarning: `DataFrame.rpow` defaulting to pandas implementation.

modin/error_message.py:55: UserWarning
___________________ test_math_functions_fill_value[pow-3.0-check_different_index] ___________________
[gw0] darwin -- Python 3.7.10 /Users/maheshvashishtha/omnisci-datascience/bin/python3.7

fn = <function test_math_functions_fill_value.<locals>.<lambda> at 0x7fbfe7d7d8c0>, inplace = False
md_kwargs = {}, pd_kwargs = {}

    def execute_callable(fn, inplace=False, md_kwargs={}, pd_kwargs={}):
        try:
>           pd_result = fn(pandas_df, **pd_kwargs)

modin/pandas/test/utils.py:701:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

df =      col33  col34  col35  col36  col37  ...  col28  col29  col30  col31  index
0       51     79     61     16     61 ...5     66      2
255     69     93     62     74     37  ...     56     33     55     29     54

[256 rows x 64 columns]

>       lambda df: getattr(df, op)(other(df), axis=0, fill_value=fill_value),
    )

modin/pandas/test/dataframe/test_binary.py:93:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self =      col33  col34  col35  col36  col37  ...  col28  col29  col30  col31  index
0       51     79     61     16     61 ...5     66      2
255     69     93     62     74     37  ...     56     33     55     29     54

[256 rows x 64 columns]
other = 0      51
1      92
2      14
3      71
4      60
       ..
251    15
252    96
253    72
254    58
255    69
Name: col33, Length: 256, dtype: int64
axis = 0, level = None, fill_value = 3.0

    @Appender(doc)
    def f(self, other, axis=default_axis, level=None, fill_value=None):

        if should_reindex_frame_op(
            self, other, op, axis, default_axis, fill_value, level
        ):
            return frame_arith_method_with_reindex(self, other, op)

        if isinstance(other, ABCSeries) and fill_value is not None:
            # TODO: We could allow this in cases where we end up going
            #  through the DataFrame path
>           raise NotImplementedError(f"fill_value {fill_value} not supported.")
E           NotImplementedError: fill_value 3.0 not supported.

../omnisci-datascience/lib/python3.7/site-packages/pandas/core/ops/__init__.py:428: NotImplementedError

During handling of the above exception, another exception occurred:

other = <function <lambda> at 0x7fbfe6336560>, fill_value = 3.0, op = 'pow'

    @pytest.mark.parametrize(
        "other",
        [lambda df: df[: -(2 ** 4)], lambda df: df[df.columns[0]].reset_index(drop=True)],
        ids=["check_missing_value", "check_different_index"],
    )
    @pytest.mark.parametrize("fill_value", [None, 3.0])
    @pytest.mark.parametrize(
        "op",
        [
            *("add", "radd", "sub", "rsub", "mod", "rmod", "pow", "rpow"),
            *("truediv", "rtruediv", "mul", "rmul", "floordiv", "rfloordiv"),
        ],
    )
    def test_math_functions_fill_value(other, fill_value, op):
        data = test_data["int_data"]
        modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)

        eval_general(
            modin_df,
            pandas_df,
>           lambda df: getattr(df, op)(other(df), axis=0, fill_value=fill_value),
        )

modin/pandas/test/dataframe/test_binary.py:93:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
modin/pandas/test/utils.py:741: in eval_general
    operation, md_kwargs=md_kwargs, pd_kwargs=pd_kwargs, inplace=__inplace__
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

fn = <function test_math_functions_fill_value.<locals>.<lambda> at 0x7fbfe7d7d8c0>, inplace = False
md_kwargs = {}, pd_kwargs = {}

    def execute_callable(fn, inplace=False, md_kwargs={}, pd_kwargs={}):
        try:
            pd_result = fn(pandas_df, **pd_kwargs)
        except Exception as pd_e:
            if check_exception_type is None:
                return None
            with pytest.raises(Exception) as md_e:
                # repr to force materialization
                repr(fn(modin_df, **md_kwargs))
            if check_exception_type:
                assert isinstance(
                    md_e.value, type(pd_e)
                ), "Got Modin Exception type {}, but pandas Exception type {} was expected".format(
>                   type(md_e.value), type(pd_e)
                )
E               AssertionError: Got Modin Exception type <class 'UserWarning'>, but pandas Exception type <class 'NotImplementedError'> was expected

modin/pandas/test/utils.py:712: AssertionError
__________________ test_math_functions_fill_value[rpow-None-check_different_index] __________________
[gw0] darwin -- Python 3.7.10 /Users/maheshvashishtha/omnisci-datascience/bin/python3.7

other = <function <lambda> at 0x7fbfe6336560>, fill_value = None, op = 'rpow'

    @pytest.mark.parametrize(
        "other",
        [lambda df: df[: -(2 ** 4)], lambda df: df[df.columns[0]].reset_index(drop=True)],
        ids=["check_missing_value", "check_different_index"],
    )
    @pytest.mark.parametrize("fill_value", [None, 3.0])
    @pytest.mark.parametrize(
        "op",
        [
            *("add", "radd", "sub", "rsub", "mod", "rmod", "pow", "rpow"),
            *("truediv", "rtruediv", "mul", "rmul", "floordiv", "rfloordiv"),
        ],
    )
    def test_math_functions_fill_value(other, fill_value, op):
        data = test_data["int_data"]
        modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)

        eval_general(
            modin_df,
            pandas_df,
>           lambda df: getattr(df, op)(other(df), axis=0, fill_value=fill_value),
        )

modin/pandas/test/dataframe/test_binary.py:93:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
modin/pandas/test/utils.py:741: in eval_general
    operation, md_kwargs=md_kwargs, pd_kwargs=pd_kwargs, inplace=__inplace__
modin/pandas/test/utils.py:719: in execute_callable
    md_result = fn(modin_df, **md_kwargs)
modin/pandas/test/dataframe/test_binary.py:93: in <lambda>
    lambda df: getattr(df, op)(other(df), axis=0, fill_value=fill_value),
modin/pandas/dataframe.py:1848: in rpow
    "rpow", other, axis=axis, level=level, fill_value=fill_value
modin/pandas/base.py:454: in _default_to_pandas
    empty_self_str,
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

cls = <class 'modin.error_message.ErrorMessage'>
message = '`DataFrame.rpow` defaulting to pandas implementation.'

    @classmethod
    def default_to_pandas(cls, message=""):
        if message != "":
            message = "{} defaulting to pandas implementation.".format(message)
        else:
            message = "Defaulting to pandas implementation."

        if not cls.printed_request_implementation:
            message = (
                "{}\n".format(message)
                + "To request implementation, send an email to "
                + "feature_requests@modin.org."
            )
            cls.printed_request_implementation = True
>       warnings.warn(message)
E       UserWarning: `DataFrame.rpow` defaulting to pandas implementation.

modin/error_message.py:55: UserWarning
__________________ test_math_functions_fill_value[rpow-3.0-check_different_index] ___________________
[gw0] darwin -- Python 3.7.10 /Users/maheshvashishtha/omnisci-datascience/bin/python3.7

fn = <function test_math_functions_fill_value.<locals>.<lambda> at 0x7fbfe7f4fe60>, inplace = False
md_kwargs = {}, pd_kwargs = {}

    def execute_callable(fn, inplace=False, md_kwargs={}, pd_kwargs={}):
        try:
>           pd_result = fn(pandas_df, **pd_kwargs)

modin/pandas/test/utils.py:701:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

df =      col33  col34  col35  col36  col37  ...  col28  col29  col30  col31  index
0       51     79     61     16     61 ...5     66      2
255     69     93     62     74     37  ...     56     33     55     29     54

[256 rows x 64 columns]

>       lambda df: getattr(df, op)(other(df), axis=0, fill_value=fill_value),
    )

modin/pandas/test/dataframe/test_binary.py:93:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self =      col33  col34  col35  col36  col37  ...  col28  col29  col30  col31  index
0       51     79     61     16     61 ...5     66      2
255     69     93     62     74     37  ...     56     33     55     29     54

[256 rows x 64 columns]
other = 0      51
1      92
2      14
3      71
4      60
       ..
251    15
252    96
253    72
254    58
255    69
Name: col33, Length: 256, dtype: int64
axis = 0, level = None, fill_value = 3.0

    @Appender(doc)
    def f(self, other, axis=default_axis, level=None, fill_value=None):

        if should_reindex_frame_op(
            self, other, op, axis, default_axis, fill_value, level
        ):
            return frame_arith_method_with_reindex(self, other, op)

        if isinstance(other, ABCSeries) and fill_value is not None:
            # TODO: We could allow this in cases where we end up going
            #  through the DataFrame path
>           raise NotImplementedError(f"fill_value {fill_value} not supported.")
E           NotImplementedError: fill_value 3.0 not supported.

../omnisci-datascience/lib/python3.7/site-packages/pandas/core/ops/__init__.py:428: NotImplementedError

During handling of the above exception, another exception occurred:

other = <function <lambda> at 0x7fbfe6336560>, fill_value = 3.0, op = 'rpow'

    @pytest.mark.parametrize(
        "other",
        [lambda df: df[: -(2 ** 4)], lambda df: df[df.columns[0]].reset_index(drop=True)],
        ids=["check_missing_value", "check_different_index"],
    )
    @pytest.mark.parametrize("fill_value", [None, 3.0])
    @pytest.mark.parametrize(
        "op",
        [
            *("add", "radd", "sub", "rsub", "mod", "rmod", "pow", "rpow"),
            *("truediv", "rtruediv", "mul", "rmul", "floordiv", "rfloordiv"),
        ],
    )
    def test_math_functions_fill_value(other, fill_value, op):
        data = test_data["int_data"]
        modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)

        eval_general(
            modin_df,
            pandas_df,
>           lambda df: getattr(df, op)(other(df), axis=0, fill_value=fill_value),
        )

modin/pandas/test/dataframe/test_binary.py:93:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
modin/pandas/test/utils.py:741: in eval_general
    operation, md_kwargs=md_kwargs, pd_kwargs=pd_kwargs, inplace=__inplace__
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

fn = <function test_math_functions_fill_value.<locals>.<lambda> at 0x7fbfe7f4fe60>, inplace = False
md_kwargs = {}, pd_kwargs = {}

    def execute_callable(fn, inplace=False, md_kwargs={}, pd_kwargs={}):
        try:
            pd_result = fn(pandas_df, **pd_kwargs)
        except Exception as pd_e:
            if check_exception_type is None:
                return None
            with pytest.raises(Exception) as md_e:
                # repr to force materialization
                repr(fn(modin_df, **md_kwargs))
            if check_exception_type:
                assert isinstance(
                    md_e.value, type(pd_e)
                ), "Got Modin Exception type {}, but pandas Exception type {} was expected".format(
>                   type(md_e.value), type(pd_e)
                )
E               AssertionError: Got Modin Exception type <class 'UserWarning'>, but pandas Exception type <class 'NotImplementedError'> was expected

modin/pandas/test/utils.py:712: AssertionError

---------- coverage: platform darwin, python 3.7.10-final-0 ----------
Coverage XML written to file coverage.xml

====================================== short test summary info ======================================
FAILED modin/pandas/test/dataframe/test_binary.py::test_math_functions[pow-columns-series_or_list]
FAILED modin/pandas/test/dataframe/test_binary.py::test_math_functions_fill_value[pow-None-check_different_index]
FAILED modin/pandas/test/dataframe/test_binary.py::test_math_functions[rpow-columns-series_or_list]
FAILED modin/pandas/test/dataframe/test_binary.py::test_math_functions_fill_value[pow-3.0-check_different_index]
FAILED modin/pandas/test/dataframe/test_binary.py::test_math_functions_fill_value[rpow-None-check_different_index]
FAILED modin/pandas/test/dataframe/test_binary.py::test_math_functions_fill_value[rpow-3.0-check_different_index]