When we've already imported cuDF, the IPython magic %reset -f --aggressive is no longer enough to clear the environment/sys modules of all pandas objects so that we can %load_ext cudf.pandas and use cuDF under the hood:
In [1]: import cudf
In [2]: %reset -f --aggressive
culling sys module...
In [3]: %load_ext cudf.pandas
/nvme/0/pgali/envs/cudfdev/lib/python3.10/site-packages/numba/__init__.py:34: UserWarning: The NumPy module was reloaded (imported a second time). This can in some cases result in small but subtle issues and is discouraged.
import numpy as np
---------------------------------------------------------------------------
ArrowKeyError Traceback (most recent call last)
Cell In[3], line 1
----> 1 get_ipython().run_line_magic('load_ext', 'cudf.pandas')
File /nvme/0/pgali/envs/cudfdev/lib/python3.10/site-packages/IPython/core/interactiveshell.py:2456, in InteractiveShell.run_line_magic(self, magic_name, line, _stack_depth)
2454 kwargs['local_ns'] = self.get_local_scope(stack_depth)
2455 with self.builtin_trap:
-> 2456 result = fn(*args, **kwargs)
2458 # The code below prevents the output from being displayed
2459 # when using magics with decorator @output_can_be_silenced
2460 # when the last Python token in the expression is a ';'.
2461 if getattr(fn, magic.MAGIC_OUTPUT_CAN_BE_SILENCED, False):
File /nvme/0/pgali/envs/cudfdev/lib/python3.10/site-packages/IPython/core/magics/extension.py:33, in ExtensionMagics.load_ext(self, module_str)
31 if not module_str:
32 raise UsageError('Missing module name.')
---> 33 res = self.shell.extension_manager.load_extension(module_str)
35 if res == 'already loaded':
36 print("The %s extension is already loaded. To reload it, use:" % module_str)
File /nvme/0/pgali/envs/cudfdev/lib/python3.10/site-packages/IPython/core/extensions.py:76, in ExtensionManager.load_extension(self, module_str)
69 """Load an IPython extension by its module name.
70
71 Returns the string "already loaded" if the extension is already loaded,
72 "no load function" if the module doesn't have a load_ipython_extension
73 function, or None if it succeeded.
74 """
75 try:
---> 76 return self._load_extension(module_str)
77 except ModuleNotFoundError:
78 if module_str in BUILTINS_EXTS:
File /nvme/0/pgali/envs/cudfdev/lib/python3.10/site-packages/IPython/core/extensions.py:91, in ExtensionManager._load_extension(self, module_str)
89 with self.shell.builtin_trap:
90 if module_str not in sys.modules:
---> 91 mod = import_module(module_str)
92 mod = sys.modules[module_str]
93 if self._call_load_ipython_extension(mod):
File /nvme/0/pgali/envs/cudfdev/lib/python3.10/importlib/__init__.py:126, in import_module(name, package)
124 break
125 level += 1
--> 126 return _bootstrap._gcd_import(name[level:], package, level)
File <frozen importlib._bootstrap>:1050, in _gcd_import(name, package, level)
File <frozen importlib._bootstrap>:1027, in _find_and_load(name, import_)
File <frozen importlib._bootstrap>:992, in _find_and_load_unlocked(name, import_)
File <frozen importlib._bootstrap>:241, in _call_with_frames_removed(f, *args, **kwds)
File <frozen importlib._bootstrap>:1050, in _gcd_import(name, package, level)
File <frozen importlib._bootstrap>:1027, in _find_and_load(name, import_)
File <frozen importlib._bootstrap>:1006, in _find_and_load_unlocked(name, import_)
File <frozen importlib._bootstrap>:688, in _load_unlocked(spec)
File <frozen importlib._bootstrap_external>:883, in exec_module(self, module)
File <frozen importlib._bootstrap>:241, in _call_with_frames_removed(f, *args, **kwds)
File /nvme/0/pgali/envs/cudfdev/lib/python3.10/site-packages/cudf/__init__.py:19
16 from rmm.allocators.cupy import rmm_cupy_allocator
17 from rmm.allocators.numba import RMMNumbaManager
---> 19 from cudf import api, core, datasets, testing
20 from cudf._version import __git_commit__, __version__
21 from cudf.api.extensions import (
22 register_dataframe_accessor,
23 register_index_accessor,
24 register_series_accessor,
25 )
File /nvme/0/pgali/envs/cudfdev/lib/python3.10/site-packages/cudf/api/__init__.py:3
1 # Copyright (c) 2021, NVIDIA CORPORATION.
----> 3 from cudf.api import extensions, types
5 __all__ = ["extensions", "types"]
File /nvme/0/pgali/envs/cudfdev/lib/python3.10/site-packages/cudf/api/types.py:18
15 from pandas.api import types as pd_types
17 import cudf
---> 18 from cudf.core.dtypes import ( # noqa: F401
19 _BaseDtype,
20 dtype,
21 is_categorical_dtype,
22 is_decimal32_dtype,
23 is_decimal64_dtype,
24 is_decimal128_dtype,
25 is_decimal_dtype,
26 is_interval_dtype,
27 is_list_dtype,
28 is_struct_dtype,
29 )
32 def is_numeric_dtype(obj):
33 """Check whether the provided array or dtype is of a numeric dtype.
34
35 Parameters
(...)
43 Whether or not the array or dtype is of a numeric dtype.
44 """
File /nvme/0/pgali/envs/cudfdev/lib/python3.10/site-packages/cudf/core/dtypes.py:28
25 from cudf.utils.docutils import doc_apply
27 if PANDAS_GE_150:
---> 28 from pandas.core.arrays.arrow.extension_types import ArrowIntervalType
29 else:
30 from pandas.core.arrays._arrow_utils import ArrowIntervalType
File /nvme/0/pgali/envs/cudfdev/lib/python3.10/site-packages/pandas/core/arrays/arrow/extension_types.py:49
47 # register the type with a dummy instance
48 _period_type = ArrowPeriodType("D")
---> 49 pyarrow.register_extension_type(_period_type)
52 class ArrowIntervalType(pyarrow.ExtensionType):
53 def __init__(self, subtype, closed: IntervalClosedType) -> None:
54 # attributes need to be set first before calling
55 # super init (as that calls serialize)
File /nvme/0/pgali/envs/cudfdev/lib/python3.10/site-packages/pyarrow/types.pxi:1836, in pyarrow.lib.register_extension_type()
File /nvme/0/pgali/envs/cudfdev/lib/python3.10/site-packages/pyarrow/error.pxi:91, in pyarrow.lib.check_status()
ArrowKeyError: A type extension with name pandas.period already defined
Instead, we need to explicitly restart the kernel to use cudf.pandas, which we can do in IPython via get_ipython().kernel.do_shutdown(restart=True).
This isn't a big deal, as we shouldn't rely on this kind of reset-based behavior anyway. But I believe this is a change in behavior, and we may want to look into it down the road.
This will never work since culling things from sys.modules does not dlclose the compiled extension modules that were loaded, so any state that is maintained in these shared libraries will persist.
When we've already imported cuDF, the IPython magic `%reset -f --aggressive` is no longer enough to clear the environment/sys modules of all pandas objects so that we can `%load_ext cudf.pandas` and use cuDF under the hood. Instead, we need to explicitly restart the kernel to use `cudf.pandas`, which we can do in IPython via `get_ipython().kernel.do_shutdown(restart=True)`.
This isn't a big deal, as we shouldn't rely on this kind of `reset`-based behavior anyway. But I believe this is a change in behavior, and we may want to look into it down the road.