Open dysartk opened 7 months ago
Thanks for the issue @dysartk, the driver and GPU should be fine so this is an unexpected issue. It seems to be failing to do some basic CUDA calls, is there a chance you are using conda inside a docker container? Otherwise could you try running https://github.com/rapidsai/cuml/blob/branch-24.06/print_env.sh and put the output of that here?
Describe the bug: After installing RAPIDS in Conda using the instructed command and trying to import cuml, I get the error below. The installation seemingly went well, without errors. The environment seems acceptable.
I am running Ubuntu 22.04 with an NVIDIA 2070 Super, driver 550.67, and CUDA 12.4.
Steps/Code to reproduce bug: `import cuml`
Error
/home/kd/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/cudf/utils/_ptxcompiler.py:61: UserWarning: Error getting driver and runtime versions:
stdout:
stderr:
Traceback (most recent call last): File "/home/kevindysart/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/numba/cuda/cudadrv/driver.py", line 254, in ensure_initialized self.cuInit(0) File "/home/kevindysart/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/numba/cuda/cudadrv/driver.py", line 327, in safe_cuda_api_call self._check_ctypes_error(fname, retcode) File "/home/kevindysart/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/numba/cuda/cudadrv/driver.py", line 395, in _check_ctypes_error raise CudaAPIError(retcode, msg) numba.cuda.cudadrv.driver.CudaAPIError: [999] Call to cuInit results in CUDA_ERROR_UNKNOWN
During handling of the above exception, another exception occurred:
Traceback (most recent call last): File "<string>", line 4, in <module>
File "/home/kevindysart/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/numba/cuda/cudadrv/driver.py", line 292, in __getattr__
self.ensure_initialized()
File "/home/kevindysart/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/numba/cuda/cudadrv/driver.py", line 258, in ensure_initialized
raise CudaSupportError(f"Error at driver init: {description}")
numba.cuda.cudadrv.error.CudaSupportError: Error at driver init: Call to cuInit results in CUDA_ERROR_UNKNOWN (999)
Not patching Numba warnings.warn(msg, UserWarning)
CUDARuntimeError Traceback (most recent call last) Cell In[1], line 1 ----> 1 import cuml 2 from cupy import asnumpy 3 from joblib import dump, load
File ~/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/cuml/__init__.py:17 1 # 2 # Copyright (c) 2022-2023, NVIDIA CORPORATION. 3 # (...) 14 # limitations under the License. 15 # ---> 17 from cuml.internals.base import Base, UniversalBase 18 from cuml.internals.available_devices import is_cuda_available 20 # GPU only packages
File ~/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/cuml/internals/__init__.py:18 1 # 2 # Copyright (c) 2019-2023, NVIDIA CORPORATION. 3 # (...) 14 # limitations under the License. 15 # 17 from cuml.internals.available_devices import is_cuda_available ---> 18 from cuml.internals.base_helpers import BaseMetaClass, _tags_class_and_instance 19 from cuml.internals.api_decorators import ( 20 _deprecate_pos_args, 21 api_base_fit_transform, (...) 33 exit_internal_api, 34 ) 35 from cuml.internals.api_context_managers import ( 36 in_internal_api, 37 set_api_output_dtype, 38 set_api_output_type, 39 )
File ~/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/cuml/internals/base_helpers.py:20 17 from inspect import Parameter, signature 18 import typing ---> 20 from cuml.internals.api_decorators import ( 21 api_base_return_generic, 22 api_base_return_array, 23 api_base_return_sparse_array, 24 api_base_return_any, 25 api_return_any, 26 _deprecate_pos_args, 27 ) 28 from cuml.internals.array import CumlArray 29 from cuml.internals.array_sparse import SparseCumlArray
File ~/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/cuml/internals/api_decorators.py:24 21 import warnings 23 # TODO: Try to resolve circular import that makes this necessary: ---> 24 from cuml.internals import input_utils as iu 25 from cuml.internals.api_context_managers import BaseReturnAnyCM 26 from cuml.internals.api_context_managers import BaseReturnArrayCM
File ~/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/cuml/internals/input_utils.py:19 1 # 2 # Copyright (c) 2019-2023, NVIDIA CORPORATION. 3 # (...) 14 # limitations under the License. 15 # 17 from collections import namedtuple ---> 19 from cuml.internals.array import CumlArray 20 from cuml.internals.array_sparse import SparseCumlArray 21 from cuml.internals.global_settings import GlobalSettings
File ~/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/cuml/internals/array.py:21 18 import operator 19 import pickle ---> 21 from cuml.internals.global_settings import GlobalSettings 22 from cuml.internals.logger import debug 23 from cuml.internals.mem_type import MemoryType, MemoryTypeError
File ~/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/cuml/internals/global_settings.py:20 18 import threading 19 from cuml.internals.available_devices import is_cuda_available ---> 20 from cuml.internals.device_type import DeviceType 21 from cuml.internals.mem_type import MemoryType 22 from cuml.internals.safe_imports import cpu_only_import, gpu_only_import
File ~/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/cuml/internals/device_type.py:19 1 # 2 # Copyright (c) 2022-2023, NVIDIA CORPORATION. 3 # (...) 14 # limitations under the License. 15 # 18 from enum import Enum, auto ---> 19 from cuml.internals.mem_type import MemoryType 22 class DeviceTypeError(Exception): 23 """An exception thrown to indicate bad device type selection"""
File ~/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/cuml/internals/mem_type.py:22 19 from cuml.internals.device_support import GPU_ENABLED 20 from cuml.internals.safe_imports import cpu_only_import, gpu_only_import ---> 22 cudf = gpu_only_import("cudf") 23 cp = gpu_only_import("cupy") 24 cpx_sparse = gpu_only_import("cupyx.scipy.sparse")
File ~/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/cuml/internals/safe_imports.py:356, in gpu_only_import(module, alt) 330 """A function used to import modules required only in GPU installs 331 332 This function will attempt to import a module with the given name, but it (...) 353 UnavailableMeta. 354 """ 355 if GPU_ENABLED: --> 356 return importlib.import_module(module) 357 else: 358 return safe_import( 359 module, 360 msg=f"{module} is not installed in non GPU-enabled installations", 361 alt=alt, 362 )
File ~/anaconda3/envs/rapids-24.04/lib/python3.11/importlib/__init__.py:126, in import_module(name, package) 124 break 125 level += 1 --> 126 return _bootstrap._gcd_import(name[level:], package, level)
File ~/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/cudf/__init__.py:10 7 from cudf.utils.gpu_utils import validate_setup 9 _setup_numba() ---> 10 validate_setup() 12 import cupy 13 from numba import config as numba_config, cuda
File ~/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/cudf/utils/gpu_utils.py:55, in validate_setup() 53 except CUDARuntimeError as e: 54 if e.status in notify_caller_errors: ---> 55 raise e 56 # If there is no GPU detected, set `gpus_count` to -1 57 gpus_count = -1
File ~/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/cudf/utils/gpu_utils.py:52, in validate_setup() 31 notify_caller_errors = { 32 cudaError_t.cudaErrorInitializationError, 33 cudaError_t.cudaErrorInsufficientDriver, (...) 48 cudaError_t.cudaErrorApiFailureBase, 49 } 51 try: ---> 52 gpus_count = getDeviceCount() 53 except CUDARuntimeError as e: 54 if e.status in notify_caller_errors:
File ~/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/rmm/_cuda/gpu.py:102, in getDeviceCount()