rapidsai / cuml

cuML - RAPIDS Machine Learning Library
https://docs.rapids.ai/api/cuml/stable/
Apache License 2.0
4.27k stars 536 forks source link

[BUG] UserWarning: Error getting driver and runtime versions: #5855

Open dysartk opened 7 months ago

dysartk commented 7 months ago

Describe the bug: After installing RAPIDS in Conda using the command from the installation instructions and trying to import cuml, I get the error below. The installation seemingly went well, without errors. The environment seems acceptable.

I am running Ubuntu 22.04, Nvidia 2070 Super, Driver 550.67, CUDA 12.4.

Steps/Code to reproduce bug: `import cuml`

Error

/home/kd/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/cudf/utils/_ptxcompiler.py:61: UserWarning: Error getting driver and runtime versions:

stdout:

stderr:

Traceback (most recent call last): File "/home/kevindysart/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/numba/cuda/cudadrv/driver.py", line 254, in ensure_initialized self.cuInit(0) File "/home/kevindysart/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/numba/cuda/cudadrv/driver.py", line 327, in safe_cuda_api_call self._check_ctypes_error(fname, retcode) File "/home/kevindysart/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/numba/cuda/cudadrv/driver.py", line 395, in _check_ctypes_error raise CudaAPIError(retcode, msg) numba.cuda.cudadrv.driver.CudaAPIError: [999] Call to cuInit results in CUDA_ERROR_UNKNOWN

During handling of the above exception, another exception occurred:

Traceback (most recent call last): File "<string>", line 4, in <module> File "/home/kevindysart/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/numba/cuda/cudadrv/driver.py", line 292, in __getattr__ self.ensure_initialized() File "/home/kevindysart/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/numba/cuda/cudadrv/driver.py", line 258, in ensure_initialized raise CudaSupportError(f"Error at driver init: {description}") numba.cuda.cudadrv.error.CudaSupportError: Error at driver init: Call to cuInit results in CUDA_ERROR_UNKNOWN (999)

Not patching Numba warnings.warn(msg, UserWarning)

CUDARuntimeError Traceback (most recent call last) Cell In[1], line 1 ----> 1 import cuml 2 from cupy import asnumpy 3 from joblib import dump, load

File ~/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/cuml/init.py:17 1 # 2 # Copyright (c) 2022-2023, NVIDIA CORPORATION. 3 # (...) 14 # limitations under the License. 15 # ---> 17 from cuml.internals.base import Base, UniversalBase 18 from cuml.internals.available_devices import is_cuda_available 20 # GPU only packages

File ~/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/cuml/internals/init.py:18 1 # 2 # Copyright (c) 2019-2023, NVIDIA CORPORATION. 3 # (...) 14 # limitations under the License. 15 # 17 from cuml.internals.available_devices import is_cuda_available ---> 18 from cuml.internals.base_helpers import BaseMetaClass, _tags_class_and_instance 19 from cuml.internals.api_decorators import ( 20 _deprecate_pos_args, 21 api_base_fit_transform, (...) 33 exit_internal_api, 34 ) 35 from cuml.internals.api_context_managers import ( 36 in_internal_api, 37 set_api_output_dtype, 38 set_api_output_type, 39 )

File ~/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/cuml/internals/base_helpers.py:20 17 from inspect import Parameter, signature 18 import typing ---> 20 from cuml.internals.api_decorators import ( 21 api_base_return_generic, 22 api_base_return_array, 23 api_base_return_sparse_array, 24 api_base_return_any, 25 api_return_any, 26 _deprecate_pos_args, 27 ) 28 from cuml.internals.array import CumlArray 29 from cuml.internals.array_sparse import SparseCumlArray

File ~/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/cuml/internals/api_decorators.py:24 21 import warnings 23 # TODO: Try to resolve circular import that makes this necessary: ---> 24 from cuml.internals import input_utils as iu 25 from cuml.internals.api_context_managers import BaseReturnAnyCM 26 from cuml.internals.api_context_managers import BaseReturnArrayCM

File ~/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/cuml/internals/input_utils.py:19 1 # 2 # Copyright (c) 2019-2023, NVIDIA CORPORATION. 3 # (...) 14 # limitations under the License. 15 # 17 from collections import namedtuple ---> 19 from cuml.internals.array import CumlArray 20 from cuml.internals.array_sparse import SparseCumlArray 21 from cuml.internals.global_settings import GlobalSettings

File ~/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/cuml/internals/array.py:21 18 import operator 19 import pickle ---> 21 from cuml.internals.global_settings import GlobalSettings 22 from cuml.internals.logger import debug 23 from cuml.internals.mem_type import MemoryType, MemoryTypeError

File ~/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/cuml/internals/global_settings.py:20 18 import threading 19 from cuml.internals.available_devices import is_cuda_available ---> 20 from cuml.internals.device_type import DeviceType 21 from cuml.internals.mem_type import MemoryType 22 from cuml.internals.safe_imports import cpu_only_import, gpu_only_import

File ~/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/cuml/internals/device_type.py:19 1 # 2 # Copyright (c) 2022-2023, NVIDIA CORPORATION. 3 # (...) 14 # limitations under the License. 15 # 18 from enum import Enum, auto ---> 19 from cuml.internals.mem_type import MemoryType 22 class DeviceTypeError(Exception): 23 """An exception thrown to indicate bad device type selection"""

File ~/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/cuml/internals/mem_type.py:22 19 from cuml.internals.device_support import GPU_ENABLED 20 from cuml.internals.safe_imports import cpu_only_import, gpu_only_import ---> 22 cudf = gpu_only_import("cudf") 23 cp = gpu_only_import("cupy") 24 cpx_sparse = gpu_only_import("cupyx.scipy.sparse")

File ~/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/cuml/internals/safe_imports.py:356, in gpu_only_import(module, alt) 330 """A function used to import modules required only in GPU installs 331 332 This function will attempt to import a module with the given name, but it (...) 353 UnavailableMeta. 354 """ 355 if GPU_ENABLED: --> 356 return importlib.import_module(module) 357 else: 358 return safe_import( 359 module, 360 msg=f"{module} is not installed in non GPU-enabled installations", 361 alt=alt, 362 )

File ~/anaconda3/envs/rapids-24.04/lib/python3.11/importlib/init.py:126, in import_module(name, package) 124 break 125 level += 1 --> 126 return _bootstrap._gcd_import(name[level:], package, level)

File ~/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/cudf/init.py:10 7 from cudf.utils.gpu_utils import validate_setup 9 _setup_numba() ---> 10 validate_setup() 12 import cupy 13 from numba import config as numba_config, cuda

File ~/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/cudf/utils/gpu_utils.py:55, in validate_setup() 53 except CUDARuntimeError as e: 54 if e.status in notify_caller_errors: ---> 55 raise e 56 # If there is no GPU detected, set gpus_count to -1 57 gpus_count = -1

File ~/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/cudf/utils/gpu_utils.py:52, in validate_setup() 31 notify_caller_errors = { 32 cudaError_t.cudaErrorInitializationError, 33 cudaError_t.cudaErrorInsufficientDriver, (...) 48 cudaError_t.cudaErrorApiFailureBase, 49 } 51 try: ---> 52 gpus_count = getDeviceCount() 53 except CUDARuntimeError as e: 54 if e.status in notify_caller_errors:

File ~/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/rmm/_cuda/gpu.py:102, in getDeviceCount()

dantegd commented 7 months ago

Thanks for the issue, @dysartk. The driver and GPU should be fine, so this is an unexpected issue. It seems to be failing on some basic CUDA calls; is there a chance you are using conda inside a Docker container? Otherwise, could you try running https://github.com/rapidsai/cuml/blob/branch-24.06/print_env.sh and posting the output here?