Closed: adryan-ai closed this issue 1 year ago
Downgrading to CUDA 11.8 allowed this to work; it seems that support for CUDA 12.1 is not working as expected.
I used conda's cudatoolkit, which happens to be CUDA 11.8, and I used torch built for CUDA 11.7 (it is fine that they differ). So yes, I wouldn't know whether CUDA 12.1 works on Windows, since I didn't get it to work either and had to use the steps I wrote in the readme.md file.
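For reference, a quick way to check which CUDA build torch actually uses versus which toolkit is installed (plain torch and the CUDA toolkit, nothing h2ogpt-specific):

python -c "import torch; print(torch.__version__, torch.version.cuda, torch.cuda.is_available())"
nvcc --version

If torch.version.cuda prints 11.7 while nvcc reports 11.8, that is exactly the mismatch described above, and it is generally fine.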
Same problem on a Win11 laptop. Without the --load_8bit=True flag it works fine, but with it I get the same problem on CUDA 12.2. For this issue alone I would never downgrade and reinstall CUDA, though; I hope we can figure out a way to solve it. Besides this, I also cannot get the langchain command to work.
@YavuzCan35 Try the Windows instructions, but for anything that would need compiling, download the prebuilt CUDA wheels instead. The wheel's CUDA version doesn't need to match your installed CUDA version.
e.g. for llama_cpp_python, autogpt, exllama
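For example, for bitsandbytes the prebuilt Windows wheel can be installed directly (this is the same wheel referenced in the steps below; the other packages have similar prebuilt-wheel releases, so check their release pages for a file matching your Python version):

pip uninstall -y bitsandbytes
pip install https://github.com/jllllll/bitsandbytes-windows-webui/raw/main/bitsandbytes-0.39.0-py3-none-any.whl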
Hi,
Followed the install instructions for Windows and it runs fine without "--load_8bit=True".
Trying to get it to run with "--load_8bit=True", I followed the extra instructions:
pip uninstall bitsandbytes
pip install https://github.com/jllllll/bitsandbytes-windows-webui/raw/main/bitsandbytes-0.39.0-py3-none-any.whl
Run with:
python generate.py --base_model=h2oai/h2ogpt-oig-oasst1-512-6_9b --load_8bit=True
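(As a quick sanity check, my own addition rather than part of the instructions: you can list the DLLs the wheel actually installed without importing the package, since importing triggers the failing CUDA setup shown below. A CUDA build of the wheel should show libbitsandbytes_cuda*.dll files here.)

python -c "import importlib.util, os, glob; p = os.path.dirname(importlib.util.find_spec('bitsandbytes').origin); print(glob.glob(os.path.join(p, '*.dll')))"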
I get the following error:
Using Model h2oai/h2ogpt-oig-oasst1-512-6_9b
device_map: {'': 0}
bin C:\Users\alexa\MiniConda3\envs\h2ogptdev\lib\site-packages\bitsandbytes\libbitsandbytes_cuda121.dll
False
C:\Users\alexa\MiniConda3\envs\h2ogptdev\lib\site-packages\bitsandbytes\cuda_setup\main.py:156: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {WindowsPath('C:/Users/alexa/MiniConda3/envs/h2ogptdev/bin')}
  warn(msg)
C:\Users\alexa\MiniConda3\envs\h2ogptdev\lib\site-packages\bitsandbytes\cuda_setup\main.py:156: UserWarning: C:\Users\alexa\MiniConda3\envs\h2ogptdev did not contain ['cudart64_110.dll', 'cudart64_120.dll', 'cudart64_12.dll'] as expected! Searching further paths...
  warn(msg)
CUDA SETUP: CUDA runtime path found: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.1\bin\cudart64_12.dll
CUDA SETUP: Highest compute capability among GPUs detected: 8.6
CUDA SETUP: Detected CUDA version 121
CUDA SETUP: Loading binary C:\Users\alexa\MiniConda3\envs\h2ogptdev\lib\site-packages\bitsandbytes\libbitsandbytes_cuda121.dll...
Could not find module 'C:\Users\alexa\MiniConda3\envs\h2ogptdev\lib\site-packages\bitsandbytes\libbitsandbytes_cuda121.dll' (or one of its dependencies). Try using the full path with constructor syntax.
CUDA SETUP: Something unexpected happened. Please compile from source:
git clone git@github.com:TimDettmers/bitsandbytes.git
cd bitsandbytes
CUDA_VERSION=121 python setup.py install
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ C:\Users\alexa\h2ogpt\generate.py:1810 in <module>                                               │
│ │
│ 1807 │
│ 1808 │
│ 1809 if __name__ == "__main__":                                                                  │
│ ❱ 1810 │ entrypoint_main() │
│ 1811 │
│ │
│ C:\Users\alexa\h2ogpt\generate.py:1806 in entrypoint_main │
│ │
│ 1803 │ │
│ 1804 │ python generate.py --base_model=h2oai/h2ogpt-oig-oasst1-512-6_9b │
│ 1805 │ """ │
│ ❱ 1806 │ fire.Fire(main) │
│ 1807 │
│ 1808 │
│ 1809 if __name__ == "__main__":                                                                  │
│ │
│ C:\Users\alexa\MiniConda3\envs\h2ogptdev\lib\site-packages\fire\core.py:141 in Fire │
│ │
│ 138 │ context.update(caller_globals) │
│ 139 │ context.update(caller_locals) │
│ 140 │
│ ❱ 141 component_trace = _Fire(component, args, parsed_flag_args, context, name) │
│ 142 │
│ 143 if component_trace.HasError(): │
│ 144 │ _DisplayError(component_trace) │
│ │
│ C:\Users\alexa\MiniConda3\envs\h2ogptdev\lib\site-packages\fire\core.py:475 in _Fire │
│ │
│ 472 │ is_class = inspect.isclass(component) │
│ 473 │ │
│ 474 │ try: │
│ ❱ 475 │ │ component, remaining_args = _CallAndUpdateTrace( │
│ 476 │ │ │ component, │
│ 477 │ │ │ remaining_args, │
│ 478 │ │ │ component_trace, │
│ │
│ C:\Users\alexa\MiniConda3\envs\h2ogptdev\lib\site-packages\fire\core.py:691 in │
│ _CallAndUpdateTrace │
│ │
│ 688 │ loop = asyncio.get_event_loop() │
│ 689 │ component = loop.run_until_complete(fn(*varargs, **kwargs))                                │
│ 690 else: │
│ ❱ 691 │ component = fn(*varargs, **kwargs)                                                       │
│ 692 │
│ 693 if treatment == 'class': │
│ 694 │ action = trace.INSTANTIATED_CLASS │
│ │
│ C:\Users\alexa\h2ogpt\generate.py:485 in main │
│ │
│ 482 │ │ # get default model │
│ 483 │ │ all_kwargs = locals().copy() │
│ 484 │ │ if all_kwargs.get('base_model') and not all_kwargs['login_mode_if_model0']: │
│ ❱ 485 │ │ │ model0, tokenizer0, device = get_model(reward_type=False, │
│ 486 │ │ │ │ │ │ │ │ │ │ │ │ **get_kwargs(get_model, exclude_names │
│ 487 │ │ else: │
│ 488 │ │ │ # if empty model, then don't load anything, just get gradio up │
│ │
│ C:\Users\alexa\h2ogpt\generate.py:743 in get_model │
│ │
│ 740 │ │ │ │ │
│ 741 │ │ │ │ if infer_devices: │
│ 742 │ │ │ │ │ config, model = get_config(base_model, return_model=True, config_k │
│ ❱ 743 │ │ │ │ │ model = get_non_lora_model(base_model, model_loader, load_half, mode │
│ 744 │ │ │ │ │ │ │ │ │ │ │ config, model, │
│ 745 │ │ │ │ │ │ │ │ │ │ │ gpu_id=gpu_id, │
│ 746 │ │ │ │ │ │ │ │ │ │ │ ) │
│ │
│ C:\Users\alexa\h2ogpt\generate.py:595 in get_non_lora_model │
│ │
│ 592 │ pop_unused_model_kwargs(model_kwargs) │
│ 593 │ │
│ 594 │ if load_in_8bit or load_in_4bit or not load_half: │
│ ❱ 595 │ │ model = model_loader.from_pretrained( │
│ 596 │ │ │ base_model, │
│ 597 │ │ │ config=config, │
│ 598 │ │ │ **model_kwargs, │
│ │
│ C:\Users\alexa\MiniConda3\envs\h2ogptdev\lib\site-packages\transformers\models\auto\auto_factory │
│ .py:471 in from_pretrained │
│ │
│ 468 │ │ │ ) │
│ 469 │ │ elif type(config) in cls._model_mapping.keys(): │
│ 470 │ │ │ model_class = _get_model_class(config, cls._model_mapping) │
│ ❱ 471 │ │ │ return model_class.from_pretrained( │
│ 472 │ │ │ │ pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, │
│ 473 │ │ │ ) │
│ 474 │ │ raise ValueError( │
│ │
│ C:\Users\alexa\MiniConda3\envs\h2ogptdev\lib\site-packages\transformers\modeling_utils.py:2639 │
│ in from_pretrained │
│ │
│ 2636 │ │ │ keep_in_fp32_modules = [] │
│ 2637 │ │ │
│ 2638 │ │ if load_in_8bit: │
│ ❱ 2639 │ │ │ from .utils.bitsandbytes import get_keys_to_not_convert, replace_8bit_linear │
│ 2640 │ │ │ │
│ 2641 │ │ │ load_in_8bit_skip_modules = quantization_config.llm_int8_skip_modules │
│ 2642 │ │ │ load_in_8bit_threshold = quantization_config.llm_int8_threshold │
│ │
│ C:\Users\alexa\MiniConda3\envs\h2ogptdev\lib\site-packages\transformers\utils\bitsandbytes.py:9 │
│ in <module>                                                                                      │
│ │
│ 6 │
│ 7 │
│ 8 if is_bitsandbytes_available(): │
│ ❱ 9 │ import bitsandbytes as bnb │
│ 10 │ import torch │
│ 11 │ import torch.nn as nn │
│ 12 │
│ │
│ C:\Users\alexa\MiniConda3\envs\h2ogptdev\lib\site-packages\bitsandbytes\__init__.py:6 in        │
│ <module>                                                                                         │
│ │
│ 3 # This source code is licensed under the MIT license found in the │
│ 4 # LICENSE file in the root directory of this source tree. │
│ 5 │
│ ❱ 6 from . import cuda_setup, utils, research │
│ 7 from .autograd._functions import ( │
│ 8 │ MatmulLtState, │
│ 9 │ bmm_cublas, │
│ │
│ C:\Users\alexa\MiniConda3\envs\h2ogptdev\lib\site-packages\bitsandbytes\research\__init__.py:1  │
│ in <module>                                                                                      │
│ │
│ ❱ 1 from . import nn │
│ 2 from .autograd._functions import ( │
│ 3 │ switchback_bnb, │
│ 4 │ matmul_fp8_global, │
│ │
│ C:\Users\alexa\MiniConda3\envs\h2ogptdev\lib\site-packages\bitsandbytes\research\nn\__init__.py │
│ :1 in <module>                                                                                   │
│ │
│ ❱ 1 from .modules import LinearFP8Mixed, LinearFP8Global │
│ 2 │
│ │
│ C:\Users\alexa\MiniConda3\envs\h2ogptdev\lib\site-packages\bitsandbytes\research\nn\modules.py:8 │
│ in │
│ │
│ 5 from torch import Tensor, device, dtype, nn │
│ 6 │
│ 7 import bitsandbytes as bnb │
│ ❱ 8 from bitsandbytes.optim import GlobalOptimManager │
│ 9 from bitsandbytes.utils import OutlierTracer, find_outlier_dims │
│ 10 │
│ 11 T = TypeVar("T", bound="torch.nn.Module") │
│ │
│ C:\Users\alexa\MiniConda3\envs\h2ogptdev\lib\site-packages\bitsandbytes\optim\__init__.py:6 in  │
│ <module>                                                                                         │
│ │
│ 3 # This source code is licensed under the MIT license found in the │
│ 4 # LICENSE file in the root directory of this source tree. │
│ 5 │
│ ❱ 6 from bitsandbytes.cextension import COMPILED_WITH_CUDA │
│ 7 │
│ 8 from .adagrad import Adagrad, Adagrad8bit, Adagrad32bit │
│ 9 from .adam import Adam, Adam8bit, Adam32bit, PagedAdam, PagedAdam8bit, PagedAdam32bit │
│ │
│ C:\Users\alexa\MiniConda3\envs\h2ogptdev\lib\site-packages\bitsandbytes\cextension.py:20 in │
│ <module>                                                                                         │
│ │
│ 17 │ if lib is None and torch.cuda.is_available(): │
│ 18 │ │ CUDASetup.get_instance().generate_instructions() │
│ 19 │ │ CUDASetup.get_instance().print_log_stack() │
│ ❱ 20 │ │ raise RuntimeError(''' │
│ 21 │ │ CUDA Setup failed despite GPU being available. Please run the following command │
│ 22 │ │ │
│ 23 │ │ python -m bitsandbytes │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
RuntimeError:
CUDA Setup failed despite GPU being available. Please run the following command to get more information:

python -m bitsandbytes
Running python -m bitsandbytes gives the following:
===================================BUG REPORT===================================
Welcome to bitsandbytes. For bug reports, please run
python -m bitsandbytes
and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin C:\Users\alexa\MiniConda3\envs\h2ogptdev\lib\site-packages\bitsandbytes\libbitsandbytes_cuda121.dll
False
C:\Users\alexa\MiniConda3\envs\h2ogptdev\lib\site-packages\bitsandbytes\cuda_setup\main.py:156: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {WindowsPath('C:/Users/alexa/MiniConda3/envs/h2ogptdev/bin')}
  warn(msg)
C:\Users\alexa\MiniConda3\envs\h2ogptdev\lib\site-packages\bitsandbytes\cuda_setup\main.py:156: UserWarning: C:\Users\alexa\MiniConda3\envs\h2ogptdev did not contain ['cudart64_110.dll', 'cudart64_120.dll', 'cudart64_12.dll'] as expected! Searching further paths...
  warn(msg)
CUDA SETUP: CUDA runtime path found: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.1\bin\cudart64_12.dll
CUDA SETUP: Highest compute capability among GPUs detected: 8.6
CUDA SETUP: Detected CUDA version 121
CUDA SETUP: Loading binary C:\Users\alexa\MiniConda3\envs\h2ogptdev\lib\site-packages\bitsandbytes\libbitsandbytes_cuda121.dll...
Could not find module 'C:\Users\alexa\MiniConda3\envs\h2ogptdev\lib\site-packages\bitsandbytes\libbitsandbytes_cuda121.dll' (or one of its dependencies). Try using the full path with constructor syntax.
CUDA SETUP: Something unexpected happened. Please compile from source:
git clone git@github.com:TimDettmers/bitsandbytes.git
cd bitsandbytes
CUDA_VERSION=121 python setup.py install
Traceback (most recent call last):
  File "C:\Users\alexa\MiniConda3\envs\h2ogptdev\lib\runpy.py", line 187, in _run_module_as_main
    mod_name, mod_spec, code = _get_module_details(mod_name, _Error)
  File "C:\Users\alexa\MiniConda3\envs\h2ogptdev\lib\runpy.py", line 146, in _get_module_details
    return _get_module_details(pkg_main_name, error)
  File "C:\Users\alexa\MiniConda3\envs\h2ogptdev\lib\runpy.py", line 110, in _get_module_details
    __import__(pkg_name)
  File "C:\Users\alexa\MiniConda3\envs\h2ogptdev\lib\site-packages\bitsandbytes\__init__.py", line 6, in <module>
    from . import cuda_setup, utils, research
  File "C:\Users\alexa\MiniConda3\envs\h2ogptdev\lib\site-packages\bitsandbytes\research\__init__.py", line 1, in <module>
    from . import nn
  File "C:\Users\alexa\MiniConda3\envs\h2ogptdev\lib\site-packages\bitsandbytes\research\nn\__init__.py", line 1, in <module>
    from .modules import LinearFP8Mixed, LinearFP8Global
  File "C:\Users\alexa\MiniConda3\envs\h2ogptdev\lib\site-packages\bitsandbytes\research\nn\modules.py", line 8, in <module>
    from bitsandbytes.optim import GlobalOptimManager
  File "C:\Users\alexa\MiniConda3\envs\h2ogptdev\lib\site-packages\bitsandbytes\optim\__init__.py", line 6, in <module>
    from bitsandbytes.cextension import COMPILED_WITH_CUDA
  File "C:\Users\alexa\MiniConda3\envs\h2ogptdev\lib\site-packages\bitsandbytes\cextension.py", line 20, in <module>
    raise RuntimeError('''
RuntimeError:
        CUDA Setup failed despite GPU being available. Please run the following command to get more information:

        python -m bitsandbytes
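The key line above is "Could not find module '...libbitsandbytes_cuda121.dll' (or one of its dependencies)": on Windows this usually means the DLL file itself exists but one of the CUDA runtime DLLs it links against cannot be resolved. A small diagnostic sketch (the paths are simply the ones from the log above; adjust them to your environment) that tries loading the DLL directly after putting the CUDA bin directory on the DLL search path:

import ctypes, os

# Paths copied from the log above; change them to match your setup.
dll = r"C:\Users\alexa\MiniConda3\envs\h2ogptdev\lib\site-packages\bitsandbytes\libbitsandbytes_cuda121.dll"
cuda_bin = r"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.1\bin"

# On Windows, Python 3.8+ no longer searches PATH for dependent DLLs,
# so make the CUDA runtime directory visible explicitly.
os.add_dll_directory(cuda_bin)

try:
    ctypes.CDLL(dll)  # load via ctypes, the same mechanism bitsandbytes uses
    print("DLL loaded OK; a dependency was missing from the DLL search path")
except OSError as e:
    print("Still failing:", e)

If the DLL loads once the CUDA directory is added, the problem is the DLL search path rather than the wheel itself; if it still fails, a wheel built against a CUDA version you actually have (see the prebuilt-wheel suggestion above) is the likelier fix.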