Open okrueger2 opened 1 year ago
I'd say it's because of Compute capability < 7.5, too old a GPU.
From your logs: "using_igemmlt = torch.cuda.get_device_capability(device=A.device) >= (7, 5) and not state.force_no_igemmlt"
Or you have to find a way to enable state.force_no_igemmlt
Thank you very much, that helps. I reviewed the requirements for bitsandbytes; it requires at least a 20xx card. Time for an upgrade, I'd say.
I am encountering the same issue here during generate.py with an 8.6 compute-capable GPU
CUDA SETUP: Highest compute capability among GPUs detected: 8.6 CUDA SETUP: Detected CUDA version 118 CUDA SETUP: Loading binary /usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cuda118.so... Loading checkpoint shards: 100%|██████████| 33/33 [00:07<00:00, 4.16it/s] /usr/local/lib/python3.10/dist-packages/gradio/inputs.py:27: UserWarning: Usage of gradio.inputs is deprecated, and will not be supported in the future, please import your component from gradio.components warnings.warn( /usr/local/lib/python3.10/dist-packages/gradio/deprecation.py:40: UserWarning:
`optional` parameter is deprecated, and it has no effect warnings.warn(value) /usr/local/lib/python3.10/dist-packages/gradio/deprecation.py:40: UserWarning: `numeric` parameter is deprecated, and it has no effect warnings.warn(value) /usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py:1405: UserWarning: You are calling .generate() with the `input_ids` being on a device type different than your model's device. `input_ids` is on cuda, whereas the model is on cpu. You may experience unexpected behaviors or slower generation. Please make sure that you have put `input_ids` to the correct device by calling for example input_ids = input_ids.to('cpu') before running `.generate()`
. warnings.warn( Traceback (most recent call last): File "/usr/local/lib/python3.10/dist-packages/gradio/routes.py", line 393, in run_predict output = await app.get_blocks().process_api( File "/usr/local/lib/python3.10/dist-packages/gradio/blocks.py", line 1108, in process_api result = await self.call_function( File "/usr/local/lib/python3.10/dist-packages/gradio/blocks.py", line 915, in call_function prediction = await anyio.to_thread.run_sync( File "/usr/local/lib/python3.10/dist-packages/anyio/to_thread.py", line 31, in run_sync return await get_asynclib().run_sync_in_worker_thread( File "/usr/local/lib/python3.10/dist-packages/anyio/_backends/_asyncio.py", line 937, in run_sync_in_worker_thread return await future File "/usr/local/lib/python3.10/dist-packages/anyio/_backends/_asyncio.py", line 867, in run result = context.run(func, *args) File "/workspace/generate.py", line 107, in evaluate generation_output = model.generate( File "/usr/local/lib/python3.10/dist-packages/peft/peft_model.py", line 627, in generate outputs = self.base_model.generate(**kwargs) File "/usr/local/lib/python3.10/dist-packages/torch/utils/_contextlib.py", line 115, in decorate_context return func(*args, **kwargs) File "/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py", line 1524, in generate return self.beam_search( File "/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py", line 2810, in beam_search outputs = self( File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1501, in _call_impl return forward_call(*args, **kwargs) File "/usr/local/lib/python3.10/dist-packages/accelerate/hooks.py", line 165, in new_forward output = old_forward(*args, **kwargs) File "/usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_llama.py", line 687, in forward outputs = self.model( File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1501, in _call_impl return forward_call(*args, **kwargs) File
"/usr/local/lib/python3.10/dist-packages/accelerate/hooks.py", line 165, in new_forward output = old_forward(*args, **kwargs) File "/usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_llama.py", line 577, in forward layer_outputs = decoder_layer( File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1501, in _call_impl return forward_call(*args, **kwargs) File "/usr/local/lib/python3.10/dist-packages/accelerate/hooks.py", line 165, in new_forward output = old_forward(*args, **kwargs) File "/usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_llama.py", line 292, in forward hidden_states, self_attn_weights, present_key_value = self.self_attn( File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1501, in _call_impl return forward_call(*args, **kwargs) File "/usr/local/lib/python3.10/dist-packages/accelerate/hooks.py", line 165, in new_forward output = old_forward(*args, **kwargs) File "/usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_llama.py", line 196, in forward query_states = self.q_proj(hidden_states).view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2) File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1501, in _call_impl return forward_call(*args, **kwargs) File "/usr/local/lib/python3.10/dist-packages/accelerate/hooks.py", line 165, in new_forward output = old_forward(*args, **kwargs) File "/usr/local/lib/python3.10/dist-packages/peft/tuners/lora.py", line 576, in forward result = super().forward(x) File "/usr/local/lib/python3.10/dist-packages/bitsandbytes/nn/modules.py", line 242, in forward out = bnb.matmul(x, self.weight, bias=self.bias, state=self.state) File "/usr/local/lib/python3.10/dist-packages/bitsandbytes/autograd/_functions.py", line 488, in matmul return MatMul8bitLt.apply(A, B, out, bias, state) File "/usr/local/lib/python3.10/dist-packages/torch/autograd/function.py", line 506, in apply return
super().apply(*args, **kwargs) # type: ignore[misc] File "/usr/local/lib/python3.10/dist-packages/bitsandbytes/autograd/_functions.py", line 273, in forward using_igemmlt = torch.cuda.get_device_capability(device=A.device) >= (7, 5) and not state.force_no_igemmlt File "/usr/local/lib/python3.10/dist-packages/torch/cuda/__init__.py", line 390, in get_device_capability prop = get_device_properties(device) File "/usr/local/lib/python3.10/dist-packages/torch/cuda/__init__.py", line 405, in get_device_properties device = _get_device_index(device, optional=True) File "/usr/local/lib/python3.10/dist-packages/torch/cuda/_utils.py", line 32, in _get_device_index raise ValueError('Expected a cuda device, but got: {}'.format(device)) ValueError: Expected a cuda device, but got: cpu
have you been able to resolve this? I'm facing the same issue
No. The card is just too old and does not have the required feature set. Time for an upgrade.
On Fri, Jun 30, 2023 at 11:56 AM mvuthegoat @.***> wrote:
have you been able to resolve this? I'm facing the same issue
— Reply to this email directly, view it on GitHub https://github.com/tloen/alpaca-lora/issues/216#issuecomment-1614423568, or unsubscribe https://github.com/notifications/unsubscribe-auth/AOOEQ62ZB5AM5E4RZFPXUZ3XN2PGJANCNFSM6AAAAAAWMFZU6U . You are receiving this because you authored the thread.Message ID: @.***>
Dear all,
Executing finetune.py consistently leads to the above error for me. I would be very thankful for a hint that points me in the right direction. The log was taken from an Ubuntu 22.04 machine, but the error was also seen on a Windows machine running Ubuntu under WSL.
python3 finetune.py --base_model 'decapoda-research/llama-7b-hf' --data_path 'yahma/alpaca-cleaned' --output_dir './lora-alpaca'
Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
/home/home/.local/lib/python3.10/site-packages/bitsandbytes/cuda_setup/main.py:136: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('local/home-H170N-WIFI'), PosixPath('@/tmp/.ICE-unix/3847,unix/home-H170N-WIFI')} warn(msg) /home/home/.local/lib/python3.10/site-packages/bitsandbytes/cuda_setup/main.py:136: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('1'), PosixPath('0')} warn(msg) /home/home/.local/lib/python3.10/site-packages/bitsandbytes/cuda_setup/main.py:136: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('/org/gnome/Terminal/screen/3d4dfe16_8624_4492_8c1f_662466cb08ab')} warn(msg) CUDA_SETUP: WARNING! libcudart.so not found in any environmental path. Searching /usr/local/cuda/lib64... CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so CUDA SETUP: Highest compute capability among GPUs detected: 6.1 CUDA SETUP: Detected CUDA version 121 /home/home/.local/lib/python3.10/site-packages/bitsandbytes/cuda_setup/main.py:136: UserWarning: WARNING: Compute capability < 7.5 detected! Only slow 8-bit matmul is supported for your GPU! warn(msg) CUDA SETUP: Loading binary /home/home/.local/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda121_nocublaslt.so... Training Alpaca-LoRA model with params: base_model: decapoda-research/llama-7b-hf data_path: yahma/alpaca-cleaned output_dir: ./lora-alpaca batch_size: 128 micro_batch_size: 4 num_epochs: 3 learning_rate: 0.0003 cutoff_len: 256 val_set_size: 2000 lora_r: 8 lora_alpha: 16 lora_dropout: 0.05 lora_target_modules: ['q_proj', 'v_proj'] train_on_inputs: True group_by_length: False wandb_project: wandb_run_name: wandb_watch: wandb_log_model: resume_from_checkpoint: None
Traceback (most recent call last): File "/home/home/alpaca-lora/finetune.py", line 294, in <module>
fire.Fire(train)
File "/home/home/.local/lib/python3.10/site-packages/fire/core.py", line 141, in Fire
component_trace = _Fire(component, args, parsed_flag_args, context, name)
File "/home/home/.local/lib/python3.10/site-packages/fire/core.py", line 475, in _Fire
component, remaining_args = _CallAndUpdateTrace(
File "/home/home/.local/lib/python3.10/site-packages/fire/core.py", line 691, in _CallAndUpdateTrace
component = fn(*varargs, **kwargs)
File "/home/home/alpaca-lora/finetune.py", line 261, in train
trainer.train(resume_from_checkpoint=resume_from_checkpoint)
File "/home/home/.local/lib/python3.10/site-packages/transformers/trainer.py", line 1644, in train
return inner_training_loop(
File "/home/home/.local/lib/python3.10/site-packages/transformers/trainer.py", line 1911, in _inner_training_loop
tr_loss_step = self.training_step(model, inputs)
File "/home/home/.local/lib/python3.10/site-packages/transformers/trainer.py", line 2657, in training_step
loss = self.compute_loss(model, inputs)
File "/home/home/.local/lib/python3.10/site-packages/transformers/trainer.py", line 2689, in compute_loss
outputs = model(**inputs)
File "/home/home/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/home/.local/lib/python3.10/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/home/home/.local/lib/python3.10/site-packages/peft/peft_model.py", line 529, in forward
return self.base_model(
File "/home/home/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/home/.local/lib/python3.10/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/home/home/.local/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 710, in forward
outputs = self.model(
File "/home/home/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/home/.local/lib/python3.10/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/home/home/.local/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 590, in forward
layer_outputs = torch.utils.checkpoint.checkpoint(
File "/home/home/.local/lib/python3.10/site-packages/torch/utils/checkpoint.py", line 249, in checkpoint
return CheckpointFunction.apply(function, preserve, *args)
File "/home/home/.local/lib/python3.10/site-packages/torch/autograd/function.py", line 506, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
File "/home/home/.local/lib/python3.10/site-packages/torch/utils/checkpoint.py", line 107, in forward
outputs = run_function(*args)
File "/home/home/.local/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 586, in custom_forward
return module(*inputs, output_attentions, None)
File "/home/home/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/home/.local/lib/python3.10/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/home/home/.local/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 313, in forward
hidden_states, self_attn_weights, present_key_value = self.self_attn(
File "/home/home/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/home/.local/lib/python3.10/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/home/home/.local/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 214, in forward
query_states = self.q_proj(hidden_states).view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
File "/home/home/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/home/.local/lib/python3.10/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/home/home/.local/lib/python3.10/site-packages/peft/tuners/lora.py", line 522, in forward
result = super().forward(x)
File "/home/home/.local/lib/python3.10/site-packages/bitsandbytes/nn/modules.py", line 242, in forward
out = bnb.matmul(x, self.weight, bias=self.bias, state=self.state)
File "/home/home/.local/lib/python3.10/site-packages/bitsandbytes/autograd/_functions.py", line 488, in matmul
return MatMul8bitLt.apply(A, B, out, bias, state)
File "/home/home/.local/lib/python3.10/site-packages/torch/autograd/function.py", line 506, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
File "/home/home/.local/lib/python3.10/site-packages/bitsandbytes/autograd/_functions.py", line 273, in forward
using_igemmlt = torch.cuda.get_device_capability(device=A.device) >= (7, 5) and not state.force_no_igemmlt
File "/home/home/.local/lib/python3.10/site-packages/torch/cuda/__init__.py", line 381, in get_device_capability
prop = get_device_properties(device)
File "/home/home/.local/lib/python3.10/site-packages/torch/cuda/__init__.py", line 396, in get_device_properties
device = _get_device_index(device, optional=True)
File "/home/home/.local/lib/python3.10/site-packages/torch/cuda/_utils.py", line 32, in _get_device_index
raise ValueError('Expected a cuda device, but got: {}'.format(device))
ValueError: Expected a cuda device, but got: cpu
Traceback (most recent call last):
File "/home/home/alpaca-lora/finetune.py", line 294, in <module>
fire.Fire(train)
File "/home/home/.local/lib/python3.10/site-packages/fire/core.py", line 141, in Fire
component_trace = _Fire(component, args, parsed_flag_args, context, name)
File "/home/home/.local/lib/python3.10/site-packages/fire/core.py", line 475, in _Fire
component, remaining_args = _CallAndUpdateTrace(
File "/home/home/.local/lib/python3.10/site-packages/fire/core.py", line 691, in _CallAndUpdateTrace
component = fn(*varargs, **kwargs)
File "/home/home/alpaca-lora/finetune.py", line 261, in train
trainer.train(resume_from_checkpoint=resume_from_checkpoint)
File "/home/home/.local/lib/python3.10/site-packages/transformers/trainer.py", line 1644, in train
return inner_training_loop(
File "/home/home/.local/lib/python3.10/site-packages/transformers/trainer.py", line 1911, in _inner_training_loop
tr_loss_step = self.training_step(model, inputs)
File "/home/home/.local/lib/python3.10/site-packages/transformers/trainer.py", line 2657, in training_step
loss = self.compute_loss(model, inputs)
File "/home/home/.local/lib/python3.10/site-packages/transformers/trainer.py", line 2689, in compute_loss
outputs = model(**inputs)
File "/home/home/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/home/.local/lib/python3.10/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/home/home/.local/lib/python3.10/site-packages/peft/peft_model.py", line 529, in forward
return self.base_model(
File "/home/home/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/home/.local/lib/python3.10/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/home/home/.local/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 710, in forward
outputs = self.model(
File "/home/home/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/home/.local/lib/python3.10/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/home/home/.local/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 590, in forward
layer_outputs = torch.utils.checkpoint.checkpoint(
File "/home/home/.local/lib/python3.10/site-packages/torch/utils/checkpoint.py", line 249, in checkpoint
return CheckpointFunction.apply(function, preserve, *args)
File "/home/home/.local/lib/python3.10/site-packages/torch/autograd/function.py", line 506, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
File "/home/home/.local/lib/python3.10/site-packages/torch/utils/checkpoint.py", line 107, in forward
outputs = run_function(*args)
File "/home/home/.local/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 586, in custom_forward
return module(*inputs, output_attentions, None)
File "/home/home/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/home/.local/lib/python3.10/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/home/home/.local/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 313, in forward
hidden_states, self_attn_weights, present_key_value = self.self_attn(
File "/home/home/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/home/.local/lib/python3.10/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/home/home/.local/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 214, in forward
query_states = self.q_proj(hidden_states).view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
File "/home/home/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/home/.local/lib/python3.10/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/home/home/.local/lib/python3.10/site-packages/peft/tuners/lora.py", line 522, in forward
result = super().forward(x)
File "/home/home/.local/lib/python3.10/site-packages/bitsandbytes/nn/modules.py", line 242, in forward
out = bnb.matmul(x, self.weight, bias=self.bias, state=self.state)
File "/home/home/.local/lib/python3.10/site-packages/bitsandbytes/autograd/_functions.py", line 488, in matmul
return MatMul8bitLt.apply(A, B, out, bias, state)
File "/home/home/.local/lib/python3.10/site-packages/torch/autograd/function.py", line 506, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
File "/home/home/.local/lib/python3.10/site-packages/bitsandbytes/autograd/_functions.py", line 273, in forward
using_igemmlt = torch.cuda.get_device_capability(device=A.device) >= (7, 5) and not state.force_no_igemmlt
File "/home/home/.local/lib/python3.10/site-packages/torch/cuda/__init__.py", line 381, in get_device_capability
prop = get_device_properties(device)
File "/home/home/.local/lib/python3.10/site-packages/torch/cuda/__init__.py", line 396, in get_device_properties
device = _get_device_index(device, optional=True)
File "/home/home/.local/lib/python3.10/site-packages/torch/cuda/_utils.py", line 32, in _get_device_index
raise ValueError('Expected a cuda device, but got: {}'.format(device))
ValueError: Expected a cuda device, but got: cpu
Thanks, Oliver