bitsandbytes-foundation / bitsandbytes

Accessible large language models via k-bit quantization for PyTorch.
https://huggingface.co/docs/bitsandbytes/main/en/index
MIT License

AttributeError: module 'bitsandbytes' has no attribute 'nn' #1166

Open Harsh-raj opened 7 months ago

Harsh-raj commented 7 months ago

Why am I getting this error when trying to load the checkpoint of a LoRA fine-tuned phi-1_5 model? Following is the complete error log:

(phi) harsh@harsh:~/phi-1_5$ python3 inference_lora_phi.py
Traceback (most recent call last):
  File "/home/harsh/phi-1_5/inference_lora_phi.py", line 14, in <module>
    model = PeftModel.from_pretrained(model, "./output/phi-1_5_FT_lr1e-5_ep3_batch8_lora1632_denfc12qkv_proj/checkpoint-14555")
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/harsh/phi-1_5/peft/src/peft/peft_model.py", line 387, in from_pretrained
    model = MODEL_TYPE_TO_PEFT_MODEL_MAPPING[config.task_type](model, config, adapter_name)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/harsh/phi-1_5/peft/src/peft/peft_model.py", line 1212, in __init__
    super().__init__(model, peft_config, adapter_name)
  File "/home/harsh/phi-1_5/peft/src/peft/peft_model.py", line 131, in __init__
    self.base_model = cls(model, {adapter_name: peft_config}, adapter_name)
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/harsh/phi-1_5/peft/src/peft/tuners/lora/model.py", line 136, in __init__
    super().__init__(model, config, adapter_name)
  File "/home/harsh/phi-1_5/peft/src/peft/tuners/tuners_utils.py", line 165, in __init__
    self.inject_adapter(self.model, adapter_name)
  File "/home/harsh/phi-1_5/peft/src/peft/tuners/tuners_utils.py", line 342, in inject_adapter
    self._create_and_replace(peft_config, adapter_name, target, target_name, parent, current_key=key)
  File "/home/harsh/phi-1_5/peft/src/peft/tuners/lora/model.py", line 220, in _create_and_replace
    new_module = self._create_new_module(lora_config, adapter_name, target, **kwargs)
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/harsh/phi-1_5/peft/src/peft/tuners/lora/model.py", line 282, in _create_new_module
    from .bnb import dispatch_bnb_8bit
  File "/home/harsh/phi-1_5/peft/src/peft/tuners/lora/bnb.py", line 272, in <module>
    if is_bnb_4bit_available():
       ^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/harsh/phi-1_5/peft/src/peft/import_utils.py", line 33, in is_bnb_4bit_available
    return hasattr(bnb.nn, "Linear4bit")
                   ^^^^^^
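
For reference, the check that fails inside peft is just hasattr(bnb.nn, "Linear4bit") (last frame above), so a minimal way to reproduce it outside of peft should be something like:

import bitsandbytes as bnb

# peft's is_bnb_4bit_available() essentially performs this check; if the
# bitsandbytes import did not resolve to a proper installation, accessing
# bnb.nn raises the same AttributeError as in the traceback above.
print(hasattr(bnb.nn, "Linear4bit"))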

Following is the inference code I am using to run the LoRA fine-tuned model:

import gradio as gr 
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import StoppingCriteria, StoppingCriteriaList, TextIteratorStreamer
from threading import Thread
from peft import LoraConfig, get_peft_model, PeftModelForCausalLM, PeftConfig, PeftModel

# # Loading the tokenizer and model from Hugging Face's model hub.
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-1_5", torch_dtype="auto", trust_remote_code=True)
# model = AutoModelForCausalLM.from_pretrained("microsoft/phi-1_5",  trust_remote_code=True)

config = PeftConfig.from_pretrained("./output/phi-1_5_FT_lr1e-5_ep3_batch8_lora1632_denfc12qkv_proj/checkpoint-14555")
model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
model = PeftModel.from_pretrained(model, "./output/phi-1_5_FT_lr1e-5_ep3_batch8_lora1632_denfc12qkv_proj/checkpoint-14555")
# using CUDA for an optimal experience
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# Defining a custom stopping criteria class for the model's text generation.
class StopOnTokens(StoppingCriteria):
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        stop_ids = [50256]  # IDs of tokens where the generation should stop.
        for stop_id in stop_ids:
            if input_ids[0][-1] == stop_id:  # Checking if the last generated token is a stop token.
                return True
        return False

# Function to generate model predictions.
def predict(message, history):
    history_transformer_format = history + [[message, ""]]
    stop = StopOnTokens()

    # Formatting the input for the model.
    #messages = "<|endoftext|>".join(["<|endoftext|>".join(["\n" + item[0], "\n" + item[1]])
    #                   for item in history_transformer_format])

    model_inputs = tokenizer([message], return_tensors="pt").to(device) #messages
    streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        model_inputs,
        streamer=streamer,
        max_new_tokens=512,
        do_sample=True,
        top_p=0.95,
        top_k=50,
        temperature=0.5,
        num_beams=1,
        stopping_criteria=StoppingCriteriaList([stop])
    )
    t = Thread(target=model.generate, kwargs=generate_kwargs)
    t.start()  # Starting the generation in a separate thread.
    partial_message = ""
    for new_token in streamer:
        partial_message += new_token
        if '<|endoftext|>' in partial_message:  # Breaking the loop if the stop token is generated.
            break
        yield partial_message

# Setting up the Gradio chat interface.
gr.ChatInterface(predict,
                 title="Phi1.5").launch(share=True)  # Launching the web interface.
matthewdouglas commented 7 months ago

Hi @Harsh-raj, what version of bitsandbytes are you using?

Harsh-raj commented 7 months ago

I tried to compile bitsandbytes from source, but when I ran the command cmake -DCOMPUTE_BACKEND=cuda -S . I got this error:

-- Configuring bitsandbytes (Backend: cuda)
-- NO_CUBLASLT := OFF
-- CUDA Version: 101 (10.1.243)
-- CUDA Compiler: /usr/bin/nvcc
CMake Error at CMakeLists.txt:88 (message):
  CUDA Version < 11 is not supported

-- Configuring incomplete, errors occurred!

The system that I am using is a bit old. Is there a way to work around this?

Harsh-raj commented 7 months ago

So I instead installed bitsandbytes-cuda112 0.26.0.post2 and removed the bitsandbytes source folder. Now when I try to run the LoRA fine-tuned model, it throws this:

(phi) harsh@harsh:~/phi-1_5$ python3 inference_lora_phi.py
Traceback (most recent call last):
  File "/home/harsh/phi-1_5/inference_lora_phi.py", line 6, in <module>
    from peft import LoraConfig, get_peft_model, PeftModelForCausalLM, PeftConfig, PeftModel
  File "/home/harsh/phi-1_5/peft/src/peft/__init__.py", line 22, in <module>
    from .auto import (
  File "/home/harsh/phi-1_5/peft/src/peft/auto.py", line 31, in <module>
    from .config import PeftConfig
  File "/home/harsh/phi-1_5/peft/src/peft/config.py", line 23, in <module>
    from .utils import CONFIG_NAME, PeftType, TaskType
  File "/home/harsh/phi-1_5/peft/src/peft/utils/__init__.py", line 21, in <module>
    from .loftq_utils import replace_lora_weights_loftq
  File "/home/harsh/phi-1_5/peft/src/peft/utils/loftq_utils.py", line 35, in <module>
    import bitsandbytes as bnb
  File "/home/harsh/miniconda3/envs/phi/lib/python3.12/site-packages/bitsandbytes/__init__.py", line 5, in <module>
    from .optim import adam
  File "/home/harsh/miniconda3/envs/phi/lib/python3.12/site-packages/bitsandbytes/optim/__init__.py", line 5, in <module>
    from .adam import Adam, Adam8bit, Adam32bit
  File "/home/harsh/miniconda3/envs/phi/lib/python3.12/site-packages/bitsandbytes/optim/adam.py", line 11, in <module>
    from bitsandbytes.optim.optimizer import Optimizer2State
  File "/home/harsh/miniconda3/envs/phi/lib/python3.12/site-packages/bitsandbytes/optim/optimizer.py", line 6, in <module>
    import bitsandbytes.functional as F
  File "/home/harsh/miniconda3/envs/phi/lib/python3.12/site-packages/bitsandbytes/functional.py", line 13, in <module>
    lib = ct.cdll.LoadLibrary(os.path.dirname(__file__) + '/libbitsandbytes.so')
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/harsh/miniconda3/envs/phi/lib/python3.12/ctypes/__init__.py", line 460, in LoadLibrary
    return self._dlltype(name)
           ^^^^^^^^^^^^^^^^^^^
  File "/home/harsh/miniconda3/envs/phi/lib/python3.12/ctypes/__init__.py", line 379, in __init__
    self._handle = _dlopen(self._name, mode)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^
OSError: libcudart.so.11.0: cannot open shared object file: No such file or directory
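
A quick way to check which CUDA runtime the installed PyTorch build expects (to compare against the libcudart.so.11.0 that this wheel is trying to load) would be something like:

import torch

# CUDA toolkit version this PyTorch build was compiled against; the
# bitsandbytes-cuda112 wheel needs a CUDA 11.x runtime (libcudart.so.11.0)
# to be discoverable, e.g. via LD_LIBRARY_PATH.
print(torch.version.cuda)
print(torch.cuda.is_available())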
Titus-von-Koeller commented 7 months ago

Hey @Harsh-raj,

Thanks for reporting to us.

First of all, in the title you mention AttributeError: module 'bitsandbytes' has no attribute 'nn', but I don't see that in the error logs that you provided. When did you get this error? The context around it would be important.

Based on that title, the most likely cause to me is that your current working directory was the bitsandbytes clone directory when you ran Python.

This would mean that Python resolves import bitsandbytes to the repository folder itself, and at that first level of the hierarchy it doesn't find nn, because the actual package is still one level down.
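
A quick way to verify this is to print where Python actually resolves the import from, e.g.:

import bitsandbytes as bnb

# If this prints a path inside the cloned repo rather than your
# site-packages directory, Python picked up the source folder instead of
# the installed package.
print(bnb.__file__)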

We really have to switch to a src/ layout instead of keeping the package in a top-level bitsandbytes directory. This is super confusing and has often led to issues in the past. I'll make that a priority.

Let me know if this helped and otherwise please provide all the details you can, so we can try to help you more.

prashanthag commented 3 months ago

bitsandbytes 0.43.3


AttributeError                            Traceback (most recent call last)
Cell In[3], line 30
     22 quantization_config = BitsAndBytesConfig(
     23     load_in_4bit=True,
     24     bnb_4bit_use_double_quant=True,
     25     bnb_4bit_quant_type="nf4",
     26     bnb_4bit_compute_dtype=torch.bfloat16
     27 )
     29 # Load the pre-trained model and tokenizer
---> 30 model = AutoModelForCausalLM.from_pretrained(
     31     model_path,
     32     quantization_config=quantization_config,
     33     device_map="auto"
     34 )
     35 tokenizer = AutoTokenizer.from_pretrained(model_path)
     37 # Move the model to GPU

File ~/.pyenv/versions/langchain/lib/python3.12/site-packages/transformers/models/auto/auto_factory.py:564, in _BaseAutoModelClass.from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
    562 elif type(config) in cls._model_mapping.keys():
    563     model_class = _get_model_class(config, cls._model_mapping)
--> 564     return model_class.from_pretrained(
    565         pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, **kwargs
    566     )
    567 raise ValueError(
    568     f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n"
    569     f"Model type should be one of {', '.join(c.__name__ for c in cls._model_mapping.keys())}."
    570 )

File ~/.pyenv/versions/langchain/lib/python3.12/site-packages/transformers/modeling_utils.py:3827, in PreTrainedModel.from_pretrained(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, *model_args, **kwargs)
   3824     keep_in_fp32_modules = []
   3826 if hf_quantizer is not None:
-> 3827     hf_quantizer.preprocess_model(
   3828         model=model, device_map=device_map, keep_in_fp32_modules=keep_in_fp32_modules
   3829     )
   3831 # We store the original dtype for quantized models as we cannot easily retrieve it
   3832 # once the weights have been quantized
   3833 # Note that once you have loaded a quantized model, you can't change its dtype so this will
   3834 # remain a single source of truth
   3835 config._pre_quantization_dtype = torch_dtype

File ~/.pyenv/versions/langchain/lib/python3.12/site-packages/transformers/quantizers/base.py:182, in HfQuantizer.preprocess_model(self, model, **kwargs)
    180 model.is_quantized = True
    181 model.quantization_method = self.quantization_config.quant_method
--> 182 return self._process_model_before_weight_loading(model, **kwargs)

File ~/.pyenv/versions/langchain/lib/python3.12/site-packages/transformers/quantizers/quantizer_bnb_4bit.py:287, in Bnb4BitHfQuantizer._process_model_before_weight_loading(self, model, device_map, keep_in_fp32_modules, **kwargs)
    280     raise ValueError(
    281         "If you want to offload some keys to cpu or disk, you need to set "
    282         "llm_int8_enable_fp32_cpu_offload=True. Note that these modules will not be "
    283         " converted to 8-bit but kept in 32-bit."
    284     )
    285 self.modules_to_not_convert.extend(keys_on_cpu)
--> 287 model = replace_with_bnb_linear(
    288     model, modules_to_not_convert=self.modules_to_not_convert, quantization_config=self.quantization_config
    289 )
    290 # TODO: consider bringing replace_with_bnb_linear() code from ..integrations/bitsandbyter.py to here
    292 model.config.quantization_config = self.quantization_config

File ~/.pyenv/versions/langchain/lib/python3.12/site-packages/transformers/integrations/bitsandbytes.py:252, in replace_with_bnb_linear(model, modules_to_not_convert, current_key_name, quantization_config)
    222 """
    223 A helper function to replace all torch.nn.Linear modules by bnb.nn.Linear8bit modules from the bitsandbytes
    224 library. This will enable running your models using mixed int8 precision as described by the paper `LLM.int8():
    (...)
    249 storage and computation.
    250 """
    251 modules_to_not_convert = ["lm_head"] if modules_to_not_convert is None else modules_to_not_convert
--> 252 model, has_been_replaced = _replace_with_bnb_linear(
    253     model, modules_to_not_convert, current_key_name, quantization_config
    254 )
    256 if not has_been_replaced:
    257     logger.warning(
    258         "You are loading your model in 8bit or 4bit but no linear modules were found in your model."
    259         " Please double check your model architecture, or submit an issue on github if you think this is"
    260         " a bug."
    261     )

File ~/.pyenv/versions/langchain/lib/python3.12/site-packages/transformers/integrations/bitsandbytes.py:209, in _replace_with_bnb_linear(model, modules_to_not_convert, current_key_name, quantization_config, has_been_replaced)
    207     model._modules[name].requires_grad_(False)
    208 if len(list(module.children())) > 0:
--> 209     _, has_been_replaced = _replace_with_bnb_linear(
    210         module,
    211         modules_to_not_convert,
    212         current_key_name,
    213         quantization_config,
    214         has_been_replaced=has_been_replaced,
    215     )
    216 # Remove the last key for recursion
    217 current_key_name.pop(-1)

File ~/.pyenv/versions/langchain/lib/python3.12/site-packages/transformers/integrations/bitsandbytes.py:209, in _replace_with_bnb_linear(model, modules_to_not_convert, current_key_name, quantization_config, has_been_replaced)
    207     model._modules[name].requires_grad_(False)
    208 if len(list(module.children())) > 0:
--> 209     _, has_been_replaced = _replace_with_bnb_linear(
    210         module,
    211         modules_to_not_convert,
    212         current_key_name,
    213         quantization_config,
    214         has_been_replaced=has_been_replaced,
    215     )
    216 # Remove the last key for recursion
    217 current_key_name.pop(-1)

[... skipping similar frames: _replace_with_bnb_linear at line 209 (1 times)]

File ~/.pyenv/versions/langchain/lib/python3.12/site-packages/transformers/integrations/bitsandbytes.py:209, in _replace_with_bnb_linear(model, modules_to_not_convert, current_key_name, quantization_config, has_been_replaced)
    207     model._modules[name].requires_grad_(False)
    208 if len(list(module.children())) > 0:
--> 209     _, has_been_replaced = _replace_with_bnb_linear(
    210         module,
    211         modules_to_not_convert,
    212         current_key_name,
    213         quantization_config,
    214         has_been_replaced=has_been_replaced,
    215     )
    216 # Remove the last key for recursion
    217 current_key_name.pop(-1)

File ~/.pyenv/versions/langchain/lib/python3.12/site-packages/transformers/integrations/bitsandbytes.py:191, in _replace_with_bnb_linear(model, modules_to_not_convert, current_key_name, quantization_config, has_been_replaced)
    187     pass
    188 else:
    189     extra_kwargs = (
    190         {"quant_storage": quantization_config.bnb_4bit_quant_storage}
--> 191         if "quant_storage" in list(signature(bnb.nn.Linear4bit).parameters)
    192         else {}
    193     )
    194 model._modules[name] = bnb.nn.Linear4bit(
    195     in_features,
    196     out_features,
    (...)
    201     **extra_kwargs,
    202 )
    203 has_been_replaced = True

AttributeError: module 'bitsandbytes' has no attribute 'nn'
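
For reference, the loading code from the failing cell, pulled out into a standalone snippet (model_path here is only a placeholder; the notebook defines its own model path earlier, not shown), is roughly:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_path = "path/to/your/model"  # placeholder for the model path used in the notebook

# 4-bit NF4 quantization config, as in the cell that raised the error above
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(
    model_path,
    quantization_config=quantization_config,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(model_path)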

XiuyeYuan commented 1 month ago

I'm getting the same error: AttributeError: module 'bitsandbytes' has no attribute 'nn'.

system info:

(rocm) root@nv48-rocm:/mnt/myrocm/llama_model# pip show bitsandbytes
Name: bitsandbytes
Version: 0.44.1.dev0+cd3cb68
Summary: k-bit optimizers and matrix multiplication routines.
Home-page: https://github.com/TimDettmers/bitsandbytes
Author: Tim Dettmers
Author-email: dettmers@cs.washington.edu
License: MIT
Location: /root/miniconda3/envs/rocm/lib/python3.10/site-packages
Editable project location: /mnt/myrocm/llama_model/bitsandbytes
Requires: numpy, torch

Error message:

Traceback (most recent call last):
  File "/mnt/myrocm/llama_model/fine_tune.py", line 70, in <module>
    model = get_peft_model(base_model, peft_parameters)
  File "/root/miniconda3/envs/rocm/lib/python3.10/site-packages/peft/mapping.py", line 136, in get_peft_model
    return MODEL_TYPE_TO_PEFT_MODEL_MAPPING[peft_config.task_type](model, peft_config, adapter_name=adapter_name)
  File "/root/miniconda3/envs/rocm/lib/python3.10/site-packages/peft/peft_model.py", line 1059, in __init__
    super().__init__(model, peft_config, adapter_name)
  File "/root/miniconda3/envs/rocm/lib/python3.10/site-packages/peft/peft_model.py", line 126, in __init__
    self.base_model = cls(model, {adapter_name: peft_config}, adapter_name)
  File "/root/miniconda3/envs/rocm/lib/python3.10/site-packages/peft/tuners/lora/model.py", line 111, in __init__
    super().__init__(model, config, adapter_name)
  File "/root/miniconda3/envs/rocm/lib/python3.10/site-packages/peft/tuners/tuners_utils.py", line 147, in __init__
    self.inject_adapter(self.model, adapter_name)
  File "/root/miniconda3/envs/rocm/lib/python3.10/site-packages/peft/tuners/tuners_utils.py", line 302, in inject_adapter
    self._create_and_replace(peft_config, adapter_name, target, target_name, parent, current_key=key)
  File "/root/miniconda3/envs/rocm/lib/python3.10/site-packages/peft/tuners/lora/model.py", line 182, in _create_and_replace
    new_module = self._create_new_module(lora_config, adapter_name, target, **kwargs)
  File "/root/miniconda3/envs/rocm/lib/python3.10/site-packages/peft/tuners/lora/model.py", line 244, in _create_new_module
    from .bnb import dispatch_bnb_8bit
  File "/root/miniconda3/envs/rocm/lib/python3.10/site-packages/peft/tuners/lora/bnb.py", line 218, in <module>
    if is_bnb_4bit_available():
  File "/root/miniconda3/envs/rocm/lib/python3.10/site-packages/peft/import_utils.py", line 31, in is_bnb_4bit_available
    return hasattr(bnb.nn, "Linear4bit")
AttributeError: module 'bitsandbytes' has no attribute 'nn'