bitsandbytes-foundation / bitsandbytes

Accessible large language models via k-bit quantization for PyTorch.
https://huggingface.co/docs/bitsandbytes/main/en/index
MIT License

AttributeError: 'NoneType' object has no attribute 'cquantize_blockwise_fp16_nf4' #693

Closed: analyticsrepo01 closed this issue 10 months ago

analyticsrepo01 commented 1 year ago


Trying to run the following code:


import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

### config ###
model_id = "NousResearch/Llama-2-7b-hf"
max_length = 512
device_map = "auto"
batch_size = 128
micro_batch_size = 32
gradient_accumulation_steps = batch_size // micro_batch_size

# "nf4" uses a symmetric quantization scheme with 4-bit precision
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# load the model from the Hugging Face Hub
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    use_cache=False,
    device_map=device_map
)

# load the matching tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

but I get the following error:


AttributeError                            Traceback (most recent call last)
Cell In[7], line 18
     10 bnb_config = BitsAndBytesConfig(
     11     load_in_4bit=True,
     12     bnb_4bit_use_double_quant=True,
     13     bnb_4bit_quant_type="nf4",
     14     bnb_4bit_compute_dtype=torch.bfloat16
     15 )
     17 # load model from huggingface
---> 18 model = AutoModelForCausalLM.from_pretrained(
     19     model_id,
     20     quantization_config=bnb_config,
     21     use_cache=False,
     22     device_map=device_map
     23 )
     25 # load tokenizer from huggingface
     26 tokenizer = AutoTokenizer.from_pretrained(model_id)

File /opt/conda/envs/pytorch/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py:484, in _BaseAutoModelClass.from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
    482 elif type(config) in cls._model_mapping.keys():
    483     model_class = _get_model_class(config, cls._model_mapping)
--> 484     return model_class.from_pretrained(
    485         pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, **kwargs
    486     )
    487 raise ValueError(
    488     f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n"
    489     f"Model type should be one of {', '.join(c.__name__ for c in cls._model_mapping.keys())}."
    490 )

File /opt/conda/envs/pytorch/lib/python3.10/site-packages/transformers/modeling_utils.py:2881, in PreTrainedModel.from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
   2871 if dtype_orig is not None:
   2872     torch.set_default_dtype(dtype_orig)
   2874 (
   2875     model,
   2876     missing_keys,
   2877     unexpected_keys,
   2878     mismatched_keys,
   2879     offload_index,
   2880     error_msgs,
-> 2881 ) = cls._load_pretrained_model(
   2882     model,
   2883     state_dict,
   2884     loaded_state_dict_keys,  # XXX: rename?
   2885     resolved_archive_file,
   2886     pretrained_model_name_or_path,
   2887     ignore_mismatched_sizes=ignore_mismatched_sizes,
   2888     sharded_metadata=sharded_metadata,
   2889     _fast_init=_fast_init,
   2890     low_cpu_mem_usage=low_cpu_mem_usage,
   2891     device_map=device_map,
   2892     offload_folder=offload_folder,
   2893     offload_state_dict=offload_state_dict,
   2894     dtype=torch_dtype,
   2895     is_quantized=(load_in_8bit or load_in_4bit),
   2896     keep_in_fp32_modules=keep_in_fp32_modules,
   2897 )
   2899 model.is_loaded_in_4bit = load_in_4bit
   2900 model.is_loaded_in_8bit = load_in_8bit

File /opt/conda/envs/pytorch/lib/python3.10/site-packages/transformers/modeling_utils.py:3228, in PreTrainedModel._load_pretrained_model(cls, model, state_dict, loaded_keys, resolved_archive_file, pretrained_model_name_or_path, ignore_mismatched_sizes, sharded_metadata, _fast_init, low_cpu_mem_usage, device_map, offload_folder, offload_state_dict, dtype, is_quantized, keep_in_fp32_modules)
   3218 mismatched_keys += _find_mismatched_keys(
   3219     state_dict,
   3220     model_state_dict,
   (...)
   3224     ignore_mismatched_sizes,
   3225 )
   3227 if low_cpu_mem_usage:
-> 3228     new_error_msgs, offload_index, state_dict_index = _load_state_dict_into_meta_model(
   3229         model_to_load,
   3230         state_dict,
   3231         loaded_keys,
   3232         start_prefix,
   3233         expected_keys,
   3234         device_map=device_map,
   3235         offload_folder=offload_folder,
   3236         offload_index=offload_index,
   3237         state_dict_folder=state_dict_folder,
   3238         state_dict_index=state_dict_index,
   3239         dtype=dtype,
   3240         is_quantized=is_quantized,
   3241         is_safetensors=is_safetensors,
   3242         keep_in_fp32_modules=keep_in_fp32_modules,
   3243     )
   3244     error_msgs += new_error_msgs
   3245 else:

File /opt/conda/envs/pytorch/lib/python3.10/site-packages/transformers/modeling_utils.py:728, in _load_state_dict_into_meta_model(model, state_dict, loaded_state_dict_keys, start_prefix, expected_keys, device_map, offload_folder, offload_index, state_dict_folder, state_dict_index, dtype, is_quantized, is_safetensors, keep_in_fp32_modules)
    725 fp16_statistics = None
    727 if "SCB" not in param_name:
--> 728     set_module_quantized_tensor_to_device(
    729         model, param_name, param_device, value=param, fp16_statistics=fp16_statistics
    730     )
    732 return error_msgs, offload_index, state_dict_index

File /opt/conda/envs/pytorch/lib/python3.10/site-packages/transformers/utils/bitsandbytes.py:91, in set_module_quantized_tensor_to_device(module, tensor_name, device, value, fp16_statistics)
     89     new_value = bnb.nn.Int8Params(new_value, requires_grad=False, **kwargs).to(device)
     90 elif is_4bit:
---> 91     new_value = bnb.nn.Params4bit(new_value, requires_grad=False, **kwargs).to(device)
     93 module._parameters[tensor_name] = new_value
     94 if fp16_statistics is not None:

File /opt/conda/envs/pytorch/lib/python3.10/site-packages/bitsandbytes/nn/modules.py:178, in Params4bit.to(self, *args, **kwargs)
    175 device, dtype, non_blocking, convert_to_format = torch._C._nn._parse_to(*args, **kwargs)
    177 if (device is not None and device.type == "cuda" and self.data.device.type == "cpu"):
--> 178     return self.cuda(device)
    179 else:
    180     s = self.quant_state

File /opt/conda/envs/pytorch/lib/python3.10/site-packages/bitsandbytes/nn/modules.py:156, in Params4bit.cuda(self, device)
    154 def cuda(self, device):
    155     w = self.data.contiguous().half().cuda(device)
--> 156     w_4bit, quant_state = bnb.functional.quantize_4bit(w, blocksize=self.blocksize, compress_statistics=self.compress_statistics, quant_type=self.quant_type)
    157     self.data = w_4bit
    158     self.quant_state = quant_state

File /opt/conda/envs/pytorch/lib/python3.10/site-packages/bitsandbytes/functional.py:832, in quantize_4bit(A, absmax, out, blocksize, compress_statistics, quant_type)
    830     lib.cquantize_blockwise_fp16_fp4(get_ptr(None), get_ptr(A), get_ptr(absmax), get_ptr(out), ct.c_int32(blocksize), ct.c_int(n))
    831 else:
--> 832     lib.cquantize_blockwise_fp16_nf4(get_ptr(None), get_ptr(A), get_ptr(absmax), get_ptr(out), ct.c_int32(blocksize), ct.c_int(n))
    833 elif A.dtype == torch.bfloat16:
    834     if quant_type == 'fp4':

AttributeError: 'NoneType' object has no attribute 'cquantize_blockwise_fp16_nf4'
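
The attribute lookup fails because `lib` in `bitsandbytes/functional.py` is `None`: the package imported, but it never managed to load its compiled CUDA binary (`libbitsandbytes_cuda*.so`), which usually means no GPU is visible to the process or the installed wheel does not match the CUDA runtime. A minimal sketch of the environment checks that tend to expose the mismatch (the version threshold is approximate):

import torch
import bitsandbytes as bnb

# quantize_4bit runs on the GPU, so torch must be a CUDA build with a visible device
print(torch.__version__, torch.version.cuda)  # torch.version.cuda must not be None
print(torch.cuda.is_available())              # must print True

# the nf4 kernels only exist in bitsandbytes >= 0.39.0; older wheels lack them
print(bnb.__version__)

Recent versions of bitsandbytes also ship a self-diagnostic: running `python -m bitsandbytes` in the same environment reports which binary it tried to load and why loading failed.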

vikramNU commented 1 year ago

Help us resolve this as well.

darrenangle commented 1 year ago

Did you figure this out?

github-actions[bot] commented 11 months ago

This issue has been automatically marked as stale because it has not had recent activity. If you think this still needs to be addressed please comment on this thread.

yzqxhhm commented 8 months ago

Still running into this problem.

DaDuo-c commented 6 months ago

Still need a fix for this.

A1pacin0 commented 6 months ago

still need a fix too

Danwoo commented 5 months ago

same problem

xu-pping commented 3 weeks ago

same problem
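
For anyone still landing here: a quick way to tell whether the problem is in bitsandbytes itself (rather than transformers or the model) is to call the 4-bit quantizer directly. A minimal repro sketch, assuming a CUDA GPU is available; if this raises the same AttributeError, the bitsandbytes CUDA setup is the culprit:

import torch
import bitsandbytes.functional as F

# exercises the same cquantize_blockwise_fp16_nf4 entry point the traceback dies in
w = torch.randn(64, 64, dtype=torch.float16, device="cuda")
w_4bit, quant_state = F.quantize_4bit(w, quant_type="nf4")
print(w_4bit.dtype, w_4bit.shape)  # packed 4-bit weights stored as uint8

If it does fail, the usual fixes are upgrading to a current wheel (`pip install -U bitsandbytes`) and making sure torch itself was installed with CUDA support.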