ValueError on loading a model saved using Accelerate - ValueError: Trying to set a tensor of shape torch.Size([32768512]) in "weight" (which has shape torch.Size([32000, 4096])), this look incorrect. #2374
Information

[ ] One of the scripts in the examples/ folder of Accelerate or an officially supported no_trainer script in the examples folder of the transformers repo (such as run_no_trainer_glue.py)
[x] My own task or dataset (give details below)
Reproduction
I used the following train.py for training.
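The exact script is not reproduced here; a minimal sketch of the flow it follows, assuming a standard Accelerate fine-tuning loop that ends with save_pretrained (the base model name, path, and hyperparameters below are placeholders of mine, not the original values):

# Hypothetical sketch of the train.py flow -- not the exact script.
# Assumes a standard Accelerate fine-tuning loop for a causal LM.
import torch
from accelerate import Accelerator
from transformers import AutoModelForCausalLM

accelerator = Accelerator()
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")  # placeholder base model
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)

model, optimizer = accelerator.prepare(model, optimizer)

# ... training loop over a prepared dataloader elided ...

accelerator.wait_for_everyone()
unwrapped_model = accelerator.unwrap_model(model)
unwrapped_model.save_pretrained(
    "/home/mahavirdabas18/llama/past_code/my_test_llama_1",
    is_main_process=accelerator.is_main_process,
    save_function=accelerator.save,
)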
The training script runs fine, completing the full training and saving of the model without throwing any error.

However, when I try to load the saved model using:

base_model = AutoModelForCausalLM.from_pretrained(
    '/home/mahavirdabas18/llama/past_code/my_test_llama_1',
    # quantization_config=quant_config,
    device_map={"": 0}
)

I get the following error:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[3], line 2
      1 # Model
----> 2 base_model = AutoModelForCausalLM.from_pretrained(
      3     '/home/mahavirdabas18/llama/past_code/my_test_llama_1',
      4     # quantization_config=quant_config,
      5     device_map={"": 0}
      6 )

File ~/llama/.venv/lib/python3.8/site-packages/transformers/models/auto/auto_factory.py:566, in _BaseAutoModelClass.from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
    564 elif type(config) in cls._model_mapping.keys():
    565     model_class = _get_model_class(config, cls._model_mapping)
--> 566     return model_class.from_pretrained(
    567         pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, **kwargs
    568     )
    569 raise ValueError(
    570     f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n"
    571     f"Model type should be one of {', '.join(c.__name__ for c in cls._model_mapping.keys())}."
    572 )

File ~/llama/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:3706, in PreTrainedModel.from_pretrained(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, *model_args, **kwargs)
   3697 if dtype_orig is not None:
   3698     torch.set_default_dtype(dtype_orig)
   3699 (
   3700     model,
   3701     missing_keys,
   3702     unexpected_keys,
   3703     mismatched_keys,
   3704     offload_index,
   3705     error_msgs,
-> 3706 ) = cls._load_pretrained_model(
   3707     model,
   3708     state_dict,
   3709     loaded_state_dict_keys,  # XXX: rename?
   3710     resolved_archive_file,
   3711     pretrained_model_name_or_path,
   3712     ignore_mismatched_sizes=ignore_mismatched_sizes,
   3713     sharded_metadata=sharded_metadata,
   3714     _fast_init=_fast_init,
   3715     low_cpu_mem_usage=low_cpu_mem_usage,
   3716     device_map=device_map,
   3717     offload_folder=offload_folder,
   3718     offload_state_dict=offload_state_dict,
   3719     dtype=torch_dtype,
   3720     is_quantized=(getattr(model, "quantization_method", None) == QuantizationMethod.BITS_AND_BYTES),
   3721     keep_in_fp32_modules=keep_in_fp32_modules,
   3722 )
   3724 model.is_loaded_in_4bit = load_in_4bit
   3725 model.is_loaded_in_8bit = load_in_8bit

File ~/llama/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4116, in PreTrainedModel._load_pretrained_model(cls, model, state_dict, loaded_keys, resolved_archive_file, pretrained_model_name_or_path, ignore_mismatched_sizes, sharded_metadata, _fast_init, low_cpu_mem_usage, device_map, offload_folder, offload_state_dict, dtype, is_quantized, keep_in_fp32_modules)
   4112     set_module_quantized_tensor_to_device(
   4113         model_to_load, key, "cpu", torch.empty(*param.size(), dtype=dtype)
   4114     )
   4115 else:
-> 4116     new_error_msgs, offload_index, state_dict_index = _load_state_dict_into_meta_model(
   4117         model_to_load,
   4118         state_dict,
   4119         loaded_keys,
   4120         start_prefix,
   4121         expected_keys,
   4122         device_map=device_map,
   4123         offload_folder=offload_folder,
   4124         offload_index=offload_index,
   4125         state_dict_folder=state_dict_folder,
   4126         state_dict_index=state_dict_index,
   4127         dtype=dtype,
   4128         is_quantized=is_quantized,
   4129         is_safetensors=is_safetensors,
   4130         keep_in_fp32_modules=keep_in_fp32_modules,
   4131     )
   4132 error_msgs += new_error_msgs
   4133 else:

File ~/llama/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:778, in _load_state_dict_into_meta_model(model, state_dict, loaded_state_dict_keys, start_prefix, expected_keys, device_map, offload_folder, offload_index, state_dict_folder, state_dict_index, dtype, is_quantized, is_safetensors, keep_in_fp32_modules)
    775     state_dict_index = offload_weight(param, param_name, state_dict_folder, state_dict_index)
    776 elif not is_quantized:
    777     # For backward compatibility with older versions of accelerate
--> 778     set_module_tensor_to_device(model, param_name, param_device, **set_module_kwargs)
    779 else:
    780     if param.dtype == torch.int8 and param_name.replace("weight", "SCB") in state_dict.keys():

File ~/llama/.venv/lib/python3.8/site-packages/accelerate/utils/modeling.py:285, in set_module_tensor_to_device(module, tensor_name, device, value, dtype, fp16_statistics)
    283 if value is not None:
    284     if old_value.shape != value.shape:
--> 285         raise ValueError(
    286             f'Trying to set a tensor of shape {value.shape} in "{tensor_name}" (which has shape {old_value.shape}), this look incorrect.'
    287         )
    289 if dtype is None:
    290     # For compatibility with PyTorch load_state_dict which converts state dict dtype to existing dtype in model
    291     value = value.to(old_value.dtype)

ValueError: Trying to set a tensor of shape torch.Size([32768512]) in "weight" (which has shape torch.Size([32000, 4096])), this look incorrect.
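For what it's worth, one way to check what actually got written to disk is to list the tensor shapes stored in the checkpoint. A small sketch, assuming the model was saved as a single model.safetensors file in the output directory (the filename is an assumption; adjust for sharded or .bin checkpoints):

# Sketch: list the tensor names and shapes stored in the saved checkpoint.
# Assumes a single safetensors file; the exact filename is an assumption.
from safetensors import safe_open

ckpt = "/home/mahavirdabas18/llama/past_code/my_test_llama_1/model.safetensors"
with safe_open(ckpt, framework="pt", device="cpu") as f:
    for name in f.keys():
        print(name, f.get_slice(name).get_shape())

A 1-D entry such as [32768512] here, instead of the expected [32000, 4096], would indicate that a flattened parameter was serialized rather than the original 2-D weight.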
Expected behavior
I'm not sure why the model fails to load, given that the training process did not throw any errors or warnings. @pacman100 @muellerzr @younesbelkada
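In case it's relevant: my understanding is that for distributed setups the Accelerate docs recommend gathering the full state dict before saving, along these lines (a sketch, assuming the model and accelerator objects from the training script):

# Sketch of the save pattern the Accelerate docs recommend for distributed
# training -- gather a full state dict on the main process before saving.
accelerator.wait_for_everyone()
unwrapped_model = accelerator.unwrap_model(model)
unwrapped_model.save_pretrained(
    output_dir,
    is_main_process=accelerator.is_main_process,
    save_function=accelerator.save,
    state_dict=accelerator.get_state_dict(model),
)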