Closed — Shruthipriya-BS closed this issue 1 year ago.
Hi @Shruthipriya-BS, please submit a minimal reproducer together with your config. It's hard to help with the current description.
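Concretely, a minimal reproducer is a short standalone script that triggers the error on its own, something along the lines of the sketch below (the model id is only a placeholder taken from the code later in this thread), together with your transformers / bitsandbytes / accelerate versions:

```python
# Hypothetical minimal reproducer sketch; running this file alone should raise the same error.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

model_id = "Shruthipriya/modified_llama2"  # placeholder: the merged model pushed in this thread

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=False,
)

# The failing call, isolated from the rest of the notebook.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=bnb_config,
)
```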
This issue has been automatically marked as stale because it has not had recent activity. If you think this still needs to be addressed please comment on this thread.
Please note that issues that do not follow the contributing guidelines are likely to be ignored.
My code:

```python
import torch
import pandas as pd
import numpy as np
import gc
import timeit
import re

from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    logging,
)
from transformers import BartTokenizer, BartForConditionalGeneration
from datasets import load_dataset, concatenate_datasets, Dataset  # Dataset is needed below
from peft import LoraConfig, PeftModel, get_peft_model
from trl import SFTTrainer

import nltk
nltk.download('punkt')
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

import torchaudio
from IPython.display import Audio

dataset1_name = "knkarthick/dialogsum"
dataset1 = load_dataset(dataset1_name, split='train')
test_dataset = load_dataset(dataset1_name, split='test')  # assumed: the test-split load was not in the pasted snippet but is used below
train_df = pd.DataFrame(dataset1)
test_df = pd.DataFrame(test_dataset)
# instruction fine-tuning data preparation function
def prepare_dataset(df, split='train'):
    text_col = []
    instruction = """Write a concise summary of the below input text. Return your response in bullet points which covers the key points of the text. """  # change instruction according to the task
    if split == 'train':
        for _, row in df.iterrows():
            input_q = row["dialogue"]
            output = row["summary"]
            text = ("### Instruction: \n" + instruction +
                    "\n### Input: \n" + input_q +
                    "\n### Response :\n" + output)  # keeping the output column in the training dataset
            text_col.append(text)
        df.loc[:, 'text'] = text_col
    else:
        for _, row in df.iterrows():
            input_q = row["dialogue"]
            text = ("### Instruction: \n" + instruction +
                    "\n### Input: \n" + input_q +
                    "\n### Response :\n")  # not keeping the output column in the test dataset
            text_col.append(text)
        df.loc[:, 'text'] = text_col
    return df
train_df = prepare_dataset(train_df, 'train')
test_df = prepare_dataset(test_df, 'test')
dataset = Dataset.from_pandas(train_df)

fp16 = False
# Number of training epochs
num_train_epochs = 2
# Enable bf16 training
bf16 = True

model_name = "NousResearch/Llama-2-7b-chat-hf"
bnb_4bit_compute_dtype = "bfloat16"
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)
# Quantization config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="bfloat16",
    bnb_4bit_use_double_quant=False,
)
if compute_dtype == torch.float16 and True:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16, you can accelerate training with the argument --bf16")
        print("=" * 80)
# Loading the model with the quantization config
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map='auto',
)
model.config.use_cache = False
model.config.pretraining_tp = 1
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
from peft import LoraConfig, get_peft_model
lora_alpha = 16
lora_dropout = 0.1
lora_r = 64  # rank
# Parameter-efficient fine-tuning: LoRA configuration
peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,  # rank defined above; this argument was dropped from the pasted snippet
    target_modules=["q_proj", "v_proj"],  # we will only create adapters for the q, v matrices of the attention module
    task_type="CAUSAL_LM",  # assumed; typical for causal-LM fine-tuning
)
# The arguments are self-explanatory
import transformers
# TensorBoard logs
tb_log_dir = "./results/logs"
training_arguments = transformers.TrainingArguments(
    output_dir="llama2_qlora_finetuned",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=1,
    optim="paged_adamw_32bit",
    save_steps=10,
    logging_steps=1,
    learning_rate=2e-4,
    fp16=False,
    bf16=True,
    max_grad_norm=0.3,
    max_steps=250,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="cosine",
    report_to="tensorboard",
)
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,        # passing the PEFT config
    dataset_text_field="text",      # the column that holds the prompt text
    args=training_arguments,        # training arguments
    tokenizer=tokenizer,            # tokenizer
    packing=False,
    max_seq_length=512,
)
trainer.train()
trainer.model.save_pretrained('modeldir')
```
Steps to follow: Runtime -> Restart runtime, then run the below:
```python
import os  # needed for the PYTORCH_CUDA_ALLOC_CONF line below; missing from the pasted snippet
import gc
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
)
from peft import LoraConfig, PeftModel, get_peft_model
gc.collect()
# Set the environment variable before importing PyTorch
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "0"
model_name = "NousResearch/Llama-2-7b-chat-hf"
device_map = {"": 0}
device_map='auto'
output_dir='modeldir'
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.bfloat16,
    device_map=device_map,
    offload_folder='offload/',
    offload_state_dict=True,
)
model = PeftModel.from_pretrained(base_model, output_dir, offload_folder="offload/")
model = model.merge_and_unload()
gc.collect()
# Reload tokenizer to save it
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
model.push_to_hub("modified_llama2", use_auth_token=True)
tokenizer.push_to_hub("modified_llama2", use_auth_token=True)
```
Restart the runtime to clear VRAM, then run the below to load the model in 4-bit for inference:
```python
new_model = "modified_llama2"
huggingface_profile = "Shruthipriya"
full_path = huggingface_profile + "/" + new_model
# Activate 4-bit precision base model loading
use_4bit = True
# Activate nested quantization for 4-bit base models
use_nested_quant = False
# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "bfloat16"
# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"
def load_model(model_name):
    # Load tokenizer and model with QLoRA configuration
    ...  # the rest of the function body is not shown in the post
model, tokenizer = load_model(full_path)
```
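For readers following the inference path: the body of `load_model` is not included in the post, but it can be roughly pieced together from the traceback below. The sketch that follows is an assumption-heavy reconstruction (argument values and the missing `peft_config` are guesses), not the author's actual code:

```python
# Hypothetical reconstruction of load_model() pieced together from the traceback;
# the real notebook cell may differ in details.
from transformers import BitsAndBytesConfig  # not in the import list above, but required here

def load_model(model_name):
    compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

    bnb_config = BitsAndBytesConfig(
        load_in_4bit=use_4bit,
        bnb_4bit_quant_type=bnb_4bit_quant_type,
        bnb_4bit_compute_dtype=compute_dtype,
        bnb_4bit_use_double_quant=use_nested_quant,
    )

    # GPU capability check seen at cell lines 32-33 of the traceback
    if compute_dtype == torch.float16 and use_4bit:
        major, _ = torch.cuda.get_device_capability()
        if major >= 8:
            print("=" * 80)
            print("Your GPU supports bfloat16, you can accelerate training with the argument --bf16")
            print("=" * 80)

    # Cell line 35 of the traceback: the call that raises the NotImplementedError
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map='auto',
        quantization_config=bnb_config,
    )

    model.config.use_cache = False
    model.config.pretraining_tp = 1

    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"

    # The traceback shows a third return value (peft_config); its construction is not visible,
    # so it is omitted here to match the two-value unpacking at the call site.
    return model, tokenizer
```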
```
NotImplementedError                       Traceback (most recent call last)
/home/shruthipriya/Documents/sp/sp-env/summary.ipynb Cell 21 line 6
     57 tokenizer.padding_side = "right"
     59 return model, tokenizer, peft_config
---> 61 model, tokenizer = load_model(full_path)

/home/shruthipriya/Documents/sp/sp-env/summary.ipynb Cell 21 line 3
     32 print("Your GPU supports bfloat16, you can accelerate training with the argument --bf16")
     33 print("=" * 80)
---> 35 model = AutoModelForCausalLM.from_pretrained(
     36     model_name,
     37     device_map='auto',
     38     quantization_config=bnb_config,
     39 )
     41 model.config.use_cache = False
     42 model.config.pretraining_tp = 1

File ~/Documents/sp/sp-env/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py:493, in _BaseAutoModelClass.from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
    491 elif type(config) in cls._model_mapping.keys():
    492     model_class = _get_model_class(config, cls._model_mapping)
--> 493     return model_class.from_pretrained(
    494         pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, **kwargs
    495     )
    496 raise ValueError(
    497     f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n"
    498     f"Model type should be one of {', '.join(c.__name__ for c in cls._model_mapping.keys())}."
    499 )

File ~/Documents/sp/sp-env/lib/python3.10/site-packages/transformers/modeling_utils.py:2903, in PreTrainedModel.from_pretrained(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, *model_args, **kwargs)
   2893 if dtype_orig is not None:
   2894     torch.set_default_dtype(dtype_orig)
   2896 (
   2897     model,
   2898     missing_keys,
   2899     unexpected_keys,
   2900     mismatched_keys,
   2901     offload_index,
   2902     error_msgs,
-> 2903 ) = cls._load_pretrained_model(
   2904     model,
   2905     state_dict,
   2906     loaded_state_dict_keys,  # XXX: rename?
   2907     resolved_archive_file,
   2908     pretrained_model_name_or_path,
   2909     ignore_mismatched_sizes=ignore_mismatched_sizes,
   2910     sharded_metadata=sharded_metadata,
   2911     _fast_init=_fast_init,
   2912     low_cpu_mem_usage=low_cpu_mem_usage,
   2913     device_map=device_map,
   2914     offload_folder=offload_folder,
   2915     offload_state_dict=offload_state_dict,
   2916     dtype=torch_dtype,
   2917     is_quantized=(load_in_8bit or load_in_4bit),
   2918     keep_in_fp32_modules=keep_in_fp32_modules,
   2919 )
   2921 model.is_loaded_in_4bit = load_in_4bit
   2922 model.is_loaded_in_8bit = load_in_8bit

File ~/Documents/sp/sp-env/lib/python3.10/site-packages/transformers/modeling_utils.py:3260, in PreTrainedModel._load_pretrained_model(cls, model, state_dict, loaded_keys, resolved_archive_file, pretrained_model_name_or_path, ignore_mismatched_sizes, sharded_metadata, _fast_init, low_cpu_mem_usage, device_map, offload_folder, offload_state_dict, dtype, is_quantized, keep_in_fp32_modules)
   3250 mismatched_keys += _find_mismatched_keys(
   3251     state_dict,
   3252     model_state_dict,
   (...)
   3256     ignore_mismatched_sizes,
   3257 )
   3259 if low_cpu_mem_usage:
-> 3260     new_error_msgs, offload_index, state_dict_index = _load_state_dict_into_meta_model(
   3261         model_to_load,
   3262         state_dict,
   3263         loaded_keys,
   3264         start_prefix,
   3265         expected_keys,
   3266         device_map=device_map,
   3267         offload_folder=offload_folder,
   3268         offload_index=offload_index,
   3269         state_dict_folder=state_dict_folder,
   3270         state_dict_index=state_dict_index,
   3271         dtype=dtype,
   3272         is_quantized=is_quantized,
   3273         is_safetensors=is_safetensors,
   3274         keep_in_fp32_modules=keep_in_fp32_modules,
   3275     )
   3276     error_msgs += new_error_msgs
   3277 else:

File ~/Documents/sp/sp-env/lib/python3.10/site-packages/transformers/modeling_utils.py:725, in _load_state_dict_into_meta_model(model, state_dict, loaded_state_dict_keys, start_prefix, expected_keys, device_map, offload_folder, offload_index, state_dict_folder, state_dict_index, dtype, is_quantized, is_safetensors, keep_in_fp32_modules)
    722 fp16_statistics = None
    724 if "SCB" not in param_name:
--> 725     set_module_quantized_tensor_to_device(
    726         model, param_name, param_device, value=param, fp16_statistics=fp16_statistics
    727     )
    729 return error_msgs, offload_index, state_dict_index

File ~/Documents/sp/sp-env/lib/python3.10/site-packages/transformers/utils/bitsandbytes.py:77, in set_module_quantized_tensor_to_device(module, tensor_name, device, value, fp16_statistics)
     75     new_value = old_value.to(device)
     76 elif isinstance(value, torch.Tensor):
---> 77     new_value = value.to("cpu")
     78     if value.dtype == torch.int8:
     79         is_8bit_serializable = version.parse(importlib.metadata.version("bitsandbytes")) > version.parse(
     80             "0.37.2"
     81         )

NotImplementedError: Cannot copy out of meta tensor; no data!
```