Open apachemycat opened 1 year ago
When I remove the following code, the error goes away, but I don't know whether that is correct:
# Tokenizer
tokenizer = AutoTokenizer.from_pretrained(
args.model_name_or_path,
cache_dir=args.cache_dir,
padding_side="right",
use_fast=True,
)
TypeError: pad_sequence(): argument 'padding_value' (position 3) must be float, not NoneType
│
│ /wzh/qlora/qlora.py:417 in call │ │ │ │ 414 │ │ │ else: │ │ 415 │ │ │ │ input_ids.append(torch.tensor(tokenized_source)) │ │ 416 │ │ # Apply padding │ │ ❱ 417 │ │ input_ids = pad_sequence(input_ids, batch_first=True, padding_value=self.tokeniz │ │ 418 │ │ labels = pad_sequence(labels, batch_first=True, padding_value=IGNORE_INDEX) if n │ │ 419 │ │ data_dict = { │ │ 420 │ │ │ 'input_ids': input_ids, │ │ │ │ /usr/local/lib/python3.8/dist-packages/torch/nn/utils/rnn.py:399 in pad_sequence │ │ │ │ 396 │ │ │ 397 │ # assuming trailing dimensions and type of all the Tensors │ │ 398 │ # in sequences are same and fetching those from sequences[0] │ │ ❱ 399 │ return torch._C._nn.pad_sequence(sequences, batch_first, padding_value) │ │ 400
If I change the code to the following: `if tokenizer.pad_token is None: tokenizer.pad_token = tokenizer.unk_token`
then maximum recursion depth exceeded while getting the str of an object \ /usr/local/lib/python3.8/dist-packages/transformers/tokenization_utils_fast.py:257 in │ │ _convert_token_to_id_with_added_voc │ │ │ │ 254 │ def _convert_token_to_id_with_added_voc(self, token: str) -> int: │ │ 255 │ │ index = self._tokenizer.token_to_id(token) │ │ 256 │ │ if index is None: │ │ ❱ 257 │ │ │ return self.unk_token_id │ │ 258 │ │ return index │ │ 259 │ │ │ 260 │ def _convert_id_to_token(self, index: int) -> Optional[str]: │ │ │ │ /usr/local/lib/python3.8/dist-packages/transformers/tokenization_utils_base.py:1142 in │ │ unk_token_id │ │ │ │ 1139 │ │ """ │ │ 1140 │ │ if self._unk_token is None: │ │ 1141 │ │ │ return None │ │ ❱ 1142 │ │ return self.convert_tokens_to_ids(self.unk_token) │ │ 1143 │ │ │ 1144 │ @property │ │ 1145 │ def sep_token_id(self) -> Optional[int]: │ │ │ │ /usr/local/lib/python3.8/dist-packages/transformers/tokenization_utils_fast.py:250 in │ │ convert_tokens_to_ids │ │ │ │ 247 │ │ │ return None │ │ 248 │ │ │ │ 249 │ │ if isinstance(tokens, str): │ │ ❱ 250 │ │ │ return self._convert_token_to_id_with_added_voc(tokens) │ │ 251 │ │ │ │ 252 │ │ return [self._convert_token_to_id_with_added_voc(token) for token in tokens] │ │ 253 │ │ │ │ /usr/local/lib/python3.8/dist-packages/transformers/tokenization_utils_fast.py:257 in │ │ _convert_token_to_id_with_added_voc │ │ │ │ 254 │ def _convert_token_to_id_with_added_voc(self, token: str) -> int: │ │ 255 │ │ index = self._tokenizer.token_to_id(token) │ │ 256 │ │ if index is None: │ │ ❱ 257 │ │ │ return self.unk_token_id │ │ 258 │ │ return index │ │ 259 │ │ │ 260 │ def _convert_id_to_token(self, index: int) -> Optional[str]: │ │ │ │ /usr/local/lib/python3.8/dist-packages/transformers/tokenization_utils_base.py:1142 in │ │ unk_token_id │ │ │ │ 1139 │ │ """ │ │ 1140 │ │ if self._unk_token is None: │ │ 1141 │ │ │ return None │ │ ❱ 1142 │ │ return self.convert_tokens_to_ids(self.unk_token) │ │ 1143 │ │ │ 1144 │ @property │ │ 1145 
│ def sep_token_id(self) -> Optional[int]: │ │ │ │ /usr/local/lib/python3.8/dist-packages/transformers/tokenization_utils_base.py:1022 in unk_token │ │ │ │ 1019 │ │ │ if self.verbose: │ │ 1020 │ │ │ │ logger.error("Using unk_token, but it is not set yet.") │ │ 1021 │ │ │ return None │ │ ❱ 1022 │ │ return str(self._unk_token) │ │ 1023 │ │ │ 1024 │ @property │ │ 1025 │ def sep_token(self) -> str: │ ╰──────────────────────────────────────────────────────────────────────────────────────────────────╯ RecursionError: maximum recursion depth exceeded while getting the str of an object
If I change the code to the following instead: if tokenizer.pad_token is None:
**tokenizer.add_special_tokens(dict(pad_token=DEFAULT_PAD_TOKEN))**
#smart_tokenizer_and_embedding_resize(
# special_tokens_dict=dict(pad_token=DEFAULT_PAD_TOKEN),
# tokenizer=tokenizer,
# model=model,
#)'
then I get this error:
../aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [68,0,0], thread: [26,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
../aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [68,0,0], thread: [27,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
../aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [68,0,0], thread: [28,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
../aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [68,0,0], thread: [29,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
../aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [68,0,0], thread: [30,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
../aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [68,0,0], thread: [31,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
│ │ │ /wzh/qlora/qlora.py:724 in train │ │ │ │ 721 │ all_metrics = {"run_name": args.run_name} │ │ 722 │ # Training │ │ 723 │ if args.do_train: │ │ ❱ 724 │ │ train_result = trainer.train(resume_from_checkpoint=checkpoint_dir) │ │ 725 │ │ metrics = train_result.metrics │ │ 726 │ │ trainer.log_metrics("train", metrics) │ │ 727 │ │ trainer.save_metrics("train", metrics) │ │ │ │ /usr/local/lib/python3.8/dist-packages/transformers/trainer.py:1696 in train │ │ │ │ 1693 │ │ inner_training_loop = find_executable_batch_size( │ │ 1694 │ │ │ self._inner_training_loop, self._train_batch_size, args.auto_find_batch_size │ │ 1695 │ │ ) │ │ ❱ 1696 │ │ return inner_training_loop( │ │ 1697 │ │ │ args=args, │ │ 1698 │ │ │ resume_from_checkpoint=resume_from_checkpoint, │ │ 1699 │ │ │ trial=trial, │ │ │ │ /usr/local/lib/python3.8/dist-packages/transformers/trainer.py:1973 in _inner_training_loop │ │ │ │ 1970 │ │ │ │ │ with model.no_sync(): │ │ 1971 │ │ │ │ │ │ tr_loss_step = self.training_step(model, inputs) │ │ 1972 │ │ │ │ else: │ │ ❱ 1973 │ │ │ │ │ tr_loss_step = self.training_step(model, inputs) │ │ 1974 │ │ │ │ │ │ 1975 │ │ │ │ if ( │ │ 1976 │ │ │ │ │ args.logging_nan_inf_filter │ │ │ │ /usr/local/lib/python3.8/dist-packages/transformers/trainer.py:2787 in training_step │ │ │ │ 2784 │ │ │ return loss_mb.reduce_mean().detach().to(self.args.device) │ │ 2785 │ │ │ │ 2786 │ │ with self.compute_loss_context_manager(): │ │ ❱ 2787 │ │ │ loss = self.compute_loss(model, inputs) │ │ 2788 │ │ │ │ 2789 │ │ if self.args.n_gpu > 1: │ │ 2790 │ │ │ loss = loss.mean() # mean() to average on multi-gpu parallel training │ │ │ │ /usr/local/lib/python3.8/dist-packages/transformers/trainer.py:2819 in compute_loss │ │ │ │ 2816 │ │ │ labels = inputs.pop("labels") │ │ 2817 │ │ else: │ │ 2818 │ │ │ labels = None │ │ ❱ 2819 │ │ outputs = model(inputs) │ │ 2820 │ │ # Save past state if it exists │ │ 2821 │ │ # TODO: this needs to be fixed and made cleaner later. 
│ │ 2822 │ │ if self.args.past_index >= 0: │ │ │ │ /usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py:1501 in _call_impl │ │ │ │ 1498 │ │ if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks │ │ 1499 │ │ │ │ or _global_backward_pre_hooks or _global_backward_hooks │ │ 1500 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │ │ ❱ 1501 │ │ │ return forward_call(args, kwargs) │ │ 1502 │ │ # Do not call functions when jit is used │ │ 1503 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │ │ 1504 │ │ backward_pre_hooks = [] │ │ │ │ /usr/local/lib/python3.8/dist-packages/peft/peft_model.py:575 in forward │ │ │ │ 572 │ │ kwargs, │ │ 573 │ ): │ │ 574 │ │ if not isinstance(self.peft_config, PromptLearningConfig): │ │ ❱ 575 │ │ │ return self.base_model( │ │ 576 │ │ │ │ input_ids=input_ids, │ │ 577 │ │ │ │ attention_mask=attention_mask, │ │ 578 │ │ │ │ inputs_embeds=inputs_embeds, │ │ │ │ /usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py:1501 in _call_impl │ │ │ │ 1498 │ │ if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks │ │ 1499 │ │ │ │ or _global_backward_pre_hooks or _global_backward_hooks │ │ 1500 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │ │ ❱ 1501 │ │ │ return forward_call(args, kwargs) │ │ 1502 │ │ # Do not call functions when jit is used │ │ 1503 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │ │ 1504 │ │ backward_pre_hooks = []
Dear @apachemycat,
Thank you for bringing this issue to our attention. We understand that you are encountering some problems while trying to fine-tune guanaco-33b-merged with default parameters. We apologize for the inconvenience caused, and we would be happy to assist you with a solution.
Based on the error messages you shared, it seems that there are some attribute errors and recursion depth exceeded errors related to the tokenizer and padding in your code. Here is a possible solution to address these issues:
# Tokenizer
tokenizer = AutoTokenizer.from_pretrained(
args.model_name_or_path,
cache_dir=args.cache_dir,
padding_side="right",
use_fast=True,
)
if tokenizer.pad_token_id is None:
tokenizer.add_special_tokens({'pad_token': '[PAD]'})
# Rest of your code...
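By explicitly adding the `[PAD]` token as a special token and assigning it to `pad_token`, we ensure that the tokenizer has a proper padding token. This should resolve the attribute errors and recursion depth exceeded errors related to padding. Note that adding a token grows the vocabulary, so the model's embeddings must also be resized with `model.resize_token_embeddings(len(tokenizer))`; otherwise the new token id falls outside the embedding table.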
Please give this solution a try and let us know if it resolves the issue for you. If you encounter any further problems or have any additional questions, please don't hesitate to ask. We are here to help!
Best regards, @hemangjoshi37a
The code already has the logic for adding the [PAD] token, as follows: if tokenizer.pad_token is None: smart_tokenizer_and_embedding_resize( special_tokens_dict=dict(pad_token=DEFAULT_PAD_TOKEN), tokenizer=tokenizer, model=model, )
""" print("token ..."+str(special_tokens_dict)) num_new_tokens = tokenizer.add_special_tokens(special_tokens_dict) model.resize_token_embeddings(len(tokenizer))
Runtime output: loaded model Using pad_token, but it is not set yet. token ...{'pad_token': '[PAD]'}
But then another error occurs: AttributeError: 'CastOutputToFloat' object has no attribute 'weight'
│ │
│ /usr/local/lib/python3.8/dist-packages/transformers/modeling_utils.py:1384 in │
│ resize_token_embeddings │
│ │
│ 1381 │ │ Return: │
│ 1382 │ │ │ `torch.nn.Embedding`: Pointer to the input tokens Embeddings Module of the m │
│ 1383 │ │ """ │
│ ❱ 1384 │ │ model_embeds = self._resize_token_embeddings(new_num_tokens) │
│ 1385 │ │ if new_num_tokens is None: │
│ 1386 │ │ │ return model_embeds │
│ 1387 │
│ │
│ /usr/local/lib/python3.8/dist-packages/transformers/modeling_utils.py:1405 in │
│ _resize_token_embeddings │
│ │
│ 1402 │ │ # if word embeddings are not tied, make sure that lm head is resized as well │
│ 1403 │ │ if self.get_output_embeddings() is not None and not self.config.tie_word_embeddi │
│ 1404 │ │ │ old_lm_head = self.get_output_embeddings() │
│ ❱ 1405 │ │ │ new_lm_head = self._get_resized_lm_head(old_lm_head, new_num_tokens) │
│ 1406 │ │ │ self.set_output_embeddings(new_lm_head) │
│ 1407 │ │ │
│ 1408 │ │ return self.get_input_embeddings() │
│ │
│ /usr/local/lib/python3.8/dist-packages/transformers/modeling_utils.py:1509 in │
│ _get_resized_lm_head │
│ │
│ 1506 │ │ │ │ ) │
│ 1507 │ │ else: │
│ 1508 │ │ │ old_num_tokens, old_lm_head_dim = ( │
│ ❱ 1509 │ │ │ │ old_lm_head.weight.size() if not transposed else old_lm_head.weight.t(). │
│ 1510 │ │ │ ) │
│ 1511 │ │ │
│ 1512 │ │ if old_num_tokens == new_num_tokens: │
│ │
│ /usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py:1614 in __getattr__ │
│ │
│ 1611 │ │ │ modules = self.__dict__['_modules'] │
│ 1612 │ │ │ if name in modules: │
│ 1613 │ │ │ │ return modules[name] │
│ ❱ 1614 │ │ raise AttributeError("'{}' object has no attribute '{}'".format( │
│ 1615 │ │ │ type(self).__name__, name)) │
│ 1616 │ │
│ 1617 │ def __setattr__(self, name: str, value: Union[Tensor, 'Module']) -> None: │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
AttributeError: 'CastOutputToFloat' object has no attribute 'weight'
I can't resolve this problem... please help.
I also ran into the same issue: "AttributeError: 'CastOutputToFloat' object has no attribute 'weight'". The pretrained LLM I used is llama-7b-hf.
loading base model /models/guanaco-33b-merged... Loading checkpoint shards: 100%|██████████████████████████████████████████████████████████████████| 7/7 [01:12<00:00, 10.30s/it] adding LoRA modules...
/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py:1614 in getattr │ │ │ │ 1611 │ │ │ modules = self.dict['_modules'] │ │ 1612 │ │ │ if name in modules: │ │ 1613 │ │ │ │ return modules[name] │ │ ❱ 1614 │ │ raise AttributeError("'{}' object has no attribute '{}'".format( │ │ 1615 │ │ │ type(self).name, name)) │ │ 1616 │ │ │ 1617 │ def setattr(self, name: str, value: Union[Tensor, 'Module']) -> None: │ ╰──────────────────────────────────────────────────────────────────────────────────────────────────╯ AttributeError: 'CastOutputToFloat' object has no attribute 'weight' root@5e0ba28fefc9:/wzh/qlora# root@5e0ba28fefc9:/wzh/qlora# CUDA_VISIBLE_DEVICES=0 PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:24 sh scripts/finetune.sh