Open Zzv213 opened 4 months ago
Hi @Zzv213, if possible, could you please provide a small, self-contained reproducible example that demonstrates the error?
According to the NVML docs, this error suggests the GPU/system is unstable or in a bad state. Should the error code from any call through DriverAPI::get() be printed in the assertion message?
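Until the assert prints that code, one way to see the underlying NVML status on the affected machine is to initialize NVML directly from Python. A minimal diagnostic sketch, assuming the pynvml bindings (nvidia-ml-py) are installed; this is not part of FastEdit or PyTorch, only a way to surface the raw error code:

import pynvml

try:
    pynvml.nvmlInit()
    print("NVML initialized, driver:", pynvml.nvmlSystemGetDriverVersion())
    print("device count:", pynvml.nvmlDeviceGetCount())
    pynvml.nvmlShutdown()
except pynvml.NVMLError as err:
    # err.value is the NVML_ERROR_* code that the allocator assert currently does not show
    print("NVML call failed:", err, "(error code:", err.value, ")")

If this fails with e.g. NVML_ERROR_DRIVER_NOT_LOADED or NVML_ERROR_UNKNOWN, the problem is in the driver/NVML layer rather than in PyTorch itself.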
Environment:
Driver Version: 470.161.03
CUDA Driver Version: 12.4
nvidia-cuda-cupti-cu12 12.1.105
nvidia-cuda-nvrtc-cu12 12.1.105
nvidia-cuda-runtime-cu12 12.1.105
torch 2.4.1
transformers 4.44.2
Install FastEdit:
git clone https://github.com/nctu6/FastEdit
conda create -n fastedit python=3.10
conda activate fastedit
cd FastEdit
pip install -r requirements.txt
Download the model:
git lfs clone https://huggingface.co/Qwen/Qwen2.5-14B-Instruct
The error occurred while running:
python -m fastedit.editor \
--data data/example.json \
--model Qwen2.5-14B-Instruct \
--config qwen2.5-14b \
--template qwen \
--output logs/qwen2.5-14b-inst
Error message:
hidden_states = self.mlp(hidden_states)
  File "/home/bhlin6nctu/miniconda3/envs/fastedit/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/home/bhlin6nctu/miniconda3/envs/fastedit/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1562, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/bhlin6nctu/miniconda3/envs/fastedit/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 223, in forward
    return self.down_proj(self.act_fn(self.gate_proj(hidden_state)) * self.up_proj(hidden_state))
RuntimeError: NVML_SUCCESS == DriverAPI::get()->nvmlInit_v2_() INTERNAL ASSERT FAILED at "../c10/cuda/CUDACachingAllocator.cpp":806, please report a bug to PyTorch.
@nctu6 I got the same error
🐛 Describe the bug
'''
checkpoint_path = './llama_relevance_results'
training_args = transformers.TrainingArguments(
    remove_unused_columns=False,  # whether or not to automatically remove the columns unused by the model forward method
)
print(f"training_args:\n{training_args}")
data_collator = transformers.DataCollatorForSeq2Seq(tokenizer, pad_to_multiple_of=8, return_tensors="pt", padding=True)
trainer = transformers.Trainer(model=model, train_dataset=dataset, args=training_args, data_collator=data_collator, callbacks=[SavePeftModelCallback])
model.config.use_cache = False  # silence the warnings. Please re-enable for inference!
for name, module in model.named_modules():
    if isinstance(module, LoraLayer):
        if module.weight.dtype == torch.float32:
            module = module.to(torch.bfloat16)
trainer.train()
'''
''' --------------------------------------------------------------------------- RuntimeError Traceback (most recent call last) Cell In[29], line 52 49 if module.weight.dtype == torch.float32: 50 module = module.to(torch.bfloat16) ---> 52 trainer.train()
File ~/ftenv/lib/python3.10/site-packages/transformers/trainer.py:1932, in Trainer.train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs) 1930 hf_hub_utils.enable_progress_bars() 1931 else: -> 1932 return inner_training_loop( 1933 args=args, 1934 resume_from_checkpoint=resume_from_checkpoint, 1935 trial=trial, 1936 ignore_keys_for_eval=ignore_keys_for_eval, 1937 )
File ~/ftenv/lib/python3.10/site-packages/transformers/trainer.py:2268, in Trainer._inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval) 2265 self.control = self.callback_handler.on_step_begin(args, self.state, self.control) 2267 with self.accelerator.accumulate(model): -> 2268 tr_loss_step = self.training_step(model, inputs) 2270 if ( 2271 args.logging_nan_inf_filter 2272 and not is_torch_xla_available() 2273 and (torch.isnan(tr_loss_step) or torch.isinf(tr_loss_step)) 2274 ): 2275 # if loss is nan or inf simply add the average of previous logged losses 2276 tr_loss += tr_loss / (1 + self.state.global_step - self._globalstep_last_logged)
File ~/ftenv/lib/python3.10/site-packages/transformers/trainer.py:3307, in Trainer.training_step(self, model, inputs) 3304 return loss_mb.reduce_mean().detach().to(self.args.device) 3306 with self.compute_loss_context_manager(): -> 3307 loss = self.compute_loss(model, inputs) 3309 del inputs 3311 kwargs = {}
File ~/ftenv/lib/python3.10/site-packages/transformers/trainer.py:3338, in Trainer.compute_loss(self, model, inputs, return_outputs) 3336 else: 3337 labels = None -> 3338 outputs = model(**inputs) 3339 # Save past state if it exists 3340 # TODO: this needs to be fixed and made cleaner later. 3341 if self.args.past_index >= 0:
File ~/ftenv/lib/python3.10/site-packages/torch/nn/modules/module.py:1532, in Module._wrapped_call_impl(self, *args, **kwargs) 1530 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc] 1531 else: -> 1532 return self._call_impl(*args, **kwargs)
File ~/ftenv/lib/python3.10/site-packages/torch/nn/modules/module.py:1541, in Module._call_impl(self, *args, **kwargs) 1536 # If we don't have any hooks, we want to skip the rest of the logic in 1537 # this function, and just call forward. 1538 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks 1539 or _global_backward_pre_hooks or _global_backward_hooks 1540 or _global_forward_hooks or _global_forward_pre_hooks): -> 1541 return forward_call(*args, **kwargs) 1543 try: 1544 result = None
File ~/ftenv/lib/python3.10/site-packages/accelerate/utils/operations.py:819, in convert_outputs_to_fp32.<locals>.forward(*args, **kwargs)
818 def forward(*args, **kwargs):
--> 819 return model_forward(*args, **kwargs)
File ~/ftenv/lib/python3.10/site-packages/accelerate/utils/operations.py:807, in ConvertOutputsToFp32.__call__(self, *args, **kwargs) 806 def __call__(self, *args, **kwargs): --> 807 return convert_to_fp32(self.model_forward(*args, **kwargs))
File ~/ftenv/lib/python3.10/site-packages/torch/amp/autocast_mode.py:16, in autocast_decorator.<locals>.decorate_autocast(*args, **kwargs)
13 @functools.wraps(func)
14 def decorate_autocast(*args, **kwargs):
15 with autocast_instance:
---> 16 return func(*args, **kwargs)
File ~/ftenv/lib/python3.10/site-packages/peft/peft_model.py:1430, in PeftModelForCausalLM.forward(self, input_ids, attention_mask, inputs_embeds, labels, output_attentions, output_hidden_states, return_dict, task_ids, **kwargs) 1428 with self._enable_peft_forward_hooks(**kwargs): 1429 kwargs = {k: v for k, v in kwargs.items() if k not in self.special_peft_forward_args} -> 1430 return self.base_model( 1431 input_ids=input_ids, 1432 attention_mask=attention_mask, 1433 inputs_embeds=inputs_embeds, 1434 labels=labels, 1435 output_attentions=output_attentions, 1436 output_hidden_states=output_hidden_states, 1437 return_dict=return_dict, 1438 **kwargs, 1439 ) 1441 batch_size = _get_batch_size(input_ids, inputs_embeds) 1442 if attention_mask is not None: 1443 # concat prompt attention mask
File ~/ftenv/lib/python3.10/site-packages/torch/nn/modules/module.py:1532, in Module._wrapped_call_impl(self, *args, **kwargs) 1530 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc] 1531 else: -> 1532 return self._call_impl(*args, **kwargs)
File ~/ftenv/lib/python3.10/site-packages/torch/nn/modules/module.py:1541, in Module._call_impl(self, *args, **kwargs) 1536 # If we don't have any hooks, we want to skip the rest of the logic in 1537 # this function, and just call forward. 1538 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks 1539 or _global_backward_pre_hooks or _global_backward_hooks 1540 or _global_forward_hooks or _global_forward_pre_hooks): -> 1541 return forward_call(*args, **kwargs) 1543 try: 1544 result = None
File ~/ftenv/lib/python3.10/site-packages/peft/tuners/tuners_utils.py:179, in BaseTuner.forward(self, *args, **kwargs) 178 def forward(self, *args: Any, **kwargs: Any): --> 179 return self.model.forward(*args, **kwargs)
File ~/ftenv/lib/python3.10/site-packages/accelerate/hooks.py:169, in add_hook_to_module.<locals>.new_forward(module, *args, **kwargs)
167 output = module._old_forward(*args, **kwargs)
168 else:
--> 169 output = module._old_forward(*args, **kwargs)
170 return module._hf_hook.post_forward(module, output)
File ~/ftenv/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py:1174, in LlamaForCausalLM.forward(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, labels, use_cache, output_attentions, output_hidden_states, return_dict, cache_position) 1171 return_dict = return_dict if return_dict is not None else self.config.use_return_dict 1173 # decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn) -> 1174 outputs = self.model( 1175 input_ids=input_ids, 1176 attention_mask=attention_mask, 1177 position_ids=position_ids, 1178 past_key_values=past_key_values, 1179 inputs_embeds=inputs_embeds, 1180 use_cache=use_cache, 1181 output_attentions=output_attentions, 1182 output_hidden_states=output_hidden_states, 1183 return_dict=return_dict, 1184 cache_position=cache_position, 1185 ) 1187 hidden_states = outputs[0] 1188 if self.config.pretraining_tp > 1:
File ~/ftenv/lib/python3.10/site-packages/torch/nn/modules/module.py:1532, in Module._wrapped_call_impl(self, *args, **kwargs) 1530 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc] 1531 else: -> 1532 return self._call_impl(*args, **kwargs)
File ~/ftenv/lib/python3.10/site-packages/torch/nn/modules/module.py:1541, in Module._call_impl(self, *args, **kwargs) 1536 # If we don't have any hooks, we want to skip the rest of the logic in 1537 # this function, and just call forward. 1538 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks 1539 or _global_backward_pre_hooks or _global_backward_hooks 1540 or _global_forward_hooks or _global_forward_pre_hooks): -> 1541 return forward_call(*args, **kwargs) 1543 try: 1544 result = None
File ~/ftenv/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py:967, in LlamaModel.forward(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, use_cache, output_attentions, output_hidden_states, return_dict, cache_position) 964 all_hidden_states += (hidden_states,) 966 if self.gradient_checkpointing and self.training: --> 967 layer_outputs = self._gradient_checkpointing_func( 968 decoder_layer.__call__, 969 hidden_states, 970 causal_mask, 971 position_ids, 972 past_key_values, 973 output_attentions, 974 use_cache, 975 cache_position, 976 ) 977 else: 978 layer_outputs = decoder_layer( 979 hidden_states, 980 attention_mask=causal_mask, (...) 985 cache_position=cache_position, 986 )
File ~/ftenv/lib/python3.10/site-packages/torch/_compile.py:24, in _disable_dynamo.<locals>.inner(*args, **kwargs)
20 @functools.wraps(fn)
21 def inner(*args, **kwargs):
22 import torch._dynamo
---> 24 return torch._dynamo.disable(fn, recursive)(*args, **kwargs)
File ~/ftenv/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py:451, in _TorchDynamoContext.__call__.<locals>._fn(*args, **kwargs)
449 prior = set_eval_frame(callback)
450 try:
--> 451 return fn(*args, **kwargs)
452 finally:
453 set_eval_frame(prior)
File ~/ftenv/lib/python3.10/site-packages/torch/_dynamo/external_utils.py:36, in wrap_inline.<locals>.inner(*args, **kwargs)
34 @functools.wraps(fn)
35 def inner(*args, **kwargs):
---> 36 return fn(*args, **kwargs)
File ~/ftenv/lib/python3.10/site-packages/torch/utils/checkpoint.py:487, in checkpoint(function, use_reentrant, context_fn, determinism_check, debug, *args, **kwargs) 482 if context_fn is not noop_context_fn or debug is not False: 483 raise ValueError( 484 "Passing `context_fn` or `debug` is only supported when " 485 "use_reentrant=False." 486 ) --> 487 return CheckpointFunction.apply(function, preserve, *args) 488 else: 489 gen = _checkpoint_without_reentrant_generator( 490 function, preserve, context_fn, determinism_check, debug, *args, **kwargs 491 )
File ~/ftenv/lib/python3.10/site-packages/torch/autograd/function.py:598, in Function.apply(cls, *args, **kwargs) 595 if not torch._C._are_functorch_transforms_active(): 596 # See NOTE: [functorch vjp and autograd interaction] 597 args = _functorch.utils.unwrap_dead_wrappers(args) --> 598 return super().apply(*args, **kwargs) # type: ignore[misc] 600 if not is_setup_ctx_defined: 601 raise RuntimeError( 602 "In order to use an autograd.Function with functorch transforms " 603 "(vmap, grad, jvp, jacrev, ...), it must override the setup_context " 604 "staticmethod. For more details, please see " 605 "https://pytorch.org/docs/master/notes/extending.func.html" 606 )
File ~/ftenv/lib/python3.10/site-packages/torch/utils/checkpoint.py:262, in CheckpointFunction.forward(ctx, run_function, preserve_rng_state, *args) 259 ctx.save_for_backward(*tensor_inputs) 261 with torch.no_grad(): --> 262 outputs = run_function(*args) 263 return outputs
File ~/ftenv/lib/python3.10/site-packages/torch/nn/modules/module.py:1532, in Module._wrapped_call_impl(self, *args, **kwargs) 1530 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc] 1531 else: -> 1532 return self._call_impl(*args, **kwargs)
File ~/ftenv/lib/python3.10/site-packages/torch/nn/modules/module.py:1541, in Module._call_impl(self, *args, **kwargs) 1536 # If we don't have any hooks, we want to skip the rest of the logic in 1537 # this function, and just call forward. 1538 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks 1539 or _global_backward_pre_hooks or _global_backward_hooks 1540 or _global_forward_hooks or _global_forward_pre_hooks): -> 1541 return forward_call(*args, **kwargs) 1543 try: 1544 result = None
File ~/ftenv/lib/python3.10/site-packages/accelerate/hooks.py:169, in add_hook_to_module.<locals>.new_forward(module, *args, **kwargs)
167 output = module._old_forward(*args, **kwargs)
168 else:
--> 169 output = module._old_forward(*args, **kwargs)
170 return module._hf_hook.post_forward(module, output)
File ~/ftenv/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py:732, in LlamaDecoderLayer.forward(self, hidden_states, attention_mask, position_ids, past_key_value, output_attentions, use_cache, cache_position, **kwargs) 730 residual = hidden_states 731 hidden_states = self.post_attention_layernorm(hidden_states) --> 732 hidden_states = self.mlp(hidden_states) 733 hidden_states = residual + hidden_states 735 outputs = (hidden_states,)
File ~/ftenv/lib/python3.10/site-packages/torch/nn/modules/module.py:1532, in Module._wrapped_call_impl(self, *args, **kwargs) 1530 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc] 1531 else: -> 1532 return self._call_impl(*args, **kwargs)
File ~/ftenv/lib/python3.10/site-packages/torch/nn/modules/module.py:1541, in Module._call_impl(self, *args, **kwargs) 1536 # If we don't have any hooks, we want to skip the rest of the logic in 1537 # this function, and just call forward. 1538 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks 1539 or _global_backward_pre_hooks or _global_backward_hooks 1540 or _global_forward_hooks or _global_forward_pre_hooks): -> 1541 return forward_call(*args, **kwargs) 1543 try: 1544 result = None
File ~/ftenv/lib/python3.10/site-packages/accelerate/hooks.py:169, in add_hook_to_module.<locals>.new_forward(module, *args, **kwargs)
167 output = module._old_forward(*args, **kwargs)
168 else:
--> 169 output = module._old_forward(*args, **kwargs)
170 return module._hf_hook.post_forward(module, output)
File ~/ftenv/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py:215, in LlamaMLP.forward(self, x) 213 down_proj = sum(down_proj) 214 else: --> 215 down_proj = self.down_proj(self.act_fn(self.gate_proj(x)) * self.up_proj(x)) 217 return down_proj
File ~/ftenv/lib/python3.10/site-packages/torch/nn/modules/module.py:1532, in Module._wrapped_call_impl(self, *args, **kwargs) 1530 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc] 1531 else: -> 1532 return self._call_impl(*args, **kwargs)
File ~/ftenv/lib/python3.10/site-packages/torch/nn/modules/module.py:1541, in Module._call_impl(self, *args, **kwargs) 1536 # If we don't have any hooks, we want to skip the rest of the logic in 1537 # this function, and just call forward. 1538 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks 1539 or _global_backward_pre_hooks or _global_backward_hooks 1540 or _global_forward_hooks or _global_forward_pre_hooks): -> 1541 return forward_call(*args, **kwargs) 1543 try: 1544 result = None
File ~/ftenv/lib/python3.10/site-packages/peft/tuners/lora/bnb.py:480, in Linear4bit.forward(self, x, *args, **kwargs) 477 if requires_conversion: 478 output = output.to(expected_dtype) --> 480 result = result + output 482 return result
RuntimeError: NVML_SUCCESS == DriverAPI::get()->nvmlInit_v2_() INTERNAL ASSERT FAILED at "../c10/cuda/CUDACachingAllocator.cpp":813, please report a bug to PyTorch. '''
Versions
[pip3] numpy==1.26.4
[pip3] torch==2.3.1+cu121
[pip3] triton==2.3.1
[conda] Could not collect
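For completeness, the full environment report (OS, CUDA, driver, cuDNN, package versions) can be regenerated with PyTorch's bundled collector. A minimal sketch, assuming it is run inside the same ftenv environment:

from torch.utils import collect_env

# Prints the environment block that the PyTorch issue template asks for
collect_env.main()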
cc @ptrblck @msaroufim