baichuan-inc / Baichuan-13B

A 13B large language model developed by Baichuan Intelligent Technology
https://huggingface.co/baichuan-inc/Baichuan-13B-Chat
Apache License 2.0

Quantization error: RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cuda:1! #138

Open weichen12345 opened 1 year ago

weichen12345 commented 1 year ago

Traceback (most recent call last):
  File "/root/llm_riskraider/Baichuan-13B/cli_demo.py", line 169, in <module>
    main_news(stream=True)
  File "/root/llm_riskraider/Baichuan-13B/cli_demo.py", line 116, in main_news
    chat_news(prompt, model, tokenizer, stream)
  File "/root/llm_riskraider/Baichuan-13B/cli_demo.py", line 147, in chat_news
    for response in model.chat(tokenizer, messages, stream=True):
  File "/root/.cache/huggingface/modules/transformers_modules/Baichuan-13B-Chat/modeling_baichuan.py", line 596, in stream_generator
    for token in self.generate(input_ids, generation_config=stre...):
  File "/usr/local/miniconda3/envs/py10/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 35, in generator_context
    response = gen.send(None)
  File "/usr/local/miniconda3/envs/py10/lib/python3.10/site-packages/transformers_stream_generator/main.py", line 931, in sample_stream
    outputs = self(model_inputs, return_dict=True, output_attentions=output_attentions, ...)
  File "/usr/local/miniconda3/envs/py10/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/usr/local/miniconda3/envs/py10/lib/python3.10/site-packages/accelerate/hooks.py", line 165, in new_forward
    output = old_forward(*args, **kwargs)
  File "/root/.cache/huggingface/modules/transformers_modules/Baichuan-13B-Chat/modeling_baichuan.py", line 447, in forward
    outputs = self.model(input_ids=input_ids, attention_mask=attention_mask, past_key_values=past_key_values, ...)
  File "/usr/local/miniconda3/envs/py10/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/root/.cache/huggingface/modules/transformers_modules/Baichuan-13B-Chat/modeling_baichuan.py", line 370, in forward
    layer_outputs = decoder_layer(hidden_states, attention_mask=attention_mask, past_key_value=past_key_value, ...)
  File "/usr/local/miniconda3/envs/py10/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/usr/local/miniconda3/envs/py10/lib/python3.10/site-packages/accelerate/hooks.py", line 165, in new_forward
    output = old_forward(*args, **kwargs)
  File "/root/.cache/huggingface/modules/transformers_modules/Baichuan-13B-Chat/modeling_baichuan.py", line 190, in forward
    hidden_states = self.input_layernorm(hidden_states)
  File "/usr/local/miniconda3/envs/py10/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/usr/local/miniconda3/envs/py10/lib/python3.10/site-packages/accelerate/hooks.py", line 165, in new_forward
    output = old_forward(*args, **kwargs)
  File "/root/.cache/huggingface/modules/transformers_modules/Baichuan-13B-Chat/modeling_baichuan.py", line 74, in forward
    return self.weight * hidden_states
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cuda:1!
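The last frame is the RMSNorm forward in modeling_baichuan.py (`return self.weight * hidden_states`), so the layer's weight and the incoming activations are on different GPUs. A minimal repro sketch of the same failure, independent of the model itself (the 5120 size is only illustrative):

```python
# Illustrative sketch, not from the repo: an elementwise multiply between a
# parameter on cuda:0 and an activation on cuda:1 raises the same RuntimeError
# that the RMSNorm in modeling_baichuan.py line 74 hits. Requires >= 2 GPUs.
import torch

weight = torch.ones(5120, device="cuda:0")                 # layer weight on GPU 0
hidden_states = torch.randn(1, 1, 5120, device="cuda:1")   # activation coming from GPU 1
out = weight * hidden_states  # RuntimeError: Expected all tensors to be on the same device ...
```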

jameswu2014 commented 1 year ago

Are you running it on multiple GPUs? At the moment only single-GPU inference should be supported.
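For example, a single-GPU int8 load along the lines of the README (a sketch, assuming the model's `quantize()` helper and that one card has enough memory) avoids sharding layers across cuda:0 and cuda:1:

```python
# Sketch of a single-GPU int8 load, based on the Baichuan-13B README; adjust to
# your setup. The key point is NOT passing device_map="auto", so the whole
# model stays on one device before and after quantization.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

model_path = "baichuan-inc/Baichuan-13B-Chat"
tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16,
    trust_remote_code=True,        # no device_map="auto": do not shard across GPUs
)
model = model.quantize(8).cuda()   # int8-quantize, then move everything to cuda:0
model.generation_config = GenerationConfig.from_pretrained(model_path)
```

Alternatively, restricting the process to one card (e.g. `CUDA_VISIBLE_DEVICES=0 python cli_demo.py`) should keep accelerate from spreading the weights across devices, provided that GPU has enough memory for the quantized model.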