modelscope / modelscope-agent

ModelScope-Agent: An agent framework connecting models in ModelScope with the world
https://modelscope-agent.readthedocs.io/en/latest/
Apache License 2.0

"addmm_impl_cpu_" not implemented for 'Half' #14

Closed · amwork2020 closed this 1 year ago

amwork2020 commented 1 year ago

When I run the example agent.run('pip install的时候有些包下载特别慢怎么办'), I get the following error:

File ~/modelscope-agent/modelscope_agent/agent.py:143, in AgentExecutor.run(self, task, remote, print_info)
    141     # generate prompt and call llm
    142     prompt = self.prompt_generator.generate(llm_result, exec_result)
--> 143     llm_result = self.llm.generate(prompt)
    144     if print_info:
    145         print(f'|prompt{idx}: {prompt}')

File ~/modelscope-agent/modelscope_agent/llm/local_llm.py:69, in LocalLLM.generate(self, prompt)
     66 def generate(self, prompt):
     68     if self.custom_chat and self.model.chat:
---> 69         response = self.model.chat(
     70             self.tokenizer, prompt, history=[], system='')[0]
     71     else:
     72         response = self.chat(prompt)

File ~/.cache/huggingface/modules/transformers_modules/MSAgent-Qwen-7B/modeling_qwen.py:1010, in QWenLMHeadModel.chat(self, tokenizer, query, history, system, append_history, stream, stop_words_ids, **kwargs)
   1006 stop_words_ids.extend(get_stop_words_ids(
   1007     self.generation_config.chat_format, tokenizer
   1008 ))
   1009 input_ids = torch.tensor([context_tokens]).to(self.device)
-> 1010 outputs = self.generate(
   1011     input_ids,
   1012     stop_words_ids = stop_words_ids,
   1013     return_dict_in_generate = False,
   1014     **kwargs,
   1015 )
   1017 response = decode_tokens(
   1018     outputs[0],
   1019     tokenizer,
   (...)
   1024     errors='replace'
   1025 )
   1027 if append_history:

File ~/.cache/huggingface/modules/transformers_modules/MSAgent-Qwen-7B/modeling_qwen.py:1119, in QWenLMHeadModel.generate(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, **kwargs)
   1116 else:
   1117     logits_processor.append(stop_words_logits_processor)
-> 1119 return super().generate(
   1120     inputs,
   1121     generation_config=generation_config,
   1122     logits_processor=logits_processor,
   1123     stopping_criteria=stopping_criteria,
   1124     prefix_allowed_tokens_fn=prefix_allowed_tokens_fn,
   1125     synced_gpus=synced_gpus,
   1126     assistant_model=assistant_model,
   1127     streamer=streamer,
   1128     **kwargs,
   1129 )

File ~/anaconda3/envs/modelscope/lib/python3.10/site-packages/torch/utils/_contextlib.py:115, in context_decorator.<locals>.decorate_context(*args, **kwargs)
    112 @functools.wraps(func)
    113 def decorate_context(*args, **kwargs):
    114     with ctx_factory():
--> 115         return func(*args, **kwargs)

File ~/anaconda3/envs/modelscope/lib/python3.10/site-packages/transformers/generation/utils.py:1588, in GenerationMixin.generate(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, **kwargs)
   1580 input_ids, model_kwargs = self._expand_inputs_for_generation(
   1581     input_ids=input_ids,
   1582     expand_size=generation_config.num_return_sequences,
   1583     is_encoder_decoder=self.config.is_encoder_decoder,
   1584     **model_kwargs,
   1585 )
   1587 # 13. run sample
-> 1588 return self.sample(
   1589     input_ids,
   1590     logits_processor=logits_processor,
   1591     logits_warper=logits_warper,
   1592     stopping_criteria=stopping_criteria,
   1593     pad_token_id=generation_config.pad_token_id,
   1594     eos_token_id=generation_config.eos_token_id,
   1595     output_scores=generation_config.output_scores,
   1596     return_dict_in_generate=generation_config.return_dict_in_generate,
   1597     synced_gpus=synced_gpus,
   1598     streamer=streamer,
   1599     **model_kwargs,
   1600 )
   1602 elif is_beam_gen_mode:
   1603     if generation_config.num_return_sequences > generation_config.num_beams:

File ~/anaconda3/envs/modelscope/lib/python3.10/site-packages/transformers/generation/utils.py:2642, in GenerationMixin.sample(self, input_ids, logits_processor, stopping_criteria, logits_warper, max_length, pad_token_id, eos_token_id, output_attentions, output_hidden_states, output_scores, return_dict_in_generate, synced_gpus, streamer, **model_kwargs)
   2639 model_inputs = self.prepare_inputs_for_generation(input_ids, **model_kwargs)
   2641 # forward pass to get next token
-> 2642 outputs = self(
   2643     **model_inputs,
   2644     return_dict=True,
   2645     output_attentions=output_attentions,
   2646     output_hidden_states=output_hidden_states,
   2647 )
   2649 if synced_gpus and this_peer_finished:
   2650     continue  # don't waste resources running the code we don't need

File ~/anaconda3/envs/modelscope/lib/python3.10/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

File ~/.cache/huggingface/modules/transformers_modules/MSAgent-Qwen-7B/modeling_qwen.py:925, in QWenLMHeadModel.forward(self, input_ids, past_key_values, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, encoder_hidden_states, encoder_attention_mask, labels, use_cache, output_attentions, output_hidden_states, return_dict)
    903 def forward(
    904     self,
    905     input_ids: Optional[torch.LongTensor] = None,
    (...)
    918     return_dict: Optional[bool] = None,
    919 ) -> Union[Tuple, CausalLMOutputWithPast]:
    921     return_dict = (
    922         return_dict if return_dict is not None else self.config.use_return_dict
    923     )
--> 925     transformer_outputs = self.transformer(
    926         input_ids,
    927         past_key_values=past_key_values,
    928         attention_mask=attention_mask,
    929         token_type_ids=token_type_ids,
    930         position_ids=position_ids,
    931         head_mask=head_mask,
    932         inputs_embeds=inputs_embeds,
    933         encoder_hidden_states=encoder_hidden_states,
    934         encoder_attention_mask=encoder_attention_mask,
    935         use_cache=use_cache,
    936         output_attentions=output_attentions,
    937         output_hidden_states=output_hidden_states,
    938         return_dict=return_dict,
    939     )
    940     hidden_states = transformer_outputs[0]
    942     lm_logits = self.lm_head(hidden_states)

File ~/anaconda3/envs/modelscope/lib/python3.10/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

File ~/.cache/huggingface/modules/transformers_modules/MSAgent-Qwen-7B/modeling_qwen.py:766, in QWenModel.forward(self, input_ids, past_key_values, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, encoder_hidden_states, encoder_attention_mask, use_cache, output_attentions, output_hidden_states, return_dict)
    756     outputs = torch.utils.checkpoint.checkpoint(
    757         create_custom_forward(block),
    758         hidden_states,
    (...)
    763         encoder_attention_mask,
    764     )
    765 else:
--> 766     outputs = block(
    767         hidden_states,
    768         layer_past=layer_past,
    769         attention_mask=attention_mask,
    770         head_mask=head_mask[i],
    771         encoder_hidden_states=encoder_hidden_states,
    772         encoder_attention_mask=encoder_attention_mask,
    773         use_cache=use_cache,
    774         output_attentions=output_attentions,
    775     )
    777 hidden_states = outputs[0]
    778 if use_cache is True:

File ~/anaconda3/envs/modelscope/lib/python3.10/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

File ~/.cache/huggingface/modules/transformers_modules/MSAgent-Qwen-7B/modeling_qwen.py:523, in QWenBlock.forward(self, hidden_states, layer_past, attention_mask, head_mask, encoder_hidden_states, encoder_attention_mask, use_cache, output_attentions)
    510 def forward(
    511     self,
    512     hidden_states: Optional[Tuple[torch.FloatTensor]],
    (...)
    519     output_attentions: Optional[bool] = False,
    520 ):
    521     layernorm_output = self.ln_1(hidden_states)
--> 523     attn_outputs = self.attn(
    524         layernorm_output,
    525         layer_past=layer_past,
    526         attention_mask=attention_mask,
    527         head_mask=head_mask,
    528         use_cache=use_cache,
    529         output_attentions=output_attentions,
    530     )
    531     attn_output = attn_outputs[0]
    533     outputs = attn_outputs[1:]

File ~/anaconda3/envs/modelscope/lib/python3.10/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

File ~/.cache/huggingface/modules/transformers_modules/MSAgent-Qwen-7B/modeling_qwen.py:367, in QWenAttention.forward(self, hidden_states, layer_past, attention_mask, head_mask, encoder_hidden_states, encoder_attention_mask, output_attentions, use_cache)
    355 def forward(
    356     self,
    357     hidden_states: Optional[Tuple[torch.FloatTensor]],
    (...)
    364     use_cache: Optional[bool] = False,
    365 ):
--> 367     mixed_x_layer = self.c_attn(hidden_states)
    368     query, key, value = mixed_x_layer.split(self.split_size, dim=2)
    370     query = self._split_heads(query, self.num_heads, self.head_dim)

File ~/anaconda3/envs/modelscope/lib/python3.10/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

File ~/anaconda3/envs/modelscope/lib/python3.10/site-packages/torch/nn/modules/linear.py:114, in Linear.forward(self, input)
    113 def forward(self, input: Tensor) -> Tensor:
--> 114     return F.linear(input, self.weight, self.bias)

RuntimeError: "addmm_impl_cpu_" not implemented for 'Half'

lcl6679292 commented 1 year ago

Hello, you are most likely running the agent on CPU. Half precision (fp16) is not currently supported on CPU, which is why this error is raised. We recommend running in a GPU environment. The ModelScope community notebooks provide a free GPU quota; you are welcome to use them.

ModelScope community notebook link: https://modelscope.cn/my/mynotebook/preset A detailed tutorial is available here: https://mp.weixin.qq.com/s/L3GiV2QHeybhVZSg_g_JRw
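
For context, here is a minimal sketch (not from the original thread; it assumes a torch build like the one in the traceback, where CPU fp16 matmuls are unimplemented) that reproduces the error and shows the two usual ways out: run the fp16 model on a GPU, or upcast to float32 for CPU:

```python
import torch

# A Half (fp16) linear layer on CPU hits the same missing kernel as the issue:
# F.linear on Half tensors has no CPU addmm implementation in this torch build.
layer = torch.nn.Linear(4, 4).half()
x = torch.randn(1, 4, dtype=torch.float16)

try:
    layer(x)
except RuntimeError as e:
    print(e)  # "addmm_impl_cpu_" not implemented for 'Half'

if torch.cuda.is_available():
    # Workaround 1: keep fp16, but run on the GPU.
    print(layer.cuda()(x.cuda()))
else:
    # Workaround 2: stay on CPU, but upcast everything to float32.
    print(layer.float()(x.float()))
```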

amwork2020 commented 1 year ago

My machine is a GPU machine.

lcl6679292 commented 1 year ago

After the hub model has started, and before calling agent.run, could you check whether any GPU memory is actually in use? If convenient, please share a screenshot.
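
If a screenshot is not handy, a programmatic check along these lines (a sketch, not part of the original reply) shows whether the model weights actually landed on the GPU:

```python
import torch

if torch.cuda.is_available():
    # Nonzero allocated memory after loading the model (and before agent.run)
    # means the weights are resident on the GPU rather than the CPU.
    print(f"allocated: {torch.cuda.memory_allocated() / 2**30:.2f} GiB")
    print(f"reserved:  {torch.cuda.memory_reserved() / 2**30:.2f} GiB")
else:
    print("CUDA not available: the model can only have been loaded on CPU")
```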

wenmengzhou commented 1 year ago

Run print(torch.cuda.is_available()) to check whether torch can see the GPU.
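
A slightly fuller version of that diagnostic (an illustrative sketch, assuming nothing beyond stock torch):

```python
import torch

print(torch.__version__)          # torch build in use
print(torch.version.cuda)         # None on a CPU-only build of torch
print(torch.cuda.is_available())  # False means every tensor stays on the CPU
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))  # which GPU torch will use
```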

amwork2020 commented 1 year ago

> Run print(torch.cuda.is_available()) to check whether torch can see the GPU.

I set up a fresh environment and it works now; I am not sure what went wrong.