modelscope / ms-swift

Use PEFT or full-parameter training to fine-tune 300+ LLMs and 80+ MLLMs (Qwen2, GLM4v, Internlm2.5, Yi, Llama3.1, Llava-Video, Internvl2, MiniCPM-V-2.6, Deepseek, Baichuan2, Gemma2, Phi3-Vision, ...).
https://swift.readthedocs.io/zh-cn/latest/Instruction/index.html
Apache License 2.0

Qwen 1.8B-Chat: chatting with functions fails after fine-tuning #251

Closed: grea closed this issue 6 months ago

grea commented 8 months ago
1. Below is the fine-tuning code:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

from swift.llm import DatasetName, ModelType, SftArguments, sft_main

sft_args = SftArguments(
    model_type=ModelType.qwen_1_8b_chat,
    model_cache_dir='/data2/Qwen-1_8B-Chat',
    dataset=[DatasetName.alpaca_zh, DatasetName.alpaca_en],
    train_dataset_sample=500,
    eval_steps=20,
    logging_steps=5,
    output_dir='output',
    lora_target_modules='ALL',
    self_cognition_sample=500,
    model_name=['小黄', 'xiao huang'],
    model_author=['魔塔', 'modelscope'])
output = sft_main(sft_args)
best_model_checkpoint = output['best_model_checkpoint']
print(f'best_model_checkpoint: {best_model_checkpoint}')
2. Below is the code to merge the fine-tuned checkpoint:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

from swift.llm import InferArguments, merge_lora_main, infer_main

best_model_checkpoint = 'output/qwen-1_8b-chat/v0-20231227-150304/checkpoint-62/'
infer_args = InferArguments(
    model_cache_dir='/data2/Qwen-1_8B-Chat',
    ckpt_dir=best_model_checkpoint,
    eval_human=True)
merge_lora_main(infer_args)
3. Load the merged model with Qwen's openai_api.py:

python openai_api.py --server-name 0.0.0.0 --server-port 8000 -c /data2/swift/output/qwen-1_8b-chat/v0-20231227-150304/checkpoint-62-merged

4. Chat with functions:

import openai

openai.api_base = "http://localhost:8000/v1"
openai.api_key = "none"

# Send one chat completion request, attaching Qwen-style function
# definitions when provided.
def call_qwen(messages, functions=None):
    print(messages)
    if functions:
        response = openai.ChatCompletion.create(
            model="Qwen", messages=messages, functions=functions
        )
    else:
        response = openai.ChatCompletion.create(model="Qwen", messages=messages)
    print(response)
    print(response.choices[0].message.content)
    return response

def test_2():
    functions = [
        {
            "name_for_human": "谷歌搜索",
            "name_for_model": "google_search",
            "description_for_model": "谷歌搜索是一个通用搜索引擎,可用于访问互联网、查询百科知识、了解时事新闻等。"
            + " Format the arguments as a JSON object.",
            "parameters": [
                {
                    "name": "search_query",
                    "description": "搜索关键词或短语",
                    "required": True,
                    "schema": {"type": "string"},
                }
            ],
        },
        {
            "name_for_human": "文生图",
            "name_for_model": "image_gen",
            "description_for_model": "文生图是一个AI绘画(图像生成)服务,输入文本描述,返回根据文本作画得到的图片的URL。"
            + " Format the arguments as a JSON object.",
            "parameters": [
                {
                    "name": "prompt",
                    "description": "英文关键词,描述了希望图像具有什么内容",
                    "required": True,
                    "schema": {"type": "string"},
                }
            ],
        },
    ]

    messages = [{"role": "user", "content": "你好"}]
    call_qwen(messages, functions)
    messages.append(
        {"role": "assistant", "content": "你好!很高兴见到你。有什么我可以帮忙的吗?"},
    )

    messages.append({"role": "user", "content": "谁是周杰伦"})
    call_qwen(messages, functions)
    messages.append(
        {
            "role": "assistant",
            "content": "Thought: 我应该使用Google搜索查找相关信息。",
            "function_call": {
                "name": "google_search",
                "arguments": '{"search_query": "周杰伦"}',
            },
        }
    )

    messages.append(
        {
            "role": "function",
            "name": "google_search",
            "content": "Jay Chou is a Taiwanese singer.",
        }
    )
    call_qwen(messages, functions)
    messages.append(
        {
            "role": "assistant",
            "content": "周杰伦(Jay Chou)是一位来自台湾的歌手。",
        },
    )

    messages.append({"role": "user", "content": "他老婆是谁"})
    call_qwen(messages, functions)
    messages.append(
        {
            "role": "assistant",
            "content": "Thought: 我应该使用Google搜索查找相关信息。",
            "function_call": {
                "name": "google_search",
                "arguments": '{"search_query": "周杰伦 老婆"}',
            },
        }
    )

    messages.append(
        {"role": "function", "name": "google_search", "content": "Hannah Quinlivan"}
    )
    call_qwen(messages, functions)
    messages.append(
        {
            "role": "assistant",
            "content": "周杰伦的老婆是Hannah Quinlivan。",
        },
    )

    messages.append({"role": "user", "content": "给我画个可爱的小猫吧,最好是黑猫"})
    call_qwen(messages, functions)
    messages.append(
        {
            "role": "assistant",
            "content": "Thought: 我应该使用文生图API来生成一张可爱的小猫图片。",
            "function_call": {
                "name": "image_gen",
                "arguments": '{"prompt": "cute black cat"}',
            },
        }
    )

    messages.append(
        {
            "role": "function",
            "name": "image_gen",
            "content": '{"image_url": "https://image.pollinations.ai/prompt/cute%20black%20cat"}',
        }
    )
    call_qwen(messages, functions)

print("### Test Case 2 - Use Qwen-Style Functions (函数调用,千问格式) ###")
test_2()

I have tried making just a single function-enabled chat call: occasionally it succeeds, but most of the time it fails with the following error:

    return await dependant.call(*values)
  File "/data/llm/Qwen/openai_api.py", line 395, in create_chat_completion
    stop_words = add_extra_stop_words(request.stop)
  File "/home/cis/.cache/huggingface/modules/transformers_modules/checkpoint-62-merged/modeling_qwen.py", line 1137, in chat
    outputs = self.generate(
  File "/home/cis/.cache/huggingface/modules/transformers_modules/checkpoint-62-merged/modeling_qwen.py", line 1259, in generate
    return super().generate(
  File "/home/cis/.conda/envs/llm/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
  File "/home/cis/.conda/envs/llm/lib/python3.10/site-packages/transformers/generation/utils.py", line 1764, in generate
    return self.sample(
  File "/home/cis/.conda/envs/llm/lib/python3.10/site-packages/transformers/generation/utils.py", line 2897, in sample
    next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1)
RuntimeError: probability tensor contains either `inf`, `nan` or element < 0
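A quick way to see whether the merged checkpoint itself is broken, independent of the API server, is to load it directly and inspect the weight dtype and the raw logits. This is a minimal diagnostic sketch (not from the original report); the checkpoint path is the merged directory from step 3.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

ckpt = '/data2/swift/output/qwen-1_8b-chat/v0-20231227-150304/checkpoint-62-merged'
tokenizer = AutoTokenizer.from_pretrained(ckpt, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    ckpt, device_map='auto', trust_remote_code=True).eval()

# 1. Which dtype did the merged weights end up in?
print('weight dtype:', next(model.parameters()).dtype)

# 2. Do the raw logits already contain inf/nan? If so, any sampling call
#    will fail with "probability tensor contains either inf, nan or element < 0".
inputs = tokenizer('你好', return_tensors='pt').to(model.device)
with torch.no_grad():
    logits = model(**inputs).logits
print('nan in logits:', torch.isnan(logits).any().item())
print('inf in logits:', torch.isinf(logits).any().item())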

grea commented 8 months ago

Also, calls without functions work fine.

Jintao-Huang commented 8 months ago

Check whether the fp16/bf16 dtype used for training matches the dtype used for inference.
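For reference, a minimal sketch of pinning the same dtype on both sides, assuming the installed swift version exposes a dtype argument on SftArguments and InferArguments (verify against your version's signatures):

from swift.llm import DatasetName, ModelType, SftArguments, InferArguments

# Assumption: `dtype` is accepted by both argument classes in this swift
# version; the point is simply that training and inference use one dtype.
sft_args = SftArguments(
    model_type=ModelType.qwen_1_8b_chat,
    dataset=[DatasetName.alpaca_zh, DatasetName.alpaca_en],
    dtype='fp16',  # train in fp16 ...
    output_dir='output')

infer_args = InferArguments(
    ckpt_dir='output/qwen-1_8b-chat/vX-.../checkpoint-N',  # hypothetical path
    dtype='fp16')  # ... and infer in fp16 as well

When serving with Qwen's openai_api.py, the dtype the remote modeling code picks is driven by the fp16/bf16/fp32 flags in the checkpoint's config.json, so it is worth checking those in the merged directory as well.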