Vicuna v1.5 giving wrong responses in a different language when trying to do vanilla inference

Akshay1-6180 commented 1 year ago

Screenshot 2023-08-25 at 4 25 28 PM

I am not able to understand what is wrong with the code I wrote, since it's based on the same prompt template given in the repo.

import torch
import transformers
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    set_seed,
    Trainer,
    BitsAndBytesConfig,
    DataCollatorForLanguageModeling,
    TrainingArguments,
    AutoConfig,
    pipeline,
)
from peft import LoraConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training
import torch.nn as nn
from trl import SFTTrainer
from datasets import load_dataset, DatasetDict

import os

# Device that the tokenized inputs are moved to before generation.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# Vicuna v1.1/v1.5 system message. It carries no trailing whitespace or
# newline: the single-space separator before "USER:" is added when the prompt
# is assembled, and stray whitespace here makes the prompt deviate from the
# format the model was fine-tuned on.
DEFAULT_SYSTEM_PROMPT = (
    "A chat between a curious user and an artificial intelligence assistant. "
    "The assistant gives helpful, detailed, and polite answers to the user's questions."
)
# Hugging Face Hub identifier of the checkpoint to load.
MODEL_NAME = "lmsys/vicuna-13b-v1.5"

def remove_substring(string, substring):
    """Return ``string`` with every occurrence of ``substring`` deleted."""
    without_substring = string.replace(substring, "")
    return without_substring

def remove_trailing_newlines(text):
    """Return ``text`` without any newline characters at its end.

    Only ``"\\n"`` characters are dropped; other trailing whitespace (spaces,
    tabs, carriage returns) is left in place.
    """
    end = len(text)
    # Walk backwards past the run of newlines that terminates the text.
    while end > 0 and text[end - 1] == "\n":
        end -= 1
    return text[:end]

def cut_off_text(text, prompt):
    """Return the part of ``text`` that precedes the first ``prompt``.

    When ``prompt`` does not occur in ``text``, ``text`` is returned unchanged.
    """
    position = text.find(prompt)
    if position == -1:
        return text
    return text[:position]

def generate_findings_prompt(finding, prompt=DEFAULT_SYSTEM_PROMPT):
    """Build a single-turn prompt in the Vicuna v1.1/v1.5 conversation format.

    The resulting layout is ``"<system> USER: <finding> ASSISTANT:"``: exactly
    one space between the parts and nothing after the final colon.

    Args:
        finding: The user's message.
        prompt: System message placed before the user turn. Surrounding
            whitespace is ignored.

    Returns:
        The prompt string to feed to the model.
    """
    # Drop stray whitespace/newlines around the system message so the
    # separator before "USER:" is always a single space.
    system = prompt.strip()
    # No trailing space after "ASSISTANT:": a dangling space is tokenized as
    # its own token and pushes the model away from its fine-tuning format.
    turn = f"USER: {finding} ASSISTANT:"
    return f"{system} {turn}" if system else turn

def generate_code_from_prompt(model, tokenizer, input, prompt=DEFAULT_SYSTEM_PROMPT):
    """Generate the assistant's reply to a single user message.

    Args:
        model: Causal language model exposing ``generate``.
        tokenizer: Tokenizer used to encode the prompt and decode the reply.
        input: The user's message. The name shadows the builtin but is kept
            so existing keyword callers continue to work.
        prompt: System message placed before the user turn.

    Returns:
        The newly generated text with trailing newlines removed.
    """
    prompt = generate_findings_prompt(input, prompt)

    inputs = tokenizer([prompt])
    inputs = {k: torch.tensor(v).to(DEVICE) for k, v in inputs.items()}
    # Number of prompt tokens; the returned sequence starts with these.
    prompt_length = inputs["input_ids"].shape[1]
    output_ids = model.generate(
        **inputs,
        do_sample=True,
        temperature=0.7,
        repetition_penalty=1.0,
        max_new_tokens=512,
    )

    # Decode only the tokens produced after the prompt. Removing the prompt by
    # string replacement on the decoded full sequence is unreliable: decoding
    # may not reproduce the prompt text exactly, and a replace would also
    # delete any repetition of the prompt inside the reply.
    new_token_ids = output_ids[0][prompt_length:]
    generated_text = tokenizer.decode(
        new_token_ids,
        skip_special_tokens=True,
        spaces_between_special_tokens=False,
        clean_up_tokenization_spaces=True,
    )
    return remove_trailing_newlines(generated_text)

def chat_with_llms():
    """Run an interactive console chat against the model named by MODEL_NAME.

    Prints a banner, loads the tokenizer and the model, then repeatedly reads
    a line from standard input, generates a reply and prints it. Typing
    ``exit`` (in any letter case) prints a goodbye message and ends the loop.
    """
    print(
        "vicuna Chatbot Initialized. Ask a question based on a medical report or type 'exit' to end the chat."
    )

    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    # Reuse the end-of-sequence token for padding and pad on the right.
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"

    load_options = {
        "torch_dtype": torch.bfloat16,
        "low_cpu_mem_usage": True,
        "device_map": "auto",
    }
    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, **load_options)

    while True:
        question = input("USER: ")
        if question.lower() != "exit":
            answer = generate_code_from_prompt(model, tokenizer, question)
            print(f"Vicuna Chatbot: {answer} \n \n")
            continue
        print("\n Vicuna Chatbot: Goodbye!")
        return

# Start the interactive chat only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    chat_with_llms()

icedpanda commented 1 year ago

Same here. Have you found a solution to your issue yet? My 13B works, but 7B responds in a different language.

Akshay1-6180 commented 1 year ago

Nope, I went through the FastChat repo intensively to see what the issue is but still couldn't figure it out. I have the same problem with 13B too: it would work initially for me, but after that even its output is gibberish.

Akshay1-6180 commented 1 year ago

@merrymercy could you help us with this?

liziniu commented 9 months ago

I encounter the same issue when evaluating vicuna v1.5 on alpaca eval

    "id": 39,
    "instruction": "Should I get my children a nanny? I'm so exhausted.",
    "output": "这是一个非常个人化的问题，没有一个固定的答案。每个家庭的情况是不同的，因此您需要考虑您的家庭情况和需求来决定是否应该让您的孩子有一个寻找。\n\n一些人认为，让孩子有一个寻找可以帮助他们避免过度疲劳和焦虑，特别是在家庭中有多个孩子或是有一个孩子需要特殊照顾。但是，这种决定也需要考虑到许多因素，包括财政问题、家庭文化和个人偏好等。\n\n如果您觉得自己无法承受照顾孩子的负担，那么让孩子有一个寻找可能是一个不错的选择。但是，如果您觉得自己可以承受这些负担，并且您的家庭对于照顾孩子有足够的时间和精力，那么您可以考虑不要让孩子有一个寻找。\n\n无论您做出哪种决定，最重要的是要让自己和您的家庭有足够的时间和精力来照顾孩子，并确保他们得到最好的养育和照顾。",
    "generator": "lmsys/vicuna-7b-v1.5",
    "dataset": "helpful_base"

lm-sys / FastChat

Vicuna v1.5 giving wrong responses in a different language when trying to do vanilla inference #2314