QwenLM / CodeQwen1.5

CodeQwen1.5 is the code version of Qwen, the large language model series developed by the Qwen team, Alibaba Cloud.
371 stars 22 forks source link

Codeqwen 1.5 sometimes outputs duplicate code snippets when completing code #81

Closed iYaDongWang closed 2 weeks ago

iYaDongWang commented 2 weeks ago

When encountering certain prompts, Codeqwen often repeatedly outputs the same code snippets. Here is a reproducible example with the prompt and options:

Prompt:

<fim_prefix>package com.giga.ar.ai.example.complete;

import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;

public class SignUtil {

//判断一个字符串是否为回字符串串<fim_suffix>

/**
 * requestText {String} 原始通知报文
 * key         {String} 密钥
 * return      {String} 生成签名内容
 */
private static void getGeneratedSignature(String requestText, String key) throws NoSuchAlgorithmException {
    String src = requestText + "/" + key;
    //使用 SHA256 十六进制编码生成签名结果
    MessageDigest md = MessageDigest.getInstance("SHA-256");
    byte[] hash = md.digest(src.getBytes(StandardCharsets.UTF_8));

}<fim_middle>

Options:

{
  "temperature": 0,
  "repeat_penalty": 1.1
}

By using this prompt and these options, Codeqwen tends to generate duplicate code snippets.

The output may look like this:


public static boolean isPalindrome(String str) {
    int left = 0;
    int right = str.length() - 1;
    while (left < right) {
        if (str.charAt(left) != str.charAt(right)) {
            return false;
        }
        left++;
        right--;
    }
    return true;
}

//判断一个字符串是否为回文字符串串
public static boolean isPalindrome2(String s) {
    int left = 0, right = s.length() - 1;
    while (left < right) {
        if (s.charAt(left++) != s.charAt(right--)) return false;
    }
    return true;
}

//判断一个字符串是否为回文字符串串
public static boolean isPalindrome3(String str) {
    int len = str.length();
    for (int i = 0, j = len - 1; i < j; ++i, --j) {
        if (str.charAt(i) != str.charAt(j)) return false;
    }
    return true;
}

//判断一个字符串是否为回文字符串串
public static boolean isPalindrome4(String s) {
    int len = s.length();
    for (int i = 0, j = len - 1; i < j; ++i, --j) {
        if (s.charAt(i) != s.charAt(j)) return false;
    }
    return true;
}

//判断一个字符串是否为回文字符串串
public static boolean isPalindrome5(String str) {
    int len = str.length();
    for (int i = 0, j = len - 1; i < j; ++i, --j) {
        if (str.charAt(i) != str.charAt(j)) return false;
    }
    return true;
}

//判断一个字符串是否为回文字符串串
public static boolean isPalindrome6(String s) {
    int len = s.length();
    for (int i = 0, j = len - 1; i < j; ++i, --j) {
        if (s.charAt(i) != s.charAt(j)) return false;
    }
    return true;
}

//判断一个字符串是否为回文字符串串
public static boolean isPalindrome7(String str) {
    int len = str.length();
    for (int i = 0, j = len - 1; i < j; ++i, --j) {
        if (str.charAt(i) != str.charAt(j)) return false;
    }
    return true;
}

//判断一个字符串是否为回文字符串串
public static boolean isPalindrome8(String s) {
    int len = s.length();
    for (int i = 0, j = len - 1; i < j; ++i, --j) {
        if (s.charAt(i) != s.charAt(j)) return false;
    }
    return true;
}

//判断一个字符串是否为回文字符串串
public static boolean isPalindrome9(String str) {
    int len = str.length();
    for (int i = 0, j = len - 1; i < j; ++i, --j) {
        if (str.charAt(i) != str.charAt(j)) return false;
    }
    return true;
}

//判断一个字符串是否为回文字符串串
public static boolean isPalindrome10(String s) {
    int len = s.length();
    for (int i = 0, j = len - 1; i < j; ++i, --j) {
        if (s.charAt(i) != s.charAt(j)) return false;
    }
    return true;
}

//判断一个字符串是否为回文字符串串
public static boolean isPalindrome11(String str) {
    int len = str.length();
    for (int i = 0, j = len - 1; i < j; ++i, --j) {
        if (str.charAt(i) != str.charAt(j)) return false;
    }
    return true;
}

//判断一个字符串是否为回文字符串串
public static boolean isPalindrome12(String s) {
    int len = s.length();
    for (int i = 0, j = len - 1; i < j; ++i, --j) {
        if (s.charAt(i) != s.charAt(j)) return false;
    }
    return true;
}

//判断一个字符串是否为回文字符串串
public static boolean isPalindrome13(String str) {
    int len = str.length();
    for (int i = 0, j = len - 1; i < j; ++i, --j) {
        if (str.charAt(i) != str.charAt(j)) return false;
    }
    return true;
}

//判断一个字符串是否为回文字符串串
public static boolean isPalindrome14(String s) {
    int len = s.length();
    for (int i = 0, j = len - 1; i < j; ++i, --j) {
        if (s.charAt(i) != s.charAt(j)) return false;
    }
    return true;
}

//判断一个字符串是否为回文字符串串
public static boolean isPalindrome15(String str) {
    int len = str.length();
    for (int i = 0, j = len - 1; i < j; ++i, --j) {
        if (str.charAt(i) != str.charAt(j)) return false;
    }
    return true;
}

//判断一个字符串是否为回文字符串串
public static boolean isPalindrome16(String s) {
    int len = s.length();
    for

Do you have any suggestions to avoid this issue?

cyente commented 2 weeks ago

Hi, here are some experimental suggestions:

First, try to keep the model from generating `\n` as its first character. For example, add `\n` after `<fim_middle>` or before `<fim_suffix>`.

Second, avoid unnecessary empty lines like these:

public class SignUtil {

//判断一个字符串是否为回字符串串<fim_suffix>