pkoukk / tiktoken-go

go version of tiktoken
MIT License
601 stars 67 forks source link

计算结果有误差 我该如何调整 #42

Open Huxiaoyou97 opened 5 months ago

Huxiaoyou97 commented 5 months ago

我的代码如下

package main

import (
    "fmt"
    "log"
    "strings"

    "github.com/pkoukk/tiktoken-go"
    "github.com/sashabaranov/go-openai"
)

// main builds a two-message sample conversation (one user turn and one
// assistant turn) and prints the token count that NumTokensFromMessages
// reports for it under the "gpt-3.5-turbo-0613" model.
func main() {
    const model = "gpt-3.5-turbo-0613"

    conversation := []openai.ChatCompletionMessage{
        {Role: "user", Content: "Hello!"},
        {Role: "assistant", Content: "Hello! How can I assist you today?"},
    }

    count := NumTokensFromMessages(conversation, model)
    fmt.Println(count)
}

// NumTokensFromMessages returns a token count for a list of chat messages
// under the given model.
//
// For every message it adds a fixed per-message overhead plus the encoded
// lengths of the message's Content, Role and Name; a non-empty Name adds a
// further per-name adjustment. A constant 3 is added once per call.
//
// Model names that are not matched exactly but contain "gpt-3.5-turbo" or
// "gpt-4" are counted as "gpt-3.5-turbo-0613" or "gpt-4-0613" respectively,
// after a warning is logged. If the encoding for the model cannot be loaded,
// or the model is not supported, the problem is logged and 0 is returned.
func NumTokensFromMessages(messages []openai.ChatCompletionMessage, model string) (numTokens int) {
    enc, err := tiktoken.EncodingForModel(model)
    if err != nil {
        log.Println(fmt.Errorf("encoding for model: %v", err))
        return 0
    }

    // Exact model names are checked first; the substring fallbacks only
    // apply when none of them matched.
    var perMessage, perName int
    switch {
    case model == "gpt-3.5-turbo-0301":
        perMessage = 4 // every message follows <|start|>{role/name}\n{content}<|end|>\n
        perName = -1   // if there's a name, the role is omitted
    case model == "gpt-3.5-turbo-0613",
        model == "gpt-3.5-turbo-16k-0613",
        model == "gpt-4-0314",
        model == "gpt-4-32k-0314",
        model == "gpt-4-0613",
        model == "gpt-4-32k-0613":
        perMessage = 3
        perName = 1
    case strings.Contains(model, "gpt-3.5-turbo"):
        log.Println("warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613.")
        return NumTokensFromMessages(messages, "gpt-3.5-turbo-0613")
    case strings.Contains(model, "gpt-4"):
        log.Println("warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.")
        return NumTokensFromMessages(messages, "gpt-4-0613")
    default:
        log.Println(fmt.Errorf("num_tokens_from_messages() is not implemented for model %s. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.", model))
        return 0
    }

    // Constant added once per call.
    // NOTE(review): presumably the tokens that prime the assistant's reply,
    // as in OpenAI's reference counting snippet — confirm.
    total := 3
    for i := range messages {
        msg := &messages[i]
        total += perMessage
        total += len(enc.Encode(msg.Role, nil, nil))
        total += len(enc.Encode(msg.Content, nil, nil))
        total += len(enc.Encode(msg.Name, nil, nil))
        if msg.Name != "" {
            total += perName
        }
    }
    return total
}

我的打印结果如下：执行 `go run main.go`，输出为 22

使用官方接口返回值如下

image

请问我应该如何修改，才能获得正确的 token 数量？