Open kks-imt opened 3 weeks ago
possible solution: examples/server/utils.hpp
// Returns the chat template stored in the model's GGUF metadata
// ("tokenizer.chat_template"), or an empty string if the key is absent.
static std::string llama_get_chat_template(const struct llama_model * model) {
    std::string template_key = "tokenizer.chat_template";
    // Probe with a NULL buffer to get the length of the value.
    // llama_model_meta_val_str has snprintf semantics: the return value is
    // the number of characters EXCLUDING the terminating '\0'.
    int32_t res = llama_model_meta_val_str(model, template_key.c_str(), NULL, 0);
    if (res < 0) {
        // Key not present in the model metadata.
        return "";
    }
    // Allocate one extra byte for the '\0' that snprintf always appends;
    // a buffer of exactly `res` bytes would truncate the last character
    // of the chat template.
    std::vector<char> model_template(res + 1, 0);
    llama_model_meta_val_str(model, template_key.c_str(), model_template.data(), model_template.size());
    // Exclude the trailing '\0' from the returned string.
    return std::string(model_template.data(), model_template.size() - 1);
}
src/llama.cc
int32_t llama_model_meta_val_str(const struct llama_model model, const char key, char buf, size_t buf_size) {
const auto & it = model->gguf_kv.find(key);
if (it == model->gguf_kv.end()) {
if (buf_size > 0) {
buf[0] = '\0';
}
return -1;
}
return snprintf(buf, buf_size, "%s", it->second.c_str());
}
The C function snprintf always appends a '\x00' terminator to the buffer, so with a buffer of exactly `res` bytes it replaces the last character of chat_template with '\x00' (truncating the template by one character).
Name and Version
C:\llama.cpp>llama-cli --version ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 CUDA devices: Device 0: NVIDIA GeForce RTX 2050, compute capability 8.6, VMM: yes version: 4055 (e8921349) built with MSVC 19.29.30152.0 for x64
What operating system are you seeing the problem on?
Linux, Windows
Relevant log output