bensonbs opened 4 weeks ago
I am having the same question
I am using llama.cpp to convert the model to GGUF, but it seems to only support the Mistral v0.1 (Hugging Face) naming format, so I wrote the following script to convert the layer names:
```python
import torch
from mistral_inference.model import Transformer
from safetensors.torch import save_file, load_file

# Map mistral_inference layer names back to the Hugging Face naming scheme
def reverse_convert_layer_name(name):
    reverse_layer_mapping = {
        "tok_embeddings.weight": "model.embed_tokens.weight",
        "norm.weight": "model.norm.weight",
        "output.weight": "lm_head.weight",
    }
    if name in reverse_layer_mapping:
        return reverse_layer_mapping[name]
    parts = name.split(".")
    if len(parts) < 3:
        return name
    layer_num = parts[1]
    # attention_norm is the pre-attention norm (HF input_layernorm);
    # ffn_norm is the pre-MLP norm (HF post_attention_layernorm)
    if parts[2] == "attention_norm":
        return f"model.layers.{layer_num}.input_layernorm.weight"
    elif parts[2] == "ffn_norm":
        return f"model.layers.{layer_num}.post_attention_layernorm.weight"
    elif parts[2] == "attention":
        attn_reverse_mapping = {
            "wq": "q_proj",
            "wk": "k_proj",
            "wv": "v_proj",
            "wo": "o_proj",
        }
        if parts[3] in attn_reverse_mapping:
            return f"model.layers.{layer_num}.self_attn.{attn_reverse_mapping[parts[3]]}.weight"
    elif parts[2] == "feed_forward":
        mlp_reverse_mapping = {
            "w1": "gate_proj",
            "w2": "down_proj",
            "w3": "up_proj",
        }
        if parts[3] in mlp_reverse_mapping:
            return f"model.layers.{layer_num}.mlp.{mlp_reverse_mapping[parts[3]]}.weight"
    return name

# Load the original model
model = Transformer.from_folder("/mnt/share/LLM/Breeze-7B-Instruct-v1_0")
model.to("cpu")

# Load the LoRA weights
lora_weights = load_file("/mnt/share/LLM/mistral_models/breeze-7b-lora/checkpoints/checkpoint_000100/consolidated/lora.safetensors")

# Apply the LoRA weights to the model. This assumes lora.safetensors holds
# full weight deltas under the same key names as the model's parameters;
# if it holds factored lora_A/lora_B matrices, see the sketch below.
with torch.no_grad():
    for name, param in model.named_parameters():
        if name in lora_weights:
            param.data += lora_weights[name].to(param.dtype)

# Extract the state_dict and convert the layer names
state_dict = model.state_dict()
new_state_dict = {reverse_convert_layer_name(k): v for k, v in state_dict.items()}

# Save the renamed weights as a safetensors file
save_file(new_state_dict, "/mnt/share/LLM/Breeze-7B-z0.1/model.safetensors")
print("Model has been successfully saved as a safetensors file.")
```
The config.json needs to be changed to the following:
```json
{
  "architectures": [
    "MistralForCausalLM"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 32768,
  "model_type": "mistral",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "output_router_logits": true,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_theta": 10000.0,
  "sliding_window": 4096,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.37.2",
  "use_cache": false,
  "vocab_size": xxxxxx
}
```
Please replace xxxxxx with the actual vocabulary size.
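If you are unsure of the vocabulary size, one way to read it off (a minimal sketch, using the output path from the script above) is the first dimension of the token embedding matrix:

```python
from safetensors.torch import load_file

state = load_file("/mnt/share/LLM/Breeze-7B-z0.1/model.safetensors")
# The embedding matrix has shape (vocab_size, hidden_size)
print(state["model.embed_tokens.weight"].shape[0])
```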
You can also use convert_mistral_weights_to_hf.py to save the model as a Hugging Face model and then use llama.cpp to convert from HF to GGUF:
```sh
pip install sentencepiece accelerate
python [path to the HF repo]/src/transformers/models/mistral/convert_mistral_weights_to_hf.py --input_dir [model dir] --model_size 7B --is_v3 --output_dir [new hf model]
```
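With the weights and config.json in Hugging Face format (via either route above), the final GGUF conversion is a single llama.cpp script call. A sketch; the script name depends on your llama.cpp version (older checkouts ship it as convert-hf-to-gguf.py or convert.py), so check the repository root:

```sh
# run from the root of a llama.cpp checkout
python convert_hf_to_gguf.py /mnt/share/LLM/Breeze-7B-z0.1 \
    --outfile breeze-7b-f16.gguf --outtype f16
```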
How do I convert a model to GGUF after fine-tuning?