mistralai / mistral-finetune


How to convert the model to GGUF after fine-tuning? #64

Open · bensonbs opened this issue 4 weeks ago

bensonbs commented 4 weeks ago

How to convert the model to GGUF after fine-tuning?

alxtkeng commented 4 weeks ago

I have the same question.

bensonbs commented 4 weeks ago

I am using llama.cpp to convert the model to GGUF, but it only seems to support the Mistral v0.1 (Hugging Face-style) layer naming. Therefore, I remapped the layer names before saving.

from mistral_inference.model import Transformer
from safetensors.torch import save_file, load_file

# Define the reverse layer name conversion rules
def reverse_convert_layer_name(name):
    reverse_layer_mapping = {
        "tok_embeddings.weight": "model.embed_tokens.weight",
        "norm.weight": "model.norm.weight",
        "output.weight": "lm_head.weight"
    }

    if name in reverse_layer_mapping:
        return reverse_layer_mapping[name]

    parts = name.split(".")
    if len(parts) < 3:
        return name

    layer_num = parts[1]
    # attention_norm is applied before self-attention (HF input_layernorm);
    # ffn_norm is applied before the MLP (HF post_attention_layernorm).
    if parts[2] == "attention_norm":
        return f"model.layers.{layer_num}.input_layernorm.weight"
    elif parts[2] == "ffn_norm":
        return f"model.layers.{layer_num}.post_attention_layernorm.weight"
    elif parts[2] == "attention":
        attn_reverse_mapping = {
            "wk": "k_proj",
            "wv": "v_proj",
            "wq": "q_proj",
            "wo": "o_proj"
        }
        if parts[3] in attn_reverse_mapping:
            return f"model.layers.{layer_num}.self_attn.{attn_reverse_mapping[parts[3]]}.weight"
    elif parts[2] == "feed_forward":
        mlp_reverse_mapping = {
            "w2": "down_proj",
            "w1": "gate_proj",
            "w3": "up_proj"
        }
        if parts[3] in mlp_reverse_mapping:
            return f"model.layers.{layer_num}.mlp.{mlp_reverse_mapping[parts[3]]}.weight"

    return name

# Load the original model
model = Transformer.from_folder("/mnt/share/LLM/Breeze-7B-Instruct-v1_0")
model.to('cpu')

# Load the LoRA weights
lora_weights = load_file("/mnt/share/LLM/mistral_models/breeze-7b-lora/checkpoints/checkpoint_000100/consolidated/lora.safetensors")

# Apply the LoRA weights to the model
for name, param in model.named_parameters():
    if name in lora_weights:
        param.data += lora_weights[name].data

# Extract the model's state_dict
state_dict = model.state_dict()

# Create a new dictionary to store the converted layer names
new_state_dict = {}
for name, param in state_dict.items():
    new_name = reverse_convert_layer_name(name)
    new_state_dict[new_name] = param

# Save the new model as a safetensors file
save_file(new_state_dict, "/mnt/share/LLM/Breeze-7B-z0.1/model.safetensors")

# Confirm the save was successful
print("Model has been successfully saved as a safetensors file.")

Config.json Changes

The config.json needs to be changed to the following:

{
  "architectures": [
    "MistralForCausalLM"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 32768,
  "model_type": "mistral",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "output_router_logits": true,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_theta": 10000.0,
  "sliding_window": 4096,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.37.2",
  "use_cache": false,
  "vocab_size": xxxxxx
}

Please replace xxxxxx with the actual vocabulary size.
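Before handing the directory to llama.cpp, it can be worth sanity-checking that the renamed weights and the edited config.json actually load as a regular Hugging Face Mistral model. A minimal sketch, assuming the converted model.safetensors and config.json sit in /mnt/share/LLM/Breeze-7B-z0.1 (the output directory used above) together with the tokenizer files, which the script above does not copy:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_dir = "/mnt/share/LLM/Breeze-7B-z0.1"

# Load the converted checkpoint as a standard HF Mistral model
tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = AutoModelForCausalLM.from_pretrained(model_dir, torch_dtype=torch.bfloat16)

# Run a short generation to confirm the weights were mapped sensibly
inputs = tokenizer("Hello, my name is", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))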

alexsgit commented 3 weeks ago

You can also use convert_mistral_weights_to_hf.py to save the model as a Hugging Face model, then use llama.cpp to convert from HF to GGUF.

pip install sentencepiece accelerate
python [path to the HF repo]/src/transformers/models/mistral/convert_mistral_weights_to_hf.py --input_dir [model dir] --model_size 7B --is_v3 --output_dir [new hf model]
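For the last step, llama.cpp ships a conversion script that reads the Hugging Face directory and writes a GGUF file. Roughly, assuming a recent llama.cpp checkout (the script is named convert-hf-to-gguf.py or convert_hf_to_gguf.py depending on the version, and the exact flags may differ):

pip install -r [path to llama.cpp]/requirements.txt
python [path to llama.cpp]/convert-hf-to-gguf.py [new hf model] --outfile [output].gguf --outtype f16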