Closed: marscod closed this issue 1 month ago
```python
import fire
import os
import yaml
import torch

from transformers import AutoTokenizer, MllamaForConditionalGeneration, AutoProcessor

from llama_recipes.model_checkpointing.checkpoint_handler import load_sharded_model_single_gpu


def main(
    fsdp_checkpoint_path: str = "",       # Path to FSDP sharded model checkpoints
    consolidated_model_path: str = "",    # Path to save the HF converted model checkpoints
    HF_model_path_or_name: str = "",      # Path/name of the HF model that includes config.json and tokenizer_config.json
    use_bfloat16: bool = True             # Whether to convert the model to bfloat16 precision
):
    """
    Convert FSDP sharded model checkpoints to Hugging Face format, with optional bfloat16 conversion.

    Arguments:
        fsdp_checkpoint_path (str): Path to the FSDP sharded checkpoints (directory with .distcp files).
        consolidated_model_path (str): Path where the converted Hugging Face model will be saved.
        HF_model_path_or_name (str): Name or path of the Hugging Face model providing config/tokenizer
            (e.g., 'meta-llama/Llama-3.2-11B-Vision-Instruct').
        use_bfloat16 (bool): Whether to convert the model to bfloat16 precision during the process.

    Example:
        python3 convert_fsdp_to_hf.py \
            --fsdp_checkpoint_path /path/to/fsdp/checkpoints \
            --consolidated_model_path /path/to/save/hf_model \
            --HF_model_path_or_name meta-llama/Llama-3.2-11B-Vision-Instruct \
            --use_bfloat16 True
    """
    try:
        # Attempt to load the model name from train_params.yaml
        file_name = 'train_params.yaml'
        train_params_path = os.path.join(fsdp_checkpoint_path, file_name)
        with open(train_params_path, 'r') as file:
            data = yaml.safe_load(file)
        HF_model_path_or_name = data.get('model_name', HF_model_path_or_name)
        print(f"Model name from train_params.yaml: {HF_model_path_or_name}")
    except FileNotFoundError:
        print(f"train_params.yaml not found in {fsdp_checkpoint_path}. Using provided model name.")
    except Exception as e:
        print(f"Error loading train_params.yaml: {e}")

    # Load the model definition from the Hugging Face config using MllamaForConditionalGeneration
    model = MllamaForConditionalGeneration.from_pretrained(
        HF_model_path_or_name,
        torch_dtype=torch.bfloat16 if use_bfloat16 else torch.float32,
        device_map="auto",
    )
    print("Model loaded from Hugging Face config")

    # Load the FSDP sharded checkpoints into the model
    model = load_sharded_model_single_gpu(model, fsdp_checkpoint_path)
    print("Model loaded from FSDP checkpoints")

    # Load and save the tokenizer from the Hugging Face model path
    tokenizer = AutoTokenizer.from_pretrained(HF_model_path_or_name)
    tokenizer.save_pretrained(consolidated_model_path)

    # Save the consolidated checkpoints in Hugging Face format (bfloat16 if applicable)
    model.save_pretrained(consolidated_model_path, safe_serialization=True)
    print(f"Hugging Face model checkpoints have been saved in {consolidated_model_path}")


if __name__ == "__main__":
    fire.Fire(main)
```

try this?
```
python3 /home/myles/llama-recipes/src/llama_recipes/inference/checkpoint_converter_fsdp_hf1.py \
    --fsdp_checkpoint_path /home/myles/llama-recipes/finetuned_model/fine-tuned-meta-llama/Llama-3.2-11B-Vision-Instruct \
    --consolidated_model_path /home/myles/llama-recipes/finetuned_model/fine-tuned-meta-llama/Llama-3.2-11B-Vision-Instruct/hf \
    --HF_model_path_or_name meta-llama/Llama-3.2-11B-Vision-Instruct \
    --use_bfloat16 True

/home/myles/llama-recipes/src/llama_recipes/model_checkpointing/checkpoint_handler.py:17: DeprecationWarning: `torch.distributed._shard.checkpoint` will be deprecated, use `torch.distributed.checkpoint` instead
  from torch.distributed._shard.checkpoint import (
Model name from train_params.yaml: meta-llama/Llama-3.2-11B-Vision-Instruct
The model weights are not tied. Please use the `tie_weights` method before using the `infer_auto_device` function.
Loading checkpoint shards: 100%|██████████| 5/5 [00:04<00:00, 1.05it/s]
Model loaded from Hugging Face config
/home/myles/llama-recipes/src/llama_recipes/model_checkpointing/checkpoint_handler.py:259: FutureWarning: `load_state_dict` is deprecated and will be removed in future versions. Please use `load` instead.
  dist_cp.load_state_dict(
Sharded state checkpoint loaded from /home/myles/llama-recipes/finetuned_model/fine-tuned-meta-llama/Llama-3.2-11B-Vision-Instruct
Model loaded from FSDP checkpoints
[2024-10-06 22:55:18,250] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)
Hugging Face model checkpoints have been saved in /home/myles/llama-recipes/finetuned_model/fine-tuned-meta-llama/Llama-3.2-11B-Vision-Instruct/hf
```
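For reference on the FutureWarning above: a minimal sketch (not the library's actual code) of how the same single-GPU sharded load could be expressed with the non-deprecated `torch.distributed.checkpoint.load` API, assuming PyTorch 2.x and the same top-level `"model"` key that llama-recipes uses when saving sharded checkpoints.

```python
# Hedged sketch only: assumes torch.distributed.checkpoint.load is available
# (PyTorch 2.x) and the sharded checkpoint stores weights under a "model" key.
# `model` and `fsdp_checkpoint_path` refer to the objects in the script above.
import torch.distributed.checkpoint as dist_cp
from torch.distributed.checkpoint import FileSystemReader

state_dict = {"model": model.state_dict()}
dist_cp.load(
    state_dict=state_dict,
    storage_reader=FileSystemReader(fsdp_checkpoint_path),
)
model.load_state_dict(state_dict["model"])
```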
Thank you so much! It works perfectly. :)
nit: it seems `preprocessor_config.json` is not saved by `tokenizer.save_pretrained(consolidated_model_path)`:

> does not appear to have a file named preprocessor_config.json. Checkout 'https://huggingface.co//home/...' for available files.

I think the chat template is also not saved, but it's easy enough to copy those two files.
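For completeness, a minimal sketch of how the script could write those two files as well; this is an editorial assumption rather than part of the original script, and it relies on the `AutoProcessor` class the script already imports.

```python
# Hedged addition (not in the original script): saving the processor writes
# preprocessor_config.json into the output directory, and recent transformers
# versions save the chat template alongside it. HF_model_path_or_name and
# consolidated_model_path are the same variables used in main() above.
from transformers import AutoProcessor

processor = AutoProcessor.from_pretrained(HF_model_path_or_name)
processor.save_pretrained(consolidated_model_path)
```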
System Info
transformers: '4.45.1'
Information
🐛 Describe the bug
I have run full fine-tuning of Llama-3.2-11B-Vision-Instruct with FSDP as follows (as explained here). I also used the following script to consolidate the checkpoints, as explained here: