I fine-tuned Qwen2-VL 7B using ms-swift and I'm now trying to quantize it. I tried using ms-swift itself, which failed, so I followed the guide in your repo to write a custom script. Even with 8x A100s (80 GB) it still OOMs, with just 8 calibration samples. I'm not sure why this is happening; any help would be appreciated!

Here's the script I'm using (it converts the ms-swift JSONL format to the required format):
import json
from pathlib import Path
from typing import List, Dict, Any

from transformers import Qwen2VLProcessor
from awq.models.qwen2vl import Qwen2VLAWQForConditionalGeneration
from qwen_vl_utils import process_vision_info


def load_jsonl(file_path: str) -> List[Dict[str, Any]]:
    """Load data from a JSONL file."""
    with open(file_path, "r", encoding="utf-8") as f:
        return [json.loads(line) for line in f]


def prepare_dataset(dataset_file: str, n_samples: int = 2) -> List[List[Dict]]:
    """Convert ms-swift JSONL records into chat-format calibration messages."""
    dataset = load_jsonl(dataset_file)
    dataset = dataset[:n_samples]
    formatted_data = []
    for item in dataset:
        # Create image content list for multiple images
        image_content = []
        for image_path in item["images"]:
            # Assuming local file paths relative to ./output
            image_content.append({"type": "image", "image": f"file://output/{image_path}"})
        # Add the text query after the images
        image_content.append({"type": "text", "text": item["query"]})
        formatted_message = [
            {"role": "user", "content": image_content},
            {"role": "assistant", "content": item["response"]},
        ]
        formatted_data.append(formatted_message)
    return formatted_data


def main():
    # Configuration
    model_path = "./qwen2-vl-7b-instruct/v0-20241102-150323/checkpoint-660"
    quant_path = "./qwen2-vl-7b-instruct/checkpoint-660-awq"
    dataset_file = "./label-dataset-train.jsonl"

    # Quantization config
    quant_config = {"zero_point": True, "q_group_size": 128, "w_bit": 4, "version": "GEMM"}

    print("Loading processor and model...")
    processor = Qwen2VLProcessor.from_pretrained(model_path)
    model = Qwen2VLAWQForConditionalGeneration.from_pretrained(
        model_path,
        model_type="qwen2_vl",
        use_cache=False,
    )

    print("Preparing dataset...")
    dataset = prepare_dataset(dataset_file)

    print("Processing inputs...")
    text = processor.apply_chat_template(dataset, tokenize=False, add_generation_prompt=True)
    # Process vision info (handles multiple images per message)
    image_inputs, video_inputs = process_vision_info(dataset)
    inputs = processor(text=text, images=image_inputs, videos=video_inputs, padding=True, return_tensors="pt")

    print("Starting quantization...")
    model.quantize(calib_data=inputs, quant_config=quant_config)

    print("Saving quantized model...")
    # Enable use_cache for inference
    model.model.config.use_cache = model.model.generation_config.use_cache = True
    model.save_quantized(quant_path, safetensors=True, shard_size="4GB")
    processor.save_pretrained(quant_path)
    print(f"Quantization complete! Model saved to: {quant_path}")


if __name__ == "__main__":
    main()
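For reference, each line of label-dataset-train.jsonl looks roughly like this (the field names match what the script reads; the values here are just illustrative):

{"images": ["batch1/img_001.jpg", "batch1/img_002.jpg"], "query": "What label applies to these images?", "response": "label-A"}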