Closed louis030195 closed 3 weeks ago
!pip install -q --upgrade bitsandbytes transformers accelerate huggingface_hub
!pip install "colpali-engine>=0.3.0,<0.4.0"
import torch
from colpali_engine.models import ColPali
from huggingface_hub import HfApi, create_repo, login
from transformers import AutoTokenizer, BitsAndBytesConfig
# Log in to Hugging Face so the later Hub calls are authenticated.
login()

# Source checkpoint to quantize and the destination repository on the Hub.
model_name = "vidore/colpali-v1.2-merged"
your_username = "louis030195"
your_repo_name = f"{your_username}/quantized-colpali-v1.2"

# Create the destination repo only when it is actually missing.
# An explicit existence check is used instead of a blanket `except Exception`,
# so authentication or network failures surface as errors rather than being
# mistaken for "repository not found".
api = HfApi()
if api.repo_exists(repo_id=your_repo_name, repo_type="model"):
    print(f"Repository {your_repo_name} already exists.")
else:
    create_repo(your_repo_name, repo_type="model", private=False)
    print(f"Created new repository: {your_repo_name}")

# 4-bit NF4 quantization with double quantization and bfloat16 compute.
# The settings live in a single BitsAndBytesConfig: passing `load_in_4bit`
# as a separate keyword alongside `quantization_config` is rejected by
# `from_pretrained`, so the flag must appear only inside the config.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

# Load the model with 4-bit quantization applied at load time.
model = ColPali.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    quantization_config=quantization_config,
)

# Load the tokenizer that ships with the source checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Save the quantized model and tokenizer locally and push both to the Hub.
model.save_pretrained("./quantized_model", push_to_hub=True, repo_id=your_repo_name)
tokenizer.save_pretrained("./quantized_model", push_to_hub=True, repo_id=your_repo_name)
print(f"Quantized model pushed to: https://huggingface.co/{your_repo_name}")
https://github.com/huggingface/candle/blob/main/candle-examples/examples/colpali/main.rs
It took me 1–2 hours to index 3 months of data on a MacBook Pro (M3 Max).